Add GLprobs and MSAprobs to binaries
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Tue, 24 Sep 2013 14:08:57 +0000 (15:08 +0100)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Tue, 24 Sep 2013 14:08:57 +0000 (15:08 +0100)
57 files changed:
binaries/src/GLProbs-1.0/Defaults.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/FileBuffer.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSA.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSA.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSA2.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAClusterTree.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAClusterTree.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSADef.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAGuideTree.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAGuideTree.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAPartProbs.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAProbs.vcproj [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAProbs.vcproj.STUDENT.LIUY0039.user [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAReadMatrix.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAReadMatrix.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAdiv3.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAfull.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/MSAgl+l+p+gl.cpp [new file with mode: 0644]
binaries/src/GLProbs-1.0/Makefile [new file with mode: 0644]
binaries/src/GLProbs-1.0/MultiSequence.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/ProbabilisticModel.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/SafeVector.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/ScoreType.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/Sequence.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/SparseMatrix.h [new file with mode: 0644]
binaries/src/GLProbs-1.0/glprobs [new file with mode: 0755]
binaries/src/GLProbs-1.0/main.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/ChangeLog [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs.ncb [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs.sln [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs.suo [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/Defaults.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/FileBuffer.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSADef.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAPartProbs.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj.STUDENT.LIUY0039.user [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/Makefile [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/MultiSequence.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/ProbabilisticModel.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/SafeVector.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/ScoreType.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/Sequence.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/SparseMatrix.h [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/main.cpp [new file with mode: 0644]
binaries/src/MSAProbs-0.9.7/MSAProbs/msaprobs [new file with mode: 0755]
binaries/src/MSAProbs-0.9.7/README [new file with mode: 0644]
binaries/src/compilebin.sh
binaries/src/setexecflag.sh

diff --git a/binaries/src/GLProbs-1.0/Defaults.h b/binaries/src/GLProbs-1.0/Defaults.h
new file mode 100644 (file)
index 0000000..953cdac
--- /dev/null
@@ -0,0 +1,120 @@
+/////////////////////////////////////////////////////////////////
+// Defaults.h
+//
+// Default constants for use in MSAPROBS.  The emission
+// probabilities were computed using the program used to build
+// the BLOSUM62 matrix from the BLOCKS 5.0 dataset.  Transition
+// parameters were obtained via unsupervised EM training on the
+// BALIBASE 2.0 benchmark alignment database.
+/////////////////////////////////////////////////////////////////
+
+#ifndef DEFAULTS_H
+#define DEFAULTS_H
+
+#include <string>
+
+using namespace std;
+
+/*
+ float initDistrib1Default[] = { 0.3202854395, 0.3398572505, 0.3398572505 };
+ float gapOpen1Default[] = { 0.1375414133, 0.1375414133 };
+ float gapExtend1Default[] = { 0.7832147479, 0.7832147479 };
+ */
+/*
+float initDistrib1Default[] = { 0.6080327034f, 0.1959836632f, 0.1959836632f };
+float gapOpen1Default[] = { 0.01993141696f, 0.01993141696f };
+float gapExtend1Default[] = { 0.7943345308f, 0.7943345308f };
+*/
+
+// Active "1"-suffixed transition defaults (3 / 2 / 2 entries).
+// NOTE(review): presumably the single-insert-state model -- confirm.
+// NOTE(review): the first two initDistrib1Default literals carry no 'f'
+// suffix, so they are double constants converted to float on initialization.
+float initDistrib1Default[] = {
+               0.06188,
+               0.93812,
+               0.1959836632f
+};
+float gapOpen1Default[] = {
+               0.01993141696f,
+               0.01993141696f
+};
+float gapExtend1Default[] = {
+               0.7943345308f,
+               0.7943345308f
+};
+
+/*
+float initDistrib1Default[] = { 0.2031769156f, 0.7968229055f, 0.05529401079f };
+float gapOpen1Default[] = { 0.006541831419f, 0.006541831419f };
+float gapExtend1Default[] = { 0.3042867482f, 0.3042867482f };
+*/
+/*
+float initDistrib1Default[] = { 0.109684445f, 0.8903156519f, 0.01231110841f };
+float gapOpen1Default[] = { 0.01968936995f, 0.01968936995f };
+float gapExtend1Default[] = { 0.5699355602f, 0.5699355602f };
+*/
+// "2"-suffixed transition defaults (5 / 4 / 4 entries).
+// NOTE(review): presumably the two-insert-state model -- confirm.
+// The last literal of each array has no 'f' suffix (double constant
+// converted to float on initialization).
+float initDistrib2Default[] = {
+               0.6814756989f,
+               8.615339902e-05f,
+               0.700645f,
+               0.1591759622f,
+               0.1591759622
+};
+float gapOpen2Default[] = {
+               0.0119511066f,
+               0.01993141696f,
+               0.008008334786f,
+               0.008008334786
+};
+float gapExtend2Default[] = {
+               0.3965826333f,
+               0.7943345308f,
+               0.8988758326f,
+               0.8988758326
+};
+
+// The 20 residue letters of the default alphabet.
+std::string alphabetDefault = "ARNDCQEGHILKMFPSTWYV";
+
+// One single-residue emission value per letter, 20 in total.
+// NOTE(review): presumably listed in alphabetDefault order -- confirm
+// against the code that loads these defaults.
+float emitSingleDefault[20] = {
+               0.07831005f, 0.05246024f, 0.04433257f, 0.05130349f, 0.02189704f,
+               0.03585766f, 0.05615771f, 0.07783433f, 0.02601093f, 0.06511648f,
+               0.09716489f, 0.05877077f, 0.02438117f, 0.04463228f, 0.03940142f,
+               0.05849916f, 0.05115306f, 0.01203523f, 0.03124726f, 0.07343426f
+};
+
+// Pairwise emission defaults as a 20x20 table.  Only the lower triangle
+// (diagonal included) holds non-zero values; row i lists its first i + 1
+// entries and the omitted trailing entries are zero-initialized by
+// aggregate initialization, i.e. they are 0.0f.
+// NOTE(review): rows/columns presumably follow alphabetDefault order
+// ("ARNDCQEGHILKMFPSTWYV") -- confirm against the loader.
+float emitPairsDefault[20][20] = {
+               /* row  0 */ { 0.02373072f },
+               /* row  1 */ { 0.00244502f, 0.01775118f },
+               /* row  2 */ { 0.00210228f, 0.00207782f, 0.01281864f },
+               /* row  3 */ { 0.00223549f, 0.00161657f, 0.00353540f, 0.01911178f },
+               /* row  4 */ { 0.00145515f, 0.00044701f, 0.00042479f, 0.00036798f,
+                               0.01013470f },
+               /* row  5 */ { 0.00219102f, 0.00253532f, 0.00158223f, 0.00176784f,
+                               0.00032102f, 0.00756604f },
+               /* row  6 */ { 0.00332218f, 0.00268865f, 0.00224738f, 0.00496800f,
+                               0.00037956f, 0.00345128f, 0.01676565f },
+               /* row  7 */ { 0.00597898f, 0.00194865f, 0.00288882f, 0.00235249f,
+                               0.00071206f, 0.00142432f, 0.00214860f, 0.04062876f },
+               /* row  8 */ { 0.00114353f, 0.00132105f, 0.00141205f, 0.00097077f,
+                               0.00026421f, 0.00113901f, 0.00131767f, 0.00103704f,
+                               0.00867996f },
+               /* row  9 */ { 0.00318853f, 0.00138145f, 0.00104273f, 0.00105355f,
+                               0.00094040f, 0.00100883f, 0.00124207f, 0.00142520f,
+                               0.00059716f, 0.01778263f },
+               /* row 10 */ { 0.00449576f, 0.00246811f, 0.00160275f, 0.00161966f,
+                               0.00138494f, 0.00180553f, 0.00222063f, 0.00212853f,
+                               0.00111754f, 0.01071834f, 0.03583921f },
+               /* row 11 */ { 0.00331693f, 0.00595650f, 0.00257310f, 0.00252518f,
+                               0.00046951f, 0.00312308f, 0.00428420f, 0.00259311f,
+                               0.00121376f, 0.00157852f, 0.00259626f, 0.01612228f },
+               /* row 12 */ { 0.00148878f, 0.00076734f, 0.00063401f, 0.00047808f,
+                               0.00037421f, 0.00075546f, 0.00076105f, 0.00066504f,
+                               0.00042237f, 0.00224097f, 0.00461939f, 0.00096120f,
+                               0.00409522f },
+               /* row 13 */ { 0.00165004f, 0.00090768f, 0.00084658f, 0.00069041f,
+                               0.00052274f, 0.00059248f, 0.00078814f, 0.00115204f,
+                               0.00072545f, 0.00279948f, 0.00533369f, 0.00087222f,
+                               0.00116111f, 0.01661038f },
+               /* row 14 */ { 0.00230618f, 0.00106268f, 0.00100282f, 0.00125381f,
+                               0.00034766f, 0.00090111f, 0.00151550f, 0.00155601f,
+                               0.00049078f, 0.00103767f, 0.00157310f, 0.00154836f,
+                               0.00046718f, 0.00060701f, 0.01846071f },
+               /* row 15 */ { 0.00631752f, 0.00224540f, 0.00301397f, 0.00285226f,
+                               0.00094867f, 0.00191155f, 0.00293898f, 0.00381962f,
+                               0.00116422f, 0.00173565f, 0.00250962f, 0.00312633f,
+                               0.00087787f, 0.00119036f, 0.00180037f, 0.01346609f },
+               /* row 16 */ { 0.00389995f, 0.00186053f, 0.00220144f, 0.00180488f,
+                               0.00073798f, 0.00154526f, 0.00216760f, 0.00214841f,
+                               0.00077747f, 0.00248968f, 0.00302273f, 0.00250862f,
+                               0.00093371f, 0.00107595f, 0.00147982f, 0.00487295f,
+                               0.01299436f },
+               /* row 17 */ { 0.00039119f, 0.00029139f, 0.00021006f, 0.00016015f,
+                               0.00010666f, 0.00020592f, 0.00023815f, 0.00038786f,
+                               0.00019097f, 0.00039549f, 0.00076736f, 0.00028448f,
+                               0.00016253f, 0.00085751f, 0.00015674f, 0.00026525f,
+                               0.00024961f, 0.00563625f },
+               /* row 18 */ { 0.00131840f, 0.00099430f, 0.00074960f, 0.00066005f,
+                               0.00036626f, 0.00070192f, 0.00092548f, 0.00089301f,
+                               0.00131038f, 0.00127857f, 0.00219713f, 0.00100817f,
+                               0.00054105f, 0.00368739f, 0.00047608f, 0.00102648f,
+                               0.00094759f, 0.00069226f, 0.00999315f },
+               /* row 19 */ { 0.00533241f, 0.00169359f, 0.00136609f, 0.00127915f,
+                               0.00119152f, 0.00132844f, 0.00178697f, 0.00194579f,
+                               0.00071553f, 0.01117956f, 0.00914460f, 0.00210897f,
+                               0.00197461f, 0.00256159f, 0.00135781f, 0.00241601f,
+                               0.00343452f, 0.00038538f, 0.00148001f, 0.02075171f }
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/FileBuffer.h b/binaries/src/GLProbs-1.0/FileBuffer.h
new file mode 100644 (file)
index 0000000..06af54b
--- /dev/null
@@ -0,0 +1,117 @@
+/////////////////////////////////////////////////////////////////
+// FileBuffer.h
+//
+// Buffered file reading.
+/////////////////////////////////////////////////////////////////
+
+#ifndef FILEBUFFER_H
+#define FILEBUFFER_H
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+// Capacity, in bytes, of the FileBuffer read window.
+const int BufferSize = 1000;
+
+/////////////////////////////////////////////////////////////////
+// FileBuffer
+//
+// Reads a file character by character through a fixed-size
+// window of BufferSize bytes, with a one-level UnGet().  The
+// stream is opened by the constructor and closed by close() or
+// the destructor.  UnGet() uses assert(), declared in <cassert>.
+/////////////////////////////////////////////////////////////////
+
+class FileBuffer {
+       std::ifstream file;         // underlying input stream
+       char buffer[BufferSize];    // current window of file contents
+       int currPos;                // index of the next unread byte in buffer
+       int size;                   // number of valid bytes in buffer
+       bool isEOF;                 // set once a refill reads zero bytes
+       bool isValid;               // false if the open failed or after close()
+       bool canUnget;              // true only right after a successful Get()
+
+public:
+
+       // Some common routines
+
+       // Opens filename for reading; fail() reports whether that worked.
+       FileBuffer(const char *filename) :
+                       file(filename), currPos(0), size(0), isEOF(false), isValid(
+                                       !file.fail()), canUnget(false) {
+       }
+       ~FileBuffer() {
+               close();
+       }
+       // True if the file could not be opened or has been closed.
+       bool fail() const {
+               return !isValid;
+       }
+       // True if the buffer is unusable or a read has already hit end of file.
+       bool eof() const {
+               return (!isValid || isEOF);
+       }
+       // Closes the stream; every later Get() returns false.
+       void close() {
+               file.close();
+               isValid = false;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::Get()
+       //
+       // Retrieve a character from the file buffer.  Returns true if
+       // and only if a character is read; ch is left untouched
+       // otherwise.
+       /////////////////////////////////////////////////////////////////
+
+       bool Get(char &ch) {
+
+               // nothing can be read after a failure, close() or end of file
+               if (!isValid || isEOF)
+                       return false;
+
+               // the window is exhausted: refill it from the stream
+               if (currPos == size) {
+                       file.read(buffer, BufferSize);
+                       // gcount() never exceeds BufferSize here, so it fits in an int
+                       size = static_cast<int>(file.gcount());
+                       isEOF = (size == 0);
+                       currPos = 0;
+                       if (isEOF)
+                               return false;
+               }
+
+               // hand out the next byte and allow a single UnGet()
+               ch = buffer[currPos++];
+               canUnget = true;
+               return true;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::UnGet()
+       //
+       // Unretrieve the most recently read character from the file
+       // buffer.  Note that this allows only a one-level undo: it must
+       // directly follow a successful Get() (checked with assert).
+       /////////////////////////////////////////////////////////////////
+
+       void UnGet() {
+               assert(canUnget);
+               assert(isValid);
+               assert(currPos > 0);
+               currPos--;
+               assert(currPos < size);
+               isEOF = false;
+               canUnget = false;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::GetLine()
+       //
+       // Retrieve characters of text until a newline character is
+       // encountered.  The newline is consumed but not stored in s.
+       // Terminates properly on end-of-file condition (s then holds
+       // whatever was read before the end).
+       /////////////////////////////////////////////////////////////////
+
+       void GetLine(std::string &s) {
+               char ch;
+               s = "";
+               while (Get(ch) && ch != '\n')
+                       s += ch;
+       }
+
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/MSA.cpp b/binaries/src/GLProbs-1.0/MSA.cpp
new file mode 100644 (file)
index 0000000..0144492
--- /dev/null
@@ -0,0 +1,1541 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+string parametersInputFilename = "";
+string parametersOutputFilename = "no training"; // NOTE(review): looks like a sentinel meaning "do not write parameters" -- confirm
+string annotationFilename = "";
+
+bool enableVerbose = false; // doAlign() prints the thread id and pair indices when set (OpenMP builds)
+bool enableAnnotation = false; // doAlign() calls WriteAnnotation() when set
+bool enableClustalWOutput = false; // MSA::MSA() writes with WriteALN() when set, WriteMFA() otherwise
+bool enableAlignOrder = false;
+int numConsistencyReps = 2; // number of DoRelaxation() rounds run by doAlign()
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 100;
+
+float cutoff = 0;
+
+VF initDistrib(NumMatrixTypes);
+VF gapOpen(2 * NumInsertStates);
+VF gapExtend(2 * NumInsertStates);
+VVF emitPairs(256, VF(256, 1e-10)); // indexed by two residue bytes in PrintParameters()
+VF emitSingle(256, 1e-5); // indexed by one residue byte in PrintParameters()
+
+string alphabet = alphabetDefault;
+
+const int MIN_PRETRAINING_REPS = 0; // NOTE(review): the MIN_/MAX_ pairs are presumably option bounds -- confirm in ParseParams()
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+int flag_gui = 0;   // 0: no GUI-related output
+// 1: GUI-related output is generated
+int flag_ppscore = 0; // 0: no posterior-probability score sequence is added to the output FASTA alignment
+// 1: a posterior-probability score sequence is added to the output FASTA alignment
+
+///////////////////////////////
+// global scoring matrix variables
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160";
+int subst_index[26];
+
+double sub_matrix[26][26];
+double normalized_matrix[26][26]; // added by YE Yongtao
+int firstread = 0;             // ensures that the matrices are read only once
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+int prot_nuc = 0;              // 0 = protein, 1 = nucleotide
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+int numThreads = 0; // values <= 0 are replaced by the processor count in MSA::MSA() (OpenMP builds)
+
+// Argument bundle; the single global instance below is named "argument".
+struct argument_decl {
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       // can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+};
+
+argument_decl argument;
+
+extern inline void read_sustitution_matrix(char *fileName); // NOTE(review): "sustitution" looks like a typo, but the name must match the definition elsewhere
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int(); // NOTE(review): an inline function needs a definition in every translation unit that uses it -- confirm these are not called here
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments(); // called by MSA::MSA() before ReadParameters()
+
+MSA::MSA(int argc, char* argv[]) { // runs the whole pipeline: parse options, load sequences, align, write the result
+       // parse the command line; the returned vector holds the input file names
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters();
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       // load every input file into one MultiSequence
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       // one integer weight slot per loaded sequence (released at the end of this constructor)
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       // initialize parameters for OpenMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+//             cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+//     cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       // use numThreads OpenMP threads; a value <= 0 was replaced by the processor count above
+       omp_set_num_threads(numThreads);
+#endif
+/*     
+FILE *fi = fopen ("accuracy", "a");
+fprintf (fi, "%s ", argv[1]); 
+fclose (fi);
+*/
+       int levelid = AdjustmentTest(sequences,ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,emitSingle)); // levelid selects the pairwise model used by doAlign()
+        //cerr<<levelid<<endl;
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), levelid);
+
+       // write the alignment to *alignOutFile: WriteALN() for ClustalW output, WriteMFA() otherwise
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+
+       // release everything allocated above, plus the alignment returned by doAlign()
+       delete[] this->seqsWeights;
+       delete alignment;
+       delete sequences;
+}
+MSA::~MSA() {
+       /* close the output file, but only when an output file name was set; NOTE(review): assumes alignOutFile then points to a std::ofstream -- confirm */
+       if (alignOutFileName.length() > 0) {
+               ((std::ofstream*) alignOutFile)->close();
+       }
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Echoes the transition parameters (initDistrib, gapOpen and
+// gapExtend) to STDERR under the given message.  When filename
+// is non-NULL, those parameters plus the alphabet and both
+// emission tables are also written to that file; the program
+// exits with status 1 if the file cannot be opened for writing.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       const int numGapParams = 2 * NumInsertStates;
+
+       // screen report: one "name[] = { v v v }" line per parameter array
+       cerr << message << endl;
+
+       cerr << "    initDistrib[] = { ";
+       for (int k = 0; k < NumMatrixTypes; ++k)
+               cerr << setprecision(10) << initDistrib[k] << " ";
+       cerr << "}" << endl;
+
+       cerr << "        gapOpen[] = { ";
+       for (int k = 0; k < numGapParams; ++k)
+               cerr << setprecision(10) << gapOpen[k] << " ";
+       cerr << "}" << endl;
+
+       cerr << "      gapExtend[] = { ";
+       for (int k = 0; k < numGapParams; ++k)
+               cerr << setprecision(10) << gapExtend[k] << " ";
+       cerr << "}" << endl << endl;
+
+       // no file requested: the screen report is all there is to do
+       if (!filename)
+               return;
+
+       // attempt to open the file for writing
+       FILE *out = fopen(filename, "w");
+       if (!out) {
+               cerr << "ERROR: Unable to write parameter file: " << filename
+                               << endl;
+               exit(1);
+       }
+
+       // lines 1-3: the three transition parameter arrays
+       for (int k = 0; k < NumMatrixTypes; ++k)
+               fprintf(out, "%.10f ", initDistrib[k]);
+       fprintf(out, "\n");
+       for (int k = 0; k < numGapParams; ++k)
+               fprintf(out, "%.10f ", gapOpen[k]);
+       fprintf(out, "\n");
+       for (int k = 0; k < numGapParams; ++k)
+               fprintf(out, "%.10f ", gapExtend[k]);
+       fprintf(out, "\n");
+
+       // line 4: the alphabet itself
+       fprintf(out, "%s\n", alphabet.c_str());
+
+       // lower triangle (diagonal included) of the pair emission table,
+       // one line per alphabet letter
+       const int numLetters = (int) alphabet.size();
+       for (int row = 0; row < numLetters; ++row) {
+               const unsigned char rowLetter = (unsigned char) alphabet[row];
+               for (int col = 0; col <= row; ++col) {
+                       const unsigned char colLetter = (unsigned char) alphabet[col];
+                       fprintf(out, "%.10f ", emitPairs[rowLetter][colLetter]);
+               }
+               fprintf(out, "\n");
+       }
+
+       // last line: single-residue emissions in alphabet order
+       for (int idx = 0; idx < numLetters; ++idx)
+               fprintf(out, "%.10f ", emitSingle[(unsigned char) alphabet[idx]]);
+       fprintf(out, "\n");
+
+       fclose(out);
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// Computes a posterior probability matrix for every sequence pair (the model
+// used depends on levelid), builds a guide tree from the resulting distances,
+// runs numConsistencyReps rounds of relaxation and returns the final alignment.
+/////////////////////////////////////////////////////////////////
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, int levelid) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+       //create distance matrix
+       VVF distances(numSeqs, VF(numSeqs, 0));
+        // create the sparse posterior matrices: one slot per ordered sequence pair, all NULL initially
+        SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL)); 
+
+#ifdef _OPENMP
+       // flatten the (a, b) pairs with a < b into an array so that one parallel loop can cover them
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       // posterior probability matrix for this pair; every branch below allocates it
+                       VF* posterior;
+
+// levelid == 1 (low similarity): use the local model only
+                       if(levelid == 1){
+
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               delete forward;
+                               delete backward; 
+
+                       }
+// levelid >= 2 (high similarity): use the global model, i.e. ComputePostProbs() only
+                       else if(levelid >= 2) posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+
+// any other levelid (extremely low or extremely high similarity): combine all three models
+                       else{
+
+// probcons-style posterior (model called without the extra "false" argument)
+                               // compute forward and backward probabilities
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                               assert(backward);
+                               // compute posterior probability matrix from HMM
+                               VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
+                               assert(probcons_posterior);             
+                               delete forward;
+                               delete backward;   
+                 
+// probalign-style posterior from ComputePostProbs()
+                               VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                               assert(probalign_posterior);
+// local posterior (same calls as the levelid == 1 branch)
+                               forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               assert(posterior);
+                               delete forward;
+                               delete backward;                                
+// combined model
+                               // merge the three matrices cell by cell with a root mean square, stored in posterior
+                               VF::iterator ptr1 = probcons_posterior->begin();                        
+                               VF::iterator ptr2 = probalign_posterior->begin();
+                               VF::iterator ptr = posterior->begin();                 
+                               for (int i = 0; i <= seq1->GetLength(); i++) {
+                                       for (int j = 0; j <= seq2->GetLength(); j++) {
+                                               float v1 = *ptr1;
+                                               float v2 = *ptr2;
+                                               float v3 = *ptr;
+                                               *ptr = sqrt((v1*v1 + v2*v2 + v3*v3)/3);
+                                               ptr1++;
+                                               ptr2++;
+                                               ptr++;
+                                       }
+                               }
+                               delete probcons_posterior;
+                               delete probalign_posterior;
+                       }
+
+                        assert(posterior);
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       // symmetric distance: 1 - alignment score / length of the shorter sequence
+                       distances[a][b] = distances[b][a] = 1.0f - alignment.second
+                                       / min(seq1->GetLength(), seq2->GetLength());
+
+                       // keep a sparse copy for (a, b) only; the (b, a) slot stays NULL at this stage
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+
+                       delete posterior;
+                       delete alignment.first;
+#ifndef _OPENMP
+               }
+#endif
+       } 
+
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // convert the integer sequence weights to floats (divided by INT_MULTIPLY, then scaled by 10)
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) { // perform the consistency transformation the desired number of times
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model,levelid);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices (both orientations of every pair; deleting a NULL slot is a no-op)
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment; // the caller takes ownership (MSA::MSA() deletes it)
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Attempts to parse an integer from the character string given.
+// Returns true only if no parsing error occurs.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(val);
+
+       errno = 0;
+       retVal = strtol(data, &endPtr, 0);
+       if (retVal == 0 && (errno != 0 || data == endPtr))
+               return false;
+       if (errno != 0 && (retVal == LONG_MAX || retVal == LONG_MIN))
+               return false;
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
+               return false;
+       *val = (int) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Attempts to parse a float from the character string given.
+// Returns true only if no parsing error occurs.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+
+       assert(val);
+
+       errno = 0;
+       retVal = strtod(data, &endPtr);
+       if (retVal == 0 && (errno != 0 || data == endPtr))
+               return false;
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
+               return false;
+       *val = (float) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters from a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::ReadParameters() {
+
+       ifstream data;
+
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // read initial state distribution and transition parameters
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend1Default[i];
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend2Default[i];
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       emitSingle[(unsigned char) tolower(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       emitSingle[(unsigned char) toupper(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       for (int j = 0; j <= i; j++) {
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                       }
+               }
+       } else {
+               data.open(parametersInputFilename.c_str());
+               if (data.fail()) {
+                       cerr << "ERROR: Unable to read parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               string line[3];
+               for (int i = 0; i < 3; i++) {
+                       if (!getline(data, line[i])) {
+                               cerr
+                                               << "ERROR: Unable to read transition parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+               }
+               istringstream data2;
+               data2.clear();
+               data2.str(line[0]);
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       data2 >> initDistrib[i];
+               data2.clear();
+               data2.str(line[1]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapOpen[i];
+               data2.clear();
+               data2.str(line[2]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapExtend[i];
+
+               if (!getline(data, line[0])) {
+                       cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // read alphabet as concatenation of all characters on alphabet line
+               alphabet = "";
+               string token;
+               data2.clear();
+               data2.str(line[0]);
+               while (data2 >> token)
+                       alphabet += token;
+
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++) {
+                               float val;
+                               data >> val;
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = val;
+                       }
+               }
+
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       float val;
+                       data >> val;
+                       emitSingle[(unsigned char) tolower(alphabet[i])] = val;
+                       emitSingle[(unsigned char) toupper(alphabet[i])] = val;
+               }
+               data.close();
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// ParseParams()
+//
+// Parse all command-line options.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       cerr
+                       << "************************************************************************"
+                       << endl
+                       << "\tMSAPROBS is a open-source protein multiple sequence alignment algorithm"
+                       << endl
+                       << "\tbased on pair hidden markov model and partition function postirior"
+                       << endl
+                       << "\tprobabilities. If any comments or problems, please contact"
+                       << endl
+                       << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl
+                       << "*************************************************************************"
+                       << endl << "Usage:" << endl
+                       << "       msaprobs [OPTION]... [infile]..." << endl << endl
+                       << "Description:" << endl
+                       << "       Align sequences in multi-FASTA format" << endl << endl
+                       << "       -o, --outfile <string>" << endl
+                       << "              specify the output file name (STDOUT by default)"
+                       << endl << "       -num_threads <integer>" << endl
+                       << "              specify the number of threads used, and otherwise detect automatically"
+                       << endl << "       -clustalw" << endl
+                       << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl << "       -c, --consistency REPS" << endl
+                       << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl
+                       << "       -ir, --iterative-refinement REPS" << endl
+                       << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl << "       -v, --verbose" << endl
+                       << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl
+                       << "       -annot FILENAME" << endl
+                       << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl << "       -a, --alignment-order" << endl
+                       << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl
+                       << "       -version " << endl
+                       << "              print out version of MSAPROBS " << endl << endl;
+}
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+       int tempInt;
+       float tempFloat;
+
+       for (int i = 1; i < argc; i++) {
+               if (argv[i][0] == '-') {
+                       //help
+                       if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
+                               printUsage();
+                               exit(1);
+                               //output file name
+                       } else if (!strcmp(argv[i], "-o")
+                                       || !strcmp(argv[i], "--outfile")) {
+                               if (i < argc - 1) {
+                                       alignOutFileName = argv[++i];   //get the file name
+                               } else {
+                                       cerr << "ERROR: String expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               // parameter file
+                       } else if (!strcmp (argv[i], "-p") || !strcmp (argv[i], "--paramfile")){
+                               if (i < argc - 1)
+                                       parametersInputFilename = string (argv[++i]);
+                               else {
+                                               cerr << "ERROR: Filename expected for option " << argv[i] << endl;
+                                               exit (1);
+                               }
+                               //number of threads used
+                       } else if (!strcmp(argv[i], "-p")
+                                       || !strcmp(argv[i], "-num_threads")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << " ERROR: invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < 0) {
+                                                       tempInt = 0;
+                                               }
+                                               numThreads = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               // number of consistency transformations
+                       } else if (!strcmp(argv[i], "-c")
+                                       || !strcmp(argv[i], "--consistency")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_CONSISTENCY_REPS
+                                                               || tempInt > MAX_CONSISTENCY_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_CONSISTENCY_REPS << " and "
+                                                                       << MAX_CONSISTENCY_REPS << "." << endl;
+                                                       exit(1);
+                                               } else {
+                                                       numConsistencyReps = tempInt;
+                                               }
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // number of randomized partitioning iterative refinement passes
+                       else if (!strcmp(argv[i], "-ir")
+                                       || !strcmp(argv[i], "--iterative-refinement")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
+                                                               || tempInt > MAX_ITERATIVE_REFINEMENT_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_ITERATIVE_REFINEMENT_REPS << " and "
+                                                                       << MAX_ITERATIVE_REFINEMENT_REPS << "."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       numIterativeRefinementReps = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // annotation files
+                       else if (!strcmp(argv[i], "-annot")) {
+                               enableAnnotation = true;
+                               if (i < argc - 1) {
+                                       annotationFilename = argv[++i];
+                               } else {
+                                       cerr << "ERROR: FILENAME expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // clustalw output format
+                       else if (!strcmp(argv[i], "-clustalw")) {
+                               enableClustalWOutput = true;
+                       }
+
+                       // cutoff
+                       else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
+                               if (i < argc - 1) {
+                                       if (!GetFloat(argv[++i], &tempFloat)) {
+                                               cerr
+                                                               << "ERROR: Invalid floating-point value following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempFloat < 0 || tempFloat > 1) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", floating-point value must be between 0 and 1."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       cutoff = tempFloat;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Floating-point value expected for option "
+                                                       << argv[i] << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // verbose reporting
+                       else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
+                               enableVerbose = true;
+                       }
+
+                       // alignment order
+                       else if (!strcmp(argv[i], "-a")
+                                       || !strcmp(argv[i], "--alignment-order")) {
+                               enableAlignOrder = true;
+                       }
+
+                       //print out version
+                       else if (!strcmp(argv[i], "-version")) {
+                               cerr << "MSAPROBS version " << VERSION << endl;
+                               exit(1);
+                       }
+                       // bad arguments
+                       else {
+                               cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
+                               exit(1);
+                       }
+               } else {
+                       sequenceNames.push_back(string(argv[i]));
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               alignOutFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Process the tree recursively.  Returns the aligned sequences
+// corresponding to a node or leaf of the tree.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       MultiSequence *result;
+
+       // check if this is a node of the alignment tree
+       //if (tree->GetSequenceLabel() == -1){
+       if (tree->leaf == NODE) {
+               MultiSequence *alignLeft = ProcessTree(tree->left, sequences,
+                               sparseMatrices, model);
+               MultiSequence *alignRight = ProcessTree(tree->right, sequences,
+                               sparseMatrices, model);
+
+               assert(alignLeft);
+               assert(alignRight);
+
+               result = AlignAlignments(alignLeft, alignRight, sparseMatrices, model);
+               assert(result);
+
+               delete alignLeft;
+               delete alignRight;
+       }
+
+       // otherwise, this is a leaf of the alignment tree
+       else {
+               result = new MultiSequence();
+               assert(result);
+               //result->AddSequence (sequences->GetSequence(tree->GetSequenceLabel())->Clone());
+               result->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+       }
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Compute the final alignment by calling ProcessTree(), then
+// performing iterative refinement as needed.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, int levelid) {
+       MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       SafeVector<int> oldOrdering;
+       int numSeqs = alignment->GetNumSequences();
+       if (enableAlignOrder) {
+               for (int i = 0; i < numSeqs; i++)
+                       oldOrdering.push_back(alignment->GetSequence(i)->GetSortLabel());
+               alignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // tree-based refinement
+       // TreeBasedBiPartitioning (sparseMatrices, model, alignment, tree);
+       /*
+        int numSeqs = alignment->GetNumSequences();
+        //if(numSeqs < numIterativeRefinementReps){
+        for(int iter = 0; iter < 5; iter ++){
+               for(int i = 0; i < numSeqs - 1; i++){
+                       DoIterativeRefinementTreeNode(sparseMatrices, model, alignment, i);
+                }
+        }
+        //}*/
+/*
+        //DoIterativeRefinement() return 1,2: this refinement unsuccessful
+       if(levelid == 3) numIterativeRefinementReps=10;
+       int ineffectiveness = 0;
+       for (int i = 0; i < numIterativeRefinementReps; i++){
+               int flag = DoIterativeRefinement(sparseMatrices, model, alignment);
+               if(numSeqs > 35 && levelid < 3){
+                       if(flag > 0){
+                           if(numIterativeRefinementReps < 10*numSeqs) 
+                               numIterativeRefinementReps ++;                          
+                            if(flag == 1) ineffectiveness ++;
+                       }
+                       //else ineffectiveness = 0;
+                       if(ineffectiveness > numSeqs && i >100 ) break;
+               }               
+       }
+*/
+
+       //if(levelid == 3) numIterativeRefinementReps=10;
+       for (int i = 0; i < numIterativeRefinementReps; i++)
+               DoIterativeRefinement(sparseMatrices, model, alignment);
+
+        cerr << endl;  
+
+       if (oldOrdering.size() > 0) {
+               for (int i = 0; i < (int) oldOrdering.size(); i++) {
+                       alignment->GetSequence(i)->SetSortLabel(oldOrdering[i]);
+               }
+       }
+
+       // return final alignment
+       return alignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Returns the alignment of two MultiSequence objects.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // print some info about the alignment
+       if (enableVerbose) {
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align1->GetSequence(i)->GetLabel();
+               cerr << "] vs. ";
+               for (int i = 0; i < align2->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align2->GetSequence(i)->GetLabel();
+               cerr << "]: ";
+       }
+#if 0
+       VF *posterior = model.BuildPosterior (align1, align2, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+#endif
+       // compute an "accuracy" measure for the alignment before refinement
+
+       pair<SafeVector<char> *, float> alignment;
+       //perform alignment
+       alignment = model.ComputeAlignment(align1->GetSequence(0)->GetLength(),
+                       align2->GetSequence(0)->GetLength(), *posterior);
+
+       delete posterior;
+
+       if (enableVerbose) {
+
+               // compute total length of sequences
+               int totLength = 0;
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       for (int j = 0; j < align2->GetNumSequences(); j++)
+                               totLength += min(align1->GetSequence(i)->GetLength(),
+                                               align2->GetSequence(j)->GetLength());
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+       for (int i = 0; i < align1->GetNumSequences(); i++)
+               result->AddSequence(
+                               align1->GetSequence(i)->AddGaps(alignment.first, 'X'));
+       for (int i = 0; i < align2->GetNumSequences(); i++)
+               result->AddSequence(
+                               align2->GetSequence(i)->AddGaps(alignment.first, 'Y'));
+       if (!enableAlignOrder)
+               result->SortByLabel();
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+//
+// Performs one round of the probabilistic consistency transformation.
+// For every sequence pair (i, j) with i < j the dense posterior of
+// sparseMatrices[i][j] is doubled (the z = x and z = y terms), the
+// contributions routed through every other sequence z are added by
+// Relax()/Relax1(), and the sum is divided by the number of sequences.
+// Cells that were not stored in the original sparse matrix are then
+// zeroed and the result is stored as a new SparseMatrix.
+//
+// seqsWeights    - per-sequence weights; they are read, but the products
+//                  built from them (w, sumW) never reach the result
+//                  because the weighted update and the weighted
+//                  normalisation are commented out
+// sequences      - the input sequences
+// sparseMatrices - current pairwise posteriors; only entries [a][b]
+//                  with a < b are dereferenced here, and none of them
+//                  is deleted by this function
+//
+// Returns a numSeqs x numSeqs table in which [i][j] (i < j) holds a
+// newly allocated SparseMatrix and every other entry is NULL.
+//
+// In OpenMP builds the pairs are taken from the members seqsPairs and
+// numPairs, which must have been filled in before this call.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
+               MultiSequence *sequences,
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       const int numSeqs = sequences->GetNumSequences();
+
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences
+       // (the OpenMP build walks a flat list of pairs, the serial build uses
+       // two nested loops; both share the loop body below, which is why the
+       // closing braces at the end are conditional as well)
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       // only this first message is inside the critical section;
+                       // the later verbose output is not synchronised
+                       if (enableVerbose) {
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix
+                       // (the returned vector is deleted at the end of this iteration)
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+
+                       // contribution from the summation where z = x and z = y
+                       // NOTE: w and sumW are computed but unused; the active code
+                       // below simply counts the x-y posterior twice
+                       float w = wi * wi * wj + wi * wj * wj;
+                       float sumW = w;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                                //posterior[k] = w*posterior[k];
+                               posterior[k] += posterior[k];
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences
+                       // the stored matrices always have the smaller sequence index
+                       // first, so the three cases below pick the matching layout
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk;
+                                       sumW += w;
+                                       // k < i < j: both matrices have z along their rows
+                                       if (k < i)
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       // i < k < j: x-z and z-y are stored as needed
+                                       else if (k > i && k < j)
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       // i < j < k: only y-z is stored, so a temporary
+                                       // transpose supplies the z-y matrix
+                                       else {
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       // normalise by the number of sequences (unweighted variant)
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               //posterior[k] /= sumW;
+                               posterior[k] /= numSeqs;
+                       }
+                       // mask out positions not originally in the posterior matrix
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       // row 0 is cleared completely
+                       for (int y = 0; y <= seq2Length; y++)
+                               posterior[y] = 0;
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0;
+                               while (XYptr != XYend) {
+
+                                       // zero out all cells until the first filled column
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       // release the dense matrix obtained from GetPosterior()
+                       delete posteriorPtr;
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Adds the consistency contribution of one intermediate sequence z
+// to the dense x-y matrix "posterior":
+//   posterior(i, j) += XZ(i, k) * ZY(k, j)   for every stored k
+// "posterior" is laid out row by row with (length of y + 1) cells
+// per row.  The "weight" argument is accepted but not applied.
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+       // the z dimension of both matrices has to agree
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       const int numRowsX = matXZ->GetSeq1Length();
+       const int rowStride = matZY->GetSeq2Length() + 1;
+
+       // walk the rows of the x-z matrix, starting at row 1
+       for (int x = 1; x <= numRowsX; ++x) {
+               VF::iterator outRow = posterior.begin() + x * rowStride;
+               SafeVector<PIF>::iterator xz = matXZ->GetRowPtr(x);
+               SafeVector<PIF>::iterator xzStop = xz + matXZ->GetRowSize(x);
+
+               // every stored x[i]-z[k] cell selects one row of the z-y matrix
+               for (; xz != xzStop; ++xz) {
+                       const float probXZ = xz->second;
+                       SafeVector<PIF>::iterator zy = matZY->GetRowPtr(xz->first);
+                       SafeVector<PIF>::iterator zyStop = zy
+                                       + matZY->GetRowSize(xz->first);
+
+                       // spread the product over all stored z[k]-y[j] cells
+                       for (; zy != zyStop; ++zy)
+                               outRow[zy->first] += probXZ * zy->second;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Variant of Relax() for the case where the intermediate sequence z
+// is the row dimension of BOTH input matrices (z-x and z-y).  Adds
+//   posterior(i, j) += ZX(k, i) * ZY(k, j)   for every stored k
+// to the dense x-y matrix "posterior", which is laid out row by row
+// with (length of y + 1) cells per row.  The "weight" argument is
+// accepted but not applied.
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       // both matrices are indexed by positions of z along their rows
+       // (same kind of dimension check that Relax() performs)
+       assert(lengthZ == matZY->GetSeq1Length());
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // row k of the z-y matrix is the same for every z[k]-x[i]
+               // entry, so it is looked up once per k instead of once per entry
+               const SafeVector<PIF>::iterator ZYbegin = matZY->GetRowPtr(k);
+               const SafeVector<PIF>::iterator ZYend = ZYbegin
+                               + matZY->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               for (; ZXptr != ZXend; ++ZXptr) {
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       for (SafeVector<PIF>::iterator ZYptr = ZYbegin; ZYptr != ZYend;
+                                       ++ZYptr) {
+                               //base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               base[ZYptr->first] += ZXval * ZYptr->second;
+                       }
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partitioning iterative
+// refinement: the sequences are split at random into two groups,
+// the two projected sub-alignments are realigned against each other
+// and "alignment" is replaced by the result (the old object is
+// deleted).
+// return 0: the realignment score differs from the score of the old
+//           alignment, 1: both scores are equal (ineffective
+//           refinement), 2: the random split left one group empty
+//           and "alignment" was not touched
+/////////////////////////////////////////////////////////////////
+int MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment) {
+       const int numSeqs = alignment->GetNumSequences();
+
+       // one rand() draw per sequence decides which group it joins
+       set<int> groupOne, groupTwo;
+       for (int s = 0; s < numSeqs; s++) {
+               if (rand() % 2)
+                       groupOne.insert(s);
+               else
+                       groupTwo.insert(s);
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return 2;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+
+//start add by Yongtao
+#if 1
+       VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
+                       sparseMatrices, cutoff);
+#endif
+       VF &posteriorArr = *posterior;
+
+       // collect the rows of the current alignment, group by group
+       SafeVector<SafeVector<char>::iterator> rowsOne;
+       rowsOne.reserve(groupOne.size());
+       for (set<int>::const_iterator it = groupOne.begin(); it != groupOne.end();
+                       ++it)
+               rowsOne.push_back(alignment->GetSequence(*it)->GetDataPtr());
+
+       SafeVector<SafeVector<char>::iterator> rowsTwo;
+       rowsTwo.reserve(groupTwo.size());
+       for (set<int>::const_iterator it = groupTwo.begin(); it != groupTwo.end();
+                       ++it)
+               rowsTwo.push_back(alignment->GetSequence(*it)->GetDataPtr());
+
+       // compute an "accuracy" measure for the alignment before refinement:
+       // sum the posterior of every column in which both groups hold at
+       // least one residue
+       const int numRowsOne = (int) rowsOne.size();
+       const int numRowsTwo = (int) rowsTwo.size();
+       const int oldLength = alignment->GetSequence(0)->GetLength();
+       const int rowStride = groupTwoSeqs->GetSequence(0)->GetLength() + 1;
+       int posOne = 0;
+       int posTwo = 0;
+       float accuracy_before = 0;
+       for (int col = 1; col <= oldLength; col++) {
+               // a group occupies the column unless all of its rows are gaps
+               bool hasOne = false;
+               for (int r = 0; r < numRowsOne && !hasOne; r++)
+                       hasOne = (rowsOne[r][col] != '-');
+               bool hasTwo = false;
+               for (int r = 0; r < numRowsTwo && !hasTwo; r++)
+                       hasTwo = (rowsTwo[r][col] != '-');
+
+               // occupied columns advance the position in the projected group
+               if (hasOne)
+                       posOne++;
+               if (hasTwo)
+                       posTwo++;
+               if (hasOne && hasTwo)
+                       accuracy_before += posteriorArr[posOne * rowStride + posTwo];
+       }
+
+       //perform alignment
+       pair<SafeVector<char> *, float> refined = model.ComputeAlignment(
+                       groupOneSeqs->GetSequence(0)->GetLength(),
+                       groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
+       delete posterior;
+
+       // now build final alignment
+       MultiSequence *merged = new MultiSequence();
+       for (int s = 0; s < groupOneSeqs->GetNumSequences(); s++)
+               merged->AddSequence(
+                               groupOneSeqs->GetSequence(s)->AddGaps(refined.first, 'X'));
+       for (int s = 0; s < groupTwoSeqs->GetNumSequences(); s++)
+               merged->AddSequence(
+                               groupTwoSeqs->GetSequence(s)->AddGaps(refined.first, 'Y'));
+
+       // free temporary alignment and hand the new one to the caller
+       delete refined.first;
+       delete alignment;
+       alignment = merged;
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+
+       return (accuracy_before == refined.second) ? 1 : 0;
+}
+
+
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinementTreeNode()
+//
+// Refinement step driven by the guide tree instead of a random
+// split: the sequences listed as left or right leaves of alignment
+// order "nodeIndex" form one group, all remaining sequences the
+// other.  Both projections are realigned with AlignAlignments() and
+// "alignment" is replaced by the result.  Nothing happens when one
+// of the groups is empty.
+/////////////////////////////////////////////////////////////////
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       const int numSeqs = alignment->GetNumSequences();
+
+       // flag every sequence that is a leaf of the chosen node
+       vector<bool> underNode(numSeqs, false);
+       AlignmentOrder* nodeOrder = this->tree->getAlignOrders();
+       nodeOrder += nodeIndex;
+       for (int n = 0; n < nodeOrder->leftNum; n++)
+               underNode[nodeOrder->leftLeafs[n]] = true;
+       for (int n = 0; n < nodeOrder->rightNum; n++)
+               underNode[nodeOrder->rightLeafs[n]] = true;
+
+       // create two separate groups
+       set<int> groupOne, groupTwo;
+       for (int s = 0; s < numSeqs; s++)
+               (underNode[s] ? groupOne : groupTwo).insert(s);
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups; the old alignment is no longer
+       // needed once both projections exist
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Writes one annotation score per alignment column to the file
+// named by "annotationFilename".  For each column the residues
+// present are collected as (sort label, residue position) pairs
+// and scored with ComputeScore().  Terminates the program with
+// exit(1) when the file cannot be opened.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       // per-sequence state: row data, sort label and the number of
+       // residues consumed so far
+       SafeVector<SafeVector<char>::iterator> rowData(numSeqs);
+       SafeVector<int> sortLabel(numSeqs);
+       SafeVector<int> residuesSeen(numSeqs, 0);
+       for (int s = 0; s < numSeqs; s++) {
+               Sequence *seq = alignment->GetSequence(s);
+               rowData[s] = seq->GetDataPtr();
+               sortLabel[s] = seq->GetSortLabel();
+       }
+
+       // residues of the column currently being scored
+       SafeVector<pair<int, int> > column;
+       column.reserve(numSeqs);
+
+       // for every column
+       for (int c = 1; c <= alignLength; c++) {
+
+               // find all aligned residues in this particular column
+               column.clear();
+               for (int s = 0; s < numSeqs; s++) {
+                       if (rowData[s][c] == '-')
+                               continue;
+                       residuesSeen[s]++;
+                       column.push_back(make_pair(sortLabel[s], residuesSeen[s]));
+               }
+
+               // ComputeScore() receives the entries ordered by label
+               sort(column.begin(), column.end());
+               outfile << setw(4) << ComputeScore(column, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+// "active" holds one (sequence label, residue position) pair per
+// residue in the column.  The score is the mean of the pairwise
+// matrix values over all unordered residue pairs, multiplied by
+// 100 and truncated to an int; columns with fewer than two
+// residues score 0.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       const int count = (int) active.size();
+       float total = 0;
+       for (int a = 0; a < count; a++) {
+               const pair<int, int> &lhs = active[a];
+               for (int b = a + 1; b < count; b++) {
+                       const pair<int, int> &rhs = active[b];
+                       total += sparseMatrices[lhs.first][rhs.first]->GetValue(
+                                       lhs.second, rhs.second);
+               }
+       }
+
+       // count * (count - 1) is twice the number of unordered pairs, hence
+       // the factor 200 for a value scaled by 100
+       const int orderedPairs = count * (count - 1);
+       return (int) (200 * total / orderedPairs);
+
+}
+
+/////////////////////////////////////////////////////////////////
+// AdjustmentTest()
+//
+// Computes the average pairwise identity of a sequence family from
+// Viterbi alignments of all sequence pairs, adjusts the global
+// initDistrib[2] when the identity is at most 50% and returns the
+// similarity level:
+// extreme low similarity(<=25%) return 0
+// low similarity(<=40%) return 1
+// high similarity(<=70%) return 2
+// extreme high similarity(>70%) return 3
+// With fewer than two sequences there is no pair to score; the
+// function then returns 3 and leaves initDistrib untouched.
+//
+// In OpenMP builds the members numPairs and seqsPairs are (re)filled
+// with the list of all sequence pairs as a side effect.
+/////////////////////////////////////////////////////////////////
+int MSA::AdjustmentTest(MultiSequence *sequences,const ProbabilisticModel &model){
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+       //number of unordered sequence pairs; kept in a local because the
+       //member numPairs is only declared in OpenMP builds
+       const int totalPairs = (numSeqs - 1) * numSeqs / 2;
+       //sum of the pairwise identities, turned into the average below
+       float identity = 0;
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       // NOTE(review): an array that seqsPairs may already point to is not
+       // released here, and this array is not released in this function;
+       // confirm who owns it
+       int pairIdx = 0;
+       numPairs = totalPairs;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+
+       // do all pairwise alignments for family similarity
+#ifdef _OPENMP
+       // identity is summed by all threads, so it has to be a reduction
+       // variable; as a plain shared variable the += below is a data race
+#pragma omp parallel for private(pairIdx) default(shared) reduction(+:identity) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a = seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+                       pair<SafeVector<char> *, float> alignment = model.ComputeViterbiAlignment(seq1,seq2);
+                       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+                       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+                       // walk the alignment string: 'B' consumes one residue of
+                       // both sequences, 'X' one of seq1 only, 'Y' one of seq2 only
+                       float N_correct_match = 0;
+                       int i = 1;
+                       int j = 1;
+                       for (SafeVector<char>::iterator iter = alignment.first->begin();
+                               iter != alignment.first->end(); ++iter){
+                               if (*iter == 'B'){
+                                       unsigned char c1 = (unsigned char) iter1[i++];
+                                       unsigned char c2 = (unsigned char) iter2[j++];
+                                       if(c1==c2) N_correct_match += 1;
+                               }
+                               else if(*iter == 'X') i++;
+                               else if(*iter == 'Y') j++;
+                       }
+                       // sanity check: the alignment must consume both sequences
+                       if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) {
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "similarity error"<< endl;
+                       }
+                       // identity of this pair = identical matches / alignment length
+                       identity += N_correct_match / alignment.first->size();
+                       delete alignment.first;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       // Without any pair the average is undefined.  The previous code
+       // divided 0 by 0 here, which failed every comparison below and
+       // ended in "return 3" without touching initDistrib; keep that
+       // result, but make it explicit.
+       if (totalPairs == 0)
+               return 3;
+       identity /= totalPairs;
+
+       //adaptive
+       if( identity <= 0.15 ) initDistrib[2] = 0.143854;
+       else if( identity <= 0.2 ) initDistrib[2] = 0.191948;
+       else if( identity <= 0.25 ) initDistrib[2] = 0.170705;
+       else if( identity <= 0.3 ) initDistrib[2] = 0.100675;
+       else if( identity <= 0.35 ) initDistrib[2] = 0.090755;
+       else if( identity <= 0.4 ) initDistrib[2] = 0.146188;
+       else if( identity <= 0.45 ) initDistrib[2] = 0.167858;
+       else if( identity <= 0.5) initDistrib[2] = 0.250769;
+
+
+       if( identity <= 0.25 ) return 0;
+       else if( identity <= 0.4) return 1;
+       else if( identity <= 0.7) return 2;
+       else return 3;
+
+}
diff --git a/binaries/src/GLProbs-1.0/MSA.h b/binaries/src/GLProbs-1.0/MSA.h
new file mode 100644 (file)
index 0000000..2e37e85
--- /dev/null
@@ -0,0 +1,95 @@
+#ifndef _MSA_H
+#define _MSA_H
+#include "MSADef.h"
+#include "MSAGuideTree.h"
+
+#include "SafeVector.h"
+#include "MultiSequence.h"
+#include "ScoreType.h"
+#include "ProbabilisticModel.h"
+#include "SparseMatrix.h"
+#include <string>
+using namespace std;
+
+class MSAGuideTree;
+struct TreeNode;
+// Driver class of the aligner: declares the option parsing, the
+// pairwise posterior / consistency / refinement steps and the output
+// helpers used to build a multiple sequence alignment.
+// NOTE(review): the class holds raw pointers (seqsWeights, tree,
+// alignOutFile) and declares no copy constructor or assignment
+// operator, so copies would share those pointers - confirm that MSA
+// objects are never copied.
+class MSA {
+public:
+       MSA(int argc, char* argv[]);
+       ~MSA();
+
+       static void getSysTime(double * dtime);
+       // returns the stored guide tree pointer (no copy is made)
+       MSAGuideTree* getGuideTree() {
+               return tree;
+       }
+       // returns the stored sequence weight array (no copy is made)
+       int * getSeqsWeights() {
+               return seqsWeights;
+       }
+private:
+       //print usage
+       void printUsage();
+       //do multiple sequence alignment
+       void doAlign();
+
+       //for sequence weights
+       void createSeqsWeights(int seqsNum);
+       void releaseSeqsWeights();
+
+       //weights of sequences
+       // NOTE(review): stored as int here while DoRelaxation() takes a
+       // float* of weights - confirm which type is intended
+       int * seqsWeights;
+       //guide tree
+       MSAGuideTree* tree;
+       //output file
+       string alignOutFileName;
+       std::ostream* alignOutFile;
+
+       //command line handling and parameter output
+       SafeVector<string> ParseParams(int argc, char *argv[]);
+       void PrintParameters(const char *message, const VF &initDistrib,
+                       const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+                       const VF &emitSingle, const char *filename);
+
+       SafeVector<string> PostProbsParseParams(int argc, char **argv);
+       MultiSequence *doAlign(MultiSequence *sequence,
+                       const ProbabilisticModel &model, int levelid);
+       void ReadParameters();
+
+       //progressive alignment along the guide tree
+       MultiSequence* ProcessTree(TreeNode *tree, MultiSequence *sequences,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model);
+       MultiSequence *ComputeFinalAlignment(MSAGuideTree *tree,
+                       MultiSequence *sequences,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model,int levelid);
+       MultiSequence *AlignAlignments(MultiSequence *align1, MultiSequence *align2,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model);
+
+       //consistency transformation; Relax() combines an x-z with a z-y
+       //matrix, Relax1() combines a z-x with a z-y matrix
+       SafeVector<SafeVector<SparseMatrix *> > DoRelaxation(float* seqsWeights,
+                       MultiSequence *sequences,
+                       SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+       void Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+                       VF &posterior);
+       void Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+                       VF &posterior);
+
+       //iterative refinement; both functions replace "alignment"
+       int DoIterativeRefinement(
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model, MultiSequence* &alignment);
+       void DoIterativeRefinementTreeNode(
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model, MultiSequence* &alignment,
+                       int nodeIndex);
+
+       //column annotation
+       void WriteAnnotation(MultiSequence *alignment,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+       int ComputeScore(const SafeVector<pair<int, int> > &active,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+
+       //family similarity level (0..3) from pairwise Viterbi alignments
+       int AdjustmentTest(MultiSequence *sequences,const ProbabilisticModel &model);
+#ifdef _OPENMP
+       //private struct
+       //one pair of sequence indices; the flat list of all pairs is what
+       //the OpenMP loops iterate over
+       struct SeqsPair {
+               int seq1;
+               int seq2;
+       };
+       int numPairs;
+       SeqsPair* seqsPairs;
+#endif
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/MSA2.cpp b/binaries/src/GLProbs-1.0/MSA2.cpp
new file mode 100644 (file)
index 0000000..f27a7bc
--- /dev/null
@@ -0,0 +1,1562 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+// File-scope configuration and model state of this translation unit.
+// NOTE(review): most of these values are presumably filled in by the
+// command line parser (ParseParams); that code is not visible here.
+
+// file names used for parameter input/output and for the annotation
+string parametersInputFilename = "";
+string parametersOutputFilename = "no training";
+string annotationFilename = "";
+
+// feature switches and repetition counts with their defaults
+bool enableVerbose = false;
+bool enableAnnotation = false;
+bool enableClustalWOutput = false;
+bool enableAlignOrder = false;
+int numConsistencyReps = 2;
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 100;
+
+float cutoff = 0;
+
+// model parameters; the constructor passes these five containers to
+// ProbabilisticModel and PrintParameters() can print them
+VF initDistrib(NumMatrixTypes);
+VF gapOpen(2 * NumInsertStates);
+VF gapExtend(2 * NumInsertStates);
+VVF emitPairs(256, VF(256, 1e-10));
+VF emitSingle(256, 1e-5);
+
+// symbols of the alphabet; PrintParameters() uses them to index
+// emitPairs and emitSingle
+string alphabet = alphabetDefault;
+
+// lower and upper limits for the three repetition counts above
+// (presumably enforced while parsing the options - verify)
+const int MIN_PRETRAINING_REPS = 0;
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+int flag_gui = 0;   //0: no gui related output
+//1: gui related output generated
+int flag_ppscore = 0; //0: no pp score sequence added to the output fasta alignment
+//1: pp score sequence added to the output fasta alignment
+
+///////////////////////////////
+// global scoring matrix variables
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160";
+int subst_index[26];
+
+double sub_matrix[26][26];
+double normalized_matrix[26][26];// added by YE Yongtao
+int firstread = 0;             //this makes sure that matrices are read only once
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+int prot_nuc = 0;              //0=prot, 1=nucleotide
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+// number of OpenMP threads; a value <= 0 makes the constructor use the
+// number of processors reported by OpenMP
+int numThreads = 0;
+
+//argument support
+// Plain record of settings plus the single global instance "argument".
+// NOTE(review): presumably filled in by init_arguments() for the
+// partition function code - that function is not visible here.
+typedef struct {
+       char input[30];         //fixed-size character buffer (30 bytes)
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+argument_decl argument;
+
+// Helpers that are only declared here; their definitions are not part
+// of the visible section of this file.  init_arguments() is called by
+// the MSA constructor before the parameters are read.
+// NOTE(review): several declarations combine "extern" with "inline"
+// although no definition is visible in this translation unit - confirm
+// that this matches the declarations at the definition site.
+extern inline void read_sustitution_matrix(char *fileName);
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int();
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments();
+
+// Runs this variant of the program from the constructor: parses the
+// options, reads the model parameters, loads every input file, appends
+// the first command line argument to the log file "accuracy" and
+// evaluates the family similarity.  The alignment step itself is
+// disabled (commented out) in this variant.
+// NOTE(review): ComputeSimilarity() is not among the members declared
+// in the MSA.h shown in this commit - confirm where it is declared.
+MSA::MSA(int argc, char* argv[]) {
+       //parse program parameters
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters();
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       //read the input sequences
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       //allocate space for sequence weights
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       //initialize parameters for OPENMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+               cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+       cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       //use the requested number of threads, or one per processor when
+       //none was requested
+       omp_set_num_threads(numThreads);
+#endif
+
+       //append the first command line argument to the "accuracy" log;
+       //a log file that cannot be opened must not crash the run, and
+       //argv[1] only exists when at least one argument was given
+       FILE *fi = fopen ("accuracy", "a");
+       if (fi) {
+               fprintf (fi, "%s ", argc > 1 ? argv[1] : "");
+               fclose (fi);
+       } else {
+               cerr << "WARNING: Unable to open log file: accuracy" << endl;
+       }
+
+       int levelid = ComputeSimilarity (sequences,ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,emitSingle));
+       //levelid is only consumed by the disabled alignment step below
+       (void) levelid;
+/*
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), levelid);
+
+       //write the alignment results to standard output
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+       //release resources (seqsWeights is released below)
+       delete alignment;
+*/
+       //release resources; the weight array used to be freed only inside
+       //the disabled block above and therefore leaked
+       delete[] this->seqsWeights;
+       this->seqsWeights = NULL;
+       delete sequences;
+}
+// Closes the alignment output stream, but only when an output file
+// name was set; in that case alignOutFile is treated as a file stream.
+MSA::~MSA() {
+       // nothing to close when no output file name was set
+       if (alignOutFileName.empty())
+               return;
+
+       /*close the output file*/
+       static_cast<std::ofstream*>(alignOutFile)->close();
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Prints "message" followed by initDistrib, gapOpen and gapExtend
+// to STDERR.  If a filename is specified, these three vectors, the
+// alphabet, the lower triangle of emitPairs and emitSingle are
+// also written to that file; the program exits with status 1 when
+// the file cannot be opened.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       const int numGapParams = NumInsertStates * 2;
+
+       // print parameters to the screen
+       cerr << message << endl;
+       cerr << "    initDistrib[] = { ";
+       for (int t = 0; t < NumMatrixTypes; t++)
+               cerr << setprecision(10) << initDistrib[t] << " ";
+       cerr << "}" << endl;
+       cerr << "        gapOpen[] = { ";
+       for (int g = 0; g < numGapParams; g++)
+               cerr << setprecision(10) << gapOpen[g] << " ";
+       cerr << "}" << endl;
+       cerr << "      gapExtend[] = { ";
+       for (int g = 0; g < numGapParams; g++)
+               cerr << setprecision(10) << gapExtend[g] << " ";
+       cerr << "}" << endl << endl;
+
+       // without a file name the screen output is all there is to do
+       if (!filename)
+               return;
+
+       // attempt to open the file for writing
+       FILE *out = fopen(filename, "w");
+       if (!out) {
+               cerr << "ERROR: Unable to write parameter file: " << filename
+                               << endl;
+               exit(1);
+       }
+
+       // one line each for the initial distribution and the gap parameters
+       for (int t = 0; t < NumMatrixTypes; t++)
+               fprintf(out, "%.10f ", initDistrib[t]);
+       fprintf(out, "\n");
+       for (int g = 0; g < numGapParams; g++)
+               fprintf(out, "%.10f ", gapOpen[g]);
+       fprintf(out, "\n");
+       for (int g = 0; g < numGapParams; g++)
+               fprintf(out, "%.10f ", gapExtend[g]);
+       fprintf(out, "\n");
+
+       // the alphabet, then the lower triangle of the pair emissions with
+       // one row per symbol, then the single emissions
+       fprintf(out, "%s\n", alphabet.c_str());
+       const int numSymbols = (int) alphabet.size();
+       for (int r = 0; r < numSymbols; r++) {
+               const unsigned char rowSymbol = (unsigned char) alphabet[r];
+               for (int c = 0; c <= r; c++) {
+                       const unsigned char colSymbol = (unsigned char) alphabet[c];
+                       fprintf(out, "%.10f ", emitPairs[rowSymbol][colSymbol]);
+               }
+               fprintf(out, "\n");
+       }
+       for (int r = 0; r < numSymbols; r++)
+               fprintf(out, "%.10f ", emitSingle[(unsigned char) alphabet[r]]);
+       fprintf(out, "\n");
+       fclose(out);
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// Builds the multiple alignment for the given sequences:
+//   1. computes a posterior probability matrix for every pair
+//      of sequences with ::ComputePostProbs() (one pair per loop
+//      iteration; the loop is an OpenMP parallel-for when
+//      _OPENMP is defined),
+//   2. derives pairwise distances from the pairwise alignment
+//      scores and builds the guide tree,
+//   3. applies numConsistencyReps rounds of DoRelaxation(),
+//   4. calls ComputeFinalAlignment() and, if enableAnnotation is
+//      set, WriteAnnotation().
+// Returns the final alignment, which is not freed here
+// (presumably the caller owns it - confirm against the caller).
+// NOTE(review): levelid is currently unused; the model selection
+// that depended on it is commented out inside the pair loop.
+/////////////////////////////////////////////////////////////////
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, int levelid) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+       //create distance matrix
+       VVF distances(numSeqs, VF(numSeqs, 0));
+        //create the numSeqs x numSeqs table of sparse posterior matrices (all NULL initially)
+        SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL)); 
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       //(the nested a < b loops are flattened into seqsPairs so that a single
+       // loop over pairIdx can be distributed across threads)
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       // serial build: nested loops over all pairs a < b
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       //posterior probability matrix
+                       VF* posterior;
+
+
+//high similarity use global model
+                       //if(levelid == 2) 
+                       // the levelid test is disabled: ::ComputePostProbs() is used for every pair
+                       if(1) posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+
+/*
+//low similarity use local model
+                       else if(levelid == 1){
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               delete forward;
+                               delete backward; 
+                       }
+
+//extreme low or extreme high similarity use combined model
+                       else{
+
+//probcons
+                               // compute forward and backward probabilities
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                               assert(backward);
+                               // compute posterior probability matrix from HMM
+                               VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
+                               assert(probcons_posterior);             
+                               delete forward;
+                               delete backward;   
+                 
+//probalign
+                               VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                               assert(probalign_posterior);
+//local
+                               forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               assert(posterior);
+                               delete forward;
+                               delete backward;                                
+//combined model
+                               //merge probalign + local + probcons 
+                               VF::iterator ptr1 = probcons_posterior->begin();                        
+                               VF::iterator ptr2 = probalign_posterior->begin();
+                               VF::iterator ptr = posterior->begin();                 
+                               for (int i = 0; i <= seq1->GetLength(); i++) {
+                                       for (int j = 0; j <= seq2->GetLength(); j++) {
+                                               float v1 = *ptr1;
+                                               float v2 = *ptr2;
+                                               float v3 = *ptr;
+                                               *ptr = sqrt((v1*v1 + v2*v2 + v3*v3)/3);
+                                               ptr1++;
+                                               ptr2++;
+                                               ptr++;
+                                       }
+                               }
+                               delete probcons_posterior;
+                               delete probalign_posterior;
+                       }
+*/
+                        assert(posterior);
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       //compute expected accuracy
+                       //(distance = 1 - alignment score / length of the shorter sequence)
+                       //NOTE(review): the divisor is 0 if either sequence has length 0 - confirm inputs are non-empty
+                       distances[a][b] = distances[b][a] = 1.0f - alignment.second
+                                       / min(seq1->GetLength(), seq2->GetLength());
+
+                       // compute sparse representations
+                       // (only the [a][b] entry with a < b is allocated; [b][a] stays NULL)
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+
+                       delete posterior;
+                       delete alignment.first;
+#ifndef _OPENMP
+               // closes the inner loop over b, which exists only in the serial build
+               }
+#endif
+       } 
+
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // perform the consistency transformation the desired number of times
+       // (seqsWeights entries are divided by INT_MULTIPLY and then scaled by 10)
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) {
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices
+               // (each old entry is freed before the relaxed one takes its slot)
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices
+       // NOTE(review): only off-diagonal entries are freed here; this assumes the
+       // diagonal entries are never allocated - confirm against DoRelaxation()
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Attempts to parse an integer from the character string given.
+// The base is detected by strtol (decimal, 0x hexadecimal or
+// leading-0 octal). Returns true and stores the value in *val
+// only if the whole string is a number that fits in an int;
+// returns false (leaving *val untouched) for a NULL or empty
+// string, trailing characters after the number, or overflow.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(val);
+
+       // nothing to parse
+       if (!data)
+               return false;
+
+       errno = 0;
+       retVal = strtol(data, &endPtr, 0);
+       // no characters were converted
+       if (data == endPtr)
+               return false;
+       // strtol reported a failure (e.g. the value does not fit in a long)
+       if (errno != 0)
+               return false;
+       // reject trailing garbage such as "12abc"
+       if (*endPtr != '\0')
+               return false;
+       // the value must also fit in an int
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
+               return false;
+       *val = (int) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Attempts to parse a float from the character string given.
+// Returns true and stores the value in *val only if the whole
+// string is a finite number that fits in a float; returns false
+// (leaving *val untouched) for a NULL or empty string, trailing
+// characters after the number, "nan"/"inf", or overflow.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+       // largest magnitude accepted; chosen just below the largest
+       // finite float so that the final cast is always in range
+       const double maxMagnitude = 3.4028234e38;
+
+       assert(val);
+
+       // nothing to parse
+       if (!data)
+               return false;
+
+       errno = 0;
+       retVal = strtod(data, &endPtr);
+       if (retVal == 0 && (errno != 0 || data == endPtr))
+               return false;
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
+               return false;
+       // reject trailing garbage such as "0.5x"
+       if (*endPtr != '\0')
+               return false;
+       // NaN is the only value that compares unequal to itself; it would
+       // otherwise slip through range checks written as (x < lo || x > hi)
+       if (retVal != retVal)
+               return false;
+       // infinities and values too large for a float
+       if (retVal > maxMagnitude || retVal < -maxMagnitude)
+               return false;
+       *val = (float) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters. When parametersInputFilename is empty the built-in
+// defaults for one or two pairs of insert states are used;
+// otherwise the values are read from that file, whose layout is:
+// three lines (initDistrib, gapOpen, gapExtend), one alphabet
+// line, the lower triangle of the pair emissions, and the single
+// emissions. Emission values are stored for both the upper and
+// the lower case form of every alphabet symbol. Any unreadable
+// or incomplete input prints an error and exits with status 1.
+/////////////////////////////////////////////////////////////////
+
+// Stores val symmetrically in table for every upper/lower case
+// combination of the symbols a and b.
+static void SetEmitPairAllCases(VVF &table, char a, char b, float val) {
+       // go through unsigned char: tolower/toupper are undefined for negative values
+       const unsigned char rowSyms[2] = {
+                       (unsigned char) tolower((unsigned char) a),
+                       (unsigned char) toupper((unsigned char) a) };
+       const unsigned char colSyms[2] = {
+                       (unsigned char) tolower((unsigned char) b),
+                       (unsigned char) toupper((unsigned char) b) };
+       for (int r = 0; r < 2; r++) {
+               for (int c = 0; c < 2; c++) {
+                       table[rowSyms[r]][colSyms[c]] = val;
+                       table[colSyms[c]][rowSyms[r]] = val;
+               }
+       }
+}
+
+// Stores val in table for the upper and the lower case form of symbol.
+static void SetEmitSingleAllCases(VF &table, char symbol, float val) {
+       table[(unsigned char) tolower((unsigned char) symbol)] = val;
+       table[(unsigned char) toupper((unsigned char) symbol)] = val;
+}
+
+void MSA::ReadParameters() {
+
+       ifstream data;
+
+       // symbols outside the alphabet keep these small background values
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // read initial state distribution and transition parameters
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend1Default[i];
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend2Default[i];
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               // the default pair emissions are indexed as a lower triangle (j <= i)
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       SetEmitSingleAllCases(emitSingle, alphabet[i],
+                                       emitSingleDefault[i]);
+                       for (int j = 0; j <= i; j++) {
+                               SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j],
+                                               emitPairsDefault[i][j]);
+                       }
+               }
+       } else {
+               data.open(parametersInputFilename.c_str());
+               if (data.fail()) {
+                       cerr << "ERROR: Unable to read parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // the first three lines hold initDistrib, gapOpen and gapExtend
+               string line[3];
+               for (int i = 0; i < 3; i++) {
+                       if (!getline(data, line[i])) {
+                               cerr
+                                               << "ERROR: Unable to read transition parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+               }
+               istringstream data2;
+               bool transitionsOk = true;
+               data2.clear();
+               data2.str(line[0]);
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       data2 >> initDistrib[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[1]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapOpen[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[2]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapExtend[i];
+               transitionsOk = transitionsOk && !data2.fail();
+
+               // a line with missing or non-numeric entries must not be used silently
+               if (!transitionsOk) {
+                       cerr
+                                       << "ERROR: Unable to read transition parameters from parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               if (!getline(data, line[0])) {
+                       cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // read alphabet as concatenation of all characters on alphabet line
+               alphabet = "";
+               string token;
+               data2.clear();
+               data2.str(line[0]);
+               while (data2 >> token)
+                       alphabet += token;
+
+               // lower triangle of the pair emissions, one value per (i, j <= i)
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++) {
+                               float val;
+                               data >> val;
+                               if (data.fail()) {
+                                       cerr
+                                                       << "ERROR: Unable to read emission parameters from parameter file: "
+                                                       << parametersInputFilename << endl;
+                                       exit(1);
+                               }
+                               SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j], val);
+                       }
+               }
+
+               // single emissions, one value per alphabet symbol
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       float val;
+                       data >> val;
+                       if (data.fail()) {
+                               cerr
+                                               << "ERROR: Unable to read emission parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+                       SetEmitSingleAllCases(emitSingle, alphabet[i], val);
+               }
+               data.close();
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// printUsage()
+//
+// Prints the program banner and the list of command-line options
+// accepted by ParseParams() to STDERR, including the current
+// values of the consistency, iterative-refinement, verbose and
+// alignment-order settings as defaults.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       // banner
+       cerr
+                       << "************************************************************************"
+                       << endl
+                       << "\tMSAPROBS is an open-source protein multiple sequence alignment algorithm"
+                       << endl
+                       << "\tbased on pair hidden markov model and partition function posterior"
+                       << endl
+                       << "\tprobabilities. If any comments or problems, please contact"
+                       << endl
+                       << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl
+                       << "*************************************************************************"
+                       << endl;
+
+       // synopsis
+       cerr << "Usage:" << endl
+                       << "       msaprobs [OPTION]... [infile]..." << endl << endl
+                       << "Description:" << endl
+                       << "       Align sequences in multi-FASTA format" << endl << endl;
+
+       // input/output options
+       cerr << "       -o, --outfile <string>" << endl
+                       << "              specify the output file name (STDOUT by default)"
+                       << endl << "       -num_threads <integer>" << endl
+                       << "              specify the number of threads used, and otherwise detect automatically"
+                       << endl << "       -clustalw" << endl
+                       << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl;
+
+       // options accepted by ParseParams() that the help used to omit
+       cerr << "       -p, --paramfile FILENAME" << endl
+                       << "              read parameters from FILENAME" << endl << endl
+                       << "       -co, --cutoff CUTOFF" << endl
+                       << "              set the cutoff value, 0 <= CUTOFF <= 1" << endl
+                       << endl;
+
+       // alignment options
+       cerr << "       -c, --consistency REPS" << endl
+                       << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl
+                       << "       -ir, --iterative-refinement REPS" << endl
+                       << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl;
+
+       // reporting options
+       cerr << "       -v, --verbose" << endl
+                       << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl
+                       << "       -annot FILENAME" << endl
+                       << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl << "       -a, --alignment-order" << endl
+                       << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl
+                       << "       -help, -?" << endl
+                       << "              print this usage message" << endl
+                       << "       -version " << endl
+                       << "              print out version of MSAPROBS " << endl << endl;
+}
+/////////////////////////////////////////////////////////////////
+// ParseParams()
+//
+// Parse all command-line options. Arguments that do not start
+// with '-' are collected and returned as the list of input
+// names. Prints the usage and exits when no argument is given;
+// an unknown option, a missing value or an out-of-range value
+// prints an error and exits with status 1. Finally selects the
+// output stream: std::cout when no output file was requested,
+// otherwise a newly opened file (exits if it cannot be opened).
+/////////////////////////////////////////////////////////////////
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+       int tempInt;
+       float tempFloat;
+
+       for (int i = 1; i < argc; i++) {
+               if (argv[i][0] == '-') {
+                       //help
+                       if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
+                               printUsage();
+                               exit(1);
+                       }
+
+                       //output file name
+                       else if (!strcmp(argv[i], "-o")
+                                       || !strcmp(argv[i], "--outfile")) {
+                               if (i < argc - 1) {
+                                       alignOutFileName = argv[++i];   //get the file name
+                               } else {
+                                       cerr << "ERROR: String expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // parameter file
+                       else if (!strcmp (argv[i], "-p") || !strcmp (argv[i], "--paramfile")){
+                               if (i < argc - 1)
+                                       parametersInputFilename = string (argv[++i]);
+                               else {
+                                               cerr << "ERROR: Filename expected for option " << argv[i] << endl;
+                                               exit (1);
+                               }
+                       }
+
+                       //number of threads used
+                       //("-p" is not tested here: it is already taken by --paramfile
+                       // above, so such a test could never match)
+                       else if (!strcmp(argv[i], "-num_threads")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << " ERROR: invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               // negative thread counts are treated as 0
+                                               if (tempInt < 0) {
+                                                       tempInt = 0;
+                                               }
+                                               numThreads = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // number of consistency transformations
+                       else if (!strcmp(argv[i], "-c")
+                                       || !strcmp(argv[i], "--consistency")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_CONSISTENCY_REPS
+                                                               || tempInt > MAX_CONSISTENCY_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_CONSISTENCY_REPS << " and "
+                                                                       << MAX_CONSISTENCY_REPS << "." << endl;
+                                                       exit(1);
+                                               } else {
+                                                       numConsistencyReps = tempInt;
+                                               }
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // number of randomized partitioning iterative refinement passes
+                       else if (!strcmp(argv[i], "-ir")
+                                       || !strcmp(argv[i], "--iterative-refinement")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
+                                                               || tempInt > MAX_ITERATIVE_REFINEMENT_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_ITERATIVE_REFINEMENT_REPS << " and "
+                                                                       << MAX_ITERATIVE_REFINEMENT_REPS << "."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       numIterativeRefinementReps = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // annotation files
+                       else if (!strcmp(argv[i], "-annot")) {
+                               enableAnnotation = true;
+                               if (i < argc - 1) {
+                                       annotationFilename = argv[++i];
+                               } else {
+                                       cerr << "ERROR: FILENAME expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // clustalw output format
+                       else if (!strcmp(argv[i], "-clustalw")) {
+                               enableClustalWOutput = true;
+                       }
+
+                       // cutoff
+                       else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
+                               if (i < argc - 1) {
+                                       if (!GetFloat(argv[++i], &tempFloat)) {
+                                               cerr
+                                                               << "ERROR: Invalid floating-point value following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempFloat < 0 || tempFloat > 1) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", floating-point value must be between 0 and 1."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       cutoff = tempFloat;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Floating-point value expected for option "
+                                                       << argv[i] << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // verbose reporting
+                       else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
+                               enableVerbose = true;
+                       }
+
+                       // alignment order
+                       else if (!strcmp(argv[i], "-a")
+                                       || !strcmp(argv[i], "--alignment-order")) {
+                               enableAlignOrder = true;
+                       }
+
+                       //print out version
+                       else if (!strcmp(argv[i], "-version")) {
+                               cerr << "MSAPROBS version " << VERSION << endl;
+                               exit(1);
+                       }
+                       // bad arguments
+                       else {
+                               cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
+                               exit(1);
+                       }
+               } else {
+                       // anything that is not an option is an input name
+                       sequenceNames.push_back(string(argv[i]));
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               ofstream *outFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+               // store the stream before checking it so that the destructor
+               // always sees a valid pointer when a file name is set
+               alignOutFile = outFile;
+               // fail now rather than silently losing the alignment later
+               if (!outFile->is_open()) {
+                       cerr << "ERROR: Unable to open the output file "
+                                       << alignOutFileName << endl;
+                       exit(1);
+               }
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Process the tree recursively.  Returns the aligned sequences
+// corresponding to a node or leaf of the tree.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       MultiSequence *result;
+
+       // check if this is a node of the alignment tree
+       //if (tree->GetSequenceLabel() == -1){
+       if (tree->leaf == NODE) {
+               MultiSequence *alignLeft = ProcessTree(tree->left, sequences,
+                               sparseMatrices, model);
+               MultiSequence *alignRight = ProcessTree(tree->right, sequences,
+                               sparseMatrices, model);
+
+               assert(alignLeft);
+               assert(alignRight);
+
+               result = AlignAlignments(alignLeft, alignRight, sparseMatrices, model);
+               assert(result);
+
+               delete alignLeft;
+               delete alignRight;
+       }
+
+       // otherwise, this is a leaf of the alignment tree
+       else {
+               result = new MultiSequence();
+               assert(result);
+               //result->AddSequence (sequences->GetSequence(tree->GetSequenceLabel())->Clone());
+               result->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+       }
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Compute the final alignment by calling ProcessTree(), then
+// performing iterative refinement as needed.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+       MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       SafeVector<int> oldOrdering;
+       int numSeqs = alignment->GetNumSequences();
+       if (enableAlignOrder) {
+               for (int i = 0; i < numSeqs; i++)
+                       oldOrdering.push_back(alignment->GetSequence(i)->GetSortLabel());
+               alignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // tree-based refinement
+       // TreeBasedBiPartitioning (sparseMatrices, model, alignment, tree);
+       /*
+        int numSeqs = alignment->GetNumSequences();
+        //if(numSeqs < numIterativeRefinementReps){
+        for(int iter = 0; iter < 5; iter ++){
+               for(int i = 0; i < numSeqs - 1; i++){
+                       DoIterativeRefinementTreeNode(sparseMatrices, model, alignment, i);
+                }
+        }
+        //}*/
+        //DoIterativeRefinement() return 1,2: this refinement unsuccessful
+/*
+       int ineffectiveness = 0;
+       for (int i = 0; i < numIterativeRefinementReps; i++){
+               int flag = DoIterativeRefinement(sparseMatrices, model, alignment);
+               if(numSeqs > 25){
+                       if(flag > 0){
+                           if(numIterativeRefinementReps < 20*numSeqs) 
+                               numIterativeRefinementReps ++;                          
+                            if(flag == 1) ineffectiveness ++;
+                       }
+                       //else ineffectiveness = 0;
+                       if(ineffectiveness > 2*numSeqs && i >100 ) break;
+               }               
+       }
+*/
+
+       for (int i = 0; i < numIterativeRefinementReps; i++)
+               DoIterativeRefinement(sparseMatrices, model, alignment);
+
+        cerr << endl;  
+
+       if (oldOrdering.size() > 0) {
+               for (int i = 0; i < (int) oldOrdering.size(); i++) {
+                       alignment->GetSequence(i)->SetSortLabel(oldOrdering[i]);
+               }
+       }
+
+       // return final alignment
+       return alignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Returns the alignment of two MultiSequence objects.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // print some info about the alignment
+       if (enableVerbose) {
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align1->GetSequence(i)->GetLabel();
+               cerr << "] vs. ";
+               for (int i = 0; i < align2->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align2->GetSequence(i)->GetLabel();
+               cerr << "]: ";
+       }
+#if 0
+       VF *posterior = model.BuildPosterior (align1, align2, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+#endif
+       // compute an "accuracy" measure for the alignment before refinement
+
+       pair<SafeVector<char> *, float> alignment;
+       //perform alignment
+       alignment = model.ComputeAlignment(align1->GetSequence(0)->GetLength(),
+                       align2->GetSequence(0)->GetLength(), *posterior);
+
+       delete posterior;
+
+       if (enableVerbose) {
+
+               // compute total length of sequences
+               int totLength = 0;
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       for (int j = 0; j < align2->GetNumSequences(); j++)
+                               totLength += min(align1->GetSequence(i)->GetLength(),
+                                               align2->GetSequence(j)->GetLength());
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+       for (int i = 0; i < align1->GetNumSequences(); i++)
+               result->AddSequence(
+                               align1->GetSequence(i)->AddGaps(alignment.first, 'X'));
+       for (int i = 0; i < align2->GetNumSequences(); i++)
+               result->AddSequence(
+                               align2->GetSequence(i)->AddGaps(alignment.first, 'Y'));
+       if (!enableAlignOrder)
+               result->SortByLabel();
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+//
+// Performs one round of the probabilistic consistency transformation for every sequence pair (i, j) with i < j and returns the transformed matrices in a new table (only [i][j], i < j, is filled).
+// NOTE: the sequence weights are read, but every weighted update is commented out, so the accumulation is effectively unweighted and is normalised by numSeqs rather than by the weight sum.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
+               MultiSequence *sequences,
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       const int numSeqs = sequences->GetNumSequences();
+       // result table: numSeqs x numSeqs pointers, all NULL until a pair has been processed
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences (OpenMP build: one flat loop over the seqsPairs/numPairs table, which is populated outside this function; otherwise: nested loops with i < j)
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       if (enableVerbose) { // only the single cerr statement below is inside the OpenMP critical section
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix as a dense array; it is deleted at the end of this iteration
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+
+                       // contribution from the summation where z = x and z = y
+                       float w = wi * wi * wj + wi * wj * wj; // computed but not applied: the weighted update below is commented out
+                       float sumW = w; // accumulated but only referenced by commented-out code
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                                //posterior[k] = w*posterior[k];
+                               posterior[k] += posterior[k]; // doubles every cell (unweighted stand-in for the two terms z = x and z = y)
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences; which helper is used depends on where k falls relative to i < j, because only matrices [a][b] with a < b are dereferenced
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk;
+                                       sumW += w;
+                                       if (k < i) // k < i < j: both stored matrices have k as their first sequence
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       else if (k > i && k < j) // i < k < j: [i][k] and [k][j] chain directly
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       else { // i < j < k: [j][k] must be transposed before it can be chained after [i][k]
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               //posterior[k] /= sumW;
+                               posterior[k] /= numSeqs; // normalise by the number of sequences (the doubled self term plus numSeqs - 2 others)
+                       }
+                       // mask out positions not originally in the posterior matrix
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       for (int y = 0; y <= seq2Length; y++)
+                               posterior[y] = 0; // row 0 is cleared entirely
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0;
+                               while (XYptr != XYend) {
+                                       // NOTE(review): this walk only works if the row entries are in ascending column order - confirm in SparseMatrix
+                                       // zero out all cells until the first filled column
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       delete posteriorPtr; // the dense working copy is no longer used after the sparse matrix has been built from it
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+
+       int lengthX = matXZ->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       // for every x[i]
+       for (int i = 1; i <= lengthX; i++) {
+               SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
+               SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
+
+               VF::iterator base = posterior.begin() + i * (lengthY + 1);
+
+               // iterate through all x[i]-z[k]
+               while (XZptr != XZend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
+                       SafeVector<PIF>::iterator ZYend = ZYptr
+                                       + matZY->GetRowSize(XZptr->first);
+                       const float XZval = XZptr->second;
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * XZval * ZYptr->second;
+                                base[ZYptr->first] += XZval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       XZptr++;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               while (ZXptr != ZXend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
+                       SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               base[ZYptr->first] += ZXval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       ZXptr++;
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partitioning iterative
+// refinement: the sequences are split into two random groups, the groups are re-aligned against each other, and "alignment" is replaced (old object deleted) by the result.
+// return 0: successful refinement, 1: ineffective refinement (new score exactly equals the score measured before), 2: random problem (one of the two groups came out empty; alignment untouched)
+/////////////////////////////////////////////////////////////////
+int MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+       int i;
+       // create two separate groups: one rand() draw per sequence, odd value -> groupOne, even value -> groupTwo
+       for (i = 0; i < numSeqs; i++) {
+                int index = rand();
+               if (index % 2) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty()) return 2; // nothing has been allocated yet, so returning here leaks nothing
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);   
+       // the "#if 1" below selects the BuildPosterior overload that takes no sequence weights
+//start add by Yongtao
+#if 1
+       VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
+                       sparseMatrices, cutoff);
+#endif
+       // compute an "accuracy" measure for the alignment before refinement: collect data pointers to the current rows of each group first
+        SafeVector<SafeVector<char>::iterator> oldOnePtrs(groupOne.size());
+       SafeVector<SafeVector<char>::iterator> oldTwoPtrs(groupTwo.size());
+        i=0; 
+       for (set<int>::const_iterator iter = groupOne.begin();
+                       iter != groupOne.end(); ++iter) {
+               oldOnePtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+        i=0;
+       for (set<int>::const_iterator iter = groupTwo.begin();
+                       iter != groupTwo.end(); ++iter) {
+               oldTwoPtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+       // walk the columns of the current alignment (1-based) and sum the posterior over columns where both groups have a residue
+        VF &posteriorArr = *posterior;
+        int oldLength = alignment->GetSequence(0)->GetLength();
+       int groupOneindex=0; int groupTwoindex=0; // count of columns seen so far with a non-gap in each group; presumably the column index in the projected group - TODO confirm against Project()
+       float accuracy_before = 0; 
+        int j;
+       for (i = 1; i <= oldLength; i++) {
+               // check to see if there is a gap in every sequence of the set
+               bool foundOne = false;
+               for (j = 0; !foundOne && j < (int) groupOne.size(); j++)
+                       foundOne = (oldOnePtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundOne) groupOneindex ++;
+               bool foundTwo = false;
+               for (j = 0; !foundTwo && j < (int) groupTwo.size(); j++)
+                       foundTwo = (oldTwoPtrs[j][i] != '-');
+               if (foundTwo) groupTwoindex ++;
+                if(foundOne && foundTwo) accuracy_before += 
+                               posteriorArr[groupOneindex * (groupTwoSeqs->GetSequence(0)->GetLength() + 1) + groupTwoindex]; // row-major lookup, (group-two length + 1) entries per row
+       }
+       // re-align the two groups; refinealignment.second is the score compared with accuracy_before at the end
+       pair<SafeVector<char> *, float> refinealignment;
+       //perform alignment
+       refinealignment = model.ComputeAlignment(groupOneSeqs->GetSequence(0)->GetLength(),
+                       groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
+        delete posterior;
+       // now build final alignment: group-one rows gapped as 'X' first, then group-two rows gapped as 'Y'
+       MultiSequence *result = new MultiSequence();
+       for (int i = 0; i < groupOneSeqs->GetNumSequences(); i++)
+               result->AddSequence(
+                       groupOneSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'X'));
+       for (int i = 0; i < groupTwoSeqs->GetNumSequences(); i++)
+               result->AddSequence(
+                       groupTwoSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'Y'));
+       // free temporary alignment
+       delete refinealignment.first;
+       delete alignment; // the caller's pointer is re-seated on the next line (alignment is a reference to pointer)
+        alignment = result;
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+        if(accuracy_before == refinealignment.second) return 1; // exact float equality: only a bit-identical score counts as "ineffective"
+        else return 0; 
+}
+
+
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       vector<bool> inGroup1;
+       inGroup1.resize(numSeqs);
+       for (int i = 0; i < numSeqs; i++) {
+               inGroup1[i] = false;
+       }
+
+       AlignmentOrder* orders = this->tree->getAlignOrders();
+       AlignmentOrder* order = &orders[nodeIndex];
+       for (int i = 0; i < order->leftNum; i++) {
+               int si = order->leftLeafs[i];
+               inGroup1[si] = true;
+       }
+       for (int i = 0; i < order->rightNum; i++) {
+               int si = order->rightLeafs[i];
+               inGroup1[si] = true;
+       }
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               if (inGroup1[i]) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Computes annotation for multiple alignment and write values
+// to a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       SafeVector<int> position(numSeqs, 0);
+       SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
+       for (int i = 0; i < numSeqs; i++)
+               seqs[i] = alignment->GetSequence(i)->GetDataPtr();
+       SafeVector<pair<int, int> > active;
+       active.reserve(numSeqs);
+
+       SafeVector<int> lab;
+       for (int i = 0; i < numSeqs; i++)
+               lab.push_back(alignment->GetSequence(i)->GetSortLabel());
+
+       // for every column
+       for (int i = 1; i <= alignLength; i++) {
+
+               // find all aligned residues in this particular column
+               active.clear();
+               for (int j = 0; j < numSeqs; j++) {
+                       if (seqs[j][i] != '-') {
+                               active.push_back(make_pair(lab[j], ++position[j]));
+                       }
+               }
+
+               sort(active.begin(), active.end());
+               outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       float val = 0;
+       for (int i = 0; i < (int) active.size(); i++) {
+               for (int j = i + 1; j < (int) active.size(); j++) {
+                       val += sparseMatrices[active[i].first][active[j].first]->GetValue(
+                                       active[i].second, active[j].second);
+               }
+       }
+
+       return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
+
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeSimilarity ()
+//
+// Computes the average similarity for a particular family.
+// extreme low or extreme high similarity(<=20% or >80%) return 0
+// low similarity(20%-50%) return 1
+// high similarity(50%-80%) return 2
+/////////////////////////////////////////////////////////////////
+extern pair<SafeVector<char> *, float> partViterbi(string seq1, string seq2); 
+extern float computeS(string seq1, string seq2, SafeVector<char> * alignment);
+
+int MSA::ComputeSimilarity (MultiSequence *sequences,const ProbabilisticModel &model){
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+        //average identity for all sequences
+        float identity = 0;
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+
+       // do all pairwise alignments for family similarity 
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+                       
+                       //pair<SafeVector<char> *, float> alignment = ::partViterbi(seq1->GetString(),seq2->GetString()); 
+                       //cerr << alignment.second / alignment.first->size();
+                       //cerr << computeS(seq1->GetString(),seq2->GetString(),alignment.first)<< endl;
+                       pair<SafeVector<char> *, float> alignment = model.ComputeViterbiAlignment(seq1,seq2);
+/*
+                       VF* posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+*/
+/*
+                       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+                       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+                        float N_correct_match = 0;
+                       //float N_match;
+                       //float N_column = 0;
+                       //float N_alignment = 0;
+                        int i = 1;int j = 1;
+                       //bool start = false; bool end = false;
+                       for (SafeVector<char>::iterator iter = alignment.first->begin(); 
+                               iter != alignment.first->end(); ++iter){
+                               if (*iter == 'B'){
+                                        //N_match += 1;
+                                       //start = true;
+                                       //if(i==seq1->GetLength() || j==seq2->GetLength()) end = true;
+                                       unsigned char c1 = (unsigned char) iter1[i++];
+                                       unsigned char c2 = (unsigned char) iter2[j++];
+                                       if(c1==c2) N_correct_match += 1;
+                               }
+                                else if(*iter == 'X') i++;
+                               else if(*iter == 'Y') j++;
+                               //if(start && !end) N_column += 1;
+                               N_alignment += 1;
+                        }
+                        if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) cerr << "similarity error"<< endl;
+                       identity += N_correct_match / N_alignment;
+                        //
+*/
+                       identity += alignment.second / alignment.first->size();
+                       delete alignment.first;                   
+#ifndef _OPENMP
+               }
+#endif
+       } 
+       identity /= numPairs;
+
+FILE *fi = fopen ("accuracy", "a");
+fprintf (fi, " %.10f ", identity); fprintf (fi, "\n");
+fclose (fi);
+
+/*
+        //adapative        
+        if(identity <= 0.15) initDistrib[2] = 0.143854;
+       else if(identity <= 0.2) initDistrib[2] = 0.191948;
+       else if(identity <= 0.25) initDistrib[2] = 0.170705;
+       else if(identity <= 0.3) initDistrib[2] = 0.100675;
+       else if(identity <= 0.35) initDistrib[2] = 0.090755;
+       else if(identity <= 0.4) initDistrib[2] = 0.146188;
+        else if(identity <= 0.45) initDistrib[2] = 0.167858;
+       else if(identity <= 0.5) initDistrib[2] = 0.250769;
+        //else if(identity <= 0.6) initDistrib[2] = 0.500829;
+        //else if(identity <= 0.7) initDistrib[2] = 0.259622;
+*/
+        if( identity<= 0.25 || identity > 0.8 ) return 0;
+        else if(identity > 0.2 && identity<= 0.4) return 1;
+        else return 2;
+
+}
diff --git a/binaries/src/GLProbs-1.0/MSAClusterTree.cpp b/binaries/src/GLProbs-1.0/MSAClusterTree.cpp
new file mode 100644 (file)
index 0000000..3bf34a1
--- /dev/null
@@ -0,0 +1,153 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include "MSAClusterTree.h"
+//Forward every argument to the MSAGuideTree base, which sets up all shared state.
+MSAClusterTree::MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs)
+               : MSAGuideTree(msa, distMatrix, numSeqs) {
+}
+//Nothing to release at this level; the destructor body is intentionally empty.
+MSAClusterTree::~MSAClusterTree() {}
+//Build the complete guide tree in three stages that depend on each other.
+void MSAClusterTree::create() {
+       //1. merge clusters bottom-up to obtain topology and branch lengths
+       generateClusterTree();
+       //2. derive the per-sequence weights from the finished tree
+       getSeqsWeights();
+       //3. turn the tree into the list of alignment orders
+       createAlignmentOrders();
+}
+void MSAClusterTree::generateClusterTree() {   //bottom-up clustering: repeatedly joins the two closest clusters under a new internal node; entries of *distMatrix are overwritten along the way
+       int i;
+       ValidNode* validNodes, *headValidNodes;
+       ValidNode* miniPtr, *minjPtr, *ivalid, *jvalid;
+       int mini, minj;
+       float* joins;   //distance from each active cluster to the cluster being formed, indexed by ValidNode::n
+       unsigned int* clusterLeafs;     //number of leaves below each tree node, indexed by tree-node index
+
+       //allocate the working storage; validNodes[0] is the list head, validNodes[1..leafsNum] are the clusters
+       validNodes = new ValidNode[leafsNum + 1];
+       joins = new float[leafsNum + 1];
+       clusterLeafs = new unsigned int[nodesNum + 1];
+       if (!validNodes || !joins || !clusterLeafs) {
+               cerr << "Out of memory of the reconstruction of cluster tree" << endl;
+       }       //NOTE(review): execution continues after this message, and a standard throwing new[] never yields NULL - confirm whether this check is still wanted
+       //every leaf starts as a cluster that contains exactly one sequence
+       for (i = 0; i < this->leafsNum; i++) {
+               clusterLeafs[i] = 1;
+       }
+
+       headValidNodes = &validNodes[0];
+       headValidNodes->next = &validNodes[1];
+       headValidNodes->n = -1;
+       headValidNodes->node = -1;
+       headValidNodes->prev = NULL;
+
+       //chain the leaves behind the head in increasing order of n; the pair search below stops at the first jvalid->n >= mini and so relies on this order
+       ValidNode* curr = &validNodes[1];
+       ValidNode* prev = headValidNodes;
+       ValidNode* next = &validNodes[2];
+       for (i = 0; i < leafsNum; i++) {
+               curr->n = i;
+               curr->node = i;
+               curr->prev = prev;
+               curr->next = next;
+               prev = curr;
+               curr = next;
+               next++;
+       }
+       prev->next = NULL;      //terminate the list at the last leaf
+
+       //each pass of the loop below creates one internal node
+       int nodeIdx;    //the index of an internal node
+       int firstNode = leafsNum;       //the index of the first internal node
+       int lastNode = firstNode + leafsNum - 1;//one past the index of the last internal node (the loop below stops before it)
+
+       for (nodeIdx = firstNode; nodeIdx < lastNode; nodeIdx++) {
+               //find closest pair of clusters; only pairs with jvalid->n < ivalid->n are examined
+               float minDist = 1.1f;   //NOTE(review): presumably distances are expected to stay below 1.1 - confirm against the code that fills the matrix
+               miniPtr = headValidNodes;
+               minjPtr = headValidNodes;
+
+               for (ivalid = headValidNodes->next; ivalid != NULL;
+                               ivalid = ivalid->next) {
+                       mini = ivalid->n;
+                       
+                       for (jvalid = headValidNodes->next;
+                                       jvalid != NULL && jvalid->n < mini; jvalid = jvalid->next) {
+                               minj = jvalid->n;
+                               float dist = (*distMatrix)[mini][minj];
+                               if (dist < 0) {
+                                       cerr
+                                                       << "ERROR: It is impossible to have distance value less than zero"
+                                                       << endl;
+                                       dist = 0;
+                               }
+                               if (dist < minDist) {
+                                       minDist = dist;
+                                       miniPtr = ivalid;
+                                       minjPtr = jvalid;
+                               }
+                               //printf("dist %g mini %d minj %d\n", dist, ivalid->node, jvalid->node);
+                       }
+               }
+               //printf("**** mini %d minj %d minDist %g *****\n", miniPtr->node, minjPtr->node, minDist);
+               //the list head is still selected when no pair had a distance below the initial minDist
+               if (miniPtr == headValidNodes || minjPtr == headValidNodes) {
+                       cerr << "OOPS: Error occurred while constructing the cluster tree\n"
+                                       << endl;
+                       exit(-1);
+               }
+               //both children are attached at half of the merge distance below the new internal node
+               float branchLength = minDist * 0.5f;
+               this->connectNodes(&nodes[nodeIdx], nodeIdx, &nodes[miniPtr->node],
+                               branchLength, &nodes[minjPtr->node], branchLength);
+               clusterLeafs[nodeIdx] = clusterLeafs[miniPtr->node]
+                               + clusterLeafs[minjPtr->node];
+
+               //remove the valid node minjPtr from the list; miniPtr stays and will stand for the merged cluster
+               minjPtr->prev->next = minjPtr->next;
+               if (minjPtr->next != NULL) {
+                       minjPtr->next->prev = minjPtr->prev;
+               }
+               minjPtr->prev = minjPtr->next = NULL;
+
+               //distance of each remaining valid node to the new node: the two old distances averaged with the cluster sizes as weights
+               for (ivalid = headValidNodes->next; ivalid != NULL;
+                               ivalid = ivalid->next) {
+                       int idx = ivalid->n;
+
+                       float idist = (*distMatrix)[miniPtr->n][idx];
+                       float jdist = (*distMatrix)[minjPtr->n][idx];
+
+                       unsigned int isize = clusterLeafs[miniPtr->node];
+                       unsigned int jsize = clusterLeafs[minjPtr->node];
+                       joins[idx] = (idist * isize + jdist * jsize) / (isize + jsize);
+                       //joins[idx] = (idist + jdist )/ 2;
+               }
+               //miniPtr now refers to the merged cluster; store its distances symmetrically in the matrix
+               miniPtr->node = nodeIdx;
+               mini = miniPtr->n;
+               for (jvalid = headValidNodes->next; jvalid != NULL;
+                               jvalid = jvalid->next) {
+                       minj = jvalid->n;
+
+                       float dist = joins[minj];
+                       (*distMatrix)[mini][minj] = dist;
+                       (*distMatrix)[minj][mini] = dist;
+               }
+       }
+       //the node at index lastNode - 1 (the last internal node, or the single leaf when leafsNum is 1) becomes the root
+       this->root = &nodes[lastNode - 1];
+
+       delete[] validNodes;
+       delete[] joins;
+       delete[] clusterLeafs;
+}
diff --git a/binaries/src/GLProbs-1.0/MSAClusterTree.h b/binaries/src/GLProbs-1.0/MSAClusterTree.h
new file mode 100644 (file)
index 0000000..30bce05
--- /dev/null
@@ -0,0 +1,27 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#ifndef _MSA_CLUSTER_TREE_H
+#define _MSA_CLUSTER_TREE_H
+
+#include "MSAGuideTree.h"
+
+class MSAClusterTree: public MSAGuideTree {    //guide tree built by clustering the pairwise distance matrix
+public:
+       MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs); //arguments are passed on to MSAGuideTree
+       ~MSAClusterTree();
+
+       //build the cluster tree, then the sequence weights, then the alignment orders
+       void create();
+private:
+       //join the closest clusters step by step and set the root of the tree
+       void generateClusterTree();
+};
+#endif
diff --git a/binaries/src/GLProbs-1.0/MSADef.h b/binaries/src/GLProbs-1.0/MSADef.h
new file mode 100644 (file)
index 0000000..6a3d178
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef _MSA_DEF_H
+#define _MSA_DEF_H
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <math.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+//numeric limits and scaling constants
+#define MAX_INT_NUM                            0x7FFFFFFF      //largest value of a signed 32-bit integer
+#define MAX_FLOAT_NUM                  FLT_MAX //largest finite float, taken from <float.h>
+#define INT_MULTIPLY                   1000    //factor applied to the integer sequence weights when they are normalised
+
+#define SUBMATRIX_INT_SCALE            100     //NOTE(review): presumably a scale for integer substitution scores; no use is visible in this part of the patch - confirm
+
+//values stored in TreeNode::leaf to tell leaves from internal nodes
+enum {
+       NONE, NODE, LEAF
+};
+
+#endif
+
diff --git a/binaries/src/GLProbs-1.0/MSAGuideTree.cpp b/binaries/src/GLProbs-1.0/MSAGuideTree.cpp
new file mode 100644 (file)
index 0000000..ec9a5e8
--- /dev/null
@@ -0,0 +1,327 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "MSAGuideTree.h"
+#include "MSA.h"
+MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) {
+       int i;
+       TreeNode* node;
+       //system configuration
+       this->msa = msa;
+       this->distMatrix = &distances;
+       this->numSeqs = numSeqs;
+       this->seqsWeights = msa->getSeqsWeights();
+
+       //tree structure
+       this->nodesSize = this->numSeqs * 2 + 1;
+       this->nodes = new TreeNode[this->nodesSize];
+       if (!this->nodes) {
+               cerr << "TreeNodes memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize all the tree nodes
+       this->leafs = this->nodes;
+       this->leafsNum = this->numSeqs;
+       this->nodesNum = 2 * this->leafsNum - 1;
+       for (i = 0; i < this->nodesSize; i++) {
+               node = &nodes[i];
+               node->left = 0;
+               node->right = 0;
+               node->parent = 0;
+               node->leftIdx = -1;
+               node->rightIdx = -1;
+               node->parentIdx = -1;
+               node->idx = -1;
+               node->dist = 0;
+               node->leaf = NODE;              //setted to be NODE, by default
+               node->order = 0;
+               node->depth = 0;
+       }
+       //initialize the leaf nodes
+       for (i = 0; i < this->leafsNum; i++) {
+               node = &this->leafs[i];
+               node->idx = i;
+               node->leaf = LEAF;
+       }
+}
+//Free what the tree owns: first the node array, then the alignment orders.
+MSAGuideTree::~MSAGuideTree() {
+       delete[] nodes;
+       releaseAlignmentOrders();
+}
+//Return the array that holds every tree node.
+TreeNode* MSAGuideTree::getNodes() {
+       return this->nodes;
+}
+//Return the leaf nodes (the constructor makes this the start of the node array).
+TreeNode* MSAGuideTree::getLeafs() {
+       return this->leafs;
+}
+//Return the number of nodes in the tree (2 * leafsNum - 1, set by the constructor).
+int MSAGuideTree::getNodesNum() {
+       return this->nodesNum;
+}
+//Return the number of leaves, which equals the number of sequences.
+int MSAGuideTree::getLeafsNum() {
+       return this->leafsNum;
+}
+//Return the alignment-order array; used entries occupy slots 1..getAlignOrdersNum().
+AlignmentOrder* MSAGuideTree::getAlignOrders() {
+       return this->alignOrders;
+}
+//Return how many alignment orders have been recorded so far.
+int MSAGuideTree::getAlignOrdersNum() {
+       return this->alignOrdersNum;
+}
+/****************************************************
+ create the evolutionary relationship
+ ****************************************************/
+void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx,
+               TreeNode* leftChild, float leftDist, TreeNode* rightChild,
+               float rightDist) {
+       //save the parents index for each child
+       leftChild->parent = parent;
+       leftChild->parentIdx = parentIdx;
+       rightChild->parent = parent;
+       rightChild->parentIdx = parentIdx;
+
+       //save the branch lengths (i.e. distance) from each child to its parent
+       leftChild->dist = leftDist;
+       rightChild->dist = rightDist;
+
+       //save the indices of itself and its children for this new tree node
+       parent->idx = parentIdx;
+       parent->left = leftChild;
+       parent->leftIdx = leftChild->idx;
+       parent->right = rightChild;
+       parent->rightIdx = rightChild->idx;
+}
+/*****************************************
+ compute the alignment order of the phylogentic tree
+ *****************************************/
+void MSAGuideTree::createAlignmentOrders() {
+       int i;
+
+       AlignmentOrder* order;
+       //allocate memory space for alignment orders vector
+       this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0
+       this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1
+       this->alignOrders = new AlignmentOrder[this->alignOrdersSize];
+       if (!this->alignOrders) {
+               cerr << "OOPS: Alignment orders memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize the alignment orders vector
+       for (i = 0; i < this->alignOrdersSize; i++) {
+               order = &this->alignOrders[i];
+               order->leftOrder = 0;
+               order->rightOrder = 0;
+               order->leftLeafs = 0;
+               order->leftNum = 0;
+               order->rightLeafs = 0;
+               order->rightNum = 0;
+       }
+       //starting out constructing the alignment orders
+       int subLeafsNum;
+       int nodeDepth = 1;
+       int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum,
+                       nodeDepth);
+
+       //check whether the function works well
+       if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) {
+               fprintf(stderr,
+                               "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n",
+                               subLeafsNum, alignOrdersNum, subOrder);
+       }
+
+}
+int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot,
+               int* subLeafs, int& subLeafsNum, int nodeDepth) {
+       int leftNum, rightNum;
+       int leftOrder, rightOrder;
+       int* leftLeafs, *rightLeafs;
+
+       if (subRoot->leaf == LEAF) {
+               subLeafs[0] = subRoot->idx;
+               subLeafsNum = 1;
+
+               return 0;                       //if it is a leaf, return the index 0
+       }
+       leftOrder = rightOrder = 0;
+       leftNum = rightNum = 0;
+       leftLeafs = new int[numSeqs];
+       rightLeafs = new int[numSeqs];
+
+       //check the left subtree
+       if (subRoot->left) {
+               //recursively tranverse the left subtree
+               leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs,
+                               leftNum, nodeDepth + 1);
+       }
+       //check the right subtree
+       if (subRoot->right) {
+               rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs,
+                               rightNum, nodeDepth + 1);
+       }
+       //save the leafs in the left and right subtrees of the current subtree
+       if (this->alignOrdersNum > this->alignOrdersSize) {
+               fprintf(stderr, "the alignment order function works bad\n");\
+               exit(-1);
+       }
+
+       AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum];
+       order->nodeDepth = nodeDepth;
+       order->leftOrder = leftOrder;
+       order->rightOrder = rightOrder;
+       order->leftNum = leftNum;
+       order->rightNum = rightNum;
+       order->leftLeafs = new int[order->leftNum];
+       order->rightLeafs = new int[order->rightNum];
+       if (!order->leftLeafs || !order->rightLeafs) {
+               fprintf(stderr,
+                               "memory allocation failed while recursively constructing alignment orders\n");
+               exit(-1);
+       }
+       memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int));
+       memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int));
+
+       delete[] leftLeafs;
+       delete[] rightLeafs;
+
+       //for the root of the tree, subLeafs buffer is set to 0
+       if (subLeafs) {
+               //copy the results to the parent tree node
+               memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int));
+               memcpy(subLeafs + order->leftNum, order->rightLeafs,
+                               order->rightNum * sizeof(int));
+       }
+       //compute the total number of leafs in this subtree
+       subLeafsNum = order->leftNum + order->rightNum;
+
+       return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0
+}
+void MSAGuideTree::releaseAlignmentOrders() {
+       if (!this->alignOrders) {
+               return;
+       }
+       for (int i = 0; i < this->alignOrdersNum; i++) {
+               AlignmentOrder* order = &this->alignOrders[i];
+               if (order->leftLeafs) {
+                       delete[] order->leftLeafs;
+               }
+               if (order->rightLeafs) {
+                       delete[] order->rightLeafs;
+               }
+       }
+       delete[] alignOrders;
+}
+/********************************
+ display the alignment orders
+ ********************************/
+void MSAGuideTree::displayAlignmentOrders() {
+       int i, j;
+       AlignmentOrder* order;
+       fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n");
+       for (i = 1; i <= this->alignOrdersNum; i++) {
+               order = &this->alignOrders[i];
+
+               fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i,
+                               order->nodeDepth, order->leftOrder);
+               fprintf(stderr, "---LEFT: ");
+               for (j = 0; j < order->leftNum; j++) {
+                       fprintf(stderr, "%d ", order->leftLeafs[j]);
+               }
+
+               fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder);
+               fprintf(stderr, "\n---RIGHT: ");
+               for (j = 0; j < order->rightNum; j++) {
+                       fprintf(stderr, "%d ", order->rightLeafs[j]);
+               }
+               fprintf(stderr, "\n");
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ display the tree
+ *********************************/
+void MSAGuideTree::displayTree() {
+       fprintf(stderr, "**************DISPLAY TREE*********************\n");
+       for (int i = 0; i < nodesNum; i++) {
+               TreeNode* node = &nodes[i];
+
+               fprintf(stderr,
+                               "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n",
+                               (node == &nodes[node->idx]) ? node->idx : -2, node, node->left,
+                               (!node->left || node->left == &nodes[node->leftIdx]) ?
+                                               node->leftIdx : -2, node->right,
+                               (!node->right || node->right == &nodes[node->rightIdx]) ?
+                                               node->rightIdx : -2, node->parent,
+                               (!node->parent || node->parent == &nodes[node->parentIdx]) ?
+                                               node->parentIdx : -2, node->dist);
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ compute the sequence weights
+ *********************************/
+void MSAGuideTree::getSeqsWeights() {
+       int i;
+       TreeNode* curr;
+
+       //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it.
+       for (i = 0; i < leafsNum; i++) {
+               //for each leaf nodes
+               curr = &this->leafs[i];
+               while (curr != 0) {
+                       curr->order++;
+
+                       curr = curr->parent;
+               }
+       }
+       //compute the weight of each sequence, which corresponds to a leaf node
+       for (i = 0; i < numSeqs; i++) {
+               //compute the weight of each sequence
+               float weights = 0;
+               curr = &this->leafs[i];
+               while (curr->parent != 0) {
+                       weights += curr->dist / curr->order;
+                       curr = curr->parent;
+                       //printf("order:%d weights: %f\n", curr->order, weights);
+               }
+               //save the weight of this sequence
+               seqsWeights[i] = (int) (100 * weights);
+               //printf("%d\n", seqsWeights[i]);
+       }
+       //normalize the weights 
+       int wsum = 0;
+       for (i = 0; i < numSeqs; i++) {
+               wsum += seqsWeights[i];
+       }
+       if (wsum == 0) {
+               //in this case, every sequence is assumed to have an identical weight
+               for (i = 0; i < numSeqs; i++) {
+                       seqsWeights[i] = 1;
+               }
+               wsum = numSeqs;
+       }
+       //printf("wsum:%d \n", wsum);
+       for (i = 0; i < numSeqs; i++) {
+               seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum;
+               if (seqsWeights[i] < 1) { 
+                       seqsWeights[i] = 1;
+               }
+               //printf("%d \n", seqsWeights[i]);
+       }
+}
+//Base-class version: deliberately empty, no tree is built here.
+void MSAGuideTree::create() {
+}
+
diff --git a/binaries/src/GLProbs-1.0/MSAGuideTree.h b/binaries/src/GLProbs-1.0/MSAGuideTree.h
new file mode 100644 (file)
index 0000000..97d538a
--- /dev/null
@@ -0,0 +1,119 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#ifndef _MSA_GUIDE_TREE_H
+#define _MSA_GUIDE_TREE_H
+#include "MSADef.h"
+#include "MSA.h"
+
+#include "SafeVector.h"
+#include "MultiSequence.h"
+#include "ScoreType.h"
+#include "ProbabilisticModel.h"
+#include "SparseMatrix.h"
+
+class MSA;
+struct ValidNode {     //one entry of the doubly linked list of clusters that can still be merged
+       ValidNode* prev;        //previous entry; NULL for the list head
+       ValidNode* next;        //next entry; NULL at the end of the list
+       int n;                          //the index in the distance matrix
+       int node;                       //the index in the tree node entries
+};
+
+struct TreeNode {      //one node of the guide tree; all nodes live in a single array
+       struct TreeNode *left;                  //the pointer to its left child
+       struct TreeNode *right;                 //the pointer to its right child
+       struct TreeNode *parent;                //the pointer to its parent
+       int leftIdx;                                    //the index of the left child
+       int rightIdx;                                   //the index of the right child
+       int parentIdx;                                  //the index of its parent
+       int idx;                                                //the index of itself
+       float dist;                                             //the distance to its parent
+       int leaf;                                               //LEAF for a leaf node, NODE otherwise
+       int order;                      //incremented once per leaf beneath this node by getSeqsWeights()
+       int depth;                                              //the depth of the node
+};
+struct AlignmentOrder {        //one alignment step: the leaves of a left subtree against those of a right subtree
+       int nodeDepth;                  //the depth of the internal node
+       int leftOrder;                  //the order number of the left child (0 if it is a leaf)
+       int rightOrder;                 //the order number of the right child (0 if it is a leaf)
+       int* leftLeafs;                 //the indices of leafs in the left subtree
+       int leftNum;                    //the number of leafs in the left subtree
+       int* rightLeafs;                        //the indices of leafs in the right subtree
+       int rightNum;                   //the number of leafs in the right subtree
+};
+
+class MSAGuideTree {   //common base of the guide trees: owns the node array and the alignment orders
+public:
+       MSAGuideTree(MSA* msa, VVF& distMatrix, int numSeqs);
+       virtual ~MSAGuideTree() = 0;    //pure virtual to make the class abstract; a body is still defined in MSAGuideTree.cpp
+
+       //get the tree nodes
+       TreeNode* getNodes();
+       //get the leaf nodes
+       TreeNode* getLeafs();
+       //get the number of nodes;
+       int getNodesNum();
+       //get the number of leaf nodes
+       int getLeafsNum();
+       //get the root of the tree
+       TreeNode* getRoot() {
+               return this->root;
+       }
+       //get the alignment orders (used entries are in slots 1..getAlignOrdersNum())
+       AlignmentOrder* getAlignOrders();
+       int getAlignOrdersNum();
+       //construct the alignment orders from the tree below root
+       void createAlignmentOrders();
+
+       //construct the guide tree (the base-class version does nothing)
+       virtual void create();
+       //calculate the sequence weights and store them in seqsWeights
+       virtual void getSeqsWeights();
+
+       /**********DEBUGGING****************/
+       //display the tree
+       void displayTree();
+       //display the alignment orders
+       void displayAlignmentOrders();
+
+protected:
+       //join two nodes
+       void connectNodes(TreeNode* parent, int parentIdx, TreeNode* leftChild,
+                       float leftDist, TreeNode* rightChild, float rightDist);
+       //release the alignment orders vector
+       void releaseAlignmentOrders();
+       //recursive implementation of constructing the alignment orders
+       int recursiveCreateAlignmentOrders(TreeNode* subRoot, int* subLeafs,
+                       int& subLeafsNum, int nodeDepth);
+
+       //system configurations
+       MSA* msa;
+       VVF* distMatrix;        //address of the caller's distance matrix
+       int numSeqs;
+       int* seqsWeights;       //obtained from msa->getSeqsWeights()
+
+       //all the tree nodes
+       TreeNode* nodes;
+       int nodesNum;   //2 * leafsNum - 1
+       int nodesSize;  //allocated length of nodes: 2 * numSeqs + 1
+       //the root tree node
+       TreeNode* root;
+       //leaf node
+       TreeNode* leafs;        //same address as nodes
+       int leafsNum;
+
+       //alignment order
+       AlignmentOrder* alignOrders;
+       int alignOrdersNum;     //number of recorded orders; they occupy slots 1..alignOrdersNum
+       int alignOrdersSize;    //allocated length of alignOrders
+};
+#endif
+
diff --git a/binaries/src/GLProbs-1.0/MSAPartProbs.cpp b/binaries/src/GLProbs-1.0/MSAPartProbs.cpp
new file mode 100644 (file)
index 0000000..b234588
--- /dev/null
@@ -0,0 +1,1023 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "SafeVector.h"
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <ctype.h>
+#include <assert.h>
+#include "MultiSequence.h"
+#include "ScoreType.h"
+
+#define  TRACE 0               // 0: NOTRACE 1: TRACE (revers_partf then appends to the file "revpartdump" and prints debug output)
+//proba like settings
+#define  endgaps 1             // 1: end-gap penalties enabled 0: disabled
+#define  PART_FULL_MEMORY 0    //0: LOW MEM OPTION
+#define  REVPART_FULL_MEMORY 0 //0: LOW MEM OPTION (revers_partf keeps two rows per Z matrix instead of one row per residue of the second sequence plus one)
+using namespace std;
+
+#ifdef _WIN32
+#define OS_HUGE_VALL   HUGE_VAL        
+#else
+#define OS_HUGE_VALL   HUGE_VALL
+#endif
+
+typedef struct {       //NOTE(review): presumably the parsed program options (see the extern variable `argument`) - confirm
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+typedef struct sequence {      //one input sequence as passed to the partition-function routines
+       char *title;
+       char *text;     //residue letters; revers_partf measures it with strlen, so it must be NUL-terminated
+       int length;     //revers_partf sizes the rows of its Z matrices with this value
+} fasta;
+
+typedef struct alignment {     //same three fields as `fasta`; no use is visible in this part of the file
+       char *title;
+       char *text;
+       int length;
+} align;
+
+////////////////////////////////////////////////////////
+//externs related to scoring matrix and input arguments
+///////////////////////////////////////////////////////////
+extern float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+extern char aminos[26], matrixtype[20], bases[26];
+
+extern double sub_matrix[26][26];      //substitution scores; rows and columns are selected through subst_index
+extern double normalized_matrix[26][26]; // added by YE Yongtao
+extern int subst_index[26];    //maps (residue letter - 'A') to a row/column of sub_matrix
+
+extern float TEMPERATURE;
+extern int MATRIXTYPE;
+
+extern float GAPOPEN;
+extern float GAPEXT;
+extern argument_decl argument;
+
+//////////////////////////////////////////////////////////////////////////////
+//calculates reverse partition function values based on z matrices
+//and also simultaneously calculates the probability of each basepair
+//or aminoacid residue pair i,j
+//////////////////////////////////////////////////////////////////////////////
+
+VF *revers_partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, long double **Zfm, const double d,
+               const double e) {
+       // printf("revpart\n");
+       //rest of the declarations
+       int i, j;
+       long double **Zm = NULL;
+       long double **Ze = NULL;
+       long double **Zf = NULL;
+       int len0, len1;
+       float probability;
+       long double tempvar;
+       int Si, Tj;
+       double endgapopen, endgapextend;
+       FILE *fo;
+
+       //Init lengths of sequences
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       //Safe vector declared
+       VF *posteriorPtr = new VF((len0 + 1) * (len1 + 1));
+       VF & posterior = *posteriorPtr;
+       VF::iterator ptr = posterior.begin();
+
+       if (TRACE)                      //open the trace file
+               fo = fopen("revpartdump", "a");
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //instantiate the z matrix
+       if (REVPART_FULL_MEMORY) {
+
+               Ze = new long double *[sequences[1].length + 1];
+               Zf = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               if (TRACE)
+                       printf("\n\n %e %e\n", d, e);
+
+               //DYNAMICALLY GROW 2D Zm Zf Ze MARICES (long double)
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[2];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= 1; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+               }
+
+       }
+
+       if (TRACE) {
+               printf("in rev partf---");
+               printf("\n\n");
+       }
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++)
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0.0;
+                               Zf[i][j] = 0.0;
+                               Ze[i][j] = 0.0;
+                       }
+       } else {
+
+               for (j = 0; j <= len0; j++) {
+                       Zm[0][j] = 0;
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+                       Zm[1][j] = 0;
+               }
+       }
+
+       //fill the probability matrix with 0s
+       for (i = 0; i <= len1; i++)
+               for (j = 0; j <= len0; j++)
+                       ptr[j * (len1 + 1) + i] = 0;
+
+       if (endgaps == 0) {
+               Zm[len1][len0] = 1;
+               Ze[len1][len0] = Zf[len1][len0] = 0;
+               Zf[len1 - 1][len0] = Zm[len1][len0] * d;
+               Ze[len1][len0 - 1] = Zm[len1][len0] * d;
+
+               //>=2ND ROW INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * e;
+                       }
+               } else {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * e;
+                       }
+               }
+       } else {
+
+               if (REVPART_FULL_MEMORY) {
+
+                       Zm[len1][len0] = 1;
+                       Ze[len1][len0] = Zf[len1][len0] = 0;
+                       Zf[len1 - 1][len0] = Zm[len1][len0] * endgapopen;
+                       Ze[len1][len0 - 1] = Zm[len1][len0] * endgapopen;
+
+                       //>=2ND ROW INIT
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * endgapextend;
+                       }
+
+                       //M Iy= d+j*e
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * endgapextend;
+                       }
+
+               } else {
+                       //in Zm
+                       //let:
+                       //  Zm(0) be the current row being filled/computed
+                       //  Zm(1) be the previous row
+
+                       Zm[1][len0] = 1;
+                       Ze[0][len0] = Zf[0][len0] = 0;
+                       Zf[1][len0] = Zm[1][len0] * endgapopen;
+                       Ze[0][len0 - 1] = Zm[1][len0] * endgapopen;
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * endgapextend;
+                       }
+
+               }                       //END ELSE
+
+       }                               //END FULL MEMORY and GAP enablement IF STATEMENT
+
+       double scorez, zz = 0;
+
+       for (i = len1 - 1; i >= 0; i--) {
+
+               for (j = len0 - 1; j >= 0; j--) {
+                       Si = subst_index[sequences[1].text[i] - 'A'];
+                       Tj = subst_index[sequences[0].text[j] - 'A'];
+                       scorez = sub_matrix[Si][Tj];
+
+                       //endgaps modification aug 10
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == 0) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == 0) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+
+                       }
+
+                       if (REVPART_FULL_MEMORY) {
+                               //z computation
+
+                               Ze[i][j] = Zm[i][j + 1] * open0 + Ze[i][j + 1] * extend0;
+                               Zf[i][j] = Zm[i + 1][j] * open1 + Zf[i + 1][j] * extend1;
+                               Zm[i][j] = (Zm[i + 1][j + 1] + Zf[i + 1][j + 1]
+                                               + Ze[i + 1][j + 1]) * scorez;
+                               zz = Zm[i][j] + Zf[i][j] + Ze[i][j];
+
+                       } else {
+
+                               //2 ROW zE zF ALGORITHM GOES...:
+                               //Ze[1][j] =Zm[i][j + 1] * exp(beta * open0) + Ze[1][j + 1] *exp(beta * extend0);
+                               //Zf[1][j] = Zm[i + 1][j] * exp(beta * open1) + Zf[0][j] * exp(beta * extend1);
+                               //Zm[i][j] = (Zm[i + 1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1]) * exp(beta * scorez);
+                               //zz = Zm[0][j] + Zf[1][j] + Ze[1][j];
+
+                               //lowmem code for merging probability calculating module
+                               //Here we make use of Zm as a 2 row matrix
+
+                               Zf[1][j] = Zm[1][j] * open1 + Zf[0][j] * extend1;
+                               Ze[1][j] = Zm[0][j + 1] * open0 + Ze[1][j + 1] * extend0;
+                               Zm[0][j] = (Zm[1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1])
+                                               * scorez;
+
+                               tempvar = Zfm[i + 1][j + 1] * Zm[0][j];
+                               //divide P(i,j) i.e. pairwise probability by denominator
+                               tempvar /= (scorez * Zfm[0][0]);
+                               probability = (float) tempvar;
+
+                               //store only noticable probabilities
+                               //if (probability <= 1 && probability >= 0.001) {
+                                       //algorithm goes...
+                                       //validprob[i + 1][j + 1] = probability;
+                                       ptr[(j + 1) * (len1 + 1) + (i + 1)] = probability;
+                               //}
+                               //lowmem code ends here
+
+                       }
+
+               }                       //end of for
+
+               if (REVPART_FULL_MEMORY == 0) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+
+                               Zm[1][t] = Zm[0][t];
+                               Zm[0][t] = 0;
+
+                       }
+                       Zf[0][len0] = 1;
+
+               }
+
+       }                               //end of for
+
+       if (TRACE) {
+               printf("\n\nrM:....\n\n");
+               if (REVPART_FULL_MEMORY) {
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zm[i][j]);
+                               printf("\n");
+                       }
+
+                       printf("\n\nrE:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Ze[i][j]);
+                               printf("\n");
+
+                       }
+
+                       printf("\n\nrF:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zf[i][j]);
+                               printf("\n");
+
+                       }
+
+               }
+
+       }
+
+       if (TRACE) {
+               fprintf(fo, "\n");
+               fclose(fo);
+       }
+
+       //delete unused memory
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++) {
+                       delete (Zm[i]);
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zm[0]);
+
+               delete (Zm[1]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       for (i = 0; i <= len1; i++) {
+               delete (Zfm[i]);
+       }
+
+       if (Zf != NULL)
+               delete (Zf);
+
+       if (Ze != NULL)
+               delete (Ze);
+
+       if (Zm != NULL)
+               delete (Zm);
+
+       if (Zfm != NULL)
+               delete (Zfm);
+
+       posterior[0] = 0;
+       return (posteriorPtr);
+
+}
+
+//////////////////////////////////////////////////////////////
+//forward partition function
+/////////////////////////////////////////////////////////////
+
+long double **partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, const double d, const double e) {
+       //printf("partf\n");
+       int i, j, len1, len0;
+       long double **Zm = NULL, **Zf = NULL, **Ze = NULL, zz = 0;
+       double endgapopen, endgapextend;
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //the flag endgaps is set at the #define section
+       if (PART_FULL_MEMORY) {
+
+               Zf = new long double *[sequences[1].length + 1];
+               Ze = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               //comment
+               if (TRACE)
+                       printf("\nPARTF:====\n");
+
+               //DYNAMICALLY GROW 2D M,IX,IY,PIX,PIY MARICES
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[sequences[1].length + 1];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+               Ze[0] = new long double[sequences[0].length + 1];
+               Zf[0] = new long double[sequences[0].length + 1];
+               Ze[1] = new long double[sequences[0].length + 1];
+               Zf[1] = new long double[sequences[0].length + 1];
+       }
+
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++)
+                       for (j = 0; j <= sequences[0].length; j++) {
+                               Zm[i][j] = 0.00;
+                               Zf[i][j] = 0.00;
+                               Ze[i][j] = 0.00;
+                       }
+       } else {
+               for (i = 0; i <= len1; i++) {
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0;
+                       }
+               }
+               for (j = 0; j <= len0; j++) {
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+               }
+       }
+
+       //INTITIALIZE THE DP 
+
+       if (endgaps == 0) {
+               Zm[0][0] = 1.00;
+
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * d;
+               Ze[0][1] = Zm[0][0] * d;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * e;
+               }
+       } else {
+               //init z
+               Zm[0][0] = 1.00;
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * endgapopen;
+               Ze[0][1] = Zm[0][0] * endgapopen;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * endgapextend;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * endgapextend;
+               }
+       }
+
+       //1ST ROW/COL INIT
+
+       int Si, Tj;
+       double score;
+
+       for (i = 1; i <= sequences[1].length; i++) {
+
+               for (j = 1; j <= sequences[0].length; j++) {
+
+                       Si = subst_index[sequences[1].text[i - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[j - 1] - 'A'];
+
+                       score = sub_matrix[Si][Tj];
+
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == sequences[1].length) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == sequences[0].length) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+                       }
+
+                       //
+                       //z computation using open and extend temp vars
+                       //open0 is gap open in seq0 and open1 is gap open in seq1
+                       //entend0 is gap extend in seq0 and extend1 is gap extend in seq1
+
+                       if (PART_FULL_MEMORY) {
+                               Ze[i][j] = Zm[i][j - 1] * open0 + Ze[i][j - 1] * extend0;
+
+                               if (Ze[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Ze\n");
+                                       exit(1);
+                               }
+
+                               Zf[i][j] = Zm[i - 1][j] * open1 + Zf[i - 1][j] * extend1;
+
+                               if (Zf[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zf\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[i - 1][j - 1]
+                                               + Zf[i - 1][j - 1]) * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zm\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[i][j] + Zf[i][j];
+                       } else {
+                               Ze[1][j] = Zm[i][j - 1] * open0 + Ze[1][j - 1] * extend0;
+
+                               if (Ze[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zE\n");
+                                       exit(1);
+                               }
+
+                               Zf[1][j] = Zm[i - 1][j] * open1 + Zf[0][j] * extend1;
+
+                               if (Zf[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zF\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[0][j - 1] + Zf[0][j - 1])
+                                               * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zM\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[1][j] + Zf[1][j];
+                       }
+
+               }                       //end for
+
+               if (!PART_FULL_MEMORY) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+                       }
+
+                       Zf[1][0] = 1;
+
+               }
+
+       }                               //end for
+
+       //store the sum of zm zf ze (m,n)s in zm's 0,0 th position
+       Zm[0][0] = zz;
+
+       if (TRACE) {
+               //debug code aug 3 
+               //print the 3 Z matrices namely Zm Zf and Ze
+
+               printf("\n\nFINAL Zm:\n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zm[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Zf \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zf[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Ze \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Ze[i][j]);
+                       printf("\n");
+               }
+
+               //end debug dump code
+
+       }
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++) {
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       delete (Zf);
+       delete (Ze);
+
+       return Zm;
+
+}                              //end of forward partition function
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//entry point (was the main function) , returns the posterior probability safe vector
+////////////////////////////////////////////////////////////////////////////////////////
+VF *ComputePostProbs(int a, int b, string seq1, string seq2) {
+       //printf("probamod\n"); 
+       double gap_open = -22, gap_ext = -1, beta = 0.2;//T = 5, beta = 1/T = 0.2, by default
+       int stock_loop = 1;
+       int le = 160;
+       double termgapopen = 1.0f;      //exp(0)
+       double termgapextend = 1.0f;    //exp(0)
+
+       //initialize the sequence structure
+       fasta sequences[2];
+
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+       if (TRACE)
+
+       {
+               printf("%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               printf("after init\n");
+
+               FILE *dump1 = fopen("dump1", "a");
+               fprintf(dump1, "%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               fclose(dump1);
+       }
+
+       gap_open = argument.gapopen;
+       gap_ext = argument.gapext;
+       beta = argument.beta;
+
+       stock_loop = argument.N;
+       le = argument.matrix;
+
+       //compute the values of exp(beta * ?)
+       termgapopen = exp(beta * 0.0);
+       termgapextend = exp(beta * 0.0);
+       gap_open = exp(beta * gap_open);
+       gap_ext = exp(beta * gap_ext);
+
+       if (TRACE)
+               printf("%f %f %f %d\n", gap_open, gap_ext, beta, le);
+
+       //call for calculating the posterior probabilities
+       // 1. call partition function partf
+       // 2. calculate revpartition using revers_parf
+       // 3. calculate probabilities
+       /// MODIFICATION... POPULATE SAFE VECTOR
+
+       long double **MAT1;
+
+       MAT1 = partf(sequences, termgapopen, termgapextend, gap_open, gap_ext);
+
+       return revers_partf(sequences, termgapopen, termgapextend, MAT1, gap_open,
+                       gap_ext);
+
+}
+
+//////////////////////////////////////////////////////////////
+//Compute Viterbi Alignment 
+// Added by YE Yongtao
+/////////////////////////////////////////////////////////////
+
+pair<SafeVector<char> *, float> partViterbi(string seq1, string seq2) {
+
+
+       double gap_open = -12, gap_ext = -1, beta = 0.2;//T = 5, beta = 1/T = 0.2, by default
+       int stock_loop = 1;
+       int le = 160;
+       //double termgapopen = 1.0f;    //exp(0)
+       //double termgapextend = 1.0f;  //exp(0)
+
+       //initialize the sequence structure 
+       fasta sequences[2];
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+       gap_open = argument.gapopen;
+       gap_ext = argument.gapext;
+       beta = argument.beta;
+
+       stock_loop = argument.N;
+       le = argument.matrix;
+
+       //compute the values of exp(beta * ?)
+       double endgapopen = exp(beta * 0.0);
+       double endgapextend = exp(beta * 0.0);
+       double d = exp(beta * gap_open);
+       double e = exp(beta * gap_ext);
+
+       int i, j, len1, len0;
+       long double **Zm = NULL, **Zf = NULL, **Ze = NULL;
+       int **traceZm = NULL, **traceZf = NULL, **traceZe = NULL;
+
+       //the flag endgaps is set at the #define section
+       Zf = new long double *[sequences[1].length + 1];
+       Ze = new long double *[sequences[1].length + 1];
+       Zm = new long double *[sequences[1].length + 1];
+
+       traceZf = new int *[sequences[1].length + 1];
+       traceZe = new int *[sequences[1].length + 1];
+       traceZm = new int *[sequences[1].length + 1];
+
+       //DYNAMICALLY GROW 2D M,IX,IY,PIX,PIY MARICES
+       for (i = 0; i <= sequences[1].length; i++) {
+               Zf[i] = new long double[sequences[0].length + 1];
+               Ze[i] = new long double[sequences[0].length + 1];
+               Zm[i] = new long double[sequences[0].length + 1];
+
+               traceZf[i] = new int[sequences[0].length + 1];
+               traceZe[i] = new int[sequences[0].length + 1];
+               traceZm[i] = new int[sequences[0].length + 1];
+       }
+       
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       
+       for (i = 0; i <= sequences[1].length; i++)
+               for (j = 0; j <= sequences[0].length; j++) {
+                       Zm[i][j] = 0.00;
+                       Zf[i][j] = 0.00;
+                       Ze[i][j] = 0.00;
+
+                       traceZm[i][j] = -1;
+                       traceZf[i][j] = -1;
+                       traceZe[i][j] = -1;
+               }
+       
+
+       //INTITIALIZE THE DP 
+       if (endgaps == 0) {
+               Zm[0][0] = 1.00;
+
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * d;
+               Ze[0][1] = Zm[0][0] * d;
+
+               //>=2ND ROW INIT
+               
+               for (i = 2; i <= sequences[1].length; i++) {
+                       Zf[i][0] = Zf[i - 1][0] * e;
+                       traceZf[i][0] = 2;
+               }
+               
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * e;
+                       traceZe[0][j] = 1;
+               }
+       } else {
+               //init z
+               Zm[0][0] = 1.00;
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * endgapopen;
+               Ze[0][1] = Zm[0][0] * endgapopen;
+
+               //>=2ND ROW INIT
+               
+               for (i = 2; i <= sequences[1].length; i++) {
+                       Zf[i][0] = Zf[i - 1][0] * endgapextend;
+                       traceZf[i][0] = 2;
+               }
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * endgapextend;
+                       traceZe[0][j] = 1;
+               }
+       }
+
+       //1ST ROW/COL INIT
+
+       int Si, Tj;
+       double score;
+
+       for (i = 1; i <= sequences[1].length; i++) {
+
+               for (j = 1; j <= sequences[0].length; j++) {
+
+                       Si = subst_index[sequences[1].text[i - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[j - 1] - 'A'];
+
+                       score = sub_matrix[Si][Tj];
+
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == sequences[1].length) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == sequences[0].length) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+                       }
+
+                       //
+                       //z computation using open and extend temp vars
+                       //open0 is gap open in seq0 and open1 is gap open in seq1
+                       //entend0 is gap extend in seq0 and extend1 is gap extend in seq1
+                       Zf[i][j] = Zf[i - 1][j] * extend1; 
+                       traceZf[i][j] = 2;
+
+                       if(Zm[i - 1][j] * open1 > Zf[i][j]){
+                               Zf[i][j] = Zm[i - 1][j] * open1;
+                               traceZf[i][j] = 0;
+                       }
+                       if (Zf[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Zf\n");
+                               exit(1);
+                       }
+                       Ze[i][j] = Ze[i][j - 1] * extend0;
+                       traceZe[i][j] = 1;
+                       if(Zm[i][j - 1] * open0 > Ze[i][j]){
+                               Ze[i][j] = Zm[i][j - 1] * open0;
+                               traceZe[i][j] = 0;
+                       }
+
+                       if (Ze[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Ze\n");
+                               exit(1);
+                       }
+
+                        Zm[i][j] = Zm[i - 1][j - 1] * score;
+                       traceZm[i][j] = 0;
+                        if(Zf[i - 1][j - 1] * score > Zm[i][j]){
+                               Zm[i][j] = Zf[i - 1][j - 1] * score;
+                               traceZm[i][j] = 2;
+                       }
+                        if(Ze[i - 1][j - 1] * score > Zm[i][j]){ 
+                               Zm[i][j] = Ze[i - 1][j - 1] * score;
+                               traceZm[i][j] = 1;
+                       }
+                       if (Zm[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Zm\n");
+                               exit(1);
+                       }               
+
+               }//end for
+       }//end for
+        // figure out best terminating cell
+
+       float bestProb = Zm[sequences[1].length][sequences[0].length];
+       int state = 0;
+        if( bestProb < Zf[sequences[1].length][sequences[0].length]){     
+               bestProb = Zf[sequences[1].length][sequences[0].length];
+               state = 2;
+       }
+        if( bestProb < Ze[sequences[1].length][sequences[0].length]){     
+               bestProb = Ze[sequences[1].length][sequences[0].length];
+               state = 1;
+       }
+       assert (state != -1);
+       // compute traceback
+        SafeVector<char> *alignment = new SafeVector<char>; assert (alignment);
+        int c = sequences[1].length, r = sequences[0].length;
+        while (r != 0 || c != 0){
+                int newState;
+               if(state == 0){
+                       newState = traceZm[c][r];
+                       c--; r--; alignment->push_back ('B');
+               }
+               else if(state == 1){
+                       newState = traceZe[c][r];
+                       r--; alignment->push_back ('X');
+               }
+               else{
+                       newState = traceZf[c][r];
+                       c--; alignment->push_back ('Y');
+               }
+               state = newState;
+        }
+
+       reverse (alignment->begin(), alignment->end());
+
+       for (i = 0; i <= sequences[1].length; i++) {
+               delete (Zf[i]);
+               delete (Ze[i]);
+               delete (Zm[i]);
+               delete (traceZf[i]);
+               delete (traceZe[i]);
+               delete (traceZm[i]);
+       }
+
+       delete (Zf);
+       delete (Ze);
+       delete (Zm);
+       delete (traceZf);
+       delete (traceZe);
+       delete (traceZm);
+
+       return make_pair(alignment, bestProb);
+}
+
+//////////////////////////////////////////////////////////////
+// Compute two sequences' similarity defined as the normalized alignment score without gap penalties
+// Added by YE Yongtao
+/////////////////////////////////////////////////////////////
+
+float computeSimilarity(string seq1, string seq2, SafeVector<char> * alignment) {
+
+       //initialize the sequence structure 
+       fasta sequences[2];
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+        float bestProb = 0;
+       int Si, Tj;
+       double score;
+        int i = 1;int j = 1;
+       for (SafeVector<char>::iterator iter = alignment->begin(); 
+               iter != alignment->end(); ++iter){
+               if (*iter == 'B'){
+                       Si = subst_index[sequences[1].text[j - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[i - 1] - 'A'];
+                       score = normalized_matrix[Si][Tj];
+                       bestProb += score;
+                       i++; j++;               
+               }
+                else if(*iter == 'X') i++;
+               else if(*iter == 'Y') j++;
+        }
+        if(i!= sequences[0].length + 1 || j!= sequences[1].length + 1 ) cerr << "similarity error"<< endl;
+       bestProb /= alignment->size();      
+       //bestProb /= min(sequences[0].length, sequences[1].length);
+       return bestProb;
+}                              
+//end of posterior probability  module
diff --git a/binaries/src/GLProbs-1.0/MSAProbs.vcproj b/binaries/src/GLProbs-1.0/MSAProbs.vcproj
new file mode 100644 (file)
index 0000000..5212610
--- /dev/null
@@ -0,0 +1,272 @@
+<?xml version="1.0" encoding="gb2312"?>\r
+<VisualStudioProject\r
+       ProjectType="Visual C++"\r
+       Version="8.00"\r
+       Name="MSAProbs"\r
+       ProjectGUID="{671563E4-93A2-419E-8B41-48DDF71DD144}"\r
+       RootNamespace="MSAProbs"\r
+       Keyword="Win32Proj"\r
+       >\r
+       <Platforms>\r
+               <Platform\r
+                       Name="Win32"\r
+               />\r
+       </Platforms>\r
+       <ToolFiles>\r
+       </ToolFiles>\r
+       <Configurations>\r
+               <Configuration\r
+                       Name="Debug|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="1"\r
+                       CharacterSet="1"\r
+                       >\r
+                       <Tool\r
+                               Name="VCPreBuildEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCustomBuildTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXMLDataGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebServiceProxyGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCMIDLTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               AdditionalOptions="/openmp"\r
+                               Optimization="0"\r
+                               PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"\r
+                               MinimalRebuild="true"\r
+                               BasicRuntimeChecks="3"\r
+                               RuntimeLibrary="3"\r
+                               OpenMP="true"\r
+                               UsePrecompiledHeader="0"\r
+                               WarningLevel="3"\r
+                               Detect64BitPortabilityProblems="true"\r
+                               DebugInformationFormat="4"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManagedResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPreLinkEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCLinkerTool"\r
+                               LinkIncremental="2"\r
+                               GenerateDebugInformation="true"\r
+                               SubSystem="1"\r
+                               TargetMachine="1"\r
+                       />\r
+                       <Tool\r
+                               Name="VCALinkTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManifestTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXDCMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCBscMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCFxCopTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCAppVerifierTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebDeploymentTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPostBuildEventTool"\r
+                       />\r
+               </Configuration>\r
+               <Configuration\r
+                       Name="Release|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="1"\r
+                       CharacterSet="1"\r
+                       WholeProgramOptimization="1"\r
+                       >\r
+                       <Tool\r
+                               Name="VCPreBuildEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCustomBuildTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXMLDataGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebServiceProxyGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCMIDLTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"\r
+                               RuntimeLibrary="2"\r
+                               OpenMP="true"\r
+                               UsePrecompiledHeader="0"\r
+                               WarningLevel="3"\r
+                               Detect64BitPortabilityProblems="true"\r
+                               DebugInformationFormat="3"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManagedResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPreLinkEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCLinkerTool"\r
+                               LinkIncremental="1"\r
+                               GenerateDebugInformation="true"\r
+                               SubSystem="1"\r
+                               OptimizeReferences="2"\r
+                               EnableCOMDATFolding="2"\r
+                               TargetMachine="1"\r
+                       />\r
+                       <Tool\r
+                               Name="VCALinkTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManifestTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXDCMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCBscMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCFxCopTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCAppVerifierTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebDeploymentTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPostBuildEventTool"\r
+                       />\r
+               </Configuration>\r
+       </Configurations>\r
+       <References>\r
+       </References>\r
+       <Files>\r
+               <Filter\r
+                       Name="Source Files"\r
+                       Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"\r
+                       UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"\r
+                       >\r
+                       <File\r
+                               RelativePath=".\main.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSA.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAClusterTree.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAGuideTree.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAPartProbs.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAReadMatrix.cpp"\r
+                               >\r
+                       </File>\r
+               </Filter>\r
+               <Filter\r
+                       Name="Header Files"\r
+                       Filter="h;hpp;hxx;hm;inl;inc;xsd"\r
+                       UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"\r
+                       >\r
+                       <File\r
+                               RelativePath=".\Defaults.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\FileBuffer.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSA.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAClusterTree.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSADef.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAGuideTree.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAReadMatrix.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MultiSequence.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\ProbabilisticModel.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\SafeVector.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\ScoreType.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\Sequence.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\SparseMatrix.h"\r
+                               >\r
+                       </File>\r
+               </Filter>\r
+               <Filter\r
+                       Name="Resource Files"\r
+                       Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"\r
+                       UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"\r
+                       >\r
+               </Filter>\r
+       </Files>\r
+       <Globals>\r
+       </Globals>\r
+</VisualStudioProject>\r
diff --git a/binaries/src/GLProbs-1.0/MSAProbs.vcproj.STUDENT.LIUY0039.user b/binaries/src/GLProbs-1.0/MSAProbs.vcproj.STUDENT.LIUY0039.user
new file mode 100644 (file)
index 0000000..99168da
--- /dev/null
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="gb2312"?>\r
+<VisualStudioUserFile\r
+       ProjectType="Visual C++"\r
+       Version="8.00"\r
+       ShowAllFiles="false"\r
+       >\r
+       <Configurations>\r
+               <Configuration\r
+                       Name="Debug|Win32"\r
+                       >\r
+                       <DebugSettings\r
+                               Command="$(TargetPath)"\r
+                               WorkingDirectory=""\r
+                               CommandArguments=""\r
+                               Attach="false"\r
+                               DebuggerType="3"\r
+                               Remote="1"\r
+                               RemoteMachine="GS-14"\r
+                               RemoteCommand=""\r
+                               HttpUrl=""\r
+                               PDBPath=""\r
+                               SQLDebugging=""\r
+                               Environment=""\r
+                               EnvironmentMerge="true"\r
+                               DebuggerFlavor=""\r
+                               MPIRunCommand=""\r
+                               MPIRunArguments=""\r
+                               MPIRunWorkingDirectory=""\r
+                               ApplicationCommand=""\r
+                               ApplicationArguments=""\r
+                               ShimCommand=""\r
+                               MPIAcceptMode=""\r
+                               MPIAcceptFilter=""\r
+                       />\r
+               </Configuration>\r
+               <Configuration\r
+                       Name="Release|Win32"\r
+                       >\r
+                       <DebugSettings\r
+                               Command="$(TargetPath)"\r
+                               WorkingDirectory=""\r
+                               CommandArguments=""\r
+                               Attach="false"\r
+                               DebuggerType="3"\r
+                               Remote="1"\r
+                               RemoteMachine="GS-14"\r
+                               RemoteCommand=""\r
+                               HttpUrl=""\r
+                               PDBPath=""\r
+                               SQLDebugging=""\r
+                               Environment=""\r
+                               EnvironmentMerge="true"\r
+                               DebuggerFlavor=""\r
+                               MPIRunCommand=""\r
+                               MPIRunArguments=""\r
+                               MPIRunWorkingDirectory=""\r
+                               ApplicationCommand=""\r
+                               ApplicationArguments=""\r
+                               ShimCommand=""\r
+                               MPIAcceptMode=""\r
+                               MPIAcceptFilter=""\r
+                       />\r
+               </Configuration>\r
+       </Configurations>\r
+</VisualStudioUserFile>\r
diff --git a/binaries/src/GLProbs-1.0/MSAReadMatrix.cpp b/binaries/src/GLProbs-1.0/MSAReadMatrix.cpp
new file mode 100644 (file)
index 0000000..e978eb4
--- /dev/null
@@ -0,0 +1,215 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "MSAReadMatrix.h"
+
+#define TRACE 0
+
+////////////////////////////////////////////////////////////
+// extern variables for scoring matrix data
+////////////////////////////////////////////////////////////
+extern float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+extern char *aminos, *bases, matrixtype[20];
+extern int subst_index[26];
+
+extern double sub_matrix[26][26];
+extern double normalized_matrix[26][26];
+
+extern float TEMPERATURE;
+extern int MATRIXTYPE;
+
+extern float GAPOPEN;
+extern float GAPEXT;
+
+typedef struct {
+       char input[30];                 // input name; init_arguments() sets it to "tempin"
+       int matrix;                     // matrix type code from matrixtype_to_int(): 4, 160 or 1000
+       int N;                          // set to 1 by init_arguments(); NOTE(review): meaning not visible here
+       float T;                        // copy of TEMPERATURE
+       float beta;                     // 1 / TEMPERATURE; read_matrix() scales scores by it inside exp()
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;                  // after init_arguments(): negated GAPOPEN, or the matrix default if both gap values are 0
+       float gapext;                   // after init_arguments(): negated GAPEXT, or the matrix default if both gap values are 0
+} argument_decl;
+
+//argument support
+extern argument_decl argument;
+
+/////////////////////////////////////////////////////////
+//sets substitution matrix type
+////////////////////////////////////////////////////////
+void setmatrixtype(int le) {
+       switch (le) {
+       case 160:
+               strcpy(matrixtype, "gonnet_160");
+               break;
+       case 4:
+               strcpy(matrixtype, "nuc_simple");
+               break;
+       default:
+               strcpy(matrixtype, "CUSTOM");
+               break;
+
+       };
+
+}
+
+///////////////////////////////////////////////////////////////////
+//sets matrix flag
+///////////////////////////////////////////////////////////////////
+inline int matrixtype_to_int() {
+       // Inverse of setmatrixtype(): 4 for "nuc_simple", 160 for "gonnet_160",
+       // and 1000 for any other name held in the global matrixtype buffer.
+       if (strcmp(matrixtype, "nuc_simple") == 0)
+               return 4;
+
+       if (strcmp(matrixtype, "gonnet_160") == 0)
+               return 160;
+
+       return 1000;
+}
+
+/////////////////////////////////////////////////////////////////
+//
+// Can read any scoring matrix as long as it is defined in MSAReadMatrix.h,
+// is stored as a lower triangle,
+// and lists the order of its amino acids/bases
+/////////////////////////////////////////////////////////////////
+
+inline void read_matrix(score_matrix matrx) {
+       int i, j, basecount, position = 0;
+
+       bases = (char *) matrx.monomers;
+
+       basecount = strlen(bases);
+
+       for (i = 0; i < basecount; i++)
+               subst_index[i] = -1;
+
+       for (i = 0; i < basecount; i++)
+               subst_index[bases[i] - 'A'] = i;
+
+       if (TRACE == 1)
+               printf("\nbases read: %d\n", basecount);
+
+       for (i = 0; i < basecount; i++)
+               for (j = 0; j <= i; j++) {
+
+                       double value = exp(argument.beta * matrx.matrix[position++]);
+                       sub_matrix[i][j] = value;
+                       sub_matrix[j][i] = value;
+               }
+
+       if (TRACE)
+               for (i = 0; i < basecount; i++) {
+                       for (j = 0; j < basecount; j++)
+                               printf(" %g ", sub_matrix[i][j]);
+                       printf("\n");
+               }
+
+}
+
+/////////////////////////////////////////////////////////////////
+// read normalized residue exchange matrix
+// compute sequence similarity
+// added by YE Yongtao
+/////////////////////////////////////////////////////////////////
+
+inline void read_normalized_matrix(score_matrix matrx) {
+       int i, j, basecount, position = 0;
+
+       bases = (char *) matrx.monomers;
+
+       basecount = strlen(bases);
+
+       for (i = 0; i < basecount; i++)
+               subst_index[i] = -1;
+
+       for (i = 0; i < basecount; i++)
+               subst_index[bases[i] - 'A'] = i;
+
+       if (TRACE == 1)
+               printf("\nbases read: %d\n", basecount);
+
+       for (i = 0; i < basecount; i++)
+               for (j = 0; j <= i; j++) {
+
+                       double value =  matrx.matrix[position++];
+                       normalized_matrix[i][j] = value;
+                       normalized_matrix[j][i] = value;
+               }
+
+       if (TRACE)
+               for (i = 0; i < basecount; i++) {
+                       for (j = 0; j < basecount; j++)
+                               printf(" %g ", normalized_matrix[i][j]);
+                       printf("\n");
+               }
+
+}
+////////////////////////////////////////////////////////////////////////////////// 
+//initialize the arguments (default values)
+//////////////////////////////////////////////////////////////////////////////////
+void init_arguments() {
+       float gap_open = 0, gap_ext = 0;
+       int le;
+
+       le = matrixtype_to_int();
+
+       argument.N = 1;
+       strcpy(argument.input, "tempin");
+       argument.matrix = le;
+       argument.gapopen = GAPOPEN;
+       argument.gapext = GAPEXT;
+       argument.T = TEMPERATURE;
+       argument.beta = 1.0 / TEMPERATURE;
+       argument.opt = 'P';
+
+       if (le == 4)            //NUC OPTION :default is nuc_simple
+                       {
+               read_matrix(nuc_simple);
+               gap_open = -4;
+               gap_ext = -0.25;
+       }
+
+       else if (le == 160)  //PROT option: default is gonnet_160
+                       {
+               if (TRACE)
+                       printf("read matrix\n");
+               read_matrix(gonnet_160);
+               gap_open = -22;
+               gap_ext = -1;
+
+               read_normalized_matrix(normalized_blosum_30); // add by YE Yongtao
+       } else if (le == 1000) {  //Error handling
+               printf("Error: enter a valid matrix type\n");
+               exit(1);
+               //additional matrices can only be lower triangular
+       }
+
+       //now override the gapopen and gapext
+       if (argument.gapopen != 0.0 || argument.gapext != 0.00)
+
+       {
+               gap_open = -argument.gapopen;
+               gap_ext = -argument.gapext;
+       }
+
+       if (TRACE)
+               printf("%f %f %f %d\n", argument.T, gap_open, gap_ext, le);
+
+       argument.gapopen = gap_open;
+       argument.gapext = gap_ext;
+       argument.opt = 'P';
+
+}
diff --git a/binaries/src/GLProbs-1.0/MSAReadMatrix.h b/binaries/src/GLProbs-1.0/MSAReadMatrix.h
new file mode 100644 (file)
index 0000000..0f0ad00
--- /dev/null
@@ -0,0 +1,151 @@
+/////////////////////////////////////////////////////////////////
+//  MSAReadMatrix.h
+//
+//  Specifies scoring matrices and their structure
+//  
+//
+//
+/////////////////////////////////////////////////////////////////
+
+#ifndef _MSA_READ_MATRIX_H
+#define _MSA_READ_MATRIX_H
+
+typedef struct {
+       char monomers[26]; /* amino or nucleic acid order; read as a NUL-terminated string by read_matrix() */
+       float matrix[676]; /* entries of the score matrix, capacity 26*26=676; read_matrix() consumes them as a lower triangle, row by row */
+} score_matrix;
+
+//default protein sequence scoring matrix as well as default scoring matrix of the PROBALIGN
+//also used when -prot option is used
+
+score_matrix gonnet_160 = { "ABCDEFGHIKLMNPQRSTVWXYZ", // residue order shared by rows and columns
+// Lower triangle only, row by row (row i holds the scores against residues 0..i); read_matrix() in MSAReadMatrix.cpp consumes the values in this order.
+{ 4.6, 0.0, 0.0, 0.3, 0.0, 13.5, -1.1, 0.0, -5.3, 7.0, -0.4, 0.0, -5.2, 3.4,
+               5.9, -3.8, 0.0, -1.8, -7.0, -6.2, 9.1, 0.2, 0.0, -3.4, -0.7, -2.1, -7.6,
+               8.2, -1.8, 0.0, -2.3, -0.1, -0.1, -0.7, -2.7, 9.3, -1.8, 0.0, -2.5,
+               -6.2, -4.3, 0.3, -7.0, -3.7, 5.9, -1.2, 0.0, -4.8, -0.1, 1.3, -5.3,
+               -2.4, 0.2, -3.5, 5.5, -2.2, 0.0, -2.9, -6.5, -4.5, 1.9, -6.7, -3.2, 3.0,
+               -3.4, 5.7, -1.2, 0.0, -1.9, -5.0, -3.1, 1.4, -5.2, -2.1, 2.9, -2.1, 3.4,
+               7.6, -1.2, 0.0, -3.1, 2.6, 0.5, -4.7, -0.2, 1.5, -4.4, 0.8, -4.8, -3.6,
+               6.5, -0.1, 0.0, -5.2, -1.9, -1.4, -5.8, -3.0, -2.2, -4.3, -1.6, -3.5,
+               -4.2, -2.2, 9.6, -0.7, 0.0, -4.2, 0.6, 2.3, -4.1, -2.1, 1.7, -3.2, 2.0,
+               -2.4, -1.2, 0.5, -0.8, 5.6, -1.6, 0.0, -3.5, -1.6, -0.3, -5.3, -2.1,
+               0.3, -4.1, 3.5, -3.5, -2.9, -0.4, -2.1, 1.7, 7.1, 1.6, 0.0, -0.2, 0.0,
+               -0.3, -4.5, -0.1, -0.8, -3.3, -0.4, -3.6, -2.3, 1.1, 0.0, -0.2, -0.9,
+               4.4, 0.5, 0.0, -1.4, -0.6, -0.8, -3.6, -2.4, -0.8, -1.2, -0.2, -2.4,
+               -1.1, 0.3, -0.4, -0.4, -0.9, 2.3, 5.0, 0.1, 0.0, -0.6, -4.9, -3.0, -0.8,
+               -5.2, -3.5, 4.0, -3.0, 1.7, 1.4, -3.8, -3.2, -2.7, -3.4, -2.0, 0.0, 5.3,
+               -5.5, 0.0, -2.1, -7.8, -6.4, 3.2, -5.5, -1.9, -3.4, -5.4, -2.0, -2.2,
+               -5.5, -7.4, -4.0, -2.4, -4.7, -5.4, -4.5, 15.8, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, -3.7, 0.0, -1.3, -4.2, -4.4, 5.6, -6.0, 2.7, -2.0, -3.5, -1.1,
+               -1.3, -2.2, -4.8, -2.9, -2.9, -2.8, -3.2, -2.4, 3.8, 0.0, 10.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
+
+};
+
+//normalized blosum_62 scoring matrix for computing protein sequence similarity
+score_matrix normalized_blosum_62 = {
+// Layout: residue order string, then the lower triangle with one matrix row per source line (row i lists the scores against residues 0..i).
+"ABCDEFGHIKLMNPQRSTVWXYZ",
+{
+0.533333333,
+0.133333333,0.533333333,
+0.266666667,0.066666667,0.866666667,
+0.133333333,0.533333333,0.066666667,0.666666667,
+0.2,0.333333333,0,0.4,0.6,
+0.133333333,0.066666667        ,0.133333333,0.066666667,0.066666667,0.666666667,
+0.266666667,0.2        ,0.066666667,0.2,0.133333333,0.066666667,0.666666667,
+0.133333333,0.266666667        ,0.066666667,0.2,0.266666667,0.2,0.133333333,0.8,
+0.2,0.066666667        ,0.2,0.066666667,0.066666667,0.266666667,0,0.066666667,0.533333333,
+0.2,0.266666667        ,0.066666667,0.2,0.333333333,0.066666667,0.133333333,0.2,0.066666667,0.6,
+0.2,0,0.2,0,0.066666667,0.266666667    ,0,0.066666667  ,0.4,0.133333333,0.533333333,
+0.2,0.066666667        ,0.2,0.066666667,0.133333333,0.266666667,0.066666667    ,0.133333333,0.333333333,0.2,0.4,0.6,
+0.133333333,0.466666667        ,0.066666667,0.333333333,0.266666667,0.066666667,0.266666667,0.333333333,0.066666667,   0.266666667,0.066666667 ,0.133333333,0.666666667,
+0.2,0.133333333        ,0.066666667,0.2,0.2,   0,0.133333333,  0.133333333,0.066666667 ,0.2,0.066666667,0.133333333,   0.133333333,0.733333333 ,
+0.2,0.266666667        ,0.066666667,0.266666667,0.4,0.066666667,0.133333333,   0.266666667,0.066666667 ,0.333333333,   0.133333333,0.266666667 ,0.266666667,0.2,0.6,
+0.2,0.2        ,0.066666667,0.133333333,0.266666667,0.066666667,0.133333333,0.266666667,0.066666667,0.4,       0.133333333,0.2,0.266666667,0.133333333,0.333333333,0.6 ,
+0.333333333,0.266666667        ,0.2,0.266666667,0.266666667,0.133333333,0.266666667,0.2,0.133333333,   0.266666667,0.133333333 ,0.2,0.333333333,0.2,0.266666667,0.2,0.533333333,
+0.266666667,   0.2,0.2,0.2,    0.2,    0.133333333,0.133333333,0.133333333,0.2 ,0.2,0.2,0.2,0.266666667,0.2,   0.2,0.2 ,0.333333333,0.6,
+0.266666667,0.066666667        ,0.2,0.066666667,0.133333333,0.2,0.066666667,0.066666667,0.466666667,0.133333333,       0.333333333,0.333333333,        0.066666667,0.133333333,0.133333333     ,0.066666667,   0.133333333,    0.266666667,    0.533333333,
+0.066666667,0,0.133333333,0,0.066666667        ,0.333333333    ,       0.133333333,    0.133333333,0.066666667 ,       0.066666667,    0.133333333     ,0.2    ,       0,      0,0.133333333   ,0.066666667    ,       0.066666667     ,       0.133333333     ,       0.066666667     ,       1,
+0.266666667,0.2        ,0.133333333    ,       0.2     ,       0.2     ,       0.2     ,       0.2,0.2 ,       0.2,0.2 ,0.2    ,       0.2     ,       0.2     ,       0.133333333,    0.2     ,0.2    ,       0.266666667     ,       0.266666667,0.2 ,0.133333333,   0.2     ,
+0.133333333,0.066666667        ,       0.133333333     ,       0.066666667     ,       0.133333333     ,       0.466666667,    0.066666667,0.4 ,0.2,   0.133333333     ,       0.2     ,       0.2,    0.133333333     ,       0.066666667,    0.2,    0.133333333,0.133333333 ,0.133333333    ,       0.2     ,       0.4,0.2 ,       0.733333333,
+0.2,0.333333333        ,0.066666667    ,0.333333333    ,       0.533333333     ,       0.066666667,    0.133333333,    0.266666667,0.066666667 ,0.333333333    ,       0.066666667,    0.2,    0.266666667,    0.2     ,       0.466666667,0.266666667 ,0.266666667,   0.2     ,       0.133333333,    0.066666667     ,       0.2,0.133333333 ,0.533333333
+
+}
+};
+
+//normalized blosum_30 scoring matrix for computing protein sequence similarity
+score_matrix normalized_blosum_30 = {
+
+"ABCDEFGHIKLMNPQRSTVWXYZ",
+{
+0.407407407    ,                                                                                                                                                                                                                                                                                                                                                       
+0.259259259    ,       0.444444444     ,                                                                                                                                                                                                                                                                                                                                       
+0.148148148    ,       0.185185185     ,       0.888888889     ,                                                                                                                                                                                                                                                                                                                       
+0.259259259    ,       0.444444444     ,       0.148148148     ,       0.592592593     ,                                                                                                                                                                                                                                                                                                       
+0.259259259    ,       0.259259259     ,       0.296296296     ,       0.296296296     ,       0.481481481     ,                                                                                                                                                                                                                                                                                       
+0.185185185    ,       0.148148148     ,       0.148148148     ,       0.074074074     ,       0.111111111     ,       0.62962963      ,                                                                                                                                                                                                                                                                       
+0.259259259    ,       0.259259259     ,       0.111111111     ,       0.222222222     ,       0.185185185     ,       0.148148148     ,       0.555555556     ,                                                                                                                                                                                                                                                       
+0.185185185    ,       0.185185185     ,       0.074074074     ,       0.185185185     ,       0.259259259     ,       0.148148148     ,       0.148148148     ,       0.777777778     ,                                                                                                                                                                                                                                       
+0.259259259    ,       0.185185185     ,       0.185185185     ,       0.111111111     ,       0.148148148     ,       0.259259259     ,       0.222222222     ,       0.185185185     ,       0.481481481     ,                                                                                                                                                                                                                       
+0.259259259    ,       0.259259259     ,       0.148148148     ,       0.259259259     ,       0.333333333     ,       0.222222222     ,       0.222222222     ,       0.185185185     ,       0.185185185     ,       0.407407407     ,                                                                                                                                                                                                       
+0.222222222    ,       0.222222222     ,       0.259259259     ,       0.222222222     ,       0.222222222     ,       0.333333333     ,       0.185185185     ,       0.222222222     ,       0.333333333     ,       0.185185185     ,       0.407407407     ,                                                                                                                                                                                       
+0.296296296    ,       0.185185185     ,       0.185185185     ,       0.148148148     ,       0.222222222     ,       0.185185185     ,       0.185185185     ,       0.333333333     ,       0.296296296     ,       0.333333333     ,       0.333333333     ,       0.481481481     ,                                                                                                                                                                       
+0.259259259    ,       0.407407407     ,       0.222222222     ,       0.296296296     ,       0.222222222     ,       0.222222222     ,       0.259259259     ,       0.222222222     ,       0.259259259     ,       0.259259259     ,       0.185185185     ,       0.259259259     ,       0.555555556     ,                                                                                                                                                       
+0.222222222    ,       0.185185185     ,       0.148148148     ,       0.222222222     ,       0.296296296     ,       0.111111111     ,       0.222222222     ,       0.296296296     ,       0.148148148     ,       0.296296296     ,       0.148148148     ,       0.111111111     ,       0.148148148     ,       0.666666667     ,                                                                                                                                       
+0.296296296    ,       0.222222222     ,       0.185185185     ,       0.222222222     ,       0.333333333     ,       0.148148148     ,       0.185185185     ,       0.259259259     ,       0.185185185     ,       0.259259259     ,       0.185185185     ,       0.222222222     ,       0.222222222     ,       0.259259259     ,       0.555555556     ,                                                                                                                       
+0.222222222    ,       0.185185185     ,       0.185185185     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.185185185     ,       0.222222222     ,       0.148148148     ,       0.296296296     ,       0.185185185     ,       0.259259259     ,       0.185185185     ,       0.222222222     ,       0.37037037      ,       0.555555556     ,                                                                                                       
+0.296296296    ,       0.259259259     ,       0.185185185     ,       0.259259259     ,       0.259259259     ,       0.222222222     ,       0.259259259     ,       0.222222222     ,       0.222222222     ,       0.259259259     ,       0.185185185     ,       0.185185185     ,       0.259259259     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.407407407     ,                                                                                       
+0.296296296    ,       0.259259259     ,       0.185185185     ,       0.222222222     ,       0.185185185     ,       0.185185185     ,       0.185185185     ,       0.185185185     ,       0.259259259     ,       0.222222222     ,       0.259259259     ,       0.259259259     ,       0.296296296     ,       0.259259259     ,       0.259259259     ,       0.148148148     ,       0.333333333     ,       0.444444444     ,                                                                       
+0.296296296    ,       0.185185185     ,       0.185185185     ,       0.185185185     ,       0.148148148     ,       0.296296296     ,       0.148148148     ,       0.148148148     ,       0.407407407     ,       0.185185185     ,       0.296296296     ,       0.259259259     ,       0.185185185     ,       0.111111111     ,       0.148148148     ,       0.222222222     ,       0.222222222     ,       0.296296296     ,       0.444444444     ,                                                       
+0.074074074    ,       0.074074074     ,       0.185185185     ,       0.111111111     ,       0.222222222     ,       0.296296296     ,       0.296296296     ,       0.074074074     ,       0.148148148     ,       0.185185185     ,       0.185185185     ,       0.148148148     ,       0       ,       0.148148148     ,       0.222222222     ,       0.259259259     ,       0.148148148     ,       0.074074074     ,       0.148148148     ,       1       ,                                       
+0.259259259    ,       0.222222222     ,       0.185185185     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.259259259     ,       0.259259259     ,       0.259259259     ,       0.259259259     ,       0.259259259     ,       0.222222222     ,       0.259259259     ,       0.222222222     ,       0.259259259     ,       0.259259259     ,       0.259259259     ,       0.185185185     ,       0.222222222     ,                       
+0.111111111    ,       0.148148148     ,       0.037037037     ,       0.222222222     ,       0.185185185     ,       0.37037037      ,       0.148148148     ,       0.259259259     ,       0.222222222     ,       0.222222222     ,       0.37037037      ,       0.222222222     ,       0.111111111     ,       0.185185185     ,       0.222222222     ,       0.259259259     ,       0.185185185     ,       0.222222222     ,       0.296296296     ,       0.444444444     ,       0.222222222     ,       0.592592593     ,       
+0.259259259    ,       0.259259259     ,       0.259259259     ,       0.259259259     ,       0.444444444     ,       0.111111111     ,       0.185185185     ,       0.259259259     ,       0.148148148     ,       0.296296296     ,       0.222222222     ,       0.222222222     ,       0.222222222     ,       0.259259259     ,       0.407407407     ,       0.259259259     ,       0.222222222     ,       0.222222222     ,       0.148148148     ,       0.222222222     ,       0.259259259     ,       0.185185185     ,       0.407407407
+
+}
+};
+
+//default nucleotide sequence scoring matrix: 153 (= 17*18/2) values, i.e. a packed lower triangle over the 17 symbols listed below
+//used when -nuc option is used; the only non-zero values are the 1s at the A-A, C-C, G-G, T-T and U-U diagonal positions
+score_matrix nuc_simple = {
+
+"ABCDGHKMNRSTUVWXY",
+
+{ 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0 }
+
+//Ribosum85-60 alternative values, kept for reference only: the whole initializer below is commented out and not compiled
+               /*
+                {
+                2.22,
+                0,  0,
+                -1.86,  0,  1.16,
+                0,  0,  0,  0,
+                -1.46,  0,  -2.48,  0,  1.03,
+                0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                -1.39,  0,  -1.05,  0,  -1.74,  0,  0,  0,  0,  0,  0,  1.65,
+                -1.39,  0,  -1.05,  0,  -1.74,  0,  0,  0,  0,  0,  0,  0,  1.65,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+                }
+                */
+
+               };
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/MSAdiv3.cpp b/binaries/src/GLProbs-1.0/MSAdiv3.cpp
new file mode 100644 (file)
index 0000000..2d0411d
--- /dev/null
@@ -0,0 +1,1472 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+string parametersInputFilename = ""; // empty => ReadParameters() uses the built-in default parameters
+string parametersOutputFilename = "no training";
+string annotationFilename = "";
+
+bool enableVerbose = false; // when true, doAlign() logs each pairwise computation to cerr
+bool enableAnnotation = false; // when true, doAlign() calls WriteAnnotation() on the final alignment
+bool enableClustalWOutput = false; // true: the MSA constructor writes with WriteALN(); false: WriteMFA()
+bool enableAlignOrder = false;
+int numConsistencyReps = 2; // number of DoRelaxation() passes performed in doAlign()
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 100;
+
+float cutoff = 0;
+
+VF initDistrib(NumMatrixTypes); // NumMatrixTypes entries, filled by ReadParameters()
+VF gapOpen(2 * NumInsertStates); // 2 * NumInsertStates entries, filled by ReadParameters()
+VF gapExtend(2 * NumInsertStates); // 2 * NumInsertStates entries, filled by ReadParameters()
+VVF emitPairs(256, VF(256, 1e-10)); // 256 x 256 table indexed by the unsigned char codes of two residues
+VF emitSingle(256, 1e-5); // 256 entries indexed by the unsigned char code of a residue
+
+string alphabet = alphabetDefault;
+
+const int MIN_PRETRAINING_REPS = 0; // the MIN_*/MAX_* pairs look like option bounds -- presumably enforced during option parsing; confirm
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+int flag_gui = 0;   //0: no GUI-related output
+//1: GUI-related output generated
+int flag_ppscore = 0; //0: no posterior-probability (pp) score sequence added to the output FASTA alignment
+//1: pp score sequence added to the output FASTA alignment
+
+///////////////////////////////
+// global scoring matrix variables
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160";
+int subst_index[26];
+
+double sub_matrix[26][26];
+int firstread = 0;             //this makes sure that matrices are read only once
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+int prot_nuc = 0;              //0=protein, 1=nucleotide
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+int numThreads = 0; // <= 0 makes the MSA constructor use omp_get_num_procs() (OpenMP builds only)
+
+//argument support: plain record of settings for the partition-function code (presumably filled by init_arguments(); confirm)
+typedef struct {
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+argument_decl argument; // single file-scope instance of the record above
+
+extern inline void read_sustitution_matrix(char *fileName); // declarations only: the definitions are not in this part of the file
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int();
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments(); // called by the MSA constructor right after ParseParams()
+
+MSA::MSA(int argc, char* argv[]) { // runs the whole pipeline: parse options, load sequences, align, write the result
+       //parse program parameters; the returned names are the sequence files to load
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters(); // fills initDistrib, gapOpen, gapExtend, emitPairs and emitSingle
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       //read the input sequences (all files are appended into one MultiSequence)
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       //allocate space for sequence weights (one int per sequence; not initialized here)
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       //initialize parameters for OPENMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+               cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+       cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       //request numThreads OpenMP threads: the user-supplied value, or the detected core count when none was given
+       omp_set_num_threads(numThreads);
+#endif 
+
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), initDistrib, gapOpen, gapExtend, emitPairs,
+                       emitSingle);
+
+       //write the alignment results to the stream behind alignOutFile (ClustalW or multi-FASTA format)
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+       //release resources
+       delete[] this->seqsWeights;
+       delete alignment;
+       delete sequences;
+}
+MSA::~MSA() {
+       /* close the output stream, but only when an output file name was set; the cast presumes alignOutFile then points at a std::ofstream -- confirm where it is assigned */
+       if (alignOutFileName.length() > 0) {
+               ((std::ofstream*) alignOutFile)->close();
+       }
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Prints message, initDistrib, gapOpen and gapExtend to STDERR.  If filename is non-NULL, those three vectors plus the
+// alphabet, the lower triangle of emitPairs and emitSingle are written to that file; exits with status 1 if it cannot be opened.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       // print parameters to the screen
+       cerr << message << endl << "    initDistrib[] = { ";
+       for (int i = 0; i < NumMatrixTypes; i++)
+               cerr << setprecision(10) << initDistrib[i] << " ";
+       cerr << "}" << endl << "        gapOpen[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapOpen[i] << " ";
+       cerr << "}" << endl << "      gapExtend[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapExtend[i] << " ";
+       cerr << "}" << endl << endl;
+
+       /*
+        for (int i = 0; i < 5; i++){
+        for (int j = 0; j <= i; j++){
+        cerr << emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]] << " ";
+        }
+        cerr << endl;
+        }*/
+
+       // if a file name is specified
+       if (filename) {
+
+               // attempt to open the file for writing
+               FILE *file = fopen(filename, "w");
+               if (!file) {
+                       cerr << "ERROR: Unable to write parameter file: " << filename
+                                       << endl;
+                       exit(1);
+               }
+
+               // if successful, then write the parameters to the file, one vector per line
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       fprintf(file, "%.10f ", initDistrib[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapOpen[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapExtend[i]);
+               fprintf(file, "\n");
+               fprintf(file, "%s\n", alphabet.c_str()); // uses the file-scope alphabet, not a parameter
+               for (int i = 0; i < (int) alphabet.size(); i++) { // one row per alphabet symbol
+                       for (int j = 0; j <= i; j++) // lower triangle only (j <= i)
+                               fprintf(file, "%.10f ",
+                                               emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]]);
+                       fprintf(file, "\n");
+               }
+               for (int i = 0; i < (int) alphabet.size(); i++)
+                       fprintf(file, "%.10f ", emitSingle[(unsigned char) alphabet[i]]);
+               fprintf(file, "\n");
+               fclose(file);
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// First computes all pairwise posterior probability matrices.
+// Then, computes new parameters if training, or a final
+// alignment, otherwise.
+/////////////////////////////////////////////////////////////////
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, VF &initDistrib, VF &gapOpen,
+               VF &gapExtend, VVF &emitPairs, VF &emitSingle) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+
+       //create distance matrix
+       VVF probalign_distances(numSeqs, VF(numSeqs, 0));
+       VVF distances(numSeqs, VF(numSeqs, 0));//msa
+
+        float gl_accuracy = 0;
+        //creat sparseMatrices
+       SafeVector<SafeVector<SparseMatrix *> > probalign_sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+        SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL)); // msa
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       // verbose output
+                       if (enableVerbose) {
+                               cerr << "Computing posterior matrix: (" << a + 1 << ") "
+                                               << seq1->GetHeader() << " vs. " << "(" << b + 1 << ") "
+                                               << seq2->GetHeader() << " -- ";
+                       }
+
+//probcons
+                       // compute forward and backward probabilities
+                       VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                       assert(forward);
+                       VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                       assert(backward);
+                       // compute posterior probability matrix from HMM
+                       VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
+                       assert(probcons_posterior);             
+                       delete forward;
+                       delete backward;                     
+
+//probalign
+                       VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                       assert(probalign_posterior);
+                       probalign_sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),seq2->GetLength(),                                                                  *probalign_posterior);
+                       probalign_sparseMatrices[b][a] = NULL;                 
+                       pair<SafeVector<char> *, float> probalign_alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *probalign_posterior);                     
+                       probalign_distances[a][b] =1.0f - probalign_alignment.second / min(seq1->GetLength(), seq2->GetLength());                       
+                       delete probalign_alignment.first;  
+
+//local
+                       forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                       assert(forward);
+                       backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                       assert(backward);
+                       VF* local_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                       delete forward;
+                       delete backward; 
+                  
+//GL
+                       //merge probalign + local + probcons 
+                       VF::iterator ptr1 = probcons_posterior->begin();
+                       VF::iterator ptr2 = probalign_posterior->begin();                       
+                       VF::iterator ptr3 = local_posterior->begin();
+                       VF* posterior = new VF((seq1->GetLength()+1) * (seq2->GetLength()+1)); assert (posterior);  //msa
+                       VF::iterator ptr = posterior->begin();                 
+                       for (int i = 0; i <= seq1->GetLength(); i++) {
+                               for (int j = 0; j <= seq2->GetLength(); j++) {
+                                       float v1 = *ptr1;
+                                       float v2 = *ptr2;
+                                       float v3 = *ptr3;
+                                       *ptr = sqrt((v1*v1 + v2*v2 + v3*v3)/3);
+                                       ptr1++;
+                                       ptr2++;
+                                        ptr3++;
+                                       ptr++;
+                               }
+                       }
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> gl_alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       //compute expected accuracy
+                       distances[a][b] = distances[b][a] = 1.0f - gl_alignment.second
+                                       / min(seq1->GetLength(), seq2->GetLength());
+
+                       // compute sparse representations
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+                        //
+                       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+                       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+                        float N_correct_match = 0;
+                        int i = 1;int j = 1;
+                       for (SafeVector<char>::iterator iter = gl_alignment.first->begin(); 
+                               iter != gl_alignment.first->end(); ++iter){
+                               if (*iter == 'B'){
+                                       unsigned char c1 = (unsigned char) iter1[i++];
+                                       unsigned char c2 = (unsigned char) iter2[j++];
+                                       if(c1==c2) N_correct_match += 1;
+                               }
+                                else if(*iter == 'X') i++;
+                               else if(*iter == 'Y') j++;
+                        }
+                        if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) cerr << "GL"<< endl;
+                       gl_accuracy += N_correct_match / min(seq1->GetLength(), seq2->GetLength());
+                        //
+                        delete probcons_posterior;
+                       delete probalign_posterior;
+                       delete local_posterior;
+                       delete posterior;
+
+#ifndef _OPENMP
+               }
+#endif
+       } 
+
+/*
+//self-adaptive   
+       gl_accuracy /= numPairs;
+        if(gl_accuracy > 0.4){
+               for (int a = 0; a < numSeqs - 1; a++) 
+                       for (int b = a + 1; b < numSeqs; b++) {
+                               distances[a][b] = distances[b][a] = probalign_distances[a][b];
+                               sparseMatrices[a][b] = probalign_sparseMatrices[a][b];
+                               sparseMatrices[b][a] = NULL;
+                       }
+        }
+*/
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // perform the consistency transformation the desired number of times
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) {
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Parses an int from 'data' with strtol (base 0) and stores it in *val. Returns false, leaving *val untouched,
+// when nothing can be converted, on a range error, or when the value does not fit in an int.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(val);
+
+       errno = 0; // cleared so that a non-zero errno afterwards can be attributed to strtol
+       retVal = strtol(data, &endPtr, 0);
+       if (retVal == 0 && (errno != 0 || data == endPtr)) // zero result caused by an error or by no conversion at all
+               return false;
+       if (errno != 0 && (retVal == LONG_MAX || retVal == LONG_MIN)) // error reported together with a value at the long limits
+               return false;
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX) // representable as long but not as int
+               return false;
+       *val = (int) retVal; // note: characters following the number (at endPtr) are not inspected
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Parses a number from 'data' with strtod and stores it, narrowed to float, in *val. Returns false, leaving *val
+// untouched, when nothing can be converted or when strtod reports an error with a magnitude of at least 1000000.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+
+       assert(val);
+
+       errno = 0; // cleared so that a non-zero errno afterwards can be attributed to strtod
+       retVal = strtod(data, &endPtr);
+       if (retVal == 0 && (errno != 0 || data == endPtr)) // zero result caused by an error or by no conversion at all
+               return false;
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0)) // error reported together with a large magnitude
+               return false;
+       *val = (float) retVal; // note: characters following the number (at endPtr) are not inspected
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// SetEmitPairAllCases()
+//
+// Stores one emission value for the character pair (a, b) under
+// every lower/upper-case combination and in both index orders,
+// keeping the pair table symmetric and case-insensitive.
+/////////////////////////////////////////////////////////////////
+
+static void SetEmitPairAllCases(VVF &pairs, char a, char b, float value) {
+       const unsigned char formsA[2] = { (unsigned char) tolower(a),
+                       (unsigned char) toupper(a) };
+       const unsigned char formsB[2] = { (unsigned char) tolower(b),
+                       (unsigned char) toupper(b) };
+
+       for (int m = 0; m < 2; m++) {
+               for (int n = 0; n < 2; n++) {
+                       pairs[formsA[m]][formsB[n]] = value;
+                       pairs[formsB[n]][formsA[m]] = value;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters from a file.
+//
+// When parametersInputFilename is empty the built-in defaults
+// for one or two pairs of insert states are used instead.  The
+// parameter file layout read here is:
+//   line 1: NumMatrixTypes initial distribution values
+//   line 2: 2 * NumInsertStates gap-open values
+//   line 3: 2 * NumInsertStates gap-extend values
+//   line 4: the alphabet (whitespace between symbols is ignored)
+//   then  : the lower triangle of the pair emission matrix,
+//           followed by one single emission value per symbol
+//
+// Any unreadable or missing value prints an error and terminates
+// the program with exit status 1.
+/////////////////////////////////////////////////////////////////
+
+void MSA::ReadParameters() {
+
+       ifstream data;
+
+       // every character starts with these small values; only the
+       // alphabet symbols are overwritten below
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // read initial state distribution and transition parameters
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend1Default[i];
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend2Default[i];
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       emitSingle[(unsigned char) tolower(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       emitSingle[(unsigned char) toupper(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       for (int j = 0; j <= i; j++)
+                               SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j],
+                                               emitPairsDefault[i][j]);
+               }
+       } else {
+               data.open(parametersInputFilename.c_str());
+               if (data.fail()) {
+                       cerr << "ERROR: Unable to read parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               string line[3];
+               for (int i = 0; i < 3; i++) {
+                       if (!getline(data, line[i])) {
+                               cerr
+                                               << "ERROR: Unable to read transition parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+               }
+
+               // parse the three transition lines; a short or non-numeric line
+               // sets failbit on the string stream, which is checked once all
+               // three lines have been consumed line by line
+               bool transitionsOk = true;
+               istringstream data2;
+               data2.clear();
+               data2.str(line[0]);
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       data2 >> initDistrib[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[1]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapOpen[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[2]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapExtend[i];
+               transitionsOk = transitionsOk && !data2.fail();
+
+               if (!transitionsOk) {
+                       cerr
+                                       << "ERROR: Unable to read transition parameters from parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               if (!getline(data, line[0])) {
+                       cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // read alphabet as concatenation of all characters on alphabet line
+               alphabet = "";
+               string token;
+               data2.clear();
+               data2.str(line[0]);
+               while (data2 >> token)
+                       alphabet += token;
+
+               // lower triangle of the pair emission matrix, row by row
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++) {
+                               float val;
+                               if (!(data >> val)) {
+                                       cerr
+                                                       << "ERROR: Unable to read emission parameters from parameter file: "
+                                                       << parametersInputFilename << endl;
+                                       exit(1);
+                               }
+                               SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j], val);
+                       }
+               }
+
+               // one single-character emission value per alphabet symbol
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       float val;
+                       if (!(data >> val)) {
+                               cerr
+                                               << "ERROR: Unable to read emission parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+                       emitSingle[(unsigned char) tolower(alphabet[i])] = val;
+                       emitSingle[(unsigned char) toupper(alphabet[i])] = val;
+               }
+               data.close();
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// printUsage()
+//
+// Write the program banner and the list of command-line options
+// to stderr.  The defaults shown are the current values of
+// numConsistencyReps, numIterativeRefinementReps, enableVerbose
+// and enableAlignOrder at the time of the call.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       cerr
+                       << "************************************************************************"
+                       << endl
+                       << "\tMSAPROBS is a open-source protein multiple sequence alignment algorithm"
+                       << endl
+                       << "\tbased on pair hidden markov model and partition function postirior"
+                       << endl
+                       << "\tprobabilities. If any comments or problems, please contact"
+                       << endl
+                       << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl
+                       << "*************************************************************************"
+                       << endl << "Usage:" << endl
+                       << "       msaprobs [OPTION]... [infile]..." << endl << endl
+                       << "Description:" << endl
+                       << "       Align sequences in multi-FASTA format" << endl << endl
+                       << "       -o, --outfile <string>" << endl
+                       << "              specify the output file name (STDOUT by default)"
+                       << endl << "       -num_threads <integer>" << endl
+                       << "              specify the number of threads used, and otherwise detect automatically"
+                       // accepted by ParseParams() but previously undocumented
+                       << endl << "       -p, --paramfile <string>" << endl
+                       << "              read initial distribution, transition and emission parameters from this file"
+                       << endl << "       -clustalw" << endl
+                       << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl << "       -c, --consistency REPS" << endl
+                       << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl
+                       << "       -ir, --iterative-refinement REPS" << endl
+                       << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl
+                       // accepted by ParseParams() but previously undocumented
+                       << "       -co, --cutoff CUTOFF" << endl
+                       << "              use 0 <= CUTOFF <= 1 as the posterior probability cutoff"
+                       << endl << endl << "       -v, --verbose" << endl
+                       << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl
+                       << "       -annot FILENAME" << endl
+                       << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl << "       -a, --alignment-order" << endl
+                       << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl
+                       // accepted by ParseParams() but previously undocumented
+                       << "       -help, -?" << endl
+                       << "              print this usage message" << endl
+                       << "       -version " << endl
+                       << "              print out version of MSAPROBS " << endl << endl;
+}
+/////////////////////////////////////////////////////////////////
+// ParseParams()
+//
+// Parse all command-line options.
+//
+//   argc, argv - the arguments as received by main()
+//
+// Returns every argument that does not start with '-' (the
+// input file names), in command-line order.  Options update the
+// corresponding MSA settings.  Before returning, alignOutFile is
+// pointed at std::cout or at a newly opened output file.
+//
+// Prints a message and calls exit(1) when no arguments are
+// given, for -help/-?/-version, for unknown options, for missing
+// or invalid option values, and when the output file cannot be
+// opened.
+/////////////////////////////////////////////////////////////////
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+       int tempInt;
+       float tempFloat;
+
+       for (int i = 1; i < argc; i++) {
+               if (argv[i][0] == '-') {
+                       //help
+                       if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
+                               printUsage();
+                               exit(1);
+                               //output file name
+                       } else if (!strcmp(argv[i], "-o")
+                                       || !strcmp(argv[i], "--outfile")) {
+                               if (i < argc - 1) {
+                                       alignOutFileName = argv[++i];   //get the file name
+                               } else {
+                                       cerr << "ERROR: String expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               // parameter file
+                       } else if (!strcmp(argv[i], "-p")
+                                       || !strcmp(argv[i], "--paramfile")) {
+                               if (i < argc - 1)
+                                       parametersInputFilename = string(argv[++i]);
+                               else {
+                                       cerr << "ERROR: Filename expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               //number of threads used
+                               // ("-p" used to be listed here as well, but it is always
+                               // consumed by the parameter-file branch above, so the
+                               // unreachable alias has been dropped)
+                       } else if (!strcmp(argv[i], "-num_threads")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << " ERROR: invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               // negative counts are clamped to 0
+                                               if (tempInt < 0) {
+                                                       tempInt = 0;
+                                               }
+                                               numThreads = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               // number of consistency transformations
+                       } else if (!strcmp(argv[i], "-c")
+                                       || !strcmp(argv[i], "--consistency")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_CONSISTENCY_REPS
+                                                               || tempInt > MAX_CONSISTENCY_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_CONSISTENCY_REPS << " and "
+                                                                       << MAX_CONSISTENCY_REPS << "." << endl;
+                                                       exit(1);
+                                               } else {
+                                                       numConsistencyReps = tempInt;
+                                               }
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // number of randomized partitioning iterative refinement passes
+                       else if (!strcmp(argv[i], "-ir")
+                                       || !strcmp(argv[i], "--iterative-refinement")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
+                                                               || tempInt > MAX_ITERATIVE_REFINEMENT_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_ITERATIVE_REFINEMENT_REPS << " and "
+                                                                       << MAX_ITERATIVE_REFINEMENT_REPS << "."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       numIterativeRefinementReps = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // annotation files
+                       else if (!strcmp(argv[i], "-annot")) {
+                               enableAnnotation = true;
+                               if (i < argc - 1) {
+                                       annotationFilename = argv[++i];
+                               } else {
+                                       cerr << "ERROR: FILENAME expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // clustalw output format
+                       else if (!strcmp(argv[i], "-clustalw")) {
+                               enableClustalWOutput = true;
+                       }
+
+                       // cutoff
+                       else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
+                               if (i < argc - 1) {
+                                       if (!GetFloat(argv[++i], &tempFloat)) {
+                                               cerr
+                                                               << "ERROR: Invalid floating-point value following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempFloat < 0 || tempFloat > 1) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", floating-point value must be between 0 and 1."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       cutoff = tempFloat;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Floating-point value expected for option "
+                                                       << argv[i] << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // verbose reporting
+                       else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
+                               enableVerbose = true;
+                       }
+
+                       // alignment order
+                       else if (!strcmp(argv[i], "-a")
+                                       || !strcmp(argv[i], "--alignment-order")) {
+                               enableAlignOrder = true;
+                       }
+
+                       //print out version
+                       else if (!strcmp(argv[i], "-version")) {
+                               cerr << "MSAPROBS version " << VERSION << endl;
+                               exit(1);
+                       }
+                       // bad arguments
+                       else {
+                               cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
+                               exit(1);
+                       }
+               } else {
+                       // not an option: treat it as an input file name
+                       sequenceNames.push_back(string(argv[i]));
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               alignOutFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+               // stop here rather than computing a whole alignment and then
+               // silently discarding it into a stream that never opened
+               if (alignOutFile->fail()) {
+                       cerr << "ERROR: Unable to open output file: "
+                                       << alignOutFileName << endl;
+                       exit(1);
+               }
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Walks the guide tree recursively.  A leaf yields a new
+// MultiSequence holding a clone of the sequence at tree->idx; an
+// inner node yields the alignment of its left and right subtree
+// results, which are deleted once they have been merged.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // leaf of the alignment tree: wrap a copy of the single sequence
+       if (tree->leaf != NODE) {
+               MultiSequence *single = new MultiSequence();
+               assert(single);
+               single->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+               return single;
+       }
+
+       // inner node: align the left subtree first, then the right one
+       MultiSequence *leftAlignment = ProcessTree(tree->left, sequences,
+                       sparseMatrices, model);
+       MultiSequence *rightAlignment = ProcessTree(tree->right, sequences,
+                       sparseMatrices, model);
+
+       assert(leftAlignment);
+       assert(rightAlignment);
+
+       MultiSequence *merged = AlignAlignments(leftAlignment, rightAlignment,
+                       sparseMatrices, model);
+       assert(merged);
+
+       // the partial alignments are no longer needed after merging
+       delete leftAlignment;
+       delete rightAlignment;
+
+       return merged;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Builds the progressive alignment with ProcessTree(), then runs
+// numIterativeRefinementReps rounds of DoIterativeRefinement()
+// on it.  When enableAlignOrder is set on entry, the sort labels
+// are recorded beforehand and written back afterwards, and the
+// flag is cleared.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+       MultiSequence *finalAlignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       // remember the labels in alignment order; stays empty otherwise
+       SafeVector<int> savedLabels;
+       if (enableAlignOrder) {
+               int pos = 0;
+               while (pos < finalAlignment->GetNumSequences()) {
+                       savedLabels.push_back(
+                                       finalAlignment->GetSequence(pos)->GetSortLabel());
+                       pos++;
+               }
+               finalAlignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // iterative refinement passes
+       for (int pass = 0; pass < numIterativeRefinementReps; ++pass)
+               DoIterativeRefinement(sparseMatrices, model, finalAlignment);
+       cerr << endl;
+
+       // put the recorded labels back (no-op when nothing was recorded)
+       const int savedCount = (int) savedLabels.size();
+       for (int pos = 0; pos < savedCount; ++pos)
+               finalAlignment->GetSequence(pos)->SetSortLabel(savedLabels[pos]);
+
+       // return final alignment
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Aligns two MultiSequence objects against each other and
+// returns the combined alignment as a new MultiSequence.  With
+// enableVerbose set, the labels of both groups and a score
+// divided by the summed pairwise minimum lengths go to stderr.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // report which groups are being aligned: "[a,b] vs. [c,d]: "
+       if (enableVerbose) {
+               MultiSequence *groups[2] = { align1, align2 };
+               const char *closers[2] = { "] vs. ", "]: " };
+               for (int g = 0; g < 2; ++g) {
+                       for (int s = 0; s < groups[g]->GetNumSequences(); ++s) {
+                               if (s == 0)
+                                       cerr << "[";
+                               else
+                                       cerr << ",";
+                               cerr << groups[g]->GetSequence(s)->GetLabel();
+                       }
+                       cerr << closers[g];
+               }
+       }
+#if 1
+       VF *posteriorMatrix = model.BuildPosterior (align1, align2, sparseMatrices, cutoff);
+#else
+       VF *posteriorMatrix = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+#endif
+
+       // derive the alignment path and its score from the posterior
+       const int firstLength = align1->GetSequence(0)->GetLength();
+       const int secondLength = align2->GetSequence(0)->GetLength();
+       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                       firstLength, secondLength, *posteriorMatrix);
+
+       delete posteriorMatrix;
+
+       if (enableVerbose) {
+               // sum, over all cross-group pairs, of the shorter sequence length
+               int totLength = 0;
+               for (int a = 0; a < align1->GetNumSequences(); ++a) {
+                       for (int b = 0; b < align2->GetNumSequences(); ++b) {
+                               totLength += min(align1->GetSequence(a)->GetLength(),
+                                               align2->GetSequence(b)->GetLength());
+                       }
+               }
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment: group 1 rows first, then group 2 rows
+       MultiSequence *merged = new MultiSequence();
+       for (int a = 0; a < align1->GetNumSequences(); ++a) {
+               merged->AddSequence(
+                               align1->GetSequence(a)->AddGaps(alignment.first, 'X'));
+       }
+       for (int b = 0; b < align2->GetNumSequences(); ++b) {
+               merged->AddSequence(
+                               align2->GetSequence(b)->AddGaps(alignment.first, 'Y'));
+       }
+       if (!enableAlignOrder)
+               merged->SortByLabel();
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return merged;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+//
+// Performs one round of the probabilistic consistency
+// transformation and returns the re-estimated sparse matrices.
+//
+//   seqsWeights    - one weight per sequence, indexed like sequences
+//   sequences      - the input sequences
+//   sparseMatrices - current pairwise posteriors; entries [a][b]
+//                    with a < b are the ones read here
+//
+// The result is a numSeqs x numSeqs table whose [i][j] entry is
+// a newly allocated SparseMatrix for each processed pair and
+// whose [j][i] entry is NULL.  Without OpenMP the pairs are all
+// i < j; with OpenMP they are taken from seqsPairs[0..numPairs).
+//
+// NOTE(review): the products of the weights are still handed to
+// Relax()/Relax1(), but the self-contribution and the final
+// normalisation below do not use them (the weighted forms are
+// commented out), so sumW is accumulated without being read.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
+               MultiSequence *sequences,
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       const int numSeqs = sequences->GetNumSequences();
+
+       // result table, every entry initially NULL
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences
+       // (the two preprocessor branches open one or two loop scopes; the
+       // matching #ifndef near the end of the function closes the extra one)
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       if (enableVerbose) {
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix
+                       // (the returned vector is deleted at the end of this iteration,
+                       // so it is presumably a fresh dense copy -- TODO confirm)
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+
+                       // contribution from the summation where z = x and z = y
+                       // (each cell is doubled; w only seeds sumW, see header note)
+                       float w = wi * wi * wj + wi * wj * wj;
+                       float sumW = w;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                                //posterior[k] = w*posterior[k];
+                               posterior[k] += posterior[k];
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences
+                       // the branch taken depends on where k falls relative to i and j,
+                       // so that only matrices indexed [smaller][larger] are read
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk;
+                                       sumW += w;
+                                       if (k < i)
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       else if (k > i && k < j)
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       else {
+                                               // k > j: transpose [j][k] into a temporary for Relax()
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       // normalise by the number of sequences
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               //posterior[k] /= sumW;
+                               posterior[k] /= numSeqs;
+                       }
+                       // mask out positions not originally in the posterior matrix
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       // row 0 is cleared completely
+                       for (int y = 0; y <= seq2Length; y++)
+                               posterior[y] = 0;
+                       // rows 1..seq1Length keep only the columns listed in matXY;
+                       // assumes each row's entries are in ascending column order
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0;
+                               while (XYptr != XYend) {
+
+                                       // zero out all cells until the first filled column
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       // the dense working copy is no longer needed
+                       delete posteriorPtr;
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+
+       int lengthX = matXZ->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       // for every x[i]
+       for (int i = 1; i <= lengthX; i++) {
+               SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
+               SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
+
+               VF::iterator base = posterior.begin() + i * (lengthY + 1);
+
+               // iterate through all x[i]-z[k]
+               while (XZptr != XZend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
+                       SafeVector<PIF>::iterator ZYend = ZYptr
+                                       + matZY->GetRowSize(XZptr->first);
+                       const float XZval = XZptr->second;
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * XZval * ZYptr->second;
+                                base[ZYptr->first] += XZval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       XZptr++;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               while (ZXptr != ZXend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
+                       SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               base[ZYptr->first] += ZXval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       ZXptr++;
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partionining iterative
+// refinement.
+/////////////////////////////////////////////////////////////////
+
+void MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+                int index = rand();
+               if (index % 2) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty()) return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);   
+/*
+//start add by Yongtao
+#if 0
+       VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
+                       sparseMatrices, cutoff);
+#endif
+
+       // compute an "accuracy" measure for the alignment before refinement        
+        SafeVector<SafeVector<char>::iterator> oldOnePtrs(groupOne.size());
+       SafeVector<SafeVector<char>::iterator> oldTwoPtrs(groupTwo.size());
+        int i=0; 
+       for (set<int>::const_iterator iter = groupOne.begin();
+                       iter != groupOne.end(); ++iter) {
+               oldOnePtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+        i=0;
+       for (set<int>::const_iterator iter = groupTwo.begin();
+                       iter != groupTwo.end(); ++iter) {
+               oldTwoPtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+
+        VF &posteriorArr = *posterior;
+        int oldLength = alignment->GetSequence(0)->GetLength();
+       int groupOneindex=0; int groupTwoindex=0;
+       float accuracy_before = 0; 
+       for (int i = 1; i <= oldLength; i++) {
+               // check to see if there is a gap in every sequence of the set
+               bool foundOne = false;
+               for (int j = 0; !foundOne && j < (int) groupOne.size(); j++)
+                       foundOne = (oldOnePtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundOne) groupOneindex ++;
+               bool foundTwo = false;
+               for (int j = 0; !foundTwo && j < (int) groupTwo.size(); j++)
+                       foundTwo = (oldTwoPtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundTwo) groupTwoindex ++;
+                if(foundOne && foundTwo) accuracy_before += 
+                               posteriorArr[groupOneindex * (groupTwoSeqs->GetSequence(0)->GetLength() + 1) + groupTwoindex];
+       }
+       
+       pair<SafeVector<char> *, float> refinealignment;
+       //perform alignment
+       refinealignment = model.ComputeAlignment(groupOneSeqs->GetSequence(0)->GetLength(),
+                       groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
+        delete posterior;
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+        //compare accuracy measure before and after refinement
+       //if (refinealignment.second > accuracy_before) {
+                //cerr<<"Before:" << accuracy_before<<" after: "<< refinealignment.second<< endl; 
+               for (int i = 0; i < groupOneSeqs->GetNumSequences(); i++)
+                       result->AddSequence(
+                               groupOneSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'X'));
+               for (int i = 0; i < groupTwoSeqs->GetNumSequences(); i++)
+                       result->AddSequence(
+                               groupTwoSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'Y'));
+               // free temporary alignment
+               delete refinealignment.first;
+               delete alignment;
+                alignment = result;
+
+       }
+        else{
+               if(numIterativeRefinementReps < 8*numSeqs) numIterativeRefinementReps++;
+               delete groupOneSeqs;
+               delete groupTwoSeqs;
+                return false;
+       }
+  */   
+//end add by yongtao
+
+        delete alignment;
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices, model); //original
+       delete groupOneSeqs;
+       delete groupTwoSeqs;            
+               
+}
+
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       vector<bool> inGroup1;
+       inGroup1.resize(numSeqs);
+       for (int i = 0; i < numSeqs; i++) {
+               inGroup1[i] = false;
+       }
+
+       AlignmentOrder* orders = this->tree->getAlignOrders();
+       AlignmentOrder* order = &orders[nodeIndex];
+       for (int i = 0; i < order->leftNum; i++) {
+               int si = order->leftLeafs[i];
+               inGroup1[si] = true;
+       }
+       for (int i = 0; i < order->rightNum; i++) {
+               int si = order->rightLeafs[i];
+               inGroup1[si] = true;
+       }
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               if (inGroup1[i]) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Computes annotation for multiple alignment and write values
+// to a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       SafeVector<int> position(numSeqs, 0);
+       SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
+       for (int i = 0; i < numSeqs; i++)
+               seqs[i] = alignment->GetSequence(i)->GetDataPtr();
+       SafeVector<pair<int, int> > active;
+       active.reserve(numSeqs);
+
+       SafeVector<int> lab;
+       for (int i = 0; i < numSeqs; i++)
+               lab.push_back(alignment->GetSequence(i)->GetSortLabel());
+
+       // for every column
+       for (int i = 1; i <= alignLength; i++) {
+
+               // find all aligned residues in this particular column
+               active.clear();
+               for (int j = 0; j < numSeqs; j++) {
+                       if (seqs[j][i] != '-') {
+                               active.push_back(make_pair(lab[j], ++position[j]));
+                       }
+               }
+
+               sort(active.begin(), active.end());
+               outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       float val = 0;
+       for (int i = 0; i < (int) active.size(); i++) {
+               for (int j = i + 1; j < (int) active.size(); j++) {
+                       val += sparseMatrices[active[i].first][active[j].first]->GetValue(
+                                       active[i].second, active[j].second);
+               }
+       }
+
+       return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
+
+}
diff --git a/binaries/src/GLProbs-1.0/MSAfull.cpp b/binaries/src/GLProbs-1.0/MSAfull.cpp
new file mode 100644 (file)
index 0000000..efe4dc5
--- /dev/null
@@ -0,0 +1,1471 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+// File names used by the program ("" = not set).
+// NOTE(review): presumably filled in while parsing the command line;
+// "no training" looks like a sentinel value -- confirm against ParseParams().
+string parametersInputFilename = "";
+string parametersOutputFilename = "no training";
+string annotationFilename = "";
+
+// enableVerbose:        extra progress output on cerr
+// enableAnnotation:     doAlign() calls WriteAnnotation() on the final alignment
+// enableClustalWOutput: the result is written with WriteALN() instead of WriteMFA()
+// numConsistencyReps:   number of DoRelaxation() rounds run by doAlign()
+// NOTE(review): enableAlignOrder, numPreTrainingReps and
+// numIterativeRefinementReps are not used in the code visible here.
+bool enableVerbose = false;
+bool enableAnnotation = false;
+bool enableClustalWOutput = false;
+bool enableAlignOrder = false;
+int numConsistencyReps = 2;
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 100;
+
+// NOTE(review): presumably the posterior-probability threshold used when
+// building sparse matrices -- confirm.
+float cutoff = 0;
+
+// Model parameters handed to ProbabilisticModel by the MSA constructor.
+// emitPairs is a 256 x 256 table and emitSingle a 256-entry table, both
+// indexed by (unsigned char) symbol values.
+VF initDistrib(NumMatrixTypes);
+VF gapOpen(2 * NumInsertStates);
+VF gapExtend(2 * NumInsertStates);
+VVF emitPairs(256, VF(256, 1e-10));
+VF emitSingle(256, 1e-5);
+
+string alphabet = alphabetDefault;
+
+// NOTE(review): presumably the accepted ranges of the corresponding
+// repetition-count options -- confirm against the option parser.
+const int MIN_PRETRAINING_REPS = 0;
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+// flag_gui:     0 = no GUI-related output, 1 = GUI-related output generated
+int flag_gui = 0;
+// flag_ppscore: 0 = no posterior-probability score sequence added to the
+//               output FASTA alignment, 1 = score sequence added
+int flag_ppscore = 0;
+
+///////////////////////////////
+// global scoring matrix variables
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160";
+int subst_index[26];
+
+double sub_matrix[26][26];
+// makes sure that the matrices are read only once
+int firstread = 0;
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+// 0 = protein, 1 = nucleotide
+int prot_nuc = 0;
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+// number of OpenMP threads; a value <= 0 makes the MSA constructor use
+// omp_get_num_procs()
+int numThreads = 0;
+
+// Argument record used by the partition-function code; set up by
+// init_arguments(), which is declared below and defined elsewhere.
+typedef struct {
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+argument_decl argument;
+
+// Helpers defined outside this part of the file.
+extern inline void read_sustitution_matrix(char *fileName);
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int();
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments();
+
+MSA::MSA(int argc, char* argv[]) {
+       //parse program parameters
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters();
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       //read the input sequences
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       //allocate space for sequence weights
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       //initilaize parameters for OPENMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+               cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+       cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       //set OpenMP to use dynamic number of threads which is equal to the number of processor cores on the host
+       omp_set_num_threads(numThreads);
+#endif 
+
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), initDistrib, gapOpen, gapExtend, emitPairs,
+                       emitSingle);
+
+       //write the alignment results to standard output
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+       //release resources
+       delete[] this->seqsWeights;
+       delete alignment;
+       delete sequences;
+}
+// Closes the output stream, but only when an output file name is set;
+// in that case alignOutFile is treated as a std::ofstream.
+MSA::~MSA() {
+       if (alignOutFileName.length() == 0)
+               return;
+
+       /*close the output file*/
+       ((std::ofstream*) alignOutFile)->close();
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Prints MSAPROBS parameters to STDERR.  If a filename is
+// specified, then the parameters are also written to the file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       // print parameters to the screen
+       cerr << message << endl << "    initDistrib[] = { ";
+       for (int i = 0; i < NumMatrixTypes; i++)
+               cerr << setprecision(10) << initDistrib[i] << " ";
+       cerr << "}" << endl << "        gapOpen[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapOpen[i] << " ";
+       cerr << "}" << endl << "      gapExtend[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapExtend[i] << " ";
+       cerr << "}" << endl << endl;
+
+       /*
+        for (int i = 0; i < 5; i++){
+        for (int j = 0; j <= i; j++){
+        cerr << emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]] << " ";
+        }
+        cerr << endl;
+        }*/
+
+       // if a file name is specified
+       if (filename) {
+
+               // attempt to open the file for writing
+               FILE *file = fopen(filename, "w");
+               if (!file) {
+                       cerr << "ERROR: Unable to write parameter file: " << filename
+                                       << endl;
+                       exit(1);
+               }
+
+               // if successful, then write the parameters to the file
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       fprintf(file, "%.10f ", initDistrib[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapOpen[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapExtend[i]);
+               fprintf(file, "\n");
+               fprintf(file, "%s\n", alphabet.c_str());
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++)
+                               fprintf(file, "%.10f ",
+                                               emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]]);
+                       fprintf(file, "\n");
+               }
+               for (int i = 0; i < (int) alphabet.size(); i++)
+                       fprintf(file, "%.10f ", emitSingle[(unsigned char) alphabet[i]]);
+               fprintf(file, "\n");
+               fclose(file);
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// First computes all pairwise posterior probability matrices.
+// Then, computes new parameters if training, or a final
+// alignment, otherwise.
+/////////////////////////////////////////////////////////////////
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, VF &initDistrib, VF &gapOpen,
+               VF &gapExtend, VVF &emitPairs, VF &emitSingle) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+
+       //create distance matrix
+       VVF probalign_distances(numSeqs, VF(numSeqs, 0));
+       VVF distances(numSeqs, VF(numSeqs, 0));//msa
+
+        float gl_accuracy = 0;
+        //creat sparseMatrices
+       SafeVector<SafeVector<SparseMatrix *> > probalign_sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+        SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL)); // msa
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       // verbose output
+                       if (enableVerbose) {
+                               cerr << "Computing posterior matrix: (" << a + 1 << ") "
+                                               << seq1->GetHeader() << " vs. " << "(" << b + 1 << ") "
+                                               << seq2->GetHeader() << " -- ";
+                       }
+
+//probcons
+                       // compute forward and backward probabilities
+                       VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                       assert(forward);
+                       VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                       assert(backward);
+                       // compute posterior probability matrix from HMM
+                       VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
+                       assert(probcons_posterior);             
+                       delete forward;
+                       delete backward;                     
+
+//probalign
+                       VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                       assert(probalign_posterior);
+                       probalign_sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),seq2->GetLength(),                                                                  *probalign_posterior);
+                       probalign_sparseMatrices[b][a] = NULL;                 
+                       pair<SafeVector<char> *, float> probalign_alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *probalign_posterior);                     
+                       probalign_distances[a][b] =1.0f - probalign_alignment.second / min(seq1->GetLength(), seq2->GetLength());                       
+                       delete probalign_alignment.first;  
+
+//local
+                       forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                       assert(forward);
+                       backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                       assert(backward);
+                       VF* local_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                       delete forward;
+                       delete backward; 
+                  
+//GL
+                       //merge probalign + local + probcons 
+                       VF::iterator ptr1 = probcons_posterior->begin();
+                       VF::iterator ptr2 = probalign_posterior->begin();                       
+                       VF::iterator ptr3 = local_posterior->begin();
+                       VF* posterior = new VF((seq1->GetLength()+1) * (seq2->GetLength()+1)); assert (posterior);  //msa
+                       VF::iterator ptr = posterior->begin();                 
+                       for (int i = 0; i <= seq1->GetLength(); i++) {
+                               for (int j = 0; j <= seq2->GetLength(); j++) {
+                                       float v1 = *ptr1;
+                                       float v2 = *ptr2;
+                                       float v3 = *ptr3;
+                                       *ptr = sqrt(v1*v1 + v2*v2 + v3*v3);
+                                       ptr1++;
+                                       ptr2++;
+                                        ptr3++;
+                                       ptr++;
+                               }
+                       }
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> gl_alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       //compute expected accuracy
+                       distances[a][b] = distances[b][a] = 1.0f - gl_alignment.second
+                                       / (3*min(seq1->GetLength(), seq2->GetLength()));
+
+                       // compute sparse representations
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+                        //
+                       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+                       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+                        float N_correct_match = 0;
+                        int i = 1;int j = 1;
+                       for (SafeVector<char>::iterator iter = gl_alignment.first->begin(); 
+                               iter != gl_alignment.first->end(); ++iter){
+                               if (*iter == 'B'){
+                                       unsigned char c1 = (unsigned char) iter1[i++];
+                                       unsigned char c2 = (unsigned char) iter2[j++];
+                                       if(c1==c2) N_correct_match += 1;
+                               }
+                                else if(*iter == 'X') i++;
+                               else if(*iter == 'Y') j++;
+                        }
+                        if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) cerr << "GL"<< endl;
+                       gl_accuracy += N_correct_match / min(seq1->GetLength(), seq2->GetLength());
+                        //
+                        delete probcons_posterior;
+                       delete probalign_posterior;
+                       delete local_posterior;
+                       delete posterior;
+
+#ifndef _OPENMP
+               }
+#endif
+       } 
+
+//self-adaptive   
+       gl_accuracy /= numPairs;
+        if(gl_accuracy > 0.4){
+               for (int a = 0; a < numSeqs - 1; a++) 
+                       for (int b = a + 1; b < numSeqs; b++) {
+                               distances[a][b] = distances[b][a] = probalign_distances[a][b];
+                               sparseMatrices[a][b] = probalign_sparseMatrices[a][b];
+                               sparseMatrices[b][a] = NULL;
+                       }
+        }
+
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // perform the consistency transformation the desired number of times
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) {
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Attempts to parse an integer from the character string given.
+// The base is auto-detected by strtol (decimal, 0x hex, leading-0
+// octal).  Returns true only if the WHOLE string is a valid number
+// that fits in an int; *val is written only in that case.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(data);
+       assert(val);
+
+       errno = 0;
+       retVal = strtol(data, &endPtr, 0);
+
+       // nothing was consumed, or characters remain after the number
+       // (e.g. "12abc"): treat both as a parsing error
+       if (endPtr == data || *endPtr != '\0')
+               return false;
+       // strtol reports out-of-range input through errno
+       if (errno != 0)
+               return false;
+       // long may be wider than int, so check the int range explicitly
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
+               return false;
+       *val = (int) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Attempts to parse a float from the character string given.
+// Returns true only if the WHOLE string is a valid number and no
+// range error made the result unusable; *val is written only in
+// that case.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+
+       assert(data);
+       assert(val);
+
+       errno = 0;
+       retVal = strtod(data, &endPtr);
+
+       // nothing was consumed, or characters remain after the number
+       // (e.g. "0.5x"): treat both as a parsing error
+       if (endPtr == data || *endPtr != '\0')
+               return false;
+       // a zero result together with an error is rejected
+       if (retVal == 0 && errno != 0)
+               return false;
+       // an error together with a large magnitude is rejected
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
+               return false;
+       *val = (float) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// SetEmitPairAllCases()
+//
+// Store one pair-emission value for the residue letters a and b,
+// in both orientations ([a][b] and [b][a]) and for every
+// upper/lower-case combination of the two letters.
+/////////////////////////////////////////////////////////////////
+
+static void SetEmitPairAllCases(VVF &pairs, char a, char b, float value) {
+       // go through unsigned char first: tolower/toupper are undefined
+       // for negative arguments other than EOF
+       const unsigned char ua = (unsigned char) a;
+       const unsigned char ub = (unsigned char) b;
+       const unsigned char first[2] = { (unsigned char) tolower(ua),
+                       (unsigned char) toupper(ua) };
+       const unsigned char second[2] = { (unsigned char) tolower(ub),
+                       (unsigned char) toupper(ub) };
+
+       for (int r = 0; r < 2; r++) {
+               for (int c = 0; c < 2; c++) {
+                       pairs[first[r]][second[c]] = value;
+                       pairs[second[c]][first[r]] = value;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// SetEmitSingleBothCases()
+//
+// Store one single-emission value for the lower- and upper-case
+// form of the residue letter a.
+/////////////////////////////////////////////////////////////////
+
+static void SetEmitSingleBothCases(VF &single, char a, float value) {
+       const unsigned char ua = (unsigned char) a;
+       single[(unsigned char) tolower(ua)] = value;
+       single[(unsigned char) toupper(ua)] = value;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters.  With no parameter file name set, the compiled-in
+// defaults for 1 or 2 pairs of insert states are used.  Otherwise
+// the file is expected to hold, in order: one line each for the
+// initial distribution, gap-open and gap-extend values, one
+// alphabet line, the lower-triangular pair-emission values and
+// finally the single-emission values.  Any unreadable or missing
+// value is reported on stderr and terminates the program.
+/////////////////////////////////////////////////////////////////
+
+void MSA::ReadParameters() {
+
+       // background values for every character not in the alphabet
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // no parameter file: fall back to the built-in defaults
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++) {
+                               gapOpen[i] = gapOpen1Default[i];
+                               gapExtend[i] = gapExtend1Default[i];
+                       }
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++) {
+                               gapOpen[i] = gapOpen2Default[i];
+                               gapExtend[i] = gapExtend2Default[i];
+                       }
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       SetEmitSingleBothCases(emitSingle, alphabet[i],
+                                       emitSingleDefault[i]);
+                       // the default pair table is lower-triangular
+                       for (int j = 0; j <= i; j++)
+                               SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j],
+                                               emitPairsDefault[i][j]);
+               }
+               return;
+       }
+
+       // read everything from the user-supplied parameter file
+       ifstream data(parametersInputFilename.c_str());
+       if (data.fail()) {
+               cerr << "ERROR: Unable to read parameter file: "
+                               << parametersInputFilename << endl;
+               exit(1);
+       }
+
+       // the first three lines hold the transition parameters
+       string line[3];
+       for (int i = 0; i < 3; i++) {
+               if (!getline(data, line[i])) {
+                       cerr
+                                       << "ERROR: Unable to read transition parameters from parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+       }
+       istringstream data2;
+       data2.clear();
+       data2.str(line[0]);
+       for (int i = 0; i < NumMatrixTypes; i++)
+               data2 >> initDistrib[i];
+       bool transitionsOk = !data2.fail();
+       data2.clear();
+       data2.str(line[1]);
+       for (int i = 0; i < 2 * NumInsertStates; i++)
+               data2 >> gapOpen[i];
+       transitionsOk = transitionsOk && !data2.fail();
+       data2.clear();
+       data2.str(line[2]);
+       for (int i = 0; i < 2 * NumInsertStates; i++)
+               data2 >> gapExtend[i];
+       transitionsOk = transitionsOk && !data2.fail();
+
+       // a short or malformed line would otherwise leave stale values behind
+       if (!transitionsOk) {
+               cerr
+                               << "ERROR: Unable to read transition parameters from parameter file: "
+                               << parametersInputFilename << endl;
+               exit(1);
+       }
+
+       if (!getline(data, line[0])) {
+               cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                               << parametersInputFilename << endl;
+               exit(1);
+       }
+
+       // read alphabet as concatenation of all characters on alphabet line
+       alphabet = "";
+       string token;
+       data2.clear();
+       data2.str(line[0]);
+       while (data2 >> token)
+               alphabet += token;
+
+       // lower-triangular pair emissions, row by row
+       for (int i = 0; i < (int) alphabet.size(); i++) {
+               for (int j = 0; j <= i; j++) {
+                       float val;
+                       if (!(data >> val)) {
+                               cerr
+                                               << "ERROR: Unable to read emission parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+                       SetEmitPairAllCases(emitPairs, alphabet[i], alphabet[j], val);
+               }
+       }
+
+       // single emissions, one per alphabet letter
+       for (int i = 0; i < (int) alphabet.size(); i++) {
+               float val;
+               if (!(data >> val)) {
+                       cerr
+                                       << "ERROR: Unable to read emission parameters from parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+               SetEmitSingleBothCases(emitSingle, alphabet[i], val);
+       }
+       data.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// printUsage()
+//
+// Print the banner and the list of command-line options to
+// stderr.  The defaults shown are the current values of the
+// corresponding members.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       cerr
+                       << "************************************************************************"
+                       << endl
+                       << "\tMSAPROBS is an open-source protein multiple sequence alignment algorithm"
+                       << endl
+                       << "\tbased on pair hidden markov model and partition function posterior"
+                       << endl
+                       << "\tprobabilities. If any comments or problems, please contact"
+                       << endl
+                       << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl
+                       << "*************************************************************************"
+                       << endl << "Usage:" << endl
+                       << "       msaprobs [OPTION]... [infile]..." << endl << endl
+                       << "Description:" << endl
+                       << "       Align sequences in multi-FASTA format" << endl << endl
+                       << "       -help, -?" << endl
+                       << "              print this usage message" << endl
+                       << "       -o, --outfile <string>" << endl
+                       << "              specify the output file name (STDOUT by default)"
+                       << endl << "       -p, --paramfile <string>" << endl
+                       << "              read model parameters from the given file (built-in defaults otherwise)"
+                       << endl << "       -num_threads <integer>" << endl
+                       << "              specify the number of threads used, and otherwise detect automatically"
+                       << endl << "       -clustalw" << endl
+                       << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl << "       -c, --consistency REPS" << endl
+                       << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl
+                       << "       -ir, --iterative-refinement REPS" << endl
+                       << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl << "       -co, --cutoff VALUE" << endl
+                       << "              use 0 <= VALUE <= 1 as the cutoff when building posterior matrices"
+                       << endl << endl << "       -v, --verbose" << endl
+                       << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl
+                       << "       -annot FILENAME" << endl
+                       << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl << "       -a, --alignment-order" << endl
+                       << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl
+                       << "       -version " << endl
+                       << "              print out version of MSAPROBS " << endl << endl;
+}
+/////////////////////////////////////////////////////////////////
+// ParseBoundedIntOption()
+//
+// Helper for ParseParams(): argv[i] is an option that requires an
+// integer argument in [minVal, maxVal].  Advances i onto the
+// argument and returns its value; prints an error and exits if
+// the argument is missing, not an integer, or out of range.
+/////////////////////////////////////////////////////////////////
+static int ParseBoundedIntOption(int argc, char **argv, int &i, int minVal,
+               int maxVal) {
+       int parsed;
+
+       if (i >= argc - 1) {
+               cerr << "ERROR: Integer expected for option " << argv[i] << endl;
+               exit(1);
+       }
+       if (!GetInteger(argv[++i], &parsed)) {
+               cerr << "ERROR: Invalid integer following option " << argv[i - 1]
+                               << ": " << argv[i] << endl;
+               exit(1);
+       }
+       if (parsed < minVal || parsed > maxVal) {
+               cerr << "ERROR: For option " << argv[i - 1]
+                               << ", integer must be between " << minVal << " and "
+                               << maxVal << "." << endl;
+               exit(1);
+       }
+       return parsed;
+}
+
+/////////////////////////////////////////////////////////////////
+// ParseParams()
+//
+// Parse all command-line options into the corresponding members
+// and open the output stream.  Arguments that do not start with
+// '-' are collected and returned as the list of input file names.
+// Any invalid option prints an error to stderr and exits.
+/////////////////////////////////////////////////////////////////
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+       int tempInt;
+       float tempFloat;
+
+       for (int i = 1; i < argc; i++) {
+               // anything that is not an option is an input file name
+               if (argv[i][0] != '-') {
+                       sequenceNames.push_back(string(argv[i]));
+                       continue;
+               }
+
+               //help
+               if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
+                       printUsage();
+                       exit(1);
+               }
+               //output file name
+               else if (!strcmp(argv[i], "-o") || !strcmp(argv[i], "--outfile")) {
+                       if (i < argc - 1) {
+                               alignOutFileName = argv[++i];   //get the file name
+                       } else {
+                               cerr << "ERROR: String expected for option " << argv[i]
+                                               << endl;
+                               exit(1);
+                       }
+               }
+               // parameter file
+               else if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--paramfile")) {
+                       if (i < argc - 1) {
+                               parametersInputFilename = string(argv[++i]);
+                       } else {
+                               cerr << "ERROR: Filename expected for option " << argv[i]
+                                               << endl;
+                               exit(1);
+                       }
+               }
+               //number of threads used ("-p" is not tested here: it always
+               //selects the parameter file above, so the test could never match)
+               else if (!strcmp(argv[i], "-num_threads")) {
+                       if (i < argc - 1) {
+                               if (!GetInteger(argv[++i], &tempInt)) {
+                                       cerr << " ERROR: invalid integer following option "
+                                                       << argv[i - 1] << ": " << argv[i] << endl;
+                                       exit(1);
+                               }
+                               // negative counts are clamped to 0
+                               if (tempInt < 0) {
+                                       tempInt = 0;
+                               }
+                               numThreads = tempInt;
+                       } else {
+                               cerr << "ERROR: Integer expected for option " << argv[i]
+                                               << endl;
+                               exit(1);
+                       }
+               }
+               // number of consistency transformations
+               else if (!strcmp(argv[i], "-c")
+                               || !strcmp(argv[i], "--consistency")) {
+                       numConsistencyReps = ParseBoundedIntOption(argc, argv, i,
+                                       MIN_CONSISTENCY_REPS, MAX_CONSISTENCY_REPS);
+               }
+               // number of randomized partitioning iterative refinement passes
+               else if (!strcmp(argv[i], "-ir")
+                               || !strcmp(argv[i], "--iterative-refinement")) {
+                       numIterativeRefinementReps = ParseBoundedIntOption(argc, argv, i,
+                                       MIN_ITERATIVE_REFINEMENT_REPS,
+                                       MAX_ITERATIVE_REFINEMENT_REPS);
+               }
+               // annotation files
+               else if (!strcmp(argv[i], "-annot")) {
+                       enableAnnotation = true;
+                       if (i < argc - 1) {
+                               annotationFilename = argv[++i];
+                       } else {
+                               cerr << "ERROR: FILENAME expected for option " << argv[i]
+                                               << endl;
+                               exit(1);
+                       }
+               }
+               // clustalw output format
+               else if (!strcmp(argv[i], "-clustalw")) {
+                       enableClustalWOutput = true;
+               }
+               // cutoff
+               else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
+                       if (i < argc - 1) {
+                               if (!GetFloat(argv[++i], &tempFloat)) {
+                                       cerr
+                                                       << "ERROR: Invalid floating-point value following option "
+                                                       << argv[i - 1] << ": " << argv[i] << endl;
+                                       exit(1);
+                               }
+                               if (tempFloat < 0 || tempFloat > 1) {
+                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                       << ", floating-point value must be between 0 and 1."
+                                                       << endl;
+                                       exit(1);
+                               }
+                               cutoff = tempFloat;
+                       } else {
+                               cerr << "ERROR: Floating-point value expected for option "
+                                               << argv[i] << endl;
+                               exit(1);
+                       }
+               }
+               // verbose reporting
+               else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
+                       enableVerbose = true;
+               }
+               // alignment order
+               else if (!strcmp(argv[i], "-a")
+                               || !strcmp(argv[i], "--alignment-order")) {
+                       enableAlignOrder = true;
+               }
+               //print out version
+               else if (!strcmp(argv[i], "-version")) {
+                       cerr << "MSAPROBS version " << VERSION << endl;
+                       exit(1);
+               }
+               // bad arguments
+               else {
+                       cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
+                       exit(1);
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               alignOutFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+               // fail now instead of silently discarding the alignment later
+               if (alignOutFile->fail()) {
+                       cerr << "ERROR: Unable to open the output file "
+                                       << alignOutFileName << endl;
+                       exit(1);
+               }
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Walks the guide tree recursively.  A leaf yields a fresh
+// MultiSequence holding a clone of its sequence; an internal node
+// yields the alignment of its two sub-alignments.  The caller
+// owns the returned object.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // leaf of the alignment tree: wrap a copy of the single sequence
+       if (tree->leaf != NODE) {
+               MultiSequence *single = new MultiSequence();
+               assert(single);
+               single->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+               return single;
+       }
+
+       // internal node: align the children first (left, then right)
+       MultiSequence *leftAlignment = ProcessTree(tree->left, sequences,
+                       sparseMatrices, model);
+       MultiSequence *rightAlignment = ProcessTree(tree->right, sequences,
+                       sparseMatrices, model);
+
+       assert(leftAlignment);
+       assert(rightAlignment);
+
+       MultiSequence *merged = AlignAlignments(leftAlignment, rightAlignment,
+                       sparseMatrices, model);
+       assert(merged);
+
+       // the sub-alignments are no longer needed once merged
+       delete leftAlignment;
+       delete rightAlignment;
+
+       return merged;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Builds the progressive alignment with ProcessTree(), then runs
+// numIterativeRefinementReps passes of DoIterativeRefinement().
+// When alignment order was requested, the sort labels recorded
+// before refinement are written back afterwards; note that
+// enableAlignOrder is cleared in that case.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+       MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       // remember the sort labels as they are right after tree processing
+       SafeVector<int> savedLabels;
+       if (enableAlignOrder) {
+               for (int s = 0; s < alignment->GetNumSequences(); s++) {
+                       savedLabels.push_back(alignment->GetSequence(s)->GetSortLabel());
+               }
+               alignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // tree-based refinement (disabled)
+       // TreeBasedBiPartitioning (sparseMatrices, model, alignment, tree);
+       /*
+        int numSeqs = alignment->GetNumSequences();
+        //if(numSeqs < numIterativeRefinementReps){
+        for(int iter = 0; iter < 5; iter ++){
+               for(int i = 0; i < numSeqs - 1; i++){
+                       DoIterativeRefinementTreeNode(sparseMatrices, model, alignment, i);
+                }
+        }
+        //}*/
+
+       // iterative refinement passes
+       int pass = 0;
+       while (pass < numIterativeRefinementReps) {
+               DoIterativeRefinement(sparseMatrices, model, alignment);
+               pass++;
+       }
+       cerr << endl;
+
+       // write the saved labels back (no-op when nothing was saved)
+       const int numSaved = (int) savedLabels.size();
+       for (int s = 0; s < numSaved; s++) {
+               alignment->GetSequence(s)->SetSortLabel(savedLabels[s]);
+       }
+
+       // return final alignment
+       return alignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Aligns two MultiSequence objects against each other and returns
+// the combined alignment as a new MultiSequence owned by the
+// caller.  In verbose mode the labels of both groups and a score
+// normalised by the summed pairwise minimum lengths go to stderr.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // print some info about the alignment
+       if (enableVerbose) {
+               const char *separator = "[";
+               for (int s = 0; s < align1->GetNumSequences(); s++) {
+                       cerr << separator << align1->GetSequence(s)->GetLabel();
+                       separator = ",";
+               }
+               cerr << "] vs. ";
+               separator = "[";
+               for (int s = 0; s < align2->GetNumSequences(); s++) {
+                       cerr << separator << align2->GetSequence(s)->GetLabel();
+                       separator = ",";
+               }
+               cerr << "]: ";
+       }
+
+       // posterior built with the sequence weights (the variant without
+       // weights was compiled out in the original code)
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+
+       //perform alignment
+       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                       align1->GetSequence(0)->GetLength(),
+                       align2->GetSequence(0)->GetLength(), *posterior);
+
+       delete posterior;
+
+       if (enableVerbose) {
+               // compute total length of sequences
+               int totLength = 0;
+               for (int a = 0; a < align1->GetNumSequences(); a++) {
+                       for (int b = 0; b < align2->GetNumSequences(); b++) {
+                               totLength += min(align1->GetSequence(a)->GetLength(),
+                                               align2->GetSequence(b)->GetLength());
+                       }
+               }
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment: group 1 is gapped via 'X', group 2 via 'Y'
+       MultiSequence *result = new MultiSequence();
+       for (int s = 0; s < align1->GetNumSequences(); s++) {
+               result->AddSequence(
+                               align1->GetSequence(s)->AddGaps(alignment.first, 'X'));
+       }
+       for (int s = 0; s < align2->GetNumSequences(); s++) {
+               result->AddSequence(
+                               align2->GetSequence(s)->AddGaps(alignment.first, 'Y'));
+       }
+       if (!enableAlignOrder) {
+               result->SortByLabel();
+       }
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+// Performs one round of the probabilistic consistency transformation. For every pair (i, j) with i < j the dense
+// posterior of sparseMatrices[i][j] is doubled, the Relax()/Relax1() contribution of every other sequence k is added,
+// the sum is divided by numSeqs and masked to the cells stored in sparseMatrices[i][j]. Returns newly allocated matrices; the inputs are not freed here.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
+               MultiSequence *sequences,
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       const int numSeqs = sequences->GetNumSequences();
+       // result table: only entries [i][j] with i < j are filled below, everything else stays NULL
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences (OpenMP build: one flat parallel loop over seqsPairs/numPairs, which are set up outside this function; otherwise a nested i < j loop)
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       if (enableVerbose) { // the critical section below covers only this first message
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix; it is indexed below as a row-major (seq1Length + 1) x (seq2Length + 1) array and deleted at the end of this iteration
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+
+                       // contribution from the summation where z = x and z = y (w and sumW only matter to the commented-out weighted variant)
+                       float w = wi * wi * wj + wi * wj * wj;
+                       float sumW = w;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                                //posterior[k] = w*posterior[k];
+                               posterior[k] += posterior[k]; // doubles the entry: one copy each for z = x and z = y
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences; which stored matrices are used depends on where k falls relative to i < j
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk; // shadows the outer w; handed to Relax()/Relax1()
+                                       sumW += w;
+                                       if (k < i) // k < i < j: both stored matrices have k as their row sequence
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       else if (k > i && k < j) // i < k < j: (i,k) and (k,j) are stored in the needed orientation
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       else { // i < j < k: (j,k) is transposed so that it can act as (k,j)
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               //posterior[k] /= sumW;
+                               posterior[k] /= numSeqs; // unweighted normalisation by the number of sequences
+                       }
+                       // mask out positions not originally in the posterior matrix: row 0 is cleared, every other row keeps only the columns stored in matXY
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       for (int y = 0; y <= seq2Length; y++)
+                               posterior[y] = 0;
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0;
+                               while (XYptr != XYend) { // assumes the stored columns of a row are in ascending order - TODO confirm in SparseMatrix
+
+                                       // zero out all cells until the first filled column
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix as a new SparseMatrix built from the dense array
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       delete posteriorPtr;
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               } // closes the inner j loop, which exists only in the non-OpenMP build
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+
+       int lengthX = matXZ->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       // for every x[i]
+       for (int i = 1; i <= lengthX; i++) {
+               SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
+               SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
+
+               VF::iterator base = posterior.begin() + i * (lengthY + 1);
+
+               // iterate through all x[i]-z[k]
+               while (XZptr != XZend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
+                       SafeVector<PIF>::iterator ZYend = ZYptr
+                                       + matZY->GetRowSize(XZptr->first);
+                       const float XZval = XZptr->second;
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * XZval * ZYptr->second;
+                                base[ZYptr->first] += XZval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       XZptr++;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               while (ZXptr != ZXend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
+                       SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               base[ZYptr->first] += ZXval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       ZXptr++;
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partionining iterative
+// refinement.
+/////////////////////////////////////////////////////////////////
+
+void MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+                int index = rand();
+               if (index % 2) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty()) return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);   
+/*
+//start add by Yongtao
+#if 1
+       VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
+                       sparseMatrices, cutoff);
+#endif
+
+       // compute an "accuracy" measure for the alignment before refinement        
+        SafeVector<SafeVector<char>::iterator> oldOnePtrs(groupOne.size());
+       SafeVector<SafeVector<char>::iterator> oldTwoPtrs(groupTwo.size());
+        int i=0; 
+       for (set<int>::const_iterator iter = groupOne.begin();
+                       iter != groupOne.end(); ++iter) {
+               oldOnePtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+        i=0;
+       for (set<int>::const_iterator iter = groupTwo.begin();
+                       iter != groupTwo.end(); ++iter) {
+               oldTwoPtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+
+        VF &posteriorArr = *posterior;
+        int oldLength = alignment->GetSequence(0)->GetLength();
+       int groupOneindex=0; int groupTwoindex=0;
+       float accuracy_before = 0; 
+       for (int i = 1; i <= oldLength; i++) {
+               // check to see if there is a gap in every sequence of the set
+               bool foundOne = false;
+               for (int j = 0; !foundOne && j < (int) groupOne.size(); j++)
+                       foundOne = (oldOnePtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundOne) groupOneindex ++;
+               bool foundTwo = false;
+               for (int j = 0; !foundTwo && j < (int) groupTwo.size(); j++)
+                       foundTwo = (oldTwoPtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundTwo) groupTwoindex ++;
+                if(foundOne && foundTwo) accuracy_before += 
+                               posteriorArr[groupOneindex * (groupTwoSeqs->GetSequence(0)->GetLength() + 1) + groupTwoindex];
+       }
+        
+       pair<SafeVector<char> *, float> refinealignment;
+       //perform alignment
+       refinealignment = model.ComputeAlignment(groupOneSeqs->GetSequence(0)->GetLength(),
+                       groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
+        delete posterior;
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+        //compare accuracy measure before and after refinement
+       //if (refinealignment.second > accuracy_before) {
+                //cerr<<"Before:" << accuracy_before<<" after: "<< refinealignment.second<< endl; 
+               for (int i = 0; i < groupOneSeqs->GetNumSequences(); i++)
+                       result->AddSequence(
+                               groupOneSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'X'));
+               for (int i = 0; i < groupTwoSeqs->GetNumSequences(); i++)
+                       result->AddSequence(
+                               groupTwoSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'Y'));
+               // free temporary alignment
+               delete refinealignment.first;
+               delete alignment;
+                alignment = result;
+
+       }
+        else{
+               if(numIterativeRefinementReps < 8*numSeqs) numIterativeRefinementReps++;
+               delete groupOneSeqs;
+               delete groupTwoSeqs;
+                return false;
+       }
+  */   
+//end add by yongtao
+
+        //delete alignment;
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices, model); //original
+               delete groupOneSeqs;
+               delete groupTwoSeqs;            
+               
+}
+
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       vector<bool> inGroup1;
+       inGroup1.resize(numSeqs);
+       for (int i = 0; i < numSeqs; i++) {
+               inGroup1[i] = false;
+       }
+
+       AlignmentOrder* orders = this->tree->getAlignOrders();
+       AlignmentOrder* order = &orders[nodeIndex];
+       for (int i = 0; i < order->leftNum; i++) {
+               int si = order->leftLeafs[i];
+               inGroup1[si] = true;
+       }
+       for (int i = 0; i < order->rightNum; i++) {
+               int si = order->rightLeafs[i];
+               inGroup1[si] = true;
+       }
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               if (inGroup1[i]) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Computes annotation for multiple alignment and write values
+// to a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       SafeVector<int> position(numSeqs, 0);
+       SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
+       for (int i = 0; i < numSeqs; i++)
+               seqs[i] = alignment->GetSequence(i)->GetDataPtr();
+       SafeVector<pair<int, int> > active;
+       active.reserve(numSeqs);
+
+       SafeVector<int> lab;
+       for (int i = 0; i < numSeqs; i++)
+               lab.push_back(alignment->GetSequence(i)->GetSortLabel());
+
+       // for every column
+       for (int i = 1; i <= alignLength; i++) {
+
+               // find all aligned residues in this particular column
+               active.clear();
+               for (int j = 0; j < numSeqs; j++) {
+                       if (seqs[j][i] != '-') {
+                               active.push_back(make_pair(lab[j], ++position[j]));
+                       }
+               }
+
+               sort(active.begin(), active.end());
+               outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       float val = 0;
+       for (int i = 0; i < (int) active.size(); i++) {
+               for (int j = i + 1; j < (int) active.size(); j++) {
+                       val += sparseMatrices[active[i].first][active[j].first]->GetValue(
+                                       active[i].second, active[j].second);
+               }
+       }
+
+       return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
+
+}
diff --git a/binaries/src/GLProbs-1.0/MSAgl+l+p+gl.cpp b/binaries/src/GLProbs-1.0/MSAgl+l+p+gl.cpp
new file mode 100644 (file)
index 0000000..713acbe
--- /dev/null
@@ -0,0 +1,1512 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+string parametersInputFilename = "";
+string parametersOutputFilename = "no training"; // NOTE(review): the default looks like a sentinel rather than a file name - confirm where it is tested
+string annotationFilename = "";
+// run-time switches and repetition counts (presumably set while parsing the command line - not visible here)
+bool enableVerbose = false; // extra progress output on cerr, e.g. in doAlign()
+bool enableAnnotation = false; // when set, doAlign() calls WriteAnnotation()
+bool enableClustalWOutput = false; // when set, the constructor writes with WriteALN() instead of WriteMFA()
+bool enableAlignOrder = false;
+int numConsistencyReps = 2; // number of DoRelaxation() rounds run by doAlign()
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 100;
+
+float cutoff = 0;
+// model parameters; the constructor passes these five to ProbabilisticModel
+VF initDistrib(NumMatrixTypes);
+VF gapOpen(2 * NumInsertStates);
+VF gapExtend(2 * NumInsertStates);
+VVF emitPairs(256, VF(256, 1e-10)); // indexed by unsigned char symbol values in PrintParameters()
+VF emitSingle(256, 1e-5); // indexed by unsigned char symbol value in PrintParameters()
+
+string alphabet = alphabetDefault; // the symbols whose emission entries PrintParameters() writes out
+// presumably the accepted ranges of the corresponding *Reps options; the range checks are not in this part of the file
+const int MIN_PRETRAINING_REPS = 0;
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+int flag_gui = 0;   //0: no gui related output
+//1: gui related output generated
+int flag_ppscore = 0; //0: no pp score sequence added to the output fasta alignment
+//1: pp score sequence added to the output fasta alignment
+
+///////////////////////////////
+// global scoring matrix variables
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160"; // aminos and bases get no initialiser here
+int subst_index[26];
+
+double sub_matrix[26][26];
+int firstread = 0;             //this makes sure that matrices are read only once
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+int prot_nuc = 0;              //0=prot, 1=nucleotide
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+int numThreads = 0; // a value <= 0 makes the constructor use omp_get_num_procs() (OpenMP builds only)
+
+//argument support: parameter bundle with a single global instance, "argument"; presumably filled in by init_arguments() - TODO confirm
+typedef struct {
+       char input[30]; // fixed-size buffer of 30 chars
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+// the one instance of the bundle defined in this file
+argument_decl argument;
+
+extern inline void read_sustitution_matrix(char *fileName); // "sustitution" (sic): the spelling has to match the definition, which is not in this part of the file
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int();
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments(); // called by the MSA constructor before ReadParameters()
+
+MSA::MSA(int argc, char* argv[]) {
+       //parse program parameters
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters();
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       //read the input sequences
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       //allocate space for sequence weights
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       //initilaize parameters for OPENMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+               cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+       cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       //set OpenMP to use dynamic number of threads which is equal to the number of processor cores on the host
+       omp_set_num_threads(numThreads);
+#endif 
+       int levelid = ComputeSimilarity (sequences,ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,emitSingle));
+
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), levelid);
+
+       //write the alignment results to standard output
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+       //release resources
+       delete[] this->seqsWeights;
+       delete alignment;
+       delete sequences;
+}
+MSA::~MSA() {
+       // close the stream only when an output file name was given; the
+       // cast assumes alignOutFile then points at a std::ofstream
+       if (!alignOutFileName.empty())
+               ((std::ofstream*) alignOutFile)->close();
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Prints MSAPROBS parameters to STDERR.  If a filename is
+// specified, then the parameters are also written to the file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       // print parameters to the screen
+       cerr << message << endl << "    initDistrib[] = { ";
+       for (int i = 0; i < NumMatrixTypes; i++)
+               cerr << setprecision(10) << initDistrib[i] << " ";
+       cerr << "}" << endl << "        gapOpen[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapOpen[i] << " ";
+       cerr << "}" << endl << "      gapExtend[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapExtend[i] << " ";
+       cerr << "}" << endl << endl;
+
+       // if a file name is specified
+       if (filename) {
+
+               // attempt to open the file for writing
+               FILE *file = fopen(filename, "w");
+               if (!file) {
+                       cerr << "ERROR: Unable to write parameter file: " << filename
+                                       << endl;
+                       exit(1);
+               }
+
+               // if successful, then write the parameters to the file
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       fprintf(file, "%.10f ", initDistrib[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapOpen[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapExtend[i]);
+               fprintf(file, "\n");
+               fprintf(file, "%s\n", alphabet.c_str());
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++)
+                               fprintf(file, "%.10f ",
+                                               emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]]);
+                       fprintf(file, "\n");
+               }
+               for (int i = 0; i < (int) alphabet.size(); i++)
+                       fprintf(file, "%.10f ", emitSingle[(unsigned char) alphabet[i]]);
+               fprintf(file, "\n");
+               fclose(file);
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// Computes a posterior matrix for every sequence pair (model chosen by levelid: 2 = ::ComputePostProbs only, 1 = the
+// "local" forward/backward posterior only, anything else = root mean square of the probcons, probalign and local
+// posteriors), builds the guide tree, runs numConsistencyReps rounds of DoRelaxation() and returns the final alignment (deleted by the caller).
+/////////////////////////////////////////////////////////////////
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2); // defined elsewhere; labelled "probalign" below
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, int levelid) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+       //create distance matrix (filled symmetrically in the pair loop below)
+       VVF distances(numSeqs, VF(numSeqs, 0));
+        //create sparseMatrices (only [a][b] with a < b is ever allocated)
+        SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL)); 
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model: a flat list of all a < b pairs, so that one parallel loop covers them
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       //posterior probability matrix (assigned by exactly one of the three branches below)
+                       VF* posterior;
+
+//high similarity use global model (levelid == 2: the ::ComputePostProbs posterior on its own)
+                       if(levelid == 2) posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+
+//low similarity use local model (forward/backward called with false as third argument)
+                       else if(levelid == 1){
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               delete forward;
+                               delete backward; 
+                       }
+
+//extreme low or extreme high similarity use combined model
+                       else{
+//probcons
+                               // compute forward and backward probabilities
+                               VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                               assert(forward);
+                               VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                               assert(backward);
+                               // compute posterior probability matrix from HMM
+                               VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
+                               assert(probcons_posterior);
+                               delete forward;
+                               delete backward;
+
+//probalign
+                               VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
+                               assert(probalign_posterior);
+//local
+                               forward = model.ComputeForwardMatrix(seq1, seq2,false);
+                               assert(forward);
+                               backward = model.ComputeBackwardMatrix(seq1, seq2,false);
+                               assert(backward);
+                               posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
+                               assert(posterior);
+                               delete forward;
+                               delete backward;
+//combined model
+                               //merge probalign + local + probcons; the result overwrites the local posterior in place
+                               VF::iterator ptr1 = probcons_posterior->begin();
+                               VF::iterator ptr2 = probalign_posterior->begin();
+                               VF::iterator ptr = posterior->begin();
+                               for (int i = 0; i <= seq1->GetLength(); i++) {
+                                       for (int j = 0; j <= seq2->GetLength(); j++) {
+                                               float v1 = *ptr1;
+                                               float v2 = *ptr2;
+                                               float v3 = *ptr;
+                                               *ptr = sqrt((v1*v1 + v2*v2 + v3*v3)/3); // root mean square of the three values
+                                               ptr1++;
+                                               ptr2++;
+                                               ptr++;
+                                       }
+                               }
+                               delete probcons_posterior;
+                               delete probalign_posterior;
+                       }
+                        assert(posterior);
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       //compute expected accuracy and store it as a distance: 1 - score / length of the shorter sequence
+                       distances[a][b] = distances[b][a] = 1.0f - alignment.second
+                                       / min(seq1->GetLength(), seq2->GetLength());
+
+                       // compute sparse representations
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+
+                       delete posterior;
+                       delete alignment.first;
+#ifndef _OPENMP
+               } // closes the inner b loop, which exists only in the non-OpenMP build
+#endif
+       } 
+
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // perform the consistency transformation the desired number of times; fweights = seqsWeights * 10 / INT_MULTIPLY as float
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY; // NOTE(review): where seqsWeights gets filled is not visible in this function
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) {
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices (all entries are visited, including the NULL ones)
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Attempts to parse the whole character string given as an
+// integer (strtol with base 0, so decimal, 0x-prefixed hex and
+// 0-prefixed octal are accepted) and stores the result in *val.
+// Returns true only if no parsing error occurs: the string must
+// contain a number, nothing may follow the number, and the value
+// must fit in an int.  *val is left untouched on failure.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(val);
+       if (data == NULL)
+               return false;
+
+       errno = 0;
+       retVal = strtol(data, &endPtr, 0);
+       // nothing was converted, or junk follows the number (e.g. "12abc")
+       if (endPtr == data || *endPtr != '\0')
+               return false;
+       // strtol reported a conversion problem such as overflow of long
+       if (errno != 0)
+               return false;
+       // representable as long but not as int
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
+               return false;
+       *val = (int) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Attempts to parse the whole character string given as a
+// floating-point number (via strtod) and stores it in *val.
+// Returns true only if no parsing error occurs: the string must
+// contain a number and nothing may follow it.  *val is left
+// untouched on failure.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+
+       assert(val);
+       if (data == NULL)
+               return false;
+
+       errno = 0;
+       retVal = strtod(data, &endPtr);
+       // nothing was converted, or junk follows the number (e.g. "0.5x")
+       if (endPtr == data || *endPtr != '\0')
+               return false;
+       // strtod flagged an error and produced zero
+       if (retVal == 0 && errno != 0)
+               return false;
+       // strtod flagged an error and produced a large-magnitude result;
+       // the 1e6 threshold is kept exactly as in the original check
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
+               return false;
+       *val = (float) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters.  When no parameter file name was given the
+// compiled-in defaults are used; otherwise the values are read
+// from the file.  Any unreadable or incomplete parameter file is
+// reported on stderr and terminates the program with exit(1).
+/////////////////////////////////////////////////////////////////
+
+// Stores val as the emission value of the letter pair (a, b), in both
+// orders and for every upper/lower-case combination of the two letters.
+// The letters are converted to unsigned char before the case mapping
+// because tolower()/toupper() are undefined for negative char values.
+static void SetSymmetricEmitPair(VVF &pairs, char a, char b, float val) {
+       const unsigned char first[2] = {
+                       (unsigned char) tolower((unsigned char) a),
+                       (unsigned char) toupper((unsigned char) a) };
+       const unsigned char second[2] = {
+                       (unsigned char) tolower((unsigned char) b),
+                       (unsigned char) toupper((unsigned char) b) };
+       for (int s = 0; s < 2; s++) {
+               for (int t = 0; t < 2; t++) {
+                       pairs[first[s]][second[t]] = val;
+                       pairs[second[t]][first[s]] = val;
+               }
+       }
+}
+
+void MSA::ReadParameters() {
+
+       ifstream data;
+
+       // every table entry starts at a small background value
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // read initial state distribution and transition parameters
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++) {
+                               gapOpen[i] = gapOpen1Default[i];
+                               gapExtend[i] = gapExtend1Default[i];
+                       }
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++) {
+                               gapOpen[i] = gapOpen2Default[i];
+                               gapExtend[i] = gapExtend2Default[i];
+                       }
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               // default emission tables: the pair table is given as a lower
+               // triangle (j <= i) and mirrored by the helper
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       const unsigned char letter = (unsigned char) alphabet[i];
+                       emitSingle[(unsigned char) tolower(letter)] = emitSingleDefault[i];
+                       emitSingle[(unsigned char) toupper(letter)] = emitSingleDefault[i];
+                       for (int j = 0; j <= i; j++)
+                               SetSymmetricEmitPair(emitPairs, alphabet[i], alphabet[j],
+                                               emitPairsDefault[i][j]);
+               }
+       } else {
+               data.open(parametersInputFilename.c_str());
+               if (data.fail()) {
+                       cerr << "ERROR: Unable to read parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // the first three lines hold the initial distribution, the
+               // gap-open and the gap-extend parameters, in that order
+               string line[3];
+               for (int i = 0; i < 3; i++) {
+                       if (!getline(data, line[i])) {
+                               cerr
+                                               << "ERROR: Unable to read transition parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+               }
+               istringstream data2;
+               bool transitionsOk = true;
+               data2.clear();
+               data2.str(line[0]);
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       data2 >> initDistrib[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[1]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapOpen[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               data2.clear();
+               data2.str(line[2]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapExtend[i];
+               transitionsOk = transitionsOk && !data2.fail();
+               // a line with too few or non-numeric values would otherwise
+               // leave some parameters silently unset
+               if (!transitionsOk) {
+                       cerr
+                                       << "ERROR: Unable to read transition parameters from parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               if (!getline(data, line[0])) {
+                       cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // read alphabet as concatenation of all characters on alphabet line
+               alphabet = "";
+               string token;
+               data2.clear();
+               data2.str(line[0]);
+               while (data2 >> token)
+                       alphabet += token;
+
+               // lower-triangular pair emission table, one value per (i, j <= i)
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++) {
+                               float val;
+                               if (!(data >> val)) {
+                                       cerr
+                                                       << "ERROR: Unable to read emission parameters from parameter file: "
+                                                       << parametersInputFilename << endl;
+                                       exit(1);
+                               }
+                               SetSymmetricEmitPair(emitPairs, alphabet[i], alphabet[j], val);
+                       }
+               }
+
+               // single-letter emission values, one per alphabet letter
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       float val;
+                       if (!(data >> val)) {
+                               cerr
+                                               << "ERROR: Unable to read emission parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+                       const unsigned char letter = (unsigned char) alphabet[i];
+                       emitSingle[(unsigned char) tolower(letter)] = val;
+                       emitSingle[(unsigned char) toupper(letter)] = val;
+               }
+               data.close();
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// printUsage()
+//
+// Write the program banner and the command-line help text to
+// standard error.  The defaults shown are the current values of
+// the corresponding option members.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       // banner and contact details
+       cerr << "************************************************************************"
+                       << endl;
+       cerr << "\tMSAPROBS is a open-source protein multiple sequence alignment algorithm"
+                       << endl;
+       cerr << "\tbased on pair hidden markov model and partition function postirior"
+                       << endl;
+       cerr << "\tprobabilities. If any comments or problems, please contact"
+                       << endl;
+       cerr << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl;
+       cerr << "*************************************************************************"
+                       << endl;
+
+       // synopsis
+       cerr << "Usage:" << endl;
+       cerr << "       msaprobs [OPTION]... [infile]..." << endl << endl;
+       cerr << "Description:" << endl;
+       cerr << "       Align sequences in multi-FASTA format" << endl << endl;
+
+       // output and threading options
+       cerr << "       -o, --outfile <string>" << endl;
+       cerr << "              specify the output file name (STDOUT by default)"
+                       << endl;
+       cerr << "       -num_threads <integer>" << endl;
+       cerr << "              specify the number of threads used, and otherwise detect automatically"
+                       << endl;
+       cerr << "       -clustalw" << endl;
+       cerr << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl;
+
+       // algorithm options
+       cerr << "       -c, --consistency REPS" << endl;
+       cerr << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl;
+       cerr << "       -ir, --iterative-refinement REPS" << endl;
+       cerr << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl;
+
+       // reporting options
+       cerr << "       -v, --verbose" << endl;
+       cerr << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl;
+       cerr << "       -annot FILENAME" << endl;
+       cerr << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl;
+       cerr << "       -a, --alignment-order" << endl;
+       cerr << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl;
+       cerr << "       -version " << endl;
+       cerr << "              print out version of MSAPROBS " << endl << endl;
+}
+/////////////////////////////////////////////////////////////////
+// ParseParams()
+//
+// Parse all command-line options and open the output stream.
+// Arguments that do not start with '-' are collected and
+// returned as the list of input sequence file names.  Any invalid
+// option or value is reported on stderr and terminates the
+// program with exit(1).
+/////////////////////////////////////////////////////////////////
+
+// Returns the argument following option argv[i] and advances i onto it.
+// If the option is the last argument, reports that a value of the kind
+// named by `expected` is missing and exits.
+static char *RequireOptionValue(int argc, char **argv, int &i,
+               const char *expected) {
+       if (i >= argc - 1) {
+               cerr << "ERROR: " << expected << " expected for option " << argv[i]
+                               << endl;
+               exit(1);
+       }
+       return argv[++i];
+}
+
+// Returns the integer following option argv[i] and advances i onto it;
+// reports an error and exits if the value is missing or not an integer.
+static int RequireIntegerOption(int argc, char **argv, int &i) {
+       char *text = RequireOptionValue(argc, argv, i, "Integer");
+       int value = 0;
+       if (!GetInteger(text, &value)) {
+               cerr << "ERROR: Invalid integer following option " << argv[i - 1]
+                               << ": " << argv[i] << endl;
+               exit(1);
+       }
+       return value;
+}
+
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+
+       for (int i = 1; i < argc; i++) {
+               // anything that is not an option is an input sequence file
+               if (argv[i][0] != '-') {
+                       sequenceNames.push_back(string(argv[i]));
+                       continue;
+               }
+
+               // i may advance while a value is consumed; keep the option name
+               const char *option = argv[i];
+
+               //help
+               if (!strcmp(option, "-help") || !strcmp(option, "-?")) {
+                       printUsage();
+                       exit(1);
+               }
+               //output file name
+               else if (!strcmp(option, "-o") || !strcmp(option, "--outfile")) {
+                       alignOutFileName = RequireOptionValue(argc, argv, i, "String");
+               }
+               // parameter file
+               else if (!strcmp(option, "-p") || !strcmp(option, "--paramfile")) {
+                       parametersInputFilename = string(
+                                       RequireOptionValue(argc, argv, i, "Filename"));
+               }
+               //number of threads used ("-p" is taken by --paramfile above, so
+               //only the long spelling selects this option)
+               else if (!strcmp(option, "-num_threads")) {
+                       int threads = RequireIntegerOption(argc, argv, i);
+                       // negative counts are clamped to 0
+                       numThreads = (threads < 0) ? 0 : threads;
+               }
+               // number of consistency transformations
+               else if (!strcmp(option, "-c") || !strcmp(option, "--consistency")) {
+                       int reps = RequireIntegerOption(argc, argv, i);
+                       if (reps < MIN_CONSISTENCY_REPS || reps > MAX_CONSISTENCY_REPS) {
+                               cerr << "ERROR: For option " << option
+                                               << ", integer must be between "
+                                               << MIN_CONSISTENCY_REPS << " and "
+                                               << MAX_CONSISTENCY_REPS << "." << endl;
+                               exit(1);
+                       }
+                       numConsistencyReps = reps;
+               }
+               // number of randomized partitioning iterative refinement passes
+               else if (!strcmp(option, "-ir")
+                               || !strcmp(option, "--iterative-refinement")) {
+                       int reps = RequireIntegerOption(argc, argv, i);
+                       if (reps < MIN_ITERATIVE_REFINEMENT_REPS
+                                       || reps > MAX_ITERATIVE_REFINEMENT_REPS) {
+                               cerr << "ERROR: For option " << option
+                                               << ", integer must be between "
+                                               << MIN_ITERATIVE_REFINEMENT_REPS << " and "
+                                               << MAX_ITERATIVE_REFINEMENT_REPS << "."
+                                               << endl;
+                               exit(1);
+                       }
+                       numIterativeRefinementReps = reps;
+               }
+               // annotation files
+               else if (!strcmp(option, "-annot")) {
+                       enableAnnotation = true;
+                       annotationFilename = RequireOptionValue(argc, argv, i,
+                                       "FILENAME");
+               }
+               // clustalw output format
+               else if (!strcmp(option, "-clustalw")) {
+                       enableClustalWOutput = true;
+               }
+               // cutoff
+               else if (!strcmp(option, "-co") || !strcmp(option, "--cutoff")) {
+                       char *text = RequireOptionValue(argc, argv, i,
+                                       "Floating-point value");
+                       float value = 0;
+                       if (!GetFloat(text, &value)) {
+                               cerr
+                                               << "ERROR: Invalid floating-point value following option "
+                                               << option << ": " << text << endl;
+                               exit(1);
+                       }
+                       if (value < 0 || value > 1) {
+                               cerr << "ERROR: For option " << option
+                                               << ", floating-point value must be between 0 and 1."
+                                               << endl;
+                               exit(1);
+                       }
+                       cutoff = value;
+               }
+               // verbose reporting
+               else if (!strcmp(option, "-v") || !strcmp(option, "--verbose")) {
+                       enableVerbose = true;
+               }
+               // alignment order
+               else if (!strcmp(option, "-a")
+                               || !strcmp(option, "--alignment-order")) {
+                       enableAlignOrder = true;
+               }
+               //print out version
+               else if (!strcmp(option, "-version")) {
+                       cerr << "MSAPROBS version " << VERSION << endl;
+                       exit(1);
+               }
+               // bad arguments
+               else {
+                       cerr << "ERROR: Unrecognized option: " << option << endl;
+                       exit(1);
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               ofstream *outFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+               // fail now rather than silently discarding the alignment later
+               if (!outFile->is_open()) {
+                       cerr << "ERROR: Unable to open output file: "
+                                       << alignOutFileName << endl;
+                       exit(1);
+               }
+               alignOutFile = outFile;
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Walks the guide tree recursively and returns a newly allocated
+// MultiSequence for the given node: a copy of the single input
+// sequence for a leaf, or the alignment of the two child
+// alignments for an internal node.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // leaf: wrap a clone of the sequence selected by the node's index
+       if (tree->leaf != NODE) {
+               MultiSequence *single = new MultiSequence();
+               assert(single);
+               single->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+               return single;
+       }
+
+       // internal node: align the results of the two subtrees
+       MultiSequence *leftAlignment = ProcessTree(tree->left, sequences,
+                       sparseMatrices, model);
+       MultiSequence *rightAlignment = ProcessTree(tree->right, sequences,
+                       sparseMatrices, model);
+       assert(leftAlignment);
+       assert(rightAlignment);
+
+       MultiSequence *merged = AlignAlignments(leftAlignment, rightAlignment,
+                       sparseMatrices, model);
+       assert(merged);
+
+       // the child alignments are no longer needed once merged
+       delete leftAlignment;
+       delete rightAlignment;
+
+       return merged;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Builds the alignment by calling ProcessTree() on the root of
+// the guide tree, then runs numIterativeRefinementReps passes of
+// DoIterativeRefinement() on it and returns the result.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+       MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       // When alignment order was requested: remember every row's sort
+       // label, save the current ordering, and switch the flag off for the
+       // remainder of the run.
+       SafeVector<int> savedLabels;
+       if (enableAlignOrder) {
+               int row = 0;
+               while (row < alignment->GetNumSequences()) {
+                       savedLabels.push_back(alignment->GetSequence(row)->GetSortLabel());
+                       row++;
+               }
+               alignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // Tree-based refinement (TreeBasedBiPartitioning /
+       // DoIterativeRefinementTreeNode) is not used here; only the
+       // iterative refinement below runs.
+       int pass = 0;
+       while (pass < numIterativeRefinementReps) {
+               DoIterativeRefinement(sparseMatrices, model, alignment);
+               pass++;
+       }
+       cerr << endl;
+
+       // put the remembered sort labels back (no-op when none were saved)
+       for (int row = 0; row < (int) savedLabels.size(); row++)
+               alignment->GetSequence(row)->SetSortLabel(savedLabels[row]);
+
+       // return final alignment
+       return alignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Returns a newly allocated MultiSequence holding the alignment
+// of the two given MultiSequence objects.  With verbose output
+// enabled, the labels of both groups and an "accuracy" figure
+// are written to stderr.
+/////////////////////////////////////////////////////////////////
+
+// Writes the labels of all sequences in seqs to stderr as "[a,b,c"
+// (the closing bracket is left to the caller).
+static void PrintSequenceLabels(MultiSequence *seqs) {
+       for (int idx = 0; idx < seqs->GetNumSequences(); idx++) {
+               const char *separator = (idx == 0) ? "[" : ",";
+               cerr << separator << seqs->GetSequence(idx)->GetLabel();
+       }
+}
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // print some info about the alignment
+       if (enableVerbose) {
+               PrintSequenceLabels(align1);
+               cerr << "] vs. ";
+               PrintSequenceLabels(align2);
+               cerr << "]: ";
+       }
+
+       // posterior matrix for the two groups, built with the sequence weights
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+
+       // perform alignment; the lengths are taken from the first row of each
+       // group
+       const int length1 = align1->GetSequence(0)->GetLength();
+       const int length2 = align2->GetSequence(0)->GetLength();
+       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                       length1, length2, *posterior);
+
+       delete posterior;
+
+       if (enableVerbose) {
+               // sum, over every cross-group pair, of the shorter sequence length
+               int totLength = 0;
+               for (int r1 = 0; r1 < align1->GetNumSequences(); r1++) {
+                       for (int r2 = 0; r2 < align2->GetNumSequences(); r2++) {
+                               totLength += min(align1->GetSequence(r1)->GetLength(),
+                                               align2->GetSequence(r2)->GetLength());
+                       }
+               }
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment: rows of the first group are gapped with
+       // id 'X', rows of the second group with id 'Y'
+       MultiSequence *result = new MultiSequence();
+       for (int r1 = 0; r1 < align1->GetNumSequences(); r1++) {
+               result->AddSequence(
+                               align1->GetSequence(r1)->AddGaps(alignment.first, 'X'));
+       }
+       for (int r2 = 0; r2 < align2->GetNumSequences(); r2++) {
+               result->AddSequence(
+                               align2->GetSequence(r2)->AddGaps(alignment.first, 'Y'));
+       }
+       if (!enableAlignOrder)
+               result->SortByLabel();
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+//
+// Performs one round of the probabilistic consistency transformation (the weights
+// are read but currently not applied) and returns, at [i][j] for every i < j, a new SparseMatrix.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
+               MultiSequence *sequences,
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       const int numSeqs = sequences->GetNumSequences();
+       // result table: every cell starts as NULL, only [i][j] with i < j is filled below
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences (OpenMP: flat loop over seqsPairs[]; otherwise nested i < j loops)
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       if (enableVerbose) {
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix (the returned VF is owned here and deleted below)
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+
+                       // contribution from the summation where z = x and z = y
+                       float w = wi * wi * wj + wi * wj * wj; // only feeds sumW, which is no longer used
+                       float sumW = w;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               // weighted variant (disabled): posterior[k] = w*posterior[k];
+                               posterior[k] += posterior[k]; // unweighted: each cell is doubled
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk; // handed to Relax/Relax1, which do not apply it
+                                       sumW += w;
+                                       if (k < i) // z precedes both: matrices are stored as [k][i] and [k][j]
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       else if (k > i && k < j) // z lies between: stored as [i][k] and [k][j]
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       else { // z follows j: [j][k] is y-z, its transpose supplies z-y
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               // weighted variant (disabled): posterior[k] /= sumW;
+                               posterior[k] /= numSeqs; // normalise by the sequence count instead of sumW
+                       }
+                       // mask out positions not originally in the posterior matrix
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       for (int y = 0; y <= seq2Length; y++) // row 0 is cleared entirely
+                               posterior[y] = 0;
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0; // next column of this row still to be examined
+                               while (XYptr != XYend) {
+
+                                       // zero out all cells until the first filled column
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       delete posteriorPtr; // the dense working copy is no longer needed
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+
+       int lengthX = matXZ->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       // for every x[i]
+       for (int i = 1; i <= lengthX; i++) {
+               SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
+               SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
+
+               VF::iterator base = posterior.begin() + i * (lengthY + 1);
+
+               // iterate through all x[i]-z[k]
+               while (XZptr != XZend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
+                       SafeVector<PIF>::iterator ZYend = ZYptr
+                                       + matZY->GetRowSize(XZptr->first);
+                       const float XZval = XZptr->second;
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * XZval * ZYptr->second;
+                                base[ZYptr->first] += XZval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       XZptr++;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               while (ZXptr != ZXend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
+                       SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               //base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               base[ZYptr->first] += ZXval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       ZXptr++;
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partionining iterative
+// refinement.
+// return 0: successful refinement, 1: ineffective refinement, 2: random problem 
+/////////////////////////////////////////////////////////////////
+int MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+       int i;
+       // create two separate groups
+       for (i = 0; i < numSeqs; i++) {
+                int index = rand();
+               if (index % 2) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty()) return 2;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);   
+
+//start add by Yongtao
+#if 1
+       VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
+                       sparseMatrices, cutoff);
+#endif
+       // compute an "accuracy" measure for the alignment before refinement        
+        SafeVector<SafeVector<char>::iterator> oldOnePtrs(groupOne.size());
+       SafeVector<SafeVector<char>::iterator> oldTwoPtrs(groupTwo.size());
+        i=0; 
+       for (set<int>::const_iterator iter = groupOne.begin();
+                       iter != groupOne.end(); ++iter) {
+               oldOnePtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+        i=0;
+       for (set<int>::const_iterator iter = groupTwo.begin();
+                       iter != groupTwo.end(); ++iter) {
+               oldTwoPtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
+       }
+
+        VF &posteriorArr = *posterior;
+        int oldLength = alignment->GetSequence(0)->GetLength();
+       int groupOneindex=0; int groupTwoindex=0;
+       float accuracy_before = 0; 
+        int j;
+       for (i = 1; i <= oldLength; i++) {
+               // check to see if there is a gap in every sequence of the set
+               bool foundOne = false;
+               for (j = 0; !foundOne && j < (int) groupOne.size(); j++)
+                       foundOne = (oldOnePtrs[j][i] != '-');
+               // if not, then this column counts towards the sequence length
+               if (foundOne) groupOneindex ++;
+               bool foundTwo = false;
+               for (j = 0; !foundTwo && j < (int) groupTwo.size(); j++)
+                       foundTwo = (oldTwoPtrs[j][i] != '-');
+               if (foundTwo) groupTwoindex ++;
+                if(foundOne && foundTwo) accuracy_before += 
+                               posteriorArr[groupOneindex * (groupTwoSeqs->GetSequence(0)->GetLength() + 1) + groupTwoindex];
+       }
+       
+       pair<SafeVector<char> *, float> refinealignment;
+       //perform alignment
+       refinealignment = model.ComputeAlignment(groupOneSeqs->GetSequence(0)->GetLength(),
+                       groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
+        delete posterior;
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+       for (int i = 0; i < groupOneSeqs->GetNumSequences(); i++)
+               result->AddSequence(
+                       groupOneSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'X'));
+       for (int i = 0; i < groupTwoSeqs->GetNumSequences(); i++)
+               result->AddSequence(
+                       groupTwoSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'Y'));
+       // free temporary alignment
+       delete refinealignment.first;
+       delete alignment;
+        alignment = result;
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+        if(accuracy_before == refinealignment.second) return 1;
+        else return 0; 
+}
+
+
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       vector<bool> inGroup1;
+       inGroup1.resize(numSeqs);
+       for (int i = 0; i < numSeqs; i++) {
+               inGroup1[i] = false;
+       }
+
+       AlignmentOrder* orders = this->tree->getAlignOrders();
+       AlignmentOrder* order = &orders[nodeIndex];
+       for (int i = 0; i < order->leftNum; i++) {
+               int si = order->leftLeafs[i];
+               inGroup1[si] = true;
+       }
+       for (int i = 0; i < order->rightNum; i++) {
+               int si = order->rightLeafs[i];
+               inGroup1[si] = true;
+       }
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               if (inGroup1[i]) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Computes annotation for multiple alignment and write values
+// to a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       SafeVector<int> position(numSeqs, 0);
+       SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
+       for (int i = 0; i < numSeqs; i++)
+               seqs[i] = alignment->GetSequence(i)->GetDataPtr();
+       SafeVector<pair<int, int> > active;
+       active.reserve(numSeqs);
+
+       SafeVector<int> lab;
+       for (int i = 0; i < numSeqs; i++)
+               lab.push_back(alignment->GetSequence(i)->GetSortLabel());
+
+       // for every column
+       for (int i = 1; i <= alignLength; i++) {
+
+               // find all aligned residues in this particular column
+               active.clear();
+               for (int j = 0; j < numSeqs; j++) {
+                       if (seqs[j][i] != '-') {
+                               active.push_back(make_pair(lab[j], ++position[j]));
+                       }
+               }
+
+               sort(active.begin(), active.end());
+               outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       float val = 0;
+       for (int i = 0; i < (int) active.size(); i++) {
+               for (int j = i + 1; j < (int) active.size(); j++) {
+                       val += sparseMatrices[active[i].first][active[j].first]->GetValue(
+                                       active[i].second, active[j].second);
+               }
+       }
+
+       return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
+
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeSimilarity ()
+//
+// Computes the average similarity for a particular family.
+// extreme low or extreme high similarity(<=20% or >80%) return 0
+// low similarity(20%-50%) return 1
+// high similarity(50%-80%) return 2
+/////////////////////////////////////////////////////////////////
+int MSA::ComputeSimilarity (MultiSequence *sequences,const ProbabilisticModel &model){
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+        //average identity for all sequences
+        float identity = 0;
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+
+       // do all pairwise alignments for family similarity 
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+                       pair<SafeVector<char> *, float> alignment = model.ComputeViterbiAlignment(seq1,seq2);
+                        //
+                       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+                       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+                        float N_correct_match = 0;
+                       //float N_match;
+                       //float N_column = 0;
+                       float N_alignment = 0;
+                        int i = 1;int j = 1;
+                       //bool start = false; bool end = false;
+                       for (SafeVector<char>::iterator iter = alignment.first->begin(); 
+                               iter != alignment.first->end(); ++iter){
+                               if (*iter == 'B'){
+                                        //N_match += 1;
+                                       //start = true;
+                                       //if(i==seq1->GetLength() || j==seq2->GetLength()) end = true;
+                                       unsigned char c1 = (unsigned char) iter1[i++];
+                                       unsigned char c2 = (unsigned char) iter2[j++];
+                                       if(c1==c2) N_correct_match += 1;
+                               }
+                                else if(*iter == 'X') i++;
+                               else if(*iter == 'Y') j++;
+                               //if(start && !end) N_column += 1;
+                               N_alignment += 1;
+                        }
+                        if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) cerr << "similarity error"<< endl;
+                       identity += N_correct_match / N_alignment;
+                        //
+                       delete alignment.first;                   
+#ifndef _OPENMP
+               }
+#endif
+       } 
+       identity /= numPairs;
+        //adapative         
+        if(identity <= 0.15) initDistrib[2] = 0.143854;
+       else if(identity <= 0.2) initDistrib[2] = 0.191948;
+       else if(identity <= 0.25) initDistrib[2] = 0.170705;
+       else if(identity <= 0.3) initDistrib[2] = 0.100675;
+       else if(identity <= 0.35) initDistrib[2] = 0.090755;
+       else if(identity <= 0.4) initDistrib[2] = 0.146188;
+        else if(identity <= 0.45) initDistrib[2] = 0.167858;
+       else if(identity <= 0.5) initDistrib[2] = 0.250769;
+        else if(identity <= 0.6) initDistrib[2] = 0.500829;
+        else if(identity <= 0.7) initDistrib[2] = 0.259622;
+
+        if( identity<= 0.2 || identity > 0.8) return 0;
+        else if(identity > 0.2 && identity<= 0.5) return 1;
+        else return 2;
+}
diff --git a/binaries/src/GLProbs-1.0/Makefile b/binaries/src/GLProbs-1.0/Makefile
new file mode 100644 (file)
index 0000000..9128fbc
--- /dev/null
@@ -0,0 +1,16 @@
+# Build rules for the glprobs executable (g++ with OpenMP enabled through $(OPENMP)).
+CXXOBJS = MSA.o MSAGuideTree.o MSAClusterTree.o MSAPartProbs.o MSAReadMatrix.o main.o
+
+OPENMP = -fopenmp
+CXX = g++
+COMMON_FLAGS = -O3 $(OPENMP) -Wall -funroll-loops -I . -I /usr/include
+CXXFLAGS = $(COMMON_FLAGS)
+
+EXEC = glprobs
+.PHONY: all clean # both name actions rather than files, so a stray file called "all" or "clean" cannot block them
+all: $(CXXOBJS)
+       $(CXX) $(CXXFLAGS) -o $(EXEC) $(CXXOBJS) $(NVCCOBJS) $(NVCCLIBS)
+       strip $(EXEC)
+clean:
+       rm -rf *.o $(EXEC)
+
diff --git a/binaries/src/GLProbs-1.0/MultiSequence.h b/binaries/src/GLProbs-1.0/MultiSequence.h
new file mode 100644 (file)
index 0000000..96a61f5
--- /dev/null
@@ -0,0 +1,735 @@
+////////////////////////////////////////////////////////////////
+// MultiSequence.h
+//
+// Utilities for reading/writing multiple sequence data.
+/////////////////////////////////////////////////////////////////
+
+#ifndef MULTISEQUENCE_H
+#define MULTISEQUENCE_H
+
+#include <cctype>
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <set>
+#include "SafeVector.h"
+#include "Sequence.h"
+#include "FileBuffer.h"
+
+#define VERSION "0.9.7"
+/////////////////////////////////////////////////////////////////
+// MultiSequence
+//
+// Class for multiple sequence alignment input/output.
+/////////////////////////////////////////////////////////////////
+
+class MultiSequence {
+
+       SafeVector<Sequence *> *sequences;
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Default constructor: starts with no sequence vector allocated.
+       /////////////////////////////////////////////////////////////////
+
+       MultiSequence() {
+               sequences = NULL;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Constructor.  Starts empty, then loads MFA data from "infile".
+       /////////////////////////////////////////////////////////////////
+
+       MultiSequence(FileBuffer &infile) {
+               sequences = NULL;
+               LoadMFA(infile);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Constructor.  Starts empty, then loads MFA data from the named file.
+       /////////////////////////////////////////////////////////////////
+
+       MultiSequence(const string &filename) {
+               sequences = NULL;
+               LoadMFA(filename);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::~MultiSequence()
+       //
+       // Destructor.  Gets rid of sequence objects contained in the
+       // multiple alignment.
+       /////////////////////////////////////////////////////////////////
+
+       ~MultiSequence() {
+
+               // if sequences allocated
+               if (sequences) {
+
+                       // free all sequences
+                       for (SafeVector<Sequence *>::iterator iter = sequences->begin();
+                                       iter != sequences->end(); ++iter) {
+                               assert(*iter);
+                               delete *iter;
+                               *iter = NULL;
+                       }
+
+                       // free sequence vector
+                       delete sequences;
+                       sequences = NULL;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::LoadMFA()
+       //
+       // Load MFA from a filename.
+       /////////////////////////////////////////////////////////////////
+
+       void LoadMFA(const string &filename, bool stripGaps = false) {
+
+               // try opening file
+               FileBuffer infile(filename.c_str());
+
+               if (infile.fail()) {
+                       cerr << "ERROR: Could not open file '" << filename
+                                       << "' for reading." << endl;
+                       exit(1);
+               }
+
+               // if successful, then load using other LoadMFA() routine
+               LoadMFA(infile, stripGaps);
+
+               infile.close();
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::ParseMSF()
+       //
+       // Parse MSF- or CLUSTAL-style alignment data from a FileBuffer object.
+       /////////////////////////////////////////////////////////////////
+
+       void ParseMSF(FileBuffer &infile, string header, bool stripGaps = false) {
+
+               SafeVector<SafeVector<char> *> seqData;
+               SafeVector<string> seqNames;
+               SafeVector<int> seqLengths;
+
+               istringstream in;
+               bool valid = true;
+               bool missingHeader = false;
+               bool clustalW = false;
+
+               // read until data starts
+               while (!infile.eof() && header.find("..", 0) == string::npos) {
+                       if (header.find("CLUSTAL", 0) == 0
+                                       || header.find("MSAPROBS", 0) == 0) {
+                               clustalW = true;
+                               break;
+                       }
+                       infile.GetLine(header);
+                       if (header.find("//", 0) != string::npos) {
+                               missingHeader = true;
+                               break;
+                       }
+               }
+
+               // read until end-of-file
+               while (valid) {
+                       infile.GetLine(header);
+                       if (infile.eof())
+                               break;
+
+                       string word;
+                       in.clear();
+                       in.str(header);
+
+                       // check if there's anything on this line
+                       if (in >> word) {
+
+                               // clustalw name parsing
+                               if (clustalW) {
+                                       if (!isspace(header[0])
+                                                       && find(seqNames.begin(), seqNames.end(), word)
+                                                                       == seqNames.end()) {
+                                               seqNames.push_back(word);
+                                               seqData.push_back(new SafeVector<char>());
+                                               seqLengths.push_back(0);
+                                               seqData[(int) seqData.size() - 1]->push_back('@');
+                                       }
+                               }
+
+                               // look for new sequence label
+                               if (word == string("Name:")) {
+                                       if (in >> word) {
+                                               seqNames.push_back(word);
+                                               seqData.push_back(new SafeVector<char>());
+                                               seqLengths.push_back(0);
+                                               seqData[(int) seqData.size() - 1]->push_back('@');
+                                       } else
+                                               valid = false;
+                               }
+
+                               // check if this is sequence data
+                               else if (find(seqNames.begin(), seqNames.end(), word)
+                                               != seqNames.end()) {
+                                       int index = find(seqNames.begin(), seqNames.end(), word)
+                                                       - seqNames.begin();
+
+                                       // read all remaining characters on the line
+                                       char ch;
+                                       while (in >> ch) {
+                                               if (isspace(ch))
+                                                       continue;
+                                               if (ch >= 'a' && ch <= 'z')
+                                                       ch = ch - 'a' + 'A';
+                                               if (ch == '.')
+                                                       ch = '-';
+                                               if (stripGaps && ch == '-')
+                                                       continue;
+                                               if (!((ch >= 'A' && ch <= 'Z') || ch == '*' || ch == '-')) {
+                                                       cerr << "ERROR: Unknown character encountered: "
+                                                                       << ch << endl;
+                                                       exit(1);
+                                               }
+
+                                               // everything's ok so far, so just store this character.
+                                               seqData[index]->push_back(ch);
+                                               seqLengths[index]++;
+                                       }
+                               } else if (missingHeader) {
+                                       seqNames.push_back(word);
+                                       seqData.push_back(new SafeVector<char>());
+                                       seqLengths.push_back(0);
+                                       seqData[(int) seqData.size() - 1]->push_back('@');
+
+                                       int index = (int) seqNames.size() - 1;
+
+                                       // read all remaining characters on the line
+                                       char ch;
+                                       while (in >> ch) {
+                                               if (isspace(ch))
+                                                       continue;
+                                               if (ch >= 'a' && ch <= 'z')
+                                                       ch = ch - 'a' + 'A';
+                                               if (ch == '.')
+                                                       ch = '-';
+                                               if (stripGaps && ch == '-')
+                                                       continue;
+                                               if (!((ch >= 'A' && ch <= 'Z') || ch == '*' || ch == '-')) {
+                                                       cerr << "ERROR: Unknown character encountered: "
+                                                                       << ch << endl;
+                                                       exit(1);
+                                               }
+
+                                               // everything's ok so far, so just store this character.
+                                               seqData[index]->push_back(ch);
+                                               seqLengths[index]++;
+                                       }
+                               }
+                       }
+               }
+
+               // check for errors
+               if (seqNames.size() == 0) {
+                       cerr << "ERROR: No sequences read!" << endl;
+                       exit(1);
+               }
+
+               assert(!sequences);
+               sequences = new SafeVector<Sequence *>;
+               for (int i = 0; i < (int) seqNames.size(); i++) {
+                       if (seqLengths[i] == 0) {
+                               cerr << "ERROR: Sequence of zero length!" << endl;
+                               exit(1);
+                       }
+                       Sequence *seq = new Sequence(seqData[i], seqNames[i], seqLengths[i],
+                                       i, i);
+                       sequences->push_back(seq);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::LoadMFA()
+       //
+       // Load MFA from a FileBuffer object.
+       /////////////////////////////////////////////////////////////////
+
+       void LoadMFA(FileBuffer &infile, bool stripGaps = false) {
+
+               // check to make sure that file reading is ok
+               if (infile.fail()) {
+                       cerr << "ERROR: Error reading file." << endl;
+                       exit(1);
+               }
+
+               // read all sequences
+               while (true) {
+
+                       // get the sequence label as being the current # of sequences
+                       // NOTE: sequence labels here are zero-based
+                       int index = (!sequences) ? 0 : sequences->size();
+
+                       // read the sequence
+                       Sequence *seq = new Sequence(infile, stripGaps);
+                       if (seq->Fail()) {
+
+                               // check if alternative file format (i.e. not MFA)
+                               if (index == 0) {
+                                       string header = seq->GetHeader();
+                                       if (header.length() > 0 && header[0] != '>') {
+
+                                               // try MSF format
+                                               ParseMSF(infile, header);
+                                               break;
+                                       }
+                               }
+
+                               delete seq;
+                               break;
+                       }
+                       seq->SetLabel(index);
+
+                       // add the sequence to the list of current sequences
+                       if (!sequences)
+                               sequences = new SafeVector<Sequence *>;
+                       sequences->push_back(seq);
+               }
+
+               // make sure at least one sequence was read
+               if (!sequences) {
+                       cerr << "ERROR: No sequences read." << endl;
+                       exit(1);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::AddSequence()
+       //
+       // Add another sequence to an existing sequence list
+       /////////////////////////////////////////////////////////////////
+
+       void AddSequence(Sequence *sequence) {
+               assert(sequence);
+               assert(!sequence->Fail());
+
+               // add sequence
+               if (!sequences)
+                       sequences = new SafeVector<Sequence *>;
+               sequences->push_back(sequence);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::RemoveSequence()
+       //
+       // Remove a sequence from the MultiSequence
+       /////////////////////////////////////////////////////////////////
+
+       void RemoveSequence(int index) {
+               // Deletes the Sequence stored at position index and removes its
+               // slot from the list.  The list must exist and index must be in
+               // range (both are asserted).
+               assert(sequences);
+               assert(0 <= index && index < (int) sequences->size());
+
+               // free the object first, then drop the now-dangling pointer
+               SafeVector<Sequence *>::iterator victim = sequences->begin() + index;
+               delete *victim;
+               sequences->erase(victim);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::WriteMFA()
+       //
+       // Write MFA to the outfile.  Allows the user to specify the
+       // number of columns for the output.  Also, useIndices determines
+       // whether or not the actual sequence comments will be printed
+       // out or whether the artificially assigned sequence labels will
+       // be used instead.
+       /////////////////////////////////////////////////////////////////
+
+       void WriteMFA(ostream &outfile, int numColumns = 60,
+                       bool useIndices = false) {
+               if (!sequences)
+                       return;
+
+               // loop through all sequences and write them out
+               for (SafeVector<Sequence *>::iterator iter = sequences->begin();
+                               iter != sequences->end(); ++iter) {
+                       (*iter)->WriteMFA(outfile, numColumns, useIndices);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetAnnotationChar()
+       //
+       // Return CLUSTALW annotation for column.
+       /////////////////////////////////////////////////////////////////
+
+       char GetAnnotationChar(SafeVector<char> &column) {
+               SafeVector<int> counts(256, 0);
+               int allChars = (int) column.size();
+
+               for (int i = 0; i < allChars; i++) {
+                       counts[(unsigned char) toupper(column[i])]++;
+               }
+
+               allChars -= counts[(unsigned char) '-'];
+               if (allChars == 1)
+                       return ' ';
+
+               for (int i = 0; i < 256; i++)
+                       if ((char) i != '-' && counts[i] == allChars)
+                               return '*';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'A'] == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'H']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               + counts[(unsigned char) 'E'] + counts[(unsigned char) 'Q']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'Q'] + counts[(unsigned char) 'H']
+                               + counts[(unsigned char) 'R'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'M'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'V']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'M'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'F']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'H'] + counts[(unsigned char) 'Y']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'F'] + counts[(unsigned char) 'Y']
+                               + counts[(unsigned char) 'W'] == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'C'] + counts[(unsigned char) 'S']
+                               + counts[(unsigned char) 'A'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'A'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'V'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'A']
+                               + counts[(unsigned char) 'G'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'N'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'P'] + counts[(unsigned char) 'A']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'G']
+                               + counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'N']
+                               + counts[(unsigned char) 'D'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               + counts[(unsigned char) 'E'] + counts[(unsigned char) 'Q']
+                               + counts[(unsigned char) 'H'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'H'] + counts[(unsigned char) 'Q']
+                               + counts[(unsigned char) 'R'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'F'] + counts[(unsigned char) 'V']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'M'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'H'] + counts[(unsigned char) 'F']
+                               + counts[(unsigned char) 'Y'] == allChars)
+                       return '.';
+
+               return ' ';
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::WriteALN()
+       //
+       // Write ALN to the outfile.  Allows the user to specify the
+       // number of columns for the output.  
+       /////////////////////////////////////////////////////////////////
+
+       void WriteALN(ostream &outfile, int numColumns = 60) {
+               if (!sequences)
+                       return;
+
+               outfile << "CLUSTAL for GLPROBS version " << VERSION << " multiple sequence alignment" << endl;
+//
+//             outfile << "//"<<endl<<endl;
+
+               int longestComment = 0;
+               SafeVector<SafeVector<char>::iterator> ptrs(GetNumSequences());
+               SafeVector<int> lengths(GetNumSequences());
+               for (int i = 0; i < GetNumSequences(); i++) {
+                       ptrs[i] = GetSequence(i)->GetDataPtr();
+                       lengths[i] = GetSequence(i)->GetLength();
+                       longestComment = max(longestComment,
+                                       (int) GetSequence(i)->GetName().length());
+               }
+               longestComment += 4;
+
+               int writtenChars = 0;
+               bool allDone = false;
+
+               while (!allDone) {
+                       outfile << endl;
+                       allDone = true;
+
+                       // loop through all sequences and write them out
+                       for (int i = 0; i < GetNumSequences(); i++) {
+
+                               if (writtenChars < lengths[i]) {
+                                       outfile << GetSequence(i)->GetName();
+                                       for (int j = 0;
+                                                       j
+                                                                       < longestComment
+                                                                                       - (int) GetSequence(i)->GetName().length();
+                                                       j++)
+                                               outfile << ' ';
+
+                                       for (int j = 0; j < numColumns; j++) {
+                                               if (writtenChars + j < lengths[i])
+                                                       outfile << ptrs[i][writtenChars + j + 1];
+                                               else
+                                                       break;
+                                       }
+
+                                       outfile << endl;
+
+                                       if (writtenChars + numColumns < lengths[i])
+                                               allDone = false;
+                               }
+                       }
+
+                       // write annotation line
+/*
+                       for (int j = 0; j < longestComment; j++)
+                               outfile << ' ';
+
+                       for (int j = 0; j < numColumns; j++) {
+                               SafeVector<char> column;
+
+                               for (int i = 0; i < GetNumSequences(); i++)
+                                       if (writtenChars + j < lengths[i])
+                                               column.push_back(ptrs[i][writtenChars + j + 1]);
+
+                               if (column.size() > 0)
+                                       outfile << GetAnnotationChar(column);
+                       }
+*/
+                       outfile << endl;
+                       writtenChars += numColumns;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetSequence()
+       //
+       // Retrieve a sequence from the MultiSequence object.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence* GetSequence(int i) {
+               // Returns the i-th stored sequence.  The list must exist and i
+               // must be a valid position (both are asserted).
+               assert(sequences);
+               assert(i >= 0 && i < (int) sequences->size());
+
+               SafeVector<Sequence *> &list = *sequences;
+               return list[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetSequence()
+       //
+       // Retrieve a sequence from the MultiSequence object
+       // (const version).
+       /////////////////////////////////////////////////////////////////
+
+       const Sequence* GetSequence(int i) const {
+               // Read-only counterpart of GetSequence(): returns the i-th stored
+               // sequence as a pointer to const.  The list must exist and i must
+               // be a valid position (both are asserted).
+               assert(sequences);
+               assert(i >= 0 && i < (int) sequences->size());
+
+               SafeVector<Sequence *> &list = *sequences;
+               return list[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetNumSequences()
+       //
+       // Returns the number of sequences in the MultiSequence.
+       /////////////////////////////////////////////////////////////////
+
+       int GetNumSequences() const {
+               // a missing list counts as an empty one
+               return sequences ? (int) sequences->size() : 0;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SortByHeader()
+       //
+       // Organizes the sequences according to their sequence headers
+       // in ascending order.
+       /////////////////////////////////////////////////////////////////
+
+       void SortByHeader() {
+               assert(sequences);
+
+               // a quick and easy O(n^2) sort
+               for (int i = 0; i < (int) sequences->size() - 1; i++) {
+                       for (int j = i + 1; j < (int) sequences->size(); j++) {
+                               if ((*sequences)[i]->GetHeader() > (*sequences)[j]->GetHeader())
+                                       swap((*sequences)[i], (*sequences)[j]);
+                       }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SortByLabel()
+       //
+       // Organizes the sequences according to their sequence labels
+       // in ascending order.
+       /////////////////////////////////////////////////////////////////
+
+       void SortByLabel() {
+               assert(sequences);
+
+               // a quick and easy O(n^2) sort
+               for (int i = 0; i < (int) sequences->size() - 1; i++) {
+                       for (int j = i + 1; j < (int) sequences->size(); j++) {
+                               if ((*sequences)[i]->GetSortLabel()
+                                               > (*sequences)[j]->GetSortLabel())
+                                       swap((*sequences)[i], (*sequences)[j]);
+                       }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SaveOrdering()
+       //
+       // Relabels sequences so as to preserve the current ordering.
+       /////////////////////////////////////////////////////////////////
+
+       void SaveOrdering() {
+               // Stamps each sequence with its current position as its sort
+               // label.  The list must exist (asserted).
+               assert(sequences);
+
+               int position = 0;
+               for (SafeVector<Sequence *>::iterator it = sequences->begin();
+                               it != sequences->end(); ++it) {
+                       (*it)->SetSortLabel(position);
+                       position++;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::Project()
+       //
+       // Given a set of indices, extract all sequences from the current
+       // MultiSequence object whose index is included in the set.
+       // Then, project the multiple alignments down to the desired
+       // subset, and return the projection as a new MultiSequence
+       // object.
+       /////////////////////////////////////////////////////////////////
+
+       MultiSequence *Project(const set<int> &indices) {
+               SafeVector<SafeVector<char>::iterator> oldPtrs(indices.size());
+               SafeVector<SafeVector<char> *> newPtrs(indices.size());
+
+               assert(indices.size() != 0);
+
+               // grab old data
+               int i = 0;
+               for (set<int>::const_iterator iter = indices.begin();
+                               iter != indices.end(); ++iter) {
+                       oldPtrs[i++] = GetSequence(*iter)->GetDataPtr();
+               }
+
+               // compute new length
+               int oldLength = GetSequence(*indices.begin())->GetLength();
+               int newLength = 0;
+               for (i = 1; i <= oldLength; i++) {
+
+                       // check to see if there is a gap in every sequence of the set
+                       bool found = false;
+                       for (int j = 0; !found && j < (int) indices.size(); j++)
+                               found = (oldPtrs[j][i] != '-');
+
+                       // if not, then this column counts towards the sequence length
+                       if (found)
+                               newLength++;
+               }
+
+               // build new alignments
+               for (i = 0; i < (int) indices.size(); i++) {
+                       newPtrs[i] = new SafeVector<char>();
+                       assert(newPtrs[i]);
+                       newPtrs[i]->push_back('@');
+               }
+
+               // add all needed columns
+               for (i = 1; i <= oldLength; i++) {
+
+                       // make sure column is not gapped in all sequences in the set
+                       bool found = false;
+                       for (int j = 0; !found && j < (int) indices.size(); j++)
+                               found = (oldPtrs[j][i] != '-');
+
+                       // if not, then add it
+                       if (found) {
+                               for (int j = 0; j < (int) indices.size(); j++)
+                                       newPtrs[j]->push_back(oldPtrs[j][i]);
+                       }
+               }
+
+               // wrap sequences in MultiSequence object
+               MultiSequence *ret = new MultiSequence();
+               i = 0;
+               for (set<int>::const_iterator iter = indices.begin();
+                               iter != indices.end(); ++iter) {
+                       ret->AddSequence(
+                                       new Sequence(newPtrs[i++], GetSequence(*iter)->GetHeader(),
+                                                       newLength, GetSequence(*iter)->GetSortLabel(),
+                                                       GetSequence(*iter)->GetLabel()));
+               }
+
+               return ret;
+       }
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/ProbabilisticModel.h b/binaries/src/GLProbs-1.0/ProbabilisticModel.h
new file mode 100644 (file)
index 0000000..6c7ab1b
--- /dev/null
@@ -0,0 +1,1338 @@
+/////////////////////////////////////////////////////////////////\r
+// ProbabilisticModel.h\r
+//\r
+// Routines for (1) posterior probability computations\r
+//              (2) chained anchoring\r
+//              (3) maximum weight trace alignment\r
+/////////////////////////////////////////////////////////////////\r
+\r
+#ifndef PROBABILISTICMODEL_H\r
+#define PROBABILISTICMODEL_H\r
+\r
+#include <list>\r
+#include <cmath>\r
+#include <cstdio>\r
+#include "SafeVector.h"\r
+#include "ScoreType.h"\r
+#include "SparseMatrix.h"\r
+#include "MultiSequence.h"\r
+\r
+using namespace std;\r
+\r
+const int NumMatchStates = 1;                                    // note that in this version the number\r
+                                                                 // of match states is fixed at 1...will\r
+const int NumInsertStates = 2;                                             // change in future versions\r
+const int NumMatrixTypes = NumMatchStates + NumInsertStates * 2;\r
+\r
+/////////////////////////////////////////////////////////////////\r
+// ProbabilisticModel\r
+//\r
+// Class for storing the parameters of a probabilistic model and\r
+// performing different computations based on those parameters.\r
+// In particular, this class handles the computation of\r
+// posterior probabilities that may be used in alignment.\r
+/////////////////////////////////////////////////////////////////\r
+\r
+class ProbabilisticModel {\r
+\r
+  float initialDistribution[NumMatrixTypes];               // holds the initial probabilities for each state\r
+  float transProb[NumMatrixTypes][NumMatrixTypes];         // holds all state-to-state transition probabilities\r
+  float matchProb[256][256];                               // emission probabilities for match states\r
+  float insProb[256][NumMatrixTypes];                      // emission probabilities for insert states\r
+  float local_transProb[3][3];\r
+  float random_transProb[2];\r
+\r
+ public:\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ProbabilisticModel()\r
+  //\r
+  // Constructor.  Builds a new probabilistic model using the\r
+  // given parameters.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  ProbabilisticModel (const VF &initDistribMat, const VF &gapOpen, const VF &gapExtend,\r
+                      const VVF &emitPairs, const VF &emitSingle){\r
+    // Fills three parameter sets, all passed through LOG() (defined outside\r
+    // this chunk):\r
+    //   - initialDistribution / transProb / insProb / matchProb for the\r
+    //     NumMatrixTypes-state model (state 0 = match, states 2*i+1 and\r
+    //     2*i+2 = the two insert states of insert pair i);\r
+    //   - local_transProb for the 3-state local model;\r
+    //   - random_transProb, derived from initDistribMat[2].\r
+    // The first set reads gapOpen/gapExtend at even indices (2*i); the local\r
+    // model reads index 1.\r
+    // NOTE(review): assumes initDistribMat has at least NumMatrixTypes\r
+    // entries, gapOpen/gapExtend at least 2*NumInsertStates-1 entries, and\r
+    // emitSingle/emitPairs 256 entries per dimension -- confirm with caller.\r
+\r
+//Probcons model\r
+    // build transition matrix\r
+    VVF transMat (NumMatrixTypes, VF (NumMatrixTypes, 0.0f));\r
+    transMat[0][0] = 1;\r
+    for (int i = 0; i < NumInsertStates; i++){\r
+      // match -> either insert state of pair i: gap-open probability\r
+      transMat[0][2*i+1] = gapOpen[2*i];\r
+      transMat[0][2*i+2] = gapOpen[2*i];\r
+      // match -> match keeps whatever probability is left over\r
+      transMat[0][0] -= (gapOpen[2*i] + gapOpen[2*i]);\r
+      assert (transMat[0][0] > 0);\r
+      // insert -> same insert: gap-extend probability\r
+      transMat[2*i+1][2*i+1] = gapExtend[2*i];\r
+      transMat[2*i+2][2*i+2] = gapExtend[2*i];\r
+      // no direct transition between the two insert states of a pair\r
+      transMat[2*i+1][2*i+2] = 0;\r
+      transMat[2*i+2][2*i+1] = 0;\r
+      // insert -> match: complement of gap-extend\r
+      transMat[2*i+1][0] = 1 - gapExtend[2*i];\r
+      transMat[2*i+2][0] = 1 - gapExtend[2*i];\r
+    }\r
+\r
+    // store initial and transition probabilities through LOG()\r
+    for (int i = 0; i < NumMatrixTypes; i++){\r
+      initialDistribution[i] = LOG (initDistribMat[i]);\r
+      for (int j = 0; j < NumMatrixTypes; j++)\r
+        transProb[i][j] = LOG (transMat[i][j]);\r
+    }\r
+//due to the local model parameters' initialization, initialDistribution[2]\r
+//must be corrected: initDistribMat[2] is consumed below as sigma, so state 2\r
+//takes the same value as state 1\r
+    initialDistribution[2] = LOG (initDistribMat[1]);\r
+\r
+    // create insertion and match probability matrices\r
+    // (every insert-state column j receives the same emitSingle value)\r
+    for (int i = 0; i < 256; i++){\r
+      for (int j = 0; j < NumMatrixTypes; j++)\r
+        insProb[i][j] = LOG (emitSingle[i]);\r
+      for (int j = 0; j < 256; j++)\r
+        matchProb[i][j] = LOG (emitPairs[i][j]);\r
+    }\r
+\r
+//Local model\r
+    // build transition matrix (state 0 = match, states 1 and 2 = inserts),\r
+    // same layout as above but with a single insert pair taken from index 1\r
+    VVF ltransMat (3, VF (3, 0.0f));\r
+    ltransMat[0][0] = 1;\r
+    \r
+      ltransMat[0][1] = gapOpen[1];\r
+      ltransMat[0][2] = gapOpen[1];\r
+      ltransMat[0][0] -= (gapOpen[1] + gapOpen[1]);\r
+      assert (ltransMat[0][0] > 0);\r
+      ltransMat[1][1] = gapExtend[1];\r
+      ltransMat[2][2] = gapExtend[1];\r
+      ltransMat[1][2] = 0;\r
+      ltransMat[2][1] = 0;\r
+      ltransMat[1][0] = 1 - gapExtend[1];\r
+      ltransMat[2][0] = 1 - gapExtend[1];\r
+    \r
+    // store the local transition probabilities through LOG()\r
+    for (int i = 0; i < 3; i++){\r
+      for (int j = 0; j < 3; j++)\r
+        local_transProb[i][j] = LOG (ltransMat[i][j]);\r
+    }\r
+\r
+    // random-model transitions: [0] holds LOG(sigma), [1] holds LOG(1-sigma),\r
+    // where sigma is read from initDistribMat[2]\r
+    random_transProb[0] = LOG (initDistribMat[2]);//sigma\r
+    random_transProb[1] = LOG (1-initDistribMat[2]);//1-sigma\r
+\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeForwardMatrix()\r
+  //\r
+  // Computes a set of forward probability matrices for aligning\r
+  // seq1 and seq2.\r
+  //\r
+  // For efficiency reasons, a single-dimensional floating-point\r
+  // array is used here, with the following indexing scheme:\r
+  //\r
+  //    forward[i + NumMatrixTypes * (j * (seq2Length+1) + k)]\r
+  //    refers to the probability of aligning through j characters\r
+  //    of the first sequence, k characters of the second sequence,\r
+  //    and ending in state i.\r
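+  //    flag: true selects the ProbCons model (NumMatrixTypes states per\r
+  //    cell); false selects the local model, which stores 3 states per\r
+  //    cell, so 3 replaces NumMatrixTypes in the index formula above.\r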
+  /////////////////////////////////////////////////////////////////\r
+\r
+  VF *ComputeForwardMatrix (Sequence *seq1, Sequence *seq2, bool flag=true) const {\r
+    // Fills a flat matrix with one group of states per (i, j) cell, where i\r
+    // counts characters of seq1 and j characters of seq2.\r
+    //   flag == true : NumMatrixTypes states per cell, using transProb,\r
+    //                  insProb, matchProb and initialDistribution\r
+    //   flag == false: 3 states per cell, using local_transProb and\r
+    //                  random_transProb\r
+    // The matrix is allocated with new and returned; this function does not\r
+    // free it.  Both pointers must be non-null (asserted).\r
+    // LOG_ADD / LOG_PLUS_EQUALS are defined outside this chunk (presumably\r
+    // log-space addition -- confirm).\r
+\r
+    assert (seq1);\r
+    assert (seq2);\r
+\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+\r
+    // retrieve the pointers to the beginning of each sequence\r
+    // (characters are read from index 1 onward)\r
+    SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+    SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+\r
+    // create matrix, every entry starting at LOG_ZERO\r
+    VF *forwardPtr;\r
+    if(flag) forwardPtr = new VF (NumMatrixTypes * (seq1Length+1) * (seq2Length+1), LOG_ZERO);\r
+    else forwardPtr = new VF (3 * (seq1Length+1) * (seq2Length+1), LOG_ZERO);\r
+    assert (forwardPtr);\r
+    VF &forward = *forwardPtr;\r
+\r
+    // initialization condition (flag mode only): match state at cell (1,1),\r
+    // seq1-insert states at cell (1,0), seq2-insert states at cell (0,1)\r
+    // NOTE(review): reads iter1[1] and iter2[1], so both sequences are\r
+    // assumed to have at least one character -- confirm.\r
+    if(flag){\r
+       forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] = \r
+               initialDistribution[0] + matchProb[(unsigned char) iter1[1]][(unsigned char) iter2[1]];\r
+   \r
+       for (int k = 0; k < NumInsertStates; k++){\r
+               forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] = \r
+                       initialDistribution[2*k+1] + insProb[(unsigned char) iter1[1]][k];\r
+               forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] = \r
+                       initialDistribution[2*k+2] + insProb[(unsigned char) iter2[1]][k]; \r
+       }\r
+    }\r
+    \r
+    // running offsets of the current cell and its neighbours:\r
+    //   ij = (i, j)   i1j = (i-1, j)   ij1 = (i, j-1)   i1j1 = (i-1, j-1)\r
+    // The neighbour offsets start negative and are only dereferenced inside\r
+    // the i > 0 / j > 0 guards below.\r
+    int ij = 0;\r
+    int i1j = -seq2Length - 1;\r
+    int ij1 = -1;\r
+    int i1j1 = -seq2Length - 2;\r
+    \r
+    // scale the cell offsets by the number of states stored per cell\r
+    if(flag){\r
+       ij *= NumMatrixTypes;\r
+       i1j *= NumMatrixTypes;\r
+       ij1 *= NumMatrixTypes;\r
+       i1j1 *= NumMatrixTypes;\r
+    }\r
+    else{\r
+       ij *= 3;\r
+       i1j *= 3;\r
+       ij1 *= 3;\r
+       i1j1 *= 3;\r
+    }\r
+\r
+    // compute forward scores, row by row; '~' stands in for "no character"\r
+    // at position 0 of either sequence\r
+    for (int i = 0; i <= seq1Length; i++){\r
+      unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];\r
+      for (int j = 0; j <= seq2Length; j++){\r
+        unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];\r
+       // local model: cell (1,1) is seeded here because the flag-only\r
+       // initialization above does not run\r
+       if(i == 1 && j == 1 && !flag) forward[0 + ij] = \r
+                       matchProb[c1][c2] - insProb[c1][0] - insProb[c2][0] - 2*random_transProb[1];\r
+\r
+       // cells (0,0), (0,1), (1,0) and (1,1) are skipped by this guard\r
+       if (i > 1 || j > 1){\r
+         // match state: arrive diagonally from (i-1, j-1)\r
+         if (i > 0 && j > 0){\r
+            if(flag){\r
+               forward[0 + ij] = forward[0 + i1j1] + transProb[0][0];\r
+               for (int k = 1; k < NumMatrixTypes; k++)\r
+               LOG_PLUS_EQUALS (forward[0 + ij], forward[k + i1j1] + transProb[k][0]);\r
+               forward[0 + ij] += matchProb[c1][c2];\r
+            }\r
+            // local model: the first term has no predecessor cell; the loop\r
+            // then accumulates the contribution of each state at (i-1, j-1)\r
+           else{\r
+               forward[0 + ij] = matchProb[c1][c2] - insProb[c1][0] - insProb[c2][0] - 2*random_transProb[1];\r
+               for (int k = 0; k < 3; k++)\r
+                       LOG_PLUS_EQUALS (forward[0 + ij], matchProb[c1][c2] - insProb[c1][0] - insProb[c2][0] +\r
+                               forward[k + i1j1] + local_transProb[k][0] - 2*random_transProb[1]);\r
+               }\r
+         }\r
+         // seq1-insert states: arrive from (i-1, j), either opening from\r
+         // the match state or extending the same insert state\r
+         if (i > 0){\r
+           if(flag){\r
+               for (int k = 0; k < NumInsertStates; k++)\r
+                       forward[2*k+1 + ij] = insProb[c1][k] +\r
+                       LOG_ADD (forward[0 + i1j] + transProb[0][2*k+1],\r
+                        forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1]);\r
+            }\r
+\r
+\r
+\r
+\r
+\r
+\r
+            // local model: single seq1-insert state (index 1)\r
+            else{\r
+               forward[1 + ij] = LOG_ADD (forward[0 + i1j] + local_transProb[0][1] - random_transProb[1],\r
+                                               forward[1 + i1j] + local_transProb[1][1] - random_transProb[1]);\r
+            }\r
+\r
+         }\r
+         // seq2-insert states: arrive from (i, j-1), either opening from\r
+         // the match state or extending the same insert state\r
+         if (j > 0){\r
+            if(flag){\r
+               for (int k = 0; k < NumInsertStates; k++)\r
+                       forward[2*k+2 + ij] = insProb[c2][k] +\r
+                       LOG_ADD (forward[0 + ij1] + transProb[0][2*k+2],\r
+                        forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2]);\r
+            }\r
+            // local model: single seq2-insert state (index 2)\r
+           else{\r
+                  forward[2 + ij] = LOG_ADD (forward[0 + ij1] + local_transProb[0][2] - random_transProb[1],\r
+                                               forward[2 + ij1] + local_transProb[2][2] - random_transProb[1]);\r
+            }\r
+         }\r
+       }\r
+        // advance all four offsets to the next cell in row-major order\r
+        if(flag){\r
+               ij += NumMatrixTypes;\r
+               i1j += NumMatrixTypes;\r
+               ij1 += NumMatrixTypes;\r
+               i1j1 += NumMatrixTypes;\r
+        }\r
+        else{\r
+               ij += 3;\r
+               i1j += 3;\r
+               ij1 += 3;\r
+               i1j1 += 3;\r
+        }\r
+      }\r
+    }\r
+\r
+    return forwardPtr;\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeBackwardMatrix()\r
+  //\r
+  // Computes a set of backward probability matrices for aligning\r
+  // seq1 and seq2.\r
+  //\r
+  // For efficiency reasons, a single-dimensional floating-point\r
+  // array is used here, with the following indexing scheme:\r
+  //\r
+  //    backward[i + NumMatrixTypes * (j * (seq2Length+1) + k)]\r
+  //    refers to the probability of starting in state i and\r
+  //    aligning from character j+1 to the end of the first\r
+  //    sequence and from character k+1 to the end of the second\r
+  //    sequence.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  // flag selects the model: true uses the NumMatrixTypes-state model\r
+  // (transProb / initialDistribution); false uses the 3-state local model\r
+  // (local_transProb / random_transProb).  The matrix is allocated with\r
+  // new and returned; this function does not free it.\r
+  VF *ComputeBackwardMatrix (Sequence *seq1, Sequence *seq2, bool flag=true) const {\r
+\r
+    assert (seq1);\r
+    assert (seq2);\r
+\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+    SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+    SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+\r
+    // layout: 'stride' floats per cell, cells stored row by row\r
+    const int stride = flag ? NumMatrixTypes : 3;\r
+    const int rowLen = seq2Length + 1;\r
+    const int lastCell = (seq1Length+1) * (seq2Length+1) - 1;\r
+\r
+    // allocate the matrix, every entry starting at LOG_ZERO\r
+    VF *backwardPtr = new VF (stride * (seq1Length+1) * (seq2Length+1), LOG_ZERO);\r
+    assert (backwardPtr);\r
+    VF &backward = *backwardPtr;\r
+\r
+    // boundary condition (not applied to the local model): the final\r
+    // cell is seeded with the initial distribution\r
+    if (flag){\r
+      for (int k = 0; k < NumMatrixTypes; k++)\r
+        backward[NumMatrixTypes * lastCell + k] = initialDistribution[k];\r
+    }\r
+\r
+    // walk the cells from the bottom-right corner back to the top-left one\r
+    int cell = lastCell * stride;\r
+    for (int i = seq1Length; i >= 0; i--){\r
+      const bool rowBelow = (i < seq1Length);\r
+      unsigned char c1 = rowBelow ? (unsigned char) iter1[i+1] : '~';\r
+      for (int j = seq2Length; j >= 0; j--, cell -= stride){\r
+        const bool colRight = (j < seq2Length);\r
+        unsigned char c2 = colRight ? (unsigned char) iter2[j+1] : '~';\r
+\r
+        // offsets of the neighbouring cells (only dereferenced when the\r
+        // corresponding neighbour exists)\r
+        const int below = cell + stride * rowLen;   // (i+1, j)\r
+        const int right = cell + stride;            // (i, j+1)\r
+        const int diag  = below + stride;           // (i+1, j+1)\r
+\r
+        // local model: slot 0 of every cell starts at LOG_ONE\r
+        if (!flag) backward[cell] = LOG_ONE;\r
+\r
+        // continue through slot 0 of cell (i+1, j+1)\r
+        if (rowBelow && colRight){\r
+          if (flag){\r
+            const float viaMatch = backward[diag] + matchProb[c1][c2];\r
+            for (int k = 0; k < NumMatrixTypes; k++)\r
+              LOG_PLUS_EQUALS (backward[k + cell], viaMatch + transProb[k][0]);\r
+          }\r
+          else{  // local\r
+            const float viaMatch = backward[diag] + matchProb[c1][c2] - insProb[c1][0] - insProb[c2][0];\r
+            for (int k = 0; k < 3; k++)\r
+              LOG_PLUS_EQUALS (backward[k + cell], viaMatch + local_transProb[k][0] - 2*random_transProb[1]);\r
+          }\r
+        }\r
+\r
+        // continue through the odd-numbered slots of cell (i+1, j)\r
+        if (rowBelow){\r
+          if (flag){\r
+            for (int k = 0; k < NumInsertStates; k++){\r
+              const int s = 2*k+1;\r
+              LOG_PLUS_EQUALS (backward[0 + cell], backward[s + below] + insProb[c1][k] + transProb[0][s]);\r
+              LOG_PLUS_EQUALS (backward[s + cell], backward[s + below] + insProb[c1][k] + transProb[s][s]);\r
+            }\r
+          }\r
+          else{  // local\r
+            LOG_PLUS_EQUALS (backward[0 + cell], backward[1 + below] + local_transProb[0][1] - random_transProb[1]);\r
+            LOG_PLUS_EQUALS (backward[1 + cell], backward[1 + below] + local_transProb[1][1] - random_transProb[1]);\r
+          }\r
+        }\r
+\r
+        // continue through the even-numbered slots of cell (i, j+1)\r
+        if (colRight){\r
+          if (flag){\r
+            for (int k = 0; k < NumInsertStates; k++){\r
+              const int s = 2*k+2;\r
+              LOG_PLUS_EQUALS (backward[0 + cell], backward[s + right] + insProb[c2][k] + transProb[0][s]);\r
+              LOG_PLUS_EQUALS (backward[s + cell], backward[s + right] + insProb[c2][k] + transProb[s][s]);\r
+            }\r
+          }\r
+          else{  // local\r
+            LOG_PLUS_EQUALS (backward[0 + cell], backward[2 + right] + local_transProb[0][2] - random_transProb[1]);\r
+            LOG_PLUS_EQUALS (backward[2 + cell], backward[2 + right] + local_transProb[2][2] - random_transProb[1]);\r
+          }\r
+        }\r
+      }\r
+    }\r
+\r
+    return backwardPtr;\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeTotalProbability()\r
+  //\r
+  // Computes the total probability of an alignment given\r
+  // the forward and backward matrices.\r
+  // flag: 1 probcons, 0 local\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  float ComputeTotalProbability (Sequence *seq1, Sequence *seq2,\r
+                                 const VF &forward, const VF &backward, bool flag=true) const {\r
+\r
+    // compute total probability\r
+    float totalForwardProb = LOG_ZERO;\r
+    float totalBackwardProb = LOG_ZERO;\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+\r
+    if(flag){\r
+       for (int k = 0; k < NumMatrixTypes; k++){\r
+               LOG_PLUS_EQUALS (totalForwardProb,\r
+                       forward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + \r
+                      backward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]);\r
+       }\r
+\r
+       totalBackwardProb = \r
+               forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] +\r
+               backward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)];\r
+\r
+       for (int k = 0; k < NumInsertStates; k++){\r
+               LOG_PLUS_EQUALS (totalBackwardProb,\r
+                      forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] +\r
+                      backward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)]);\r
+               LOG_PLUS_EQUALS (totalBackwardProb,\r
+                      forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] +\r
+                      backward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)]);\r
+       }\r
+    }\r
+    else{\r
+       SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+       SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+       int ij = 0;\r
+       for (int i = 0; i <= seq1Length; i++){\r
+               unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];\r
+               for (int j = 0; j <= seq2Length; j++){\r
+                       unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];\r
+                       if(i>0&&j>0) {\r
+                               LOG_PLUS_EQUALS (totalForwardProb,forward[ij]); \r
+                               LOG_PLUS_EQUALS (totalBackwardProb,backward[ij] + matchProb[c1][c2] \r
+                                       - insProb[c1][0] - insProb[c2][0] - 2*random_transProb[1]);  \r
+                       }\r
+                       ij += 3;\r
+               }\r
+       }\r
+\r
+    }\r
+        \r
+    return (totalForwardProb + totalBackwardProb) / 2;\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputePosteriorMatrix()\r
+  //\r
+  // Computes the posterior probability matrix based on\r
+  // the forward and backward matrices.\r
+  // flag: 1 probcons, 0 local \r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  VF *ComputePosteriorMatrix (Sequence *seq1, Sequence *seq2,\r
+                              const VF &forward, const VF &backward, bool flag=true) const {\r
+\r
+    assert (seq1);\r
+    assert (seq2);\r
+\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+\r
+    float totalProb = ComputeTotalProbability (seq1, seq2,forward, backward, flag);\r
+\r
+    // compute posterior matrices\r
+    VF *posteriorPtr = new VF((seq1Length+1) * (seq2Length+1)); assert (posteriorPtr);\r
+    VF &posterior = *posteriorPtr;\r
+\r
+    int ij = 0;\r
+    VF::iterator ptr = posterior.begin();\r
+\r
+    for (int i = 0; i <= seq1Length; i++){\r
+      for (int j = 0; j <= seq2Length; j++){\r
+        *(ptr++) = EXP (min (LOG_ONE, forward[ij] + backward[ij] - totalProb));\r
+        if(flag) ij += NumMatrixTypes;\r
+        else ij += 3;\r
+      }\r
+    }\r
+\r
+    posterior[0] = 0;\r
+\r
+    return posteriorPtr;\r
+  }\r
+\r
+  /*\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeExpectedCounts()\r
+  //\r
+  // Computes the expected counts for the various transitions.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  VVF *ComputeExpectedCounts () const {\r
+\r
+    assert (seq1);\r
+    assert (seq2);\r
+\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+    SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+    SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+\r
+    // compute total probability\r
+    float totalProb = ComputeTotalProbability (seq1Length, seq2Length,\r
+                                               forward, backward);\r
+\r
+    // initialize expected counts\r
+    VVF *countsPtr = new VVF(NumMatrixTypes + 1, VF(NumMatrixTypes, LOG_ZERO)); assert (countsPtr);\r
+    VVF &counts = *countsPtr;\r
+\r
+    // remember offset for each index combination\r
+    int ij = 0;\r
+    int i1j = -seq2Length - 1;\r
+    int ij1 = -1;\r
+    int i1j1 = -seq2Length - 2;\r
+\r
+    ij *= NumMatrixTypes;\r
+    i1j *= NumMatrixTypes;\r
+    ij1 *= NumMatrixTypes;\r
+    i1j1 *= NumMatrixTypes;\r
+\r
+    // compute expected counts\r
+    for (int i = 0; i <= seq1Length; i++){\r
+      unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];\r
+      for (int j = 0; j <= seq2Length; j++){\r
+        unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];\r
+\r
+        if (i > 0 && j > 0){\r
+          for (int k = 0; k < NumMatrixTypes; k++)\r
+            LOG_PLUS_EQUALS (counts[k][0],\r
+                             forward[k + i1j1] + transProb[k][0] +\r
+                             matchProb[c1][c2] + backward[0 + ij]);\r
+        }\r
+        if (i > 0){\r
+          for (int k = 0; k < NumInsertStates; k++){\r
+            LOG_PLUS_EQUALS (counts[0][2*k+1],\r
+                             forward[0 + i1j] + transProb[0][2*k+1] +\r
+                             insProb[c1][k] + backward[2*k+1 + ij]);\r
+            LOG_PLUS_EQUALS (counts[2*k+1][2*k+1],\r
+                             forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1] +\r
+                             insProb[c1][k] + backward[2*k+1 + ij]);\r
+          }\r
+        }\r
+        if (j > 0){\r
+          for (int k = 0; k < NumInsertStates; k++){\r
+            LOG_PLUS_EQUALS (counts[0][2*k+2],\r
+                             forward[0 + ij1] + transProb[0][2*k+2] +\r
+                             insProb[c2][k] + backward[2*k+2 + ij]);\r
+            LOG_PLUS_EQUALS (counts[2*k+2][2*k+2],\r
+                             forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2] +\r
+                             insProb[c2][k] + backward[2*k+2 + ij]);\r
+          }\r
+        }\r
+\r
+        ij += NumMatrixTypes;\r
+        i1j += NumMatrixTypes;\r
+        ij1 += NumMatrixTypes;\r
+        i1j1 += NumMatrixTypes;\r
+      }\r
+    }\r
+\r
+    // scale all expected counts appropriately\r
+    for (int i = 0; i < NumMatrixTypes; i++)\r
+      for (int j = 0; j < NumMatrixTypes; j++)\r
+        counts[i][j] -= totalProb;\r
+\r
+  }\r
+  */\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeNewParameters()\r
+  //\r
+  // Computes a new parameter set based on the expected counts\r
+  // given.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  // Inputs:\r
+  //   seq1, seq2          - the two sequences (must be non-null)\r
+  //   forward, backward   - matrices read with NumMatrixTypes slots per cell\r
+  //   enableTrainEmissions- when true, emitPairs and emitSingle are also\r
+  //                         re-estimated; otherwise they are left untouched\r
+  // Outputs (written in place): initDistribMat, gapOpen, gapExtend and,\r
+  // if enabled, emitPairs / emitSingle.\r
+  void ComputeNewParameters (Sequence *seq1, Sequence *seq2,\r
+                            const VF &forward, const VF &backward,\r
+                             VF &initDistribMat, VF &gapOpen,\r
+                             VF &gapExtend, VVF &emitPairs, VF &emitSingle, bool enableTrainEmissions) const {\r
+    \r
+    assert (seq1);\r
+    assert (seq2);\r
+\r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+    SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+    SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+\r
+    // compute total probability\r
+    float totalProb = ComputeTotalProbability (seq1, seq2,\r
+                                               forward, backward);\r
+    \r
+    // initialize expected counts (all in log space)\r
+    VVF transCounts (NumMatrixTypes, VF (NumMatrixTypes, LOG_ZERO));\r
+    VF initCounts (NumMatrixTypes, LOG_ZERO);\r
+    VVF pairCounts (256, VF (256, LOG_ZERO));\r
+    VF singleCounts (256, LOG_ZERO);\r
+    \r
+    // remember offset for each index combination\r
+    int ij = 0;\r
+    int i1j = -seq2Length - 1;\r
+    int ij1 = -1;\r
+    int i1j1 = -seq2Length - 2;\r
+\r
+    ij *= NumMatrixTypes;\r
+    i1j *= NumMatrixTypes;\r
+    ij1 *= NumMatrixTypes;\r
+    i1j1 *= NumMatrixTypes;\r
+\r
+    // compute initial distribution posteriors\r
+    initCounts[0] = LOG_ADD (forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] +\r
+                            backward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)],\r
+                            forward[0 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + \r
+                            backward[0 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]);\r
+    for (int k = 0; k < NumInsertStates; k++){\r
+      initCounts[2*k+1] = LOG_ADD (forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] +\r
+                                  backward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)],\r
+                                  forward[2*k+1 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + \r
+                                  backward[2*k+1 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]);\r
+      initCounts[2*k+2] = LOG_ADD (forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] +\r
+                                  backward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)],\r
+                                  forward[2*k+2 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + \r
+                                  backward[2*k+2 + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]);\r
+    }\r
+\r
+    // compute expected counts\r
+    // (characters go through unsigned char before toupper: the <cctype>\r
+    // functions are undefined for negative arguments other than EOF)\r
+    for (int i = 0; i <= seq1Length; i++){\r
+      unsigned char c1 = (i == 0) ? '~' : (unsigned char) toupper((unsigned char) iter1[i]);\r
+      for (int j = 0; j <= seq2Length; j++){\r
+        unsigned char c2 = (j == 0) ? '~' : (unsigned char) toupper((unsigned char) iter2[j]);\r
+\r
+        if (i > 0 && j > 0){\r
+          // cell (1,1) is entered from the initial distribution\r
+          if (enableTrainEmissions && i == 1 && j == 1){\r
+            LOG_PLUS_EQUALS (pairCounts[c1][c2],\r
+                             initialDistribution[0] + matchProb[c1][c2] + backward[0 + ij]);\r
+            LOG_PLUS_EQUALS (pairCounts[c2][c1],\r
+                             initialDistribution[0] + matchProb[c2][c1] + backward[0 + ij]);\r
+          }\r
+\r
+          for (int k = 0; k < NumMatrixTypes; k++){\r
+            LOG_PLUS_EQUALS (transCounts[k][0],\r
+                             forward[k + i1j1] + transProb[k][0] +\r
+                             matchProb[c1][c2] + backward[0 + ij]);\r
+            // parenthesised so that pair counts are only gathered when\r
+            // emission training is on (&& binds tighter than ||)\r
+            if (enableTrainEmissions && (i != 1 || j != 1)){\r
+              LOG_PLUS_EQUALS (pairCounts[c1][c2],\r
+                               forward[k + i1j1] + transProb[k][0] +\r
+                               matchProb[c1][c2] + backward[0 + ij]);\r
+              LOG_PLUS_EQUALS (pairCounts[c2][c1],\r
+                               forward[k + i1j1] + transProb[k][0] +\r
+                               matchProb[c2][c1] + backward[0 + ij]);\r
+            }\r
+          }\r
+        }\r
+        if (i > 0){\r
+          for (int k = 0; k < NumInsertStates; k++){\r
+            LOG_PLUS_EQUALS (transCounts[0][2*k+1],\r
+                             forward[0 + i1j] + transProb[0][2*k+1] +\r
+                             insProb[c1][k] + backward[2*k+1 + ij]);\r
+            LOG_PLUS_EQUALS (transCounts[2*k+1][2*k+1],\r
+                             forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1] +\r
+                             insProb[c1][k] + backward[2*k+1 + ij]);\r
+            if (enableTrainEmissions){\r
+              if (i == 1 && j == 0){\r
+                LOG_PLUS_EQUALS (singleCounts[c1],\r
+                                 initialDistribution[2*k+1] + insProb[c1][k] + backward[2*k+1 + ij]);\r
+              }\r
+              else {\r
+                LOG_PLUS_EQUALS (singleCounts[c1],\r
+                                 forward[0 + i1j] + transProb[0][2*k+1] +\r
+                                 insProb[c1][k] + backward[2*k+1 + ij]);\r
+                LOG_PLUS_EQUALS (singleCounts[c1],\r
+                                 forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1] +\r
+                                 insProb[c1][k] + backward[2*k+1 + ij]);\r
+              }\r
+            }\r
+          }\r
+        }\r
+        if (j > 0){\r
+          for (int k = 0; k < NumInsertStates; k++){\r
+            LOG_PLUS_EQUALS (transCounts[0][2*k+2],\r
+                             forward[0 + ij1] + transProb[0][2*k+2] +\r
+                             insProb[c2][k] + backward[2*k+2 + ij]);\r
+            LOG_PLUS_EQUALS (transCounts[2*k+2][2*k+2],\r
+                             forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2] +\r
+                             insProb[c2][k] + backward[2*k+2 + ij]);\r
+            if (enableTrainEmissions){\r
+              if (i == 0 && j == 1){\r
+                LOG_PLUS_EQUALS (singleCounts[c2],\r
+                                 initialDistribution[2*k+2] + insProb[c2][k] + backward[2*k+2 + ij]);\r
+              }\r
+              else {\r
+                LOG_PLUS_EQUALS (singleCounts[c2],\r
+                                 forward[0 + ij1] + transProb[0][2*k+2] +\r
+                                 insProb[c2][k] + backward[2*k+2 + ij]);\r
+                LOG_PLUS_EQUALS (singleCounts[c2],\r
+                                 forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2] +\r
+                                 insProb[c2][k] + backward[2*k+2 + ij]);\r
+              }\r
+            }\r
+          }\r
+        }\r
+      \r
+        ij += NumMatrixTypes;\r
+        i1j += NumMatrixTypes;\r
+        ij1 += NumMatrixTypes;\r
+        i1j1 += NumMatrixTypes;\r
+      }\r
+    }\r
+\r
+    // scale all expected counts appropriately\r
+    for (int i = 0; i < NumMatrixTypes; i++){\r
+      initCounts[i] -= totalProb;\r
+      for (int j = 0; j < NumMatrixTypes; j++)\r
+        transCounts[i][j] -= totalProb;\r
+    }\r
+    if (enableTrainEmissions){\r
+      for (int i = 0; i < 256; i++){\r
+        for (int j = 0; j < 256; j++)\r
+          pairCounts[i][j] -= totalProb;\r
+        singleCounts[i] -= totalProb;\r
+      }\r
+    }\r
+\r
+    // compute new initial distribution\r
+    float totalInitDistribCounts = 0;\r
+    for (int i = 0; i < NumMatrixTypes; i++)\r
+      totalInitDistribCounts += exp (initCounts[i]); // should be 2\r
+    initDistribMat[0] = min (1.0f, max (0.0f, (float) exp (initCounts[0]) / totalInitDistribCounts));\r
+    for (int k = 0; k < NumInsertStates; k++){\r
+      // both slots of an insert pair share the average of their counts\r
+      float val = (exp (initCounts[2*k+1]) + exp (initCounts[2*k+2])) / 2;\r
+      initDistribMat[2*k+1] = initDistribMat[2*k+2] = min (1.0f, max (0.0f, val / totalInitDistribCounts));\r
+    }\r
+\r
+    // compute total counts for match state\r
+    float inMatchStateCounts = 0;\r
+    for (int i = 0; i < NumMatrixTypes; i++)\r
+      inMatchStateCounts += exp (transCounts[0][i]);\r
+    for (int i = 0; i < NumInsertStates; i++){\r
+\r
+      // compute total counts for gap state\r
+      float inGapStateCounts =\r
+        exp (transCounts[2*i+1][0]) +\r
+        exp (transCounts[2*i+1][2*i+1]) +\r
+        exp (transCounts[2*i+2][0]) +\r
+        exp (transCounts[2*i+2][2*i+2]);\r
+\r
+      gapOpen[2*i] = gapOpen[2*i+1] =\r
+        (exp (transCounts[0][2*i+1]) +\r
+         exp (transCounts[0][2*i+2])) /\r
+        (2 * inMatchStateCounts);\r
+\r
+      gapExtend[2*i] = gapExtend[2*i+1] =\r
+        (exp (transCounts[2*i+1][2*i+1]) +\r
+         exp (transCounts[2*i+2][2*i+2])) /\r
+        inGapStateCounts;\r
+    }\r
+\r
+    if (enableTrainEmissions){\r
+      float totalPairCounts = 0;\r
+      float totalSingleCounts = 0;\r
+      for (int i = 0; i < 256; i++){\r
+        for (int j = 0; j <= i; j++)\r
+          totalPairCounts += exp (pairCounts[j][i]);\r
+        totalSingleCounts += exp (singleCounts[i]);\r
+      }\r
+      \r
+      // every non-lowercase code and its lowercase counterpart receive the\r
+      // same value; i and j are already in 0..255, so they are passed to\r
+      // islower/tolower directly instead of through a (possibly negative) char\r
+      for (int i = 0; i < 256; i++) if (!islower (i)){\r
+        int li = (int)((unsigned char) tolower (i));\r
+        for (int j = 0; j <= i; j++) if (!islower (j)){\r
+          int lj = (int)((unsigned char) tolower (j));\r
+          emitPairs[i][j] = emitPairs[i][lj] = emitPairs[li][j] = emitPairs[li][lj] = \r
+            emitPairs[j][i] = emitPairs[j][li] = emitPairs[lj][i] = emitPairs[lj][li] = exp(pairCounts[j][i]) / totalPairCounts;\r
+        }\r
+        emitSingle[i] = emitSingle[li] = exp(singleCounts[i]) / totalSingleCounts;\r
+      }\r
+    }\r
+  }\r
+    \r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeAlignment()\r
+  //\r
+  // Computes an alignment based on the given posterior matrix.\r
+  // This is done by finding the maximum summing path (or\r
+  // maximum weight trace) through the posterior matrix.  The\r
+  // final alignment is returned as a pair consisting of:\r
+  //    (1) a string (e.g., XXXBBXXXBBBBBBYYYYBBB) where X's and\r
+  //        Y's denote insertions in one of the two sequences and\r
+  //        B's denote that both sequences are present (i.e.\r
+  //        matches).\r
+  //    (2) a float indicating the sum achieved\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  pair<SafeVector<char> *, float> ComputeAlignment (int seq1Length, int seq2Length,\r
+                                                    const VF &posterior) const {\r
+\r
+    float *twoRows = new float[(seq2Length+1)*2]; assert (twoRows);\r
+    float *oldRow = twoRows;\r
+    float *newRow = twoRows + seq2Length + 1;\r
+\r
+    char *tracebackMatrix = new char[(seq1Length+1)*(seq2Length+1)]; assert (tracebackMatrix);\r
+    char *tracebackPtr = tracebackMatrix;\r
+\r
+    VF::const_iterator posteriorPtr = posterior.begin() + seq2Length + 1;\r
+\r
+    // initialization\r
+    for (int i = 0; i <= seq2Length; i++){\r
+      oldRow[i] = 0;\r
+      *(tracebackPtr++) = 'L';\r
+    }\r
+\r
+    // fill in matrix\r
+    for (int i = 1; i <= seq1Length; i++){\r
+\r
+      // initialize left column\r
+      newRow[0] = 0;\r
+      posteriorPtr++;\r
+      *(tracebackPtr++) = 'U';\r
+\r
+      // fill in rest of row\r
+      for (int j = 1; j <= seq2Length; j++){\r
+        ChooseBestOfThree (*(posteriorPtr++) + oldRow[j-1], newRow[j-1], oldRow[j],\r
+                           'D', 'L', 'U', &newRow[j], tracebackPtr++);\r
+      }\r
+\r
+      // swap rows\r
+      float *temp = oldRow;\r
+      oldRow = newRow;\r
+      newRow = temp;\r
+    }\r
+\r
+    // store best score\r
+    float total = oldRow[seq2Length];\r
+    delete [] twoRows;\r
+\r
+    // compute traceback\r
+    SafeVector<char> *alignment = new SafeVector<char>; assert (alignment);\r
+    int r = seq1Length, c = seq2Length;\r
+    while (r != 0 || c != 0){\r
+      char ch = tracebackMatrix[r*(seq2Length+1) + c];\r
+      switch (ch){\r
+      case 'L': c--; alignment->push_back ('Y'); break;\r
+      case 'U': r--; alignment->push_back ('X'); break;\r
+      case 'D': c--; r--; alignment->push_back ('B'); break;\r
+      default: assert (false);\r
+      }\r
+    }\r
+\r
+    delete [] tracebackMatrix;\r
+\r
+    reverse (alignment->begin(), alignment->end());\r
+    \r
+    return make_pair(alignment, total);\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeAlignmentWithGapPenalties()\r
+  //\r
+  // Similar to ComputeAlignment() except with gap penalties.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  // align1 / align2 are the two groups being aligned; the length of each\r
+  // group is taken from its first sequence.  posterior is read as a\r
+  // row-major matrix with (seq2Length+1) entries per row.  numSeqs1 and\r
+  // numSeqs2 size the penalty tables; they are presumably the sequence\r
+  // counts of align1 and align2 - TODO confirm against callers.\r
+  // The returned SafeVector is allocated with new and not freed here.\r
+  // NOTE(review): the best score is computed into 'total' but the second\r
+  // element of the returned pair is always 1.0f.\r
+  pair<SafeVector<char> *, float> ComputeAlignmentWithGapPenalties (MultiSequence *align1,\r
+                                                                    MultiSequence *align2,\r
+                                                                    const VF &posterior, int numSeqs1,\r
+                                                                    int numSeqs2,\r
+                                                                    float gapOpenPenalty,\r
+                                                                    float gapContinuePenalty) const {\r
+    int seq1Length = align1->GetSequence(0)->GetLength();\r
+    int seq2Length = align2->GetSequence(0)->GetLength();\r
+\r
+    // the number of active sequences at any given column is defined to be the\r
+    // number of non-gap characters in that column; the number of gap opens at\r
+    // any given column is defined to be the number of gap characters in that\r
+    // column where the previous character in the respective sequence was not\r
+    // a gap\r
+    // NOTE(review): as written, numGapOpens is incremented for non-gap\r
+    // characters whose predecessor is also a non-gap, which does not match\r
+    // the definition above; behaviour is left unchanged here.\r
+    SafeVector<int> numActive1 (seq1Length+1), numGapOpens1 (seq1Length+1);\r
+    SafeVector<int> numActive2 (seq2Length+1), numGapOpens2 (seq2Length+1);\r
+\r
+    // compute number of active sequences and gap opens for each group\r
+    for (int i = 0; i < align1->GetNumSequences(); i++){\r
+      SafeVector<char>::iterator dataPtr = align1->GetSequence(i)->GetDataPtr();\r
+      numActive1[0] = numGapOpens1[0] = 0;\r
+      for (int j = 1; j <= seq1Length; j++){\r
+        if (dataPtr[j] != '-'){\r
+          numActive1[j]++;\r
+          numGapOpens1[j] += (j != 1 && dataPtr[j-1] != '-');\r
+        }\r
+      }\r
+    }\r
+    for (int i = 0; i < align2->GetNumSequences(); i++){\r
+      SafeVector<char>::iterator dataPtr = align2->GetSequence(i)->GetDataPtr();\r
+      numActive2[0] = numGapOpens2[0] = 0;\r
+      for (int j = 1; j <= seq2Length; j++){\r
+        if (dataPtr[j] != '-'){\r
+          numActive2[j]++;\r
+          numGapOpens2[j] += (j != 1 && dataPtr[j-1] != '-');\r
+        }\r
+      }\r
+    }\r
+\r
+    // penalty tables; openingPenalty2 is indexed [0..numSeqs2][0..numSeqs1],\r
+    // so its dimensions are the transpose of openingPenalty1's\r
+    VVF openingPenalty1 (numSeqs1+1, VF (numSeqs2+1));\r
+    VF continuingPenalty1 (numSeqs1+1);\r
+    VVF openingPenalty2 (numSeqs2+1, VF (numSeqs1+1));\r
+    VF continuingPenalty2 (numSeqs2+1);\r
+\r
+    // precompute penalties\r
+    for (int i = 0; i <= numSeqs1; i++)\r
+      for (int j = 0; j <= numSeqs2; j++)\r
+        openingPenalty1[i][j] = i * (gapOpenPenalty * j + gapContinuePenalty * (numSeqs2 - j));\r
+    for (int i = 0; i <= numSeqs1; i++)\r
+      continuingPenalty1[i] = i * gapContinuePenalty * numSeqs2;\r
+    for (int i = 0; i <= numSeqs2; i++)\r
+      for (int j = 0; j <= numSeqs1; j++)\r
+        openingPenalty2[i][j] = i * (gapOpenPenalty * j + gapContinuePenalty * (numSeqs1 - j));\r
+    for (int i = 0; i <= numSeqs2; i++)\r
+      continuingPenalty2[i] = i * gapContinuePenalty * numSeqs1;\r
+\r
+    // six score rows: previous and current row for each of the three states\r
+    float *twoRows = new float[6*(seq2Length+1)]; assert (twoRows);\r
+    float *oldRowMatch = twoRows;\r
+    float *newRowMatch = twoRows + (seq2Length+1);\r
+    float *oldRowInsertX = twoRows + 2*(seq2Length+1);\r
+    float *newRowInsertX = twoRows + 3*(seq2Length+1);\r
+    float *oldRowInsertY = twoRows + 4*(seq2Length+1);\r
+    float *newRowInsertY = twoRows + 5*(seq2Length+1);\r
+\r
+    // three traceback characters per cell, in the order M, X, Y\r
+    char *tracebackMatrix = new char[3*(seq1Length+1)*(seq2Length+1)]; assert (tracebackMatrix);\r
+    char *tracebackPtr = tracebackMatrix;\r
+\r
+    VF::const_iterator posteriorPtr = posterior.begin() + seq2Length + 1;\r
+\r
+    // initialization\r
+    for (int i = 0; i <= seq2Length; i++){\r
+      oldRowMatch[i] = oldRowInsertX[i] = (i == 0) ? 0 : LOG_ZERO;\r
+      oldRowInsertY[i] = (i == 0) ? 0 : oldRowInsertY[i-1] + continuingPenalty2[numActive2[i]];\r
+      *(tracebackPtr) = *(tracebackPtr+1) = *(tracebackPtr+2) = 'Y';\r
+      tracebackPtr += 3;\r
+    }\r
+\r
+    // fill in matrix\r
+    for (int i = 1; i <= seq1Length; i++){\r
+\r
+      // initialize left column\r
+      newRowMatch[0] = newRowInsertY[0] = LOG_ZERO;\r
+      newRowInsertX[0] = oldRowInsertX[0] + continuingPenalty1[numActive1[i]];\r
+      posteriorPtr++;\r
+      *(tracebackPtr) = *(tracebackPtr+1) = *(tracebackPtr+2) = 'X';\r
+      tracebackPtr += 3;\r
+\r
+      // fill in rest of row\r
+      for (int j = 1; j <= seq2Length; j++){\r
+\r
+        // going to MATCH state\r
+        ChooseBestOfThree (oldRowMatch[j-1],\r
+                           oldRowInsertX[j-1],\r
+                           oldRowInsertY[j-1],\r
+                           'M', 'X', 'Y', &newRowMatch[j], tracebackPtr++);\r
+        newRowMatch[j] += *(posteriorPtr++);\r
+\r
+        // going to INSERT X state\r
+        ChooseBestOfThree (oldRowMatch[j] + openingPenalty1[numActive1[i]][numGapOpens2[j]],\r
+                           oldRowInsertX[j] + continuingPenalty1[numActive1[i]],\r
+                           oldRowInsertY[j] + openingPenalty1[numActive1[i]][numGapOpens2[j]],\r
+                           'M', 'X', 'Y', &newRowInsertX[j], tracebackPtr++);\r
+\r
+        // going to INSERT Y state\r
+        ChooseBestOfThree (newRowMatch[j-1] + openingPenalty2[numActive2[j]][numGapOpens1[i]],\r
+                           newRowInsertX[j-1] + openingPenalty2[numActive2[j]][numGapOpens1[i]],\r
+                           newRowInsertY[j-1] + continuingPenalty2[numActive2[j]],\r
+                           'M', 'X', 'Y', &newRowInsertY[j], tracebackPtr++);\r
+      }\r
+\r
+      // swap rows\r
+      float *temp;\r
+      temp = oldRowMatch; oldRowMatch = newRowMatch; newRowMatch = temp;\r
+      temp = oldRowInsertX; oldRowInsertX = newRowInsertX; newRowInsertX = temp;\r
+      temp = oldRowInsertY; oldRowInsertY = newRowInsertY; newRowInsertY = temp;\r
+    }\r
+\r
+    // store best score; 'matrix' records which state it ends in\r
+    float total;\r
+    char matrix;\r
+    ChooseBestOfThree (oldRowMatch[seq2Length], oldRowInsertX[seq2Length], oldRowInsertY[seq2Length],\r
+                       'M', 'X', 'Y', &total, &matrix);\r
+\r
+    delete [] twoRows;\r
+\r
+    // compute traceback: emit the step for the current state, then switch\r
+    // to the predecessor state stored for it\r
+    SafeVector<char> *alignment = new SafeVector<char>; assert (alignment);\r
+    int r = seq1Length, c = seq2Length;\r
+    while (r != 0 || c != 0){\r
+\r
+      int offset = (matrix == 'M') ? 0 : (matrix == 'X') ? 1 : 2;\r
+      char ch = tracebackMatrix[(r*(seq2Length+1) + c) * 3 + offset];\r
+      switch (matrix){\r
+      case 'Y': c--; alignment->push_back ('Y'); break;\r
+      case 'X': r--; alignment->push_back ('X'); break;\r
+      case 'M': c--; r--; alignment->push_back ('B'); break;\r
+      default: assert (false);\r
+      }\r
+      matrix = ch;\r
+    }\r
+\r
+    delete [] tracebackMatrix;\r
+\r
+    reverse (alignment->begin(), alignment->end());\r
+\r
+    return make_pair(alignment, 1.0f);\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::ComputeViterbiAlignment()\r
+  //\r
+  // Computes the highest probability pairwise alignment using the\r
+  // probabilistic model.  The final alignment is returned as a\r
+  //  pair consisting of:\r
+  //    (1) a string (e.g., XXXBBXXXBBBBBBYYYYBBB) where X's and\r
+  //        Y's denote insertions in one of the two sequences and\r
+  //        B's denote that both sequences are present (i.e.\r
+  //        matches).\r
+  //    (2) a float containing the log probability of the best\r
+  //        alignment (not used)\r
+  //\r
+  // The alignment vector is allocated with new and handed back as a\r
+  // raw pointer, so the caller is responsible for deleting it.\r
+  //\r
+  // Both DP tables are flat vectors holding three states per cell:\r
+  // the entry for cell (i,j) and state k lives at index\r
+  //    k + 3 * (i * (seq2Length+1) + j)\r
+  // with k = 0 the match state ('B'), k = 1 the state that consumes a\r
+  // letter of seq1 ('X') and k = 2 the state that consumes a letter\r
+  // of seq2 ('Y').\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+\r
+  pair<SafeVector<char> *, float> ComputeViterbiAlignment (Sequence *seq1, Sequence *seq2) const {\r
+    \r
+    assert (seq1);\r
+    assert (seq2);\r
+    \r
+    const int seq1Length = seq1->GetLength();\r
+    const int seq2Length = seq2->GetLength();\r
+    \r
+    // retrieve the pointers to the beginning of each sequence\r
+    // (only indices 1..length are read below; index 0 is never used)\r
+    SafeVector<char>::iterator iter1 = seq1->GetDataPtr();\r
+    SafeVector<char>::iterator iter2 = seq2->GetDataPtr();\r
+    \r
+    // create viterbi matrix\r
+    // (every entry starts at LOG_ZERO, i.e. "unreachable")\r
+    VF *viterbiPtr = new VF (3 * (seq1Length+1) * (seq2Length+1), LOG_ZERO);\r
+    assert (viterbiPtr);\r
+    VF &viterbi = *viterbiPtr;\r
+\r
+    // create traceback matrix\r
+    // (-1 marks "no predecessor recorded")\r
+    VI *tracebackPtr = new VI (3 * (seq1Length+1) * (seq2Length+1), -1);\r
+    assert (tracebackPtr);\r
+    VI &traceback = *tracebackPtr;\r
+\r
+    // initialization condition\r
+    // (hard-coded log start probabilities for the three states; they\r
+    //  stand in for the commented-out initialDistribution lookup)\r
+/*\r
+    for (int k = 0; k < NumMatrixTypes; k++)\r
+      viterbi[k] = initialDistribution[k];\r
+*/\r
+       viterbi[0] = LOG(0.6080327034);\r
+       viterbi[1] = LOG(0.1959836632);\r
+       viterbi[2] = LOG(0.1959836632);\r
+\r
+    // remember offset for each index combination\r
+    // ij   -> cell (i,   j)\r
+    // i1j  -> cell (i-1, j)\r
+    // ij1  -> cell (i,   j-1)\r
+    // i1j1 -> cell (i-1, j-1)\r
+    // The neighbour offsets start out negative; they are only used as\r
+    // indices under the i > 0 / j > 0 guards in the loop below.\r
+    int ij = 0;\r
+    int i1j = -seq2Length - 1;\r
+    int ij1 = -1;\r
+    int i1j1 = -seq2Length - 2;\r
+\r
+    // scale cell offsets to element offsets (three states per cell)\r
+    ij *= 3;\r
+    i1j *= 3;\r
+    ij1 *= 3;\r
+    i1j1 *= 3;\r
+\r
+    // compute viterbi scores\r
+    for (int i = 0; i <= seq1Length; i++){\r
+      // '~' is a placeholder for row/column 0; c1/c2 are only used\r
+      // under the i > 0 / j > 0 guards below\r
+      unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];\r
+      for (int j = 0; j <= seq2Length; j++){\r
+        unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];\r
+\r
+        // match state: best of the three states in the diagonal cell\r
+        if (i > 0 && j > 0){\r
+          for (int k = 0; k < 3; k++){\r
+           float newVal = viterbi[k + i1j1] + local_transProb[k][0] + matchProb[c1][c2];\r
+           if (viterbi[0 + ij] < newVal){\r
+             viterbi[0 + ij] = newVal;\r
+             traceback[0 + ij] = k;\r
+           }\r
+         }\r
+        }\r
+        // state 1 (consume c1): the loop runs once (k == 0), so 2*k+1\r
+        // is always 1; predecessor is the match or state-1 entry of the\r
+        // cell above\r
+        if (i > 0){\r
+          for (int k = 0; k < 1; k++){\r
+           float valFromMatch = insProb[c1][k] + viterbi[0 + i1j] + local_transProb[0][2*k+1];\r
+           float valFromIns = insProb[c1][k] + viterbi[2*k+1 + i1j] + local_transProb[2*k+1][2*k+1];\r
+           if (valFromMatch >= valFromIns){\r
+             viterbi[2*k+1 + ij] = valFromMatch;\r
+             traceback[2*k+1 + ij] = 0;\r
+           }\r
+           else {\r
+             viterbi[2*k+1 + ij] = valFromIns;\r
+             traceback[2*k+1 + ij] = 2*k+1;\r
+           }\r
+         }\r
+       }\r
+        // state 2 (consume c2): likewise a single iteration, 2*k+2 is\r
+        // always 2; predecessor is the match or state-2 entry of the\r
+        // cell to the left\r
+        if (j > 0){\r
+          for (int k = 0; k < 1; k++){\r
+           float valFromMatch = insProb[c2][k] + viterbi[0 + ij1] + local_transProb[0][2*k+2];\r
+           float valFromIns = insProb[c2][k] + viterbi[2*k+2 + ij1] + local_transProb[2*k+2][2*k+2];\r
+           if (valFromMatch >= valFromIns){\r
+             viterbi[2*k+2 + ij] = valFromMatch;\r
+             traceback[2*k+2 + ij] = 0;\r
+           }\r
+           else {\r
+             viterbi[2*k+2 + ij] = valFromIns;\r
+             traceback[2*k+2 + ij] = 2*k+2;\r
+           }\r
+         }\r
+        }\r
+\r
+        // advance all four offsets to the next cell in row-major order\r
+        ij += 3;\r
+        i1j += 3;\r
+        ij1 += 3;\r
+        i1j1 += 3;\r
+      }\r
+    }\r
+\r
+    // figure out best terminating cell\r
+    // (the three entries of cell (0,0) are overwritten with the same\r
+    //  start constants and then added as the final term for each state)\r
+    float bestProb = LOG_ZERO;\r
+    int state = -1;\r
+    viterbi[0] = LOG(0.6080327034);\r
+    viterbi[1] = LOG(0.1959836632);\r
+    viterbi[2] = LOG(0.1959836632);\r
+\r
+    for (int k = 0; k < 3; k++){\r
+      float thisProb = viterbi[k + 3 * ((seq1Length+1)*(seq2Length+1) - 1)] + viterbi[k];\r
+      if (bestProb < thisProb){\r
+       bestProb = thisProb;\r
+       state = k;\r
+      }\r
+    }\r
+    assert (state != -1);\r
+\r
+    // scores are no longer needed; only the traceback table is walked\r
+    delete viterbiPtr;\r
+\r
+    // compute traceback\r
+    // (walks from the bottom-right cell back to (0,0); the letters are\r
+    //  therefore collected in reverse order)\r
+    SafeVector<char> *alignment = new SafeVector<char>; assert (alignment);\r
+    int r = seq1Length, c = seq2Length;\r
+    while (r != 0 || c != 0){\r
+      int newState = traceback[state + 3 * (r * (seq2Length+1) + c)];     \r
+      if (state == 0){ c--; r--; alignment->push_back ('B');}\r
+      else if (state % 2 == 1){ r--; alignment->push_back ('X'); }\r
+      else { c--; alignment->push_back ('Y'); }      \r
+      state = newState;\r
+    }\r
+\r
+    delete tracebackPtr;\r
+\r
+    reverse (alignment->begin(), alignment->end());\r
+    \r
+    return make_pair(alignment, bestProb);\r
+  }\r
+\r
+  /////////////////////////////////////////////////////////////////\r
+  // ProbabilisticModel::BuildPosterior()\r
+  //\r
+  // Builds a posterior probability matrix needed to align a pair\r
+  // of alignments.  Mathematically, the returned matrix M is\r
+  // defined as follows:\r
+  //    M[i,j] =     sum          sum      f(s,t,i,j)\r
+  //             s in align1  t in align2\r
+  // where\r
+  //                  [  P(s[i'] <--> t[j'])\r
+  //                  [       if s[i'] is a letter in the ith column of align1 and\r
+  //                  [          t[j'] is a letter in the jth column of align2\r
+  //    f(s,t,i,j) =  [\r
+  //                  [  0    otherwise\r
+  //\r
+  // Parameters:\r
+  //    align1, align2  the two alignments; their column counts are\r
+  //                    taken from the length of sequence 0 of each.\r
+  //    sparseMatrices  pairwise posteriors, indexed by sequence label\r
+  //                    with the smaller label first.\r
+  //    cutoff          value subtracted from the accumulated entries\r
+  //                    (default 0, i.e. no subtraction).\r
+  //\r
+  // Returns a newly allocated, row-major (seq1Length+1) x\r
+  // (seq2Length+1) vector; the caller owns it.\r
+  /////////////////////////////////////////////////////////////////\r
+\r
+  VF *BuildPosterior (MultiSequence *align1, MultiSequence *align2,\r
+                      const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,\r
+                      float cutoff = 0.0f) const {\r
+    const int seq1Length = align1->GetSequence(0)->GetLength();\r
+    const int seq2Length = align2->GetSequence(0)->GetLength();\r
+\r
+    VF *posteriorPtr = new VF((seq1Length+1) * (seq2Length+1), 0); assert (posteriorPtr);\r
+    VF &posterior = *posteriorPtr;\r
+\r
+    // for each s in align1\r
+    for (int i = 0; i < align1->GetNumSequences(); i++){\r
+      int first = align1->GetSequence(i)->GetLabel();\r
+      // mapping1 translates a residue position of s into a column of\r
+      // align1; it is deleted at the end of this iteration\r
+      SafeVector<int> *mapping1 = align1->GetSequence(i)->GetMapping();\r
+\r
+      // for each t in align2\r
+      for (int j = 0; j < align2->GetNumSequences(); j++){\r
+        int second = align2->GetSequence(j)->GetLabel();\r
+        SafeVector<int> *mapping2 = align2->GetSequence(j)->GetMapping();\r
+\r
+        if (first < second){\r
+\r
+          // get the associated sparse matrix (rows follow s, columns follow t)\r
+          SparseMatrix *matrix = sparseMatrices[first][second];\r
+\r
+          for (int ii = 1; ii <= matrix->GetSeq1Length(); ii++){\r
+            SafeVector<PIF>::iterator row = matrix->GetRowPtr(ii);\r
+            int base = (*mapping1)[ii] * (seq2Length+1);\r
+            int rowSize = matrix->GetRowSize(ii);\r
+\r
+            // add in all relevant values\r
+            for (int jj = 0; jj < rowSize; jj++)\r
+              posterior[base + (*mapping2)[row[jj].first]] += row[jj].second;\r
+\r
+            // subtract cutoff\r
+            // NOTE(review): this loop indexes mapping2 with 0..len-1 while\r
+            // every other access in this function is 1-based -- confirm\r
+            // against GetMapping() before relying on a non-zero cutoff.\r
+            for (int jj = 0; jj < matrix->GetSeq2Length(); jj++)\r
+              posterior[base + (*mapping2)[jj]] -= cutoff;\r
+          }\r
+\r
+        } else {\r
+\r
+          // get the associated sparse matrix; it is stored with the\r
+          // smaller label first, so here rows follow t and columns follow s\r
+          SparseMatrix *matrix = sparseMatrices[second][first];\r
+\r
+          for (int jj = 1; jj <= matrix->GetSeq1Length(); jj++){\r
+            SafeVector<PIF>::iterator row = matrix->GetRowPtr(jj);\r
+            int base = (*mapping2)[jj];\r
+            int rowSize = matrix->GetRowSize(jj);\r
+\r
+            // add in all relevant values\r
+            for (int ii = 0; ii < rowSize; ii++)\r
+              posterior[base + (*mapping1)[row[ii].first] * (seq2Length + 1)] += row[ii].second;\r
+\r
+            // subtract cutoff (same 0-based indexing caveat as above)\r
+            for (int ii = 0; ii < matrix->GetSeq2Length(); ii++)\r
+              posterior[base + (*mapping1)[ii] * (seq2Length + 1)] -= cutoff;\r
+          }\r
+\r
+        }\r
+\r
+        delete mapping2;\r
+      }\r
+\r
+      delete mapping1;\r
+    }\r
+\r
+    return posteriorPtr;\r
+  }\r
+\r
+       //added by Liu Yongchao.Feb 23, 2010\r
+       //\r
+       // Weighted variant of BuildPosterior(): every sequence pair (s, t)\r
+       // contributes its pairwise posteriors scaled by\r
+       //    w = seqsWeights[label(s)] * seqsWeights[label(t)] / totalWeights\r
+       // where totalWeights is the sum of those products over all pairs\r
+       // drawn from align1 x align2.  The cutoff is scaled by the same w.\r
+       //\r
+       // seqsWeights is indexed by sequence label (GetLabel()).\r
+       // Returns a newly allocated, row-major (seq1Length+1) x\r
+       // (seq2Length+1) vector; the caller owns it.\r
+       //\r
+       // NOTE(review): if every weight product is 0, totalWeights is 0 and\r
+       // w becomes a division by zero -- confirm callers never pass that.\r
+       VF *BuildPosterior(int* seqsWeights, MultiSequence *align1,\r
+                       MultiSequence *align2,\r
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,\r
+                       float cutoff = 0.0f) const {\r
+               const int seq1Length = align1->GetSequence(0)->GetLength();\r
+               const int seq2Length = align2->GetSequence(0)->GetLength();\r
+\r
+               VF *posteriorPtr = new VF((seq1Length + 1) * (seq2Length + 1), 0);\r
+               assert(posteriorPtr);\r
+               VF &posterior = *posteriorPtr;\r
+\r
+               //compute the total sum of all weights\r
+               float totalWeights = 0;\r
+               for (int i = 0; i < align1->GetNumSequences(); i++) {\r
+                       int first = align1->GetSequence(i)->GetLabel();\r
+                       int w1 = seqsWeights[first];\r
+                       for (int j = 0; j < align2->GetNumSequences(); j++) {\r
+                               int second = align2->GetSequence(j)->GetLabel();\r
+                               int w2 = seqsWeights[second];\r
+\r
+                               totalWeights += w1 * w2;\r
+                       }\r
+               }\r
+               // for each s in align1\r
+               for (int i = 0; i < align1->GetNumSequences(); i++) {\r
+                       int first = align1->GetSequence(i)->GetLabel();\r
+                       int w1 = seqsWeights[first];\r
+                       // maps residue positions of s to columns of align1;\r
+                       // deleted at the end of this iteration\r
+                       SafeVector<int> *mapping1 = align1->GetSequence(i)->GetMapping();\r
+                       // for each t in align2\r
+                       for (int j = 0; j < align2->GetNumSequences(); j++) {\r
+                               int second = align2->GetSequence(j)->GetLabel();\r
+                               int w2 = seqsWeights[second];\r
+                               SafeVector<int> *mapping2 =\r
+                                               align2->GetSequence(j)->GetMapping();\r
+\r
+                               // normalized weight of this sequence pair\r
+                               float w = (float) (w1 * w2) / totalWeights;\r
+                               if (first < second) {\r
+\r
+                                       // get the associated sparse matrix\r
+                                       // (rows follow s, columns follow t)\r
+                                       SparseMatrix *matrix = sparseMatrices[first][second];\r
+\r
+                                       for (int ii = 1; ii <= matrix->GetSeq1Length(); ii++) {\r
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(ii);\r
+                                               int base = (*mapping1)[ii] * (seq2Length + 1);\r
+                                               int rowSize = matrix->GetRowSize(ii);\r
+\r
+                                               // add in all relevant values\r
+                                               for (int jj = 0; jj < rowSize; jj++)\r
+                                                       posterior[base + (*mapping2)[row[jj].first]] += w\r
+                                                                       * row[jj].second;\r
+\r
+                                               // subtract cutoff\r
+                                               // NOTE(review): mapping2 is indexed 0..len-1 here but\r
+                                               // 1-based everywhere else in this function -- confirm\r
+                                               // before relying on a non-zero cutoff.\r
+                                               for (int jj = 0; jj < matrix->GetSeq2Length(); jj++)\r
+                                                       posterior[base + (*mapping2)[jj]] -= w * cutoff;\r
+                                       }\r
+\r
+                               } else {\r
+\r
+                                       // get the associated sparse matrix; stored with the\r
+                                       // smaller label first, so rows follow t here\r
+                                       SparseMatrix *matrix = sparseMatrices[second][first];\r
+\r
+                                       for (int jj = 1; jj <= matrix->GetSeq1Length(); jj++) {\r
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(jj);\r
+                                               int base = (*mapping2)[jj];\r
+                                               int rowSize = matrix->GetRowSize(jj);\r
+\r
+                                               // add in all relevant values\r
+                                               for (int ii = 0; ii < rowSize; ii++)\r
+                                                       posterior[base\r
+                                                                       + (*mapping1)[row[ii].first]\r
+                                                                                       * (seq2Length + 1)] += w\r
+                                                                       * row[ii].second;\r
+\r
+                                               // subtract cutoff (same 0-based indexing caveat as above)\r
+                                               for (int ii = 0; ii < matrix->GetSeq2Length(); ii++)\r
+                                                       posterior[base + (*mapping1)[ii] * (seq2Length + 1)] -=\r
+                                                                       w * cutoff;\r
+                                       }\r
+\r
+                               }\r
+\r
+                               delete mapping2;\r
+                       }\r
+\r
+                       delete mapping1;\r
+               }\r
+\r
+               return posteriorPtr;\r
+       }\r
+};\r
+\r
+#endif\r
diff --git a/binaries/src/GLProbs-1.0/SafeVector.h b/binaries/src/GLProbs-1.0/SafeVector.h
new file mode 100644 (file)
index 0000000..f42c2da
--- /dev/null
@@ -0,0 +1,65 @@
+/////////////////////////////////////////////////////////////////
+// SafeVector.h
+//
+// STL vector with array bounds checking.  To enable bounds
+// checking, #define ENABLE_CHECKS.
+/////////////////////////////////////////////////////////////////
+
+#ifndef SAFEVECTOR_H
+#define SAFEVECTOR_H
+
+#include <cassert>
+#include <vector>
+using namespace std;
+
+/////////////////////////////////////////////////////////////////
+// SafeVector
+//
+// Class derived from the STL std::vector.  When ENABLE_CHECKS is
+// defined, operator[] takes an int index and asserts that it lies
+// in [0, size()); otherwise the std::vector operators are used
+// unchanged.
+/////////////////////////////////////////////////////////////////
+
+template<class TYPE>
+class SafeVector: public std::vector<TYPE> {
+public:
+
+       // miscellaneous constructors (forwarded to std::vector)
+       SafeVector() :
+                       std::vector<TYPE>() {
+       }
+       SafeVector(size_t size) :
+                       std::vector<TYPE>(size) {
+       }
+       SafeVector(size_t size, const TYPE &value) :
+                       std::vector<TYPE>(size, value) {
+       }
+       SafeVector(const SafeVector &source) :
+                       std::vector<TYPE>(source) {
+       }
+
+#ifdef ENABLE_CHECKS
+
+       // [] array bounds checking
+       // size() lives in a dependent base class, so it must be reached
+       // through this-> to be found inside a template.
+       TYPE &operator[](int index) {
+               assert (index >= 0 && index < (int) this->size());
+               return std::vector<TYPE>::operator[] ((size_t) index);
+       }
+
+       // [] const array bounds checking
+       const TYPE &operator[] (int index) const {
+               assert (index >= 0 && index < (int) this->size());
+               return std::vector<TYPE>::operator[] ((size_t) index);
+       }
+
+#endif
+
+};
+
+// some commonly used vector types
+// (VI/VF are flat vectors of int/float; the VV* and VVV* aliases nest
+//  them two and three levels deep)
+typedef SafeVector<int> VI;
+typedef SafeVector<VI> VVI;
+typedef SafeVector<VVI> VVVI;
+typedef SafeVector<float> VF;
+typedef SafeVector<VF> VVF;
+typedef SafeVector<VVF> VVVF;
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/ScoreType.h b/binaries/src/GLProbs-1.0/ScoreType.h
new file mode 100644 (file)
index 0000000..47de13d
--- /dev/null
@@ -0,0 +1,368 @@
+/////////////////////////////////////////////////////////////////
+// ScoreType.h
+//
+// Routines for doing math operations in MSAPROBS
+/////////////////////////////////////////////////////////////////
+
+#ifndef SCORETYPE_H
+#define SCORETYPE_H
+
+#include <cmath>
+#include <algorithm>
+#include <cfloat>
+#include <assert.h>
+
+// Scalar type used by LOG() and EXP() below.
+typedef float ScoreType;
+
+// Sentinel standing in for log(0).  LOG_ADD() and the LOG_PLUS_EQUALS*()
+// helpers compare against it with ==, so it must be stored unmodified.
+const float LOG_ZERO = -2e20;
+// log(1)
+const float LOG_ONE = 0.0;
+
+/////////////////////////////////////////////////////////////////
+// LOG()
+//
+// Returns the natural logarithm of x, as computed by the library
+// log().
+/////////////////////////////////////////////////////////////////
+
+inline ScoreType LOG(ScoreType x) {
+       const ScoreType naturalLog = log(x);
+       return naturalLog;
+}
+
+/////////////////////////////////////////////////////////////////
+// EXP()
+//
+// Fast approximation of exp(x).  Positive arguments are passed to
+// the library exp(); arguments in (-16, 0] are evaluated with one
+// of six quartic polynomials chosen by interval; anything at or
+// below -16 yields 0.
+/////////////////////////////////////////////////////////////////
+
+inline ScoreType EXP(ScoreType x) {
+       //return exp(x);
+       if (x > 0)
+               return exp(x);
+
+       // The coefficients are doubles, so the polynomials are evaluated
+       // in double precision and narrowed on return.
+       const double t = x;
+
+       // (-0.5, 0]
+       if (x > -0.5)
+               return (((0.03254409303190190000 * t + 0.16280432765779600000) * t
+                               + 0.49929760485974900000) * t + 0.99995149601363700000) * t
+                               + 0.99999925508501600000;
+       // (-1, -0.5]
+       if (x > -1)
+               return (((0.01973899026052090000 * t + 0.13822379685007000000) * t
+                               + 0.48056651562365000000) * t + 0.99326940370383500000) * t
+                               + 0.99906756856399500000;
+       // (-2, -1]
+       if (x > -2)
+               return (((0.00940528203591384000 * t + 0.09414963667859410000) * t
+                               + 0.40825793595877300000) * t + 0.93933625499130400000) * t
+                               + 0.98369508190545300000;
+       // (-4, -2]
+       if (x > -4)
+               return (((0.00217245711583303000 * t + 0.03484829428350620000) * t
+                               + 0.22118199801337800000) * t + 0.67049462206469500000) * t
+                               + 0.83556950223398500000;
+       // (-8, -4]
+       if (x > -8)
+               return (((0.00012398771025456900 * t + 0.00349155785951272000) * t
+                               + 0.03727721426017900000) * t + 0.17974997741536900000) * t
+                               + 0.33249299994217400000;
+       // (-16, -8]
+       if (x > -16)
+               return (((0.00000051741713416603 * t + 0.00002721456879608080) * t
+                               + 0.00053418601865636800) * t + 0.00464101989351936000) * t
+                               + 0.01507447981459420000;
+
+       return 0;
+}
+
+/*
+ /////////////////////////////////////////////////////////////////
+ // LOOKUP()
+ //
+ // Computes log (exp (x) + 1), for 0 <= x <= 7.5.
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOOKUP (ScoreType x){
+ //return log (exp(x) + 1);
+ if (x < 2){
+ if (x < 0.5){
+ if (x < 0)
+ return log (exp(x) + 1);
+ return (((-0.00486373205785640000*x - 0.00020245408813934800)*x + 0.12504222666029800000)*x + 0.49999685320563000000)*x + 0.69314723138948900000;
+ }
+ if (x < 1)
+ return (((-0.00278634205460548000*x - 0.00458097251248546000)*x + 0.12865849880472500000)*x + 0.49862228499205200000)*x + 0.69334810088688000000;
+ return (((0.00059633755154209200*x - 0.01918996666063320000)*x + 0.15288232492093800000)*x + 0.48039958825756900000)*x + 0.69857578503189200000;
+ }
+ if (x < 8){
+ if (x < 4)
+ return (((0.00135958539181047000*x - 0.02329807659316430000)*x + 0.15885799609532100000)*x + 0.48167498563270800000)*x + 0.69276185058669200000;
+ return (((0.00011992394456683500*x - 0.00338464503306568000)*x + 0.03622746366545470000)*x + 0.82481250248383700000)*x + 0.32507892994863100000;
+ }
+ if (x < 16)
+ return (((0.00000051726300753785*x - 0.00002720671238876090)*x + 0.00053403733818413500)*x + 0.99536021775747900000)*x + 0.01507065715532010000;
+ return x;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOOKUP_SLOW()
+ //
+ // Computes log (exp (x) + 1).
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOOKUP_SLOW (ScoreType x){
+ return log (exp (x) + 1);
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // MAX()
+ //
+ // Compute max of three numbers
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType MAX (ScoreType x, ScoreType y, ScoreType z){
+ if (x >= y){
+ if (x >= z)
+ return x;
+ return z;
+ }
+ if (y >= z)
+ return y;
+ return z;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_PLUS_EQUALS()
+ //
+ // Add two log probabilities and store in the first argument
+ /////////////////////////////////////////////////////////////////
+
+ inline void LOG_PLUS_EQUALS (ScoreType &x, ScoreType y){
+ if (x < y)
+ x = (x <= LOG_ZERO) ? y : LOOKUP(y-x) + x;
+ else
+ x = (y <= LOG_ZERO) ? x : LOOKUP(x-y) + y;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_PLUS_EQUALS_SLOW()
+ //
+ // Add two log probabilities and store in the first argument
+ /////////////////////////////////////////////////////////////////
+
+ inline void LOG_PLUS_EQUALS_SLOW (ScoreType &x, ScoreType y){
+ if (x < y)
+ x = (x <= LOG_ZERO) ? y : LOOKUP_SLOW(y-x) + x;
+ else
+ x = (y <= LOG_ZERO) ? x : LOOKUP_SLOW(x-y) + y;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_ADD()
+ //
+ // Add two log probabilities
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOG_ADD (ScoreType x, ScoreType y){
+ if (x < y) return (x <= LOG_ZERO) ? y : LOOKUP(y-x) + x;
+ return (y <= LOG_ZERO) ? x : LOOKUP(x-y) + y;
+ }
+ */
+
+/*
+ /////////////////////////////////////////////////////////////////
+ // LOG()
+ //
+ // Compute the logarithm of x.
+ /////////////////////////////////////////////////////////////////
+
+ inline float LOG (float x){
+ return log (x);
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // EXP()
+ //
+ // Computes exp(x), for -4.6 <= x <= 0.
+ /////////////////////////////////////////////////////////////////
+
+ inline float EXP (float x){
+ assert (x <= 0.00f);
+ if (x < EXP_UNDERFLOW_THRESHOLD) return 0.0f;
+ return (((0.006349841068584 * x + 0.080775412572352) * x + 0.397982026296272) * x + 0.95279335963787f) * x + 0.995176455837312f;
+ //return (((0.00681169825657f * x + 0.08386267698832f) * x + 0.40413983195844f) * x + 0.95656674979767f) * x + 0.99556744049130f;
+ }
+ */
+
+// Only referenced from the commented-out EXP() variant in this header.
+const float EXP_UNDERFLOW_THRESHOLD = -4.6;
+// Largest argument LOOKUP() accepts; LOG_ADD() and LOG_PLUS_EQUALS()
+// return the larger operand once the two operands differ by this much.
+const float LOG_UNDERFLOW_THRESHOLD = 7.5;
+
+/////////////////////////////////////////////////////////////////
+// LOOKUP()
+//
+// Approximates log (exp (x) + 1) for 0 <= x <= 7.5 with one of
+// four cubic polynomials, selected by the interval x falls in.
+// The argument range is checked with assert.
+/////////////////////////////////////////////////////////////////
+
+inline float LOOKUP(float x) {
+       assert(x >= 0.00f);
+       assert(x <= LOG_UNDERFLOW_THRESHOLD);
+       //return ((-0.00653779113685f * x + 0.09537236626558f) * x + 0.55317574459331f) * x + 0.68672959851568f;
+       //return (((0.00089738532761f * x - 0.01859488697982f) * x + 0.14415772028626f) * x + 0.49515490689159f) * x + 0.69311928966454f;
+
+       // cubic coefficients, highest power first
+       float c3, c2, c1, c0;
+       if (x <= 1.00f) {
+               c3 = -0.009350833524763f;
+               c2 = 0.130659527668286f;
+               c1 = 0.498799810682272f;
+               c0 = 0.693203116424741f;
+       } else if (x <= 2.50f) {
+               c3 = -0.014532321752540f;
+               c2 = 0.139942324101744f;
+               c1 = 0.495635523139337f;
+               c0 = 0.692140569840976f;
+       } else if (x <= 4.50f) {
+               c3 = -0.004605031767994f;
+               c2 = 0.063427417320019f;
+               c1 = 0.695956496475118f;
+               c0 = 0.514272634594009f;
+       } else {
+               assert(x <= LOG_UNDERFLOW_THRESHOLD);
+               c3 = -0.000458661602210f;
+               c2 = 0.009695946122598f;
+               c1 = 0.930734667215156f;
+               c0 = 0.168037164329057f;
+       }
+
+       // Horner evaluation of the selected cubic
+       return ((c3 * x + c2) * x + c1) * x + c0;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOOKUP_SLOW()
+//
+// Computes log (exp (x) + 1) directly with the library log() and
+// exp().  Unlike LOOKUP(), it places no assert on the range of x.
+/////////////////////////////////////////////////////////////////
+
+inline float LOOKUP_SLOW(float x) {
+       return log(exp(x) + 1);
+}
+
+/////////////////////////////////////////////////////////////////
+// MAX()
+//
+// Returns the largest of the three arguments.
+/////////////////////////////////////////////////////////////////
+
+inline float MAX(float x, float y, float z) {
+       // winner of the first two, then play it against the third
+       const float bestOfXY = (x >= y) ? x : y;
+       return (bestOfXY >= z) ? bestOfXY : z;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_PLUS_EQUALS()
+//
+// Adds two probabilities given in log space and stores the result
+// (also in log space) in the first argument.  When the smaller
+// operand is LOG_ZERO, or lies at least LOG_UNDERFLOW_THRESHOLD
+// below the larger one, the result is simply the larger operand.
+/////////////////////////////////////////////////////////////////
+
+inline void LOG_PLUS_EQUALS(float &x, float y) {
+       if (x < y) {
+               // y is the larger operand
+               if (x == LOG_ZERO || y - x >= LOG_UNDERFLOW_THRESHOLD) {
+                       x = y;
+                       return;
+               }
+               x = LOOKUP(y - x) + x;
+               return;
+       }
+
+       // x is the larger (or equal) operand; nothing to do if y is negligible
+       if (y == LOG_ZERO || x - y >= LOG_UNDERFLOW_THRESHOLD)
+               return;
+       x = LOOKUP(x - y) + y;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_PLUS_EQUALS_SLOW()
+//
+// Same contract as LOG_PLUS_EQUALS(), but built on LOOKUP_SLOW()
+// and without the LOG_UNDERFLOW_THRESHOLD shortcut: only a smaller
+// operand equal to LOG_ZERO is skipped.
+/////////////////////////////////////////////////////////////////
+
+inline void LOG_PLUS_EQUALS_SLOW(float &x, float y) {
+       if (x < y) {
+               // y is the larger operand
+               if (x == LOG_ZERO) {
+                       x = y;
+                       return;
+               }
+               x = LOOKUP_SLOW(y - x) + x;
+               return;
+       }
+
+       // x is the larger (or equal) operand
+       if (y == LOG_ZERO)
+               return;
+       x = LOOKUP_SLOW(x - y) + y;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Returns the log-space sum of two probabilities given in log
+// space.  When the smaller operand is LOG_ZERO, or lies at least
+// LOG_UNDERFLOW_THRESHOLD below the larger one, the larger operand
+// is returned unchanged.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x, float y) {
+       // order the operands; on a tie x counts as the larger one
+       const bool xIsSmaller = x < y;
+       const float lo = xIsSmaller ? x : y;
+       const float hi = xIsSmaller ? y : x;
+
+       if (lo == LOG_ZERO || hi - lo >= LOG_UNDERFLOW_THRESHOLD)
+               return hi;
+       return LOOKUP(hi - lo) + lo;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Log-space sum of three log probabilities, folded from the right.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3) {
+       const float tail = LOG_ADD(x2, x3);
+       return LOG_ADD(x1, tail);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Log-space sum of four log probabilities, folded from the right.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4) {
+       const float tail = LOG_ADD(x3, x4);
+       const float rest = LOG_ADD(x2, tail);
+       return LOG_ADD(x1, rest);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Log-space sum of five log probabilities, folded from the right.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5) {
+       float acc = LOG_ADD(x4, x5);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Log-space sum of six log probabilities, folded from the right.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5,
+               float x6) {
+       float acc = LOG_ADD(x5, x6);
+       acc = LOG_ADD(x4, acc);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Log-space sum of seven log probabilities, folded from the right.
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5, float x6,
+               float x7) {
+       float acc = LOG_ADD(x6, x7);
+       acc = LOG_ADD(x5, acc);
+       acc = LOG_ADD(x4, acc);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// ChooseBestOfThree()
+//
+// Writes the largest of x1, x2 and x3 to *x and the tag (b1, b2 or
+// b3) belonging to that value to *b.  On ties the earlier argument
+// wins.
+/////////////////////////////////////////////////////////////////
+
+inline void ChooseBestOfThree(float x1, float x2, float x3, char b1, char b2,
+               char b3, float *x, char *b) {
+       // winner of the first two candidates
+       float best = x2;
+       char tag = b2;
+       if (x1 >= x2) {
+               best = x1;
+               tag = b1;
+       }
+
+       // the third candidate only wins when the current best is not >= it
+       if (!(best >= x3)) {
+               best = x3;
+               tag = b3;
+       }
+
+       *x = best;
+       *b = tag;
+}
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/Sequence.h b/binaries/src/GLProbs-1.0/Sequence.h
new file mode 100644 (file)
index 0000000..5bd1ef9
--- /dev/null
@@ -0,0 +1,444 @@
+/////////////////////////////////////////////////////////////////
+// Sequence.h
+//
+// Class for reading/manipulating single sequence character data.
+/////////////////////////////////////////////////////////////////
+
+#ifndef SEQUENCE_H
+#define SEQUENCE_H
+
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <cctype>
+#include <cstdlib>
+#include "SafeVector.h"
+#include "FileBuffer.h"
+
+/////////////////////////////////////////////////////////////////
+// Sequence
+//
+// Class for storing sequence information.
+/////////////////////////////////////////////////////////////////
+
+class Sequence {
+       // NOTE(review): owns `data` but declares no copy constructor/assignment; duplicate with Clone(), never by value
+       bool isValid; // true once character data has been loaded; false for an empty or failed sequence
+       string header;       // FASTA comment line (the file-reading constructor strips '>' and outer white space)
+       SafeVector<char> *data;      // owned character data, one-based: (*data)[0] is always '@'
+       int length;                  // number of sequence characters, not counting the '@' at index 0
+       int sequenceLabel; // integer sequence label, typically to indicate the ordering of sequences
+                                          //   in a Multi-FASTA file
+       int inputLabel;              // position of sequence in original input
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Default constructor (private).  Builds an empty, invalid sequence.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence() :
+                       isValid(false), header(""), data(NULL), length(0), sequenceLabel(0), inputLabel(
+                                       0) {
+       }
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Constructor.  Reads one FASTA record from a FileBuffer; Fail() is true afterwards if none was read.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence(FileBuffer &infile, bool stripGaps = false) :
+                       isValid(false), header("~"), data(NULL), length(0), sequenceLabel(
+                                       0), inputLabel(0) {
+
+               // read until the first non-blank line
+               while (!infile.eof()) {
+                       infile.GetLine(header);
+                       if (header.length() != 0)
+                               break;
+               }
+
+               // check to make sure that it is a correct header line (header may be empty at end of file)
+               if (!header.empty() && header[0] == '>') {
+
+                       // if so, remove the leading ">"
+                       header = header.substr(1);
+
+                       // remove any leading or trailing white space in the header comment
+                       while (header.length() > 0 && isspace((unsigned char) header[0]))
+                               header = header.substr(1);
+                       while (header.length() > 0 && isspace((unsigned char) header[header.length() - 1]))
+                               header = header.substr(0, header.length() - 1);
+
+                       // get ready to read the data[] array; note that data[0] is always '@'
+                       char ch;
+                       data = new SafeVector<char>;
+                       assert(data);
+                       data->push_back('@');
+
+                       // get a character from the file
+                       while (infile.Get(ch)) {
+
+                               // if we've reached a new comment line, put the character back and stop
+                               if (ch == '>') {
+                                       infile.UnGet();
+                                       break;
+                               }
+
+                               // skip whitespace (cast: <cctype> functions are undefined for negative char values)
+                               if (isspace((unsigned char) ch))
+                                       continue;
+
+                               // substitute gap character
+                               if (ch == '.')
+                                       ch = '-';
+                               if (stripGaps && ch == '-')
+                                       continue;
+
+                               // check for known characters; NOTE(review): an unstripped '-' is rejected here too - confirm intended
+                               if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))) {
+                                       cerr << "ERROR: Unknown character encountered: " << ch
+                                                       << endl;
+                                       exit(1);
+                               }
+
+                               // everything's ok so far, so just store this character.
+                               if (ch >= 'a' && ch <= 'z') {
+                                       ch = ch - 'a' + 'A';
+                               }       //change to upper case. fixed by Liu Yongchao, May 21, 2010
+
+                               data->push_back(ch);
+                               ++length;
+                       }
+
+                       // sequence must contain data in order to be valid
+                       isValid = length > 0;
+                       if (!isValid) {
+                               delete data;
+                               data = NULL;
+                       }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Constructor.  Builds a sequence from existing data and takes
+       // ownership of it (the destructor deletes it).  The data must use
+       // one-based indexing where data[0] is set to '@'.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence(SafeVector<char> *data, string header, int length,
+                       int sequenceLabel, int inputLabel) :
+                       isValid(data != NULL), header(header), data(data), length(length), sequenceLabel(
+                                       sequenceLabel), inputLabel(inputLabel) {
+               assert(data);
+               assert((*data)[0] == '@');
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::~Sequence()
+       //
+       // Destructor.  Release allocated memory.
+       /////////////////////////////////////////////////////////////////
+
+       ~Sequence() {
+               if (data) {
+                       assert(isValid);
+                       delete data;
+                       data = NULL;
+                       isValid = false;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetHeader()
+       //
+       // Return the string comment associated with this sequence.
+       /////////////////////////////////////////////////////////////////
+
+       string GetHeader() const {
+               return header;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetName()
+       //
+       // Return the first white-space-delimited word of the header ("" if there is none); no length limit.
+       /////////////////////////////////////////////////////////////////
+
+       string GetName() const {
+               const string::size_type first = header.find_first_not_of(" \t\n\v\f\r");
+               const string::size_type last = header.find_first_of(" \t\n\v\f\r", first); // npos when the word runs to the end
+               return first == string::npos ? string() : header.substr(first, last - first);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetDataPtr()
+       //
+       // Return the iterator to data associated with this sequence.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<char>::iterator GetDataPtr() {
+               assert(isValid);
+               assert(data);
+               return data->begin();
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetPosition()
+       //
+       // Return the character at position i.  Recall that the character
+       // data is stored with one-based indexing.
+       /////////////////////////////////////////////////////////////////
+
+       char GetPosition(int i) const {
+               assert(isValid);
+               assert(data);
+               assert(i >= 1 && i <= length);
+               return (*data)[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::SetLabel()
+       //
+       // Sets both the input label and the sorting label to i.
+       /////////////////////////////////////////////////////////////////
+
+       void SetLabel(int i) {
+               assert(isValid);
+               sequenceLabel = i;
+               inputLabel = i;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::SetSortLabel()
+       //
+       // Sets the sequence sorting label to i.
+       /////////////////////////////////////////////////////////////////
+
+       void SetSortLabel(int i) {
+               assert(isValid);
+               sequenceLabel = i;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetLabel()
+       //
+       // Retrieves the input label.
+       /////////////////////////////////////////////////////////////////
+
+       int GetLabel() const {
+               assert(isValid);
+               return inputLabel;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetSortLabel()
+       //
+       // Retrieves the sorting label.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSortLabel() const {
+               assert(isValid);
+               return sequenceLabel;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Fail()
+       //
+       // Checks to see if the sequence failed to load (true on failure).
+       /////////////////////////////////////////////////////////////////
+
+       bool Fail() const {
+               return !isValid;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetLength()
+       //
+       // Returns the length of the sequence.
+       /////////////////////////////////////////////////////////////////
+
+       int GetLength() const {
+               assert(isValid);
+               assert(data);
+               return length;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::WriteMFA()
+       //
+       // Writes the sequence to outfile in MFA format.  Uses numColumns
+       // columns per line (numColumns must be positive).  If useIndex is
+       // false the header is printed as normal, but if useIndex is true,
+       // ">S###" is printed where ### is the input label (GetLabel()).
+       /////////////////////////////////////////////////////////////////
+
+       void WriteMFA(ostream &outfile, int numColumns,
+                       bool useIndex = false) const {
+               assert(isValid && data);
+               assert(numColumns > 0); // guards the "ct % numColumns" below against division by zero
+               assert(!outfile.fail());
+
+               // print out heading
+               if (useIndex)
+                       outfile << ">S" << GetLabel() << endl;
+               else
+                       outfile << ">" << header << endl;
+
+               // print out character data
+               int ct = 1;
+               for (; ct <= length; ct++) {
+                       outfile << (*data)[ct];
+                       if (ct % numColumns == 0)
+                               outfile << endl;
+               }
+               if ((ct - 1) % numColumns != 0) // finish a partially filled last line
+                       outfile << endl;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Clone()
+       //
+       // Returns a new deep copy of the sequence (the caller owns it).
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *Clone() const {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               if (data) { // an invalid sequence holds no data; cloning it must not dereference NULL
+                       ret->data = new SafeVector<char>;
+                       *(ret->data) = *data;
+               }
+               ret->length = length;
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetRange()
+       //
+       // Returns a new sequence object consisting of the characters
+       // start..end (one-based, inclusive) of the current sequence.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *GetRange(int start, int end) const {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               assert(start >= 1 && start <= length);
+               assert(end >= 1 && end <= length);
+               assert(start <= end);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               ret->data = new SafeVector<char>;
+               assert(ret->data);
+               ret->data->push_back('@');
+               for (int i = start; i <= end; i++)
+                       ret->data->push_back((*data)[i]);
+               ret->length = end - start + 1;
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::AddGaps()
+       //
+       // Given an SafeVector<char> containing the skeleton for an
+       // alignment and the identity of the current character, this
+       // routine will create a new sequence with all necessary gaps added.
+       // For instance,
+       //    alignment = "XXXBBYYYBBYYXX"
+       //    id = 'X'
+       // will perform the transformation
+       //    "ATGCCGTCA" --> "ATGCC---GT--CA"
+       //                    (XXXBBYYYBBYYXX)
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *AddGaps(SafeVector<char> *alignment, char id) const {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               ret->data = new SafeVector<char>;
+               assert(ret->data);
+               ret->length = (int) alignment->size();
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+               ret->data->push_back('@');
+
+               SafeVector<char>::iterator dataIter = data->begin() + 1; // skip the '@' at index 0
+               for (SafeVector<char>::iterator iter = alignment->begin();
+                               iter != alignment->end(); ++iter) {
+                       if (*iter == 'B' || *iter == id) {
+                               assert(dataIter != data->end()); // the skeleton must not consume more characters than we hold
+                               ret->data->push_back(*dataIter++);
+                       } else
+                               ret->data->push_back('-');
+               }
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetString()
+       //
+       // Returns the sequence as a string with gaps removed.
+       /////////////////////////////////////////////////////////////////
+
+       string GetString() const {
+               string s = "";
+               for (int i = 1; i <= length; i++) {
+                       if ((*data)[i] != '-')
+                               s += (*data)[i];
+               }
+               return s;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetMapping()
+       //
+       // Returns a new SafeVector<int> (owned by the caller) holding a leading 0 followed by the index of
+       // every non-gap character.  For instance, if the data is
+       // "ATGCC---GT--CA", the method returns {0,1,2,3,4,5,9,10,13,14}.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<int> *GetMapping() const {
+               SafeVector<int> *ret = new SafeVector<int>(1, 0);
+               for (int i = 1; i <= length; i++) {
+                       if ((*data)[i] != '-')
+                               ret->push_back(i);
+               }
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Highlight()
+       //
+       // Changes all positions with score >= cutoff to upper case and
+       // all positions with score < cutoff to lower case (scores is zero-based: position i uses scores[i - 1]).
+       /////////////////////////////////////////////////////////////////
+
+       void Highlight(const SafeVector<float> &scores, const float cutoff) {
+               for (int i = 1; i <= length; i++) {
+                       if (scores[i - 1] >= cutoff)
+                               (*data)[i] = toupper((unsigned char) (*data)[i]);
+                       else
+                               (*data)[i] = tolower((unsigned char) (*data)[i]);
+               }
+       }
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/SparseMatrix.h b/binaries/src/GLProbs-1.0/SparseMatrix.h
new file mode 100644 (file)
index 0000000..51b273d
--- /dev/null
@@ -0,0 +1,266 @@
+/////////////////////////////////////////////////////////////////
+// SparseMatrix.h
+//
+// Sparse matrix computations
+/////////////////////////////////////////////////////////////////
+
+#ifndef SPARSEMATRIX_H
+#define SPARSEMATRIX_H
+
+#include <iostream>
+
+using namespace std;
+
+const float POSTERIOR_CUTOFF = 0.01;         // minimum posterior probability
+// value that is maintained in the
+// sparse matrix representation
+
+typedef pair<int, float> PIF;                 // Sparse matrix entry type
+//   first --> column
+//   second --> value
+
+/////////////////////////////////////////////////////////////////
+// SparseMatrix
+//
+// Class for sparse matrix computations
+/////////////////////////////////////////////////////////////////
+
+class SparseMatrix {
+       // NOTE(review): rowPtrs holds iterators into `data`; a by-value copy would presumably keep pointing into the source object
+       int seq1Length, seq2Length;                     // dimensions of matrix
+       VI rowSize;                              // rowSize[i] = # of cells in row i (rowSize[0] is set to -1, unused)
+       SafeVector<PIF> data;                           // data values, stored row by row
+       SafeVector<SafeVector<PIF>::iterator> rowPtrs; // pointers to the beginning of each row
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::SparseMatrix()
+       //
+       // Private constructor.  Creates an empty 0 x 0 matrix for ComputeTranspose() to fill in.
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix() : seq1Length(0), seq2Length(0) {
+       }
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::SparseMatrix()
+       //
+       // Constructor.  Builds a sparse matrix from a posterior matrix.
+       // Note that the expected format for the posterior matrix is as
+       // a (seq1Length+1) x (seq2Length+1) matrix where the 0th row
+       // and 0th column are ignored (they should contain all zeroes).
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix(int seq1Length, int seq2Length, const VF &posterior) :
+                       seq1Length(seq1Length), seq2Length(seq2Length) {
+
+               int numCells = 0;
+
+               assert(seq1Length > 0);
+               assert(seq2Length > 0);
+
+               // calculate memory required; count the number of cells in the
+               // posterior matrix above the threshold
+               VF::const_iterator postPtr = posterior.begin();
+               for (int i = 0; i <= seq1Length; i++) {
+                       for (int j = 0; j <= seq2Length; j++) {
+                               if (*(postPtr++) >= POSTERIOR_CUTOFF) {
+                                       assert(i != 0 && j != 0);
+                                       if (i != 0 && j != 0) numCells++; // with asserts off, still count only what the fill pass stores
+                               }
+                       }
+               }
+
+               // allocate memory
+               data.resize(numCells);
+               rowSize.resize(seq1Length + 1);
+               rowSize[0] = -1;
+               rowPtrs.resize(seq1Length + 1);
+               rowPtrs[0] = data.end();
+
+               // build sparse matrix
+               postPtr = posterior.begin() + seq2Length + 1; // note that we're skipping the first row here
+               SafeVector<PIF>::iterator dataPtr = data.begin();
+               for (int i = 1; i <= seq1Length; i++) {
+                       postPtr++;              // and skipping the first column of each row
+                       rowPtrs[i] = dataPtr;
+                       for (int j = 1; j <= seq2Length; j++) {
+                               if (*postPtr >= POSTERIOR_CUTOFF) {
+                                       dataPtr->first = j;
+                                       dataPtr->second = *postPtr;
+                                       dataPtr++;
+                               }
+                               postPtr++;
+                       }
+                       rowSize[i] = dataPtr - rowPtrs[i];
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetRowPtr()
+       //
+       // Returns the pointer to a particular row in the sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<PIF>::iterator GetRowPtr(int row) const {
+               assert(row >= 1 && row <= seq1Length);
+               return rowPtrs[row];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetValue()
+       //
+       // Returns value at a particular row, column (0 if the cell is not stored).
+       /////////////////////////////////////////////////////////////////
+
+       float GetValue(int row, int col) const {
+               assert(row >= 1 && row <= seq1Length);
+               assert(col >= 1 && col <= seq2Length);
+               for (int i = 0; i < rowSize[row]; i++) { // linear scan over the stored cells of this row
+                       if (rowPtrs[row][i].first == col)
+                               return rowPtrs[row][i].second;
+               }
+               return 0;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetRowSize()
+       //
+       // Returns the number of entries in a particular row.
+       /////////////////////////////////////////////////////////////////
+
+       int GetRowSize(int row) const {
+               assert(row >= 1 && row <= seq1Length);
+               return rowSize[row];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetSeq1Length()
+       //
+       // Returns the first dimension of the matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSeq1Length() const {
+               return seq1Length;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetSeq2Length()
+       //
+       // Returns the second dimension of the matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSeq2Length() const {
+               return seq2Length;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetNumCells()
+       //
+       // Returns the total number of cells stored in the sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetNumCells() const {
+               return (int) data.size();
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::Print()
+       //
+       // Prints out a sparse matrix, one row per line as (column,value) pairs.
+       /////////////////////////////////////////////////////////////////
+
+       void Print(ostream &outfile) const {
+               outfile << "Sparse Matrix:" << endl;
+               for (int i = 1; i <= seq1Length; i++) {
+                       outfile << "  " << i << ":";
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               outfile << " (" << rowPtrs[i][j].first << ","
+                                               << rowPtrs[i][j].second << ")";
+                       }
+                       outfile << endl;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::ComputeTranspose()
+       //
+       // Returns a new sparse matrix containing the transpose of the
+       // current matrix (allocated with new; the caller owns it).
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix *ComputeTranspose() const {
+
+               // create a new sparse matrix
+               SparseMatrix *ret = new SparseMatrix();
+               int numCells = data.size();
+
+               ret->seq1Length = seq2Length;
+               ret->seq2Length = seq1Length;
+
+               // allocate memory
+               ret->data.resize(numCells);
+               ret->rowSize.resize(seq2Length + 1);
+               ret->rowSize[0] = -1;
+               ret->rowPtrs.resize(seq2Length + 1);
+               ret->rowPtrs[0] = ret->data.end();
+
+               // compute row sizes: each stored column index becomes a row of the transpose
+               for (int i = 1; i <= seq2Length; i++)
+                       ret->rowSize[i] = 0;
+               for (int i = 0; i < numCells; i++)
+                       ret->rowSize[data[i].first]++;
+
+               // compute row ptrs: every row starts where the previous one ends
+               for (int i = 1; i <= seq2Length; i++) {
+                       ret->rowPtrs[i] =
+                                       (i == 1) ?
+                                                       ret->data.begin() :
+                                                       ret->rowPtrs[i - 1] + ret->rowSize[i - 1];
+               }
+
+               // now fill in data; currPtrs[c] is the next free slot of transposed row c
+               SafeVector<SafeVector<PIF>::iterator> currPtrs = ret->rowPtrs;
+
+               for (int i = 1; i <= seq1Length; i++) {
+                       SafeVector<PIF>::iterator row = rowPtrs[i];
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               currPtrs[row[j].first]->first = i;
+                               currPtrs[row[j].first]->second = row[j].second;
+                               currPtrs[row[j].first]++;
+                       }
+               }
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetPosterior()
+       //
+       // Return the dense (seq1Length+1) x (seq2Length+1) posterior representation (caller owns it).
+       /////////////////////////////////////////////////////////////////
+
+       VF *GetPosterior() const {
+
+               // create a new posterior matrix
+               VF *posteriorPtr = new VF((seq1Length + 1) * (seq2Length + 1));
+               assert(posteriorPtr);
+               VF &posterior = *posteriorPtr;
+
+               // build the posterior matrix: zero everything, then scatter the stored cells
+               for (int i = 0; i < (seq1Length + 1) * (seq2Length + 1); i++)
+                       posterior[i] = 0;
+               for (int i = 1; i <= seq1Length; i++) {
+                       VF::iterator postPtr = posterior.begin() + i * (seq2Length + 1);
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               postPtr[rowPtrs[i][j].first] = rowPtrs[i][j].second;
+                       }
+               }
+
+               return posteriorPtr;
+       }
+
+};
+
+#endif
diff --git a/binaries/src/GLProbs-1.0/glprobs b/binaries/src/GLProbs-1.0/glprobs
new file mode 100755 (executable)
index 0000000..238ac42
Binary files /dev/null and b/binaries/src/GLProbs-1.0/glprobs differ
diff --git a/binaries/src/GLProbs-1.0/main.cpp b/binaries/src/GLProbs-1.0/main.cpp
new file mode 100644 (file)
index 0000000..6fd1934
--- /dev/null
@@ -0,0 +1,16 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "MSA.h"
+
+int main(int argc, char **argv) {
+       // nothing else is called here: constructing the MSA object is all this entry point does
+       MSA msa(argc, argv);
+       return 0;
+}
diff --git a/binaries/src/MSAProbs-0.9.7/ChangeLog b/binaries/src/MSAProbs-0.9.7/ChangeLog
new file mode 100644 (file)
index 0000000..c34e041
--- /dev/null
@@ -0,0 +1,9 @@
+(1) 23 Aug, 2010       
+       Add an option "-num_threads" to allow users to specify the number of
+       threads used
+(2) 12 April, 2012
+       GCC 4.6 can successfully compile it. 
+
+(3) 3 July, 2012
+       Add a new option "-o" (or "--outfile") to allow users to specify the output file name.
+       By default, it will output to STDOUT
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs.ncb b/binaries/src/MSAProbs-0.9.7/MSAProbs.ncb
new file mode 100644 (file)
index 0000000..248d356
Binary files /dev/null and b/binaries/src/MSAProbs-0.9.7/MSAProbs.ncb differ
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs.sln b/binaries/src/MSAProbs-0.9.7/MSAProbs.sln
new file mode 100644 (file)
index 0000000..fc350a8
--- /dev/null
@@ -0,0 +1,20 @@
+\feff\r
+Microsoft Visual Studio Solution File, Format Version 9.00\r
+# Visual Studio 2005\r
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MSAProbs", "MSAProbs\MSAProbs.vcproj", "{671563E4-93A2-419E-8B41-48DDF71DD144}"\r
+EndProject\r
+Global\r
+       GlobalSection(SolutionConfigurationPlatforms) = preSolution\r
+               Debug|Win32 = Debug|Win32\r
+               Release|Win32 = Release|Win32\r
+       EndGlobalSection\r
+       GlobalSection(ProjectConfigurationPlatforms) = postSolution\r
+               {671563E4-93A2-419E-8B41-48DDF71DD144}.Debug|Win32.ActiveCfg = Debug|Win32\r
+               {671563E4-93A2-419E-8B41-48DDF71DD144}.Debug|Win32.Build.0 = Debug|Win32\r
+               {671563E4-93A2-419E-8B41-48DDF71DD144}.Release|Win32.ActiveCfg = Release|Win32\r
+               {671563E4-93A2-419E-8B41-48DDF71DD144}.Release|Win32.Build.0 = Release|Win32\r
+       EndGlobalSection\r
+       GlobalSection(SolutionProperties) = preSolution\r
+               HideSolutionNode = FALSE\r
+       EndGlobalSection\r
+EndGlobal\r
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs.suo b/binaries/src/MSAProbs-0.9.7/MSAProbs.suo
new file mode 100644 (file)
index 0000000..08a75bc
Binary files /dev/null and b/binaries/src/MSAProbs-0.9.7/MSAProbs.suo differ
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/Defaults.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/Defaults.h
new file mode 100644 (file)
index 0000000..b108cf3
--- /dev/null
@@ -0,0 +1,105 @@
+/////////////////////////////////////////////////////////////////
+// Defaults.h
+//
+// Default constants for use in MSAPROBS.  The emission
+// probabilities were computed using the program used to build
+// the BLOSUM62 matrix from the BLOCKS 5.0 dataset.  Transition
+// parameters were obtained via unsupervised EM training on the
+// BALIBASE 2.0 benchmark alignment database.
+/////////////////////////////////////////////////////////////////
+
+#ifndef DEFAULTS_H
+#define DEFAULTS_H
+
+#include <string>
+
+using namespace std;
+
+/*
+ float initDistrib1Default[] = { 0.3202854395, 0.3398572505, 0.3398572505 };
+ float gapOpen1Default[] = { 0.1375414133, 0.1375414133 };
+ float gapExtend1Default[] = { 0.7832147479, 0.7832147479 };
+ */
+
+float initDistrib1Default[] = { 0.6080327034f, 0.1959836632f, 0.1959836632f };
+float gapOpen1Default[] = { 0.01993141696f, 0.01993141696f };
+float gapExtend1Default[] = { 0.7943345308f, 0.7943345308f };
+
+float initDistrib2Default[] = { 0.6814756989f, 8.615339902e-05f,
+               8.615339902e-05f, 0.1591759622f, 0.1591759622 };
+float gapOpen2Default[] = { 0.0119511066f, 0.0119511066f, 0.008008334786f,
+               0.008008334786 };
+float gapExtend2Default[] = { 0.3965826333f, 0.3965826333f, 0.8988758326f,
+               0.8988758326 };
+
+// Residue alphabet used by the default emission tables; the position of a
+// letter in this string is its row/column index in emitSingleDefault and
+// emitPairsDefault.
+string alphabetDefault = "ARNDCQEGHILKMFPSTWYV";
+// Default emission probability of a single (unaligned) residue, one entry
+// per letter of alphabetDefault in the same order.
+float emitSingleDefault[20] = { 0.07831005f, 0.05246024f, 0.04433257f,
+               0.05130349f, 0.02189704f, 0.03585766f, 0.05615771f, 0.07783433f,
+               0.02601093f, 0.06511648f, 0.09716489f, 0.05877077f, 0.02438117f,
+               0.04463228f, 0.03940142f, 0.05849916f, 0.05115306f, 0.01203523f,
+               0.03124726f, 0.07343426f };
+
+// Default joint emission probabilities for aligned residue pairs, stored as
+// a lower-triangular table: row i / column j (j <= i) follow the order of
+// alphabetDefault, and entries above the diagonal are 0.0f placeholders.
+// MSA::ReadParameters() reads only the j <= i entries and mirrors each one
+// into both [i][j] and [j][i] of the run-time emission table.
+float emitPairsDefault[20][20] = { { 0.02373072f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f }, { 0.00244502f, 0.01775118f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f }, { 0.00210228f, 0.00207782f, 0.01281864f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f }, { 0.00223549f, 0.00161657f, 0.00353540f, 0.01911178f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f }, { 0.00145515f, 0.00044701f, 0.00042479f,
+               0.00036798f, 0.01013470f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, { 0.00219102f,
+               0.00253532f, 0.00158223f, 0.00176784f, 0.00032102f, 0.00756604f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f }, { 0.00332218f, 0.00268865f, 0.00224738f, 0.00496800f,
+               0.00037956f, 0.00345128f, 0.01676565f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, { 0.00597898f,
+               0.00194865f, 0.00288882f, 0.00235249f, 0.00071206f, 0.00142432f,
+               0.00214860f, 0.04062876f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, { 0.00114353f, 0.00132105f, 0.00141205f,
+               0.00097077f, 0.00026421f, 0.00113901f, 0.00131767f, 0.00103704f,
+               0.00867996f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+               0.0f },
+               { 0.00318853f, 0.00138145f, 0.00104273f, 0.00105355f, 0.00094040f,
+                               0.00100883f, 0.00124207f, 0.00142520f, 0.00059716f, 0.01778263f,
+                               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, {
+                               0.00449576f, 0.00246811f, 0.00160275f, 0.00161966f, 0.00138494f,
+                               0.00180553f, 0.00222063f, 0.00212853f, 0.00111754f, 0.01071834f,
+                               0.03583921f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+                               0.0f }, { 0.00331693f, 0.00595650f, 0.00257310f, 0.00252518f,
+                               0.00046951f, 0.00312308f, 0.00428420f, 0.00259311f, 0.00121376f,
+                               0.00157852f, 0.00259626f, 0.01612228f, 0.0f, 0.0f, 0.0f, 0.0f,
+                               0.0f, 0.0f, 0.0f, 0.0f }, { 0.00148878f, 0.00076734f,
+                               0.00063401f, 0.00047808f, 0.00037421f, 0.00075546f, 0.00076105f,
+                               0.00066504f, 0.00042237f, 0.00224097f, 0.00461939f, 0.00096120f,
+                               0.00409522f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, {
+                               0.00165004f, 0.00090768f, 0.00084658f, 0.00069041f, 0.00052274f,
+                               0.00059248f, 0.00078814f, 0.00115204f, 0.00072545f, 0.00279948f,
+                               0.00533369f, 0.00087222f, 0.00116111f, 0.01661038f, 0.0f, 0.0f,
+                               0.0f, 0.0f, 0.0f, 0.0f }, { 0.00230618f, 0.00106268f,
+                               0.00100282f, 0.00125381f, 0.00034766f, 0.00090111f, 0.00151550f,
+                               0.00155601f, 0.00049078f, 0.00103767f, 0.00157310f, 0.00154836f,
+                               0.00046718f, 0.00060701f, 0.01846071f, 0.0f, 0.0f, 0.0f, 0.0f,
+                               0.0f }, { 0.00631752f, 0.00224540f, 0.00301397f, 0.00285226f,
+                               0.00094867f, 0.00191155f, 0.00293898f, 0.00381962f, 0.00116422f,
+                               0.00173565f, 0.00250962f, 0.00312633f, 0.00087787f, 0.00119036f,
+                               0.00180037f, 0.01346609f, 0.0f, 0.0f, 0.0f, 0.0f }, {
+                               0.00389995f, 0.00186053f, 0.00220144f, 0.00180488f, 0.00073798f,
+                               0.00154526f, 0.00216760f, 0.00214841f, 0.00077747f, 0.00248968f,
+                               0.00302273f, 0.00250862f, 0.00093371f, 0.00107595f, 0.00147982f,
+                               0.00487295f, 0.01299436f, 0.0f, 0.0f, 0.0f }, { 0.00039119f,
+                               0.00029139f, 0.00021006f, 0.00016015f, 0.00010666f, 0.00020592f,
+                               0.00023815f, 0.00038786f, 0.00019097f, 0.00039549f, 0.00076736f,
+                               0.00028448f, 0.00016253f, 0.00085751f, 0.00015674f, 0.00026525f,
+                               0.00024961f, 0.00563625f, 0.0f, 0.0f }, { 0.00131840f,
+                               0.00099430f, 0.00074960f, 0.00066005f, 0.00036626f, 0.00070192f,
+                               0.00092548f, 0.00089301f, 0.00131038f, 0.00127857f, 0.00219713f,
+                               0.00100817f, 0.00054105f, 0.00368739f, 0.00047608f, 0.00102648f,
+                               0.00094759f, 0.00069226f, 0.00999315f, 0.0f }, { 0.00533241f,
+                               0.00169359f, 0.00136609f, 0.00127915f, 0.00119152f, 0.00132844f,
+                               0.00178697f, 0.00194579f, 0.00071553f, 0.01117956f, 0.00914460f,
+                               0.00210897f, 0.00197461f, 0.00256159f, 0.00135781f, 0.00241601f,
+                               0.00343452f, 0.00038538f, 0.00148001f, 0.02075171f } };
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/FileBuffer.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/FileBuffer.h
new file mode 100644 (file)
index 0000000..06af54b
--- /dev/null
@@ -0,0 +1,117 @@
+/////////////////////////////////////////////////////////////////
+// FileBuffer.h
+//
+// Buffered file reading.
+/////////////////////////////////////////////////////////////////
+
+#ifndef FILEBUFFER_H
+#define FILEBUFFER_H
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+const int BufferSize = 1000;
+
+/////////////////////////////////////////////////////////////////
+// FileBuffer
+//
+// Class for buffering file reading.
+/////////////////////////////////////////////////////////////////
+
+class FileBuffer {
+       ifstream file;
+       char buffer[BufferSize];
+       int currPos;
+       int size;
+       bool isEOF;
+       bool isValid;
+       bool canUnget;
+
+public:
+
+       // Some common routines
+
+       FileBuffer(const char *filename) :
+                       file(filename), currPos(0), size(0), isEOF(false), isValid(
+                                       !file.fail()), canUnget(false) {
+       }
+       ~FileBuffer() {
+               close();
+       }
+       bool fail() const {
+               return !isValid;
+       }
+       bool eof() const {
+               return (!isValid || isEOF);
+       }
+       void close() {
+               file.close();
+               isValid = false;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::Get()
+       //
+       // Retrieve a character from the file buffer.  Returns true if
+       // and only if a character is read.
+       /////////////////////////////////////////////////////////////////
+
+       bool Get(char &ch) {
+
+               // check to make sure that there's more stuff in the file
+               if (!isValid || isEOF)
+                       return false;
+
+               // if the buffer is empty, it's time to reload it
+               if (currPos == size) {
+                       file.read(buffer, BufferSize);
+                       size = file.gcount();
+                       isEOF = (size == 0);
+                       currPos = 0;
+                       if (isEOF)
+                               return false;
+               }
+
+               // store the read character
+               ch = buffer[currPos++];
+               canUnget = true;
+               return true;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::UnGet()
+       //
+       // Unretrieve the most recently read character from the file
+       // buffer.  Note that this allows only a one-level undo.
+       /////////////////////////////////////////////////////////////////
+
+       void UnGet() {
+               assert(canUnget);
+               assert(isValid);
+               assert(currPos > 0);
+               currPos--;
+               assert(currPos < size);
+               isEOF = false;
+               canUnget = false;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // FileBuffer::GetLine()
+       //
+       // Retrieve characters of text until a newline character is
+       // encountered.  Terminates properly on end-of-file condition.
+       /////////////////////////////////////////////////////////////////
+
+       void GetLine(string &s) {
+               char ch;
+               s = "";
+               while (Get(ch) && ch != '\n')
+                       s += ch;
+       }
+
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.cpp
new file mode 100644 (file)
index 0000000..db1550e
--- /dev/null
@@ -0,0 +1,1349 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <set>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <iomanip>
+#include "MSA.h"
+#include "MSAClusterTree.h"
+#include "Defaults.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+// Parameter file to read; when this is empty MSA::ReadParameters() falls
+// back to the built-in defaults from Defaults.h.
+string parametersInputFilename = "";
+// NOTE(review): presumably the file that trained parameters would be written
+// to, with "no training" acting as a sentinel -- confirm against ParseParams.
+string parametersOutputFilename = "no training";
+string annotationFilename = "";
+
+// Run-time switches (NOTE(review): presumably set while parsing the command
+// line -- the parser is not visible here).
+bool enableVerbose = false;        // doAlign() prints per-pair progress to STDERR
+bool enableAnnotation = false;     // doAlign() calls WriteAnnotation() on the result
+bool enableClustalWOutput = false; // constructor writes ALN instead of MFA output
+bool enableAlignOrder = false;
+int numConsistencyReps = 2;        // number of DoRelaxation() passes in doAlign()
+int numPreTrainingReps = 0;
+int numIterativeRefinementReps = 10;
+
+float cutoff = 0;
+
+// HMM parameters actually used for the alignment; filled in by
+// MSA::ReadParameters().  The emission tables are indexed by character code
+// (hence 256 entries) and start out at a small non-zero floor value.
+VF initDistrib(NumMatrixTypes);
+VF gapOpen(2 * NumInsertStates);
+VF gapExtend(2 * NumInsertStates);
+VVF emitPairs(256, VF(256, 1e-10));
+VF emitSingle(256, 1e-5);
+
+string alphabet = alphabetDefault;
+
+// NOTE(review): these look like the accepted ranges for the corresponding
+// command-line options -- confirm where they are enforced.
+const int MIN_PRETRAINING_REPS = 0;
+const int MAX_PRETRAINING_REPS = 20;
+const int MIN_CONSISTENCY_REPS = 0;
+const int MAX_CONSISTENCY_REPS = 5;
+const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
+const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
+
+string posteriorProbsFilename = "";
+bool allscores = true;
+string infilename;
+
+int flag_gui = 0;   //0: no gui related o/p 
+//1: gui related o/p generated
+int flag_ppscore = 0; //0: no pp score sequence added to o/p fasta alignment
+//1: pp score seq added to o/p fasta alignment
+
+///////////////////////////////
+// global scoring matrix variables
+// (NOTE(review): presumably consumed by the partition-function code declared
+// extern below -- confirm)
+//////////////////////////////
+float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+char *aminos, *bases, matrixtype[20] = "gonnet_160";
+int subst_index[26];
+
+double sub_matrix[26][26];
+int firstread = 0;             //this makes sure that matrices are read only once 
+
+float TEMPERATURE = 5;
+int MATRIXTYPE = 160;
+int prot_nuc = 0;              //0=prot, 1=nucleotide
+
+float GAPOPEN = 0;
+float GAPEXT = 0;
+// Number of OpenMP threads; a value <= 0 makes the MSA constructor use
+// omp_get_num_procs() instead.
+int numThreads = 0;
+
+//argument support
+// Bundle of settings for the partition-function computation; the single
+// global instance below is prepared by init_arguments().
+// NOTE(review): field meanings other than 'opt' are inferred from their
+// names only -- confirm against the code that reads them.
+typedef struct {
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+argument_decl argument;
+
+// Helpers implemented in another translation unit.
+extern inline void read_sustitution_matrix(char *fileName);
+extern void setmatrixtype(int le);
+extern inline int matrixtype_to_int();
+extern inline void read_dna_matrix();
+extern inline void read_vtml_la_matrix();
+extern void init_arguments();
+
+MSA::MSA(int argc, char* argv[]) {
+       //parse program parameters
+       SafeVector<string> sequenceNames = ParseParams(argc, argv);
+
+       //initialize arguments for partition function
+       init_arguments();
+
+       ReadParameters();
+       //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
+
+       //read the input sequences
+       MultiSequence *sequences = new MultiSequence();
+       assert(sequences);
+       for (int i = 0; i < (int) sequenceNames.size(); i++) {
+               cerr << "Loading sequence file: " << sequenceNames[i] << endl;
+               sequences->LoadMFA(sequenceNames[i], true);
+       }
+       //allocate space for sequence weights
+       this->seqsWeights = new int[sequences->GetNumSequences()];
+       //initilaize parameters for OPENMP
+#ifdef _OPENMP
+       if(numThreads <= 0) {
+               numThreads = omp_get_num_procs();
+               cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
+       }
+       cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
+
+       //set OpenMP to use dynamic number of threads which is equal to the number of processor cores on the host
+       omp_set_num_threads(numThreads);
+#endif 
+
+       // now, we can perform the alignments and write them out
+       MultiSequence *alignment = doAlign(sequences,
+                       ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
+                                       emitSingle), initDistrib, gapOpen, gapExtend, emitPairs,
+                       emitSingle);
+
+       //write the alignment results to standard output
+       if (enableClustalWOutput) {
+               alignment->WriteALN(*alignOutFile);
+       } else {
+               alignment->WriteMFA(*alignOutFile);
+       }
+       //release resources
+       delete[] this->seqsWeights;
+       delete alignment;
+       delete sequences;
+}
+// Closes the alignment output file, but only when an output file name was
+// recorded in alignOutFileName.
+MSA::~MSA() {
+       // no file name recorded: there is nothing to close
+       if (alignOutFileName.length() == 0) {
+               return;
+       }
+       ((std::ofstream*) alignOutFile)->close();
+}
+/////////////////////////////////////////////////////////////////
+// PrintParameters()
+//
+// Prints MSAPROBS parameters to STDERR.  If a filename is
+// specified, then the parameters are also written to the file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::PrintParameters(const char *message, const VF &initDistrib,
+               const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+               const VF &emitSingle, const char *filename) {
+
+       // print parameters to the screen
+       cerr << message << endl << "    initDistrib[] = { ";
+       for (int i = 0; i < NumMatrixTypes; i++)
+               cerr << setprecision(10) << initDistrib[i] << " ";
+       cerr << "}" << endl << "        gapOpen[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapOpen[i] << " ";
+       cerr << "}" << endl << "      gapExtend[] = { ";
+       for (int i = 0; i < NumInsertStates * 2; i++)
+               cerr << setprecision(10) << gapExtend[i] << " ";
+       cerr << "}" << endl << endl;
+
+       /*
+        for (int i = 0; i < 5; i++){
+        for (int j = 0; j <= i; j++){
+        cerr << emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]] << " ";
+        }
+        cerr << endl;
+        }*/
+
+       // if a file name is specified
+       if (filename) {
+
+               // attempt to open the file for writing
+               FILE *file = fopen(filename, "w");
+               if (!file) {
+                       cerr << "ERROR: Unable to write parameter file: " << filename
+                                       << endl;
+                       exit(1);
+               }
+
+               // if successful, then write the parameters to the file
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       fprintf(file, "%.10f ", initDistrib[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapOpen[i]);
+               fprintf(file, "\n");
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       fprintf(file, "%.10f ", gapExtend[i]);
+               fprintf(file, "\n");
+               fprintf(file, "%s\n", alphabet.c_str());
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++)
+                               fprintf(file, "%.10f ",
+                                               emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]]);
+                       fprintf(file, "\n");
+               }
+               for (int i = 0; i < (int) alphabet.size(); i++)
+                       fprintf(file, "%.10f ", emitSingle[(unsigned char) alphabet[i]]);
+               fprintf(file, "\n");
+               fclose(file);
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// doAlign()
+//
+// Computes a posterior probability matrix for every pair of
+// input sequences (HMM posterior combined with the partition
+// function posterior), derives pairwise distances from the
+// expected accuracy of each pairwise alignment, builds the guide
+// tree, applies numConsistencyReps rounds of the consistency
+// transformation and finally computes the multiple alignment.
+//
+// Returns the final alignment; the caller owns and deletes it.
+// The initDistrib/gapOpen/gapExtend/emitPairs/emitSingle
+// arguments are not used inside this function body.
+/////////////////////////////////////////////////////////////////
+// Partition-function posterior for the sequence pair (a, b); defined in
+// another translation unit.  The returned vector is deleted by doAlign().
+extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
+MultiSequence* MSA::doAlign(MultiSequence *sequences,
+               const ProbabilisticModel &model, VF &initDistrib, VF &gapOpen,
+               VF &gapExtend, VVF &emitPairs, VF &emitSingle) {
+       assert(sequences);
+
+       //get the number of sequences
+       const int numSeqs = sequences->GetNumSequences();
+
+       //create distance matrix
+       VVF distances(numSeqs, VF(numSeqs, 0));
+       // sparse posterior matrices; only the [a][b] entries with a < b are
+       // allocated by the pairwise loop below, everything else starts as NULL
+       SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+#ifdef _OPENMP
+       //calculate sequence pairs for openmp model
+       // (flattens the a < b double loop into one array so that a single
+       // parallel for can iterate over all pairs)
+       int pairIdx = 0;
+       numPairs = (numSeqs - 1) * numSeqs / 2;
+       seqsPairs = new SeqsPair[numPairs];
+       for(int a = 0; a < numSeqs; a++) {
+               for(int b = a + 1; b < numSeqs; b++) {
+                       seqsPairs[pairIdx].seq1 = a;
+                       seqsPairs[pairIdx].seq2 = b;
+                       pairIdx++;
+               }
+       }
+#endif
+       // do all pairwise alignments for posterior probability matrices
+       // NOTE: the OpenMP build opens one loop here and the serial build opens
+       // two; the matching closing brace of the inner serial loop is guarded by
+       // #ifndef _OPENMP further down.
+#ifdef _OPENMP
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int a= seqsPairs[pairIdx].seq1;
+               int b = seqsPairs[pairIdx].seq2;
+               if(enableVerbose) {
+#pragma omp critical
+                       cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
+               }
+#else
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+#endif
+                       Sequence *seq1 = sequences->GetSequence(a);
+                       Sequence *seq2 = sequences->GetSequence(b);
+
+                       // verbose output
+                       // NOTE(review): in the OpenMP build this write is not inside a
+                       // critical section, so lines from different threads may interleave
+                       if (enableVerbose) {
+                               cerr << "Computing posterior matrix: (" << a + 1 << ") "
+                                               << seq1->GetHeader() << " vs. " << "(" << b + 1 << ") "
+                                               << seq2->GetHeader() << " -- ";
+                       }
+
+                       // compute forward and backward probabilities
+                       VF *forward = model.ComputeForwardMatrix(seq1, seq2);
+                       assert(forward);
+                       VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
+                       assert(backward);
+
+                       // compute posterior probability matrix from HMM
+                       VF *posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,
+                                       *backward);
+                       assert(posterior);
+                       delete forward;
+                       delete backward;
+
+                       //compute posterior probability matrix from partition function
+                       VF* part_posterior = ::ComputePostProbs(a, b, seq1->GetString(),
+                                       seq2->GetString());
+                       assert(part_posterior);
+
+                       //merge the two posterior matrices
+                       // (entry-wise root mean square of the two estimates, written back
+                       // into the HMM posterior; both vectors are walked in lock-step
+                       // over (len1 + 1) * (len2 + 1) entries)
+                       VF::iterator ptr1 = posterior->begin();
+                       VF::iterator ptr2 = part_posterior->begin();
+                       for (int i = 0; i <= seq1->GetLength(); i++) {
+                               for (int j = 0; j <= seq2->GetLength(); j++) {
+                                       float v1 = *ptr1;
+                                       float v2 = *ptr2;
+
+                                       *ptr1 = sqrt((v1 * v1 + v2 * v2) * 0.5f);
+                                       ptr1++;
+                                       ptr2++;
+                               }
+                       }
+                       delete part_posterior;
+
+                       // compute sparse representations
+                       sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), *posterior);
+                       sparseMatrices[b][a] = NULL;
+
+                       // perform the pairwise sequence alignment
+                       pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
+                                       seq1->GetLength(), seq2->GetLength(), *posterior);
+
+                       //compute the pairwise distance using expected accuracy
+                       // NOTE(review): divides by the shorter sequence length; an empty
+                       // input sequence would make this a division by zero -- confirm
+                       // that empty sequences are rejected earlier
+                       float accuracy = alignment.second
+                                       / min(seq1->GetLength(), seq2->GetLength());
+                       distances[a][b] = distances[b][a] = 1.0f - accuracy;
+
+                       if (enableVerbose) {
+                               cerr << setprecision(10) << accuracy << endl;
+                       }
+                       delete alignment.first;
+                       delete posterior;
+#ifndef _OPENMP
+               }
+#endif
+       }
+       //create the guide tree
+       this->tree = new MSAClusterTree(this, distances, numSeqs);
+       this->tree->create();
+
+       // perform the consistency transformation the desired number of times
+       // (the integer sequence weights are first rescaled to floats;
+       // NOTE(review): seqsWeights is presumably filled in while the guide tree
+       // is created -- confirm)
+       float* fweights = new float[numSeqs];
+       for (int r = 0; r < numSeqs; r++) {
+               fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
+               fweights[r] *= 10;
+       }
+       for (int r = 0; r < numConsistencyReps; r++) {
+               SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
+                               DoRelaxation(fweights, sequences, sparseMatrices);
+
+               // now replace the old posterior matrices
+               for (int i = 0; i < numSeqs; i++) {
+                       for (int j = 0; j < numSeqs; j++) {
+                               delete sparseMatrices[i][j];
+                               sparseMatrices[i][j] = newSparseMatrices[i][j];
+                       }
+               }
+       }
+       delete[] fweights;
+#ifdef _OPENMP
+       delete [] seqsPairs;
+#endif
+
+       //compute the final multiple sequence alignment
+       MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
+                       sparseMatrices, model);
+
+       // build annotation
+       if (enableAnnotation) {
+               WriteAnnotation(finalAlignment, sparseMatrices);
+       }
+       //destroy the guide tree
+       delete this->tree;
+       this->tree = 0;
+
+       // delete sparse matrices
+       // (diagonal entries are not visited here)
+       for (int a = 0; a < numSeqs - 1; a++) {
+               for (int b = a + 1; b < numSeqs; b++) {
+                       delete sparseMatrices[a][b];
+                       delete sparseMatrices[b][a];
+               }
+       }
+
+       return finalAlignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetInteger()
+//
+// Attempts to parse an integer from the character string given.
+// Returns true only if no parsing error occurs.
+/////////////////////////////////////////////////////////////////
+
+bool GetInteger(char *data, int *val) {
+       char *endPtr;
+       long int retVal;
+
+       assert(val);
+
+       errno = 0;
+       retVal = strtol(data, &endPtr, 0);
+       if (retVal == 0 && (errno != 0 || data == endPtr))
+               return false;
+       if (errno != 0 && (retVal == LONG_MAX || retVal == LONG_MIN))
+               return false;
+       if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
+               return false;
+       *val = (int) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// GetFloat()
+//
+// Attempts to parse a float from the character string given.
+// Returns true only if no parsing error occurs.
+/////////////////////////////////////////////////////////////////
+
+bool GetFloat(char *data, float *val) {
+       char *endPtr;
+       double retVal;
+
+       assert(val);
+
+       errno = 0;
+       retVal = strtod(data, &endPtr);
+       if (retVal == 0 && (errno != 0 || data == endPtr))
+               return false;
+       if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
+               return false;
+       *val = (float) retVal;
+       return true;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadParameters()
+//
+// Read initial distribution, transition, and emission
+// parameters from a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::ReadParameters() {
+
+       ifstream data;
+
+       emitPairs = VVF(256, VF(256, 1e-10));
+       emitSingle = VF(256, 1e-5);
+
+       // read initial state distribution and transition parameters
+       if (parametersInputFilename == string("")) {
+               if (NumInsertStates == 1) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen1Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend1Default[i];
+               } else if (NumInsertStates == 2) {
+                       for (int i = 0; i < NumMatrixTypes; i++)
+                               initDistrib[i] = initDistrib2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapOpen[i] = gapOpen2Default[i];
+                       for (int i = 0; i < 2 * NumInsertStates; i++)
+                               gapExtend[i] = gapExtend2Default[i];
+               } else {
+                       cerr
+                                       << "ERROR: No default initial distribution/parameter settings exist"
+                                       << endl << "       for " << NumInsertStates
+                                       << " pairs of insert states.  Use --paramfile." << endl;
+                       exit(1);
+               }
+
+               alphabet = alphabetDefault;
+
+               for (int i = 0; i < (int) alphabet.length(); i++) {
+                       emitSingle[(unsigned char) tolower(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       emitSingle[(unsigned char) toupper(alphabet[i])] =
+                                       emitSingleDefault[i];
+                       for (int j = 0; j <= i; j++) {
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = emitPairsDefault[i][j];
+                       }
+               }
+       } else {
+               data.open(parametersInputFilename.c_str());
+               if (data.fail()) {
+                       cerr << "ERROR: Unable to read parameter file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               string line[3];
+               for (int i = 0; i < 3; i++) {
+                       if (!getline(data, line[i])) {
+                               cerr
+                                               << "ERROR: Unable to read transition parameters from parameter file: "
+                                               << parametersInputFilename << endl;
+                               exit(1);
+                       }
+               }
+               istringstream data2;
+               data2.clear();
+               data2.str(line[0]);
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       data2 >> initDistrib[i];
+               data2.clear();
+               data2.str(line[1]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapOpen[i];
+               data2.clear();
+               data2.str(line[2]);
+               for (int i = 0; i < 2 * NumInsertStates; i++)
+                       data2 >> gapExtend[i];
+
+               if (!getline(data, line[0])) {
+                       cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
+                                       << parametersInputFilename << endl;
+                       exit(1);
+               }
+
+               // read alphabet as concatenation of all characters on alphabet line
+               alphabet = "";
+               string token;
+               data2.clear();
+               data2.str(line[0]);
+               while (data2 >> token)
+                       alphabet += token;
+
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       for (int j = 0; j <= i; j++) {
+                               float val;
+                               data >> val;
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
+                                               alphabet[j])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
+                                               alphabet[i])] = val;
+                               emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
+                                               alphabet[i])] = val;
+                       }
+               }
+
+               for (int i = 0; i < (int) alphabet.size(); i++) {
+                       float val;
+                       data >> val;
+                       emitSingle[(unsigned char) tolower(alphabet[i])] = val;
+                       emitSingle[(unsigned char) toupper(alphabet[i])] = val;
+               }
+               data.close();
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// printUsage() / ParseParams()
+//
+// Print the usage message; parse all command-line options.
+/////////////////////////////////////////////////////////////////
+void MSA::printUsage() {
+       cerr
+                       << "************************************************************************"
+                       << endl
+                       << "\tMSAPROBS is a open-source protein multiple sequence alignment algorithm"
+                       << endl
+                       << "\tbased on pair hidden markov model and partition function postirior"
+                       << endl
+                       << "\tprobabilities. If any comments or problems, please contact"
+                       << endl
+                       << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
+                       << endl
+                       << "*************************************************************************"
+                       << endl << "Usage:" << endl
+                       << "       msaprobs [OPTION]... [infile]..." << endl << endl
+                       << "Description:" << endl
+                       << "       Align sequences in multi-FASTA format" << endl << endl
+                       << "       -o, --outfile <string>" << endl
+                       << "              specify the output file name (STDOUT by default)"
+                       << endl << "       -num_threads <integer>" << endl
+                       << "              specify the number of threads used, and otherwise detect automatically"
+                       << endl << "       -clustalw" << endl
+                       << "              use CLUSTALW output format instead of FASTA format"
+                       << endl << endl << "       -c, --consistency REPS" << endl
+                       << "              use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
+                       << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
+                       << ") passes of consistency transformation" << endl << endl
+                       << "       -ir, --iterative-refinement REPS" << endl
+                       << "              use " << MIN_ITERATIVE_REFINEMENT_REPS
+                       << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
+                       << numIterativeRefinementReps << ") passes of iterative-refinement"
+                       << endl << endl << "       -v, --verbose" << endl
+                       << "              report progress while aligning (default: "
+                       << (enableVerbose ? "on" : "off") << ")" << endl << endl
+                       << "       -annot FILENAME" << endl
+                       << "              write annotation for multiple alignment to FILENAME"
+                       << endl << endl << "       -a, --alignment-order" << endl
+                       << "              print sequences in alignment order rather than input order (default: "
+                       << (enableAlignOrder ? "on" : "off") << ")" << endl
+                       << "       -version " << endl
+                       << "              print out version of MSAPROBS " << endl << endl;
+}
+SafeVector<string> MSA::ParseParams(int argc, char **argv) {
+       if (argc < 2) {
+               printUsage();
+               exit(1);
+       }
+       SafeVector<string> sequenceNames;
+       int tempInt;
+       float tempFloat;
+
+       for (int i = 1; i < argc; i++) {
+               if (argv[i][0] == '-') {
+                       //help
+                       if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
+                               printUsage();
+                               exit(1);
+                               //output file name
+                       } else if (!strcmp(argv[i], "-o")
+                                       || !strcmp(argv[i], "--outfile")) {
+                               if (i < argc - 1) {
+                                       alignOutFileName = argv[++i];   //get the file name
+                               } else {
+                                       cerr << "ERROR: String expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               //number of threads used
+                       } else if (!strcmp(argv[i], "-p")
+                                       || !strcmp(argv[i], "-num_threads")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << " ERROR: invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < 0) {
+                                                       tempInt = 0;
+                                               }
+                                               numThreads = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                               // number of consistency transformations
+                       } else if (!strcmp(argv[i], "-c")
+                                       || !strcmp(argv[i], "--consistency")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_CONSISTENCY_REPS
+                                                               || tempInt > MAX_CONSISTENCY_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_CONSISTENCY_REPS << " and "
+                                                                       << MAX_CONSISTENCY_REPS << "." << endl;
+                                                       exit(1);
+                                               } else {
+                                                       numConsistencyReps = tempInt;
+                                               }
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // number of randomized partitioning iterative refinement passes
+                       else if (!strcmp(argv[i], "-ir")
+                                       || !strcmp(argv[i], "--iterative-refinement")) {
+                               if (i < argc - 1) {
+                                       if (!GetInteger(argv[++i], &tempInt)) {
+                                               cerr << "ERROR: Invalid integer following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
+                                                               || tempInt > MAX_ITERATIVE_REFINEMENT_REPS) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", integer must be between "
+                                                                       << MIN_ITERATIVE_REFINEMENT_REPS << " and "
+                                                                       << MAX_ITERATIVE_REFINEMENT_REPS << "."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       numIterativeRefinementReps = tempInt;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Integer expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // annotation files
+                       else if (!strcmp(argv[i], "-annot")) {
+                               enableAnnotation = true;
+                               if (i < argc - 1) {
+                                       annotationFilename = argv[++i];
+                               } else {
+                                       cerr << "ERROR: FILENAME expected for option " << argv[i]
+                                                       << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // clustalw output format
+                       else if (!strcmp(argv[i], "-clustalw")) {
+                               enableClustalWOutput = true;
+                       }
+
+                       // cutoff
+                       else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
+                               if (i < argc - 1) {
+                                       if (!GetFloat(argv[++i], &tempFloat)) {
+                                               cerr
+                                                               << "ERROR: Invalid floating-point value following option "
+                                                               << argv[i - 1] << ": " << argv[i] << endl;
+                                               exit(1);
+                                       } else {
+                                               if (tempFloat < 0 || tempFloat > 1) {
+                                                       cerr << "ERROR: For option " << argv[i - 1]
+                                                                       << ", floating-point value must be between 0 and 1."
+                                                                       << endl;
+                                                       exit(1);
+                                               } else
+                                                       cutoff = tempFloat;
+                                       }
+                               } else {
+                                       cerr << "ERROR: Floating-point value expected for option "
+                                                       << argv[i] << endl;
+                                       exit(1);
+                               }
+                       }
+
+                       // verbose reporting
+                       else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
+                               enableVerbose = true;
+                       }
+
+                       // alignment order
+                       else if (!strcmp(argv[i], "-a")
+                                       || !strcmp(argv[i], "--alignment-order")) {
+                               enableAlignOrder = true;
+                       }
+
+                       //print out version
+                       else if (!strcmp(argv[i], "-version")) {
+                               cerr << "MSAPROBS version " << VERSION << endl;
+                               exit(1);
+                       }
+                       // bad arguments
+                       else {
+                               cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
+                               exit(1);
+                       }
+               } else {
+                       sequenceNames.push_back(string(argv[i]));
+               }
+       }
+
+       /*check the output file name*/
+       cerr << "-------------------------------------" << endl;
+       if (alignOutFileName.length() == 0) {
+               cerr << "The final alignments will be printed out to STDOUT" << endl;
+               alignOutFile = &std::cout;
+       } else {
+               cerr << "Open the output file " << alignOutFileName << endl;
+               alignOutFile = new ofstream(alignOutFileName.c_str(),
+                               ios::binary | ios::out | ios::trunc);
+       }
+       cerr << "-------------------------------------" << endl;
+       return sequenceNames;
+}
+
+/////////////////////////////////////////////////////////////////
+// ProcessTree()
+//
+// Process the tree recursively.  Returns the aligned sequences
+// corresponding to a node or leaf of the tree.
+/////////////////////////////////////////////////////////////////
+MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       MultiSequence *result;
+
+       // check if this is a node of the alignment tree
+       //if (tree->GetSequenceLabel() == -1){
+       if (tree->leaf == NODE) {
+               MultiSequence *alignLeft = ProcessTree(tree->left, sequences,
+                               sparseMatrices, model);
+               MultiSequence *alignRight = ProcessTree(tree->right, sequences,
+                               sparseMatrices, model);
+
+               assert(alignLeft);
+               assert(alignRight);
+
+               result = AlignAlignments(alignLeft, alignRight, sparseMatrices, model);
+               assert(result);
+
+               delete alignLeft;
+               delete alignRight;
+       }
+
+       // otherwise, this is a leaf of the alignment tree
+       else {
+               result = new MultiSequence();
+               assert(result);
+               //result->AddSequence (sequences->GetSequence(tree->GetSequenceLabel())->Clone());
+               result->AddSequence(sequences->GetSequence(tree->idx)->Clone());
+       }
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeFinalAlignment()
+//
+// Compute the final alignment by calling ProcessTree(), then
+// performing iterative refinement as needed.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
+               MultiSequence *sequences,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+       MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
+                       sparseMatrices, model);
+
+       SafeVector<int> oldOrdering;
+       if (enableAlignOrder) {
+               for (int i = 0; i < alignment->GetNumSequences(); i++)
+                       oldOrdering.push_back(alignment->GetSequence(i)->GetSortLabel());
+               alignment->SaveOrdering();
+               enableAlignOrder = false;
+       }
+
+       // tree-based refinement
+       // TreeBasedBiPartitioning (sparseMatrices, model, alignment, tree);
+       /*int numSeqs = alignment->GetNumSequences();
+        if(numSeqs < numIterativeRefinementReps){
+        for(int iter = 0; iter < 1; iter ++){
+        for(int i = 0; i < numSeqs - 1; i++){
+        DoIterativeRefinementTreeNode(sparseMatrices, model, alignment, i);
+        }
+        }
+        }*/
+       for (int i = 0; i < numIterativeRefinementReps; i++) {
+               DoIterativeRefinement(sparseMatrices, model, alignment, i);
+       }
+       cerr << endl;
+
+       if (oldOrdering.size() > 0) {
+               for (int i = 0; i < (int) oldOrdering.size(); i++) {
+                       alignment->GetSequence(i)->SetSortLabel(oldOrdering[i]);
+               }
+       }
+
+       // return final alignment
+       return alignment;
+}
+
+/////////////////////////////////////////////////////////////////
+// AlignAlignments()
+//
+// Returns the alignment of two MultiSequence objects.
+/////////////////////////////////////////////////////////////////
+
+MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
+               MultiSequence *align2,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model) {
+
+       // print some info about the alignment
+       if (enableVerbose) {
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align1->GetSequence(i)->GetLabel();
+               cerr << "] vs. ";
+               for (int i = 0; i < align2->GetNumSequences(); i++)
+                       cerr << ((i == 0) ? "[" : ",")
+                                       << align2->GetSequence(i)->GetLabel();
+               cerr << "]: ";
+       }
+#if 0
+       VF *posterior = model.BuildPosterior (align1, align2, sparseMatrices, cutoff);
+#else
+       VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
+                       sparseMatrices, cutoff);
+#endif
+       pair<SafeVector<char> *, float> alignment;
+
+       //perform alignment
+       alignment = model.ComputeAlignment(align1->GetSequence(0)->GetLength(),
+                       align2->GetSequence(0)->GetLength(), *posterior);
+
+       delete posterior;
+
+       if (enableVerbose) {
+
+               // compute total length of sequences
+               int totLength = 0;
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       for (int j = 0; j < align2->GetNumSequences(); j++)
+                               totLength += min(align1->GetSequence(i)->GetLength(),
+                                               align2->GetSequence(j)->GetLength());
+
+               // give an "accuracy" measure for the alignment
+               cerr << alignment.second / totLength << endl;
+       }
+
+       // now build final alignment
+       MultiSequence *result = new MultiSequence();
+       for (int i = 0; i < align1->GetNumSequences(); i++)
+               result->AddSequence(
+                               align1->GetSequence(i)->AddGaps(alignment.first, 'X'));
+       for (int i = 0; i < align2->GetNumSequences(); i++)
+               result->AddSequence(
+                               align2->GetSequence(i)->AddGaps(alignment.first, 'Y'));
+       if (!enableAlignOrder)
+               result->SortByLabel();
+
+       // free temporary alignment
+       delete alignment.first;
+
+       return result;
+}
+
+/////////////////////////////////////////////////////////////////
+// DoRelaxation()
+//
+// Performs one round of the weighted probabilistic consistency transformation.
+// Returns a newly built table of pair matrices; entries of "sparseMatrices" are not deleted here.
+/////////////////////////////////////////////////////////////////
+
+SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights, // weight of each sequence, indexed by sequence number
+               MultiSequence *sequences, // sequences whose pair matrices are relaxed
+               SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) { // current pair matrices, addressed as [a][b]
+       const int numSeqs = sequences->GetNumSequences();
+       // result table: numSeqs x numSeqs, every entry starts out as NULL
+       SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
+                       SafeVector<SparseMatrix *>(numSeqs, NULL));
+
+       // for every pair of sequences (i, j): read from seqsPairs[0 .. numPairs) under OpenMP, all i < j otherwise
+#ifdef _OPENMP
+       int pairIdx;
+#pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
+       for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
+               int i = seqsPairs[pairIdx].seq1;
+               int j = seqsPairs[pairIdx].seq2;
+               float wi = seqsWeights[i];
+               float wj = seqsWeights[j];
+#else
+       for (int i = 0; i < numSeqs; i++) {
+               float wi = seqsWeights[i];
+               for (int j = i + 1; j < numSeqs; j++) {
+                       float wj = seqsWeights[j];
+#endif
+                       Sequence *seq1 = sequences->GetSequence(i);
+                       Sequence *seq2 = sequences->GetSequence(j);
+
+                       if (enableVerbose) { // under OpenMP only this first message is inside a critical section
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+                               cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
+                                               << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
+                                               << ": ";
+                       }
+                       // get the original posterior matrix; it is modified in place below and deleted at the end of this iteration
+                       VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
+                       assert(posteriorPtr);
+                       VF &posterior = *posteriorPtr;
+
+                       const int seq1Length = seq1->GetLength();
+                       const int seq2Length = seq2->GetLength();
+                       // "posterior" is used as a flat array of (seq1Length + 1) * (seq2Length + 1) values, row x starting at x * (seq2Length + 1)
+                       // contribution from the summation where z = x and z = y: scale the pair's own posterior by wi*wi*wj + wi*wj*wj
+                       float w = wi * wi * wj + wi * wj * wj;
+                       float sumW = w; // running sum of all weights applied; used to normalise below
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
+                               posterior[k] = w * posterior[k];
+                       }
+
+                       if (enableVerbose)
+                               cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
+
+                       // contribution from all other sequences k, each weighted by wi * wj * wk
+                       for (int k = 0; k < numSeqs; k++) {
+                               if (k != i && k != j) {
+                                       float wk = seqsWeights[k];
+                                       float w = wi * wj * wk; // shadows the outer w
+                                       sumW += w;
+                                       if (k < i) // k precedes both: uses the matrices stored at [k][i] and [k][j]
+                                               Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
+                                                               posterior);
+                                       else if (k > i && k < j) // k lies between: uses [i][k] and [k][j]
+                                               Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
+                                                               posterior);
+                                       else { // k follows both: uses [i][k] and a temporary transpose of [j][k]
+                                               SparseMatrix *temp =
+                                                               sparseMatrices[j][k]->ComputeTranspose();
+                                               Relax(w, sparseMatrices[i][k], temp, posterior);
+                                               delete temp;
+                                       }
+                               }
+                       }
+                       //cerr<<"sumW "<<sumW<<endl;
+                       for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) { // normalise by the total weight
+                               posterior[k] /= sumW;
+                       }
+                       // mask out positions not originally in the posterior matrix: row 0 is cleared entirely, rows 1..seq1Length keep only columns listed in sparseMatrices[i][j]
+                       SparseMatrix *matXY = sparseMatrices[i][j];
+                       for (int y = 0; y <= seq2Length; y++)
+                               posterior[y] = 0;
+                       for (int x = 1; x <= seq1Length; x++) {
+                               SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
+                               SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
+                               VF::iterator base = posterior.begin() + x * (seq2Length + 1);
+                               int curr = 0; // next column of this row not yet examined
+                               while (XYptr != XYend) {
+
+                                       // zero out all cells until the next stored column (presumably row entries are in increasing column order -- TODO confirm)
+                                       while (curr < XYptr->first) {
+                                               base[curr] = 0;
+                                               curr++;
+                                       }
+
+                                       // now, skip over this column
+                                       curr++;
+                                       ++XYptr;
+                               }
+
+                               // zero out cells after last column
+                               while (curr <= seq2Length) {
+                                       base[curr] = 0;
+                                       curr++;
+                               }
+                       }
+
+                       // save the new posterior matrix as a newly allocated SparseMatrix; the mirrored entry [j][i] is set to NULL
+                       newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
+                                       seq2->GetLength(), posterior);
+                       newSparseMatrices[j][i] = NULL;
+
+                       if (enableVerbose)
+                               cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
+
+                       delete posteriorPtr; // release the array obtained from GetPosterior()
+
+                       if (enableVerbose)
+                               cerr << "done." << endl;
+#ifndef _OPENMP
+               }
+#endif
+       }
+
+       return newSparseMatrices;
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matXZ);
+       assert(matZY);
+
+       int lengthX = matXZ->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+       assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
+
+       // for every x[i]
+       for (int i = 1; i <= lengthX; i++) {
+               SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
+               SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
+
+               VF::iterator base = posterior.begin() + i * (lengthY + 1);
+
+               // iterate through all x[i]-z[k]
+               while (XZptr != XZend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
+                       SafeVector<PIF>::iterator ZYend = ZYptr
+                                       + matZY->GetRowSize(XZptr->first);
+                       const float XZval = XZptr->second;
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               base[ZYptr->first] += weight * XZval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       XZptr++;
+               }
+       }
+}
+
+/////////////////////////////////////////////////////////////////
+// Relax1()
+//
+// Computes the consistency transformation for a single sequence
+// z, and adds the transformed matrix to "posterior".
+/////////////////////////////////////////////////////////////////
+
+void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
+               VF &posterior) {
+
+       assert(matZX);
+       assert(matZY);
+
+       int lengthZ = matZX->GetSeq1Length();
+       int lengthY = matZY->GetSeq2Length();
+
+       // for every z[k]
+       for (int k = 1; k <= lengthZ; k++) {
+               SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
+               SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
+
+               // iterate through all z[k]-x[i]
+               while (ZXptr != ZXend) {
+                       SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
+                       SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
+                       const float ZXval = ZXptr->second;
+                       VF::iterator base = posterior.begin()
+                                       + ZXptr->first * (lengthY + 1);
+
+                       // iterate through all z[k]-y[j]
+                       while (ZYptr != ZYend) {
+                               base[ZYptr->first] += weight * ZXval * ZYptr->second;
+                               ZYptr++;
+                       }
+                       ZXptr++;
+               }
+       }
+}
+/////////////////////////////////////////////////////////////////
+// DoIterativeRefinement()
+//
+// Performs a single round of randomized partitioning iterative
+// refinement.
+/////////////////////////////////////////////////////////////////
+
+int MSA::GenRandom(int m, int seed, bool init) {
+       static const int a = 5, b = 3, n = 7;
+       static int rand0;
+       if (init == true) {
+               rand0 = seed;
+       }
+       m *= 19;
+       int rand1;
+       for (int i = 0; i < n; i++) {
+               rand1 = (a * rand0 + b) % m;
+               rand0 = rand1;
+       }
+       return rand1;
+}
+
+void MSA::DoIterativeRefinement(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment, int si) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       int index = GenRandom(numSeqs, si, true);
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               index = GenRandom(numSeqs, si);
+               if (index % 2) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+void MSA::DoIterativeRefinementTreeNode(
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+               const ProbabilisticModel &model, MultiSequence* &alignment,
+               int nodeIndex) {
+       set<int> groupOne, groupTwo;
+       int numSeqs = alignment->GetNumSequences();
+
+       vector<bool> inGroup1;
+       inGroup1.resize(numSeqs);
+       for (int i = 0; i < numSeqs; i++) {
+               inGroup1[i] = false;
+       }
+
+       AlignmentOrder* orders = this->tree->getAlignOrders();
+       AlignmentOrder* order = &orders[nodeIndex];
+       for (int i = 0; i < order->leftNum; i++) {
+               int si = order->leftLeafs[i];
+               inGroup1[si] = true;
+       }
+       for (int i = 0; i < order->rightNum; i++) {
+               int si = order->rightLeafs[i];
+               inGroup1[si] = true;
+       }
+       // create two separate groups
+       for (int i = 0; i < numSeqs; i++) {
+               if (inGroup1[i]) {
+                       groupOne.insert(i);
+               } else {
+                       groupTwo.insert(i);
+               }
+       }
+       if (groupOne.empty() || groupTwo.empty())
+               return;
+
+       // project into the two groups
+       MultiSequence *groupOneSeqs = alignment->Project(groupOne);
+       assert(groupOneSeqs);
+       MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
+       assert(groupTwoSeqs);
+       delete alignment;
+
+       // realign
+       alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
+                       model);
+
+       delete groupOneSeqs;
+       delete groupTwoSeqs;
+}
+
+/////////////////////////////////////////////////////////////////
+// WriteAnnotation()
+//
+// Computes annotation for multiple alignment and write values
+// to a file.
+/////////////////////////////////////////////////////////////////
+
+void MSA::WriteAnnotation(MultiSequence *alignment,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+       ofstream outfile(annotationFilename.c_str());
+
+       if (outfile.fail()) {
+               cerr << "ERROR: Unable to write annotation file." << endl;
+               exit(1);
+       }
+
+       const int alignLength = alignment->GetSequence(0)->GetLength();
+       const int numSeqs = alignment->GetNumSequences();
+
+       SafeVector<int> position(numSeqs, 0);
+       SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
+       for (int i = 0; i < numSeqs; i++)
+               seqs[i] = alignment->GetSequence(i)->GetDataPtr();
+       SafeVector<pair<int, int> > active;
+       active.reserve(numSeqs);
+
+       SafeVector<int> lab;
+       for (int i = 0; i < numSeqs; i++)
+               lab.push_back(alignment->GetSequence(i)->GetSortLabel());
+
+       // for every column
+       for (int i = 1; i <= alignLength; i++) {
+
+               // find all aligned residues in this particular column
+               active.clear();
+               for (int j = 0; j < numSeqs; j++) {
+                       if (seqs[j][i] != '-') {
+                               active.push_back(make_pair(lab[j], ++position[j]));
+                       }
+               }
+
+               sort(active.begin(), active.end());
+               outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
+       }
+
+       outfile.close();
+}
+
+/////////////////////////////////////////////////////////////////
+// ComputeScore()
+//
+// Computes the annotation score for a particular column.
+/////////////////////////////////////////////////////////////////
+
+int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
+               const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
+
+       if (active.size() <= 1)
+               return 0;
+
+       // ALTERNATIVE #1: Compute the average alignment score.
+
+       float val = 0;
+       for (int i = 0; i < (int) active.size(); i++) {
+               for (int j = i + 1; j < (int) active.size(); j++) {
+                       val += sparseMatrices[active[i].first][active[j].first]->GetValue(
+                                       active[i].second, active[j].second);
+               }
+       }
+
+       return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
+
+}
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSA.h
new file mode 100644 (file)
index 0000000..9d4ef7c
--- /dev/null
@@ -0,0 +1,100 @@
+#ifndef _MSA_H
+#define _MSA_H
+#include "MSADef.h"
+#include "MSAGuideTree.h"
+
+#include "SafeVector.h"
+#include "MultiSequence.h"
+#include "ScoreType.h"
+#include "ProbabilisticModel.h"
+#include "SparseMatrix.h"
+#include <string>
+using namespace std;
+
+class MSAGuideTree;
+struct TreeNode;
+class MSA { // holds the guide-tree pointer, the sequence weights, the output stream and the alignment steps
+public:
+       MSA(int argc, char* argv[]); // presumably receives the program's command-line arguments - definition not shown here
+       ~MSA();
+
+       static void getSysTime(double * dtime); // presumably stores a time stamp in *dtime - definition not shown here
+       MSAGuideTree* getGuideTree() { // returns the stored guide-tree pointer as is
+               return tree;
+       }
+       int * getSeqsWeights() { // returns the stored integer weight array as is
+               return seqsWeights;
+       }
+private:
+       //print usage
+       void printUsage();
+       //do multiple sequence alignment
+       void doAlign();
+
+       //create and release the sequence-weight array
+       void createSeqsWeights(int seqsNum);
+       void releaseSeqsWeights();
+
+       //weights of sequences (integers; MSAGuideTree::getSeqsWeights() writes into this array)
+       int * seqsWeights;
+       //guide tree
+       MSAGuideTree* tree;
+       //output file: its name and the stream written to
+       string alignOutFileName;
+       std::ostream* alignOutFile;
+private:
+       SafeVector<string> ParseParams(int argc, char *argv[]);
+       void PrintParameters(const char *message, const VF &initDistrib,
+                       const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
+                       const VF &emitSingle, const char *filename);
+
+       SafeVector<string> PostProbsParseParams(int argc, char **argv);
+       MultiSequence *doAlign(MultiSequence *sequence, // overload of the parameterless doAlign() above
+                       const ProbabilisticModel &model, VF &initDistrib, VF &gapOpen,
+                       VF &gapExtend, VVF &emitPairs, VF &emitSingle);
+       void ReadParameters();
+       MultiSequence* ProcessTree(TreeNode *tree, MultiSequence *sequences,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model);
+       MultiSequence *ComputeFinalAlignment(MSAGuideTree *tree,
+                       MultiSequence *sequences,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model);
+       MultiSequence *AlignAlignments(MultiSequence *align1, MultiSequence *align2, // returns a newly built alignment; callers in MSA.cpp delete the inputs themselves
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model);
+       SafeVector<SafeVector<SparseMatrix *> > DoRelaxation(float* seqsWeights, // NOTE(review): float* here, while the member seqsWeights is int* - confirm which array callers pass
+                       MultiSequence *sequences,
+                       SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+       void Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY, // adds weight * XZ * ZY products into posterior
+                       VF &posterior);
+       void Relax1(float weight, SparseMatrix *matXZ, SparseMatrix *matZY, // the definition names the first matrix matZX and walks both matrices by the same row index
+                       VF &posterior);
+
+       int GenRandom(int m, int seed, bool init = false); // init == true reseeds the generator state with seed
+       void DoIterativeRefinement( // si is the seed handed to GenRandom
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model, MultiSequence* &alignment, int si);
+       void DoIterativeRefinement( // overload without a seed - definition not shown here
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model, MultiSequence* &alignment);
+       void DoIterativeRefinementTreeNode( // nodeIndex indexes the guide tree's alignment-order array
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       const ProbabilisticModel &model, MultiSequence* &alignment,
+                       int nodeIndex);
+       void WriteAnnotation(MultiSequence *alignment, // writes one ComputeScore() value per alignment column
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+       int ComputeScore(const SafeVector<pair<int, int> > &active, // returns 0 when active has fewer than two entries
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices);
+#ifdef _OPENMP
+       //pair of sequence indices; only declared in OpenMP builds
+       struct SeqsPair {
+               int seq1;
+               int seq2;
+       };
+       int numPairs;
+       SeqsPair* seqsPairs;
+#endif
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.cpp
new file mode 100644 (file)
index 0000000..a95efe0
--- /dev/null
@@ -0,0 +1,151 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include "MSAClusterTree.h"
+MSAClusterTree::MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs) : // forwards every argument to the MSAGuideTree constructor
+               MSAGuideTree(msa, distMatrix, numSeqs) {
+}
+MSAClusterTree::~MSAClusterTree() { // empty body: this class adds nothing of its own to release
+}
+void MSAClusterTree::create() { // three steps, in this order: tree, sequence weights, alignment orders
+       //generate the cluster tree (sets the root and the parent/child links)
+       this->generateClusterTree();
+
+       //calculate sequence weights (walks the parent links set above)
+       this->getSeqsWeights();
+
+       //construct the alignment orders (starts from the root set above)
+       this->createAlignmentOrders();
+}
+void MSAClusterTree::generateClusterTree() {
+       int i;
+       ValidNode* validNodes, *headValidNodes;
+       ValidNode* miniPtr, *minjPtr, *ivalid, *jvalid;
+       int mini, minj;
+       float* joins;
+       unsigned int* clusterLeafs;
+
+       //initialize the valid nodes link list
+       validNodes = new ValidNode[leafsNum + 1];
+       joins = new float[leafsNum + 1];
+       clusterLeafs = new unsigned int[nodesNum + 1];
+       if (!validNodes || !joins || !clusterLeafs) {
+               cerr << "Out of memory of the reconstruction of cluster tree" << endl;
+       }
+       //initialize cluster size 
+       for (i = 0; i < this->leafsNum; i++) {
+               clusterLeafs[i] = 1;
+       }
+
+       headValidNodes = &validNodes[0];
+       headValidNodes->next = &validNodes[1];
+       headValidNodes->n = -1;
+       headValidNodes->node = -1;
+       headValidNodes->prev = NULL;
+
+       //build an initial link list
+       ValidNode* curr = &validNodes[1];
+       ValidNode* prev = headValidNodes;
+       ValidNode* next = &validNodes[2];
+       for (i = 0; i < leafsNum; i++) {
+               curr->n = i;
+               curr->node = i;
+               curr->prev = prev;
+               curr->next = next;
+               prev = curr;
+               curr = next;
+               next++;
+       }
+       prev->next = NULL;
+
+       //to generate the cluster tree
+       int nodeIdx;    //the index of an internal node
+       int firstNode = leafsNum;       //the index of the first internal node
+       int lastNode = firstNode + leafsNum - 1;//the index of the last internal node
+
+       for (nodeIdx = firstNode; nodeIdx < lastNode; nodeIdx++) {
+               //find closest pair of clusters
+               float minDist = 2.0f;
+               miniPtr = headValidNodes;
+               minjPtr = headValidNodes;
+
+               for (ivalid = headValidNodes->next; ivalid != NULL;
+                               ivalid = ivalid->next) {
+                       mini = ivalid->n;
+                       for (jvalid = headValidNodes->next;
+                                       jvalid != NULL && jvalid->n < mini; jvalid = jvalid->next) {
+                               minj = jvalid->n;
+                               float dist = (*distMatrix)[mini][minj];
+                               if (dist < 0) {
+                                       cerr
+                                                       << "ERROR: It is impossible to have distance value less than zero"
+                                                       << endl;
+                                       dist = 0;
+                               }
+                               if (dist < minDist) {
+                                       minDist = dist;
+                                       miniPtr = ivalid;
+                                       minjPtr = jvalid;
+                               }
+                               //printf("dist %g mini %d minj %d\n", dist, ivalid->node, jvalid->node);
+                       }
+               }
+               //printf("**** mini %d minj %d minDist %g *****\n", miniPtr->node, minjPtr->node, minDist);
+               //check the validity of miniPtr and minjPtr;
+               if (miniPtr == headValidNodes || minjPtr == headValidNodes) {
+                       cerr << "OOPS: Error occurred while constructing the cluster tree\n"
+                                       << endl;
+                       exit(-1);
+               }
+               //computing branch length and join the two nodes
+               float branchLength = minDist * 0.5f;
+               this->connectNodes(&nodes[nodeIdx], nodeIdx, &nodes[miniPtr->node],
+                               branchLength, &nodes[minjPtr->node], branchLength);
+               clusterLeafs[nodeIdx] = clusterLeafs[miniPtr->node]
+                               + clusterLeafs[minjPtr->node];
+
+               //remove the valid node minjPtr from the list
+               minjPtr->prev->next = minjPtr->next;
+               if (minjPtr->next != NULL) {
+                       minjPtr->next->prev = minjPtr->prev;
+               }
+               minjPtr->prev = minjPtr->next = NULL;
+
+               //compute the distance of each remaining valid node to the new node
+               for (ivalid = headValidNodes->next; ivalid != NULL;
+                               ivalid = ivalid->next) {
+                       int idx = ivalid->n;
+
+                       float idist = (*distMatrix)[miniPtr->n][idx];
+                       float jdist = (*distMatrix)[minjPtr->n][idx];
+
+                       unsigned int isize = clusterLeafs[miniPtr->node];
+                       unsigned int jsize = clusterLeafs[minjPtr->node];
+                       joins[idx] = (idist * isize + jdist * jsize) / (isize + jsize);
+               }
+               //update the distance to the new node
+               miniPtr->node = nodeIdx;
+               mini = miniPtr->n;
+               for (jvalid = headValidNodes->next; jvalid != NULL;
+                               jvalid = jvalid->next) {
+                       minj = jvalid->n;
+
+                       float dist = joins[minj];
+                       (*distMatrix)[mini][minj] = dist;
+                       (*distMatrix)[minj][mini] = dist;
+               }
+       }
+       //add a pseudo root to this unrooted NJ tree
+       this->root = &nodes[lastNode - 1];
+
+       delete[] validNodes;
+       delete[] joins;
+       delete[] clusterLeafs;
+}
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAClusterTree.h
new file mode 100644 (file)
index 0000000..30bce05
--- /dev/null
@@ -0,0 +1,27 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#ifndef _MSA_CLUSTER_TREE_H
+#define _MSA_CLUSTER_TREE_H
+
+#include "MSAGuideTree.h"
+
+class MSAClusterTree: public MSAGuideTree { // guide tree built by joining the closest clusters
+public:
+       MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs); // arguments are forwarded to MSAGuideTree
+       ~MSAClusterTree();
+
+       //construct the cluster tree, then the sequence weights and alignment orders
+       void create();
+private:
+       //generate the cluster tree; overwrites the distance matrix
+       void generateClusterTree();
+};
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSADef.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSADef.h
new file mode 100644 (file)
index 0000000..6a3d178
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef _MSA_DEF_H
+#define _MSA_DEF_H
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <math.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+//numeric limits
+#define MAX_INT_NUM                            0x7FFFFFFF // 2^31 - 1
+#define MAX_FLOAT_NUM                  FLT_MAX // from <float.h>
+#define INT_MULTIPLY                   1000 // factor applied to integer sequence weights when MSAGuideTree::getSeqsWeights() normalizes them
+
+#define SUBMATRIX_INT_SCALE            100 // presumably an integer scale for substitution-matrix values - not used in this file, confirm at use site
+
+//node kind stored in TreeNode::leaf: NODE is the constructor default, LEAF marks leaf nodes
+enum {
+       NONE, NODE, LEAF
+};
+
+#endif
+
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.cpp
new file mode 100644 (file)
index 0000000..207d25b
--- /dev/null
@@ -0,0 +1,327 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "MSAGuideTree.h"
+#include "MSA.h"
+MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) {
+       int i;
+       TreeNode* node;
+       //system configuration
+       this->msa = msa;
+       this->distMatrix = &distances;
+       this->numSeqs = numSeqs;
+       this->seqsWeights = msa->getSeqsWeights();
+
+       //tree structure
+       this->nodesSize = this->numSeqs * 2 + 1;
+       this->nodes = new TreeNode[this->nodesSize];
+       if (!this->nodes) {
+               cerr << "TreeNodes memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize all the tree nodes
+       this->leafs = this->nodes;
+       this->leafsNum = this->numSeqs;
+       this->nodesNum = 2 * this->leafsNum - 1;
+       for (i = 0; i < this->nodesSize; i++) {
+               node = &nodes[i];
+               node->left = 0;
+               node->right = 0;
+               node->parent = 0;
+               node->leftIdx = -1;
+               node->rightIdx = -1;
+               node->parentIdx = -1;
+               node->idx = -1;
+               node->dist = 0;
+               node->leaf = NODE;              //setted to be NODE, by default
+               node->order = 0;
+               node->depth = 0;
+       }
+       //initialize the leaf nodes
+       for (i = 0; i < this->leafsNum; i++) {
+               node = &this->leafs[i];
+               node->idx = i;
+               node->leaf = LEAF;
+       }
+}
+MSAGuideTree::~MSAGuideTree() {
+       // the node array allocated by the constructor
+       delete[] nodes;
+
+       // the alignment orders together with their leaf lists
+       this->releaseAlignmentOrders();
+}
+//the array holding every tree node (leaves first)
+TreeNode* MSAGuideTree::getNodes() {
+       return this->nodes;
+}
+//the leaf nodes
+TreeNode* MSAGuideTree::getLeafs() {
+       return this->leafs;
+}
+//how many tree nodes there are
+int MSAGuideTree::getNodesNum() {
+       return this->nodesNum;
+}
+//how many leaf nodes there are
+int MSAGuideTree::getLeafsNum() {
+       return this->leafsNum;
+}
+//the alignment-order array; entries are numbered from 1
+AlignmentOrder* MSAGuideTree::getAlignOrders() {
+       return this->alignOrders;
+}
+//how many alignment orders have been created
+int MSAGuideTree::getAlignOrdersNum() {
+       return this->alignOrdersNum;
+}
+/****************************************************
+ create the evolutionary relationship
+ ****************************************************/
+void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx,
+               TreeNode* leftChild, float leftDist, TreeNode* rightChild,
+               float rightDist) {
+       //save the parents index for each child
+       leftChild->parent = parent;
+       leftChild->parentIdx = parentIdx;
+       rightChild->parent = parent;
+       rightChild->parentIdx = parentIdx;
+
+       //save the branch lengths (i.e. distance) from each child to its parent
+       leftChild->dist = leftDist;
+       rightChild->dist = rightDist;
+
+       //save the indices of itself and its children for this new tree node
+       parent->idx = parentIdx;
+       parent->left = leftChild;
+       parent->leftIdx = leftChild->idx;
+       parent->right = rightChild;
+       parent->rightIdx = rightChild->idx;
+}
+/*****************************************
+ compute the alignment order of the phylogentic tree
+ *****************************************/
+void MSAGuideTree::createAlignmentOrders() {
+       int i;
+
+       AlignmentOrder* order;
+       //allocate memory space for alignment orders vector
+       this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0
+       this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1
+       this->alignOrders = new AlignmentOrder[this->alignOrdersSize];
+       if (!this->alignOrders) {
+               cerr << "OOPS: Alignment orders memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize the alignment orders vector
+       for (i = 0; i < this->alignOrdersSize; i++) {
+               order = &this->alignOrders[i];
+               order->leftOrder = 0;
+               order->rightOrder = 0;
+               order->leftLeafs = 0;
+               order->leftNum = 0;
+               order->rightLeafs = 0;
+               order->rightNum = 0;
+       }
+       //starting out constructing the alignment orders
+       int subLeafsNum;
+       int nodeDepth = 1;
+       int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum,
+                       nodeDepth);
+
+       //check whether the function works well
+       if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) {
+               fprintf(stderr,
+                               "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n",
+                               subLeafsNum, alignOrdersNum, subOrder);
+       }
+
+}
+int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot,
+               int* subLeafs, int& subLeafsNum, int nodeDepth) {
+       int leftNum, rightNum;
+       int leftOrder, rightOrder;
+       int* leftLeafs, *rightLeafs;
+
+       if (subRoot->leaf == LEAF) {
+               subLeafs[0] = subRoot->idx;
+               subLeafsNum = 1;
+
+               return 0;                       //if it is a leaf, return the index 0
+       }
+       leftOrder = rightOrder = 0;
+       leftNum = rightNum = 0;
+       leftLeafs = new int[numSeqs];
+       rightLeafs = new int[numSeqs];
+
+       //check the left subtree
+       if (subRoot->left) {
+               //recursively tranverse the left subtree
+               leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs,
+                               leftNum, nodeDepth + 1);
+       }
+       //check the right subtree
+       if (subRoot->right) {
+               rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs,
+                               rightNum, nodeDepth + 1);
+       }
+       //save the leafs in the left and right subtrees of the current subtree
+       if (this->alignOrdersNum > this->alignOrdersSize) {
+               fprintf(stderr, "the alignment order function works bad\n");\\r
+               exit(-1);
+       }
+
+       AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum];
+       order->nodeDepth = nodeDepth;
+       order->leftOrder = leftOrder;
+       order->rightOrder = rightOrder;
+       order->leftNum = leftNum;
+       order->rightNum = rightNum;
+       order->leftLeafs = new int[order->leftNum];
+       order->rightLeafs = new int[order->rightNum];
+       if (!order->leftLeafs || !order->rightLeafs) {
+               fprintf(stderr,
+                               "memory allocation failed while recursively constructing alignment orders\n");
+               exit(-1);
+       }
+       memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int));
+       memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int));
+
+       delete[] leftLeafs;
+       delete[] rightLeafs;
+
+       //for the root of the tree, subLeafs buffer is set to 0
+       if (subLeafs) {
+               //copy the results to the parent tree node
+               memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int));
+               memcpy(subLeafs + order->leftNum, order->rightLeafs,
+                               order->rightNum * sizeof(int));
+       }
+       //compute the total number of leafs in this subtree
+       subLeafsNum = order->leftNum + order->rightNum;
+
+       return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0
+}
+void MSAGuideTree::releaseAlignmentOrders() {
+       if (!this->alignOrders) {
+               return;
+       }
+       for (int i = 0; i < this->alignOrdersNum; i++) {
+               AlignmentOrder* order = &this->alignOrders[i];
+               if (order->leftLeafs) {
+                       delete[] order->leftLeafs;
+               }
+               if (order->rightLeafs) {
+                       delete[] order->rightLeafs;
+               }
+       }
+       delete[] alignOrders;
+}
+/********************************
+ display the alignment orders
+ ********************************/
+void MSAGuideTree::displayAlignmentOrders() {
+       int i, j;
+       AlignmentOrder* order;
+       fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n");
+       for (i = 1; i <= this->alignOrdersNum; i++) {
+               order = &this->alignOrders[i];
+
+               fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i,
+                               order->nodeDepth, order->leftOrder);
+               fprintf(stderr, "---LEFT: ");
+               for (j = 0; j < order->leftNum; j++) {
+                       fprintf(stderr, "%d ", order->leftLeafs[j]);
+               }
+
+               fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder);
+               fprintf(stderr, "\n---RIGHT: ");
+               for (j = 0; j < order->rightNum; j++) {
+                       fprintf(stderr, "%d ", order->rightLeafs[j]);
+               }
+               fprintf(stderr, "\n");
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ display the tree
+ *********************************/
+void MSAGuideTree::displayTree() {
+       fprintf(stderr, "**************DISPLAY TREE*********************\n");
+       for (int i = 0; i < nodesNum; i++) {
+               TreeNode* node = &nodes[i];
+
+               fprintf(stderr,
+                               "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n",
+                               (node == &nodes[node->idx]) ? node->idx : -2, node, node->left,
+                               (!node->left || node->left == &nodes[node->leftIdx]) ?
+                                               node->leftIdx : -2, node->right,
+                               (!node->right || node->right == &nodes[node->rightIdx]) ?
+                                               node->rightIdx : -2, node->parent,
+                               (!node->parent || node->parent == &nodes[node->parentIdx]) ?
+                                               node->parentIdx : -2, node->dist);
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ compute the sequence weights
+ *********************************/
+void MSAGuideTree::getSeqsWeights() {
+       int i;
+       TreeNode* curr;
+
+       //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it.
+       for (i = 0; i < leafsNum; i++) {
+               //for each leaf nodes
+               curr = &this->leafs[i];
+               while (curr != 0) {
+                       curr->order++;
+
+                       curr = curr->parent;
+               }
+       }
+       //compute the weight of each sequence, which corresponds to a leaf node
+       for (i = 0; i < numSeqs; i++) {
+               //compute the weight of each sequence
+               float weights = 0;
+               curr = &this->leafs[i];
+               while (curr->parent != 0) {
+                       weights += curr->dist / curr->order;
+                       curr = curr->parent;
+                       //printf("order:%d weights: %f\n", curr->order, weights);
+               }
+               //save the weight of this sequence
+               seqsWeights[i] = (int) (100 * weights);
+               //printf("%d\n", seqsWeights[i]);
+       }
+       //normalize the weights 
+       int wsum = 0;
+       for (i = 0; i < numSeqs; i++) {
+               wsum += seqsWeights[i];
+       }
+       if (wsum == 0) {
+               //in this case, every sequence is assumed to have an identical weight
+               for (i = 0; i < numSeqs; i++) {
+                       seqsWeights[i] = 1;
+               }
+               wsum = numSeqs;
+       }
+       //printf("wsum:%d \n", wsum);
+       for (i = 0; i < numSeqs; i++) {
+               seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum;
+               if (seqsWeights[i] < 1) {
+                       seqsWeights[i] = 1;
+               }
+               //printf("%d \n", seqsWeights[i]);
+       }
+}
+void MSAGuideTree::create() {
+       //intentionally empty here; MSAClusterTree::create() does the actual tree construction
+}
+
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAGuideTree.h
new file mode 100644 (file)
index 0000000..97d538a
--- /dev/null
@@ -0,0 +1,119 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#ifndef _MSA_GUIDE_TREE_H
+#define _MSA_GUIDE_TREE_H
+#include "MSADef.h"
+#include "MSA.h"
+
+#include "SafeVector.h"
+#include "MultiSequence.h"
+#include "ScoreType.h"
+#include "ProbabilisticModel.h"
+#include "SparseMatrix.h"
+
+class MSA;
+struct ValidNode { //list entry pairing a distance-matrix index with a tree-node index (prev/next presumably form a doubly linked list)
+       ValidNode* prev;                //the neighbouring entry before this one
+       ValidNode* next;                //the neighbouring entry after this one
+       int n;                          //the index in the distance matrix
+       int node;                       //the index in the tree node entries
+};
+
+struct TreeNode { //one node of the guide tree; every link is kept both as a pointer and as an index
+       struct TreeNode *left;                  //the pointer to its left child
+       struct TreeNode *right;                 //the pointer to its right child
+       struct TreeNode *parent;                //the pointer to its parent (0 for the root)
+       int leftIdx;                                    //the index of the left child
+       int rightIdx;                                   //the index of the right child
+       int parentIdx;                                  //the index of its parent
+       int idx;                                                //the index of itself
+       float dist;                                             //the distance to its parent
+       int leaf;                                               //whether it is a leaf node or not
+       int order;                      //the number of leaf nodes in the subtree rooted at this node (incremented once per leaf by the sequence-weighting code)
+       int depth;                                              //the depth of the node
+};
+struct AlignmentOrder { //one profile-profile alignment step derived from an internal tree node
+       int nodeDepth;                  //the depth of the internal node
+       int leftOrder;                  //the order number of the left child (the old comment said "right"; presumably swapped - confirm against createAlignmentOrders())
+       int rightOrder;                 //the order number of the right child (the old comment said "left"; presumably swapped - confirm against createAlignmentOrders())
+       int* leftLeafs;                 //the indices of leafs in the left subtree
+       int leftNum;                    //the number of leafs in the left subtree
+       int* rightLeafs;                        //the indices of leafs in the right subtree
+       int rightNum;                   //the number of leafs in the right subtree
+};
+
+class MSAGuideTree { //abstract base class of the guide trees (abstract because of the pure virtual destructor)
+public:
+       MSAGuideTree(MSA* msa, VVF& distMatrix, int numSeqs); //presumably stores its arguments in the members of the same names - confirm in the .cpp
+       virtual ~MSAGuideTree() = 0;    //pure virtual destructor: makes the class abstract; C++ still requires a definition of it elsewhere
+
+       //get the array of all tree nodes
+       TreeNode* getNodes();
+       //get the array of leaf nodes
+       TreeNode* getLeafs();
+       //get the number of tree nodes
+       int getNodesNum();
+       //get the number of leaf nodes
+       int getLeafsNum();
+       //get the root of the tree
+       TreeNode* getRoot() {
+               return this->root;
+       }
+       //get the alignment orders and how many of them there are
+       AlignmentOrder* getAlignOrders();
+       int getAlignOrdersNum();
+       //construct the alignment orders
+       void createAlignmentOrders();
+
+       //construct the guide tree (virtual; the base implementation does nothing)
+       virtual void create();
+       //calculate the sequence weights
+       virtual void getSeqsWeights();
+
+       /**********DEBUGGING****************/
+       //display the tree
+       void displayTree();
+       //display the alignment orders
+       void displayAlignmentOrders();
+
+protected:
+       //join two child nodes under a parent, with the given distances to that parent
+       void connectNodes(TreeNode* parent, int parentIdx, TreeNode* leftChild,
+                       float leftDist, TreeNode* rightChild, float rightDist);
+       //release the alignment orders vector
+       void releaseAlignmentOrders();
+       //recursive implementation of constructing the alignment orders
+       int recursiveCreateAlignmentOrders(TreeNode* subRoot, int* subLeafs,
+                       int& subLeafsNum, int nodeDepth);
+
+       //system configurations
+       MSA* msa;
+       VVF* distMatrix;
+       int numSeqs;
+       int* seqsWeights; //presumably one integer weight per sequence - confirm in the .cpp
+
+       //all the tree nodes
+       TreeNode* nodes;
+       int nodesNum; //number of tree nodes
+       int nodesSize; //presumably the allocated capacity of nodes - confirm in the .cpp
+       //the root tree node
+       TreeNode* root;
+       //leaf nodes
+       TreeNode* leafs;
+       int leafsNum; //number of leaf nodes
+
+       //alignment orders
+       AlignmentOrder* alignOrders;
+       int alignOrdersNum; //number of alignment orders
+       int alignOrdersSize; //presumably the allocated capacity of alignOrders - confirm in the .cpp
+};
+#endif
+
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAPartProbs.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAPartProbs.cpp
new file mode 100644 (file)
index 0000000..f9c3f25
--- /dev/null
@@ -0,0 +1,728 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "SafeVector.h"
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <ctype.h>
+#include <assert.h>
+#define  TRACE 0               // 0: no tracing, 1: print debug traces
+//proba like settings
+#define  endgaps 1             // 1: separate terminal (end) gap penalties enabled, 0: disabled
+#define  PART_FULL_MEMORY 0    // 0: low-memory mode of partf() (Ze/Zf keep two rows only), otherwise full matrices
+#define  REVPART_FULL_MEMORY 0 // 0: low-memory mode of revers_partf() (two rows per matrix), otherwise full matrices
+using namespace std;
+
+#ifdef _WIN32 //OS_HUGE_VALL is the overflow threshold checked by partf()
+#define OS_HUGE_VALL   HUGE_VAL
+#else
+#define OS_HUGE_VALL   HUGE_VALL
+#endif
+
+typedef struct { //run-time settings; ComputePostProbs() reads gapopen, gapext, beta, N and matrix from the global 'argument'
+       char input[30];                 //presumably the input name - not used in this file, confirm at the definition site
+       int matrix;                     //only echoed in the trace output of this file (presumably a scoring-matrix selector - confirm)
+       int N;                          //read by ComputePostProbs() but not used further in this file
+       float T;                        //temperature; the comment in ComputePostProbs() gives beta = 1/T
+       float beta;                     //scale applied to a penalty before exponentiation: exp(beta * penalty)
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;                  //gap open penalty (exponentiated with beta in ComputePostProbs())
+       float gapext;                   //gap extension penalty (exponentiated with beta in ComputePostProbs())
+} argument_decl;
+
+typedef struct sequence { //one input sequence as passed to partf() and revers_partf()
+       char *title;                    //sequence label; not read by partf() or revers_partf()
+       char *text;                     //NUL-terminated residue string; each character is indexed as (c - 'A') into subst_index
+       int length;                     //number of residues; ComputePostProbs() sets it to strlen(text)
+} fasta;
+
+typedef struct alignment { //same layout as fasta; not referenced by the functions visible in this part of the file
+       char *title;                    //label of the alignment entry
+       char *text;                     //text of the alignment entry
+       int length;                     //presumably the length of text - confirm at the use site
+} align;
+
+////////////////////////////////////////////////////////
+//externs related to the scoring matrix and the input arguments (defined in another translation unit)
+///////////////////////////////////////////////////////////
+extern float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+extern char aminos[26], matrixtype[20], bases[26];
+
+extern double sub_matrix[26][26]; //pair weights, indexed as sub_matrix[subst_index[x]][subst_index[y]]
+extern int subst_index[26]; //maps a residue letter (c - 'A') to its row/column in sub_matrix
+
+extern float TEMPERATURE;
+extern int MATRIXTYPE;
+
+extern float GAPOPEN;
+extern float GAPEXT;
+extern argument_decl argument; //settings read by ComputePostProbs()
+
+//////////////////////////////////////////////////////////////////////////////
+//calculates reverse partition function values based on z matrices
+//and also simulaneously calculates the propability of each basepair
+//or aminoacid residue pair i,j
+//////////////////////////////////////////////////////////////////////////////
+
+VF *revers_partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, long double **Zfm, const double d,
+               const double e) {
+       // printf("revpart\n");
+       //rest of the declarations
+       int i, j;
+       long double **Zm = NULL;
+       long double **Ze = NULL;
+       long double **Zf = NULL;
+       int len0, len1;
+       float probability;
+       long double tempvar;
+       int Si, Tj;
+       double endgapopen, endgapextend;
+       FILE *fo;
+
+       //Init lengths of sequences
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       //Safe vector declared
+       VF *posteriorPtr = new VF((len0 + 1) * (len1 + 1));
+       VF & posterior = *posteriorPtr;
+       VF::iterator ptr = posterior.begin();
+
+       if (TRACE)                      //open the trace file
+               fo = fopen("revpartdump", "a");
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //instantiate the z matrix
+       if (REVPART_FULL_MEMORY) {
+
+               Ze = new long double *[sequences[1].length + 1];
+               Zf = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               if (TRACE)
+                       printf("\n\n %e %e\n", d, e);
+
+               //DYNAMICALLY GROW 2D Zm Zf Ze MARICES (long double)
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[2];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= 1; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+               }
+
+       }
+
+       if (TRACE) {
+               printf("in rev partf---");
+               printf("\n\n");
+       }
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++)
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0.0;
+                               Zf[i][j] = 0.0;
+                               Ze[i][j] = 0.0;
+                       }
+       } else {
+
+               for (j = 0; j <= len0; j++) {
+                       Zm[0][j] = 0;
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+                       Zm[1][j] = 0;
+               }
+       }
+
+       //fill the probability matrix with 0s
+       for (i = 0; i <= len1; i++)
+               for (j = 0; j <= len0; j++)
+                       ptr[j * (len1 + 1) + i] = 0;
+
+       if (endgaps == 0) {
+               Zm[len1][len0] = 1;
+               Ze[len1][len0] = Zf[len1][len0] = 0;
+               Zf[len1 - 1][len0] = Zm[len1][len0] * d;
+               Ze[len1][len0 - 1] = Zm[len1][len0] * d;
+
+               //>=2ND ROW INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * e;
+                       }
+               } else {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * e;
+                       }
+               }
+       } else {
+
+               if (REVPART_FULL_MEMORY) {
+
+                       Zm[len1][len0] = 1;
+                       Ze[len1][len0] = Zf[len1][len0] = 0;
+                       Zf[len1 - 1][len0] = Zm[len1][len0] * endgapopen;
+                       Ze[len1][len0 - 1] = Zm[len1][len0] * endgapopen;
+
+                       //>=2ND ROW INIT
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * endgapextend;
+                       }
+
+                       //M Iy= d+j*e
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * endgapextend;
+                       }
+
+               } else {
+                       //in Zm
+                       //let:
+                       //  Zm(0) be the current row being filled/computed
+                       //  Zm(1) be the previous row
+
+                       Zm[1][len0] = 1;
+                       Ze[0][len0] = Zf[0][len0] = 0;
+                       Zf[1][len0] = Zm[1][len0] * endgapopen;
+                       Ze[0][len0 - 1] = Zm[1][len0] * endgapopen;
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * endgapextend;
+                       }
+
+               }                       //END ELSE
+
+       }                               //END FULL MEMORY and GAP enablement IF STATEMENT
+
+       double scorez, zz = 0;
+
+       for (i = len1 - 1; i >= 0; i--) {
+
+               for (j = len0 - 1; j >= 0; j--) {
+                       Si = subst_index[sequences[1].text[i] - 'A'];
+                       Tj = subst_index[sequences[0].text[j] - 'A'];
+                       scorez = sub_matrix[Si][Tj];
+
+                       //endgaps modification aug 10
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == 0) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == 0) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+
+                       }
+
+                       if (REVPART_FULL_MEMORY) {
+                               //z computation
+
+                               Ze[i][j] = Zm[i][j + 1] * open0 + Ze[i][j + 1] * extend0;
+                               Zf[i][j] = Zm[i + 1][j] * open1 + Zf[i + 1][j] * extend1;
+                               Zm[i][j] = (Zm[i + 1][j + 1] + Zf[i + 1][j + 1]
+                                               + Ze[i + 1][j + 1]) * scorez;
+                               zz = Zm[i][j] + Zf[i][j] + Ze[i][j];
+
+                       } else {
+
+                               //2 ROW zE zF ALGORITHM GOES...:
+                               //Ze[1][j] =Zm[i][j + 1] * exp(beta * open0) + Ze[1][j + 1] *exp(beta * extend0);
+                               //Zf[1][j] = Zm[i + 1][j] * exp(beta * open1) + Zf[0][j] * exp(beta * extend1);
+                               //Zm[i][j] = (Zm[i + 1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1]) * exp(beta * scorez);
+                               //zz = Zm[0][j] + Zf[1][j] + Ze[1][j];
+
+                               //lowmem code for merging probability calculating module
+                               //Here we make use of Zm as a 2 row matrix
+
+                               Zf[1][j] = Zm[1][j] * open1 + Zf[0][j] * extend1;
+                               Ze[1][j] = Zm[0][j + 1] * open0 + Ze[1][j + 1] * extend0;
+                               Zm[0][j] = (Zm[1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1])
+                                               * scorez;
+
+                               tempvar = Zfm[i + 1][j + 1] * Zm[0][j];
+                               //divide P(i,j) i.e. pairwise probability by denominator
+                               tempvar /= (scorez * Zfm[0][0]);
+                               probability = (float) tempvar;
+
+                               //store only noticable probabilities
+                               if (probability <= 1 && probability >= 0.001) {
+                                       //algorithm goes...
+                                       //validprob[i + 1][j + 1] = probability;
+                                       ptr[(j + 1) * (len1 + 1) + (i + 1)] = probability;
+                               }
+                               //lowmem code ends here
+
+                       }
+
+               }                       //end of for
+
+               if (REVPART_FULL_MEMORY == 0) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+
+                               Zm[1][t] = Zm[0][t];
+                               Zm[0][t] = 0;
+
+                       }
+                       Zf[0][len0] = 1;
+
+               }
+
+       }                               //end of for
+
+       if (TRACE) {
+               printf("\n\nrM:....\n\n");
+               if (REVPART_FULL_MEMORY) {
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zm[i][j]);
+                               printf("\n");
+                       }
+
+                       printf("\n\nrE:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Ze[i][j]);
+                               printf("\n");
+
+                       }
+
+                       printf("\n\nrF:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zf[i][j]);
+                               printf("\n");
+
+                       }
+
+               }
+
+       }
+
+       if (TRACE) {
+               fprintf(fo, "\n");
+               fclose(fo);
+       }
+
+       //delete unused memory
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++) {
+                       delete (Zm[i]);
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zm[0]);
+
+               delete (Zm[1]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       for (i = 0; i <= len1; i++) {
+               delete (Zfm[i]);
+       }
+
+       if (Zf != NULL)
+               delete (Zf);
+
+       if (Ze != NULL)
+               delete (Ze);
+
+       if (Zm != NULL)
+               delete (Zm);
+
+       if (Zfm != NULL)
+               delete (Zfm);
+
+       posterior[0] = 0;
+       return (posteriorPtr);
+
+}
+
+//////////////////////////////////////////////////////////////
+//forward partition function
+/////////////////////////////////////////////////////////////
+
+long double **partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, const double d, const double e) {
+       //printf("partf\n");
+       int i, j, len1, len0;
+       long double **Zm = NULL, **Zf = NULL, **Ze = NULL, zz = 0;
+       double endgapopen, endgapextend;
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //the flag endgaps is set at the #define section
+       if (PART_FULL_MEMORY) {
+
+               Zf = new long double *[sequences[1].length + 1];
+               Ze = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               //comment
+               if (TRACE)
+                       printf("\nPARTF:====\n");
+
+               //DYNAMICALLY GROW 2D M,IX,IY,PIX,PIY MARICES
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[sequences[1].length + 1];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+               Ze[0] = new long double[sequences[0].length + 1];
+               Zf[0] = new long double[sequences[0].length + 1];
+               Ze[1] = new long double[sequences[0].length + 1];
+               Zf[1] = new long double[sequences[0].length + 1];
+       }
+
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++)
+                       for (j = 0; j <= sequences[0].length; j++) {
+                               Zm[i][j] = 0.00;
+                               Zf[i][j] = 0.00;
+                               Ze[i][j] = 0.00;
+                       }
+       } else {
+               for (i = 0; i <= len1; i++) {
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0;
+                       }
+               }
+               for (j = 0; j <= len0; j++) {
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+               }
+       }
+
+       //INTITIALIZE THE DP 
+
+       if (endgaps == 0) {
+               Zm[0][0] = 1.00;
+
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * d;
+               Ze[0][1] = Zm[0][0] * d;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * e;
+               }
+       } else {
+               //init z
+               Zm[0][0] = 1.00;
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * endgapopen;
+               Ze[0][1] = Zm[0][0] * endgapopen;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * endgapextend;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * endgapextend;
+               }
+       }
+
+       //1ST ROW/COL INIT
+
+       int Si, Tj;
+       double score;
+
+       for (i = 1; i <= sequences[1].length; i++) {
+
+               for (j = 1; j <= sequences[0].length; j++) {
+
+                       Si = subst_index[sequences[1].text[i - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[j - 1] - 'A'];
+
+                       score = sub_matrix[Si][Tj];
+
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == sequences[1].length) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == sequences[0].length) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+                       }
+
+                       //
+                       //z computation using open and extend temp vars
+                       //open0 is gap open in seq0 and open1 is gap open in seq1
+                       //entend0 is gap extend in seq0 and extend1 is gap extend in seq1
+
+                       if (PART_FULL_MEMORY) {
+                               Ze[i][j] = Zm[i][j - 1] * open0 + Ze[i][j - 1] * extend0;
+
+                               if (Ze[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Ze\n");
+                                       exit(1);
+                               }
+
+                               Zf[i][j] = Zm[i - 1][j] * open1 + Zf[i - 1][j] * extend1;
+
+                               if (Zf[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zf\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[i - 1][j - 1]
+                                               + Zf[i - 1][j - 1]) * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zm\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[i][j] + Zf[i][j];
+                       } else {
+                               Ze[1][j] = Zm[i][j - 1] * open0 + Ze[1][j - 1] * extend0;
+
+                               if (Ze[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zE\n");
+                                       exit(1);
+                               }
+
+                               Zf[1][j] = Zm[i - 1][j] * open1 + Zf[0][j] * extend1;
+
+                               if (Zf[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zF\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[0][j - 1] + Zf[0][j - 1])
+                                               * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zM\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[1][j] + Zf[1][j];
+                       }
+
+               }                       //end for
+
+               if (!PART_FULL_MEMORY) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+                       }
+
+                       Zf[1][0] = 1;
+
+               }
+
+       }                               //end for
+
+       //store the sum of zm zf ze (m,n)s in zm's 0,0 th position
+       Zm[0][0] = zz;
+
+       if (TRACE) {
+               //debug code aug 3 
+               //print the 3 Z matrices namely Zm Zf and Ze
+
+               printf("\n\nFINAL Zm:\n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zm[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Zf \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zf[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Ze \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Ze[i][j]);
+                       printf("\n");
+               }
+
+               //end debug dump code
+
+       }
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++) {
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       delete (Zf);
+       delete (Ze);
+
+       return Zm;
+
+}                              //end of forward partition function
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//entry point (was the main function) , returns the posterior probability safe vector
+////////////////////////////////////////////////////////////////////////////////////////
+VF *ComputePostProbs(int a, int b, string seq1, string seq2) {
+       //printf("probamod\n"); 
+       double gap_open = -22, gap_ext = -1, beta = 0.2;//T = 5, beta = 1/T = 0.2, by default
+       int stock_loop = 1;
+       int le = 160;
+       double termgapopen = 1.0f;      //exp(0)
+       double termgapextend = 1.0f;    //exp(0)
+
+       //initialize the sequence structure
+       fasta sequences[2];
+
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+       if (TRACE)
+
+       {
+               printf("%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               printf("after init\n");
+
+               FILE *dump1 = fopen("dump1", "a");
+               fprintf(dump1, "%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               fclose(dump1);
+       }
+
+       gap_open = argument.gapopen;
+       gap_ext = argument.gapext;
+       beta = argument.beta;
+
+       stock_loop = argument.N;
+       le = argument.matrix;
+
+       //compute the values of exp(beta * ?)
+       termgapopen = exp(beta * 0.0);
+       termgapextend = exp(beta * 0.0);
+       gap_open = exp(beta * gap_open);
+       gap_ext = exp(beta * gap_ext);
+
+       if (TRACE)
+               printf("%f %f %f %d\n", gap_open, gap_ext, beta, le);
+
+       //call for calculating the posterior probabilities
+       // 1. call partition function partf
+       // 2. calculate revpartition using revers_parf
+       // 3. calculate probabilities
+       /// MODIFICATION... POPULATE SAFE VECTOR
+
+       long double **MAT1;
+
+       MAT1 = partf(sequences, termgapopen, termgapextend, gap_open, gap_ext);
+
+       return revers_partf(sequences, termgapopen, termgapextend, MAT1, gap_open,
+                       gap_ext);
+
+}
+
+//end of posterior probability  module
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj
new file mode 100644 (file)
index 0000000..5212610
--- /dev/null
@@ -0,0 +1,272 @@
+<?xml version="1.0" encoding="gb2312"?>\r
+<VisualStudioProject\r
+       ProjectType="Visual C++"\r
+       Version="8.00"\r
+       Name="MSAProbs"\r
+       ProjectGUID="{671563E4-93A2-419E-8B41-48DDF71DD144}"\r
+       RootNamespace="MSAProbs"\r
+       Keyword="Win32Proj"\r
+       >\r
+       <Platforms>\r
+               <Platform\r
+                       Name="Win32"\r
+               />\r
+       </Platforms>\r
+       <ToolFiles>\r
+       </ToolFiles>\r
+       <Configurations>\r
+               <Configuration\r
+                       Name="Debug|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="1"\r
+                       CharacterSet="1"\r
+                       >\r
+                       <Tool\r
+                               Name="VCPreBuildEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCustomBuildTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXMLDataGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebServiceProxyGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCMIDLTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               AdditionalOptions="/openmp"\r
+                               Optimization="0"\r
+                               PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"\r
+                               MinimalRebuild="true"\r
+                               BasicRuntimeChecks="3"\r
+                               RuntimeLibrary="3"\r
+                               OpenMP="true"\r
+                               UsePrecompiledHeader="0"\r
+                               WarningLevel="3"\r
+                               Detect64BitPortabilityProblems="true"\r
+                               DebugInformationFormat="4"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManagedResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPreLinkEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCLinkerTool"\r
+                               LinkIncremental="2"\r
+                               GenerateDebugInformation="true"\r
+                               SubSystem="1"\r
+                               TargetMachine="1"\r
+                       />\r
+                       <Tool\r
+                               Name="VCALinkTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManifestTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXDCMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCBscMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCFxCopTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCAppVerifierTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebDeploymentTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPostBuildEventTool"\r
+                       />\r
+               </Configuration>\r
+               <Configuration\r
+                       Name="Release|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="1"\r
+                       CharacterSet="1"\r
+                       WholeProgramOptimization="1"\r
+                       >\r
+                       <Tool\r
+                               Name="VCPreBuildEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCustomBuildTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXMLDataGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebServiceProxyGeneratorTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCMIDLTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"\r
+                               RuntimeLibrary="2"\r
+                               OpenMP="true"\r
+                               UsePrecompiledHeader="0"\r
+                               WarningLevel="3"\r
+                               Detect64BitPortabilityProblems="true"\r
+                               DebugInformationFormat="3"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManagedResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCResourceCompilerTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPreLinkEventTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCLinkerTool"\r
+                               LinkIncremental="1"\r
+                               GenerateDebugInformation="true"\r
+                               SubSystem="1"\r
+                               OptimizeReferences="2"\r
+                               EnableCOMDATFolding="2"\r
+                               TargetMachine="1"\r
+                       />\r
+                       <Tool\r
+                               Name="VCALinkTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCManifestTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCXDCMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCBscMakeTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCFxCopTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCAppVerifierTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCWebDeploymentTool"\r
+                       />\r
+                       <Tool\r
+                               Name="VCPostBuildEventTool"\r
+                       />\r
+               </Configuration>\r
+       </Configurations>\r
+       <References>\r
+       </References>\r
+       <Files>\r
+               <Filter\r
+                       Name="Source Files"\r
+                       Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"\r
+                       UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"\r
+                       >\r
+                       <File\r
+                               RelativePath=".\main.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSA.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAClusterTree.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAGuideTree.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAPartProbs.cpp"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAReadMatrix.cpp"\r
+                               >\r
+                       </File>\r
+               </Filter>\r
+               <Filter\r
+                       Name="Header Files"\r
+                       Filter="h;hpp;hxx;hm;inl;inc;xsd"\r
+                       UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"\r
+                       >\r
+                       <File\r
+                               RelativePath=".\Defaults.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\FileBuffer.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSA.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAClusterTree.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSADef.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAGuideTree.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MSAReadMatrix.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\MultiSequence.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\ProbabilisticModel.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\SafeVector.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\ScoreType.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\Sequence.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
+                               RelativePath=".\SparseMatrix.h"\r
+                               >\r
+                       </File>\r
+               </Filter>\r
+               <Filter\r
+                       Name="Resource Files"\r
+                       Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"\r
+                       UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"\r
+                       >\r
+               </Filter>\r
+       </Files>\r
+       <Globals>\r
+       </Globals>\r
+</VisualStudioProject>\r
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj.STUDENT.LIUY0039.user b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAProbs.vcproj.STUDENT.LIUY0039.user
new file mode 100644 (file)
index 0000000..99168da
--- /dev/null
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="gb2312"?>\r
+<VisualStudioUserFile\r
+       ProjectType="Visual C++"\r
+       Version="8.00"\r
+       ShowAllFiles="false"\r
+       >\r
+       <Configurations>\r
+               <Configuration\r
+                       Name="Debug|Win32"\r
+                       >\r
+                       <DebugSettings\r
+                               Command="$(TargetPath)"\r
+                               WorkingDirectory=""\r
+                               CommandArguments=""\r
+                               Attach="false"\r
+                               DebuggerType="3"\r
+                               Remote="1"\r
+                               RemoteMachine="GS-14"\r
+                               RemoteCommand=""\r
+                               HttpUrl=""\r
+                               PDBPath=""\r
+                               SQLDebugging=""\r
+                               Environment=""\r
+                               EnvironmentMerge="true"\r
+                               DebuggerFlavor=""\r
+                               MPIRunCommand=""\r
+                               MPIRunArguments=""\r
+                               MPIRunWorkingDirectory=""\r
+                               ApplicationCommand=""\r
+                               ApplicationArguments=""\r
+                               ShimCommand=""\r
+                               MPIAcceptMode=""\r
+                               MPIAcceptFilter=""\r
+                       />\r
+               </Configuration>\r
+               <Configuration\r
+                       Name="Release|Win32"\r
+                       >\r
+                       <DebugSettings\r
+                               Command="$(TargetPath)"\r
+                               WorkingDirectory=""\r
+                               CommandArguments=""\r
+                               Attach="false"\r
+                               DebuggerType="3"\r
+                               Remote="1"\r
+                               RemoteMachine="GS-14"\r
+                               RemoteCommand=""\r
+                               HttpUrl=""\r
+                               PDBPath=""\r
+                               SQLDebugging=""\r
+                               Environment=""\r
+                               EnvironmentMerge="true"\r
+                               DebuggerFlavor=""\r
+                               MPIRunCommand=""\r
+                               MPIRunArguments=""\r
+                               MPIRunWorkingDirectory=""\r
+                               ApplicationCommand=""\r
+                               ApplicationArguments=""\r
+                               ShimCommand=""\r
+                               MPIAcceptMode=""\r
+                               MPIAcceptFilter=""\r
+                       />\r
+               </Configuration>\r
+       </Configurations>\r
+</VisualStudioUserFile>\r
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.cpp
new file mode 100644 (file)
index 0000000..6ff0643
--- /dev/null
@@ -0,0 +1,174 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "MSAReadMatrix.h"
+
+#define TRACE 0
+
+////////////////////////////////////////////////////////////
+// extern variables for scoring matrix data
+////////////////////////////////////////////////////////////
+extern float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+extern char *aminos, *bases, matrixtype[20];
+extern int subst_index[26];
+
+extern double sub_matrix[26][26];
+
+extern float TEMPERATURE;
+extern int MATRIXTYPE;
+
+extern float GAPOPEN;
+extern float GAPEXT;
+
+// Run-time settings shared through the global `argument` object.
+// init_arguments() in this file fills in every field.
+typedef struct {
+       // input name; init_arguments() stores "tempin" here
+       char input[30];
+       // matrix code as returned by matrixtype_to_int(): 4, 160 or 1000
+       int matrix;
+       // set to 1 by init_arguments()
+       int N;
+       // copy of the global TEMPERATURE
+       float T;
+       // 1.0 / TEMPERATURE; read_matrix() scales every score by it inside exp()
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       // gap-open value; after init_arguments() it holds either the
+       // matrix default or the negated GAPOPEN
+       float gapopen;
+       // gap-extension value; after init_arguments() it holds either the
+       // matrix default or the negated GAPEXT
+       float gapext;
+} argument_decl;
+
+//argument support
+extern argument_decl argument;
+
+/////////////////////////////////////////////////////////
+//sets substitution matrix type
+////////////////////////////////////////////////////////
+void setmatrixtype(int le) {
+       // Translate the numeric code into its name; every code other than
+       // 160 or 4 is recorded as "CUSTOM".
+       const char *label = "CUSTOM";
+       if (le == 160)
+               label = "gonnet_160";
+       else if (le == 4)
+               label = "nuc_simple";
+
+       // matrixtype is the global name buffer declared extern in this file
+       strcpy(matrixtype, label);
+}
+
+///////////////////////////////////////////////////////////////////
+//returns the integer code (4, 160 or 1000) for the current matrixtype name
+///////////////////////////////////////////////////////////////////
+inline int matrixtype_to_int() {
+       // known names map to their numeric codes
+       if (strcmp(matrixtype, "nuc_simple") == 0)
+               return 4;
+       if (strcmp(matrixtype, "gonnet_160") == 0)
+               return 160;
+
+       // any other name: code that init_arguments() treats as an error
+       return 1000;
+}
+
+/////////////////////////////////////////////////////////////////
+//
+// Can read any scoring matrix as long as it is defined in MSAReadMatrix.h
+// AND it is lower triangular
+// AND the order of amino acids/bases is given
+/////////////////////////////////////////////////////////////////
+
+// Loads one scoring matrix into the global lookup tables:
+//   bases       - pointed at the monomer order string of matrx
+//   subst_index - maps (letter - 'A') to the letter's position in bases
+//   sub_matrix  - symmetric table of exp(argument.beta * score)
+// matrx.matrix is consumed as a lower triangle, row by row, in monomer
+// order. argument.beta must already be set when this is called.
+//
+// matrx is taken by const reference. With the former by-value parameter the
+// global `bases` was left pointing into a copy destroyed on return.
+inline void read_matrix(const score_matrix &matrx) {
+       int i, j, basecount, position = 0;
+
+       // bases is a plain char*, hence the cast; the string is only read here
+       bases = const_cast<char *>(matrx.monomers);
+
+       basecount = strlen(bases);
+
+       // NOTE(review): only the first basecount slots are cleared, so a slot
+       // at index >= basecount whose letter is absent from bases keeps its
+       // previous value - confirm whether all 26 slots should be reset.
+       for (i = 0; i < basecount; i++)
+               subst_index[i] = -1;
+
+       // record where each letter of the alphabet sits in the matrix
+       for (i = 0; i < basecount; i++)
+               subst_index[bases[i] - 'A'] = i;
+
+       if (TRACE == 1)
+               printf("\nbases read: %d\n", basecount);
+
+       // expand the lower triangle into the full symmetric table
+       for (i = 0; i < basecount; i++)
+               for (j = 0; j <= i; j++) {
+
+                       double value = exp(argument.beta * matrx.matrix[position++]);
+                       sub_matrix[i][j] = value;
+                       sub_matrix[j][i] = value;
+               }
+
+       if (TRACE)
+               for (i = 0; i < basecount; i++) {
+                       for (j = 0; j < basecount; j++)
+                               printf(" %g ", sub_matrix[i][j]);
+                       printf("\n");
+               }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////// 
+//initialize the arguments (default values)
+//////////////////////////////////////////////////////////////////////////////////
+void init_arguments() {
+       // code of the matrix currently named in the global matrixtype
+       const int le = matrixtype_to_int();
+
+       // defaults copied from the global settings; beta must be in place
+       // before read_matrix() is called below
+       strcpy(argument.input, "tempin");
+       argument.N = 1;
+       argument.matrix = le;
+       argument.T = TEMPERATURE;
+       argument.beta = 1.0 / TEMPERATURE;
+       argument.gapopen = GAPOPEN;
+       argument.gapext = GAPEXT;
+       argument.opt = 'P';
+
+       // load the substitution table and pick the matrix-specific gap values
+       float gap_open = 0, gap_ext = 0;
+       switch (le) {
+       case 4:                 //NUC OPTION :default is nuc_simple
+               read_matrix(nuc_simple);
+               gap_open = -4;
+               gap_ext = -0.25;
+               break;
+       case 160:               //PROT option: default is gonnet_160
+               if (TRACE)
+                       printf("read matrix\n");
+               read_matrix(gonnet_160);
+               gap_open = -22;
+               gap_ext = -1;
+               break;
+       case 1000:              //Error handling
+               printf("Error: enter a valid matrix type\n");
+               exit(1);
+               //additional matrices can only be lower triangular
+               break;
+       default:
+               break;
+       }
+
+       // if either global gap setting is non-zero, both matrix defaults are
+       // replaced by the negated settings
+       const bool customGaps = !(argument.gapopen == 0.0
+                       && argument.gapext == 0.00);
+       if (customGaps) {
+               gap_open = -argument.gapopen;
+               gap_ext = -argument.gapext;
+       }
+
+       if (TRACE)
+               printf("%f %f %f %d\n", argument.T, gap_open, gap_ext, le);
+
+       // publish the final gap values
+       argument.gapopen = gap_open;
+       argument.gapext = gap_ext;
+}
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MSAReadMatrix.h
new file mode 100644 (file)
index 0000000..8d15ce5
--- /dev/null
@@ -0,0 +1,87 @@
+/////////////////////////////////////////////////////////////////
+//  MSAReadMatrix.h
+//
+//  Specifies scoring matrices and their structure
+//  
+//
+//
+/////////////////////////////////////////////////////////////////
+
+#ifndef _MSA_READ_MATRIX_H
+#define _MSA_READ_MATRIX_H
+
+// One substitution matrix: the alphabet it covers plus its scores.
+// The matrices defined below list only the lower triangle, row by row,
+// in the order given by `monomers`.
+typedef struct {
+       char monomers[26]; /* amino or nucleic acid order */
+       float matrix[676]; /* entries of the score matrix, 26*26=676 */
+} score_matrix;
+
+//default protein sequence scoring matrix as well as default scoring matrix of the PROBALIGN
+//also used when -prot option is used
+
+// Protein matrix over the 23 letters listed in the monomer string.
+// Scores are the lower triangle, row by row: row i holds the scores of
+// monomer i against monomers 0..i.
+// NOTE(review): this is a non-inline object definition in a header; including
+// the header from more than one .cpp would produce duplicate definitions at
+// link time - confirm it is included from a single translation unit.
+score_matrix gonnet_160 = { "ABCDEFGHIKLMNPQRSTVWXYZ",
+
+{ 4.6, 0.0, 0.0, 0.3, 0.0, 13.5, -1.1, 0.0, -5.3, 7.0, -0.4, 0.0, -5.2, 3.4,
+               5.9, -3.8, 0.0, -1.8, -7.0, -6.2, 9.1, 0.2, 0.0, -3.4, -0.7, -2.1, -7.6,
+               8.2, -1.8, 0.0, -2.3, -0.1, -0.1, -0.7, -2.7, 9.3, -1.8, 0.0, -2.5,
+               -6.2, -4.3, 0.3, -7.0, -3.7, 5.9, -1.2, 0.0, -4.8, -0.1, 1.3, -5.3,
+               -2.4, 0.2, -3.5, 5.5, -2.2, 0.0, -2.9, -6.5, -4.5, 1.9, -6.7, -3.2, 3.0,
+               -3.4, 5.7, -1.2, 0.0, -1.9, -5.0, -3.1, 1.4, -5.2, -2.1, 2.9, -2.1, 3.4,
+               7.6, -1.2, 0.0, -3.1, 2.6, 0.5, -4.7, -0.2, 1.5, -4.4, 0.8, -4.8, -3.6,
+               6.5, -0.1, 0.0, -5.2, -1.9, -1.4, -5.8, -3.0, -2.2, -4.3, -1.6, -3.5,
+               -4.2, -2.2, 9.6, -0.7, 0.0, -4.2, 0.6, 2.3, -4.1, -2.1, 1.7, -3.2, 2.0,
+               -2.4, -1.2, 0.5, -0.8, 5.6, -1.6, 0.0, -3.5, -1.6, -0.3, -5.3, -2.1,
+               0.3, -4.1, 3.5, -3.5, -2.9, -0.4, -2.1, 1.7, 7.1, 1.6, 0.0, -0.2, 0.0,
+               -0.3, -4.5, -0.1, -0.8, -3.3, -0.4, -3.6, -2.3, 1.1, 0.0, -0.2, -0.9,
+               4.4, 0.5, 0.0, -1.4, -0.6, -0.8, -3.6, -2.4, -0.8, -1.2, -0.2, -2.4,
+               -1.1, 0.3, -0.4, -0.4, -0.9, 2.3, 5.0, 0.1, 0.0, -0.6, -4.9, -3.0, -0.8,
+               -5.2, -3.5, 4.0, -3.0, 1.7, 1.4, -3.8, -3.2, -2.7, -3.4, -2.0, 0.0, 5.3,
+               -5.5, 0.0, -2.1, -7.8, -6.4, 3.2, -5.5, -1.9, -3.4, -5.4, -2.0, -2.2,
+               -5.5, -7.4, -4.0, -2.4, -4.7, -5.4, -4.5, 15.8, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, -3.7, 0.0, -1.3, -4.2, -4.4, 5.6, -6.0, 2.7, -2.0, -3.5, -1.1,
+               -1.3, -2.2, -4.8, -2.9, -2.9, -2.8, -3.2, -2.4, 3.8, 0.0, 10.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
+
+};
+
+//default nucleotide sequence scoring matrix
+//used when -nuc option is used
+// Nucleotide matrix over the 17 letters listed in the monomer string.
+// Scores are the lower triangle, row by row. The only non-zero entries are
+// the diagonal scores of 1 for A, C, G, T and U; every other pair scores 0.
+// The commented-out table at the end is an alternative set of values
+// (labelled Ribosum85-60) that is not compiled.
+// NOTE(review): this is a non-inline object definition in a header; including
+// the header from more than one .cpp would produce duplicate definitions at
+// link time - confirm it is included from a single translation unit.
+score_matrix nuc_simple = {
+
+"ABCDGHKMNRSTUVWXY",
+
+{ 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0 }
+
+//Ribosum85-60
+               /*
+                {
+                2.22,
+                0,  0,
+                -1.86,  0,  1.16,
+                0,  0,  0,  0,
+                -1.46,  0,  -2.48,  0,  1.03,
+                0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                -1.39,  0,  -1.05,  0,  -1.74,  0,  0,  0,  0,  0,  0,  1.65,
+                -1.39,  0,  -1.05,  0,  -1.74,  0,  0,  0,  0,  0,  0,  0,  1.65,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+                }
+                */
+
+               };
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/Makefile b/binaries/src/MSAProbs-0.9.7/MSAProbs/Makefile
new file mode 100644 (file)
index 0000000..8dc8450
--- /dev/null
@@ -0,0 +1,16 @@
+
+# Object files that make up the executable (built by make's implicit .cpp rule)
+CXXOBJS = MSA.o MSAGuideTree.o MSAClusterTree.o MSAPartProbs.o MSAReadMatrix.o main.o
+
+OPENMP = -fopenmp
+CXX = g++
+COMMON_FLAGS = -O3 $(OPENMP) -Wall -funroll-loops -I . -I /usr/include
+CXXFLAGS = $(COMMON_FLAGS)
+
+EXEC = msaprobs
+
+# all and clean name actions, not files
+.PHONY: all clean
+
+all: $(EXEC)
+
+# Link only when an object file is newer than the binary.
+# NVCCOBJS and NVCCLIBS are not set in this file and expand to nothing
+# unless supplied on the command line or in the environment.
+$(EXEC): $(CXXOBJS)
+       $(CXX) $(CXXFLAGS) -o $(EXEC) $(CXXOBJS) $(NVCCOBJS) $(NVCCLIBS)
+       strip $(EXEC)
+
+clean:
+       rm -rf *.o $(EXEC)
+
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/MultiSequence.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/MultiSequence.h
new file mode 100644 (file)
index 0000000..b31af08
--- /dev/null
@@ -0,0 +1,733 @@
+////////////////////////////////////////////////////////////////
+// MultiSequence.h
+//
+// Utilities for reading/writing multiple sequence data.
+/////////////////////////////////////////////////////////////////
+
+#ifndef MULTISEQUENCE_H
+#define MULTISEQUENCE_H
+
+#include <cctype>
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <set>
+#include "SafeVector.h"
+#include "Sequence.h"
+#include "FileBuffer.h"
+
+#define VERSION "0.9.7"
+/////////////////////////////////////////////////////////////////
+// MultiSequence
+//
+// Class for multiple sequence alignment input/output.
+/////////////////////////////////////////////////////////////////
+
+class MultiSequence {
+
+       SafeVector<Sequence *> *sequences;
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Default constructor.
+       /////////////////////////////////////////////////////////////////
+
+       // Starts without a sequence container (sequences == NULL).
+       MultiSequence() : sequences(NULL) {}
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Constructor.  Load MFA from a FileBuffer object.
+       /////////////////////////////////////////////////////////////////
+
+       // Starts without a container, then hands the buffer to LoadMFA().
+       MultiSequence(FileBuffer &infile) : sequences(NULL) {
+               LoadMFA(infile);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::MultiSequence()
+       //
+       // Constructor.  Load MFA from a filename.
+       /////////////////////////////////////////////////////////////////
+
+       // Starts without a container, then hands the file name to LoadMFA().
+       MultiSequence(const string &filename) : sequences(NULL) {
+               LoadMFA(filename);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::~MultiSequence()
+       //
+       // Destructor.  Gets rid of sequence objects contained in the
+       // multiple alignment.
+       /////////////////////////////////////////////////////////////////
+
+       ~MultiSequence() {
+
+               // no container was ever allocated
+               if (!sequences)
+                       return;
+
+               // delete every stored Sequence, clearing each slot as we go
+               SafeVector<Sequence *>::iterator iter = sequences->begin();
+               while (iter != sequences->end()) {
+                       assert(*iter);
+                       delete *iter;
+                       *iter = NULL;
+                       ++iter;
+               }
+
+               // then delete the container itself
+               delete sequences;
+               sequences = NULL;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::LoadMFA()
+       //
+       // Load MFA from a filename.
+       /////////////////////////////////////////////////////////////////
+
+       void LoadMFA(const string &filename, bool stripGaps = false) {
+
+               // open the file through a FileBuffer
+               FileBuffer infile(filename.c_str());
+
+               // opened fine: delegate to the FileBuffer overload, then close
+               if (!infile.fail()) {
+                       LoadMFA(infile, stripGaps);
+                       infile.close();
+                       return;
+               }
+
+               // could not open: report and terminate the program
+               cerr << "ERROR: Could not open file '" << filename
+                               << "' for reading." << endl;
+               exit(1);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::ParseMSF()
+       //
+       // Load MSF from a FileBuffer object.
+       /////////////////////////////////////////////////////////////////
+
+       void ParseMSF(FileBuffer &infile, string header, bool stripGaps = false) {
+
+               SafeVector<SafeVector<char> *> seqData;
+               SafeVector<string> seqNames;
+               SafeVector<int> seqLengths;
+
+               istringstream in;
+               bool valid = true;
+               bool missingHeader = false;
+               bool clustalW = false;
+
+               // read until data starts
+               while (!infile.eof() && header.find("..", 0) == string::npos) {
+                       if (header.find("CLUSTAL", 0) == 0
+                                       || header.find("MSAPROBS", 0) == 0) {
+                               clustalW = true;
+                               break;
+                       }
+                       infile.GetLine(header);
+                       if (header.find("//", 0) != string::npos) {
+                               missingHeader = true;
+                               break;
+                       }
+               }
+
+               // read until end-of-file
+               while (valid) {
+                       infile.GetLine(header);
+                       if (infile.eof())
+                               break;
+
+                       string word;
+                       in.clear();
+                       in.str(header);
+
+                       // check if there's anything on this line
+                       if (in >> word) {
+
+                               // clustalw name parsing
+                               if (clustalW) {
+                                       if (!isspace(header[0])
+                                                       && find(seqNames.begin(), seqNames.end(), word)
+                                                                       == seqNames.end()) {
+                                               seqNames.push_back(word);
+                                               seqData.push_back(new SafeVector<char>());
+                                               seqLengths.push_back(0);
+                                               seqData[(int) seqData.size() - 1]->push_back('@');
+                                       }
+                               }
+
+                               // look for new sequence label
+                               if (word == string("Name:")) {
+                                       if (in >> word) {
+                                               seqNames.push_back(word);
+                                               seqData.push_back(new SafeVector<char>());
+                                               seqLengths.push_back(0);
+                                               seqData[(int) seqData.size() - 1]->push_back('@');
+                                       } else
+                                               valid = false;
+                               }
+
+                               // check if this is sequence data
+                               else if (find(seqNames.begin(), seqNames.end(), word)
+                                               != seqNames.end()) {
+                                       int index = find(seqNames.begin(), seqNames.end(), word)
+                                                       - seqNames.begin();
+
+                                       // read all remaining characters on the line
+                                       char ch;
+                                       while (in >> ch) {
+                                               if (isspace(ch))
+                                                       continue;
+                                               if (ch >= 'a' && ch <= 'z')
+                                                       ch = ch - 'a' + 'A';
+                                               if (ch == '.')
+                                                       ch = '-';
+                                               if (stripGaps && ch == '-')
+                                                       continue;
+                                               if (!((ch >= 'A' && ch <= 'Z') || ch == '*' || ch == '-')) {
+                                                       cerr << "ERROR: Unknown character encountered: "
+                                                                       << ch << endl;
+                                                       exit(1);
+                                               }
+
+                                               // everything's ok so far, so just store this character.
+                                               seqData[index]->push_back(ch);
+                                               seqLengths[index]++;
+                                       }
+                               } else if (missingHeader) {
+                                       seqNames.push_back(word);
+                                       seqData.push_back(new SafeVector<char>());
+                                       seqLengths.push_back(0);
+                                       seqData[(int) seqData.size() - 1]->push_back('@');
+
+                                       int index = (int) seqNames.size() - 1;
+
+                                       // read all remaining characters on the line
+                                       char ch;
+                                       while (in >> ch) {
+                                               if (isspace(ch))
+                                                       continue;
+                                               if (ch >= 'a' && ch <= 'z')
+                                                       ch = ch - 'a' + 'A';
+                                               if (ch == '.')
+                                                       ch = '-';
+                                               if (stripGaps && ch == '-')
+                                                       continue;
+                                               if (!((ch >= 'A' && ch <= 'Z') || ch == '*' || ch == '-')) {
+                                                       cerr << "ERROR: Unknown character encountered: "
+                                                                       << ch << endl;
+                                                       exit(1);
+                                               }
+
+                                               // everything's ok so far, so just store this character.
+                                               seqData[index]->push_back(ch);
+                                               seqLengths[index]++;
+                                       }
+                               }
+                       }
+               }
+
+               // check for errors
+               if (seqNames.size() == 0) {
+                       cerr << "ERROR: No sequences read!" << endl;
+                       exit(1);
+               }
+
+               assert(!sequences);
+               sequences = new SafeVector<Sequence *>;
+               for (int i = 0; i < (int) seqNames.size(); i++) {
+                       if (seqLengths[i] == 0) {
+                               cerr << "ERROR: Sequence of zero length!" << endl;
+                               exit(1);
+                       }
+                       Sequence *seq = new Sequence(seqData[i], seqNames[i], seqLengths[i],
+                                       i, i);
+                       sequences->push_back(seq);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::LoadMFA()
+       //
+       // Load MFA from a FileBuffer object.
+       /////////////////////////////////////////////////////////////////
+
+       void LoadMFA(FileBuffer &infile, bool stripGaps = false) {
+
+               // check to make sure that file reading is ok
+               if (infile.fail()) {
+                       cerr << "ERROR: Error reading file." << endl;
+                       exit(1);
+               }
+
+               // read all sequences
+               while (true) {
+
+                       // get the sequence label as being the current # of sequences
+                       // NOTE: sequence labels here are zero-based
+                       int index = (!sequences) ? 0 : sequences->size();
+
+                       // read the sequence
+                       Sequence *seq = new Sequence(infile, stripGaps);
+                       if (seq->Fail()) {
+
+                               // check if alternative file format (i.e. not MFA)
+                               if (index == 0) {
+                                       string header = seq->GetHeader();
+                                       if (header.length() > 0 && header[0] != '>') {
+
+                                               // try MSF format
+                                               ParseMSF(infile, header);
+                                               break;
+                                       }
+                               }
+
+                               delete seq;
+                               break;
+                       }
+                       seq->SetLabel(index);
+
+                       // add the sequence to the list of current sequences
+                       if (!sequences)
+                               sequences = new SafeVector<Sequence *>;
+                       sequences->push_back(seq);
+               }
+
+               // make sure at least one sequence was read
+               if (!sequences) {
+                       cerr << "ERROR: No sequences read." << endl;
+                       exit(1);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::AddSequence()
+       //
+       // Add another sequence to an existing sequence list
+       /////////////////////////////////////////////////////////////////
+
+       void AddSequence(Sequence *sequence) {
+               assert(sequence);
+               assert(!sequence->Fail());
+
+               // add sequence
+               if (!sequences)
+                       sequences = new SafeVector<Sequence *>;
+               sequences->push_back(sequence);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::RemoveSequence()
+       //
+       // Remove a sequence from the MultiSequence
+       /////////////////////////////////////////////////////////////////
+
+       void RemoveSequence(int index) {
+               assert(sequences);
+               assert(index >= 0 && index < (int) sequences->size());
+
+               // free the Sequence object first, then drop its slot from the list
+               SafeVector<Sequence *>::iterator slot = sequences->begin() + index;
+               delete *slot;
+               sequences->erase(slot);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::WriteMFA()
+       //
+       // Write MFA to the outfile.  Allows the user to specify the
+       // number of columns for the output.  Also, useIndices determines
+       // whether or not the actual sequence comments will be printed
+       // out or whether the artificially assigned sequence labels will
+       // be used instead.
+       /////////////////////////////////////////////////////////////////
+
+       void WriteMFA(ostream &outfile, int numColumns = 60,
+                       bool useIndices = false) {
+               if (!sequences)
+                       return;
+
+               // loop through all sequences and write them out
+               for (SafeVector<Sequence *>::iterator iter = sequences->begin();
+                               iter != sequences->end(); ++iter) {
+                       (*iter)->WriteMFA(outfile, numColumns, useIndices);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetAnnotationChar()
+       //
+       // Return CLUSTALW annotation for column.
+       /////////////////////////////////////////////////////////////////
+
+       char GetAnnotationChar(SafeVector<char> &column) {
+               SafeVector<int> counts(256, 0);
+               int allChars = (int) column.size();
+
+               for (int i = 0; i < allChars; i++) {
+                       counts[(unsigned char) toupper(column[i])]++;
+               }
+
+               allChars -= counts[(unsigned char) '-'];
+               if (allChars == 1)
+                       return ' ';
+
+               for (int i = 0; i < 256; i++)
+                       if ((char) i != '-' && counts[i] == allChars)
+                               return '*';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'A'] == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'H']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               + counts[(unsigned char) 'E'] + counts[(unsigned char) 'Q']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'Q'] + counts[(unsigned char) 'H']
+                               + counts[(unsigned char) 'R'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'M'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'V']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'M'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'F']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'H'] + counts[(unsigned char) 'Y']
+                               == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'F'] + counts[(unsigned char) 'Y']
+                               + counts[(unsigned char) 'W'] == allChars)
+                       return ':';
+
+               if (counts[(unsigned char) 'C'] + counts[(unsigned char) 'S']
+                               + counts[(unsigned char) 'A'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'A'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'V'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'A']
+                               + counts[(unsigned char) 'G'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'N'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'T']
+                               + counts[(unsigned char) 'P'] + counts[(unsigned char) 'A']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'G']
+                               + counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'S'] + counts[(unsigned char) 'N']
+                               + counts[(unsigned char) 'D'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'Q'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'D']
+                               + counts[(unsigned char) 'E'] + counts[(unsigned char) 'Q']
+                               + counts[(unsigned char) 'H'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'N'] + counts[(unsigned char) 'E']
+                               + counts[(unsigned char) 'H'] + counts[(unsigned char) 'Q']
+                               + counts[(unsigned char) 'R'] + counts[(unsigned char) 'K']
+                               == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'F'] + counts[(unsigned char) 'V']
+                               + counts[(unsigned char) 'L'] + counts[(unsigned char) 'I']
+                               + counts[(unsigned char) 'M'] == allChars)
+                       return '.';
+
+               if (counts[(unsigned char) 'H'] + counts[(unsigned char) 'F']
+                               + counts[(unsigned char) 'Y'] == allChars)
+                       return '.';
+
+               return ' ';
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::WriteALN()
+       //
+       // Write ALN to the outfile.  Allows the user to specify the
+       // number of columns for the output.  
+       /////////////////////////////////////////////////////////////////
+
+       void WriteALN(ostream &outfile, int numColumns = 60) {
+               if (!sequences)
+                       return;
+
+               outfile << "MSAPROBS version " << VERSION
+                               << " multiple sequence alignment" << endl;
+
+               int longestComment = 0;
+               SafeVector<SafeVector<char>::iterator> ptrs(GetNumSequences());
+               SafeVector<int> lengths(GetNumSequences());
+               for (int i = 0; i < GetNumSequences(); i++) {
+                       ptrs[i] = GetSequence(i)->GetDataPtr();
+                       lengths[i] = GetSequence(i)->GetLength();
+                       longestComment = max(longestComment,
+                                       (int) GetSequence(i)->GetName().length());
+               }
+               longestComment += 4;
+
+               int writtenChars = 0;
+               bool allDone = false;
+
+               while (!allDone) {
+                       outfile << endl;
+                       allDone = true;
+
+                       // loop through all sequences and write them out
+                       for (int i = 0; i < GetNumSequences(); i++) {
+
+                               if (writtenChars < lengths[i]) {
+                                       outfile << GetSequence(i)->GetName();
+                                       for (int j = 0;
+                                                       j
+                                                                       < longestComment
+                                                                                       - (int) GetSequence(i)->GetName().length();
+                                                       j++)
+                                               outfile << ' ';
+
+                                       for (int j = 0; j < numColumns; j++) {
+                                               if (writtenChars + j < lengths[i])
+                                                       outfile << ptrs[i][writtenChars + j + 1];
+                                               else
+                                                       break;
+                                       }
+
+                                       outfile << endl;
+
+                                       if (writtenChars + numColumns < lengths[i])
+                                               allDone = false;
+                               }
+                       }
+
+                       // write annotation line
+                       for (int j = 0; j < longestComment; j++)
+                               outfile << ' ';
+
+                       for (int j = 0; j < numColumns; j++) {
+                               SafeVector<char> column;
+
+                               for (int i = 0; i < GetNumSequences(); i++)
+                                       if (writtenChars + j < lengths[i])
+                                               column.push_back(ptrs[i][writtenChars + j + 1]);
+
+                               if (column.size() > 0)
+                                       outfile << GetAnnotationChar(column);
+                       }
+
+                       outfile << endl;
+                       writtenChars += numColumns;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetSequence()
+       //
+       // Retrieve a sequence from the MultiSequence object.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence* GetSequence(int i) {
+               // requires a loaded sequence list and an in-range index
+               assert(sequences);
+               assert(0 <= i && i < (int) sequences->size());
+
+               SafeVector<Sequence *> &list = *sequences;
+               return list[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetSequence()
+       //
+       // Retrieve a sequence from the MultiSequence object
+       // (const version).
+       /////////////////////////////////////////////////////////////////
+
+       const Sequence* GetSequence(int i) const {
+               // requires a loaded sequence list and an in-range index
+               assert(sequences);
+               assert(0 <= i && i < (int) sequences->size());
+
+               SafeVector<Sequence *> &list = *sequences;
+               return list[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::GetNumSequences()
+       //
+       // Returns the number of sequences in the MultiSequence.
+       /////////////////////////////////////////////////////////////////
+
+       int GetNumSequences() const {
+               // an unallocated list counts as empty
+               return sequences ? (int) sequences->size() : 0;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SortByHeader()
+       //
+       // Organizes the sequences according to their sequence headers
+       // in ascending order.
+       /////////////////////////////////////////////////////////////////
+
+       void SortByHeader() {
+               assert(sequences);
+
+               // a quick and easy O(n^2) sort
+               for (int i = 0; i < (int) sequences->size() - 1; i++) {
+                       for (int j = i + 1; j < (int) sequences->size(); j++) {
+                               if ((*sequences)[i]->GetHeader() > (*sequences)[j]->GetHeader())
+                                       swap((*sequences)[i], (*sequences)[j]);
+                       }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SortByLabel()
+       //
+       // Organizes the sequences according to their sequence labels
+       // in ascending order.
+       /////////////////////////////////////////////////////////////////
+
+       void SortByLabel() {
+               assert(sequences);
+
+               // a quick and easy O(n^2) sort
+               for (int i = 0; i < (int) sequences->size() - 1; i++) {
+                       for (int j = i + 1; j < (int) sequences->size(); j++) {
+                               if ((*sequences)[i]->GetSortLabel()
+                                               > (*sequences)[j]->GetSortLabel())
+                                       swap((*sequences)[i], (*sequences)[j]);
+                       }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::SaveOrdering()
+       //
+       // Relabels sequences so as to preserve the current ordering.
+       /////////////////////////////////////////////////////////////////
+
+       void SaveOrdering() {
+               assert(sequences);
+
+               // stamp each sequence with its current position in the list
+               int position = 0;
+               for (SafeVector<Sequence *>::iterator it = sequences->begin();
+                               it != sequences->end(); ++it) {
+                       (*it)->SetSortLabel(position++);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // MultiSequence::Project()
+       //
+       // Given a set of indices, extract all sequences from the current
+       // MultiSequence object whose index is included in the set.
+       // Then, project the multiple alignments down to the desired
+       // subset, and return the projection as a new MultiSequence
+       // object.
+       /////////////////////////////////////////////////////////////////
+
+       MultiSequence *Project(const set<int> &indices) {
+               SafeVector<SafeVector<char>::iterator> oldPtrs(indices.size());
+               SafeVector<SafeVector<char> *> newPtrs(indices.size());
+
+               assert(indices.size() != 0);
+
+               // grab old data
+               int i = 0;
+               for (set<int>::const_iterator iter = indices.begin();
+                               iter != indices.end(); ++iter) {
+                       oldPtrs[i++] = GetSequence(*iter)->GetDataPtr();
+               }
+
+               // compute new length
+               int oldLength = GetSequence(*indices.begin())->GetLength();
+               int newLength = 0;
+               for (i = 1; i <= oldLength; i++) {
+
+                       // check to see if there is a gap in every sequence of the set
+                       bool found = false;
+                       for (int j = 0; !found && j < (int) indices.size(); j++)
+                               found = (oldPtrs[j][i] != '-');
+
+                       // if not, then this column counts towards the sequence length
+                       if (found)
+                               newLength++;
+               }
+
+               // build new alignments
+               for (i = 0; i < (int) indices.size(); i++) {
+                       newPtrs[i] = new SafeVector<char>();
+                       assert(newPtrs[i]);
+                       newPtrs[i]->push_back('@');
+               }
+
+               // add all needed columns
+               for (i = 1; i <= oldLength; i++) {
+
+                       // make sure column is not gapped in all sequences in the set
+                       bool found = false;
+                       for (int j = 0; !found && j < (int) indices.size(); j++)
+                               found = (oldPtrs[j][i] != '-');
+
+                       // if not, then add it
+                       if (found) {
+                               for (int j = 0; j < (int) indices.size(); j++)
+                                       newPtrs[j]->push_back(oldPtrs[j][i]);
+                       }
+               }
+
+               // wrap sequences in MultiSequence object
+               MultiSequence *ret = new MultiSequence();
+               i = 0;
+               for (set<int>::const_iterator iter = indices.begin();
+                               iter != indices.end(); ++iter) {
+                       ret->AddSequence(
+                                       new Sequence(newPtrs[i++], GetSequence(*iter)->GetHeader(),
+                                                       newLength, GetSequence(*iter)->GetSortLabel(),
+                                                       GetSequence(*iter)->GetLabel()));
+               }
+
+               return ret;
+       }
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/ProbabilisticModel.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/ProbabilisticModel.h
new file mode 100644 (file)
index 0000000..dd5dbd3
--- /dev/null
@@ -0,0 +1,1365 @@
+/////////////////////////////////////////////////////////////////
+// ProbabilisticModel.h
+//
+// Routines for (1) posterior probability computations
+//              (2) chained anchoring
+//              (3) maximum weight trace alignment
+/////////////////////////////////////////////////////////////////
+
+#ifndef PROBABILISTICMODEL_H
+#define PROBABILISTICMODEL_H
+
+#include <list>
+#include <cmath>
+#include <cstdio>
+#include "SafeVector.h"
+#include "ScoreType.h"
+#include "SparseMatrix.h"
+#include "MultiSequence.h"
+
+using namespace std;
+
+const int NumMatchStates = 1;            // number of match states; fixed at 1 in this
+// version (the original authors noted that this
+// may change in future versions)
+const int NumInsertStates = 2;           // number of insert-state pairs; pair k uses state 2k+1 (emits from seq1) and state 2k+2 (emits from seq2)
+const int NumMatrixTypes = NumMatchStates + NumInsertStates * 2; // total number of states, i.e. entries stored per DP cell
+
+/////////////////////////////////////////////////////////////////
+// ProbabilisticModel
+//
+// Class for storing the parameters of a probabilistic model and
+// performing different computations based on those parameters.
+// In particular, this class handles the computation of
+// posterior probabilities that may be used in alignment.
+/////////////////////////////////////////////////////////////////
+
+class ProbabilisticModel {
+
+       float initialDistribution[NumMatrixTypes]; // log of the initial probability of each state (the constructor stores LOG() of its input)
+       float transProb[NumMatrixTypes][NumMatrixTypes]; // log state-to-state transition probabilities, indexed [from][to]
+       float matchProb[256][256];        // log emission probabilities for match states, indexed by the two characters as unsigned char
+       float insProb[256][NumMatrixTypes]; // log emission probabilities for insert states, indexed [character][k]
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ProbabilisticModel()
+       //
+       // Constructor.  Builds a new probabilistic model using the
+       // given parameters.
+       /////////////////////////////////////////////////////////////////
+
+       ProbabilisticModel(const VF &initDistribMat, const VF &gapOpen,
+                       const VF &gapExtend, const VVF &emitPairs, const VF &emitSingle) {
+
+               // build transition matrix
+               VVF transMat(NumMatrixTypes, VF(NumMatrixTypes, 0.0f));
+               transMat[0][0] = 1;
+               for (int i = 0; i < NumInsertStates; i++) {
+                       transMat[0][2 * i + 1] = gapOpen[2 * i];
+                       transMat[0][2 * i + 2] = gapOpen[2 * i + 1];
+                       transMat[0][0] -= (gapOpen[2 * i] + gapOpen[2 * i + 1]);
+                       assert(transMat[0][0] > 0);
+                       transMat[2 * i + 1][2 * i + 1] = gapExtend[2 * i];
+                       transMat[2 * i + 2][2 * i + 2] = gapExtend[2 * i + 1];
+                       transMat[2 * i + 1][2 * i + 2] = 0;
+                       transMat[2 * i + 2][2 * i + 1] = 0;
+                       transMat[2 * i + 1][0] = 1 - gapExtend[2 * i];
+                       transMat[2 * i + 2][0] = 1 - gapExtend[2 * i + 1];
+               }
+
+               // create initial and transition probability matrices
+               for (int i = 0; i < NumMatrixTypes; i++) {
+                       initialDistribution[i] = LOG(initDistribMat[i]);
+                       for (int j = 0; j < NumMatrixTypes; j++)
+                               transProb[i][j] = LOG(transMat[i][j]);
+               }
+
+               // create insertion and match probability matrices
+               for (int i = 0; i < 256; i++) {
+                       for (int j = 0; j < NumMatrixTypes; j++)
+                               insProb[i][j] = LOG(emitSingle[i]);
+                       for (int j = 0; j < 256; j++)
+                               matchProb[i][j] = LOG(emitPairs[i][j]);
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeForwardMatrix()
+       //
+       // Computes a set of forward matrices for aligning seq1 and seq2.
+       // All entries are in log space: cells start at LOG_ZERO and are combined with the LOG()-transformed model tables.
+       // Returns a VF allocated with new; nothing in this routine frees it, so the caller presumably owns it (confirm at call sites).
+       // For efficiency reasons, a single-dimensional floating-point
+       // array is used here, with the following indexing scheme:
+       //
+       //    forward[i + NumMatrixTypes * (j * (seq2Length+1) + k)]
+       //    refers to the (log) probability of aligning through j characters
+       //    of the first sequence, k characters of the second sequence,
+       //    and ending in state i.
+       /////////////////////////////////////////////////////////////////
+
+       VF *ComputeForwardMatrix(Sequence *seq1, Sequence *seq2) const {
+
+               assert(seq1);
+               assert(seq2);
+
+               const int seq1Length = seq1->GetLength();
+               const int seq2Length = seq2->GetLength();
+
+               // retrieve iterators to the beginning of each sequence; below, characters are read at indices 1..length only (index 0 is never read)
+               SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+               SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+               // create matrix: NumMatrixTypes entries for each of the (seq1Length+1) x (seq2Length+1) cells, all set to LOG_ZERO
+               VF *forwardPtr = new VF(
+                               NumMatrixTypes * (seq1Length + 1) * (seq2Length + 1), LOG_ZERO);
+               assert(forwardPtr);
+               VF &forward = *forwardPtr;
+
+               // initialization condition: seed cells (1,1), (1,0) and (0,1). NOTE(review): this reads iter1[1] / iter2[1] and writes cell (1,1), so it appears to assume both sequences have length >= 1 -- confirm callers guarantee it
+               forward[0 + NumMatrixTypes * (1 * (seq2Length + 1) + 1)] = // cell (1,1), match state
+                               initialDistribution[0]
+                                               + matchProb[(unsigned char) iter1[1]][(unsigned char) iter2[1]];
+
+               for (int k = 0; k < NumInsertStates; k++) {
+                       forward[2 * k + 1 + NumMatrixTypes * (1 * (seq2Length + 1) + 0)] = // cell (1,0), insert state emitting the first character of seq1
+                                       initialDistribution[2 * k + 1]
+                                                       + insProb[(unsigned char) iter1[1]][k];
+                       forward[2 * k + 2 + NumMatrixTypes * (0 * (seq2Length + 1) + 1)] = // cell (0,1), insert state emitting the first character of seq2
+                                       initialDistribution[2 * k + 2]
+                                                       + insProb[(unsigned char) iter2[1]][k];
+               }
+
+               // remember offset for each index combination: ij = cell (i,j), i1j = (i-1,j), ij1 = (i,j-1), i1j1 = (i-1,j-1); the negative starting values are only read under the i > 0 / j > 0 guards below, where they are in range
+               int ij = 0;
+               int i1j = -seq2Length - 1;
+               int ij1 = -1;
+               int i1j1 = -seq2Length - 2;
+
+               ij *= NumMatrixTypes; // convert cell offsets into offsets within the flattened array
+               i1j *= NumMatrixTypes;
+               ij1 *= NumMatrixTypes;
+               i1j1 *= NumMatrixTypes;
+
+               // compute forward scores, visiting cells in row-major order so that every predecessor cell is already filled
+               for (int i = 0; i <= seq1Length; i++) {
+                       unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i]; // '~' is a placeholder for row 0; c1 is only used below when i > 0
+                       for (int j = 0; j <= seq2Length; j++) {
+                               unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j]; // same placeholder for column 0; c2 is only used when j > 0
+
+                               if (i > 1 || j > 1) { // cells (0,0), (0,1), (1,0) and (1,1) keep their initialization values
+                                       if (i > 0 && j > 0) { // match state: accumulate over every state of the diagonal cell, then add the pair emission
+                                               forward[0 + ij] = forward[0 + i1j1] + transProb[0][0];
+                                               for (int k = 1; k < NumMatrixTypes; k++)
+                                                       LOG_PLUS_EQUALS(forward[0 + ij],
+                                                                       forward[k + i1j1] + transProb[k][0]);
+                                               forward[0 + ij] += matchProb[c1][c2];
+                                       }
+                                       if (i > 0) { // insert states emitting c1: reached from cell (i-1,j) via the match state or via the same insert state
+                                               for (int k = 0; k < NumInsertStates; k++)
+                                                       forward[2 * k + 1 + ij] = insProb[c1][k]
+                                                                       + LOG_ADD(
+                                                                                       forward[0 + i1j]
+                                                                                                       + transProb[0][2 * k + 1],
+                                                                                       forward[2 * k + 1 + i1j]
+                                                                                                       + transProb[2 * k + 1][2 * k
+                                                                                                                       + 1]);
+                                       }
+                                       if (j > 0) { // insert states emitting c2: reached from cell (i,j-1) via the match state or via the same insert state
+                                               for (int k = 0; k < NumInsertStates; k++)
+                                                       forward[2 * k + 2 + ij] = insProb[c2][k]
+                                                                       + LOG_ADD(
+                                                                                       forward[0 + ij1]
+                                                                                                       + transProb[0][2 * k + 2],
+                                                                                       forward[2 * k + 2 + ij1]
+                                                                                                       + transProb[2 * k + 2][2 * k
+                                                                                                                       + 2]);
+                                       }
+                               }
+
+                               ij += NumMatrixTypes; // advance all four offsets to the next cell in row-major order
+                               i1j += NumMatrixTypes;
+                               ij1 += NumMatrixTypes;
+                               i1j1 += NumMatrixTypes;
+                       }
+               }
+
+               return forwardPtr;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeBackwardMatrix()
+       //
+       // Computes a set of backward matrices for aligning seq1 and seq2.
+       // All entries are in log space: cells start at LOG_ZERO and are accumulated with LOG_PLUS_EQUALS.
+       // Returns a VF allocated with new; nothing in this routine frees it, so the caller presumably owns it (confirm at call sites).
+       // For efficiency reasons, a single-dimensional floating-point
+       // array is used here, with the following indexing scheme:
+       //
+       //    backward[i + NumMatrixTypes * (j * (seq2Length+1) + k)]
+       //    refers to the (log) probability of starting in state i and
+       //    aligning from character j+1 to the end of the first
+       //    sequence and from character k+1 to the end of the second
+       //    sequence.
+       /////////////////////////////////////////////////////////////////
+
+       VF *ComputeBackwardMatrix(Sequence *seq1, Sequence *seq2) const {
+
+               assert(seq1);
+               assert(seq2);
+
+               const int seq1Length = seq1->GetLength();
+               const int seq2Length = seq2->GetLength();
+               SafeVector<char>::iterator iter1 = seq1->GetDataPtr(); // characters are read at indices 1..length below
+               SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+               // create matrix: NumMatrixTypes entries for each of the (seq1Length+1) x (seq2Length+1) cells, all set to LOG_ZERO
+               VF *backwardPtr = new VF(
+                               NumMatrixTypes * (seq1Length + 1) * (seq2Length + 1), LOG_ZERO);
+               assert(backwardPtr);
+               VF &backward = *backwardPtr;
+
+               // initialization condition: every state of the last cell (seq1Length, seq2Length) is seeded with its initialDistribution entry
+               for (int k = 0; k < NumMatrixTypes; k++)
+                       backward[NumMatrixTypes * ((seq1Length + 1) * (seq2Length + 1) - 1)
+                                       + k] = initialDistribution[k];
+
+               // remember offset for each index combination: ij = cell (i,j), i1j = (i+1,j), ij1 = (i,j+1), i1j1 = (i+1,j+1); the starting values past the end of the array are only read under the i < seq1Length / j < seq2Length guards below, where they are in range
+               int ij = (seq1Length + 1) * (seq2Length + 1) - 1;
+               int i1j = ij + seq2Length + 1;
+               int ij1 = ij + 1;
+               int i1j1 = ij + seq2Length + 2;
+
+               ij *= NumMatrixTypes; // convert cell offsets into offsets within the flattened array
+               i1j *= NumMatrixTypes;
+               ij1 *= NumMatrixTypes;
+               i1j1 *= NumMatrixTypes;
+
+               // compute backward scores, visiting cells in reverse row-major order so that every successor cell is already filled
+               for (int i = seq1Length; i >= 0; i--) {
+                       unsigned char c1 =
+                                       (i == seq1Length) ? '~' : (unsigned char) iter1[i + 1]; // next character of seq1; '~' is a placeholder that is only assigned, never used, in the last row
+                       for (int j = seq2Length; j >= 0; j--) {
+                               unsigned char c2 =
+                                               (j == seq2Length) ? '~' : (unsigned char) iter2[j + 1]; // next character of seq2; same placeholder for the last column
+
+                               if (i < seq1Length && j < seq2Length) { // step into the match state of cell (i+1,j+1), emitting the pair (c1, c2)
+                                       const float ProbXY = backward[0 + i1j1] + matchProb[c1][c2];
+                                       for (int k = 0; k < NumMatrixTypes; k++)
+                                               LOG_PLUS_EQUALS(backward[k + ij],
+                                                               ProbXY + transProb[k][0]);
+                               }
+                               if (i < seq1Length) { // step into an insert state of cell (i+1,j) emitting c1, from the match state or from that same insert state
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               LOG_PLUS_EQUALS(backward[0 + ij],
+                                                               backward[2 * k + 1 + i1j] + insProb[c1][k]
+                                                                               + transProb[0][2 * k + 1]);
+                                               LOG_PLUS_EQUALS(backward[2 * k + 1 + ij],
+                                                               backward[2 * k + 1 + i1j] + insProb[c1][k]
+                                                                               + transProb[2 * k + 1][2 * k + 1]);
+                                       }
+                               }
+                               if (j < seq2Length) { // step into an insert state of cell (i,j+1) emitting c2, from the match state or from that same insert state
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               LOG_PLUS_EQUALS(backward[0 + ij],
+                                                               backward[2 * k + 2 + ij1] + insProb[c2][k]
+                                                                               + transProb[0][2 * k + 2]);
+                                               LOG_PLUS_EQUALS(backward[2 * k + 2 + ij],
+                                                               backward[2 * k + 2 + ij1] + insProb[c2][k]
+                                                                               + transProb[2 * k + 2][2 * k + 2]);
+                                       }
+                               }
+
+                               ij -= NumMatrixTypes; // move all four offsets to the previous cell in row-major order
+                               i1j -= NumMatrixTypes;
+                               ij1 -= NumMatrixTypes;
+                               i1j1 -= NumMatrixTypes;
+                       }
+               }
+
+               return backwardPtr;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeTotalProbability()
+       //
+       // Computes the total probability of an alignment given
+       // the forward and backward matrices.
+       /////////////////////////////////////////////////////////////////
+
+       float ComputeTotalProbability(int seq1Length, int seq2Length,
+                       const VF &forward, const VF &backward) const {
+
+               // compute total probability
+               float totalForwardProb = LOG_ZERO;
+               float totalBackwardProb = LOG_ZERO;
+               for (int k = 0; k < NumMatrixTypes; k++) {
+                       LOG_PLUS_EQUALS(totalForwardProb,
+                                       forward[k
+                                                       + NumMatrixTypes
+                                                                       * ((seq1Length + 1) * (seq2Length + 1) - 1)]
+                                                       + backward[k
+                                                                       + NumMatrixTypes
+                                                                                       * ((seq1Length + 1)
+                                                                                                       * (seq2Length + 1) - 1)]);
+               }
+
+               totalBackwardProb = forward[0
+                               + NumMatrixTypes * (1 * (seq2Length + 1) + 1)]
+                               + backward[0 + NumMatrixTypes * (1 * (seq2Length + 1) + 1)];
+
+               for (int k = 0; k < NumInsertStates; k++) {
+                       LOG_PLUS_EQUALS(totalBackwardProb,
+                                       forward[2 * k + 1
+                                                       + NumMatrixTypes * (1 * (seq2Length + 1) + 0)]
+                                                       + backward[2 * k + 1
+                                                                       + NumMatrixTypes
+                                                                                       * (1 * (seq2Length + 1) + 0)]);
+                       LOG_PLUS_EQUALS(totalBackwardProb,
+                                       forward[2 * k + 2
+                                                       + NumMatrixTypes * (0 * (seq2Length + 1) + 1)]
+                                                       + backward[2 * k + 2
+                                                                       + NumMatrixTypes
+                                                                                       * (0 * (seq2Length + 1) + 1)]);
+               }
+
+               //    cerr << totalForwardProb << " " << totalBackwardProb << endl;
+
+               return (totalForwardProb + totalBackwardProb) / 2;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputePosteriorMatrix()
+       //
+       // Computes the posterior probability matrix based on
+       // the forward and backward matrices.
+       /////////////////////////////////////////////////////////////////
+
+       VF *ComputePosteriorMatrix(Sequence *seq1, Sequence *seq2,
+                       const VF &forward, const VF &backward) const {
+
+               assert(seq1);
+               assert(seq2);
+
+               const int seq1Length = seq1->GetLength();
+               const int seq2Length = seq2->GetLength();
+
+               float totalProb = ComputeTotalProbability(seq1Length, seq2Length,
+                               forward, backward);
+
+               // compute posterior matrices
+               VF *posteriorPtr = new VF((seq1Length + 1) * (seq2Length + 1));
+               assert(posteriorPtr);
+               VF &posterior = *posteriorPtr;
+
+               int ij = 0;
+               if (totalProb == 0) {
+                       totalProb = 1.0f;
+               }
+               VF::iterator ptr = posterior.begin();
+
+               for (int i = 0; i <= seq1Length; i++) {
+                       for (int j = 0; j <= seq2Length; j++) {
+                               *(ptr++) = EXP(
+                                               min(LOG_ONE, forward[ij] + backward[ij] - totalProb));
+                               ij += NumMatrixTypes;
+                       }
+               }
+
+               posterior[0] = 0;
+
+               return posteriorPtr;
+       }
+
+       /*
+        /////////////////////////////////////////////////////////////////
+        // ProbabilisticModel::ComputeExpectedCounts()
+        //
+        // Computes the expected counts for the various transitions.
+        /////////////////////////////////////////////////////////////////
+
+        VVF *ComputeExpectedCounts () const {
+
+        assert (seq1);
+        assert (seq2);
+
+        const int seq1Length = seq1->GetLength();
+        const int seq2Length = seq2->GetLength();
+        SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+        SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+        // compute total probability
+        float totalProb = ComputeTotalProbability (seq1Length, seq2Length,
+        forward, backward);
+
+        // initialize expected counts
+        VVF *countsPtr = new VVF(NumMatrixTypes + 1, VF(NumMatrixTypes, LOG_ZERO)); assert (countsPtr);
+        VVF &counts = *countsPtr;
+
+        // remember offset for each index combination
+        int ij = 0;
+        int i1j = -seq2Length - 1;
+        int ij1 = -1;
+        int i1j1 = -seq2Length - 2;
+
+        ij *= NumMatrixTypes;
+        i1j *= NumMatrixTypes;
+        ij1 *= NumMatrixTypes;
+        i1j1 *= NumMatrixTypes;
+
+        // compute expected counts
+        for (int i = 0; i <= seq1Length; i++){
+        unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];
+        for (int j = 0; j <= seq2Length; j++){
+        unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];
+
+        if (i > 0 && j > 0){
+        for (int k = 0; k < NumMatrixTypes; k++)
+        LOG_PLUS_EQUALS (counts[k][0],
+        forward[k + i1j1] + transProb[k][0] +
+        matchProb[c1][c2] + backward[0 + ij]);
+        }
+        if (i > 0){
+        for (int k = 0; k < NumInsertStates; k++){
+        LOG_PLUS_EQUALS (counts[0][2*k+1],
+        forward[0 + i1j] + transProb[0][2*k+1] +
+        insProb[c1][k] + backward[2*k+1 + ij]);
+        LOG_PLUS_EQUALS (counts[2*k+1][2*k+1],
+        forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1] +
+        insProb[c1][k] + backward[2*k+1 + ij]);
+        }
+        }
+        if (j > 0){
+        for (int k = 0; k < NumInsertStates; k++){
+        LOG_PLUS_EQUALS (counts[0][2*k+2],
+        forward[0 + ij1] + transProb[0][2*k+2] +
+        insProb[c2][k] + backward[2*k+2 + ij]);
+        LOG_PLUS_EQUALS (counts[2*k+2][2*k+2],
+        forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2] +
+        insProb[c2][k] + backward[2*k+2 + ij]);
+        }
+        }
+
+        ij += NumMatrixTypes;
+        i1j += NumMatrixTypes;
+        ij1 += NumMatrixTypes;
+        i1j1 += NumMatrixTypes;
+        }
+        }
+
+        // scale all expected counts appropriately
+        for (int i = 0; i < NumMatrixTypes; i++)
+        for (int j = 0; j < NumMatrixTypes; j++)
+        counts[i][j] -= totalProb;
+
+        }
+        */
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeNewParameters()
+       //
+       // Computes a new parameter set based on the expected counts
+       // given.
+       /////////////////////////////////////////////////////////////////
+       void ComputeNewParameters(Sequence *seq1, Sequence *seq2, const VF &forward,
+                       const VF &backward, VF &initDistribMat, VF &gapOpen, VF &gapExtend,
+                       VVF &emitPairs, VF &emitSingle, bool enableTrainEmissions) const {
+
+               assert(seq1);
+               assert(seq2);
+
+               const int seq1Length = seq1->GetLength();
+               const int seq2Length = seq2->GetLength();
+               SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+               SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+               // compute total probability
+               float totalProb = ComputeTotalProbability(seq1Length, seq2Length,
+                               forward, backward);
+
+               // initialize expected counts
+               VVF transCounts(NumMatrixTypes, VF(NumMatrixTypes, LOG_ZERO));
+               VF initCounts(NumMatrixTypes, LOG_ZERO);
+               VVF pairCounts(256, VF(256, LOG_ZERO));
+               VF singleCounts(256, LOG_ZERO);
+
+               // remember offset for each index combination
+               int ij = 0;
+               int i1j = -seq2Length - 1;
+               int ij1 = -1;
+               int i1j1 = -seq2Length - 2;
+
+               ij *= NumMatrixTypes;
+               i1j *= NumMatrixTypes;
+               ij1 *= NumMatrixTypes;
+               i1j1 *= NumMatrixTypes;
+
+               // compute initial distribution posteriors
+               initCounts[0] = LOG_ADD(
+                               forward[0 + NumMatrixTypes * (1 * (seq2Length + 1) + 1)]
+                                               + backward[0
+                                                               + NumMatrixTypes * (1 * (seq2Length + 1) + 1)],
+                               forward[0
+                                               + NumMatrixTypes
+                                                               * ((seq1Length + 1) * (seq2Length + 1) - 1)]
+                                               + backward[0
+                                                               + NumMatrixTypes
+                                                                               * ((seq1Length + 1) * (seq2Length + 1)
+                                                                                               - 1)]);
+               for (int k = 0; k < NumInsertStates; k++) {
+                       initCounts[2 * k + 1] = LOG_ADD(
+                                       forward[2 * k + 1
+                                                       + NumMatrixTypes * (1 * (seq2Length + 1) + 0)]
+                                                       + backward[2 * k + 1
+                                                                       + NumMatrixTypes
+                                                                                       * (1 * (seq2Length + 1) + 0)],
+                                       forward[2 * k + 1
+                                                       + NumMatrixTypes
+                                                                       * ((seq1Length + 1) * (seq2Length + 1) - 1)]
+                                                       + backward[2 * k + 1
+                                                                       + NumMatrixTypes
+                                                                                       * ((seq1Length + 1)
+                                                                                                       * (seq2Length + 1) - 1)]);
+                       initCounts[2 * k + 2] = LOG_ADD(
+                                       forward[2 * k + 2
+                                                       + NumMatrixTypes * (0 * (seq2Length + 1) + 1)]
+                                                       + backward[2 * k + 2
+                                                                       + NumMatrixTypes
+                                                                                       * (0 * (seq2Length + 1) + 1)],
+                                       forward[2 * k + 2
+                                                       + NumMatrixTypes
+                                                                       * ((seq1Length + 1) * (seq2Length + 1) - 1)]
+                                                       + backward[2 * k + 2
+                                                                       + NumMatrixTypes
+                                                                                       * ((seq1Length + 1)
+                                                                                                       * (seq2Length + 1) - 1)]);
+               }
+
+               // compute expected counts
+               for (int i = 0; i <= seq1Length; i++) {
+                       unsigned char c1 =
+                                       (i == 0) ? '~' : (unsigned char) toupper(iter1[i]);
+                       for (int j = 0; j <= seq2Length; j++) {
+                               unsigned char c2 =
+                                               (j == 0) ? '~' : (unsigned char) toupper(iter2[j]);
+
+                               if (i > 0 && j > 0) {
+                                       if (enableTrainEmissions && i == 1 && j == 1) {
+                                               LOG_PLUS_EQUALS(pairCounts[c1][c2],
+                                                               initialDistribution[0] + matchProb[c1][c2]
+                                                                               + backward[0 + ij]);
+                                               LOG_PLUS_EQUALS(pairCounts[c2][c1],
+                                                               initialDistribution[0] + matchProb[c2][c1]
+                                                                               + backward[0 + ij]);
+                                       }
+
+                                       for (int k = 0; k < NumMatrixTypes; k++) {
+                                               LOG_PLUS_EQUALS(transCounts[k][0],
+                                                               forward[k + i1j1] + transProb[k][0]
+                                                                               + matchProb[c1][c2] + backward[0 + ij]);
+                                               if (enableTrainEmissions && (i != 1 || j != 1)) {//adding parentheses by Liu Yongchao, 5 Mar, 2010
+                                                       LOG_PLUS_EQUALS(pairCounts[c1][c2],
+                                                                       forward[k + i1j1] + transProb[k][0]
+                                                                                       + matchProb[c1][c2]
+                                                                                       + backward[0 + ij]);
+                                                       LOG_PLUS_EQUALS(pairCounts[c2][c1],
+                                                                       forward[k + i1j1] + transProb[k][0]
+                                                                                       + matchProb[c2][c1]
+                                                                                       + backward[0 + ij]);
+                                               }
+                                       }
+                               }
+                               if (i > 0) {
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               LOG_PLUS_EQUALS(transCounts[0][2 * k + 1],
+                                                               forward[0 + i1j] + transProb[0][2 * k + 1]
+                                                                               + insProb[c1][k]
+                                                                               + backward[2 * k + 1 + ij]);
+                                               LOG_PLUS_EQUALS(transCounts[2 * k + 1][2 * k + 1],
+                                                               forward[2 * k + 1 + i1j]
+                                                                               + transProb[2 * k + 1][2 * k + 1]
+                                                                               + insProb[c1][k]
+                                                                               + backward[2 * k + 1 + ij]);
+                                               if (enableTrainEmissions) {
+                                                       if (i == 1 && j == 0) {
+                                                               LOG_PLUS_EQUALS(singleCounts[c1],
+                                                                               initialDistribution[2 * k + 1]
+                                                                                               + insProb[c1][k]
+                                                                                               + backward[2 * k + 1 + ij]);
+                                                       } else {
+                                                               LOG_PLUS_EQUALS(singleCounts[c1],
+                                                                               forward[0 + i1j]
+                                                                                               + transProb[0][2 * k + 1]
+                                                                                               + insProb[c1][k]
+                                                                                               + backward[2 * k + 1 + ij]);
+                                                               LOG_PLUS_EQUALS(singleCounts[c1],
+                                                                               forward[2 * k + 1 + i1j]
+                                                                                               + transProb[2 * k + 1][2 * k + 1]
+                                                                                               + insProb[c1][k]
+                                                                                               + backward[2 * k + 1 + ij]);
+                                                       }
+                                               }
+                                       }
+                               }
+                               if (j > 0) {
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               LOG_PLUS_EQUALS(transCounts[0][2 * k + 2],
+                                                               forward[0 + ij1] + transProb[0][2 * k + 2]
+                                                                               + insProb[c2][k]
+                                                                               + backward[2 * k + 2 + ij]);
+                                               LOG_PLUS_EQUALS(transCounts[2 * k + 2][2 * k + 2],
+                                                               forward[2 * k + 2 + ij1]
+                                                                               + transProb[2 * k + 2][2 * k + 2]
+                                                                               + insProb[c2][k]
+                                                                               + backward[2 * k + 2 + ij]);
+                                               if (enableTrainEmissions) {
+                                                       if (i == 0 && j == 1) {
+                                                               LOG_PLUS_EQUALS(singleCounts[c2],
+                                                                               initialDistribution[2 * k + 2]
+                                                                                               + insProb[c2][k]
+                                                                                               + backward[2 * k + 2 + ij]);
+                                                       } else {
+                                                               LOG_PLUS_EQUALS(singleCounts[c2],
+                                                                               forward[0 + ij1]
+                                                                                               + transProb[0][2 * k + 2]
+                                                                                               + insProb[c2][k]
+                                                                                               + backward[2 * k + 2 + ij]);
+                                                               LOG_PLUS_EQUALS(singleCounts[c2],
+                                                                               forward[2 * k + 2 + ij1]
+                                                                                               + transProb[2 * k + 2][2 * k + 2]
+                                                                                               + insProb[c2][k]
+                                                                                               + backward[2 * k + 2 + ij]);
+                                                       }
+                                               }
+                                       }
+                               }
+
+                               ij += NumMatrixTypes;
+                               i1j += NumMatrixTypes;
+                               ij1 += NumMatrixTypes;
+                               i1j1 += NumMatrixTypes;
+                       }
+               }
+
+               // scale all expected counts appropriately
+               for (int i = 0; i < NumMatrixTypes; i++) {
+                       initCounts[i] -= totalProb;
+                       for (int j = 0; j < NumMatrixTypes; j++)
+                               transCounts[i][j] -= totalProb;
+               }
+               if (enableTrainEmissions) {
+                       for (int i = 0; i < 256; i++) {
+                               for (int j = 0; j < 256; j++)
+                                       pairCounts[i][j] -= totalProb;
+                               singleCounts[i] -= totalProb;
+                       }
+               }
+
+               // compute new initial distribution
+               float totalInitDistribCounts = 0;
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       totalInitDistribCounts += exp(initCounts[i]); // should be 2
+               initDistribMat[0] = min(1.0f,
+                               max(0.0f, (float) exp(initCounts[0]) / totalInitDistribCounts));
+               for (int k = 0; k < NumInsertStates; k++) {
+                       float val =
+                                       (exp(initCounts[2 * k + 1]) + exp(initCounts[2 * k + 2]))
+                                                       / 2;
+                       initDistribMat[2 * k + 1] = initDistribMat[2 * k + 2] = min(1.0f,
+                                       max(0.0f, val / totalInitDistribCounts));
+               }
+
+               // compute total counts for match state
+               float inMatchStateCounts = 0;
+               for (int i = 0; i < NumMatrixTypes; i++)
+                       inMatchStateCounts += exp(transCounts[0][i]);
+               for (int i = 0; i < NumInsertStates; i++) {
+
+                       // compute total counts for gap state
+                       float inGapStateCounts = exp(transCounts[2 * i + 1][0])
+                                       + exp(transCounts[2 * i + 1][2 * i + 1])
+                                       + exp(transCounts[2 * i + 2][0])
+                                       + exp(transCounts[2 * i + 2][2 * i + 2]);
+
+                       gapOpen[2 * i] = gapOpen[2 * i + 1] = (exp(
+                                       transCounts[0][2 * i + 1]) + exp(transCounts[0][2 * i + 2]))
+                                       / (2 * inMatchStateCounts);
+
+                       gapExtend[2 * i] = gapExtend[2 * i + 1] = (exp(
+                                       transCounts[2 * i + 1][2 * i + 1])
+                                       + exp(transCounts[2 * i + 2][2 * i + 2]))
+                                       / inGapStateCounts;
+               }
+
+               if (enableTrainEmissions) {
+                       float totalPairCounts = 0;
+                       float totalSingleCounts = 0;
+                       for (int i = 0; i < 256; i++) {
+                               for (int j = 0; j <= i; j++)
+                                       totalPairCounts += exp(pairCounts[j][i]);
+                               totalSingleCounts += exp(singleCounts[i]);
+                       }
+
+                       for (int i = 0; i < 256; i++)
+                               if (!islower((char) i)) {
+                                       int li = (int) ((unsigned char) tolower((char) i));
+                                       for (int j = 0; j <= i; j++)
+                                               if (!islower((char) j)) {
+                                                       int lj = (int) ((unsigned char) tolower((char) j));
+                                                       emitPairs[i][j] =
+                                                                       emitPairs[i][lj] =
+                                                                                       emitPairs[li][j] =
+                                                                                                       emitPairs[li][lj] =
+                                                                                                                       emitPairs[j][i] =
+                                                                                                                                       emitPairs[j][li] =
+                                                                                                                                                       emitPairs[lj][i] =
+                                                                                                                                                                       emitPairs[lj][li] =
+                                                                                                                                                                                       exp(
+                                                                                                                                                                                                       pairCounts[j][i])
+                                                                                                                                                                                                       / totalPairCounts;
+                                               }
+                                       emitSingle[i] = emitSingle[li] = exp(singleCounts[i])
+                                                       / totalSingleCounts;
+                               }
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeAlignment()
+       //
+       // Computes an alignment based on the given posterior matrix.
+       // This is done by finding the maximum summing path (or
+       // maximum weight trace) through the posterior matrix.  The
+       // final alignment is returned as a pair consisting of:
+       //    (1) a string (e.g., XXXBBXXXBBBBBBYYYYBBB) where X's and
+       //        Y's denote insertions in one of the two sequences and
+       //        B's denote that both sequences are present (i.e.
+       //        matches).
+       //    (2) a float indicating the sum achieved
+       /////////////////////////////////////////////////////////////////
+
+       pair<SafeVector<char> *, float> ComputeAlignment(int seq1Length,
+                       int seq2Length, const VF &posterior) const {
+
+               float *twoRows = new float[(seq2Length + 1) * 2];
+               assert(twoRows);
+               float *oldRow = twoRows;
+               float *newRow = twoRows + seq2Length + 1;
+
+               char *tracebackMatrix = new char[(seq1Length + 1) * (seq2Length + 1)];
+               assert(tracebackMatrix);
+               char *tracebackPtr = tracebackMatrix;
+
+               VF::const_iterator posteriorPtr = posterior.begin() + seq2Length + 1;
+
+               // initialization
+               for (int i = 0; i <= seq2Length; i++) {
+                       oldRow[i] = 0;
+                       *(tracebackPtr++) = 'L';
+               }
+
+               // fill in matrix
+               for (int i = 1; i <= seq1Length; i++) {
+
+                       // initialize left column
+                       newRow[0] = 0;
+                       posteriorPtr++;
+                       *(tracebackPtr++) = 'U';
+
+                       // fill in rest of row
+                       for (int j = 1; j <= seq2Length; j++) {
+                               ChooseBestOfThree(*(posteriorPtr++) + oldRow[j - 1],
+                                               newRow[j - 1], oldRow[j], 'D', 'L', 'U', &newRow[j],
+                                               tracebackPtr++);
+                       }
+
+                       // swap rows
+                       float *temp = oldRow;
+                       oldRow = newRow;
+                       newRow = temp;
+               }
+
+               // store best score
+               float total = oldRow[seq2Length];
+               delete[] twoRows;
+
+               // compute traceback
+               SafeVector<char> *alignment = new SafeVector<char>;
+               assert(alignment);
+               int r = seq1Length, c = seq2Length;
+               while (r != 0 || c != 0) {
+                       char ch = tracebackMatrix[r * (seq2Length + 1) + c];
+                       switch (ch) {
+                       case 'L':
+                               c--;
+                               alignment->push_back('Y');
+                               break;
+                       case 'U':
+                               r--;
+                               alignment->push_back('X');
+                               break;
+                       case 'D':
+                               c--;
+                               r--;
+                               alignment->push_back('B');
+                               break;
+                       default:
+                               assert(false);
+                       }
+               }
+
+               delete[] tracebackMatrix;
+
+               reverse(alignment->begin(), alignment->end());
+
+               return make_pair(alignment, total);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeAlignmentWithGapPenalties()
+       //
+       // Similar to ComputeAlignment() except with gap penalties.
+       /////////////////////////////////////////////////////////////////
+
+       pair<SafeVector<char> *, float> ComputeAlignmentWithGapPenalties(
+                       MultiSequence *align1, MultiSequence *align2, const VF &posterior,
+                       int numSeqs1, int numSeqs2, float gapOpenPenalty,
+                       float gapContinuePenalty) const {
+               int seq1Length = align1->GetSequence(0)->GetLength();
+               int seq2Length = align2->GetSequence(0)->GetLength();
+               SafeVector<SafeVector<char>::iterator> dataPtrs1(
+                               align1->GetNumSequences());
+               SafeVector<SafeVector<char>::iterator> dataPtrs2(
+                               align2->GetNumSequences());
+
+               // grab character data
+               for (int i = 0; i < align1->GetNumSequences(); i++)
+                       dataPtrs1[i] = align1->GetSequence(i)->GetDataPtr();
+               for (int i = 0; i < align2->GetNumSequences(); i++)
+                       dataPtrs2[i] = align2->GetSequence(i)->GetDataPtr();
+
+               // the number of active sequences at any given column is defined to be the
+               // number of non-gap characters in that column; the number of gap opens at
+               // any given column is defined to be the number of gap characters in that
+               // column where the previous character in the respective sequence was not
+               // a gap
+               SafeVector<int> numActive1(seq1Length + 1), numGapOpens1(
+                               seq1Length + 1);
+               SafeVector<int> numActive2(seq2Length + 1), numGapOpens2(
+                               seq2Length + 1);
+
+               // compute number of active sequences and gap opens for each group
+               for (int i = 0; i < align1->GetNumSequences(); i++) {
+                       SafeVector<char>::iterator dataPtr =
+                                       align1->GetSequence(i)->GetDataPtr();
+                       numActive1[0] = numGapOpens1[0] = 0;
+                       for (int j = 1; j <= seq1Length; j++) {
+                               if (dataPtr[j] != '-') {
+                                       numActive1[j]++;
+                                       numGapOpens1[j] += (j != 1 && dataPtr[j - 1] != '-');
+                               }
+                       }
+               }
+               for (int i = 0; i < align2->GetNumSequences(); i++) {
+                       SafeVector<char>::iterator dataPtr =
+                                       align2->GetSequence(i)->GetDataPtr();
+                       numActive2[0] = numGapOpens2[0] = 0;
+                       for (int j = 1; j <= seq2Length; j++) {
+                               if (dataPtr[j] != '-') {
+                                       numActive2[j]++;
+                                       numGapOpens2[j] += (j != 1 && dataPtr[j - 1] != '-');
+                               }
+                       }
+               }
+
+               VVF openingPenalty1(numSeqs1 + 1, VF(numSeqs2 + 1));
+               VF continuingPenalty1(numSeqs1 + 1);
+               VVF openingPenalty2(numSeqs1 + 1, VF(numSeqs2 + 1));
+               VF continuingPenalty2(numSeqs2 + 1);
+
+               // precompute penalties
+               for (int i = 0; i <= numSeqs1; i++)
+                       for (int j = 0; j <= numSeqs2; j++)
+                               openingPenalty1[i][j] = i
+                                               * (gapOpenPenalty * j
+                                                               + gapContinuePenalty * (numSeqs2 - j));
+               for (int i = 0; i <= numSeqs1; i++)
+                       continuingPenalty1[i] = i * gapContinuePenalty * numSeqs2;
+               for (int i = 0; i <= numSeqs2; i++)
+                       for (int j = 0; j <= numSeqs1; j++)
+                               openingPenalty2[i][j] = i
+                                               * (gapOpenPenalty * j
+                                                               + gapContinuePenalty * (numSeqs1 - j));
+               for (int i = 0; i <= numSeqs2; i++)
+                       continuingPenalty2[i] = i * gapContinuePenalty * numSeqs1;
+
+               float *twoRows = new float[6 * (seq2Length + 1)];
+               assert(twoRows);
+               float *oldRowMatch = twoRows;
+               float *newRowMatch = twoRows + (seq2Length + 1);
+               float *oldRowInsertX = twoRows + 2 * (seq2Length + 1);
+               float *newRowInsertX = twoRows + 3 * (seq2Length + 1);
+               float *oldRowInsertY = twoRows + 4 * (seq2Length + 1);
+               float *newRowInsertY = twoRows + 5 * (seq2Length + 1);
+
+               char *tracebackMatrix =
+                               new char[3 * (seq1Length + 1) * (seq2Length + 1)];
+               assert(tracebackMatrix);
+               char *tracebackPtr = tracebackMatrix;
+
+               VF::const_iterator posteriorPtr = posterior.begin() + seq2Length + 1;
+
+               // initialization
+               for (int i = 0; i <= seq2Length; i++) {
+                       oldRowMatch[i] = oldRowInsertX[i] = (i == 0) ? 0 : LOG_ZERO;
+                       oldRowInsertY[i] =
+                                       (i == 0) ?
+                                                       0 :
+                                                       oldRowInsertY[i - 1]
+                                                                       + continuingPenalty2[numActive2[i]];
+                       *(tracebackPtr) = *(tracebackPtr + 1) = *(tracebackPtr + 2) = 'Y';
+                       tracebackPtr += 3;
+               }
+
+               // fill in matrix
+               for (int i = 1; i <= seq1Length; i++) {
+
+                       // initialize left column
+                       newRowMatch[0] = newRowInsertY[0] = LOG_ZERO;
+                       newRowInsertX[0] = oldRowInsertX[0]
+                                       + continuingPenalty1[numActive1[i]];
+                       posteriorPtr++;
+                       *(tracebackPtr) = *(tracebackPtr + 1) = *(tracebackPtr + 2) = 'X';
+                       tracebackPtr += 3;
+
+                       // fill in rest of row
+                       for (int j = 1; j <= seq2Length; j++) {
+
+                               // going to MATCH state
+                               ChooseBestOfThree(oldRowMatch[j - 1], oldRowInsertX[j - 1],
+                                               oldRowInsertY[j - 1], 'M', 'X', 'Y', &newRowMatch[j],
+                                               tracebackPtr++);
+                               newRowMatch[j] += *(posteriorPtr++);
+
+                               // going to INSERT X state
+                               ChooseBestOfThree(
+                                               oldRowMatch[j]
+                                                               + openingPenalty1[numActive1[i]][numGapOpens2[j]],
+                                               oldRowInsertX[j] + continuingPenalty1[numActive1[i]],
+                                               oldRowInsertY[j]
+                                                               + openingPenalty1[numActive1[i]][numGapOpens2[j]],
+                                               'M', 'X', 'Y', &newRowInsertX[j], tracebackPtr++);
+
+                               // going to INSERT Y state
+                               ChooseBestOfThree(
+                                               newRowMatch[j - 1]
+                                                               + openingPenalty2[numActive2[j]][numGapOpens1[i]],
+                                               newRowInsertX[j - 1]
+                                                               + openingPenalty2[numActive2[j]][numGapOpens1[i]],
+                                               newRowInsertY[j - 1]
+                                                               + continuingPenalty2[numActive2[j]], 'M', 'X',
+                                               'Y', &newRowInsertY[j], tracebackPtr++);
+                       }
+
+                       // swap rows
+                       float *temp;
+                       temp = oldRowMatch;
+                       oldRowMatch = newRowMatch;
+                       newRowMatch = temp;
+                       temp = oldRowInsertX;
+                       oldRowInsertX = newRowInsertX;
+                       newRowInsertX = temp;
+                       temp = oldRowInsertY;
+                       oldRowInsertY = newRowInsertY;
+                       newRowInsertY = temp;
+               }
+
+               // store best score
+               float total;
+               char matrix;
+               ChooseBestOfThree(oldRowMatch[seq2Length], oldRowInsertX[seq2Length],
+                               oldRowInsertY[seq2Length], 'M', 'X', 'Y', &total, &matrix);
+
+               delete[] twoRows;
+
+               // compute traceback
+               SafeVector<char> *alignment = new SafeVector<char>;
+               assert(alignment);
+               int r = seq1Length, c = seq2Length;
+               while (r != 0 || c != 0) {
+
+                       int offset = (matrix == 'M') ? 0 : (matrix == 'X') ? 1 : 2;
+                       char ch = tracebackMatrix[(r * (seq2Length + 1) + c) * 3 + offset];
+                       switch (matrix) {
+                       case 'Y':
+                               c--;
+                               alignment->push_back('Y');
+                               break;
+                       case 'X':
+                               r--;
+                               alignment->push_back('X');
+                               break;
+                       case 'M':
+                               c--;
+                               r--;
+                               alignment->push_back('B');
+                               break;
+                       default:
+                               assert(false);
+                       }
+                       matrix = ch;
+               }
+
+               delete[] tracebackMatrix;
+
+               reverse(alignment->begin(), alignment->end());
+
+               return make_pair(alignment, 1.0f);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::ComputeViterbiAlignment()
+       //
+       // Computes the highest probability pairwise alignment using the
+       // probabilistic model.  The final alignment is returned as a
+       //  pair consisting of:
+       //    (1) a string (e.g., XXXBBXXXBBBBBBYYYYBBB) where X's and
+       //        Y's denote insertions in one of the two sequences and
+       //        B's denote that both sequences are present (i.e.
+       //        matches).
+       //    (2) a float containing the log probability of the best
+       //        alignment (not used)
+       /////////////////////////////////////////////////////////////////
+
+       pair<SafeVector<char> *, float> ComputeViterbiAlignment(Sequence *seq1,
+                       Sequence *seq2) const {
+
+               assert(seq1);
+               assert(seq2);
+
+               const int seq1Length = seq1->GetLength();
+               const int seq2Length = seq2->GetLength();
+
+               // retrieve the points to the beginning of each sequence
+               SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
+               SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
+
+               // create viterbi matrix
+               VF *viterbiPtr = new VF(
+                               NumMatrixTypes * (seq1Length + 1) * (seq2Length + 1), LOG_ZERO);
+               assert(viterbiPtr);
+               VF &viterbi = *viterbiPtr;
+
+               // create traceback matrix
+               VI *tracebackPtr = new VI(
+                               NumMatrixTypes * (seq1Length + 1) * (seq2Length + 1), -1);
+               assert(tracebackPtr);
+               VI &traceback = *tracebackPtr;
+
+               // initialization condition
+               for (int k = 0; k < NumMatrixTypes; k++)
+                       viterbi[k] = initialDistribution[k];
+
+               // remember offset for each index combination
+               int ij = 0;
+               int i1j = -seq2Length - 1;
+               int ij1 = -1;
+               int i1j1 = -seq2Length - 2;
+
+               ij *= NumMatrixTypes;
+               i1j *= NumMatrixTypes;
+               ij1 *= NumMatrixTypes;
+               i1j1 *= NumMatrixTypes;
+
+               // compute viterbi scores
+               for (int i = 0; i <= seq1Length; i++) {
+                       unsigned char c1 = (i == 0) ? '~' : (unsigned char) iter1[i];
+                       for (int j = 0; j <= seq2Length; j++) {
+                               unsigned char c2 = (j == 0) ? '~' : (unsigned char) iter2[j];
+
+                               if (i > 0 && j > 0) {
+                                       for (int k = 0; k < NumMatrixTypes; k++) {
+                                               float newVal = viterbi[k + i1j1] + transProb[k][0]
+                                                               + matchProb[c1][c2];
+                                               if (viterbi[0 + ij] < newVal) {
+                                                       viterbi[0 + ij] = newVal;
+                                                       traceback[0 + ij] = k;
+                                               }
+                                       }
+                               }
+                               if (i > 0) {
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               float valFromMatch = insProb[c1][k] + viterbi[0 + i1j]
+                                                               + transProb[0][2 * k + 1];
+                                               float valFromIns = insProb[c1][k]
+                                                               + viterbi[2 * k + 1 + i1j]
+                                                               + transProb[2 * k + 1][2 * k + 1];
+                                               if (valFromMatch >= valFromIns) {
+                                                       viterbi[2 * k + 1 + ij] = valFromMatch;
+                                                       traceback[2 * k + 1 + ij] = 0;
+                                               } else {
+                                                       viterbi[2 * k + 1 + ij] = valFromIns;
+                                                       traceback[2 * k + 1 + ij] = 2 * k + 1;
+                                               }
+                                       }
+                               }
+                               if (j > 0) {
+                                       for (int k = 0; k < NumInsertStates; k++) {
+                                               float valFromMatch = insProb[c2][k] + viterbi[0 + ij1]
+                                                               + transProb[0][2 * k + 2];
+                                               float valFromIns = insProb[c2][k]
+                                                               + viterbi[2 * k + 2 + ij1]
+                                                               + transProb[2 * k + 2][2 * k + 2];
+                                               if (valFromMatch >= valFromIns) {
+                                                       viterbi[2 * k + 2 + ij] = valFromMatch;
+                                                       traceback[2 * k + 2 + ij] = 0;
+                                               } else {
+                                                       viterbi[2 * k + 2 + ij] = valFromIns;
+                                                       traceback[2 * k + 2 + ij] = 2 * k + 2;
+                                               }
+                                       }
+                               }
+
+                               ij += NumMatrixTypes;
+                               i1j += NumMatrixTypes;
+                               ij1 += NumMatrixTypes;
+                               i1j1 += NumMatrixTypes;
+                       }
+               }
+
+               // figure out best terminating cell
+               float bestProb = LOG_ZERO;
+               int state = -1;
+               for (int k = 0; k < NumMatrixTypes; k++) {
+                       float thisProb =
+                                       viterbi[k
+                                                       + NumMatrixTypes
+                                                                       * ((seq1Length + 1) * (seq2Length + 1) - 1)]
+                                                       + initialDistribution[k];
+                       if (bestProb < thisProb) {
+                               bestProb = thisProb;
+                               state = k;
+                       }
+               }
+               assert(state != -1);
+
+               delete viterbiPtr;
+
+               // compute traceback
+               SafeVector<char> *alignment = new SafeVector<char>;
+               assert(alignment);
+               int r = seq1Length, c = seq2Length;
+               while (r != 0 || c != 0) {
+                       int newState = traceback[state
+                                       + NumMatrixTypes * (r * (seq2Length + 1) + c)];
+
+                       if (state == 0) {
+                               c--;
+                               r--;
+                               alignment->push_back('B');
+                       } else if (state % 2 == 1) {
+                               r--;
+                               alignment->push_back('X');
+                       } else {
+                               c--;
+                               alignment->push_back('Y');
+                       }
+
+                       state = newState;
+               }
+
+               delete tracebackPtr;
+
+               reverse(alignment->begin(), alignment->end());
+
+               return make_pair(alignment, bestProb);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // ProbabilisticModel::BuildPosterior()
+       //
+       // Builds a posterior probability matrix needed to align a pair
+       // of alignments.  Mathematically, the returned matrix M is
+       // defined as follows:
+       //    M[i,j] =     sum          sum      f(s,t,i,j)
+       //             s in align1  t in align2
+       // where
+       //                  [  P(s[i'] <--> t[j'])
+       //                  [       if s[i'] is a letter in the ith column of align1 and
+       //                  [          t[j'] is a letter in the jth column of align2
+       //    f(s,t,i,j) =  [
+       //                  [  0    otherwise
+       //
+       /////////////////////////////////////////////////////////////////
+
+       // Accumulates the pairwise posteriors of every (s in align1, t in align2)
+       // into one dense matrix addressed by alignment column.  The result holds
+       // (seq1Length + 1) * (seq2Length + 1) floats in row-major order; it is
+       // allocated here and handed to the caller, who must delete it.
+       // sparseMatrices is only ever read as [a][b] with a taken from the
+       // smaller label, which is why the two branches below mirror each other.
+       // cutoff is subtracted along each visited row/column (default 0).
+       VF *BuildPosterior(MultiSequence *align1, MultiSequence *align2,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       float cutoff = 0.0f) const {
+               const int seq1Length = align1->GetSequence(0)->GetLength();
+               const int seq2Length = align2->GetSequence(0)->GetLength();
+               const int stride = seq2Length + 1; // entries per row of the result
+
+               VF *posteriorPtr = new VF((seq1Length + 1) * stride, 0);
+               assert(posteriorPtr);
+               VF &posterior = *posteriorPtr;
+
+               // for each s in align1
+               for (int i = 0; i < align1->GetNumSequences(); i++) {
+                       int first = align1->GetSequence(i)->GetLabel();
+                       // released with delete at the end of this iteration
+                       SafeVector<int> *mapping1 = align1->GetSequence(i)->GetMapping();
+
+                       // for each t in align2
+                       for (int j = 0; j < align2->GetNumSequences(); j++) {
+                               int second = align2->GetSequence(j)->GetLabel();
+                               SafeVector<int> *mapping2 =
+                                               align2->GetSequence(j)->GetMapping();
+
+                               if (first < second) {
+
+                                       // sparse rows are looked up through mapping1 (s),
+                                       // sparse columns through mapping2 (t)
+                                       SparseMatrix *matrix = sparseMatrices[first][second];
+
+                                       for (int ii = 1; ii <= matrix->GetSeq1Length(); ii++) {
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(ii);
+                                               int base = (*mapping1)[ii] * stride;
+                                               int rowSize = matrix->GetRowSize(ii);
+
+                                               // add in all relevant values
+                                               for (int jj = 0; jj < rowSize; jj++)
+                                                       posterior[base + (*mapping2)[row[jj].first]] +=
+                                                                       row[jj].second;
+
+                                               // subtract cutoff
+                                               // NOTE(review): the row loop is 1-based (1..GetSeq1Length())
+                                               // but this loop visits mapping indices
+                                               // 0..GetSeq2Length()-1, so index 0 is touched and the
+                                               // last index is not.  Looks like an off-by-one; it has
+                                               // no effect while cutoff is 0.  Confirm before changing.
+                                               for (int jj = 0; jj < matrix->GetSeq2Length(); jj++)
+                                                       posterior[base + (*mapping2)[jj]] -= cutoff;
+                                       }
+
+                               } else {
+
+                                       // transposed case: sparse rows are looked up through
+                                       // mapping2 (t), sparse columns through mapping1 (s)
+                                       SparseMatrix *matrix = sparseMatrices[second][first];
+
+                                       for (int jj = 1; jj <= matrix->GetSeq1Length(); jj++) {
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(jj);
+                                               int base = (*mapping2)[jj];
+                                               int rowSize = matrix->GetRowSize(jj);
+
+                                               // add in all relevant values
+                                               for (int ii = 0; ii < rowSize; ii++)
+                                                       posterior[base + (*mapping1)[row[ii].first] * stride] +=
+                                                                       row[ii].second;
+
+                                               // subtract cutoff
+                                               // NOTE(review): same 0-based/1-based mismatch as in the
+                                               // branch above.
+                                               for (int ii = 0; ii < matrix->GetSeq2Length(); ii++)
+                                                       posterior[base + (*mapping1)[ii] * stride] -= cutoff;
+                                       }
+
+                               }
+
+                               delete mapping2;
+                       }
+
+                       delete mapping1;
+               }
+
+               return posteriorPtr;
+       }
+       //added by Liu Yongchao.Feb 23, 2010
+       // Weighted variant of BuildPosterior.  seqsWeights is indexed by sequence
+       // label; the contribution of a pair (s, t) is scaled by
+       // w(s) * w(t) / totalWeights, where totalWeights is the sum of those
+       // products over all pairs.  The result holds
+       // (seq1Length + 1) * (seq2Length + 1) floats in row-major order; it is
+       // allocated here and handed to the caller, who must delete it.
+       VF *BuildPosterior(int* seqsWeights, MultiSequence *align1,
+                       MultiSequence *align2,
+                       const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
+                       float cutoff = 0.0f) const {
+               const int seq1Length = align1->GetSequence(0)->GetLength();
+               const int seq2Length = align2->GetSequence(0)->GetLength();
+               const int stride = seq2Length + 1; // entries per row of the result
+
+               VF *posteriorPtr = new VF((seq1Length + 1) * stride, 0);
+               assert(posteriorPtr);
+               VF &posterior = *posteriorPtr;
+
+               //compute the total sum of all weights
+               float totalWeights = 0;
+               for (int i = 0; i < align1->GetNumSequences(); i++) {
+                       int first = align1->GetSequence(i)->GetLabel();
+                       int w1 = seqsWeights[first];
+                       for (int j = 0; j < align2->GetNumSequences(); j++) {
+                               int second = align2->GetSequence(j)->GetLabel();
+                               int w2 = seqsWeights[second];
+
+                               // multiply in 64 bits: an int product can overflow before
+                               // it is converted to float
+                               totalWeights += (float) ((long long) w1 * w2);
+                       }
+               }
+               // for each s in align1
+               for (int i = 0; i < align1->GetNumSequences(); i++) {
+                       int first = align1->GetSequence(i)->GetLabel();
+                       int w1 = seqsWeights[first];
+                       // released with delete at the end of this iteration
+                       SafeVector<int> *mapping1 = align1->GetSequence(i)->GetMapping();
+                       // for each t in align2
+                       for (int j = 0; j < align2->GetNumSequences(); j++) {
+                               int second = align2->GetSequence(j)->GetLabel();
+                               int w2 = seqsWeights[second];
+                               SafeVector<int> *mapping2 =
+                                               align2->GetSequence(j)->GetMapping();
+
+                               // relative weight of this pair (same 64-bit product as above)
+                               float w = (float) ((long long) w1 * w2) / totalWeights;
+                               if (first < second) {
+
+                                       // sparse rows are looked up through mapping1 (s),
+                                       // sparse columns through mapping2 (t)
+                                       SparseMatrix *matrix = sparseMatrices[first][second];
+
+                                       for (int ii = 1; ii <= matrix->GetSeq1Length(); ii++) {
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(ii);
+                                               int base = (*mapping1)[ii] * stride;
+                                               int rowSize = matrix->GetRowSize(ii);
+
+                                               // add in all relevant values
+                                               for (int jj = 0; jj < rowSize; jj++)
+                                                       posterior[base + (*mapping2)[row[jj].first]] += w
+                                                                       * row[jj].second;
+
+                                               // subtract cutoff
+                                               // NOTE(review): the row loop is 1-based (1..GetSeq1Length())
+                                               // but this loop visits mapping indices
+                                               // 0..GetSeq2Length()-1, so index 0 is touched and the
+                                               // last index is not.  Looks like an off-by-one; it has
+                                               // no effect while cutoff is 0.  Confirm before changing.
+                                               for (int jj = 0; jj < matrix->GetSeq2Length(); jj++)
+                                                       posterior[base + (*mapping2)[jj]] -= w * cutoff;
+                                       }
+
+                               } else {
+
+                                       // transposed case: sparse rows are looked up through
+                                       // mapping2 (t), sparse columns through mapping1 (s)
+                                       SparseMatrix *matrix = sparseMatrices[second][first];
+
+                                       for (int jj = 1; jj <= matrix->GetSeq1Length(); jj++) {
+                                               SafeVector<PIF>::iterator row = matrix->GetRowPtr(jj);
+                                               int base = (*mapping2)[jj];
+                                               int rowSize = matrix->GetRowSize(jj);
+
+                                               // add in all relevant values
+                                               for (int ii = 0; ii < rowSize; ii++)
+                                                       posterior[base + (*mapping1)[row[ii].first] * stride] += w
+                                                                       * row[ii].second;
+
+                                               // subtract cutoff
+                                               // NOTE(review): same 0-based/1-based mismatch as in the
+                                               // branch above.
+                                               for (int ii = 0; ii < matrix->GetSeq2Length(); ii++)
+                                                       posterior[base + (*mapping1)[ii] * stride] -=
+                                                                       w * cutoff;
+                                       }
+
+                               }
+
+                               delete mapping2;
+                       }
+
+                       delete mapping1;
+               }
+
+               return posteriorPtr;
+       }
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/SafeVector.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/SafeVector.h
new file mode 100644 (file)
index 0000000..f42c2da
--- /dev/null
@@ -0,0 +1,65 @@
+/////////////////////////////////////////////////////////////////
+// SafeVector.h
+//
+// STL vector with array bounds checking.  To enable bounds
+// checking, #define ENABLE_CHECKS.
+/////////////////////////////////////////////////////////////////
+
+#ifndef SAFEVECTOR_H
+#define SAFEVECTOR_H
+
+#include <cassert>
+#include <vector>
+using namespace std;
+
+/////////////////////////////////////////////////////////////////
+// SafeVector
+//
+// Class derived from the STL std::vector for bounds checking.
+/////////////////////////////////////////////////////////////////
+
+template<class TYPE>
+class SafeVector: public std::vector<TYPE> {
+public:
+
+       // Constructors: each one forwards unchanged to the std::vector<TYPE>
+       // constructor with the same arguments.
+       SafeVector() :
+                       std::vector<TYPE>() {
+       }
+       SafeVector(size_t size) :
+                       std::vector<TYPE>(size) {
+       }
+       SafeVector(size_t size, const TYPE &value) :
+                       std::vector<TYPE>(size, value) {
+       }
+       SafeVector(const SafeVector &source) :
+                       std::vector<TYPE>(source) {
+       }
+
+#ifdef ENABLE_CHECKS
+
+       // Checked element access, compiled only when ENABLE_CHECKS is defined;
+       // without it the inherited, unchecked std::vector operator[] is used.
+       // size() is a member of the dependent base class std::vector<TYPE>, so
+       // it must be reached through this-> to be found inside the template.
+
+       // [] array bounds checking
+       TYPE &operator[](int index) {
+               assert (index >= 0 && index < (int) this->size());
+               return std::vector<TYPE>::operator[] ((size_t) index);
+       }
+
+       // [] const array bounds checking
+       const TYPE &operator[] (int index) const {
+               assert (index >= 0 && index < (int) this->size());
+               return std::vector<TYPE>::operator[] ((size_t) index);
+       }
+
+#endif
+
+};
+
+// some commonly used vector types
+typedef SafeVector<int> VI;
+typedef SafeVector<VI> VVI;
+typedef SafeVector<VVI> VVVI;
+typedef SafeVector<float> VF;
+typedef SafeVector<VF> VVF;
+typedef SafeVector<VVF> VVVF;
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/ScoreType.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/ScoreType.h
new file mode 100644 (file)
index 0000000..47de13d
--- /dev/null
@@ -0,0 +1,368 @@
+/////////////////////////////////////////////////////////////////
+// ScoreType.h
+//
+// Routines for doing math operations in MSAPROBS
+/////////////////////////////////////////////////////////////////
+
+#ifndef SCORETYPE_H
+#define SCORETYPE_H
+
+#include <cmath>
+#include <algorithm>
+#include <cfloat>
+#include <assert.h>
+
+typedef float ScoreType;
+
+const float LOG_ZERO = -2e20;
+const float LOG_ONE = 0.0;
+
+/////////////////////////////////////////////////////////////////
+// LOG()
+//
+// Compute the logarithm of x.
+/////////////////////////////////////////////////////////////////
+
+inline ScoreType LOG(ScoreType x) {
+       // Plain forwarder to log(); there is no guard for x <= 0, so such
+       // inputs yield whatever log() returns for them.
+       return log(x);
+}
+
+/////////////////////////////////////////////////////////////////
+// EXP()
+//
+// Computes exp(x).
+/////////////////////////////////////////////////////////////////
+
+inline ScoreType EXP(ScoreType x) {
+       // Piecewise approximation of exp(x).  Positive arguments go to the
+       // library exp(); each range below, down to -16, has its own quartic in
+       // Horner form; anything at or below -16 (and any value for which every
+       // comparison is false) yields 0.  The ranges are tested from the top
+       // down, so each test only needs the lower bound of its range.
+       if (x > 0)
+               return exp(x);
+       if (x > -0.5)
+               return (((0.03254409303190190000 * x + 0.16280432765779600000) * x
+                               + 0.49929760485974900000) * x + 0.99995149601363700000) * x
+                               + 0.99999925508501600000;
+       if (x > -1)
+               return (((0.01973899026052090000 * x + 0.13822379685007000000) * x
+                               + 0.48056651562365000000) * x + 0.99326940370383500000) * x
+                               + 0.99906756856399500000;
+       if (x > -2)
+               return (((0.00940528203591384000 * x + 0.09414963667859410000) * x
+                               + 0.40825793595877300000) * x + 0.93933625499130400000) * x
+                               + 0.98369508190545300000;
+       if (x > -4)
+               return (((0.00217245711583303000 * x + 0.03484829428350620000) * x
+                               + 0.22118199801337800000) * x + 0.67049462206469500000) * x
+                               + 0.83556950223398500000;
+       if (x > -8)
+               return (((0.00012398771025456900 * x + 0.00349155785951272000) * x
+                               + 0.03727721426017900000) * x + 0.17974997741536900000) * x
+                               + 0.33249299994217400000;
+       if (x > -16)
+               return (((0.00000051741713416603 * x + 0.00002721456879608080) * x
+                               + 0.00053418601865636800) * x + 0.00464101989351936000) * x
+                               + 0.01507447981459420000;
+       return 0;
+}
+
+/*
+ /////////////////////////////////////////////////////////////////
+ // LOOKUP()
+ //
+ // Computes log (exp (x) + 1), for 0 <= x <= 7.5.
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOOKUP (ScoreType x){
+ //return log (exp(x) + 1);
+ if (x < 2){
+ if (x < 0.5){
+ if (x < 0)
+ return log (exp(x) + 1);
+ return (((-0.00486373205785640000*x - 0.00020245408813934800)*x + 0.12504222666029800000)*x + 0.49999685320563000000)*x + 0.69314723138948900000;
+ }
+ if (x < 1)
+ return (((-0.00278634205460548000*x - 0.00458097251248546000)*x + 0.12865849880472500000)*x + 0.49862228499205200000)*x + 0.69334810088688000000;
+ return (((0.00059633755154209200*x - 0.01918996666063320000)*x + 0.15288232492093800000)*x + 0.48039958825756900000)*x + 0.69857578503189200000;
+ }
+ if (x < 8){
+ if (x < 4)
+ return (((0.00135958539181047000*x - 0.02329807659316430000)*x + 0.15885799609532100000)*x + 0.48167498563270800000)*x + 0.69276185058669200000;
+ return (((0.00011992394456683500*x - 0.00338464503306568000)*x + 0.03622746366545470000)*x + 0.82481250248383700000)*x + 0.32507892994863100000;
+ }
+ if (x < 16)
+ return (((0.00000051726300753785*x - 0.00002720671238876090)*x + 0.00053403733818413500)*x + 0.99536021775747900000)*x + 0.01507065715532010000;
+ return x;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOOKUP_SLOW()
+ //
+ // Computes log (exp (x) + 1).
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOOKUP_SLOW (ScoreType x){
+ return log (exp (x) + 1);
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // MAX()
+ //
+ // Compute max of three numbers
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType MAX (ScoreType x, ScoreType y, ScoreType z){
+ if (x >= y){
+ if (x >= z)
+ return x;
+ return z;
+ }
+ if (y >= z)
+ return y;
+ return z;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_PLUS_EQUALS()
+ //
+ // Add two log probabilities and store in the first argument
+ /////////////////////////////////////////////////////////////////
+
+ inline void LOG_PLUS_EQUALS (ScoreType &x, ScoreType y){
+ if (x < y)
+ x = (x <= LOG_ZERO) ? y : LOOKUP(y-x) + x;
+ else
+ x = (y <= LOG_ZERO) ? x : LOOKUP(x-y) + y;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_PLUS_EQUALS_SLOW()
+ //
+ // Add two log probabilities and store in the first argument
+ /////////////////////////////////////////////////////////////////
+
+ inline void LOG_PLUS_EQUALS_SLOW (ScoreType &x, ScoreType y){
+ if (x < y)
+ x = (x <= LOG_ZERO) ? y : LOOKUP_SLOW(y-x) + x;
+ else
+ x = (y <= LOG_ZERO) ? x : LOOKUP_SLOW(x-y) + y;
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // LOG_ADD()
+ //
+ // Add two log probabilities
+ /////////////////////////////////////////////////////////////////
+
+ inline ScoreType LOG_ADD (ScoreType x, ScoreType y){
+ if (x < y) return (x <= LOG_ZERO) ? y : LOOKUP(y-x) + x;
+ return (y <= LOG_ZERO) ? x : LOOKUP(x-y) + y;
+ }
+ */
+
+/*
+ /////////////////////////////////////////////////////////////////
+ // LOG()
+ //
+ // Compute the logarithm of x.
+ /////////////////////////////////////////////////////////////////
+
+ inline float LOG (float x){
+ return log (x);
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // EXP()
+ //
+ // Computes exp(x), for -4.6 <= x <= 0.
+ /////////////////////////////////////////////////////////////////
+
+ inline float EXP (float x){
+ assert (x <= 0.00f);
+ if (x < EXP_UNDERFLOW_THRESHOLD) return 0.0f;
+ return (((0.006349841068584 * x + 0.080775412572352) * x + 0.397982026296272) * x + 0.95279335963787f) * x + 0.995176455837312f;
+ //return (((0.00681169825657f * x + 0.08386267698832f) * x + 0.40413983195844f) * x + 0.95656674979767f) * x + 0.99556744049130f;
+ }
+ */
+
+const float EXP_UNDERFLOW_THRESHOLD = -4.6;
+const float LOG_UNDERFLOW_THRESHOLD = 7.5;
+
+/////////////////////////////////////////////////////////////////
+// LOOKUP()
+//
+// Computes log (exp (x) + 1), for 0 <= x <= 7.5.
+/////////////////////////////////////////////////////////////////
+
+inline float LOOKUP(float x) {
+       // Piecewise-cubic evaluation: choose the coefficient set of the segment
+       // that contains x, then evaluate the cubic once in Horner form.  The
+       // two asserts state the supported domain, 0 <= x <=
+       // LOG_UNDERFLOW_THRESHOLD.
+       assert(x >= 0.00f);
+       assert(x <= LOG_UNDERFLOW_THRESHOLD);
+
+       float c3, c2, c1, c0;
+       if (x <= 1.00f) {
+               c3 = -0.009350833524763f;
+               c2 = 0.130659527668286f;
+               c1 = 0.498799810682272f;
+               c0 = 0.693203116424741f;
+       } else if (x <= 2.50f) {
+               c3 = -0.014532321752540f;
+               c2 = 0.139942324101744f;
+               c1 = 0.495635523139337f;
+               c0 = 0.692140569840976f;
+       } else if (x <= 4.50f) {
+               c3 = -0.004605031767994f;
+               c2 = 0.063427417320019f;
+               c1 = 0.695956496475118f;
+               c0 = 0.514272634594009f;
+       } else {
+               // top segment, up to LOG_UNDERFLOW_THRESHOLD
+               c3 = -0.000458661602210f;
+               c2 = 0.009695946122598f;
+               c1 = 0.930734667215156f;
+               c0 = 0.168037164329057f;
+       }
+       return ((c3 * x + c2) * x + c1) * x + c0;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOOKUP_SLOW()
+//
+// Computes log (exp (x) + 1).
+/////////////////////////////////////////////////////////////////
+
+inline float LOOKUP_SLOW(float x) {
+       // Direct evaluation through the library exp() and log(), with no
+       // range restriction on x; the result is narrowed to float on return.
+       return log(exp(x) + 1);
+}
+
+/////////////////////////////////////////////////////////////////
+// MAX()
+//
+// Compute max of three numbers
+/////////////////////////////////////////////////////////////////
+
+inline float MAX(float x, float y, float z) {
+       // Reduce pairwise: the winner of (x, y) is then compared with z.
+       // Ties keep the earlier argument.
+       const float firstTwo = (x >= y) ? x : y;
+       return (firstTwo >= z) ? firstTwo : z;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_PLUS_EQUALS()
+//
+// Add two log probabilities and store in the first argument
+/////////////////////////////////////////////////////////////////
+
+inline void LOG_PLUS_EQUALS(float &x, float y) {
+       // Order the operands once so that both cases share one formula.
+       const bool xIsSmaller = x < y;
+       const float lo = xIsSmaller ? x : y;
+       const float hi = xIsSmaller ? y : x;
+
+       // A LOG_ZERO term, or one at least LOG_UNDERFLOW_THRESHOLD below the
+       // other, is dropped: the larger operand is the result.
+       if (lo == LOG_ZERO || hi - lo >= LOG_UNDERFLOW_THRESHOLD) {
+               x = hi;
+               return;
+       }
+       x = LOOKUP(hi - lo) + lo;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_PLUS_EQUALS_SLOW()
+//
+// Add two log probabilities and store in the first argument
+/////////////////////////////////////////////////////////////////
+
+inline void LOG_PLUS_EQUALS_SLOW(float &x, float y) {
+       // Order the operands once so that both cases share one formula.
+       const bool xIsSmaller = x < y;
+       const float lo = xIsSmaller ? x : y;
+       const float hi = xIsSmaller ? y : x;
+
+       // A LOG_ZERO term contributes nothing: keep the other operand.
+       if (lo == LOG_ZERO) {
+               x = hi;
+               return;
+       }
+       x = LOOKUP_SLOW(hi - lo) + lo;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add two log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x, float y) {
+       // Order the operands once so that both cases share one formula.
+       const bool xIsSmaller = x < y;
+       const float lo = xIsSmaller ? x : y;
+       const float hi = xIsSmaller ? y : x;
+
+       // A LOG_ZERO term, or one at least LOG_UNDERFLOW_THRESHOLD below the
+       // other, is dropped: the larger operand is the result.
+       if (lo == LOG_ZERO || hi - lo >= LOG_UNDERFLOW_THRESHOLD)
+               return hi;
+       return LOOKUP(hi - lo) + lo;
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add three log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3) {
+       // Fold from the right with the two-argument LOG_ADD.
+       const float tail = LOG_ADD(x2, x3);
+       return LOG_ADD(x1, tail);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add four log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4) {
+       // Fold from the right with the two-argument LOG_ADD.
+       float acc = LOG_ADD(x3, x4);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add five log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5) {
+       // Fold from the right with the two-argument LOG_ADD.
+       float acc = LOG_ADD(x4, x5);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add six log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5,
+               float x6) {
+       // Fold from the right with the two-argument LOG_ADD.
+       float acc = LOG_ADD(x5, x6);
+       acc = LOG_ADD(x4, acc);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// LOG_ADD()
+//
+// Add seven log probabilities
+/////////////////////////////////////////////////////////////////
+
+inline float LOG_ADD(float x1, float x2, float x3, float x4, float x5, float x6,
+               float x7) {
+       // Fold from the right with the two-argument LOG_ADD.
+       float acc = LOG_ADD(x6, x7);
+       acc = LOG_ADD(x5, acc);
+       acc = LOG_ADD(x4, acc);
+       acc = LOG_ADD(x3, acc);
+       acc = LOG_ADD(x2, acc);
+       return LOG_ADD(x1, acc);
+}
+
+/////////////////////////////////////////////////////////////////
+// ChooseBestOfThree()
+//
+// Store the largest of three values x1, x2, and x3 in *x.  Also
+// if xi is the largest value, then store bi in *b.
+/////////////////////////////////////////////////////////////////
+
+inline void ChooseBestOfThree(float x1, float x2, float x3, char b1, char b2,
+               char b3, float *x, char *b) {
+       // Track the running winner and its tag; a later candidate replaces it
+       // only when the current winner fails the >= test, so ties keep the
+       // earlier candidate.
+       float best = x1;
+       char tag = b1;
+       if (!(x1 >= x2)) {
+               best = x2;
+               tag = b2;
+       }
+       if (!(best >= x3)) {
+               best = x3;
+               tag = b3;
+       }
+       *x = best;
+       *b = tag;
+}
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/Sequence.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/Sequence.h
new file mode 100644 (file)
index 0000000..5bd1ef9
--- /dev/null
@@ -0,0 +1,444 @@
+/////////////////////////////////////////////////////////////////
+// Sequence.h
+//
+// Class for reading/manipulating single sequence character data.
+/////////////////////////////////////////////////////////////////
+
+#ifndef SEQUENCE_H
+#define SEQUENCE_H
+
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <cctype>
+#include <cstdlib>
+#include "SafeVector.h"
+#include "FileBuffer.h"
+
+/////////////////////////////////////////////////////////////////
+// Sequence
+//
+// Class for storing sequence information.
+/////////////////////////////////////////////////////////////////
+
+class Sequence {
+
+       bool isValid; // a boolean indicating whether the sequence data is valid or not
+       string header;       // string containing the comment line of the FASTA file
+       SafeVector<char> *data;      // pointer to character data
+       int length;                  // length of the sequence
+       int sequenceLabel; // integer sequence label, typically to indicate the ordering of sequences
+                                          //   in a Multi-FASTA file
+       int inputLabel;              // position of sequence in original input
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Default constructor.  Does nothing.
+       /////////////////////////////////////////////////////////////////
+
+       // Declared ahead of the `public:` label, so it has the class's default
+       // (private) access.  Leaves the object invalid and empty: no character
+       // data, zero length, both labels 0.
+       Sequence() :
+                       isValid(false), header(""), data(NULL), length(0), sequenceLabel(0), inputLabel(
+                                       0) {
+       }
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Constructor.  Reads the sequence from a FileBuffer.
+       /////////////////////////////////////////////////////////////////
+
+       // Reads one '>'-headed record from infile.
+       //   infile    - source buffer; reading stops at end of input or just
+       //               before the next '>' (which is pushed back).
+       //   stripGaps - when true, '-' (and '.', which is mapped to '-') is
+       //               dropped instead of stored.
+       // The object stays invalid (data == NULL) when the first non-blank line
+       // does not start with '>' or when no sequence character follows it.
+       // Any character other than a letter terminates the program with exit(1).
+       Sequence(FileBuffer &infile, bool stripGaps = false) :
+                       isValid(false), header("~"), data(NULL), length(0), sequenceLabel(
+                                       0), inputLabel(0) {
+
+               // read until the first non-blank line
+               while (!infile.eof()) {
+                       infile.GetLine(header);
+                       if (header.length() != 0)
+                               break;
+               }
+
+               // Only a line starting with '>' opens a record.  The emptiness test
+               // keeps header[0] from being read on an empty string (input that
+               // holds nothing but blank lines).
+               if (header.empty() || header[0] != '>')
+                       return;
+
+               // Drop the leading '>' and the white space around the comment in a
+               // single pass.  isspace() needs an unsigned char value: passing a
+               // negative plain char is undefined.
+               size_t first = 1;
+               while (first < header.length()
+                               && isspace((unsigned char) header[first]))
+                       first++;
+               size_t last = header.length();
+               while (last > first && isspace((unsigned char) header[last - 1]))
+                       last--;
+               header = header.substr(first, last - first);
+
+               // get ready to read the data[] array; note that data[0] is always '@'
+               char ch;
+               data = new SafeVector<char>;
+               assert(data);
+               data->push_back('@');
+
+               // get a character from the file
+               while (infile.Get(ch)) {
+
+                       // if we've reached a new comment line, put the character back and stop
+                       if (ch == '>') {
+                               infile.UnGet();
+                               break;
+                       }
+
+                       // skip whitespace
+                       if (isspace((unsigned char) ch))
+                               continue;
+
+                       // substitute gap character
+                       if (ch == '.')
+                               ch = '-';
+                       if (stripGaps && ch == '-')
+                               continue;
+
+                       // check for known characters; note that a gap which was not
+                       // stripped above is not a letter and therefore ends up here too
+                       if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))) {
+                               cerr << "ERROR: Unknown character encountered: " << ch
+                                               << endl;
+                               exit(1);
+                       }
+
+                       // everything's ok so far, so just store this character.
+                       if (ch >= 'a' && ch <= 'z') {
+                               ch = ch - 'a' + 'A';
+                       }       //change to upper case. fixed by Liu Yongchao, May 21, 2010
+
+                       data->push_back(ch);
+                       ++length;
+               }
+
+               // sequence must contain data in order to be valid
+               isValid = length > 0;
+               if (!isValid) {
+                       delete data;
+                       data = NULL;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Sequence()
+       //
+       // Constructor.  Builds a sequence from existing data.  Note
+       // that the data must use one-based indexing where data[0] should
+       // be set to '@'.
+       /////////////////////////////////////////////////////////////////
+
+       // Stores the given pointer as-is (no copy of the character data is
+       // made); the destructor of this class deletes `data`.  The parameters
+       // have the same names as the members they initialise, so inside this
+       // constructor `data` refers to the parameter.
+       Sequence(SafeVector<char> *data, string header, int length,
+                       int sequenceLabel, int inputLabel) :
+                       isValid(data != NULL), header(header), data(data), length(length), sequenceLabel(
+                                       sequenceLabel), inputLabel(inputLabel) {
+               // a NULL pointer or a missing '@' sentinel is a caller error
+               assert(data);
+               assert((*data)[0] == '@');
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::~Sequence()
+       //
+       // Destructor.  Release allocated memory.
+       /////////////////////////////////////////////////////////////////
+
+       ~Sequence() {
+               // no character data held: nothing to release
+               if (!data)
+                       return;
+
+               // data is only expected to be set on a valid sequence
+               assert(isValid);
+               delete data;
+               data = NULL;
+               isValid = false;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetHeader()
+       //
+       // Return the string comment associated with this sequence.
+       /////////////////////////////////////////////////////////////////
+
+       string GetHeader() const {
+               return string(header); // returned by value: the caller gets its own copy
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetName()
+       //
+       // Return the first word of the string comment associated with this sequence.
+       /////////////////////////////////////////////////////////////////
+
+       string GetName() const {
+               char name[1024] = ""; // zero-filled, so the result is "" when the header holds no word
+               sscanf(header.c_str(), "%1023s", name); // width bound: a first word over 1023 chars is truncated, not overflowed
+               return string(name);
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetDataPtr()
+       //
+       // Return the iterator to data associated with this sequence.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<char>::iterator GetDataPtr() {
+               assert(isValid);
+               assert(data);
+               return (*data).begin(); // iterator to slot 0 of the buffer (the pad slot under one-based indexing)
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetPosition()
+       //
+       // Return the character at position i.  Recall that the character
+       // data is stored with one-based indexing.
+       /////////////////////////////////////////////////////////////////
+
+       char GetPosition(int i) const {
+               assert(isValid);
+               assert(data);
+               assert(i >= 1 && i <= length); // valid positions are 1..length inclusive
+               return (*this->data)[i];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::SetLabel()
+       //
+       // Sets the sequence label to i.
+       /////////////////////////////////////////////////////////////////
+
+       void SetLabel(int i) {
+               assert(isValid);
+               // the input label and the sort label are set together here
+               inputLabel = sequenceLabel = i;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::SetSortLabel()
+       //
+       // Sets the sequence sorting label to i.
+       /////////////////////////////////////////////////////////////////
+
+       void SetSortLabel(int i) {
+               assert(isValid);
+               this->sequenceLabel = i; // only the sort label changes; inputLabel is left untouched
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetLabel()
+       //
+       // Retrieves the input label.
+       /////////////////////////////////////////////////////////////////
+
+       int GetLabel() const {
+               assert(isValid);
+               return this->inputLabel; // the input label, not the sort label
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetSortLabel()
+       //
+       // Retrieves the sorting label.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSortLabel() const {
+               assert(isValid);
+               return this->sequenceLabel; // the sort label is kept in sequenceLabel
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Fail()
+       //
+       // Checks to see if the sequence successfully loaded.
+       /////////////////////////////////////////////////////////////////
+
+       bool Fail() const {
+               return isValid == false; // true when the sequence is not marked valid
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetLength()
+       //
+       // Returns the length of the sequence.
+       /////////////////////////////////////////////////////////////////
+
+       int GetLength() const {
+               assert(isValid);
+               assert(data);
+               return this->length; // stored character count; the '@' pad in slot 0 is not included
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::WriteMFA()
+       //
+       // Writes the sequence to outfile in MFA format.  Uses numColumns
+       // columns per line.  If useIndex is set to false, then the
+       // header is printed as normal, but if useIndex is true, then
+       // ">S###" is printed where ### represents the sequence label.
+       /////////////////////////////////////////////////////////////////
+
+       void WriteMFA(ostream &outfile, int numColumns,
+                       bool useIndex = false) const {
+               assert(isValid);
+               assert(data);
+               assert(!outfile.fail());
+
+               // print out heading
+               if (useIndex)
+                       outfile << ">S" << GetLabel() << endl;
+               else
+                       outfile << ">" << header << endl;
+
+               // print out character data
+               int ct = 1;
+               for (; ct <= length; ct++) {
+                       outfile << (*data)[ct];
+                       if (ct % numColumns == 0)
+                               outfile << endl;
+               }
+               if ((ct - 1) % numColumns != 0)
+                       outfile << endl;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Clone()
+       //
+       // Returns a new deep copy of the sequence.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *Clone() const {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               ret->data = new SafeVector<char>;
+               assert(ret->data);
+               *(ret->data) = *data;
+               ret->length = length;
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetRange()
+       //
+       // Returns a new sequence object consisting of a range of
+       // characters from the current sequence.
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *GetRange(int start, int end) const {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               assert(start >= 1 && start <= length);
+               assert(end >= 1 && end <= length);
+               assert(start <= end);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               ret->data = new SafeVector<char>;
+               assert(ret->data);
+               ret->data->push_back('@');
+               for (int i = start; i <= end; i++)
+                       ret->data->push_back((*data)[i]);
+               ret->length = end - start + 1;
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::AddGaps()
+       //
+       // Given a SafeVector<char> containing the skeleton for an
+       // alignment and the identity of the current character, this
+       // routine will create a new sequence with all necessary gaps added.
+       // For instance,
+       //    alignment = "XXXBBYYYBBYYXX"
+       //    id = 'X'
+       // will perform the transformation
+       //    "ATGCCGTCA" --> "ATGCC---GT--CA"
+       //                    (XXXBBYYYBBYYXX)
+       /////////////////////////////////////////////////////////////////
+
+       Sequence *AddGaps(SafeVector<char> *alignment, char id) {
+               Sequence *ret = new Sequence();
+               assert(ret);
+
+               ret->isValid = isValid;
+               ret->header = header;
+               ret->data = new SafeVector<char>;
+               assert(ret->data);
+               ret->length = (int) alignment->size();
+               ret->sequenceLabel = sequenceLabel;
+               ret->inputLabel = inputLabel;
+               ret->data->push_back('@');
+
+               SafeVector<char>::iterator dataIter = data->begin() + 1;
+               for (SafeVector<char>::iterator iter = alignment->begin();
+                               iter != alignment->end(); ++iter) {
+                       if (*iter == 'B' || *iter == id) {
+                               ret->data->push_back(*dataIter);
+                               ++dataIter;
+                       } else
+                               ret->data->push_back('-');
+               }
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetString()
+       //
+       // Returns the sequence as a string with gaps removed.
+       /////////////////////////////////////////////////////////////////
+
+       string GetString() {
+               string s = "";
+               for (int i = 1; i <= length; i++) {
+                       if ((*data)[i] != '-')
+                               s += (*data)[i];
+               }
+               return s;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::GetMapping()
+       //
+       // Returns a SafeVector<int> containing the indices of every
+       // character in the sequence.  For instance, if the data is
+       // "ATGCC---GT--CA", the method returns {1,2,3,4,5,9,10,13,14}.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<int> *GetMapping() const {
+               SafeVector<int> *ret = new SafeVector<int>(1, 0);
+               for (int i = 1; i <= length; i++) {
+                       if ((*data)[i] != '-')
+                               ret->push_back(i);
+               }
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // Sequence::Highlight()
+       //
+       // Changes all positions with score >= cutoff to upper case and
+       // all positions with score < cutoff to lower case.
+       /////////////////////////////////////////////////////////////////
+
+       void Highlight(const SafeVector<float> &scores, const float cutoff) {
+               for (int i = 1; i <= length; i++) {
+                       if (scores[i - 1] >= cutoff)
+                               (*data)[i] = toupper((*data)[i]);
+                       else
+                               (*data)[i] = tolower((*data)[i]);
+               }
+       }
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/SparseMatrix.h b/binaries/src/MSAProbs-0.9.7/MSAProbs/SparseMatrix.h
new file mode 100644 (file)
index 0000000..51b273d
--- /dev/null
@@ -0,0 +1,266 @@
+/////////////////////////////////////////////////////////////////
+// SparseMatrix.h
+//
+// Sparse matrix computations
+/////////////////////////////////////////////////////////////////
+
+#ifndef SPARSEMATRIX_H
+#define SPARSEMATRIX_H
+
+#include <iostream>
+
+using namespace std;
+
+const float POSTERIOR_CUTOFF = 0.01;         // Minimum posterior probability:
+// dense cells below this value are not
+// stored when a SparseMatrix is built.
+
+typedef pair<int, float> PIF;                 // Sparse matrix entry type
+//   first  --> column index (1-based)
+//   second --> posterior value of that cell
+
+/////////////////////////////////////////////////////////////////
+// SparseMatrix
+//
+// Class for sparse matrix computations
+/////////////////////////////////////////////////////////////////
+
+class SparseMatrix {
+
+       int seq1Length, seq2Length;                     // dimensions of matrix
+       VI rowSize;                              // rowSize[i] = # of cells in row i
+       SafeVector<PIF> data;                           // data values
+       SafeVector<SafeVector<PIF>::iterator> rowPtrs; // pointers to the beginning of each row
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::SparseMatrix()
+       //
+       // Private constructor.
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix() {
+       }
+
+public:
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::SparseMatrix()
+       //
+       // Constructor.  Builds a sparse matrix from a posterior matrix.
+       // Note that the expected format for the posterior matrix is as
+       // a (seq1Length+1) x (seq2Length+1) matrix where the 0th row
+       // and 0th column are ignored (they should contain all zeroes).
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix(int seq1Length, int seq2Length, const VF &posterior) :
+                       seq1Length(seq1Length), seq2Length(seq2Length) {
+
+               int numCells = 0;
+
+               assert(seq1Length > 0);
+               assert(seq2Length > 0);
+
+               // calculate memory required; count the number of cells in the
+               // posterior matrix above the threshold
+               VF::const_iterator postPtr = posterior.begin();
+               for (int i = 0; i <= seq1Length; i++) {
+                       for (int j = 0; j <= seq2Length; j++) {
+                               if (*(postPtr++) >= POSTERIOR_CUTOFF) {
+                                       assert(i != 0 && j != 0);
+                                       numCells++;
+                               }
+                       }
+               }
+
+               // allocate memory
+               data.resize(numCells);
+               rowSize.resize(seq1Length + 1);
+               rowSize[0] = -1;
+               rowPtrs.resize(seq1Length + 1);
+               rowPtrs[0] = data.end();
+
+               // build sparse matrix
+               postPtr = posterior.begin() + seq2Length + 1; // note that we're skipping the first row here
+               SafeVector<PIF>::iterator dataPtr = data.begin();
+               for (int i = 1; i <= seq1Length; i++) {
+                       postPtr++;              // and skipping the first column of each row
+                       rowPtrs[i] = dataPtr;
+                       for (int j = 1; j <= seq2Length; j++) {
+                               if (*postPtr >= POSTERIOR_CUTOFF) {
+                                       dataPtr->first = j;
+                                       dataPtr->second = *postPtr;
+                                       dataPtr++;
+                               }
+                               postPtr++;
+                       }
+                       rowSize[i] = dataPtr - rowPtrs[i];
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetRowPtr()
+       //
+       // Returns the pointer to a particular row in the sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       SafeVector<PIF>::iterator GetRowPtr(int row) const {
+               assert(row >= 1 && row <= seq1Length);
+               return rowPtrs[row];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetValue()
+       //
+       // Returns value at a particular row, column.
+       /////////////////////////////////////////////////////////////////
+
+       float GetValue(int row, int col) {
+               assert(row >= 1 && row <= seq1Length);
+               assert(col >= 1 && col <= seq2Length);
+               for (int i = 0; i < rowSize[row]; i++) {
+                       if (rowPtrs[row][i].first == col)
+                               return rowPtrs[row][i].second;
+               }
+               return 0;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetRowSize()
+       //
+       // Returns the number of entries in a particular row.
+       /////////////////////////////////////////////////////////////////
+
+       int GetRowSize(int row) const {
+               assert(row >= 1 && row <= seq1Length);
+               return rowSize[row];
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetSeq1Length()
+       //
+       // Returns the first dimension of the matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSeq1Length() const {
+               return seq1Length;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetSeq2Length()
+       //
+       // Returns the second dimension of the matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetSeq2Length() const {
+               return seq2Length;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetRowPtr
+       //
+       // Returns the pointer to a particular row in the sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       int GetNumCells() const {
+               return data.size();
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::Print()
+       //
+       // Prints out a sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       void Print(ostream &outfile) const {
+               outfile << "Sparse Matrix:" << endl;
+               for (int i = 1; i <= seq1Length; i++) {
+                       outfile << "  " << i << ":";
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               outfile << " (" << rowPtrs[i][j].first << ","
+                                               << rowPtrs[i][j].second << ")";
+                       }
+                       outfile << endl;
+               }
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::ComputeTranspose()
+       //
+       // Returns a new sparse matrix containing the transpose of the
+       // current matrix.
+       /////////////////////////////////////////////////////////////////
+
+       SparseMatrix *ComputeTranspose() const {
+
+               // create a new sparse matrix
+               SparseMatrix *ret = new SparseMatrix();
+               int numCells = data.size();
+
+               ret->seq1Length = seq2Length;
+               ret->seq2Length = seq1Length;
+
+               // allocate memory
+               ret->data.resize(numCells);
+               ret->rowSize.resize(seq2Length + 1);
+               ret->rowSize[0] = -1;
+               ret->rowPtrs.resize(seq2Length + 1);
+               ret->rowPtrs[0] = ret->data.end();
+
+               // compute row sizes
+               for (int i = 1; i <= seq2Length; i++)
+                       ret->rowSize[i] = 0;
+               for (int i = 0; i < numCells; i++)
+                       ret->rowSize[data[i].first]++;
+
+               // compute row ptrs
+               for (int i = 1; i <= seq2Length; i++) {
+                       ret->rowPtrs[i] =
+                                       (i == 1) ?
+                                                       ret->data.begin() :
+                                                       ret->rowPtrs[i - 1] + ret->rowSize[i - 1];
+               }
+
+               // now fill in data
+               SafeVector<SafeVector<PIF>::iterator> currPtrs = ret->rowPtrs;
+
+               for (int i = 1; i <= seq1Length; i++) {
+                       SafeVector<PIF>::iterator row = rowPtrs[i];
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               currPtrs[row[j].first]->first = i;
+                               currPtrs[row[j].first]->second = row[j].second;
+                               currPtrs[row[j].first]++;
+                       }
+               }
+
+               return ret;
+       }
+
+       /////////////////////////////////////////////////////////////////
+       // SparseMatrix::GetPosterior()
+       //
+       // Return the posterior representation of the sparse matrix.
+       /////////////////////////////////////////////////////////////////
+
+       VF *GetPosterior() const {
+
+               // create a new posterior matrix
+               VF *posteriorPtr = new VF((seq1Length + 1) * (seq2Length + 1));
+               assert(posteriorPtr);
+               VF &posterior = *posteriorPtr;
+
+               // build the posterior matrix
+               for (int i = 0; i < (seq1Length + 1) * (seq2Length + 1); i++)
+                       posterior[i] = 0;
+               for (int i = 1; i <= seq1Length; i++) {
+                       VF::iterator postPtr = posterior.begin() + i * (seq2Length + 1);
+                       for (int j = 0; j < rowSize[i]; j++) {
+                               postPtr[rowPtrs[i][j].first] = rowPtrs[i][j].second;
+                       }
+               }
+
+               return posteriorPtr;
+       }
+
+};
+
+#endif
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/main.cpp b/binaries/src/MSAProbs-0.9.7/MSAProbs/main.cpp
new file mode 100644 (file)
index 0000000..6fd1934
--- /dev/null
@@ -0,0 +1,16 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "MSA.h"
+
+int main(int argc, char* argv[]) {
+       MSA msa(argc, argv); // the raw command line is handed to MSA's constructor unchanged
+
+       return 0; // always reports success once the MSA object has been constructed
+}
diff --git a/binaries/src/MSAProbs-0.9.7/MSAProbs/msaprobs b/binaries/src/MSAProbs-0.9.7/MSAProbs/msaprobs
new file mode 100755 (executable)
index 0000000..e7e06a4
Binary files /dev/null and b/binaries/src/MSAProbs-0.9.7/MSAProbs/msaprobs differ
diff --git a/binaries/src/MSAProbs-0.9.7/README b/binaries/src/MSAProbs-0.9.7/README
new file mode 100644 (file)
index 0000000..5114c85
--- /dev/null
@@ -0,0 +1,48 @@
+\r
+MSAPROBS is a new and practical protein multiple sequence alignment\r
+algorithm based on pair hidden Markov models and partition function\r
+posterior probabilities. Assessed on BAliBASE 3.0, PREFAB 4.0,\r
+SABMARK 1.65, and OXBENCH, MSAProbs achieves the statistically \r
+highest alignment accuracy, compared to ClustalW 2.0.10, MAFFT 6.717(\r
+using L-INS-i with --maxiterate = 1000), MUSCLE 3.8.31, ProbCons 1.12,\r
+and Probalign 1.3. (current version 0.9.3, March 17, 2010).\r
+\r
+\r
+To use this software, please cite the following paper:\r
+/******************************************************\r
+Yongchao Liu, Bertil Schmidt, Douglas L. Maskell:\r
+\r
+"MSAProbs: multiple sequence alignment based on \r
+pair hidden Markov models and partition function posterior probabilities",\r
+\r
+Bioinformatics 2010, 26(16): 1958-1964\r
+\r
+*******************************************************/\r
+\r
+This software is developed by Liu Yongchao, School of Computer Engineering,\r
+Nanyang Technological University. If you have any comments or problems, \r
+please contact Liu Yongchao directly using either of the following email \r
+addresses: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com.\r
+\r
+MSAPROBS is open-source software, complying with the General Public \r
+Licence (GPL) version 3.0. MSAPROBS is distributed WITHOUT WARRANTY, express or\r
+implied. The authors accept NO LEGAL LIABILITY OR  RESPONSIBILITY  for\r
+loss due to reliance on the program.\r
+\r
+(1) Linux and Windows are supported, with a Makefile and a Visual Studio 2005\r
+project co-existing in the source code tarball.\r
+\r
+Change to the sub-directory MSAProbs, where the Makefile for Linux can be found.\r
+\r
+(2) The default compile options enable OpenMP support to fully utilize the \r
+compute capability of multi-core CPUs, as multi-core CPUs have become commonplace.\r
+\r
+Typical Usage:\r
+ (1) "./msaprobs -help" or "./msaprobs -?"\r
+                       Get the command line options\r
+\r
+ (2) "./msaprobs infile >outfile" or "./msaprobs infile -o outfile"\r
+                       The alignments are printed out into file "outfile" in FASTA format\r
+\r
+ (3) ./msaprobs infile -o outfile -num_threads 4\r
+                       Use four threads to accelerate the execution\r
index 87ce823..0eee21d 100755 (executable)
@@ -93,3 +93,18 @@ make
 echo "DONE"
 cd ..
 
+echo "Compiling GLProbs ..."
+cd GLProbs-1.0 # the GLProbs Makefile sits directly in this directory
+make clean
+make
+echo "DONE" # printed unconditionally; the exit status of make is not checked
+cd ..
+
+# MSAProbs keeps its Makefile in the MSAProbs/ sub-directory, hence the two-level cd below.
+echo "Compiling MSAProbs-0.9.7 ..."
+cd MSAProbs-0.9.7/MSAProbs
+make clean
+make
+echo "DONE" # printed unconditionally; the exit status of make is not checked
+cd ../..
+
index 7be938c..38ad0fa 100644 (file)
@@ -34,3 +34,12 @@ chmod +x iupred/iupred
 
 echo "Setting executable flag for Jpred..."
 chmod +x jpred/i686/* jpred/x86_64/*
+
+echo "Setting executable flag for ViennaRNA..."
+chmod +x ViennaRNA/Progs/RNAalifold
+
+echo "Setting executable flag for MSAProbs-0.9.7..."
+chmod +x MSAProbs-0.9.7/MSAProbs/msaprobs # binary added next to the MSAProbs sources in this commit
+
+echo "Setting executable flag for GLProbs-1.0..."
+chmod +x GLProbs-1.0/glprobs # binary added next to the GLProbs sources in this commit