Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / estring.cpp
diff --git a/website/archive/binaries/mac/src/muscle/estring.cpp b/website/archive/binaries/mac/src/muscle/estring.cpp
deleted file mode 100644 (file)
index ef7a576..0000000
+++ /dev/null
@@ -1,689 +0,0 @@
-#include "muscle.h"\r
-#include "pwpath.h"\r
-#include "estring.h"\r
-#include "seq.h"\r
-#include "msa.h"\r
-\r
-/***\r
-An "estring" is an edit string that operates on a sequence.\r
-An estring is represented as a vector of integers.\r
-It is interpreted in order of increasing suffix.\r
-A positive value n means copy n letters.\r
-A negative value -n means insert n indels.\r
-Zero marks the end of the vector.\r
-Consecutive entries must have opposite sign, i.e. the\r
-shortest possible representation must be used.\r
-\r
-A "tpair" is a traceback path for a pairwise alignment\r
-represented as two estrings, one for each sequence.\r
-***/\r
-\r
-#define c2(c,d)        (((unsigned char) c) << 8 | (unsigned char) d)\r
-\r
-unsigned LengthEstring(const short es[])\r
-       {\r
-       unsigned i = 0;\r
-       while (*es++ != 0)\r
-               ++i;\r
-       return i;\r
-       }\r
-\r
-short *EstringNewCopy(const short es[])\r
-       {\r
-       unsigned n = LengthEstring(es) + 1;\r
-       short *esNew = new short[n];\r
-       memcpy(esNew, es, n*sizeof(short));\r
-       return esNew;\r
-       }\r
-\r
-void LogEstring(const short es[])\r
-       {\r
-       Log("<");\r
-       for (unsigned i = 0; es[i] != 0; ++i)\r
-               {\r
-               if (i > 0)\r
-                       Log(" ");\r
-               Log("%d", es[i]);\r
-               }\r
-       Log(">");\r
-       }\r
-\r
-static bool EstringsEq(const short es1[], const short es2[])\r
-       {\r
-       for (;;)\r
-               {\r
-               if (*es1 != *es2)\r
-                       return false;\r
-               if (0 == *es1)\r
-                       break;\r
-               ++es1;\r
-               ++es2;\r
-               }\r
-       return true;\r
-       }\r
-\r
-static void EstringCounts(const short es[], unsigned *ptruSymbols,\r
-  unsigned *ptruIndels)\r
-       {\r
-       unsigned uSymbols = 0;\r
-       unsigned uIndels = 0;\r
-       for (unsigned i = 0; es[i] != 0; ++i)\r
-               {\r
-               short n = es[i];\r
-               if (n > 0)\r
-                       uSymbols += n;\r
-               else if (n < 0)\r
-                       uIndels += -n;\r
-               }\r
-       *ptruSymbols = uSymbols;\r
-       *ptruIndels = uIndels;\r
-       }\r
-\r
-static char *EstringOp(const short es[], const char s[])\r
-       {\r
-       unsigned uSymbols;\r
-       unsigned uIndels;\r
-       EstringCounts(es, &uSymbols, &uIndels);\r
-       assert((unsigned) strlen(s) == uSymbols);\r
-       char *sout = new char[uSymbols + uIndels + 1];\r
-       char *psout = sout;\r
-       for (;;)\r
-               {\r
-               int n = *es++;\r
-               if (0 == n)\r
-                       break;\r
-               if (n > 0)\r
-                       for (int i = 0; i < n; ++i)\r
-                               *psout++ = *s++;\r
-               else\r
-                       for (int i = 0; i < -n; ++i)\r
-                               *psout++ = '-';\r
-               }\r
-       assert(0 == *s);\r
-       *psout = 0;\r
-       return sout;\r
-       }\r
-\r
-void EstringOp(const short es[], const Seq &sIn, Seq &sOut)\r
-       {\r
-#if    DEBUG\r
-       unsigned uSymbols;\r
-       unsigned uIndels;\r
-       EstringCounts(es, &uSymbols, &uIndels);\r
-       assert(sIn.Length() == uSymbols);\r
-#endif\r
-       sOut.Clear();\r
-       sOut.SetName(sIn.GetName());\r
-       int p = 0;\r
-       for (;;)\r
-               {\r
-               int n = *es++;\r
-               if (0 == n)\r
-                       break;\r
-               if (n > 0)\r
-                       for (int i = 0; i < n; ++i)\r
-                               {\r
-                               char c = sIn[p++];\r
-                               sOut.push_back(c);\r
-                               }\r
-               else\r
-                       for (int i = 0; i < -n; ++i)\r
-                               sOut.push_back('-');\r
-               }\r
-       }\r
-\r
-unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)\r
-       {\r
-       unsigned uSymbols;\r
-       unsigned uIndels;\r
-       EstringCounts(es, &uSymbols, &uIndels);\r
-       assert(sIn.Length() == uSymbols);\r
-\r
-       unsigned uColCount = uSymbols + uIndels;\r
-\r
-       a.Clear();\r
-       a.SetSize(1, uColCount);\r
-\r
-       a.SetSeqName(0, sIn.GetName());\r
-       a.SetSeqId(0, sIn.GetId());\r
-\r
-       unsigned p = 0;\r
-       unsigned uColIndex = 0;\r
-       for (;;)\r
-               {\r
-               int n = *es++;\r
-               if (0 == n)\r
-                       break;\r
-               if (n > 0)\r
-                       for (int i = 0; i < n; ++i)\r
-                               {\r
-                               char c = sIn[p++];\r
-                               a.SetChar(0, uColIndex++, c);\r
-                               }\r
-               else\r
-                       for (int i = 0; i < -n; ++i)\r
-                               a.SetChar(0, uColIndex++, '-');\r
-               }\r
-       assert(uColIndex == uColCount);\r
-       return uColCount;\r
-       }\r
-\r
-void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)\r
-       {\r
-// First pass to determine size of estrings esA and esB\r
-       const unsigned uEdgeCount = Path.GetEdgeCount();\r
-       if (0 == uEdgeCount)\r
-               {\r
-               short *esA = new short[1];\r
-               short *esB = new short[1];\r
-               esA[0] = 0;\r
-               esB[0] = 0;\r
-               *ptresA = esA;\r
-               *ptresB = esB;\r
-               return;\r
-               }\r
-\r
-       unsigned iLengthA = 1;\r
-       unsigned iLengthB = 1;\r
-       const char cFirstEdgeType = Path.GetEdge(0).cType;\r
-       char cPrevEdgeType = cFirstEdgeType;\r
-       for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
-               {\r
-               const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
-               char cEdgeType = Edge.cType;\r
-\r
-               switch (c2(cPrevEdgeType, cEdgeType))\r
-                       {\r
-               case c2('M', 'M'):\r
-               case c2('D', 'D'):\r
-               case c2('I', 'I'):\r
-                       break;\r
-\r
-               case c2('D', 'M'):\r
-               case c2('M', 'D'):\r
-                       ++iLengthB;\r
-                       break;\r
-\r
-               case c2('I', 'M'):\r
-               case c2('M', 'I'):\r
-                       ++iLengthA;\r
-                       break;\r
-\r
-               case c2('I', 'D'):\r
-               case c2('D', 'I'):\r
-                       ++iLengthB;\r
-                       ++iLengthA;\r
-                       break;\r
-\r
-               default:\r
-                       assert(false);\r
-                       }\r
-               cPrevEdgeType = cEdgeType;\r
-               }\r
-\r
-// Pass2 for seq A\r
-       {\r
-       short *esA = new short[iLengthA+1];\r
-       unsigned iA = 0;\r
-       switch (Path.GetEdge(0).cType)\r
-               {\r
-       case 'M':\r
-       case 'D':\r
-               esA[0] = 1;\r
-               break;\r
-\r
-       case 'I':\r
-               esA[0] = -1;\r
-               break;\r
-\r
-       default:\r
-               assert(false);\r
-               }\r
-\r
-       char cPrevEdgeType = cFirstEdgeType;\r
-       for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
-               {\r
-               const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
-               char cEdgeType = Edge.cType;\r
-\r
-               switch (c2(cPrevEdgeType, cEdgeType))\r
-                       {\r
-               case c2('M', 'M'):\r
-               case c2('D', 'D'):\r
-               case c2('D', 'M'):\r
-               case c2('M', 'D'):\r
-                       ++(esA[iA]);\r
-                       break;\r
-\r
-               case c2('I', 'D'):\r
-               case c2('I', 'M'):\r
-                       ++iA;\r
-                       esA[iA] = 1;\r
-                       break;\r
-\r
-               case c2('M', 'I'):\r
-               case c2('D', 'I'):\r
-                       ++iA;\r
-                       esA[iA] = -1;\r
-                       break;\r
-\r
-               case c2('I', 'I'):\r
-                       --(esA[iA]);\r
-                       break;\r
-\r
-               default:\r
-                       assert(false);\r
-                       }\r
-\r
-               cPrevEdgeType = cEdgeType;\r
-               }\r
-       assert(iA == iLengthA - 1);\r
-       esA[iLengthA] = 0;\r
-       *ptresA = esA;\r
-       }\r
-\r
-       {\r
-// Pass2 for seq B\r
-       short *esB = new short[iLengthB+1];\r
-       unsigned iB = 0;\r
-       switch (Path.GetEdge(0).cType)\r
-               {\r
-       case 'M':\r
-       case 'I':\r
-               esB[0] = 1;\r
-               break;\r
-\r
-       case 'D':\r
-               esB[0] = -1;\r
-               break;\r
-\r
-       default:\r
-               assert(false);\r
-               }\r
-\r
-       char cPrevEdgeType = cFirstEdgeType;\r
-       for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
-               {\r
-               const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
-               char cEdgeType = Edge.cType;\r
-\r
-               switch (c2(cPrevEdgeType, cEdgeType))\r
-                       {\r
-               case c2('M', 'M'):\r
-               case c2('I', 'I'):\r
-               case c2('I', 'M'):\r
-               case c2('M', 'I'):\r
-                       ++(esB[iB]);\r
-                       break;\r
-\r
-               case c2('D', 'I'):\r
-               case c2('D', 'M'):\r
-                       ++iB;\r
-                       esB[iB] = 1;\r
-                       break;\r
-\r
-               case c2('M', 'D'):\r
-               case c2('I', 'D'):\r
-                       ++iB;\r
-                       esB[iB] = -1;\r
-                       break;\r
-\r
-               case c2('D', 'D'):\r
-                       --(esB[iB]);\r
-                       break;\r
-\r
-               default:\r
-                       assert(false);\r
-                       }\r
-\r
-               cPrevEdgeType = cEdgeType;\r
-               }\r
-       assert(iB == iLengthB - 1);\r
-       esB[iLengthB] = 0;\r
-       *ptresB = esB;\r
-       }\r
-\r
-#if    DEBUG\r
-       {\r
-       const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);\r
-       unsigned uSymbols;\r
-       unsigned uIndels;\r
-       EstringCounts(*ptresA, &uSymbols, &uIndels);\r
-       assert(uSymbols == LastEdge.uPrefixLengthA);\r
-       assert(uSymbols + uIndels == uEdgeCount);\r
-\r
-       EstringCounts(*ptresB, &uSymbols, &uIndels);\r
-       assert(uSymbols == LastEdge.uPrefixLengthB);\r
-       assert(uSymbols + uIndels == uEdgeCount);\r
-\r
-       PWPath TmpPath;\r
-       EstringsToPath(*ptresA, *ptresB, TmpPath);\r
-       TmpPath.AssertEqual(Path);\r
-       }\r
-#endif\r
-       }\r
-\r
-void EstringsToPath(const short esA[], const short esB[], PWPath &Path)\r
-       {\r
-       Path.Clear();\r
-       unsigned iA = 0;\r
-       unsigned iB = 0;\r
-       int nA = esA[iA++];\r
-       int nB = esB[iB++];\r
-       unsigned uPrefixLengthA = 0;\r
-       unsigned uPrefixLengthB = 0;\r
-       for (;;)\r
-               {\r
-               char cType;\r
-               if (nA > 0)\r
-                       {\r
-                       if (nB > 0)\r
-                               {\r
-                               cType = 'M';\r
-                               --nA;\r
-                               --nB;\r
-                               }\r
-                       else if (nB < 0)\r
-                               {\r
-                               cType = 'D';\r
-                               --nA;\r
-                               ++nB;\r
-                               }\r
-                       else\r
-                               assert(false);\r
-                       }\r
-               else if (nA < 0)\r
-                       {\r
-                       if (nB > 0)\r
-                               {\r
-                               cType = 'I';\r
-                               ++nA;\r
-                               --nB;\r
-                               }\r
-                       else\r
-                               assert(false);\r
-                       }\r
-               else\r
-                       assert(false);\r
-\r
-               switch (cType)\r
-                       {\r
-               case 'M':\r
-                       ++uPrefixLengthA;\r
-                       ++uPrefixLengthB;\r
-                       break;\r
-               case 'D':\r
-                       ++uPrefixLengthA;\r
-                       break;\r
-               case 'I':\r
-                       ++uPrefixLengthB;\r
-                       break;\r
-                       }\r
-\r
-               PWEdge Edge;\r
-               Edge.cType = cType;\r
-               Edge.uPrefixLengthA = uPrefixLengthA;\r
-               Edge.uPrefixLengthB = uPrefixLengthB;\r
-               Path.AppendEdge(Edge);\r
-\r
-               if (nA == 0)\r
-                       {\r
-                       if (0 == esA[iA])\r
-                               {\r
-                               assert(0 == esB[iB]);\r
-                               break;\r
-                               }\r
-                       nA = esA[iA++];\r
-                       }\r
-               if (nB == 0)\r
-                       nB = esB[iB++];\r
-               }\r
-       }\r
-\r
-/***\r
-Multiply two estrings to make a third estring.\r
-The product of two estrings e1*e2 is defined to be\r
-the estring that produces the same result as applying\r
-e1 then e2. Multiplication is not commutative. In fact,\r
-the reversed order is undefined unless both estrings\r
-consist of a single, identical, positive entry.\r
-A primary motivation for using estrings is that\r
-multiplication is very fast, reducing the time\r
-needed to construct the root alignment.\r
-\r
-Example\r
-\r
-       <-1,3>(XXX)     = -XXX\r
-       <2,-1,2>(-XXX) = -X-XX\r
-\r
-Therefore,\r
-\r
-       <-1,3>*<2,-1,2> = <-1,1,-1,2>\r
-***/\r
-\r
-static bool CanMultiplyEstrings(const short es1[], const short es2[])\r
-       {\r
-       unsigned uSymbols1;\r
-       unsigned uSymbols2;\r
-       unsigned uIndels1;\r
-       unsigned uIndels2;\r
-       EstringCounts(es1, &uSymbols1, &uIndels1);\r
-       EstringCounts(es2, &uSymbols2, &uIndels2);\r
-       return uSymbols1 + uIndels1 == uSymbols2;\r
-       }\r
-\r
-static inline void AppendGaps(short esp[], int &ip, int n)\r
-       {\r
-       if (-1 == ip)\r
-               esp[++ip] = n;\r
-       else if (esp[ip] < 0)\r
-               esp[ip] += n;\r
-       else\r
-               esp[++ip] = n;\r
-       }\r
-\r
-static inline void AppendSymbols(short esp[], int &ip, int n)\r
-       {\r
-       if (-1 == ip)\r
-               esp[++ip] = n;\r
-       else if (esp[ip] > 0)\r
-               esp[ip] += n;\r
-       else\r
-               esp[++ip] = n;\r
-       }\r
-\r
-void MulEstrings(const short es1[], const short es2[], short esp[])\r
-       {\r
-       assert(CanMultiplyEstrings(es1, es2));\r
-\r
-       unsigned i1 = 0;\r
-       int ip = -1;\r
-       int n1 = es1[i1++];\r
-       for (unsigned i2 = 0; ; ++i2)\r
-               {\r
-               int n2 = es2[i2];\r
-               if (0 == n2)\r
-                       break;\r
-               if (n2 > 0)\r
-                       {\r
-                       for (;;)\r
-                               {\r
-                               if (n1 < 0)\r
-                                       {\r
-                                       if (n2 > -n1)\r
-                                               {\r
-                                               AppendGaps(esp, ip, n1);\r
-                                               n2 += n1;\r
-                                               n1 = es1[i1++];\r
-                                               }\r
-                                       else if (n2 == -n1)\r
-                                               {\r
-                                               AppendGaps(esp, ip, n1);\r
-                                               n1 = es1[i1++];\r
-                                               break;\r
-                                               }\r
-                                       else\r
-                                               {\r
-                                               assert(n2 < -n1);\r
-                                               AppendGaps(esp, ip, -n2);\r
-                                               n1 += n2;\r
-                                               break;\r
-                                               }\r
-                                       }\r
-                               else\r
-                                       {\r
-                                       assert(n1 > 0);\r
-                                       if (n2 > n1)\r
-                                               {\r
-                                               AppendSymbols(esp, ip, n1);\r
-                                               n2 -= n1;\r
-                                               n1 = es1[i1++];\r
-                                               }\r
-                                       else if (n2 == n1)\r
-                                               {\r
-                                               AppendSymbols(esp, ip, n1);\r
-                                               n1 = es1[i1++];\r
-                                               break;\r
-                                               }\r
-                                       else\r
-                                               {\r
-                                               assert(n2 < n1);\r
-                                               AppendSymbols(esp, ip, n2);\r
-                                               n1 -= n2;\r
-                                               break;\r
-                                               }\r
-                                       }\r
-                               }\r
-                       }\r
-               else\r
-                       {\r
-                       assert(n2 < 0);\r
-                       AppendGaps(esp, ip, n2);\r
-                       }\r
-               }\r
-       esp[++ip] = 0;\r
-\r
-#if    DEBUG\r
-       {\r
-       int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);\r
-       assert(ip < MaxLen);\r
-       if (ip >= 2)\r
-               for (int i = 0; i < ip - 2; ++i)\r
-                       {\r
-                       if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))\r
-                               {\r
-                               Log("Bad result of MulEstring: ");\r
-                               LogEstring(esp);\r
-                               Quit("Assert failed (alternating signs)");\r
-                               }\r
-                       }\r
-       unsigned uSymbols1;\r
-       unsigned uSymbols2;\r
-       unsigned uSymbolsp;\r
-       unsigned uIndels1;\r
-       unsigned uIndels2;\r
-       unsigned uIndelsp;\r
-       EstringCounts(es1, &uSymbols1, &uIndels1);\r
-       EstringCounts(es2, &uSymbols2, &uIndels2);\r
-       EstringCounts(esp, &uSymbolsp, &uIndelsp);\r
-       if (uSymbols1 + uIndels1 != uSymbols2)\r
-               {\r
-               Log("Bad result of MulEstring: ");\r
-               LogEstring(esp);\r
-               Quit("Assert failed (counts1 %u %u %u)",\r
-                 uSymbols1, uIndels1, uSymbols2);\r
-               }\r
-       }\r
-#endif\r
-       }\r
-\r
-static void test(const short es1[], const short es2[], const short esa[])\r
-       {\r
-       unsigned uSymbols1;\r
-       unsigned uSymbols2;\r
-       unsigned uIndels1;\r
-       unsigned uIndels2;\r
-       EstringCounts(es1, &uSymbols1, &uIndels1);\r
-       EstringCounts(es2, &uSymbols2, &uIndels2);\r
-\r
-       char s[4096];\r
-       memset(s, 'X', sizeof(s));\r
-       s[uSymbols1] = 0;\r
-\r
-       char *s1 = EstringOp(es1, s);\r
-       char *s12 = EstringOp(es2, s1);\r
-\r
-       memset(s, 'X', sizeof(s));\r
-       s[uSymbols2] = 0;\r
-       char *s2 = EstringOp(es2, s);\r
-\r
-       Log("%s * %s = %s\n", s1, s2, s12);\r
-\r
-       LogEstring(es1);\r
-       Log(" * ");\r
-       LogEstring(es2);\r
-       Log(" = ");\r
-       LogEstring(esa);\r
-       Log("\n");\r
-\r
-       short esp[4096];\r
-       MulEstrings(es1, es2, esp);\r
-       LogEstring(esp);\r
-       if (!EstringsEq(esp, esa))\r
-               Log(" *ERROR* ");\r
-       Log("\n");\r
-\r
-       memset(s, 'X', sizeof(s));\r
-       s[uSymbols1] = 0;\r
-       char *sp = EstringOp(esp, s);\r
-       Log("%s\n", sp);\r
-       Log("\n==========\n\n");\r
-       }\r
-\r
-void TestEstrings()\r
-       {\r
-       SetListFileName("c:\\tmp\\muscle.log", false);\r
-       //{\r
-       //short es1[] = { -1, 1, -1, 0 };\r
-       //short es2[] = { 1, -1, 2, 0 };\r
-       //short esa[] = { -2, 1, -1, 0 };\r
-       //test(es1, es2, esa);\r
-       //}\r
-       //{\r
-       //short es1[] = { 2, -1, 2, 0 };\r
-       //short es2[] = { 1, -1, 3, -1, 1, 0 };\r
-       //short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };\r
-       //test(es1, es2, esa);\r
-       //}\r
-       //{\r
-       //short es1[] = { -1, 3, 0 };\r
-       //short es2[] = { 2, -1, 2, 0 };\r
-       //short esa[] = { -1, 1, -1, 2, 0 };\r
-       //test(es1, es2, esa);\r
-       //}\r
-       //{\r
-       //short es1[] = { -1, 1, -1, 1, 0};\r
-       //short es2[] = { 4, 0 };\r
-       //short esa[] = { -1, 1, -1, 1, 0};\r
-       //test(es1, es2, esa);\r
-       //}\r
-       //{\r
-       //short es1[] = { 1, -1, 1, -1, 0};\r
-       //short es2[] = { 4, 0 };\r
-       //short esa[] = { 1, -1, 1, -1, 0};\r
-       //test(es1, es2, esa);\r
-       //}\r
-       //{\r
-       //short es1[] = { 1, -1, 1, -1, 0};\r
-       //short es2[] = { -1, 4, -1, 0 };\r
-       //short esa[] = { -1, 1, -1, 1, -2, 0};\r
-       //test(es1, es2, esa);\r
-       //}\r
-       {\r
-       short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};\r
-       short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };\r
-       short esa[] = { 0 };\r
-       test(es1, es2, esa);\r
-       }\r
-       exit(0);\r
-       }\r