Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / upgma2.cpp
diff --git a/website/archive/binaries/mac/src/muscle/upgma2.cpp b/website/archive/binaries/mac/src/muscle/upgma2.cpp
deleted file mode 100644 (file)
index 941b9d2..0000000
+++ /dev/null
@@ -1,395 +0,0 @@
-#include "muscle.h"\r
-#include "tree.h"\r
-#include "distcalc.h"\r
-\r
-// UPGMA clustering in O(N^2) time and space.\r
-\r
-#define        TRACE   0\r
-\r
-#define        MIN(x, y)       ((x) < (y) ? (x) : (y))\r
-#define        MAX(x, y)       ((x) > (y) ? (x) : (y))\r
-#define        AVG(x, y)       (((x) + (y))/2)\r
-\r
-static unsigned g_uLeafCount;\r
-static unsigned g_uTriangleSize;\r
-static unsigned g_uInternalNodeCount;\r
-static unsigned g_uInternalNodeIndex;\r
-\r
-// Triangular distance matrix is g_Dist, which is allocated\r
-// as a one-dimensional vector of length g_uTriangleSize.\r
-// TriangleSubscript(i,j) maps row,column=i,j to the subscript\r
-// into this vector.\r
-// Row / column coordinates are a bit messy.\r
-// Initially they are leaf indexes 0..N-1.\r
-// But each time we create a new node (=new cluster, new subtree),\r
-// we re-use one of the two rows that become available (the children\r
-// of the new node). This saves memory.\r
-// We keep track of this through the g_uNodeIndex vector.\r
-static dist_t *g_Dist;\r
-\r
-// Distance to nearest neighbor in row i of distance matrix.\r
-// Subscript is distance matrix row.\r
-static dist_t *g_MinDist;\r
-\r
-// Nearest neighbor to row i of distance matrix.\r
-// Subscript is distance matrix row.\r
-static unsigned *g_uNearestNeighbor;\r
-\r
-// Node index of row i in distance matrix.\r
-// Node indexes are 0..N-1 for leaves, N..2N-2 for internal nodes.\r
-// Subscript is distance matrix row.\r
-static unsigned *g_uNodeIndex;\r
-\r
-// The following vectors are defined on internal nodes,\r
-// subscripts are internal node index 0..N-2.\r
-// For g_uLeft/Right, value is the node index 0 .. 2N-2\r
-// because a child can be internal or leaf.\r
-static unsigned *g_uLeft;\r
-static unsigned *g_uRight;\r
-static dist_t *g_Height;\r
-static dist_t *g_LeftLength;\r
-static dist_t *g_RightLength;\r
-\r
-static inline unsigned TriangleSubscript(unsigned uIndex1, unsigned uIndex2)\r
-       {\r
-#if    DEBUG\r
-       if (uIndex1 >= g_uLeafCount || uIndex2 >= g_uLeafCount)\r
-               Quit("TriangleSubscript(%u,%u) %u", uIndex1, uIndex2, g_uLeafCount);\r
-#endif\r
-       unsigned v;\r
-       if (uIndex1 >= uIndex2)\r
-               v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;\r
-       else\r
-               v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;\r
-       assert(v < (g_uLeafCount*(g_uLeafCount - 1))/2);\r
-       return v;\r
-       }\r
-\r
-static void ListState()\r
-       {\r
-       Log("Dist matrix\n");\r
-       Log("     ");\r
-       for (unsigned i = 0; i < g_uLeafCount; ++i)\r
-               {\r
-               if (uInsane == g_uNodeIndex[i])\r
-                       continue;\r
-               Log("  %5u", g_uNodeIndex[i]);\r
-               }\r
-       Log("\n");\r
-\r
-       for (unsigned i = 0; i < g_uLeafCount; ++i)\r
-               {\r
-               if (uInsane == g_uNodeIndex[i])\r
-                       continue;\r
-               Log("%5u  ", g_uNodeIndex[i]);\r
-               for (unsigned j = 0; j < g_uLeafCount; ++j)\r
-                       {\r
-                       if (uInsane == g_uNodeIndex[j])\r
-                               continue;\r
-                       if (i == j)\r
-                               Log("       ");\r
-                       else\r
-                               {\r
-                               unsigned v = TriangleSubscript(i, j);\r
-                               Log("%5.2g  ", g_Dist[v]);\r
-                               }\r
-                       }\r
-               Log("\n");\r
-               }\r
-\r
-       Log("\n");\r
-       Log("    i   Node   NrNb      Dist\n");\r
-       Log("-----  -----  -----  --------\n");\r
-       for (unsigned i = 0; i < g_uLeafCount; ++i)\r
-               {\r
-               if (uInsane == g_uNodeIndex[i])\r
-                       continue;\r
-               Log("%5u  %5u  %5u  %8.3f\n",\r
-                 i,\r
-                 g_uNodeIndex[i],\r
-                 g_uNearestNeighbor[i],\r
-                 g_MinDist[i]);\r
-               }\r
-\r
-       Log("\n");\r
-       Log(" Node      L      R  Height  LLength  RLength\n");\r
-       Log("-----  -----  -----  ------  -------  -------\n");\r
-       for (unsigned i = 0; i <= g_uInternalNodeIndex; ++i)\r
-               Log("%5u  %5u  %5u  %6.2g  %6.2g  %6.2g\n",\r
-                 i,\r
-                 g_uLeft[i],\r
-                 g_uRight[i],\r
-                 g_Height[i],\r
-                 g_LeftLength[i],\r
-                 g_RightLength[i]);\r
-       }\r
-\r
-void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage)\r
-       {\r
-       g_uLeafCount = DC.GetCount();\r
-\r
-       g_uTriangleSize = (g_uLeafCount*(g_uLeafCount - 1))/2;\r
-       g_uInternalNodeCount = g_uLeafCount - 1;\r
-\r
-       g_Dist = new dist_t[g_uTriangleSize];\r
-\r
-       g_uNodeIndex = new unsigned[g_uLeafCount];\r
-       g_uNearestNeighbor = new unsigned[g_uLeafCount];\r
-       g_MinDist = new dist_t[g_uLeafCount];\r
-       unsigned *Ids = new unsigned [g_uLeafCount];\r
-       char **Names = new char *[g_uLeafCount];\r
-\r
-       g_uLeft = new unsigned[g_uInternalNodeCount];\r
-       g_uRight = new unsigned[g_uInternalNodeCount];\r
-       g_Height = new dist_t[g_uInternalNodeCount];\r
-       g_LeftLength = new dist_t[g_uInternalNodeCount];\r
-       g_RightLength = new dist_t[g_uInternalNodeCount];\r
-\r
-       for (unsigned i = 0; i < g_uLeafCount; ++i)\r
-               {\r
-               g_MinDist[i] = BIG_DIST;\r
-               g_uNodeIndex[i] = i;\r
-               g_uNearestNeighbor[i] = uInsane;\r
-               Ids[i] = DC.GetId(i);\r
-               Names[i] = strsave(DC.GetName(i));\r
-               }\r
-\r
-       for (unsigned i = 0; i < g_uInternalNodeCount; ++i)\r
-               {\r
-               g_uLeft[i] = uInsane;\r
-               g_uRight[i] = uInsane;\r
-               g_LeftLength[i] = BIG_DIST;\r
-               g_RightLength[i] = BIG_DIST;\r
-               g_Height[i] = BIG_DIST;\r
-               }\r
-\r
-// Compute initial NxN triangular distance matrix.\r
-// Store minimum distance for each full (not triangular) row.\r
-// Loop from 1, not 0, because "row" is 0, 1 ... i-1,\r
-// so nothing to do when i=0.\r
-       for (unsigned i = 1; i < g_uLeafCount; ++i)\r
-               {\r
-               dist_t *Row = g_Dist + TriangleSubscript(i, 0);\r
-               DC.CalcDistRange(i, Row);\r
-               for (unsigned j = 0; j < i; ++j)\r
-                       {\r
-                       const dist_t d = Row[j];\r
-                       if (d < g_MinDist[i])\r
-                               {\r
-                               g_MinDist[i] = d;\r
-                               g_uNearestNeighbor[i] = j;\r
-                               }\r
-                       if (d < g_MinDist[j])\r
-                               {\r
-                               g_MinDist[j] = d;\r
-                               g_uNearestNeighbor[j] = i;\r
-                               }\r
-                       }\r
-               }\r
-\r
-#if    TRACE\r
-       Log("Initial state:\n");\r
-       ListState();\r
-#endif\r
-\r
-       for (g_uInternalNodeIndex = 0; g_uInternalNodeIndex < g_uLeafCount - 1;\r
-         ++g_uInternalNodeIndex)\r
-               {\r
-#if    TRACE\r
-               Log("\n");\r
-               Log("Internal node index %5u\n", g_uInternalNodeIndex);\r
-               Log("-------------------------\n");\r
-#endif\r
-\r
-       // Find nearest neighbors\r
-               unsigned Lmin = uInsane;\r
-               unsigned Rmin = uInsane;\r
-               dist_t dtMinDist = BIG_DIST;\r
-               for (unsigned j = 0; j < g_uLeafCount; ++j)\r
-                       {\r
-                       if (uInsane == g_uNodeIndex[j])\r
-                               continue;\r
-\r
-                       dist_t d = g_MinDist[j];\r
-                       if (d < dtMinDist)\r
-                               {\r
-                               dtMinDist = d;\r
-                               Lmin = j;\r
-                               Rmin = g_uNearestNeighbor[j];\r
-                               assert(uInsane != Rmin);\r
-                               assert(uInsane != g_uNodeIndex[Rmin]);\r
-                               }\r
-                       }\r
-\r
-               assert(Lmin != uInsane);\r
-               assert(Rmin != uInsane);\r
-               assert(dtMinDist != BIG_DIST);\r
-\r
-#if    TRACE\r
-               Log("Nearest neighbors Lmin %u[=%u] Rmin %u[=%u] dist %.3g\n",\r
-                 Lmin,\r
-                 g_uNodeIndex[Lmin],\r
-                 Rmin,\r
-                 g_uNodeIndex[Rmin],\r
-                 dtMinDist);\r
-#endif\r
-\r
-       // Compute distances to new node\r
-       // New node overwrites row currently assigned to Lmin\r
-               dist_t dtNewMinDist = BIG_DIST;\r
-               unsigned uNewNearestNeighbor = uInsane;\r
-               for (unsigned j = 0; j < g_uLeafCount; ++j)\r
-                       {\r
-                       if (j == Lmin || j == Rmin)\r
-                               continue;\r
-                       if (uInsane == g_uNodeIndex[j])\r
-                               continue;\r
-\r
-                       const unsigned vL = TriangleSubscript(Lmin, j);\r
-                       const unsigned vR = TriangleSubscript(Rmin, j);\r
-                       const dist_t dL = g_Dist[vL];\r
-                       const dist_t dR = g_Dist[vR];\r
-                       dist_t dtNewDist;\r
-\r
-                       switch (Linkage)\r
-                               {\r
-                       case LINKAGE_Avg:\r
-                               dtNewDist = AVG(dL, dR);\r
-                               break;\r
-\r
-                       case LINKAGE_Min:\r
-                               dtNewDist = MIN(dL, dR);\r
-                               break;\r
-\r
-                       case LINKAGE_Max:\r
-                               dtNewDist = MAX(dL, dR);\r
-                               break;\r
-\r
-                       case LINKAGE_Biased:\r
-                               dtNewDist = g_dSUEFF*AVG(dL, dR) + (1 - g_dSUEFF)*MIN(dL, dR);\r
-                               break;\r
-\r
-                       default:\r
-                               Quit("UPGMA2: Invalid LINKAGE_%u", Linkage);\r
-                               }\r
-\r
-               // Nasty special case.\r
-               // If nearest neighbor of j is Lmin or Rmin, then make the new\r
-               // node (which overwrites the row currently occupied by Lmin)\r
-               // the nearest neighbor. This situation can occur when there are\r
-               // equal distances in the matrix. If we don't make this fix,\r
-               // the nearest neighbor pointer for j would become invalid.\r
-               // (We don't need to test for == Lmin, because in that case\r
-               // the net change needed is zero due to the change in row\r
-               // numbering).\r
-                       if (g_uNearestNeighbor[j] == Rmin)\r
-                               g_uNearestNeighbor[j] = Lmin;\r
-\r
-#if    TRACE\r
-                       Log("New dist to %u = (%u/%.3g + %u/%.3g)/2 = %.3g\n",\r
-                         j, Lmin, dL, Rmin, dR, dtNewDist);\r
-#endif\r
-                       g_Dist[vL] = dtNewDist;\r
-                       if (dtNewDist < dtNewMinDist)\r
-                               {\r
-                               dtNewMinDist = dtNewDist;\r
-                               uNewNearestNeighbor = j;\r
-                               }\r
-                       }\r
-\r
-               assert(g_uInternalNodeIndex < g_uLeafCount - 1 || BIG_DIST != dtNewMinDist);\r
-               assert(g_uInternalNodeIndex < g_uLeafCount - 1 || uInsane != uNewNearestNeighbor);\r
-\r
-               const unsigned v = TriangleSubscript(Lmin, Rmin);\r
-               const dist_t dLR = g_Dist[v];\r
-               const dist_t dHeightNew = dLR/2;\r
-               const unsigned uLeft = g_uNodeIndex[Lmin];\r
-               const unsigned uRight = g_uNodeIndex[Rmin];\r
-               const dist_t HeightLeft =\r
-                 uLeft < g_uLeafCount ? 0 : g_Height[uLeft - g_uLeafCount];\r
-               const dist_t HeightRight =\r
-                 uRight < g_uLeafCount ? 0 : g_Height[uRight - g_uLeafCount];\r
-\r
-               g_uLeft[g_uInternalNodeIndex] = uLeft;\r
-               g_uRight[g_uInternalNodeIndex] = uRight;\r
-               g_LeftLength[g_uInternalNodeIndex] = dHeightNew - HeightLeft;\r
-               g_RightLength[g_uInternalNodeIndex] = dHeightNew - HeightRight;\r
-               g_Height[g_uInternalNodeIndex] = dHeightNew;\r
-\r
-       // Row for left child overwritten by row for new node\r
-               g_uNodeIndex[Lmin] = g_uLeafCount + g_uInternalNodeIndex;\r
-               g_uNearestNeighbor[Lmin] = uNewNearestNeighbor;\r
-               g_MinDist[Lmin] = dtNewMinDist;\r
-\r
-       // Delete row for right child\r
-               g_uNodeIndex[Rmin] = uInsane;\r
-\r
-#if    TRACE\r
-               Log("\nInternalNodeIndex=%u Lmin=%u Rmin=%u\n",\r
-                 g_uInternalNodeIndex, Lmin, Rmin);\r
-               ListState();\r
-#endif\r
-               }\r
-\r
-       unsigned uRoot = g_uLeafCount - 2;\r
-       tree.Create(g_uLeafCount, uRoot, g_uLeft, g_uRight, g_LeftLength, g_RightLength,\r
-         Ids, Names);\r
-\r
-#if    TRACE\r
-       tree.LogMe();\r
-#endif\r
-\r
-       delete[] g_Dist;\r
-\r
-       delete[] g_uNodeIndex;\r
-       delete[] g_uNearestNeighbor;\r
-       delete[] g_MinDist;\r
-       delete[] g_Height;\r
-\r
-       delete[] g_uLeft;\r
-       delete[] g_uRight;\r
-       delete[] g_LeftLength;\r
-       delete[] g_RightLength;\r
-       \r
-       for (unsigned i = 0; i < g_uLeafCount; ++i)\r
-               free(Names[i]);\r
-       delete[] Names;\r
-       delete[] Ids;\r
-       }\r
-\r
-class DistCalcTest : public DistCalc\r
-       {\r
-       virtual void CalcDistRange(unsigned i, dist_t Dist[]) const\r
-               {\r
-               static dist_t TestDist[5][5] =\r
-                       {\r
-                       0,  2, 14, 14, 20,\r
-                       2,  0, 14, 14, 20,\r
-                       14, 14,  0,  4, 20,\r
-                       14, 14,  4,  0, 20,\r
-                       20, 20, 20, 20,  0,\r
-                       };\r
-               for (unsigned j = 0; j < i; ++j)\r
-                       Dist[j] = TestDist[i][j];\r
-               }\r
-       virtual unsigned GetCount() const\r
-               {\r
-               return 5;\r
-               }\r
-       virtual unsigned GetId(unsigned i) const\r
-               {\r
-               return i;\r
-               }\r
-       virtual const char *GetName(unsigned i) const\r
-               {\r
-               return "name";\r
-               }\r
-       };\r
-\r
-void Test()\r
-       {\r
-       SetListFileName("c:\\tmp\\lobster.log", false);\r
-       DistCalcTest DC;\r
-       Tree tree;\r
-       UPGMA2(DC, tree, LINKAGE_Avg);\r
-       }\r