Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / refinesubfams.cpp
diff --git a/website/archive/binaries/mac/src/muscle/refinesubfams.cpp b/website/archive/binaries/mac/src/muscle/refinesubfams.cpp
new file mode 100644 (file)
index 0000000..600e394
--- /dev/null
@@ -0,0 +1,212 @@
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "tree.h"\r
+#include "clust.h"\r
+#include "profile.h"\r
+#include "pwpath.h"\r
+\r
+#define TRACE 0\r
+\r
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
+  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);\r
+\r
+// Identify subfamilies in a tree.\r
+// Returns array of internal node indexes, one for each subfamily.\r
+// First try is to select groups by height (which should approximate\r
+// minimum percent identity), if this gives too many subfamilies then\r
+// we cut at a point that gives the maximum allowed number of subfams.\r
+static void GetSubfams(const Tree &tree, double dMaxHeight,\r
+  unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+       unsigned *Subfams = new unsigned[uNodeCount];\r
+\r
+       unsigned uSubfamCount;\r
+       ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);\r
+\r
+       if (uSubfamCount > uMaxSubfamCount)\r
+               ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);\r
+\r
+       *ptrptrSubfams = Subfams;\r
+       *ptruSubfamCount = uSubfamCount;\r
+       }\r
+\r
+static void LogSubfams(const Tree &tree, const unsigned Subfams[],\r
+  unsigned uSubfamCount)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+       Log("%u subfamilies found\n", uSubfamCount);\r
+       Log("Subfam  Sequence\n");\r
+       Log("------  --------\n");\r
+       unsigned *Leaves = new unsigned[uNodeCount];\r
+       for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+               {\r
+               unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];\r
+               unsigned uLeafCount;\r
+               GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);\r
+               for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)\r
+                       Log("%6u  %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));\r
+               Log("\n");\r
+               }\r
+       delete[] Leaves;\r
+       }\r
+\r
+bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)\r
+       {\r
+       const unsigned uSeqCount = msa.GetSeqCount();\r
+       if (uSeqCount < 3)\r
+               return false;\r
+\r
+       const double dMaxHeight = 0.6;\r
+       const unsigned uMaxSubfamCount = 16;\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+       unsigned *Subfams;\r
+       unsigned uSubfamCount;\r
+       GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);\r
+       assert(uSubfamCount <= uSeqCount);\r
+\r
+       if (g_bVerbose)\r
+               LogSubfams(tree, Subfams, uSubfamCount);\r
+\r
+       MSA *SubfamMSAs = new MSA[uSubfamCount];\r
+       unsigned *Leaves = new unsigned[uSeqCount];\r
+       unsigned *Ids = new unsigned[uSeqCount];\r
+\r
+       bool bAnyChanges = false;\r
+       for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+               {\r
+               unsigned uSubfam = Subfams[uSubfamIndex];\r
+               unsigned uLeafCount;\r
+               GetLeaves(tree, uSubfam, Leaves, &uLeafCount);\r
+               assert(uLeafCount <= uSeqCount);\r
+\r
+               LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);\r
+\r
+               MSA &msaSubfam = SubfamMSAs[uSubfamIndex];\r
+               MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);\r
+               DeleteGappedCols(msaSubfam);\r
+\r
+#if    TRACE\r
+               Log("Subfam %u MSA=\n", uSubfamIndex);\r
+               msaSubfam.LogMe();\r
+#endif\r
+\r
+               if (msaSubfam.GetSeqCount() <= 2)\r
+                       continue;\r
+\r
+       // TODO /////////////////////////////////////////\r
+       // Try using existing tree, may actually hurt to\r
+       // re-estimate, may also be a waste of CPU & mem.\r
+       /////////////////////////////////////////////////\r
+               Tree SubfamTree;\r
+               TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);\r
+\r
+               bool bAnyChangesThisSubfam;\r
+               if (g_bAnchors)\r
+                       bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);\r
+               else\r
+                       bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);\r
+#if    TRACE\r
+               Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);\r
+#endif\r
+               if (bAnyChangesThisSubfam)\r
+                       bAnyChanges = true;\r
+               }\r
+\r
+       if (bAnyChanges)\r
+               ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);\r
+\r
+       delete[] Leaves;\r
+       delete[] Subfams;\r
+       delete[] SubfamMSAs;\r
+\r
+       return bAnyChanges;\r
+       }\r
+\r
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
+  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+       bool *Ready = new bool[uNodeCount];\r
+       MSA **MSAs = new MSA *[uNodeCount];\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               {\r
+               Ready[uNodeIndex] = false;\r
+               MSAs[uNodeIndex] = 0;\r
+               }\r
+\r
+       for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+               {\r
+               unsigned uNodeIndex = Subfams[uSubfamIndex];\r
+               Ready[uNodeIndex] = true;\r
+               MSA *ptrMSA = new MSA;\r
+       // TODO: Wasteful copy, needs re-design\r
+               ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);\r
+               MSAs[uNodeIndex] = ptrMSA;\r
+               }\r
+\r
+       for (unsigned uNodeIndex = tree.FirstDepthFirstNode();\r
+         NULL_NEIGHBOR != uNodeIndex;\r
+         uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))\r
+               {\r
+               if (tree.IsLeaf(uNodeIndex))\r
+                       continue;\r
+\r
+               unsigned uRight = tree.GetRight(uNodeIndex);\r
+               unsigned uLeft = tree.GetLeft(uNodeIndex);\r
+               if (!Ready[uRight] || !Ready[uLeft])\r
+                       continue;\r
+\r
+               MSA *ptrLeft = MSAs[uLeft];\r
+               MSA *ptrRight = MSAs[uRight];\r
+               assert(ptrLeft != 0 && ptrRight != 0);\r
+\r
+               MSA *ptrParent = new MSA;\r
+\r
+               PWPath Path;\r
+               AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);\r
+\r
+               MSAs[uNodeIndex] = ptrParent;\r
+               Ready[uNodeIndex] = true;\r
+               Ready[uLeft] = false;\r
+               Ready[uRight] = false;\r
+\r
+               delete MSAs[uLeft];\r
+               delete MSAs[uRight];\r
+               MSAs[uLeft] = 0;\r
+               MSAs[uRight] = 0;\r
+               }\r
+\r
+#if    DEBUG\r
+       {\r
+       unsigned uReadyCount = 0;\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               {\r
+               if (Ready[uNodeIndex])\r
+                       {\r
+                       assert(tree.IsRoot(uNodeIndex));\r
+                       ++uReadyCount;\r
+                       assert(0 != MSAs[uNodeIndex]);\r
+                       }\r
+               else\r
+                       assert(0 == MSAs[uNodeIndex]);\r
+               }\r
+       assert(1 == uReadyCount);\r
+       }\r
+#endif\r
+\r
+       const unsigned uRoot = tree.GetRootNodeIndex();\r
+       MSA *ptrRootAlignment = MSAs[uRoot];\r
+\r
+       msa.Copy(*ptrRootAlignment);\r
+\r
+       delete ptrRootAlignment;\r
+\r
+#if    TRACE\r
+       Log("After refine subfamilies, root alignment=\n");\r
+       msa.LogMe();\r
+#endif\r
+       }\r