--- /dev/null
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "tree.h"\r
+#include "clust.h"\r
+#include "profile.h"\r
+#include "pwpath.h"\r
+\r
+#define TRACE 0\r
+\r
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
+ unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);\r
+\r
+// Identify subfamilies in a tree.\r
+// Returns array of internal node indexes, one for each subfamily.\r
+// First try is to select groups by height (which should approximate\r
+// minimum percent identity), if this gives too many subfamilies then\r
+// we cut at a point that gives the maximum allowed number of subfams.\r
+static void GetSubfams(const Tree &tree, double dMaxHeight,\r
+ unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+ unsigned *Subfams = new unsigned[uNodeCount];\r
+\r
+ unsigned uSubfamCount;\r
+ ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);\r
+\r
+ if (uSubfamCount > uMaxSubfamCount)\r
+ ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);\r
+\r
+ *ptrptrSubfams = Subfams;\r
+ *ptruSubfamCount = uSubfamCount;\r
+ }\r
+\r
+static void LogSubfams(const Tree &tree, const unsigned Subfams[],\r
+ unsigned uSubfamCount)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+ Log("%u subfamilies found\n", uSubfamCount);\r
+ Log("Subfam Sequence\n");\r
+ Log("------ --------\n");\r
+ unsigned *Leaves = new unsigned[uNodeCount];\r
+ for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+ {\r
+ unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];\r
+ unsigned uLeafCount;\r
+ GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);\r
+ for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)\r
+ Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));\r
+ Log("\n");\r
+ }\r
+ delete[] Leaves;\r
+ }\r
+\r
+bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)\r
+ {\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+ if (uSeqCount < 3)\r
+ return false;\r
+\r
+ const double dMaxHeight = 0.6;\r
+ const unsigned uMaxSubfamCount = 16;\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+ unsigned *Subfams;\r
+ unsigned uSubfamCount;\r
+ GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);\r
+ assert(uSubfamCount <= uSeqCount);\r
+\r
+ if (g_bVerbose)\r
+ LogSubfams(tree, Subfams, uSubfamCount);\r
+\r
+ MSA *SubfamMSAs = new MSA[uSubfamCount];\r
+ unsigned *Leaves = new unsigned[uSeqCount];\r
+ unsigned *Ids = new unsigned[uSeqCount];\r
+\r
+ bool bAnyChanges = false;\r
+ for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+ {\r
+ unsigned uSubfam = Subfams[uSubfamIndex];\r
+ unsigned uLeafCount;\r
+ GetLeaves(tree, uSubfam, Leaves, &uLeafCount);\r
+ assert(uLeafCount <= uSeqCount);\r
+\r
+ LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);\r
+\r
+ MSA &msaSubfam = SubfamMSAs[uSubfamIndex];\r
+ MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);\r
+ DeleteGappedCols(msaSubfam);\r
+\r
+#if TRACE\r
+ Log("Subfam %u MSA=\n", uSubfamIndex);\r
+ msaSubfam.LogMe();\r
+#endif\r
+\r
+ if (msaSubfam.GetSeqCount() <= 2)\r
+ continue;\r
+\r
+ // TODO /////////////////////////////////////////\r
+ // Try using existing tree, may actually hurt to\r
+ // re-estimate, may also be a waste of CPU & mem.\r
+ /////////////////////////////////////////////////\r
+ Tree SubfamTree;\r
+ TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);\r
+\r
+ bool bAnyChangesThisSubfam;\r
+ if (g_bAnchors)\r
+ bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);\r
+ else\r
+ bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);\r
+#if TRACE\r
+ Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);\r
+#endif\r
+ if (bAnyChangesThisSubfam)\r
+ bAnyChanges = true;\r
+ }\r
+\r
+ if (bAnyChanges)\r
+ ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);\r
+\r
+ delete[] Leaves;\r
+ delete[] Subfams;\r
+ delete[] SubfamMSAs;\r
+\r
+ return bAnyChanges;\r
+ }\r
+\r
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
+ unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+ bool *Ready = new bool[uNodeCount];\r
+ MSA **MSAs = new MSA *[uNodeCount];\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ {\r
+ Ready[uNodeIndex] = false;\r
+ MSAs[uNodeIndex] = 0;\r
+ }\r
+\r
+ for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
+ {\r
+ unsigned uNodeIndex = Subfams[uSubfamIndex];\r
+ Ready[uNodeIndex] = true;\r
+ MSA *ptrMSA = new MSA;\r
+ // TODO: Wasteful copy, needs re-design\r
+ ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);\r
+ MSAs[uNodeIndex] = ptrMSA;\r
+ }\r
+\r
+ for (unsigned uNodeIndex = tree.FirstDepthFirstNode();\r
+ NULL_NEIGHBOR != uNodeIndex;\r
+ uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))\r
+ {\r
+ if (tree.IsLeaf(uNodeIndex))\r
+ continue;\r
+\r
+ unsigned uRight = tree.GetRight(uNodeIndex);\r
+ unsigned uLeft = tree.GetLeft(uNodeIndex);\r
+ if (!Ready[uRight] || !Ready[uLeft])\r
+ continue;\r
+\r
+ MSA *ptrLeft = MSAs[uLeft];\r
+ MSA *ptrRight = MSAs[uRight];\r
+ assert(ptrLeft != 0 && ptrRight != 0);\r
+\r
+ MSA *ptrParent = new MSA;\r
+\r
+ PWPath Path;\r
+ AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);\r
+\r
+ MSAs[uNodeIndex] = ptrParent;\r
+ Ready[uNodeIndex] = true;\r
+ Ready[uLeft] = false;\r
+ Ready[uRight] = false;\r
+\r
+ delete MSAs[uLeft];\r
+ delete MSAs[uRight];\r
+ MSAs[uLeft] = 0;\r
+ MSAs[uRight] = 0;\r
+ }\r
+\r
+#if DEBUG\r
+ {\r
+ unsigned uReadyCount = 0;\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ {\r
+ if (Ready[uNodeIndex])\r
+ {\r
+ assert(tree.IsRoot(uNodeIndex));\r
+ ++uReadyCount;\r
+ assert(0 != MSAs[uNodeIndex]);\r
+ }\r
+ else\r
+ assert(0 == MSAs[uNodeIndex]);\r
+ }\r
+ assert(1 == uReadyCount);\r
+ }\r
+#endif\r
+\r
+ const unsigned uRoot = tree.GetRootNodeIndex();\r
+ MSA *ptrRootAlignment = MSAs[uRoot];\r
+\r
+ msa.Copy(*ptrRootAlignment);\r
+\r
+ delete ptrRootAlignment;\r
+\r
+#if TRACE\r
+ Log("After refine subfamilies, root alignment=\n");\r
+ msa.LogMe();\r
+#endif\r
+ }\r