+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "tree.h"\r
-#include "clust.h"\r
-#include "profile.h"\r
-#include "pwpath.h"\r
-\r
-#define TRACE 0\r
-\r
-static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
- unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);\r
-\r
-// Identify subfamilies in a tree.\r
-// Returns array of internal node indexes, one for each subfamily.\r
-// First try is to select groups by height (which should approximate\r
-// minimum percent identity), if this gives too many subfamilies then\r
-// we cut at a point that gives the maximum allowed number of subfams.\r
-static void GetSubfams(const Tree &tree, double dMaxHeight,\r
- unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)\r
- {\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
-\r
- unsigned *Subfams = new unsigned[uNodeCount];\r
-\r
- unsigned uSubfamCount;\r
- ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);\r
-\r
- if (uSubfamCount > uMaxSubfamCount)\r
- ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);\r
-\r
- *ptrptrSubfams = Subfams;\r
- *ptruSubfamCount = uSubfamCount;\r
- }\r
-\r
-static void LogSubfams(const Tree &tree, const unsigned Subfams[],\r
- unsigned uSubfamCount)\r
- {\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
- Log("%u subfamilies found\n", uSubfamCount);\r
- Log("Subfam Sequence\n");\r
- Log("------ --------\n");\r
- unsigned *Leaves = new unsigned[uNodeCount];\r
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
- {\r
- unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];\r
- unsigned uLeafCount;\r
- GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);\r
- for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)\r
- Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));\r
- Log("\n");\r
- }\r
- delete[] Leaves;\r
- }\r
-\r
-bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)\r
- {\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- if (uSeqCount < 3)\r
- return false;\r
-\r
- const double dMaxHeight = 0.6;\r
- const unsigned uMaxSubfamCount = 16;\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
-\r
- unsigned *Subfams;\r
- unsigned uSubfamCount;\r
- GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);\r
- assert(uSubfamCount <= uSeqCount);\r
-\r
- if (g_bVerbose)\r
- LogSubfams(tree, Subfams, uSubfamCount);\r
-\r
- MSA *SubfamMSAs = new MSA[uSubfamCount];\r
- unsigned *Leaves = new unsigned[uSeqCount];\r
- unsigned *Ids = new unsigned[uSeqCount];\r
-\r
- bool bAnyChanges = false;\r
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
- {\r
- unsigned uSubfam = Subfams[uSubfamIndex];\r
- unsigned uLeafCount;\r
- GetLeaves(tree, uSubfam, Leaves, &uLeafCount);\r
- assert(uLeafCount <= uSeqCount);\r
-\r
- LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);\r
-\r
- MSA &msaSubfam = SubfamMSAs[uSubfamIndex];\r
- MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);\r
- DeleteGappedCols(msaSubfam);\r
-\r
-#if TRACE\r
- Log("Subfam %u MSA=\n", uSubfamIndex);\r
- msaSubfam.LogMe();\r
-#endif\r
-\r
- if (msaSubfam.GetSeqCount() <= 2)\r
- continue;\r
-\r
- // TODO /////////////////////////////////////////\r
- // Try using existing tree, may actually hurt to\r
- // re-estimate, may also be a waste of CPU & mem.\r
- /////////////////////////////////////////////////\r
- Tree SubfamTree;\r
- TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);\r
-\r
- bool bAnyChangesThisSubfam;\r
- if (g_bAnchors)\r
- bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);\r
- else\r
- bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);\r
-#if TRACE\r
- Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);\r
-#endif\r
- if (bAnyChangesThisSubfam)\r
- bAnyChanges = true;\r
- }\r
-\r
- if (bAnyChanges)\r
- ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);\r
-\r
- delete[] Leaves;\r
- delete[] Subfams;\r
- delete[] SubfamMSAs;\r
-\r
- return bAnyChanges;\r
- }\r
-\r
-static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],\r
- unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)\r
- {\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
-\r
- bool *Ready = new bool[uNodeCount];\r
- MSA **MSAs = new MSA *[uNodeCount];\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- Ready[uNodeIndex] = false;\r
- MSAs[uNodeIndex] = 0;\r
- }\r
-\r
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)\r
- {\r
- unsigned uNodeIndex = Subfams[uSubfamIndex];\r
- Ready[uNodeIndex] = true;\r
- MSA *ptrMSA = new MSA;\r
- // TODO: Wasteful copy, needs re-design\r
- ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);\r
- MSAs[uNodeIndex] = ptrMSA;\r
- }\r
-\r
- for (unsigned uNodeIndex = tree.FirstDepthFirstNode();\r
- NULL_NEIGHBOR != uNodeIndex;\r
- uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))\r
- {\r
- if (tree.IsLeaf(uNodeIndex))\r
- continue;\r
-\r
- unsigned uRight = tree.GetRight(uNodeIndex);\r
- unsigned uLeft = tree.GetLeft(uNodeIndex);\r
- if (!Ready[uRight] || !Ready[uLeft])\r
- continue;\r
-\r
- MSA *ptrLeft = MSAs[uLeft];\r
- MSA *ptrRight = MSAs[uRight];\r
- assert(ptrLeft != 0 && ptrRight != 0);\r
-\r
- MSA *ptrParent = new MSA;\r
-\r
- PWPath Path;\r
- AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);\r
-\r
- MSAs[uNodeIndex] = ptrParent;\r
- Ready[uNodeIndex] = true;\r
- Ready[uLeft] = false;\r
- Ready[uRight] = false;\r
-\r
- delete MSAs[uLeft];\r
- delete MSAs[uRight];\r
- MSAs[uLeft] = 0;\r
- MSAs[uRight] = 0;\r
- }\r
-\r
-#if DEBUG\r
- {\r
- unsigned uReadyCount = 0;\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- if (Ready[uNodeIndex])\r
- {\r
- assert(tree.IsRoot(uNodeIndex));\r
- ++uReadyCount;\r
- assert(0 != MSAs[uNodeIndex]);\r
- }\r
- else\r
- assert(0 == MSAs[uNodeIndex]);\r
- }\r
- assert(1 == uReadyCount);\r
- }\r
-#endif\r
-\r
- const unsigned uRoot = tree.GetRootNodeIndex();\r
- MSA *ptrRootAlignment = MSAs[uRoot];\r
-\r
- msa.Copy(*ptrRootAlignment);\r
-\r
- delete ptrRootAlignment;\r
-\r
-#if TRACE\r
- Log("After refine subfamilies, root alignment=\n");\r
- msa.LogMe();\r
-#endif\r
- }\r