10 static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
\r
11 unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);
\r
13 // Identify subfamilies in a tree.
\r
14 // Returns array of internal node indexes, one for each subfamily.
\r
15 // First try is to select groups by height (which should approximate
\r
16 // minimum percent identity), if this gives too many subfamilies then
\r
17 // we cut at a point that gives the maximum allowed number of subfams.
\r
18 static void GetSubfams(const Tree &tree, double dMaxHeight,
\r
19 unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
\r
21 const unsigned uNodeCount = tree.GetNodeCount();
\r
23 unsigned *Subfams = new unsigned[uNodeCount];
\r
25 unsigned uSubfamCount;
\r
26 ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);
\r
28 if (uSubfamCount > uMaxSubfamCount)
\r
29 ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);
\r
31 *ptrptrSubfams = Subfams;
\r
32 *ptruSubfamCount = uSubfamCount;
\r
35 static void LogSubfams(const Tree &tree, const unsigned Subfams[],
\r
36 unsigned uSubfamCount)
\r
38 const unsigned uNodeCount = tree.GetNodeCount();
\r
39 Log("%u subfamilies found\n", uSubfamCount);
\r
40 Log("Subfam Sequence\n");
\r
41 Log("------ --------\n");
\r
42 unsigned *Leaves = new unsigned[uNodeCount];
\r
43 for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
\r
45 unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
\r
46 unsigned uLeafCount;
\r
47 GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
\r
48 for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
\r
49 Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
\r
55 bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
\r
57 const unsigned uSeqCount = msa.GetSeqCount();
\r
61 const double dMaxHeight = 0.6;
\r
62 const unsigned uMaxSubfamCount = 16;
\r
63 const unsigned uNodeCount = tree.GetNodeCount();
\r
66 unsigned uSubfamCount;
\r
67 GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
\r
68 assert(uSubfamCount <= uSeqCount);
\r
71 LogSubfams(tree, Subfams, uSubfamCount);
\r
73 MSA *SubfamMSAs = new MSA[uSubfamCount];
\r
74 unsigned *Leaves = new unsigned[uSeqCount];
\r
75 unsigned *Ids = new unsigned[uSeqCount];
\r
77 bool bAnyChanges = false;
\r
78 for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
\r
80 unsigned uSubfam = Subfams[uSubfamIndex];
\r
81 unsigned uLeafCount;
\r
82 GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
\r
83 assert(uLeafCount <= uSeqCount);
\r
85 LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);
\r
87 MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
\r
88 MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
\r
89 DeleteGappedCols(msaSubfam);
\r
92 Log("Subfam %u MSA=\n", uSubfamIndex);
\r
96 if (msaSubfam.GetSeqCount() <= 2)
\r
99 // TODO /////////////////////////////////////////
\r
100 // Try using existing tree, may actually hurt to
\r
101 // re-estimate, may also be a waste of CPU & mem.
\r
102 /////////////////////////////////////////////////
\r
104 TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);
\r
106 bool bAnyChangesThisSubfam;
\r
108 bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
\r
110 bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
\r
112 Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
\r
114 if (bAnyChangesThisSubfam)
\r
115 bAnyChanges = true;
\r
119 ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);
\r
123 delete[] SubfamMSAs;
\r
125 return bAnyChanges;
\r
128 static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
\r
129 unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
\r
131 const unsigned uNodeCount = tree.GetNodeCount();
\r
133 bool *Ready = new bool[uNodeCount];
\r
134 MSA **MSAs = new MSA *[uNodeCount];
\r
135 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
137 Ready[uNodeIndex] = false;
\r
138 MSAs[uNodeIndex] = 0;
\r
141 for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
\r
143 unsigned uNodeIndex = Subfams[uSubfamIndex];
\r
144 Ready[uNodeIndex] = true;
\r
145 MSA *ptrMSA = new MSA;
\r
146 // TODO: Wasteful copy, needs re-design
\r
147 ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
\r
148 MSAs[uNodeIndex] = ptrMSA;
\r
151 for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
\r
152 NULL_NEIGHBOR != uNodeIndex;
\r
153 uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
\r
155 if (tree.IsLeaf(uNodeIndex))
\r
158 unsigned uRight = tree.GetRight(uNodeIndex);
\r
159 unsigned uLeft = tree.GetLeft(uNodeIndex);
\r
160 if (!Ready[uRight] || !Ready[uLeft])
\r
163 MSA *ptrLeft = MSAs[uLeft];
\r
164 MSA *ptrRight = MSAs[uRight];
\r
165 assert(ptrLeft != 0 && ptrRight != 0);
\r
167 MSA *ptrParent = new MSA;
\r
170 AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);
\r
172 MSAs[uNodeIndex] = ptrParent;
\r
173 Ready[uNodeIndex] = true;
\r
174 Ready[uLeft] = false;
\r
175 Ready[uRight] = false;
\r
177 delete MSAs[uLeft];
\r
178 delete MSAs[uRight];
\r
185 unsigned uReadyCount = 0;
\r
186 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
188 if (Ready[uNodeIndex])
\r
190 assert(tree.IsRoot(uNodeIndex));
\r
192 assert(0 != MSAs[uNodeIndex]);
\r
195 assert(0 == MSAs[uNodeIndex]);
\r
197 assert(1 == uReadyCount);
\r
201 const unsigned uRoot = tree.GetRootNodeIndex();
\r
202 MSA *ptrRootAlignment = MSAs[uRoot];
\r
204 msa.Copy(*ptrRootAlignment);
\r
206 delete ptrRootAlignment;
\r
209 Log("After refine subfamilies, root alignment=\n");
\r