8 const unsigned NULL_NEIGHBOR = UINT_MAX;
\r
10 enum NEWICK_TOKEN_TYPE
\r
14 // Returned from Tree::GetToken:
\r
22 // Following are never returned from Tree::GetToken:
\r
23 NTT_SingleQuotedString,
\r
24 NTT_DoubleQuotedString,
\r
42 m_bHasEdgeLength1 = 0;
\r
43 m_bHasEdgeLength2 = 0;
\r
44 m_bHasEdgeLength3 = 0;
\r
56 for (unsigned n = 0; n < m_uNodeCount; ++n)
\r
62 delete[] m_uNeighbor1;
\r
63 delete[] m_uNeighbor2;
\r
64 delete[] m_uNeighbor3;
\r
65 delete[] m_dEdgeLength1;
\r
66 delete[] m_dEdgeLength2;
\r
67 delete[] m_dEdgeLength3;
\r
68 delete[] m_bHasEdgeLength1;
\r
69 delete[] m_bHasEdgeLength2;
\r
70 delete[] m_bHasEdgeLength3;
\r
73 delete[] m_bHasHeight;
\r
84 m_uRootNodeIndex = 0;
\r
91 // Creation and manipulation
\r
92 void CreateRooted();
\r
93 void CreateUnrooted(double dEdgeLength);
\r
95 void FromFile(TextFile &File);
\r
96 void FromClust(Clust &C);
\r
98 void Copy(const Tree &tree);
\r
100 void Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],
\r
101 const unsigned Right[], const float LeftLength[], const float RightLength[],
\r
102 const unsigned LeafIds[], char *LeafNames[]);
\r
103 unsigned AppendBranch(unsigned uExistingNodeIndex);
\r
104 void SetLeafName(unsigned uNodeIndex, const char *ptrName);
\r
105 void SetLeafId(unsigned uNodeIndex, unsigned uId);
\r
106 void SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,
\r
109 void RootUnrootedTree(unsigned uNodeIndex1, unsigned uNodeIndex2);
\r
110 void RootUnrootedTree(ROOT Method);
\r
111 void UnrootByDeletingRoot();
\r
114 void ToFile(TextFile &File) const;
\r
116 // Accessor functions
\r
117 unsigned GetNodeCount() const
\r
119 return m_uNodeCount;
\r
122 unsigned GetLeafCount() const
\r
126 assert(m_uNodeCount%2 == 1);
\r
127 return (m_uNodeCount + 1)/2;
\r
131 assert(m_uNodeCount%2 == 0);
\r
132 return (m_uNodeCount + 2)/2;
\r
136 unsigned GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const;
\r
138 unsigned GetNeighbor1(unsigned uNodeIndex) const
\r
140 assert(uNodeIndex < m_uNodeCount);
\r
141 return m_uNeighbor1[uNodeIndex];
\r
144 unsigned GetNeighbor2(unsigned uNodeIndex) const
\r
146 assert(uNodeIndex < m_uNodeCount);
\r
147 return m_uNeighbor2[uNodeIndex];
\r
150 unsigned GetNeighbor3(unsigned uNodeIndex) const
\r
152 assert(uNodeIndex < m_uNodeCount);
\r
153 return m_uNeighbor3[uNodeIndex];
\r
156 unsigned GetParent(unsigned uNodeIndex) const
\r
158 assert(m_bRooted && uNodeIndex < m_uNodeCount);
\r
159 return m_uNeighbor1[uNodeIndex];
\r
162 bool IsRooted() const
\r
167 unsigned GetLeft(unsigned uNodeIndex) const
\r
169 assert(m_bRooted && uNodeIndex < m_uNodeCount);
\r
170 return m_uNeighbor2[uNodeIndex];
\r
173 unsigned GetRight(unsigned uNodeIndex) const
\r
175 assert(m_bRooted && uNodeIndex < m_uNodeCount);
\r
176 return m_uNeighbor3[uNodeIndex];
\r
179 const char *GetName(unsigned uNodeIndex) const
\r
181 assert(uNodeIndex < m_uNodeCount);
\r
182 return m_ptrName[uNodeIndex];
\r
185 unsigned GetRootNodeIndex() const
\r
188 return m_uRootNodeIndex;
\r
191 unsigned GetNeighborCount(unsigned uNodeIndex) const
\r
193 const unsigned n1 = m_uNeighbor1[uNodeIndex];
\r
194 const unsigned n2 = m_uNeighbor2[uNodeIndex];
\r
195 const unsigned n3 = m_uNeighbor3[uNodeIndex];
\r
196 return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
\r
199 bool IsLeaf(unsigned uNodeIndex) const
\r
201 assert(uNodeIndex < m_uNodeCount);
\r
202 if (1 == m_uNodeCount)
\r
204 return 1 == GetNeighborCount(uNodeIndex);
\r
207 bool IsRoot(unsigned uNodeIndex) const
\r
209 return IsRooted() && m_uRootNodeIndex == uNodeIndex;
\r
212 unsigned GetLeafId(unsigned uNodeIndex) const;
\r
213 unsigned GetLeafNodeIndex(const char *ptrName) const;
\r
214 bool IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
\r
215 bool HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
\r
216 double GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
\r
217 const char *GetLeafName(unsigned uNodeIndex) const;
\r
218 unsigned GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const;
\r
219 double GetNodeHeight(unsigned uNodeIndex) const;
\r
221 // Depth-first traversal
\r
222 unsigned FirstDepthFirstNode() const;
\r
223 unsigned NextDepthFirstNode(unsigned uNodeIndex) const;
\r
225 unsigned FirstDepthFirstNodeR() const;
\r
226 unsigned NextDepthFirstNodeR(unsigned uNodeIndex) const;
\r
228 // Equivalent of GetLeft/Right in unrooted tree, works in rooted tree too.
\r
229 unsigned GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
\r
230 unsigned GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
\r
232 // Getting parent node in unrooted tree defined iff leaf
\r
233 unsigned GetLeafParent(unsigned uNodeIndex) const;
\r
236 const char *NTTStr(NEWICK_TOKEN_TYPE NTT) const;
\r
237 void FindCenterByLongestSpan(unsigned *ptrNodeIndex1,
\r
238 unsigned *ptrNodeIndex2) const;
\r
239 void PruneTree(const Tree &tree, unsigned Subfams[],
\r
240 unsigned uSubfamCount);
\r
241 unsigned LeafIndexToNodeIndex(unsigned uLeafIndex) const;
\r
243 // Debugging & trouble-shooting support
\r
244 void Validate() const;
\r
245 void ValidateNode(unsigned uNodeIndex) const;
\r
246 void AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
\r
247 void LogMe() const;
\r
250 unsigned UnrootFromFile();
\r
251 NEWICK_TOKEN_TYPE GetTokenVerbose(TextFile &File, char szToken[],
\r
252 unsigned uBytes) const
\r
254 NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, uBytes);
\r
255 Log("GetToken %10.10s %s\n", NTTStr(NTT), szToken);
\r
259 void InitCache(unsigned uCacheCount);
\r
260 void ExpandCache();
\r
261 NEWICK_TOKEN_TYPE GetToken(TextFile &File, char szToken[], unsigned uBytes) const;
\r
262 bool GetGroupFromFile(TextFile &File, unsigned uNodeIndex, double *ptrdEdgeLength);
\r
263 unsigned GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,
\r
264 double *ptrdTotalDistance) const;
\r
265 void ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const;
\r
266 void ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const;
\r
267 void OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex);
\r
268 double FromClustNode(const Clust &C, unsigned uClustNodeIndex, unsigned uPhyNodeIndex);
\r
269 unsigned GetAnyNonLeafNode() const;
\r
271 // Yuck. Data is made public for the convenience of Tree::Copy.
\r
272 // There has to be a better way.
\r
274 unsigned m_uNodeCount;
\r
275 unsigned m_uCacheCount;
\r
276 unsigned *m_uNeighbor1;
\r
277 unsigned *m_uNeighbor2;
\r
278 unsigned *m_uNeighbor3;
\r
279 double *m_dEdgeLength1;
\r
280 double *m_dEdgeLength2;
\r
281 double *m_dEdgeLength3;
\r
283 bool *m_bHasEdgeLength1;
\r
284 bool *m_bHasEdgeLength2;
\r
285 bool *m_bHasEdgeLength3;
\r
286 bool *m_bHasHeight;
\r
290 unsigned m_uRootNodeIndex;
\r
293 struct PhyEnumEdgeState
\r
298 m_uNodeIndex1 = NULL_NEIGHBOR;
\r
299 m_uNodeIndex2 = NULL_NEIGHBOR;
\r
302 unsigned m_uNodeIndex1;
\r
303 unsigned m_uNodeIndex2;
\r
306 const unsigned NODE_CHANGED = (unsigned) (~0);
\r
308 extern bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
\r
309 unsigned Leaves1[], unsigned *ptruCount1,
\r
310 unsigned Leaves2[], unsigned *ptruCount2);
\r
311 extern bool PhyEnumBiPartsR(const Tree &tree, PhyEnumEdgeState &ES,
\r
312 unsigned Leaves1[], unsigned *ptruCount1,
\r
313 unsigned Leaves2[], unsigned *ptruCount2);
\r
314 extern void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
\r
315 unsigned *ptruSubtreeCount);
\r
316 void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
\r
317 unsigned Subfams[], unsigned *ptruSubfamCount);
\r
318 void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],
\r
319 unsigned *ptruLeafCount);
\r
320 void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,
\r
321 unsigned uExclude, unsigned Leaves[], unsigned *ptruCount);
\r
322 void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[]);
\r
323 void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength);
\r
324 void LeafIndexesToLeafNames(const Tree &tree, const unsigned Leaves[], unsigned uCount,
\r
326 void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,
\r
328 void MSASeqSubset(const MSA &msaIn, char *Names[], unsigned uSeqCount,
\r
330 void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
\r
331 unsigned IdToDiffsLeafNodeIndex[]);
\r
332 void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
\r
333 unsigned NewNodeIndexToOldNodeIndex[]);
\r
334 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
\r
335 double *ptrdLength1, double *ptrdLength2,
\r
337 void FixRoot(Tree &tree, ROOT RootMethod);
\r