Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / tree.h
1 #ifndef tree_h\r
2 #define tree_h\r
3 \r
4 #include <limits.h>\r
5 \r
6 class Clust;\r
7 \r
8 const unsigned NULL_NEIGHBOR = UINT_MAX;\r
9 \r
10 enum NEWICK_TOKEN_TYPE\r
11         {\r
12         NTT_Unknown,\r
13 \r
14 // Returned from Tree::GetToken:\r
15         NTT_Lparen,\r
16         NTT_Rparen,\r
17         NTT_Colon,\r
18         NTT_Comma,\r
19         NTT_Semicolon,\r
20         NTT_String,\r
21 \r
22 // Following are never returned from Tree::GetToken:\r
23         NTT_SingleQuotedString,\r
24         NTT_DoubleQuotedString,\r
25         NTT_Comment\r
26         };\r
27 \r
28 class Tree\r
29         {\r
30 public:\r
31         Tree()\r
32                 {\r
33                 m_uNodeCount = 0;\r
34                 m_uCacheCount = 0;\r
35                 m_uNeighbor1 = 0;\r
36                 m_uNeighbor2 = 0;\r
37                 m_uNeighbor3 = 0;\r
38                 m_dEdgeLength1 = 0;\r
39                 m_dEdgeLength2 = 0;\r
40                 m_dEdgeLength3 = 0;\r
41                 m_dHeight = 0;\r
42                 m_bHasEdgeLength1 = 0;\r
43                 m_bHasEdgeLength2 = 0;\r
44                 m_bHasEdgeLength3 = 0;\r
45                 m_bHasHeight = 0;\r
46                 m_ptrName = 0;\r
47                 m_Ids = 0;\r
48                 }\r
49         virtual ~Tree()\r
50                 {\r
51                 Clear();\r
52                 }\r
53 \r
54         void Clear()\r
55                 {\r
56                 for (unsigned n = 0; n < m_uNodeCount; ++n)\r
57                         free(m_ptrName[n]);\r
58 \r
59                 m_uNodeCount = 0;\r
60                 m_uCacheCount = 0;\r
61 \r
62                 delete[] m_uNeighbor1;\r
63                 delete[] m_uNeighbor2;\r
64                 delete[] m_uNeighbor3;\r
65                 delete[] m_dEdgeLength1;\r
66                 delete[] m_dEdgeLength2;\r
67                 delete[] m_dEdgeLength3;\r
68                 delete[] m_bHasEdgeLength1;\r
69                 delete[] m_bHasEdgeLength2;\r
70                 delete[] m_bHasEdgeLength3;\r
71                 delete[] m_ptrName;\r
72                 delete[] m_Ids;\r
73                 delete[] m_bHasHeight;\r
74                 delete[] m_dHeight;\r
75 \r
76                 m_uNeighbor1 = 0;\r
77                 m_uNeighbor2 = 0;\r
78                 m_uNeighbor3 = 0;\r
79                 m_dEdgeLength1 = 0;\r
80                 m_dEdgeLength2 = 0;\r
81                 m_dEdgeLength3 = 0;\r
82                 m_ptrName = 0;\r
83                 m_Ids = 0;\r
84                 m_uRootNodeIndex = 0;\r
85                 m_bHasHeight = 0;\r
86                 m_dHeight = 0;\r
87 \r
88                 m_bRooted = false;\r
89                 }\r
90 \r
91 // Creation and manipulation\r
92         void CreateRooted();\r
93         void CreateUnrooted(double dEdgeLength);\r
94 \r
95         void FromFile(TextFile &File);\r
96         void FromClust(Clust &C);\r
97 \r
98         void Copy(const Tree &tree);\r
99 \r
100         void Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],\r
101           const unsigned Right[], const float LeftLength[], const float RightLength[],\r
102           const unsigned LeafIds[], char *LeafNames[]);\r
103         unsigned AppendBranch(unsigned uExistingNodeIndex);\r
104         void SetLeafName(unsigned uNodeIndex, const char *ptrName);\r
105         void SetLeafId(unsigned uNodeIndex, unsigned uId);\r
106         void SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,\r
107           double dLength);\r
108 \r
109         void RootUnrootedTree(unsigned uNodeIndex1, unsigned uNodeIndex2);\r
110         void RootUnrootedTree(ROOT Method);\r
111         void UnrootByDeletingRoot();\r
112 \r
113 // Saving to file\r
114         void ToFile(TextFile &File) const;\r
115 \r
116 // Accessor functions\r
117         unsigned GetNodeCount() const\r
118                 {\r
119                 return m_uNodeCount;\r
120                 }\r
121 \r
122         unsigned GetLeafCount() const\r
123                 {\r
124                 if (m_bRooted)\r
125                         {\r
126                         assert(m_uNodeCount%2 == 1);\r
127                         return (m_uNodeCount + 1)/2;\r
128                         }\r
129                 else\r
130                         {\r
131                         assert(m_uNodeCount%2 == 0);\r
132                         return (m_uNodeCount + 2)/2;\r
133                         }\r
134                 }\r
135 \r
136         unsigned GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const;\r
137 \r
138         unsigned GetNeighbor1(unsigned uNodeIndex) const\r
139                 {\r
140                 assert(uNodeIndex < m_uNodeCount);\r
141                 return m_uNeighbor1[uNodeIndex];\r
142                 }\r
143 \r
144         unsigned GetNeighbor2(unsigned uNodeIndex) const\r
145                 {\r
146                 assert(uNodeIndex < m_uNodeCount);\r
147                 return m_uNeighbor2[uNodeIndex];\r
148                 }\r
149 \r
150         unsigned GetNeighbor3(unsigned uNodeIndex) const\r
151                 {\r
152                 assert(uNodeIndex < m_uNodeCount);\r
153                 return m_uNeighbor3[uNodeIndex];\r
154                 }\r
155 \r
156         unsigned GetParent(unsigned uNodeIndex) const\r
157                 {\r
158                 assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
159                 return m_uNeighbor1[uNodeIndex];\r
160                 }\r
161 \r
162         bool IsRooted() const\r
163                 {\r
164                 return m_bRooted;\r
165                 }\r
166 \r
167         unsigned GetLeft(unsigned uNodeIndex) const\r
168                 {\r
169                 assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
170                 return m_uNeighbor2[uNodeIndex];\r
171                 }\r
172 \r
173         unsigned GetRight(unsigned uNodeIndex) const\r
174                 {\r
175                 assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
176                 return m_uNeighbor3[uNodeIndex];\r
177                 }\r
178 \r
179         const char *GetName(unsigned uNodeIndex) const\r
180                 {\r
181                 assert(uNodeIndex < m_uNodeCount);\r
182                 return m_ptrName[uNodeIndex];\r
183                 }\r
184 \r
185         unsigned GetRootNodeIndex() const\r
186                 {\r
187                 assert(m_bRooted);\r
188                 return m_uRootNodeIndex;\r
189                 }\r
190 \r
191         unsigned GetNeighborCount(unsigned uNodeIndex) const\r
192                 {\r
193                 const unsigned n1 = m_uNeighbor1[uNodeIndex];\r
194                 const unsigned n2 = m_uNeighbor2[uNodeIndex];\r
195                 const unsigned n3 = m_uNeighbor3[uNodeIndex];\r
196                 return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);\r
197                 }\r
198 \r
199         bool IsLeaf(unsigned uNodeIndex) const\r
200                 {\r
201                 assert(uNodeIndex < m_uNodeCount);\r
202                 if (1 == m_uNodeCount)\r
203                         return true;\r
204                 return 1 == GetNeighborCount(uNodeIndex);\r
205                 }\r
206 \r
207         bool IsRoot(unsigned uNodeIndex) const\r
208                 {\r
209                 return IsRooted() && m_uRootNodeIndex == uNodeIndex;\r
210                 }\r
211 \r
212         unsigned GetLeafId(unsigned uNodeIndex) const;\r
213         unsigned GetLeafNodeIndex(const char *ptrName) const;\r
214         bool IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
215         bool HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
216         double GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
217         const char *GetLeafName(unsigned uNodeIndex) const;\r
218         unsigned GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
219         double GetNodeHeight(unsigned uNodeIndex) const;\r
220 \r
221 // Depth-first traversal\r
222         unsigned FirstDepthFirstNode() const;\r
223         unsigned NextDepthFirstNode(unsigned uNodeIndex) const;\r
224 \r
225         unsigned FirstDepthFirstNodeR() const;\r
226         unsigned NextDepthFirstNodeR(unsigned uNodeIndex) const;\r
227 \r
228 // Equivalent of GetLeft/Right in unrooted tree, works in rooted tree too.\r
229         unsigned GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
230         unsigned GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
231 \r
232 // Getting parent node in unrooted tree defined iff leaf\r
233         unsigned GetLeafParent(unsigned uNodeIndex) const;\r
234 \r
235 // Misc\r
236         const char *NTTStr(NEWICK_TOKEN_TYPE NTT) const;\r
237         void FindCenterByLongestSpan(unsigned *ptrNodeIndex1,\r
238           unsigned *ptrNodeIndex2) const;\r
239         void PruneTree(const Tree &tree, unsigned Subfams[],\r
240           unsigned uSubfamCount);\r
241         unsigned LeafIndexToNodeIndex(unsigned uLeafIndex) const;\r
242 \r
243 // Debugging & trouble-shooting support\r
244         void Validate() const;\r
245         void ValidateNode(unsigned uNodeIndex) const;\r
246         void AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
247         void LogMe() const;\r
248 \r
249 private:\r
250         unsigned UnrootFromFile();\r
251         NEWICK_TOKEN_TYPE GetTokenVerbose(TextFile &File, char szToken[],\r
252           unsigned uBytes) const\r
253                 {\r
254                 NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, uBytes);\r
255                 Log("GetToken %10.10s  %s\n", NTTStr(NTT), szToken);\r
256                 return NTT;\r
257                 }\r
258 \r
259         void InitCache(unsigned uCacheCount);\r
260         void ExpandCache();\r
261         NEWICK_TOKEN_TYPE GetToken(TextFile &File, char szToken[], unsigned uBytes) const;\r
262         bool GetGroupFromFile(TextFile &File, unsigned uNodeIndex, double *ptrdEdgeLength);\r
263         unsigned GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,\r
264           double *ptrdTotalDistance) const;\r
265         void ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const;\r
266         void ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const;\r
267         void OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex);\r
268         double FromClustNode(const Clust &C, unsigned uClustNodeIndex, unsigned uPhyNodeIndex);\r
269         unsigned GetAnyNonLeafNode() const;\r
270 \r
271 // Yuck. Data is made public for the convenience of Tree::Copy.\r
272 // There has to be a better way.\r
273 public:\r
274         unsigned m_uNodeCount;\r
275         unsigned m_uCacheCount;\r
276         unsigned *m_uNeighbor1;\r
277         unsigned *m_uNeighbor2;\r
278         unsigned *m_uNeighbor3;\r
279         double *m_dEdgeLength1;\r
280         double *m_dEdgeLength2;\r
281         double *m_dEdgeLength3;\r
282         double *m_dHeight;\r
283         bool *m_bHasEdgeLength1;\r
284         bool *m_bHasEdgeLength2;\r
285         bool *m_bHasEdgeLength3;\r
286         bool *m_bHasHeight;\r
287         unsigned *m_Ids;\r
288         char **m_ptrName;\r
289         bool m_bRooted;\r
290         unsigned m_uRootNodeIndex;\r
291         };\r
292 \r
293 struct PhyEnumEdgeState\r
294         {\r
295         PhyEnumEdgeState()\r
296                 {\r
297                 m_bInit = false;\r
298                 m_uNodeIndex1 = NULL_NEIGHBOR;\r
299                 m_uNodeIndex2 = NULL_NEIGHBOR;\r
300                 }\r
301         bool m_bInit;\r
302         unsigned m_uNodeIndex1;\r
303         unsigned m_uNodeIndex2;\r
304         };\r
305 \r
306 const unsigned NODE_CHANGED = (unsigned) (~0);\r
307 \r
308 extern bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,\r
309   unsigned Leaves1[], unsigned *ptruCount1,\r
310   unsigned Leaves2[], unsigned *ptruCount2);\r
311 extern bool PhyEnumBiPartsR(const Tree &tree, PhyEnumEdgeState &ES,\r
312   unsigned Leaves1[], unsigned *ptruCount1,\r
313   unsigned Leaves2[], unsigned *ptruCount2);\r
314 extern void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],\r
315   unsigned *ptruSubtreeCount);\r
316 void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,\r
317   unsigned Subfams[], unsigned *ptruSubfamCount);\r
318 void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],\r
319   unsigned *ptruLeafCount);\r
320 void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,\r
321   unsigned uExclude, unsigned Leaves[], unsigned *ptruCount);\r
322 void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[]);\r
323 void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength);\r
324 void LeafIndexesToLeafNames(const Tree &tree, const unsigned Leaves[], unsigned uCount,\r
325  char *Names[]);\r
326 void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,\r
327  unsigned Ids[]);\r
328 void MSASeqSubset(const MSA &msaIn, char *Names[], unsigned uSeqCount,\r
329   MSA &msaOut);\r
330 void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,\r
331   unsigned IdToDiffsLeafNodeIndex[]);\r
332 void DiffTreesE(const Tree &NewTree, const Tree &OldTree,\r
333   unsigned NewNodeIndexToOldNodeIndex[]);\r
334 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,\r
335   double *ptrdLength1, double *ptrdLength2,\r
336   ROOT RootMethod);\r
337 void FixRoot(Tree &tree, ROOT RootMethod);\r
338 \r
339 #endif // tree_h\r