+++ /dev/null
-#ifndef tree_h\r
-#define tree_h\r
-\r
-#include <limits.h>\r
-\r
-class Clust;\r
-\r
-const unsigned NULL_NEIGHBOR = UINT_MAX;\r
-\r
-enum NEWICK_TOKEN_TYPE\r
- {\r
- NTT_Unknown,\r
-\r
-// Returned from Tree::GetToken:\r
- NTT_Lparen,\r
- NTT_Rparen,\r
- NTT_Colon,\r
- NTT_Comma,\r
- NTT_Semicolon,\r
- NTT_String,\r
-\r
-// Following are never returned from Tree::GetToken:\r
- NTT_SingleQuotedString,\r
- NTT_DoubleQuotedString,\r
- NTT_Comment\r
- };\r
-\r
-class Tree\r
- {\r
-public:\r
- Tree()\r
- {\r
- m_uNodeCount = 0;\r
- m_uCacheCount = 0;\r
- m_uNeighbor1 = 0;\r
- m_uNeighbor2 = 0;\r
- m_uNeighbor3 = 0;\r
- m_dEdgeLength1 = 0;\r
- m_dEdgeLength2 = 0;\r
- m_dEdgeLength3 = 0;\r
- m_dHeight = 0;\r
- m_bHasEdgeLength1 = 0;\r
- m_bHasEdgeLength2 = 0;\r
- m_bHasEdgeLength3 = 0;\r
- m_bHasHeight = 0;\r
- m_ptrName = 0;\r
- m_Ids = 0;\r
- }\r
- virtual ~Tree()\r
- {\r
- Clear();\r
- }\r
-\r
- void Clear()\r
- {\r
- for (unsigned n = 0; n < m_uNodeCount; ++n)\r
- free(m_ptrName[n]);\r
-\r
- m_uNodeCount = 0;\r
- m_uCacheCount = 0;\r
-\r
- delete[] m_uNeighbor1;\r
- delete[] m_uNeighbor2;\r
- delete[] m_uNeighbor3;\r
- delete[] m_dEdgeLength1;\r
- delete[] m_dEdgeLength2;\r
- delete[] m_dEdgeLength3;\r
- delete[] m_bHasEdgeLength1;\r
- delete[] m_bHasEdgeLength2;\r
- delete[] m_bHasEdgeLength3;\r
- delete[] m_ptrName;\r
- delete[] m_Ids;\r
- delete[] m_bHasHeight;\r
- delete[] m_dHeight;\r
-\r
- m_uNeighbor1 = 0;\r
- m_uNeighbor2 = 0;\r
- m_uNeighbor3 = 0;\r
- m_dEdgeLength1 = 0;\r
- m_dEdgeLength2 = 0;\r
- m_dEdgeLength3 = 0;\r
- m_ptrName = 0;\r
- m_Ids = 0;\r
- m_uRootNodeIndex = 0;\r
- m_bHasHeight = 0;\r
- m_dHeight = 0;\r
-\r
- m_bRooted = false;\r
- }\r
-\r
-// Creation and manipulation\r
- void CreateRooted();\r
- void CreateUnrooted(double dEdgeLength);\r
-\r
- void FromFile(TextFile &File);\r
- void FromClust(Clust &C);\r
-\r
- void Copy(const Tree &tree);\r
-\r
- void Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],\r
- const unsigned Right[], const float LeftLength[], const float RightLength[],\r
- const unsigned LeafIds[], char *LeafNames[]);\r
- unsigned AppendBranch(unsigned uExistingNodeIndex);\r
- void SetLeafName(unsigned uNodeIndex, const char *ptrName);\r
- void SetLeafId(unsigned uNodeIndex, unsigned uId);\r
- void SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,\r
- double dLength);\r
-\r
- void RootUnrootedTree(unsigned uNodeIndex1, unsigned uNodeIndex2);\r
- void RootUnrootedTree(ROOT Method);\r
- void UnrootByDeletingRoot();\r
-\r
-// Saving to file\r
- void ToFile(TextFile &File) const;\r
-\r
-// Accessor functions\r
- unsigned GetNodeCount() const\r
- {\r
- return m_uNodeCount;\r
- }\r
-\r
- unsigned GetLeafCount() const\r
- {\r
- if (m_bRooted)\r
- {\r
- assert(m_uNodeCount%2 == 1);\r
- return (m_uNodeCount + 1)/2;\r
- }\r
- else\r
- {\r
- assert(m_uNodeCount%2 == 0);\r
- return (m_uNodeCount + 2)/2;\r
- }\r
- }\r
-\r
- unsigned GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const;\r
-\r
- unsigned GetNeighbor1(unsigned uNodeIndex) const\r
- {\r
- assert(uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor1[uNodeIndex];\r
- }\r
-\r
- unsigned GetNeighbor2(unsigned uNodeIndex) const\r
- {\r
- assert(uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor2[uNodeIndex];\r
- }\r
-\r
- unsigned GetNeighbor3(unsigned uNodeIndex) const\r
- {\r
- assert(uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor3[uNodeIndex];\r
- }\r
-\r
- unsigned GetParent(unsigned uNodeIndex) const\r
- {\r
- assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor1[uNodeIndex];\r
- }\r
-\r
- bool IsRooted() const\r
- {\r
- return m_bRooted;\r
- }\r
-\r
- unsigned GetLeft(unsigned uNodeIndex) const\r
- {\r
- assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor2[uNodeIndex];\r
- }\r
-\r
- unsigned GetRight(unsigned uNodeIndex) const\r
- {\r
- assert(m_bRooted && uNodeIndex < m_uNodeCount);\r
- return m_uNeighbor3[uNodeIndex];\r
- }\r
-\r
- const char *GetName(unsigned uNodeIndex) const\r
- {\r
- assert(uNodeIndex < m_uNodeCount);\r
- return m_ptrName[uNodeIndex];\r
- }\r
-\r
- unsigned GetRootNodeIndex() const\r
- {\r
- assert(m_bRooted);\r
- return m_uRootNodeIndex;\r
- }\r
-\r
- unsigned GetNeighborCount(unsigned uNodeIndex) const\r
- {\r
- const unsigned n1 = m_uNeighbor1[uNodeIndex];\r
- const unsigned n2 = m_uNeighbor2[uNodeIndex];\r
- const unsigned n3 = m_uNeighbor3[uNodeIndex];\r
- return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);\r
- }\r
-\r
- bool IsLeaf(unsigned uNodeIndex) const\r
- {\r
- assert(uNodeIndex < m_uNodeCount);\r
- if (1 == m_uNodeCount)\r
- return true;\r
- return 1 == GetNeighborCount(uNodeIndex);\r
- }\r
-\r
- bool IsRoot(unsigned uNodeIndex) const\r
- {\r
- return IsRooted() && m_uRootNodeIndex == uNodeIndex;\r
- }\r
-\r
- unsigned GetLeafId(unsigned uNodeIndex) const;\r
- unsigned GetLeafNodeIndex(const char *ptrName) const;\r
- bool IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
- bool HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
- double GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
- const char *GetLeafName(unsigned uNodeIndex) const;\r
- unsigned GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
- double GetNodeHeight(unsigned uNodeIndex) const;\r
-\r
-// Depth-first traversal\r
- unsigned FirstDepthFirstNode() const;\r
- unsigned NextDepthFirstNode(unsigned uNodeIndex) const;\r
-\r
- unsigned FirstDepthFirstNodeR() const;\r
- unsigned NextDepthFirstNodeR(unsigned uNodeIndex) const;\r
-\r
-// Equivalent of GetLeft/Right in unrooted tree, works in rooted tree too.\r
- unsigned GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
- unsigned GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;\r
-\r
-// Getting parent node in unrooted tree defined iff leaf\r
- unsigned GetLeafParent(unsigned uNodeIndex) const;\r
-\r
-// Misc\r
- const char *NTTStr(NEWICK_TOKEN_TYPE NTT) const;\r
- void FindCenterByLongestSpan(unsigned *ptrNodeIndex1,\r
- unsigned *ptrNodeIndex2) const;\r
- void PruneTree(const Tree &tree, unsigned Subfams[],\r
- unsigned uSubfamCount);\r
- unsigned LeafIndexToNodeIndex(unsigned uLeafIndex) const;\r
-\r
-// Debugging & trouble-shooting support\r
- void Validate() const;\r
- void ValidateNode(unsigned uNodeIndex) const;\r
- void AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const;\r
- void LogMe() const;\r
-\r
-private:\r
- unsigned UnrootFromFile();\r
- NEWICK_TOKEN_TYPE GetTokenVerbose(TextFile &File, char szToken[],\r
- unsigned uBytes) const\r
- {\r
- NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, uBytes);\r
- Log("GetToken %10.10s %s\n", NTTStr(NTT), szToken);\r
- return NTT;\r
- }\r
-\r
- void InitCache(unsigned uCacheCount);\r
- void ExpandCache();\r
- NEWICK_TOKEN_TYPE GetToken(TextFile &File, char szToken[], unsigned uBytes) const;\r
- bool GetGroupFromFile(TextFile &File, unsigned uNodeIndex, double *ptrdEdgeLength);\r
- unsigned GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,\r
- double *ptrdTotalDistance) const;\r
- void ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const;\r
- void ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const;\r
- void OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex);\r
- double FromClustNode(const Clust &C, unsigned uClustNodeIndex, unsigned uPhyNodeIndex);\r
- unsigned GetAnyNonLeafNode() const;\r
-\r
-// Yuck. Data is made public for the convenience of Tree::Copy.\r
-// There has to be a better way.\r
-public:\r
- unsigned m_uNodeCount;\r
- unsigned m_uCacheCount;\r
- unsigned *m_uNeighbor1;\r
- unsigned *m_uNeighbor2;\r
- unsigned *m_uNeighbor3;\r
- double *m_dEdgeLength1;\r
- double *m_dEdgeLength2;\r
- double *m_dEdgeLength3;\r
- double *m_dHeight;\r
- bool *m_bHasEdgeLength1;\r
- bool *m_bHasEdgeLength2;\r
- bool *m_bHasEdgeLength3;\r
- bool *m_bHasHeight;\r
- unsigned *m_Ids;\r
- char **m_ptrName;\r
- bool m_bRooted;\r
- unsigned m_uRootNodeIndex;\r
- };\r
-\r
-struct PhyEnumEdgeState\r
- {\r
- PhyEnumEdgeState()\r
- {\r
- m_bInit = false;\r
- m_uNodeIndex1 = NULL_NEIGHBOR;\r
- m_uNodeIndex2 = NULL_NEIGHBOR;\r
- }\r
- bool m_bInit;\r
- unsigned m_uNodeIndex1;\r
- unsigned m_uNodeIndex2;\r
- };\r
-\r
-const unsigned NODE_CHANGED = (unsigned) (~0);\r
-\r
-extern bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,\r
- unsigned Leaves1[], unsigned *ptruCount1,\r
- unsigned Leaves2[], unsigned *ptruCount2);\r
-extern bool PhyEnumBiPartsR(const Tree &tree, PhyEnumEdgeState &ES,\r
- unsigned Leaves1[], unsigned *ptruCount1,\r
- unsigned Leaves2[], unsigned *ptruCount2);\r
-extern void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],\r
- unsigned *ptruSubtreeCount);\r
-void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,\r
- unsigned Subfams[], unsigned *ptruSubfamCount);\r
-void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],\r
- unsigned *ptruLeafCount);\r
-void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,\r
- unsigned uExclude, unsigned Leaves[], unsigned *ptruCount);\r
-void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[]);\r
-void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength);\r
-void LeafIndexesToLeafNames(const Tree &tree, const unsigned Leaves[], unsigned uCount,\r
- char *Names[]);\r
-void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,\r
- unsigned Ids[]);\r
-void MSASeqSubset(const MSA &msaIn, char *Names[], unsigned uSeqCount,\r
- MSA &msaOut);\r
-void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,\r
- unsigned IdToDiffsLeafNodeIndex[]);\r
-void DiffTreesE(const Tree &NewTree, const Tree &OldTree,\r
- unsigned NewNodeIndexToOldNodeIndex[]);\r
-void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,\r
- double *ptrdLength1, double *ptrdLength2,\r
- ROOT RootMethod);\r
-void FixRoot(Tree &tree, ROOT RootMethod);\r
-\r
-#endif // tree_h\r