+++ /dev/null
-#include "muscle.h"\r
-#include "tree.h"\r
-#include "textfile.h"\r
-\r
-#define TRACE 0\r
-\r
-// Tokens in Newick files are:\r
-// ( ) : , ;\r
-// string\r
-// 'string'\r
-// "string"\r
-// [ comment ]\r
-//\r
-// We can't safely distinguish between identifiers and floating point\r
-// numbers at the lexical level (because identifiers may be numeric,\r
-// or start with digits), so both edge lengths and identifiers are\r
-// returned as strings.\r
-\r
-// Return the printable name of a Newick token type: the enumerator name\r
-// with its NTT_ prefix removed, or "??" for a value not listed below.\r
-const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const\r
-{\r
-    const char *pszName = "??";\r
-    switch (NTT)\r
-    {\r
-    case NTT_Unknown:            pszName = "Unknown"; break;\r
-    case NTT_Lparen:             pszName = "Lparen"; break;\r
-    case NTT_Rparen:             pszName = "Rparen"; break;\r
-    case NTT_Colon:              pszName = "Colon"; break;\r
-    case NTT_Comma:              pszName = "Comma"; break;\r
-    case NTT_Semicolon:          pszName = "Semicolon"; break;\r
-    case NTT_String:             pszName = "String"; break;\r
-    case NTT_SingleQuotedString: pszName = "SingleQuotedString"; break;\r
-    case NTT_DoubleQuotedString: pszName = "DoubleQuotedString"; break;\r
-    case NTT_Comment:            pszName = "Comment"; break;\r
-    }\r
-    return pszName;\r
-}\r
-\r
-// Read the next Newick token from File into szToken (NUL-terminated).\r
-//\r
-//   File     text file to read from; File.SkipWhite() is called first.\r
-//   szToken  caller-supplied buffer that receives the token text.\r
-//   uBytes   size of szToken in bytes; must be at least 2.\r
-//\r
-// Returns the token type. Single-character tokens ( ) : ; , are returned\r
-// with szToken holding that character. Quoted strings are returned as\r
-// NTT_String with the quotes removed (an empty '' or "" gives an empty\r
-// token). A [comment] is skipped and the token after it is returned.\r
-// If GetChar reports end-of-file while scanning, the type being scanned\r
-// is returned, so an unterminated quoted string or comment comes back as\r
-// NTT_SingleQuotedString, NTT_DoubleQuotedString or NTT_Comment.\r
-// Quits if the token does not fit in szToken.\r
-NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const\r
-{\r
-// Room is needed for one character plus the terminating NUL. Checking\r
-// here also keeps the unsigned capacity test in the loop from wrapping.\r
-    if (uBytes < 2)\r
-    {\r
-        Quit("Tree::GetToken: input buffer too small (%u bytes)", uBytes);\r
-        return NTT_Unknown;\r
-    }\r
-\r
-// Skip leading white space\r
-    File.SkipWhite();\r
-\r
-    char c;\r
-    File.GetCharX(c);\r
-\r
-// In case a single-character token\r
-    szToken[0] = c;\r
-    szToken[1] = 0;\r
-\r
-    NEWICK_TOKEN_TYPE TT;\r
-    switch (c)\r
-    {\r
-    case '(':\r
-        return NTT_Lparen;\r
-\r
-    case ')':\r
-        return NTT_Rparen;\r
-\r
-    case ':':\r
-        return NTT_Colon;\r
-\r
-    case ';':\r
-        return NTT_Semicolon;\r
-\r
-    case ',':\r
-        return NTT_Comma;\r
-\r
-    case '\'':\r
-    case '"':\r
-    {\r
-        const char cQuote = c;\r
-        TT = ('\'' == cQuote) ? NTT_SingleQuotedString : NTT_DoubleQuotedString;\r
-    // Step over the opening quote so it is not copied into the token.\r
-        File.GetCharX(c);\r
-        if (cQuote == c)\r
-        {\r
-        // Closing quote follows immediately: the string is empty. Without\r
-        // this check the closing quote would be copied as content and the\r
-        // scan would run on to the next quote in the file.\r
-            szToken[0] = 0;\r
-            return NTT_String;\r
-        }\r
-        break;\r
-    }\r
-\r
-    case '[':\r
-        TT = NTT_Comment;\r
-        break;\r
-\r
-    default:\r
-        TT = NTT_String;\r
-        break;\r
-    }\r
-\r
-    unsigned uBytesCopied = 0;\r
-    for (;;)\r
-    {\r
-    // Comment text is discarded; everything else is accumulated.\r
-        if (TT != NTT_Comment)\r
-        {\r
-        // Same limit as the historical "uBytesCopied < uBytes - 2",\r
-        // written so that it cannot underflow.\r
-            if (uBytesCopied + 2 < uBytes)\r
-            {\r
-                szToken[uBytesCopied++] = c;\r
-                szToken[uBytesCopied] = 0;\r
-            }\r
-            else\r
-                Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);\r
-        }\r
-        bool bEof = File.GetChar(c);\r
-        if (bEof)\r
-            return TT;\r
-\r
-        switch (TT)\r
-        {\r
-        case NTT_String:\r
-        // A structural character ends the string and belongs to the\r
-        // next token, so it is pushed back.\r
-            if (0 != strchr("():;,", c))\r
-            {\r
-                File.PushBack(c);\r
-                return NTT_String;\r
-            }\r
-        // Cast is required: passing a negative char to isspace is\r
-        // undefined behaviour.\r
-            if (isspace(static_cast<unsigned char>(c)))\r
-                return NTT_String;\r
-            break;\r
-\r
-        case NTT_SingleQuotedString:\r
-            if ('\'' == c)\r
-                return NTT_String;\r
-            break;\r
-\r
-        case NTT_DoubleQuotedString:\r
-            if ('"' == c)\r
-                return NTT_String;\r
-            break;\r
-\r
-        case NTT_Comment:\r
-        // End of comment: return whatever token follows it.\r
-            if (']' == c)\r
-                return GetToken(File, szToken, uBytes);\r
-            break;\r
-\r
-        default:\r
-            Quit("Tree::GetToken, invalid TT=%u", TT);\r
-        }\r
-    }\r
-}\r
-\r
-// NOTE: this hack must come after definition of Tree::GetToken.\r
-#if TRACE\r
-#define GetToken GetTokenVerbose\r
-#endif\r
-\r
-// Build this tree from a Newick-format text file.\r
-//\r
-// The tree is first created as rooted and the root group is parsed into\r
-// node 0. If the next token is ';' the tree is rooted and parsing ends\r
-// (an edge length on the root group is logged as a warning). If it is\r
-// ',' the tree is unrooted: UnrootFromFile() supplies a third node, the\r
-// third group is parsed into it, and its edge length (if any) is set on\r
-// the edge between node 0 and that node. Any other token is fatal.\r
-void Tree::FromFile(TextFile &File)\r
-{\r
-// Assume rooted.\r
-// If we discover that it is unrooted, will convert on the fly.\r
-    CreateRooted();\r
-\r
-    double dEdgeLength = 0.0;\r
-    bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);\r
-\r
-// Next token should be either ';' for rooted tree or ',' for unrooted.\r
-// The buffer is sized like the one in GetGroupFromFile so that an\r
-// unexpected long token is reported by the "expected" message below\r
-// rather than by GetToken's "input buffer too small" error.\r
-    char szToken[1024];\r
-    NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));\r
-\r
-// If rooted, all done.\r
-    if (NTT_Semicolon == NTT)\r
-    {\r
-        if (bEdgeLength)\r
-            Log(" *** Warning *** edge length on root group in Newick file %s\n",\r
-              File.GetFileName());\r
-        Validate();\r
-        return;\r
-    }\r
-\r
-    if (NTT_Comma != NTT)\r
-        Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);\r
-\r
-// Unrooted: attach the third top-level group.\r
-    const unsigned uThirdNode = UnrootFromFile();\r
-    bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);\r
-    if (bEdgeLength)\r
-        SetEdgeLength(0, uThirdNode, dEdgeLength);\r
-    Validate();\r
-}\r
-\r
-// Parse one group from File and store it at node uNodeIndex.\r
-//\r
-// A group is either a leaf name or a compound "(left,right)"; either\r
-// form may be followed by ":length".\r
-//\r
-//   File            text file positioned at the start of the group.\r
-//   uNodeIndex      node that receives the leaf name or the new branch.\r
-//   ptrdEdgeLength  receives the edge length when one is present; it is\r
-//                   not written otherwise.\r
-//\r
-// Returns true if an edge length was read for this group. If a compound\r
-// group is followed by ',' instead of ')' (more than two members), the\r
-// comma is pushed back for the caller and false is returned.\r
-// Quits on any unexpected token, including an edge length that does not\r
-// start with a number.\r
-bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,\r
-  double *ptrdEdgeLength)\r
-{\r
-    char szToken[1024];\r
-    NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));\r
-\r
-// Group is either leaf name or (left, right).\r
-    if (NTT_String == NTT)\r
-    {\r
-        SetLeafName(uNodeIndex, szToken);\r
-#if TRACE\r
-        Log("Group is leaf '%s'\n", szToken);\r
-#endif\r
-    }\r
-    else if (NTT_Lparen == NTT)\r
-    {\r
-    // The right child index is taken to be the one after the left\r
-    // child returned by AppendBranch.\r
-        const unsigned uLeft = AppendBranch(uNodeIndex);\r
-        const unsigned uRight = uLeft + 1;\r
-\r
-    // Left sub-group...\r
-#if TRACE\r
-        Log("Got '(', group is compound, expect left sub-group\n");\r
-#endif\r
-        double dEdgeLength;\r
-        bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);\r
-#if TRACE\r
-        if (bLeftLength)\r
-            Log("Edge length for left sub-group: %.3g\n", dEdgeLength);\r
-        else\r
-            Log("No edge length for left sub-group\n");\r
-#endif\r
-        if (bLeftLength)\r
-            SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);\r
-\r
-    // ... then comma ...\r
-#if TRACE\r
-        Log("Expect comma\n");\r
-#endif\r
-        NTT = GetToken(File, szToken, sizeof(szToken));\r
-        if (NTT_Comma != NTT)\r
-            Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);\r
-\r
-    // ...then right sub-group...\r
-#if TRACE\r
-        Log("Expect right sub-group\n");\r
-#endif\r
-        bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);\r
-        if (bRightLength)\r
-            SetEdgeLength(uNodeIndex, uRight, dEdgeLength);\r
-\r
-#if TRACE\r
-        if (bRightLength)\r
-            Log("Edge length for right sub-group: %.3g\n", dEdgeLength);\r
-        else\r
-            Log("No edge length for right sub-group\n");\r
-#endif\r
-\r
-    // ... then closing parenthesis.\r
-#if TRACE\r
-        Log("Expect closing parenthesis (or comma if > 2-ary)\n");\r
-#endif\r
-        NTT = GetToken(File, szToken, sizeof(szToken));\r
-        if (NTT_Rparen == NTT)\r
-            ;\r
-        else if (NTT_Comma == NTT)\r
-        {\r
-        // More than two members: hand the comma back to the caller.\r
-            File.PushBack(',');\r
-            return false;\r
-        }\r
-        else\r
-            Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);\r
-    }\r
-    else\r
-        Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",\r
-          szToken);\r
-\r
-// Group may optionally be followed by edge length.\r
-    bool bEof = File.SkipWhiteX();\r
-    if (bEof)\r
-        return false;\r
-    char c;\r
-    File.GetCharX(c);\r
-#if TRACE\r
-    Log("Character following group, could be colon, is '%c'\n", c);\r
-#endif\r
-    if (':' == c)\r
-    {\r
-        NTT = GetToken(File, szToken, sizeof(szToken));\r
-        if (NTT_String != NTT)\r
-            Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);\r
-\r
-    // strtod parses exactly as atof does but also reports where it\r
-    // stopped, so a token with no leading number is rejected instead\r
-    // of silently becoming 0.0. Text after a valid number is still\r
-    // ignored, as it was with atof.\r
-        char *ptrEnd = 0;\r
-        const double dLength = strtod(szToken, &ptrEnd);\r
-        if (ptrEnd == szToken)\r
-            Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);\r
-        *ptrdEdgeLength = dLength;\r
-        return true;\r
-    }\r
-    File.PushBack(c);\r
-    return false;\r
-}\r