--- /dev/null
+#include "muscle.h"\r
+#include "tree.h"\r
+#include "textfile.h"\r
+\r
+#define TRACE 0\r
+\r
+// Tokens in Newick files are:\r
+// ( ) : , ;\r
+// string\r
+// 'string'\r
+// "string"\r
+// [ comment ]\r
+//\r
+// We can't safely distinguish between identifiers and floating point\r
+// numbers at the lexical level (because identifiers may be numeric,\r
+// or start with digits), so both edge lengths and identifiers are\r
+// returned as strings.\r
+\r
+// Returns a printable name for a Newick token type: the enumerator's\r
+// spelling without its NTT_ prefix, or "??" for a value that is not one\r
+// of the enumerators handled below.\r
+const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const\r
+{\r
+    const char *pszName = "??";\r
+    switch (NTT)\r
+    {\r
+    case NTT_Unknown:\r
+        pszName = "Unknown";\r
+        break;\r
+    case NTT_Lparen:\r
+        pszName = "Lparen";\r
+        break;\r
+    case NTT_Rparen:\r
+        pszName = "Rparen";\r
+        break;\r
+    case NTT_Colon:\r
+        pszName = "Colon";\r
+        break;\r
+    case NTT_Comma:\r
+        pszName = "Comma";\r
+        break;\r
+    case NTT_Semicolon:\r
+        pszName = "Semicolon";\r
+        break;\r
+    case NTT_String:\r
+        pszName = "String";\r
+        break;\r
+    case NTT_SingleQuotedString:\r
+        pszName = "SingleQuotedString";\r
+        break;\r
+    case NTT_DoubleQuotedString:\r
+        pszName = "DoubleQuotedString";\r
+        break;\r
+    case NTT_Comment:\r
+        pszName = "Comment";\r
+        break;\r
+    }\r
+    return pszName;\r
+}\r
+\r
+// Reads the next Newick token from File into szToken, a caller-supplied\r
+// buffer of uBytes bytes, and returns its type.\r
+//\r
+// - Punctuation "( ) : , ;" yields the matching single-character token.\r
+// - Unquoted text ends at punctuation (which is pushed back for the next\r
+//   call), at white space, or at end of file.\r
+// - Quoted text is returned without its quotes; once the closing quote is\r
+//   seen the result is NTT_String, the same as for unquoted text. An empty\r
+//   quoted string ('' or "") yields an empty NTT_String token.\r
+// - A [ comment ] is skipped and the token following it is returned.\r
+//\r
+// If end of file is reached before a quoted string or comment is closed, the\r
+// in-progress type (NTT_SingleQuotedString, NTT_DoubleQuotedString or\r
+// NTT_Comment) is returned instead.\r
+// Calls Quit if the buffer is too small to hold the token.\r
+NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const\r
+{\r
+    // A single-character token plus its terminator needs two bytes. Checking\r
+    // here also keeps the unsigned expression (uBytes - 2) below from\r
+    // wrapping around to a huge value.\r
+    if (uBytes < 2)\r
+        Quit("Tree::GetToken: input buffer too small, uBytes=%u", uBytes);\r
+\r
+    // Skip leading white space\r
+    File.SkipWhite();\r
+\r
+    char c;\r
+    File.GetCharX(c);\r
+\r
+    // In case a single-character token\r
+    szToken[0] = c;\r
+    szToken[1] = 0;\r
+\r
+    unsigned uBytesCopied = 0;\r
+    NEWICK_TOKEN_TYPE TT;\r
+    switch (c)\r
+    {\r
+    case '(':\r
+        return NTT_Lparen;\r
+\r
+    case ')':\r
+        return NTT_Rparen;\r
+\r
+    case ':':\r
+        return NTT_Colon;\r
+\r
+    case ';':\r
+        return NTT_Semicolon;\r
+\r
+    case ',':\r
+        return NTT_Comma;\r
+\r
+    case '\'':\r
+        TT = NTT_SingleQuotedString;\r
+        // Drop the opening quote; c becomes the first character of the text.\r
+        File.GetCharX(c);\r
+        if ('\'' == c)\r
+        {\r
+            // Empty quoted string: the closing quote must not be copied.\r
+            szToken[0] = 0;\r
+            return NTT_String;\r
+        }\r
+        break;\r
+\r
+    case '"':\r
+        TT = NTT_DoubleQuotedString;\r
+        // Drop the opening quote; c becomes the first character of the text.\r
+        File.GetCharX(c);\r
+        if ('"' == c)\r
+        {\r
+            // Empty quoted string: the closing quote must not be copied.\r
+            szToken[0] = 0;\r
+            return NTT_String;\r
+        }\r
+        break;\r
+\r
+    case '[':\r
+        TT = NTT_Comment;\r
+        break;\r
+\r
+    default:\r
+        TT = NTT_String;\r
+        break;\r
+    }\r
+\r
+    for (;;)\r
+    {\r
+        // Comment text is discarded; everything else is accumulated.\r
+        if (TT != NTT_Comment)\r
+        {\r
+            // One byte beyond the terminator is deliberately left unused,\r
+            // matching the capacity callers have always been given.\r
+            if (uBytesCopied < uBytes - 2)\r
+            {\r
+                szToken[uBytesCopied++] = c;\r
+                szToken[uBytesCopied] = 0;\r
+            }\r
+            else\r
+                Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);\r
+        }\r
+        bool bEof = File.GetChar(c);\r
+        if (bEof)\r
+            return TT;\r
+\r
+        switch (TT)\r
+        {\r
+        case NTT_String:\r
+            // Punctuation ends the string and belongs to the next token.\r
+            if (0 != strchr("():;,", c))\r
+            {\r
+                File.PushBack(c);\r
+                return NTT_String;\r
+            }\r
+            // isspace requires a value representable as unsigned char (or\r
+            // EOF); a plain char may be negative for bytes >= 0x80.\r
+            if (isspace(static_cast<unsigned char>(c)))\r
+                return NTT_String;\r
+            break;\r
+\r
+        case NTT_SingleQuotedString:\r
+            if ('\'' == c)\r
+                return NTT_String;\r
+            break;\r
+\r
+        case NTT_DoubleQuotedString:\r
+            if ('"' == c)\r
+                return NTT_String;\r
+            break;\r
+\r
+        case NTT_Comment:\r
+            // End of comment: the real token is whatever comes next.\r
+            if (']' == c)\r
+                return GetToken(File, szToken, uBytes);\r
+            break;\r
+\r
+        default:\r
+            Quit("Tree::GetToken, invalid TT=%u", TT);\r
+        }\r
+    }\r
+}\r
+\r
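+// NOTE: this macro must come after the definition of Tree::GetToken, so that when TRACE is enabled only the GetToken calls below this point are redirected to GetTokenVerbose.\r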
+#if TRACE\r
+#define GetToken GetTokenVerbose\r
+#endif\r
+\r
+// Builds this tree from Newick text read from File.\r
+//\r
+// The tree is first created as rooted and the top-level group is parsed.\r
+// The token after that group decides the shape: ';' means the tree really is\r
+// rooted, ',' means a third top-level sub-group follows, in which case the\r
+// tree is converted to unrooted and the third group is attached.\r
+// Any other token is reported through Quit.\r
+void Tree::FromFile(TextFile &File)\r
+{\r
+    // Start out rooted; converted below if the file turns out to be unrooted.\r
+    CreateRooted();\r
+\r
+    double dLength;\r
+    bool bHaveLength = GetGroupFromFile(File, 0, &dLength);\r
+\r
+    // The separator after the top-level group is ';' (rooted) or ',' (unrooted).\r
+    char szSeparator[16];\r
+    const NEWICK_TOKEN_TYPE Separator =\r
+        GetToken(File, szSeparator, sizeof(szSeparator));\r
+\r
+    if (NTT_Semicolon == Separator)\r
+    {\r
+        // Rooted tree: nothing more to read, only warn about a root edge length.\r
+        if (bHaveLength)\r
+            Log(" *** Warning *** edge length on root group in Newick file %s\n",\r
+                File.GetFileName());\r
+    }\r
+    else\r
+    {\r
+        if (NTT_Comma != Separator)\r
+            Quit("Tree::FromFile, expected ';' or ',', got '%s'", szSeparator);\r
+\r
+        // Unrooted tree: read the third group into the node added by unrooting.\r
+        const unsigned uThird = UnrootFromFile();\r
+        bHaveLength = GetGroupFromFile(File, uThird, &dLength);\r
+        if (bHaveLength)\r
+            SetEdgeLength(0, uThird, dLength);\r
+    }\r
+\r
+    Validate();\r
+}\r
+\r
+// Parses one Newick group from File into node uNodeIndex.\r
+//\r
+// A group is either a leaf name or a parenthesised pair "(left,right)" of\r
+// sub-groups, each parsed recursively into a newly appended child node.\r
+// The group may be followed by ":length".\r
+//\r
+// Returns true and stores the length in *ptrdEdgeLength when an edge length\r
+// follows the group; returns false otherwise, leaving *ptrdEdgeLength alone.\r
+// Also returns false when a comma follows the second sub-group (more than\r
+// two children); the comma is pushed back for the caller to read.\r
+// Unexpected tokens are reported through Quit.\r
+bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,\r
+  double *ptrdEdgeLength)\r
+{\r
+    char szText[1024];\r
+    NEWICK_TOKEN_TYPE Type = GetToken(File, szText, sizeof(szText));\r
+\r
+    if (NTT_String == Type)\r
+    {\r
+        // Simple case: the group is a single named leaf.\r
+        SetLeafName(uNodeIndex, szText);\r
+#if TRACE\r
+        Log("Group is leaf '%s'\n", szText);\r
+#endif\r
+    }\r
+    else if (NTT_Lparen == Type)\r
+    {\r
+        // Compound group: the two children occupy consecutive node indexes.\r
+        const unsigned uFirstChild = AppendBranch(uNodeIndex);\r
+        const unsigned uSecondChild = uFirstChild + 1;\r
+\r
+#if TRACE\r
+        Log("Got '(', group is compound, expect left sub-group\n");\r
+#endif\r
+        // First (left) child.\r
+        double dChildLength;\r
+        bool bFirstHasLength = GetGroupFromFile(File, uFirstChild, &dChildLength);\r
+#if TRACE\r
+        if (bFirstHasLength)\r
+            Log("Edge length for left sub-group: %.3g\n", dChildLength);\r
+        else\r
+            Log("No edge length for left sub-group\n");\r
+#endif\r
+        if (bFirstHasLength)\r
+            SetEdgeLength(uNodeIndex, uFirstChild, dChildLength);\r
+\r
+#if TRACE\r
+        Log("Expect comma\n");\r
+#endif\r
+        // Separator between the two children.\r
+        Type = GetToken(File, szText, sizeof(szText));\r
+        if (NTT_Comma != Type)\r
+            Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szText);\r
+\r
+#if TRACE\r
+        Log("Expect right sub-group\n");\r
+#endif\r
+        // Second (right) child.\r
+        bool bSecondHasLength = GetGroupFromFile(File, uSecondChild, &dChildLength);\r
+        if (bSecondHasLength)\r
+            SetEdgeLength(uNodeIndex, uSecondChild, dChildLength);\r
+\r
+#if TRACE\r
+        if (bSecondHasLength)\r
+            Log("Edge length for right sub-group: %.3g\n", dChildLength);\r
+        else\r
+            Log("No edge length for right sub-group\n");\r
+#endif\r
+\r
+#if TRACE\r
+        Log("Expect closing parenthesis (or comma if > 2-ary)\n");\r
+#endif\r
+        Type = GetToken(File, szText, sizeof(szText));\r
+        if (NTT_Comma == Type)\r
+        {\r
+            // A further sub-group follows; hand the comma back to the caller.\r
+            File.PushBack(',');\r
+            return false;\r
+        }\r
+        if (NTT_Rparen != Type)\r
+            Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szText);\r
+    }\r
+    else\r
+        Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",\r
+          szText);\r
+\r
+    // An optional ":length" may follow the group.\r
+    if (File.SkipWhiteX())\r
+        return false;\r
+\r
+    char chNext;\r
+    File.GetCharX(chNext);\r
+#if TRACE\r
+    Log("Character following group, could be colon, is '%c'\n", chNext);\r
+#endif\r
+    if (':' != chNext)\r
+    {\r
+        // Not an edge length; leave the character for whoever reads next.\r
+        File.PushBack(chNext);\r
+        return false;\r
+    }\r
+\r
+    Type = GetToken(File, szText, sizeof(szText));\r
+    if (NTT_String != Type)\r
+        Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szText);\r
+    *ptrdEdgeLength = atof(szText);\r
+    return true;\r
+}\r