Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / phyfromfile.cpp
diff --git a/website/archive/binaries/mac/src/muscle/phyfromfile.cpp b/website/archive/binaries/mac/src/muscle/phyfromfile.cpp
new file mode 100644 (file)
index 0000000..12f78b4
--- /dev/null
@@ -0,0 +1,272 @@
+#include "muscle.h"\r
+#include "tree.h"\r
+#include "textfile.h"\r
+\r
+#define TRACE 0\r
+\r
+// Tokens in Newick files are:\r
+//             ( ) : , ;\r
+//             string\r
+//             'string'\r
+//             "string"\r
+//             [ comment ]\r
+//\r
+// We can't safely distinguish between identifiers and floating point\r
+// numbers at the lexical level (because identifiers may be numeric,\r
+// or start with digits), so both edge lengths and identifiers are\r
+// returned as strings.\r
+\r
+const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const\r
+       {\r
+       switch (NTT)\r
+               {\r
+#define c(x)   case NTT_##x: return #x;\r
+       c(Unknown)\r
+       c(Lparen)\r
+       c(Rparen)\r
+       c(Colon)\r
+       c(Comma)\r
+       c(Semicolon)\r
+       c(String)\r
+       c(SingleQuotedString)\r
+       c(DoubleQuotedString)\r
+       c(Comment)\r
+#undef c\r
+               }\r
+       return "??";\r
+       }\r
+\r
+NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const\r
+       {\r
+// Skip leading white space\r
+       File.SkipWhite();\r
+\r
+       char c;\r
+       File.GetCharX(c);\r
+\r
+// In case a single-character token\r
+       szToken[0] = c;\r
+       szToken[1] = 0;\r
+\r
+       unsigned uBytesCopied = 0;\r
+       NEWICK_TOKEN_TYPE TT;\r
+       switch (c)\r
+               {\r
+       case '(':\r
+               return NTT_Lparen;\r
+\r
+       case ')':\r
+               return NTT_Rparen;\r
+\r
+       case ':':\r
+               return NTT_Colon;\r
+\r
+       case ';':\r
+               return NTT_Semicolon;\r
+\r
+       case ',':\r
+               return NTT_Comma;\r
+\r
+       case '\'':\r
+               TT = NTT_SingleQuotedString;\r
+               File.GetCharX(c);\r
+               break;\r
+\r
+       case '"':\r
+               TT = NTT_DoubleQuotedString;\r
+               File.GetCharX(c);\r
+               break;\r
+\r
+       case '[':\r
+               TT = NTT_Comment;\r
+               break;\r
+\r
+       default:\r
+               TT = NTT_String;\r
+               break;\r
+               }\r
+\r
+       for (;;)\r
+               {\r
+               if (TT != NTT_Comment)\r
+                       {\r
+                       if (uBytesCopied < uBytes - 2)\r
+                               {\r
+                               szToken[uBytesCopied++] = c;\r
+                               szToken[uBytesCopied] = 0;\r
+                               }\r
+                       else\r
+                               Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);\r
+                       }\r
+               bool bEof = File.GetChar(c);\r
+               if (bEof)\r
+                       return TT;\r
+\r
+               switch (TT)\r
+                       {\r
+               case NTT_String:\r
+                       if (0 != strchr("():;,", c))\r
+                               {\r
+                               File.PushBack(c);\r
+                               return NTT_String;\r
+                               }\r
+                       if (isspace(c))\r
+                               return NTT_String;\r
+                       break;\r
+\r
+               case NTT_SingleQuotedString:\r
+                       if ('\'' == c)\r
+                               return NTT_String;\r
+                       break;\r
+\r
+               case NTT_DoubleQuotedString:\r
+                       if ('"' == c)\r
+                               return NTT_String;\r
+                       break;\r
+\r
+               case NTT_Comment:\r
+                       if (']' == c)\r
+                               return GetToken(File, szToken, uBytes);\r
+                       break;\r
+\r
+               default:\r
+                       Quit("Tree::GetToken, invalid TT=%u", TT);\r
+                       }\r
+               }\r
+       }\r
+\r
+// NOTE: this hack must come after definition of Tree::GetToken.\r
+#if    TRACE\r
+#define GetToken       GetTokenVerbose\r
+#endif\r
+\r
+void Tree::FromFile(TextFile &File)\r
+       {\r
+// Assume rooted.\r
+// If we discover that it is unrooted, will convert on the fly.\r
+       CreateRooted();\r
+\r
+       double dEdgeLength;\r
+       bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);\r
+\r
+// Next token should be either ';' for rooted tree or ',' for unrooted.\r
+       char szToken[16];\r
+       NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));\r
+\r
+// If rooted, all done.\r
+       if (NTT_Semicolon == NTT)\r
+               {\r
+               if (bEdgeLength)\r
+                       Log(" *** Warning *** edge length on root group in Newick file %s\n",\r
+                         File.GetFileName());\r
+               Validate();\r
+               return;\r
+               }\r
+\r
+       if (NTT_Comma != NTT)\r
+               Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);\r
+\r
+       const unsigned uThirdNode = UnrootFromFile();\r
+       bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);\r
+       if (bEdgeLength)\r
+               SetEdgeLength(0, uThirdNode, dEdgeLength);\r
+       Validate();\r
+       }\r
+\r
+// Return true if edge length for this group.\r
+bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,\r
+  double *ptrdEdgeLength)\r
+       {\r
+       char szToken[1024];\r
+       NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));\r
+\r
+// Group is either leaf name or (left, right).\r
+       if (NTT_String == NTT)\r
+               {\r
+               SetLeafName(uNodeIndex, szToken);\r
+#if    TRACE\r
+               Log("Group is leaf '%s'\n", szToken);\r
+#endif\r
+               }\r
+       else if (NTT_Lparen == NTT)\r
+               {\r
+               const unsigned uLeft = AppendBranch(uNodeIndex);\r
+               const unsigned uRight = uLeft + 1;\r
+\r
+       // Left sub-group...\r
+#if    TRACE\r
+               Log("Got '(', group is compound, expect left sub-group\n");\r
+#endif\r
+               double dEdgeLength;\r
+               bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);\r
+#if    TRACE\r
+               if (bLeftLength)\r
+                       Log("Edge length for left sub-group: %.3g\n", dEdgeLength);\r
+               else\r
+                       Log("No edge length for left sub-group\n");\r
+#endif\r
+               if (bLeftLength)\r
+                       SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);\r
+\r
+       // ... then comma ...\r
+#if    TRACE\r
+               Log("Expect comma\n");\r
+#endif\r
+               NTT = GetToken(File, szToken, sizeof(szToken));\r
+               if (NTT_Comma != NTT)\r
+                       Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);\r
+\r
+       // ...then right sub-group...\r
+#if    TRACE\r
+               Log("Expect right sub-group\n");\r
+#endif\r
+               bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);\r
+               if (bRightLength)\r
+                       SetEdgeLength(uNodeIndex, uRight, dEdgeLength);\r
+\r
+#if    TRACE\r
+               if (bRightLength)\r
+                       Log("Edge length for right sub-group: %.3g\n", dEdgeLength);\r
+               else\r
+                       Log("No edge length for right sub-group\n");\r
+#endif\r
+\r
+       // ... then closing parenthesis.\r
+#if    TRACE\r
+               Log("Expect closing parenthesis (or comma if > 2-ary)\n");\r
+#endif\r
+               NTT = GetToken(File, szToken, sizeof(szToken));\r
+               if (NTT_Rparen == NTT)\r
+                       ;\r
+               else if (NTT_Comma == NTT)\r
+                       {\r
+                       File.PushBack(',');\r
+                       return false;\r
+                       }\r
+               else\r
+                       Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);\r
+               }\r
+       else\r
+               Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",\r
+                 szToken);\r
+\r
+// Group may optionally be followed by edge length.\r
+       bool bEof = File.SkipWhiteX();\r
+       if (bEof)\r
+               return false;\r
+       char c;\r
+       File.GetCharX(c);\r
+#if    TRACE\r
+       Log("Character following group, could be colon, is '%c'\n", c);\r
+#endif\r
+       if (':' == c)\r
+               {\r
+               NTT = GetToken(File, szToken, sizeof(szToken));\r
+               if (NTT_String != NTT)\r
+                       Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);\r
+               *ptrdEdgeLength = atof(szToken);\r
+               return true;\r
+               }\r
+       File.PushBack(c);\r
+       return false;\r
+       }\r