3 #include "textfile.h"
\r
7 // Tokens in Newick files are:
\r
14 // We can't safely distinguish between identifiers and floating point
\r
15 // numbers at the lexical level (because identifiers may be numeric,
\r
16 // or start with digits), so both edge lengths and identifiers are
\r
17 // returned as strings.
\r
19 const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const
\r
23 #define c(x) case NTT_##x: return #x;
\r
31 c(SingleQuotedString)
\r
32 c(DoubleQuotedString)
\r
39 NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const
\r
41 // Skip leading white space
\r
47 // In case of a single-character token
\r
51 unsigned uBytesCopied = 0;
\r
52 NEWICK_TOKEN_TYPE TT;
\r
65 return NTT_Semicolon;
\r
71 TT = NTT_SingleQuotedString;
\r
76 TT = NTT_DoubleQuotedString;
\r
91 if (TT != NTT_Comment)
\r
93 if (uBytesCopied < uBytes - 2)
\r
95 szToken[uBytesCopied++] = c;
\r
96 szToken[uBytesCopied] = 0;
\r
99 Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);
\r
101 bool bEof = File.GetChar(c);
\r
108 if (0 != strchr("():;,", c))
\r
117 case NTT_SingleQuotedString:
\r
122 case NTT_DoubleQuotedString:
\r
129 return GetToken(File, szToken, uBytes);
\r
133 Quit("Tree::GetToken, invalid TT=%u", TT);
\r
138 // NOTE: this hack must come after definition of Tree::GetToken.
\r
140 #define GetToken GetTokenVerbose
\r
143 void Tree::FromFile(TextFile &File)
\r
146 // If we discover that it is unrooted, will convert on the fly.
\r
149 double dEdgeLength;
\r
150 bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);
\r
152 // Next token should be either ';' for rooted tree or ',' for unrooted.
\r
154 NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
\r
156 // If rooted, all done.
\r
157 if (NTT_Semicolon == NTT)
\r
160 Log(" *** Warning *** edge length on root group in Newick file %s\n",
\r
161 File.GetFileName());
\r
166 if (NTT_Comma != NTT)
\r
167 Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);
\r
169 const unsigned uThirdNode = UnrootFromFile();
\r
170 bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);
\r
172 SetEdgeLength(0, uThirdNode, dEdgeLength);
\r
176 // Return true if an edge length was given for this group.
\r
177 bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,
\r
178 double *ptrdEdgeLength)
\r
180 char szToken[1024];
\r
181 NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
\r
183 // Group is either leaf name or (left, right).
\r
184 if (NTT_String == NTT)
\r
186 SetLeafName(uNodeIndex, szToken);
\r
188 Log("Group is leaf '%s'\n", szToken);
\r
191 else if (NTT_Lparen == NTT)
\r
193 const unsigned uLeft = AppendBranch(uNodeIndex);
\r
194 const unsigned uRight = uLeft + 1;
\r
196 // Left sub-group...
\r
198 Log("Got '(', group is compound, expect left sub-group\n");
\r
200 double dEdgeLength;
\r
201 bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);
\r
204 Log("Edge length for left sub-group: %.3g\n", dEdgeLength);
\r
206 Log("No edge length for left sub-group\n");
\r
209 SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);
\r
211 // ... then comma ...
\r
213 Log("Expect comma\n");
\r
215 NTT = GetToken(File, szToken, sizeof(szToken));
\r
216 if (NTT_Comma != NTT)
\r
217 Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
\r
219 // ...then right sub-group...
\r
221 Log("Expect right sub-group\n");
\r
223 bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);
\r
225 SetEdgeLength(uNodeIndex, uRight, dEdgeLength);
\r
229 Log("Edge length for right sub-group: %.3g\n", dEdgeLength);
\r
231 Log("No edge length for right sub-group\n");
\r
234 // ... then closing parenthesis.
\r
236 Log("Expect closing parenthesis (or comma if > 2-ary)\n");
\r
238 NTT = GetToken(File, szToken, sizeof(szToken));
\r
239 if (NTT_Rparen == NTT)
\r
241 else if (NTT_Comma == NTT)
\r
243 File.PushBack(',');
\r
247 Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
\r
250 Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
\r
253 // Group may optionally be followed by edge length.
\r
254 bool bEof = File.SkipWhiteX();
\r
260 Log("Character following group, could be colon, is '%c'\n", c);
\r
264 NTT = GetToken(File, szToken, sizeof(szToken));
\r
265 if (NTT_String != NTT)
\r
266 Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
\r
267 *ptrdEdgeLength = atof(szToken);
\r