// Patterns used when writing node names; the roles below are taken from the\r
// inline comments on each element:\r
//   [0] matches any of the characters [ , : ' ( ) - a name containing one needs quoting\r
//   [1] matches the single-quote character, for escaping it\r
//   [2] matches whitespace, for transforming it in unquoted names\r
// NOTE(review): the removed element "/w" is a literal slash followed by 'w', not a\r
// character class; the added element "\\s" is the regex whitespace class.\r
private Pattern[] NodeSafeName = new Pattern[] {\r
Pattern.compile("[\\[,:'()]"), // test for requiring quotes\r
Pattern.compile("'"), // escaping quote characters\r
- Pattern.compile("/w") // unqoted whitespace transformation\r
+ Pattern.compile("\\s") // unquoted whitespace transformation\r
};\r
\r
// The single-quote character; presumably the delimiter for quoted node names - TODO confirm against nodeName()\r
char QuoteChar = '\'';\r
// The newick string; set from the constructor argument\r
String newickFile = null;\r
\r
/**\r
- * Creates a new NewickFile object.\r
+ * Creates a new NewickFile object\r
* \r
* @param inStr\r
- * DOCUMENT ME!\r
+ * Newick style tree string\r
* \r
* @throws IOException\r
- * DOCUMENT ME!\r
+ * if string is not a valid newick file\r
*/\r
public NewickFile(String inStr) throws IOException {\r
newickFile = inStr;\r
\r
/**\r
* call this to convert the newick string into a binary node linked tree\r
+ * Note: this is automatically called by the constructors, so you normally\r
+ * wouldn't need to use this.\r
* \r
* @throws IOException\r
* if the newick string cannot be parsed.\r
\r
float DefDistance = (float) 0.001; // @param Default distance for a node -\r
// very very small\r
- int DefBootstrap = 0; // @param Default bootstrap for a node\r
+ int DefBootstrap = -1; // @param Default bootstrap for a node\r
\r
float distance = DefDistance;\r
int bootstrap = DefBootstrap;\r
\r
Matcher mjsyms = majorsyms.matcher(nf);\r
char schar;\r
- while (mjsyms.find(cp) && (Error == null)) {\r
+ int nextcp=0;\r
+ int ncp = cp;\r
+ while (mjsyms.find(cp) && (Error == null)) {\r
int fcp = mjsyms.start();\r
\r
switch (schar = nf.charAt(fcp)) {\r
break;\r
\r
default:\r
- int nextcp = 0;\r
- // Skip Comment or structured/extended NH format info\r
+ // Reached terminating root node label.\r
+ if (schar == ';' && d != -1) {\r
+ Error = ErrorStringrange(Error,\r
+ "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
+ }\r
+\r
+ // Skip Comment or structured/extended NH format info\r
if (schar == '[') {\r
if ((nextcp=nf.indexOf(']', fcp)) > -1) {\r
- // Skip the comment field\r
- // should advance fcp too here\r
+ // verified that comment is properly terminated.\r
+ // now skip the comment field\r
nextcp++;\r
- //fcp = nextcp;\r
- //schar = nf.charAt(fcp);\r
+ break; // go and search for the next node separator, leaving ncp at beginning of node info\r
} else {\r
Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);\r
nextcp = 0;\r
break;\r
}\r
- ;\r
}\r
-\r
- // Reached termininating root node label.\r
- if (schar == ';' && d != -1) {\r
- Error = ErrorStringrange(Error,\r
- "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
+ \r
+ // Parse simpler field strings from substring between ncp and node separator\r
+ String fstring = nf.substring(ncp, fcp);\r
+ // extract any comments from the nodeinfo.\r
+ while (fstring.indexOf(']')>-1)\r
+ {\r
+ int cstart=fstring.indexOf('[');\r
+ int cend=fstring.indexOf(']');\r
+ String comment = fstring.substring(cstart+1,cend); // TODO: put this somewhere ?\r
+ fstring = fstring.substring(0, cstart)+fstring.substring(cend+1);\r
}\r
-\r
- // Parse simpler field strings\r
- String fstring = nf.substring(cp, fcp);\r
Matcher uqnodename = Pattern.compile("^([^' :;\\](),]+).*").matcher(\r
fstring);\r
if (uqnodename.matches()\r
}\r
}\r
\r
- Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher(\r
+ Matcher nbootstrap = Pattern.compile("\\s*([+0-9]+)\\s*:.*").matcher(\r
fstring);\r
\r
- if (nbootstrap.matches() && (nbootstrap.start(1) > uqnodename.end(1))) {\r
- try {\r
- bootstrap = (new Integer(nbootstrap.group(1))).intValue();\r
- HasBootstrap = true;\r
- } catch (Exception e) {\r
- Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,\r
- cp + nbootstrap.start(0), nf);\r
+ if (nbootstrap.matches())\r
+ {\r
+ if (nodename!=null && nbootstrap.group(1).equals(nodename)) \r
+ {\r
+ nodename=null; // empty nodename - only bootstrap value\r
+ }\r
+ if ((nodename==null || nodename.length()==0) || nbootstrap.start(1)>=uqnodename.end(1))\r
+ {\r
+ try {\r
+ bootstrap = (new Integer(nbootstrap.group(1))).intValue();\r
+ HasBootstrap = true;\r
+ } catch (Exception e) {\r
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,\r
+ ncp + nbootstrap.start(0), nf);\r
+ }\r
}\r
}\r
-\r
- Matcher ndist = Pattern.compile(":([-0-9Ee.+]+)").matcher(fstring);\r
+ \r
+ Matcher ndist = Pattern.compile(".*:([-0-9Ee.+]+)").matcher(fstring);\r
boolean nodehasdistance = false;\r
\r
if (ndist.matches()) {\r
nodehasdistance = true;\r
} catch (Exception e) {\r
Error = ErrorStringrange(Error, "Can't parse node distance value",\r
- 7, cp + ndist.start(0), nf);\r
+ 7, ncp + ndist.start(0), nf);\r
}\r
}\r
\r
}\r
}\r
}\r
-\r
- // else : We do nothing if ';' is encountered.\r
}\r
\r
// Reset new node properties to obvious fakes\r
nodename = null;\r
distance = DefDistance;\r
bootstrap = DefBootstrap;\r
- if (nextcp == 0)\r
- cp = fcp + 1;\r
- else\r
- cp = nextcp;\r
+ }\r
+ // Advance character pointers if necessary\r
+ if (nextcp == 0) {\r
+ ncp = cp = fcp + 1;\r
+ } else {\r
+ cp = nextcp;\r
+ nextcp = 0;\r
}\r
}\r
\r
/**\r
* Builds the text for a single node by concatenating up to three parts, in\r
* this order: the node name, the bootstrap value and the distance.\r
* <ul>\r
* <li>name: nodeName(c.getName()), or the empty string when getName() is null</li>\r
* <li>bootstrap: written only when HasBootstrap is set and c.getBootstrap() is\r
* greater than -1</li>\r
* <li>distance: ":" followed by c.dist, written only when HasDistances is set</li>\r
* </ul>\r
* The removed line always put a single space in front of the bootstrap value;\r
* the added line writes that space only when the node name is null.\r
* \r
* NOTE(review): with the added line, a node that has both a name and a\r
* bootstrap gets the bootstrap digits appended directly after the name with no\r
* separator - confirm that this is the intended output.\r
* \r
* @param c\r
*          node to print\r
* \r
* @return the concatenated name, bootstrap and distance fields for the node\r
*/\r
private String printNodeField(SequenceNode c) {\r
return //c.getNewickNodeName()\r
((c.getName() == null) ? "" : nodeName(c.getName()))\r
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap())\r
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (((c.getName()==null) ? " " : "") + c.getBootstrap())\r
: "") : "") + ((HasDistances) ? (":" + c.dist) : "");\r
}\r
\r