/*\r
- * originally from \r
+ * This file is part of the Vamsas Client version 0.2. \r
+ * Copyright 2010 by Jim Procter, Iain Milne, Pierre Marguerite, \r
+ * Andrew Waterhouse and Dominik Lindner.\r
* \r
- * Jalview - A Sequence Alignment Editor and Viewer\r
- * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
- *\r
- * This program is free software; you can redistribute it and/or\r
- * modify it under the terms of the GNU General Public License\r
- * as published by the Free Software Foundation; either version 2\r
- * of the License, or (at your option) any later version.\r
- *\r
- * This program is distributed in the hope that it will be useful,\r
+ * Earlier versions have also been incorporated into Jalview version 2.4 \r
+ * since 2008, and TOPALi version 2 since 2007.\r
+ * \r
+ * The Vamsas Client is free software: you can redistribute it and/or modify\r
+ * it under the terms of the GNU Lesser General Public License as published by\r
+ * the Free Software Foundation, either version 3 of the License, or\r
+ * (at your option) any later version.\r
+ * \r
+ * The Vamsas Client is distributed in the hope that it will be useful,\r
* but WITHOUT ANY WARRANTY; without even the implied warranty of\r
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
- * GNU General Public License for more details.\r
- *\r
- * You should have received a copy of the GNU General Public License\r
- * along with this program; if not, write to the Free Software\r
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+ * GNU Lesser General Public License for more details.\r
+ * \r
+ * You should have received a copy of the GNU Lesser General Public License\r
+ * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.\r
*/\r
-\r
// NewickFile.java\r
// Tree I/O\r
// http://evolution.genetics.washington.edu/phylip/newick_doc.html\r
// File IO Flags\r
boolean ReplaceUnderscores = true;\r
\r
- boolean printRootInfo = false;\r
+ boolean printRootInfo = true; // in case root has anything to preserve\r
\r
private Pattern[] NodeSafeName = new Pattern[] {\r
Pattern.compile("[\\[,:'()]"), // test for requiring quotes\r
Pattern.compile("'"), // escaping quote characters\r
- Pattern.compile("/w") // unqoted whitespace transformation\r
+ Pattern.compile("\\s") // unqoted whitespace transformation\r
};\r
\r
char QuoteChar = '\'';\r
String newickFile = null;\r
\r
/**\r
- * Creates a new NewickFile object.\r
+ * Creates a new NewickFile object\r
* \r
* @param inStr\r
- * DOCUMENT ME!\r
+ * Newick style tree string\r
* \r
* @throws IOException\r
- * DOCUMENT ME!\r
+ * if string is not a valid newick file\r
*/\r
public NewickFile(String inStr) throws IOException {\r
newickFile = inStr;\r
}\r
\r
/**\r
- * call this to convert the newick string into a binary node linked tree\r
+ * call this to convert the newick string into a binary node linked tree Note:\r
+ * this is automatically called by the constructors, so you normally wouldn't\r
+ * need to use this.\r
* \r
* @throws IOException\r
* if the newick string cannot be parsed.\r
\r
float DefDistance = (float) 0.001; // @param Default distance for a node -\r
// very very small\r
- int DefBootstrap = 0; // @param Default bootstrap for a node\r
+ int DefBootstrap = -1; // @param Default bootstrap for a node\r
\r
float distance = DefDistance;\r
int bootstrap = DefBootstrap;\r
\r
Matcher mjsyms = majorsyms.matcher(nf);\r
char schar;\r
+ int nextcp = 0;\r
+ int ncp = cp;\r
while (mjsyms.find(cp) && (Error == null)) {\r
int fcp = mjsyms.start();\r
\r
break;\r
\r
default:\r
- int nextcp = 0;\r
+ // Reached termininating root node label.\r
+ if (schar == ';' && d != -1) {\r
+ Error = ErrorStringrange(Error,\r
+ "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
+ }\r
+\r
// Skip Comment or structured/extended NH format info\r
if (schar == '[') {\r
- if ((nextcp=nf.indexOf(']', fcp)) > -1) {\r
- // Skip the comment field\r
- // should advance fcp too here\r
+ if ((nextcp = nf.indexOf(']', fcp)) > -1) {\r
+ // verified that comment is properly terminated.\r
+ // now skip the comment field\r
nextcp++;\r
- //fcp = nextcp;\r
- //schar = nf.charAt(fcp);\r
+ break; // go and search for the next node separator, leaving ncp at\r
+ // beginning of node info\r
} else {\r
Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);\r
nextcp = 0;\r
break;\r
}\r
- ;\r
}\r
\r
- // Reached termininating root node label.\r
- if (schar == ';' && d != -1) {\r
- Error = ErrorStringrange(Error,\r
- "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
+ // Parse simpler field strings from substring between ncp and node\r
+ // separator\r
+ String fstring = nf.substring(ncp, fcp);\r
+ // extract any comments from the nodeinfo.\r
+ while (fstring.indexOf(']') > -1) {\r
+ int cstart = fstring.indexOf('[');\r
+ int cend = fstring.indexOf(']');\r
+ String comment = fstring.substring(cstart + 1, cend); // TODO: put\r
+ // this\r
+ // somewhere ?\r
+ fstring = fstring.substring(0, cstart) + fstring.substring(cend + 1);\r
}\r
-\r
- // Parse simpler field strings\r
- String fstring = nf.substring(cp, fcp);\r
Matcher uqnodename = Pattern.compile("^([^' :;\\](),]+).*").matcher(\r
fstring);\r
if (uqnodename.matches()\r
}\r
}\r
\r
- Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher(\r
+ Matcher nbootstrap = Pattern.compile("\\s*([+0-9]+)\\s*:.*").matcher(\r
fstring);\r
\r
- if (nbootstrap.matches() && (nbootstrap.start(1) > uqnodename.end(1))) {\r
- try {\r
- bootstrap = (new Integer(nbootstrap.group(1))).intValue();\r
- HasBootstrap = true;\r
- } catch (Exception e) {\r
- Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,\r
- cp + nbootstrap.start(0), nf);\r
+ if (nbootstrap.matches()) {\r
+ if (nodename != null && nbootstrap.group(1).equals(nodename)) {\r
+ nodename = null; // empty nodename - only bootstrap value\r
+ }\r
+ if ((nodename == null || nodename.length() == 0)\r
+ || nbootstrap.start(1) >= uqnodename.end(1)) {\r
+ try {\r
+ bootstrap = (new Integer(nbootstrap.group(1))).intValue();\r
+ HasBootstrap = true;\r
+ } catch (Exception e) {\r
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,\r
+ ncp + nbootstrap.start(0), nf);\r
+ }\r
}\r
}\r
\r
- Matcher ndist = Pattern.compile(":([-0-9Ee.+]+)").matcher(fstring);\r
+ Matcher ndist = Pattern.compile(".*:([-0-9Ee.+]+)").matcher(fstring);\r
boolean nodehasdistance = false;\r
\r
if (ndist.matches()) {\r
nodehasdistance = true;\r
} catch (Exception e) {\r
Error = ErrorStringrange(Error, "Can't parse node distance value",\r
- 7, cp + ndist.start(0), nf);\r
+ 7, ncp + ndist.start(0), nf);\r
}\r
}\r
\r
}\r
}\r
}\r
-\r
- // else : We do nothing if ';' is encountered.\r
}\r
\r
// Reset new node properties to obvious fakes\r
nodename = null;\r
distance = DefDistance;\r
bootstrap = DefBootstrap;\r
- if (nextcp == 0)\r
- cp = fcp + 1;\r
- else\r
- cp = nextcp;\r
+ }\r
+ // Advance character pointers if necessary\r
+ if (nextcp == 0) {\r
+ ncp = cp = fcp + 1;\r
+ } else {\r
+ cp = nextcp;\r
+ nextcp = 0;\r
}\r
}\r
\r
public uk.ac.vamsas.objects.core.Treenode[] matchTreeNodeNames(\r
String[] names, Vobject[] boundObjects) {\r
// todo!\r
- // also - need to reconstruct a names object id mapping (or BInaryNode) mapping for the parsed tree file\r
+ // also - need to reconstruct a names object id mapping (or BInaryNode)\r
+ // mapping for the parsed tree file\r
return null;\r
}\r
\r
private String nodeName(String name) {\r
if (NodeSafeName[0].matcher(name).find()) {\r
return QuoteChar + NodeSafeName[1].matcher(name).replaceAll("''")\r
- + QuoteChar; // quite \r
+ + QuoteChar; // quite\r
} else {\r
return NodeSafeName[2].matcher(name).replaceAll("_"); // whitespace\r
}\r
* @return DOCUMENT ME!\r
*/\r
private String printNodeField(SequenceNode c) {\r
- return //c.getNewickNodeName()\r
+ return // c.getNewickNodeName()\r
((c.getName() == null) ? "" : nodeName(c.getName()))\r
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap())\r
- : "") : "") + ((HasDistances) ? (":" + c.dist) : "");\r
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (((c.getName() == null) ? " "\r
+ : "") + c.getBootstrap())\r
+ : "")\r
+ : "") + ((HasDistances) ? (":" + c.dist) : "");\r
}\r
\r
/**\r
private String printRootField(SequenceNode root) {\r
return (printRootInfo) ? (((root.getName() == null) ? "" : nodeName(root\r
.getName()))\r
- + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? (" " + root\r
- .getBootstrap()) : "") : "") + ((RootHasDistance) ? (":" + root.dist)\r
- : ""))\r
+ + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root.getName() != null ? " "\r
+ : "") + root.getBootstrap())\r
+ : "")\r
+ : "") + ((RootHasDistance) ? (":" + root.dist) : ""))\r
: "";\r
}\r
\r
\r
/**\r
* Search for leaf nodes.\r
- *\r
- * @param node root node to search from\r
- * @param leaves Vector of leaves to add leaf node objects too.\r
- *\r
+ * \r
+ * @param node\r
+ * root node to search from\r
+ * @param leaves\r
+ * Vector of leaves to add leaf node objects too.\r
+ * \r
* @return Vector of leaf nodes on binary tree\r
*/\r
public Vector findLeaves(SequenceNode node, Vector leaves) {\r
return leaves;\r
}\r
\r
- if ((node.left() == null) && (node.right() == null)) // Interior node detection\r
+ if ((node.left() == null) && (node.right() == null)) // Interior node\r
+ // detection\r
{\r
leaves.addElement(node);\r
\r
return leaves;\r
} else {\r
- /* TODO: Identify internal nodes... if (node.isSequenceLabel())\r
- {\r
- leaves.addElement(node);\r
- }*/\r
+ /*\r
+ * TODO: Identify internal nodes... if (node.isSequenceLabel()) {\r
+ * leaves.addElement(node); }\r
+ */\r
findLeaves((SequenceNode) node.left(), leaves);\r
findLeaves((SequenceNode) node.right(), leaves);\r
}\r
}\r
\r
/**\r
- * make tree node vector from a newick tree structure with associated vamsas objects\r
+ * make tree node vector from a newick tree structure with associated vamsas\r
+ * objects\r
+ * \r
* @param ntree\r
* @return Treenode definitions for nodes with associated objects\r
*/\r
}\r
\r
/**\r
- * make treenode vector for a parsed tree with/out leaf node associations \r
- * @param ignoreplaceholders if true means only associated nodes are returned\r
+ * make treenode vector for a parsed tree with/out leaf node associations\r
+ * \r
+ * @param ignoreplaceholders\r
+ * if true means only associated nodes are returned\r
* @return treenode vector for associated or all leaves\r
*/\r
public Treenode[] makeTreeNodes(boolean ignoreplaceholders) {\r
}\r
\r
/**\r
- *\r
- * re-decorate the newick node representation with the VorbaId of an object mapped by its corresponding TreeNode. \r
- * Note: this only takes the first object in the first set of references.\r
+ * \r
+ * re-decorate the newick node representation with the VorbaId of an object\r
+ * mapped by its corresponding TreeNode. Note: this only takes the first\r
+ * object in the first set of references.\r
+ * \r
* @param tn\r
- * @return vector of mappings { treenode, SequenceNode, Vobject for VorbaId on sequence node }\r
+ * @return vector of mappings { treenode, SequenceNode, Vobject for VorbaId on\r
+ * sequence node }\r
*/\r
public Vector attachTreeMap(Treenode[] tn) {\r
if (root != null || tn == null)\r