// TODO: Extended SequenceNodeI to hold parsed NHX strings\r
package uk.ac.vamsas.objects.utils.trees;\r
\r
-\r
import java.io.*;\r
import java.util.Enumeration;\r
import java.util.Hashtable;\r
private boolean RootHasDistance = false;\r
\r
// File IO Flags\r
- boolean ReplaceUnderscores = false;\r
+ boolean ReplaceUnderscores = true;\r
\r
boolean printRootInfo = false;\r
\r
newickFile = inStr;\r
parse();\r
}\r
+\r
public NewickFile(File inFile) throws IOException {\r
- errormessage = "Problem's reading file "+inFile;\r
- dataIn = new java.io.BufferedReader(new InputStreamReader(new java.io.FileInputStream(inFile)));\r
+ errormessage = "Problem's reading file " + inFile;\r
+ dataIn = new java.io.BufferedReader(new InputStreamReader(\r
+ new java.io.FileInputStream(inFile)));\r
parse();\r
}\r
+\r
/**\r
* Creates a new NewickFile object.\r
* \r
public boolean HasRootDistance() {\r
return RootHasDistance;\r
}\r
+\r
/*\r
* hacked out of jalview code\r
*/\r
boolean error;\r
+\r
String errormessage;\r
- java.io.BufferedReader dataIn=null;\r
+\r
+ java.io.BufferedReader dataIn = null;\r
+\r
public String nextLine() throws IOException {\r
- if (dataIn==null && newickFile==null)\r
- throw new IOException("IMPLEMENTATION ERROR: NewickFile has not been initialised for reading a newick string.");\r
- if (dataIn==null)\r
- {\r
+ if (dataIn == null && newickFile == null)\r
+ throw new IOException(\r
+ "IMPLEMENTATION ERROR: NewickFile has not been initialised for reading a newick string.");\r
+ if (dataIn == null) {\r
dataIn = new BufferedReader(new StringReader(newickFile));\r
- error=false;\r
+ error = false;\r
}\r
if (!error)\r
return dataIn.readLine();\r
throw new IOException("Invalid Source Stream:" + errormessage);\r
}\r
+\r
/**\r
* call this to convert the newick string into a binary node linked tree\r
* \r
*/\r
public void parse() throws IOException {\r
String nf;\r
- if (newickFile==null)\r
- {\r
+ if (newickFile == null) {\r
// fill nf with complete tree file\r
\r
StringBuffer file = new StringBuffer();\r
}\r
\r
nf = file.toString();\r
- } else\r
- {\r
+ } else {\r
nf = newickFile;\r
}\r
- \r
\r
root = new SequenceNode();\r
\r
String nodename = null;\r
\r
float DefDistance = (float) 0.001; // @param Default distance for a node -\r
- // very very small\r
+ // very very small\r
int DefBootstrap = 0; // @param Default bootstrap for a node\r
\r
float distance = DefDistance;\r
int bootstrap = DefBootstrap;\r
\r
boolean ascending = false; // flag indicating that we are leaving the\r
- // current node\r
+ // current node\r
\r
- Pattern majorsyms = Pattern.compile(\r
- "[(\\['),;]");\r
+ Pattern majorsyms = Pattern.compile("[(\\['),;]");\r
\r
Matcher mjsyms = majorsyms.matcher(nf);\r
+ char schar;\r
while (mjsyms.find(cp) && (Error == null)) {\r
int fcp = mjsyms.start();\r
\r
- switch (nf.charAt(fcp)) {\r
- case '[': // Comment or structured/extended NH format info\r
-\r
-\r
- if (nf.indexOf(']',fcp)>-1) {\r
- // Skip the comment field\r
- cp = nf.indexOf(']',fcp);\r
- } else {\r
- Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);\r
- }\r
-\r
- ;\r
-\r
- break;\r
-\r
+ switch (schar = nf.charAt(fcp)) {\r
case '(':\r
\r
// ascending should not be set\r
\r
break;\r
\r
- case ';':\r
+ default:\r
+ int nextcp = 0;\r
+ // Skip Comment or structured/extended NH format info\r
+ if (schar == '[') {\r
+ if ((nextcp=nf.indexOf(']', fcp)) > -1) {\r
+ // Skip the comment field\r
+ // should advance fcp too here\r
+ nextcp++;\r
+ //fcp = nextcp;\r
+ //schar = nf.charAt(fcp);\r
+ } else {\r
+ Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);\r
+ nextcp = 0;\r
+ break;\r
+ }\r
+ ;\r
+ }\r
\r
- if (d != -1) {\r
+ // Reached terminating root node label.\r
+ if (schar == ';' && d != -1) {\r
Error = ErrorStringrange(Error,\r
"Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
}\r
\r
- // cp advanced at the end of default\r
- default:\r
-\r
// Parse simpler field strings\r
String fstring = nf.substring(cp, fcp);\r
- Matcher uqnodename = Pattern.compile("\\b([^' :;\\](),]+)").matcher(fstring);\r
+ Matcher uqnodename = Pattern.compile("^([^' :;\\](),]+).*").matcher(\r
+ fstring);\r
if (uqnodename.matches()\r
&& ((uqnodename.start(1) == 0) || (fstring.charAt(uqnodename\r
.start(1) - 1) != ':'))) // JBPNote HACK!\r
"File has broken algorithm - overwritten nodename", 10, fcp, nf);\r
}\r
}\r
- \r
- Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher(fstring);\r
\r
+ Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher(\r
+ fstring);\r
\r
- if (nbootstrap.matches()\r
- && (nbootstrap.start(1) > uqnodename.end(1))) {\r
+ if (nbootstrap.matches() && (nbootstrap.start(1) > uqnodename.end(1))) {\r
try {\r
bootstrap = (new Integer(nbootstrap.group(1))).intValue();\r
HasBootstrap = true;\r
}\r
}\r
\r
- Matcher ndist = Pattern.compile(\r
- ":([-0-9Ee.+]+)").matcher(fstring);\r
+ Matcher ndist = Pattern.compile(":([-0-9Ee.+]+)").matcher(fstring);\r
boolean nodehasdistance = false;\r
\r
if (ndist.matches()) {\r
c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);\r
if (c == realroot) {\r
RootHasDistance = nodehasdistance; // JBPNote This is really\r
- // UGLY!!! Ensure root node gets\r
- // its given distance\r
+ // UGLY!!! Ensure root node gets\r
+ // its given distance\r
}\r
} else {\r
// Find a place to put the leaf\r
nodename = null;\r
distance = DefDistance;\r
bootstrap = DefBootstrap;\r
-\r
- cp = fcp + 1;\r
+ if (nextcp == 0)\r
+ cp = fcp + 1;\r
+ else\r
+ cp = nextcp;\r
}\r
}\r
\r
public SequenceNode getTree() {\r
return root;\r
}\r
- public uk.ac.vamsas.objects.core.Treenode[] matchTreeNodeNames(String[] names, Vobject[] boundObjects)\r
- {\r
+\r
+ public uk.ac.vamsas.objects.core.Treenode[] matchTreeNodeNames(\r
+ String[] names, Vobject[] boundObjects) {\r
// todo!\r
// also - need to reconstruct a names object id mapping (or BInaryNode) mapping for the parsed tree file\r
return null;\r
}\r
+\r
/**\r
* Generate a newick format tree according to internal flags for bootstraps,\r
* distances and root distances.\r
*/\r
private String nodeName(String name) {\r
if (NodeSafeName[0].matcher(name).find()) {\r
- return QuoteChar + NodeSafeName[1].matcher(name).replaceAll("''") + QuoteChar; // quite \r
+ return QuoteChar + NodeSafeName[1].matcher(name).replaceAll("''")\r
+ + QuoteChar; // quite \r
} else {\r
return NodeSafeName[2].matcher(name).replaceAll("_"); // whitespace\r
}\r
private String printNodeField(SequenceNode c) {\r
return //c.getNewickNodeName()\r
((c.getName() == null) ? "" : nodeName(c.getName()))\r
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap())\r
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap())\r
: "") : "") + ((HasDistances) ? (":" + c.dist) : "");\r
}\r
\r
// trf.parse();\r
System.out.println("Original file :\n");\r
\r
- System.out.println(Pattern.compile("\n+").matcher(newickfile.toString()).replaceAll("") + "\n");\r
+ System.out.println(Pattern.compile("\n+").matcher(newickfile.toString())\r
+ .replaceAll("")\r
+ + "\n");\r
\r
System.out.println("Parsed file.\n");\r
System.out.println("Default output type for original input.\n");\r
System.out.println("leaves.\n");\r
Vector lvs = new Vector();\r
trf.findLeaves(trf.root, lvs);\r
- Enumeration lv =lvs.elements();\r
- while (lv.hasMoreElements())\r
- {\r
+ Enumeration lv = lvs.elements();\r
+ while (lv.hasMoreElements()) {\r
BinaryNode leave = (BinaryNode) lv.nextElement();\r
- if (leave.getName()!=null)\r
- {\r
- System.out.println("Node:'"+leave.getName()+"'");\r
+ if (leave.getName() != null) {\r
+ System.out.println("Node:'" + leave.getName() + "'");\r
}\r
- } \r
+ }\r
} catch (java.io.IOException e) {\r
System.err.println("Exception\n" + e);\r
e.printStackTrace();\r
}\r
}\r
+\r
/**\r
* Search for leaf nodes.\r
*\r
*\r
* @return Vector of leaf nodes on binary tree\r
*/\r
- public Vector findLeaves(SequenceNode node, Vector leaves)\r
- { \r
- if (node == null)\r
- {\r
+ public Vector findLeaves(SequenceNode node, Vector leaves) {\r
+ if (node == null) {\r
return leaves;\r
}\r
\r
- if ( (node.left() == null) && (node.right() == null)) // Interior node detection\r
+ if ((node.left() == null) && (node.right() == null)) // Interior node detection\r
{\r
leaves.addElement(node);\r
\r
return leaves;\r
- }\r
- else\r
- {\r
-/* TODO: Identify internal nodes... if (node.isSequenceLabel())\r
- {\r
- leaves.addElement(node);\r
- }*/\r
- findLeaves( (SequenceNode) node.left(), leaves);\r
- findLeaves( (SequenceNode) node.right(), leaves);\r
+ } else {\r
+ /* TODO: Identify internal nodes... if (node.isSequenceLabel())\r
+ {\r
+ leaves.addElement(node);\r
+ }*/\r
+ findLeaves((SequenceNode) node.left(), leaves);\r
+ findLeaves((SequenceNode) node.right(), leaves);\r
}\r
\r
return leaves;\r
public Treenode[] makeTreeNodes() {\r
return makeTreeNodes(true);\r
}\r
+\r
/**\r
* make treenode vector for a parsed tree with/out leaf node associations \r
* @param ignoreplaceholders if true means only associated nodes are returned\r
* @return treenode vector for associated or all leaves\r
*/\r
- public Treenode[] makeTreeNodes(boolean ignoreplaceholders) { \r
+ public Treenode[] makeTreeNodes(boolean ignoreplaceholders) {\r
Vector leaves = new Vector();\r
findLeaves(root, leaves);\r
Vector tnv = new Vector();\r
Enumeration l = leaves.elements();\r
Hashtable nodespecs = new Hashtable();\r
- while (l.hasMoreElements())\r
- {\r
+ while (l.hasMoreElements()) {\r
BinaryNode tnode = (BinaryNode) l.nextElement();\r
- if (tnode instanceof SequenceNode)\r
- {\r
- if (!(ignoreplaceholders && ((SequenceNode) tnode).isPlaceholder()))\r
- {\r
+ if (tnode instanceof SequenceNode) {\r
+ if (!(ignoreplaceholders && ((SequenceNode) tnode).isPlaceholder())) {\r
Object assocseq = ((SequenceNode) tnode).element();\r
- if (assocseq instanceof Vobject)\r
- {\r
+ if (assocseq instanceof Vobject) {\r
Vobject vobj = (Vobject) assocseq;\r
- if (vobj!=null)\r
- {\r
+ if (vobj != null) {\r
Treenode node = new Treenode();\r
node.setNodespec(makeNodeSpec(nodespecs, tnode));\r
node.setName(tnode.getName());\r
vr.addRefs(vobj);\r
node.addVref(vr);\r
tnv.addElement(node);\r
- }\r
- else\r
- {\r
- System.err.println("WARNING: Unassociated treeNode "+tnode.element().toString()+" "\r
- +((tnode.getName()!=null) ? " label "+tnode.getName() : ""));\r
+ } else {\r
+ System.err.println("WARNING: Unassociated treeNode "\r
+ + tnode.element().toString()\r
+ + " "\r
+ + ((tnode.getName() != null) ? " label " + tnode.getName()\r
+ : ""));\r
}\r
}\r
}\r
}\r
}\r
- if (tnv.size()>0)\r
- {\r
+ if (tnv.size() > 0) {\r
Treenode[] tn = new Treenode[tnv.size()];\r
- tnv.copyInto(tn); \r
+ tnv.copyInto(tn);\r
return tn;\r
}\r
return new Treenode[] {};\r
}\r
- private String makeNodeSpec(Hashtable nodespecs, BinaryNode tnode)\r
- { \r
+\r
+ /**\r
+ * Build the nodespec string "<occurrence> <name>" for a tree node.\r
+ * \r
+ * @param nodespecs table of node name to the occurrence number to use next;\r
+ *          updated by this call\r
+ * @param tnode node whose name is used for the nodespec\r
+ * @return the occurrence number, a single space, then the node name\r
+ */\r
+ private String makeNodeSpec(Hashtable nodespecs, BinaryNode tnode) {\r
String nname = new String(tnode.getName());\r
Integer nindx = (Integer) nodespecs.get(nname);\r
- if (nindx==null)\r
- {\r
+ if (nindx == null) {\r
nindx = new Integer(1);\r
}\r
- nname = nindx.toString()+" "+nname;\r
+ // Record the occurrence number for the next node with this name; without\r
+ // this every node is emitted as occurrence 1 and repeated names collide.\r
+ nodespecs.put(nname, new Integer(nindx.intValue() + 1));\r
+ nname = nindx.toString() + " " + nname;\r
return nname;\r
}\r
+\r
/**\r
* call to match up Treenode specs to NJTree parsed from document object.\r
* \r
* as returned from NJTree.findLeaves( .., ..) ..\r
* @return\r
*/\r
- private BinaryNode findNodeSpec(String nodespec, Vector leaves)\r
- {\r
- int occurence=-1;\r
- String nspec = nodespec.substring(nodespec.indexOf(' ')+1);\r
+ private BinaryNode findNodeSpec(String nodespec, Vector leaves) {\r
+ int occurence = -1;\r
+ String nspec = nodespec.substring(nodespec.indexOf(' ') + 1);\r
String oval = nodespec.substring(0, nodespec.indexOf(' '));\r
try {\r
occurence = new Integer(oval).intValue();\r
- }\r
- catch (Exception e)\r
- {\r
- System.err.println("Invalid nodespec '"+nodespec+"'");\r
+ } catch (Exception e) {\r
+ System.err.println("Invalid nodespec '" + nodespec + "'");\r
return null;\r
}\r
BinaryNode bn = null;\r
- \r
+\r
int nocc = 0;\r
Enumeration en = leaves.elements();\r
- while (en.hasMoreElements() && nocc<occurence)\r
- {\r
+ while (en.hasMoreElements() && nocc < occurence) {\r
bn = (BinaryNode) en.nextElement();\r
- if (bn instanceof SequenceNode && bn.getName().equals(nspec))\r
- {\r
- --occurence;\r
- } else \r
- bn=null;\r
+ if (bn instanceof SequenceNode && bn.getName().equals(nspec)) {\r
+ --occurence;\r
+ } else\r
+ bn = null;\r
}\r
return bn;\r
}\r
+\r
/**\r
*\r
* re-decorate the newick node representation with the VorbaId of an object mapped by its corresponding TreeNode. \r
* @param tn\r
* @return vector of mappings { treenode, SequenceNode, Vobject for VorbaId on sequence node }\r
*/\r
- public Vector attachTreeMap(Treenode[] tn)\r
- {\r
- if (root!=null || tn==null)\r
- return null;\r
- Vector leaves = new Vector();\r
- Vector nodemap=new Vector();\r
- findLeaves(root, leaves);\r
- int sz = tn.length;\r
- int i = 0;\r
- \r
- while (i < sz)\r
- {\r
- Treenode node = tn[i++];\r
- BinaryNode mappednode = findNodeSpec(node.getNodespec(),leaves);\r
- if (mappednode!=null && mappednode instanceof SequenceNode) {\r
- SequenceNode leaf = (SequenceNode) leaves.elementAt(i++);\r
- // check if we can make the specified association\r
- Vobject noderef = null;\r
- int vrf=0,refv=0;\r
- while (noderef==null && vrf<node.getVrefCount())\r
- {\r
- if (refv<node.getVref(vrf).getRefsCount())\r
- {\r
- Object ref = node.getVref(vrf).getRefs(refv++);\r
- if (ref instanceof Vobject)\r
- {\r
- noderef = (Vobject) ref;\r
+ public Vector attachTreeMap(Treenode[] tn) {\r
+ // Nothing can be attached without a parsed tree or without treenodes.\r
+ if (root == null || tn == null)\r
+ return null;\r
+ Vector leaves = new Vector();\r
+ Vector nodemap = new Vector();\r
+ findLeaves(root, leaves);\r
+ int sz = tn.length;\r
+ int i = 0;\r
+\r
+ while (i < sz) {\r
+ Treenode node = tn[i++];\r
+ BinaryNode mappednode = findNodeSpec(node.getNodespec(), leaves);\r
+ if (mappednode != null && mappednode instanceof SequenceNode) {\r
+ // Use the leaf the nodespec resolved to. Indexing leaves with the\r
+ // treenode counter ignored the lookup, skipped every other treenode\r
+ // and could run past the end of the leaf vector.\r
+ SequenceNode leaf = (SequenceNode) mappednode;\r
+ // check if we can make the specified association\r
+ Vobject noderef = null;\r
+ int vrf = 0, refv = 0;\r
+ // Scan each Vref's references in order and stop at the first Vobject.\r
+ while (noderef == null && vrf < node.getVrefCount()) {\r
+ if (refv < node.getVref(vrf).getRefsCount()) {\r
+ Object ref = node.getVref(vrf).getRefs(refv++);\r
+ if (ref instanceof Vobject) {\r
+ noderef = (Vobject) ref;\r
+ }\r
+ } else {\r
+ refv = 0;\r
+ vrf++;\r
}\r
+ }\r
+ if (noderef != null) {\r
+ nodemap.addElement(new Object[] { node, leaf, noderef });\r
+ leaf.setElement(noderef);\r
+ leaf.setPlaceholder(false);\r
} else {\r
- refv=0;\r
- vrf++;\r
+ // No Vobject reference found: mark the leaf as a placeholder.\r
+ leaf.setPlaceholder(true);\r
}\r
}\r
- if (noderef!=null)\r
- {\r
- nodemap.addElement(new Object[] { node, leaf, noderef });\r
- leaf.setElement(noderef);\r
- leaf.setPlaceholder(false);\r
- } else {\r
- leaf.setPlaceholder(true);\r
- }\r
}\r
+ return nodemap;\r
}\r
- return nodemap;\r
-}\r
\r
}\r