X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fuk%2Fac%2Fvamsas%2Fobjects%2Futils%2Ftrees%2FNewickFile.java;h=d38c99aa381db696c3fbfcb9c7f63982e60638ed;hb=844ccad5a3fcbedec17b2af66d460f31abc7cff1;hp=2e1e893522d15fed6e7235853fe59c1e1ee27d0a;hpb=c5b324d5bb93fd421347f87ffe59bd6efe076d1e;p=vamsas.git
diff --git a/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java b/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java
index 2e1e893..d38c99a 100644
--- a/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java
+++ b/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java
@@ -1,24 +1,24 @@
/*
- * originally from
+ * This file is part of the Vamsas Client version 0.1.
+ * Copyright 2009 by Jim Procter, Iain Milne, Pierre Marguerite,
+ * Andrew Waterhouse and Dominik Lindner.
*
- * Jalview - A Sequence Alignment Editor and Viewer
- * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
+ * Earlier versions have also been incorporated into Jalview version 2.4
+ * since 2008, and TOPALi version 2 since 2007.
+ *
+ * The Vamsas Client is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The Vamsas Client is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the Vamsas Client. If not, see .
*/
-
// NewickFile.java
// Tree I/O
// http://evolution.genetics.washington.edu/phylip/newick_doc.html
@@ -27,7 +27,6 @@
// TODO: Extended SequenceNodeI to hold parsed NHX strings
package uk.ac.vamsas.objects.utils.trees;
-
import java.io.*;
import java.util.Enumeration;
import java.util.Hashtable;
@@ -55,14 +54,14 @@ public class NewickFile {
private boolean RootHasDistance = false;
// File IO Flags
- boolean ReplaceUnderscores = false;
+ boolean ReplaceUnderscores = true;
- boolean printRootInfo = false;
+ boolean printRootInfo = true; // in case root has anything to preserve
private Pattern[] NodeSafeName = new Pattern[] {
Pattern.compile("[\\[,:'()]"), // test for requiring quotes
Pattern.compile("'"), // escaping quote characters
- Pattern.compile("/w") // unqoted whitespace transformation
+ Pattern.compile("\\s") // unqoted whitespace transformation
};
char QuoteChar = '\'';
@@ -70,23 +69,26 @@ public class NewickFile {
String newickFile = null;
/**
- * Creates a new NewickFile object.
+ * Creates a new NewickFile object
*
* @param inStr
- * DOCUMENT ME!
+ * Newick style tree string
*
* @throws IOException
- * DOCUMENT ME!
+ * if string is not a valid newick file
*/
public NewickFile(String inStr) throws IOException {
newickFile = inStr;
parse();
}
+
public NewickFile(File inFile) throws IOException {
- errormessage = "Problem's reading file "+inFile;
- dataIn = new java.io.BufferedReader(new InputStreamReader(new java.io.FileInputStream(inFile)));
+ errormessage = "Problem's reading file " + inFile;
+ dataIn = new java.io.BufferedReader(new InputStreamReader(
+ new java.io.FileInputStream(inFile)));
parse();
}
+
/**
* Creates a new NewickFile object.
*
@@ -196,34 +198,40 @@ public class NewickFile {
public boolean HasRootDistance() {
return RootHasDistance;
}
+
/*
* hacked out of jalview code
*/
boolean error;
+
String errormessage;
- java.io.BufferedReader dataIn=null;
+
+ java.io.BufferedReader dataIn = null;
+
public String nextLine() throws IOException {
- if (dataIn==null)
- {
+ if (dataIn == null && newickFile == null)
+ throw new IOException(
+ "IMPLEMENTATION ERROR: NewickFile has not been initialised for reading a newick string.");
+ if (dataIn == null) {
dataIn = new BufferedReader(new StringReader(newickFile));
- error=false;
+ error = false;
}
- else
- throw new IOException("IMPLEMENTATION ERROR: NewickFile has not been initialised for reading a newick string.");
if (!error)
return dataIn.readLine();
throw new IOException("Invalid Source Stream:" + errormessage);
}
+
/**
- * call this to convert the newick string into a binary node linked tree
+ * call this to convert the newick string into a binary node linked tree Note:
+ * this is automatically called by the constructors, so you normally wouldn't
+ * need to use this.
*
* @throws IOException
* if the newick string cannot be parsed.
*/
public void parse() throws IOException {
String nf;
- if (newickFile==null)
- {
+ if (newickFile == null) {
// fill nf with complete tree file
StringBuffer file = new StringBuffer();
@@ -233,11 +241,9 @@ public class NewickFile {
}
nf = file.toString();
- } else
- {
+ } else {
nf = newickFile;
}
-
root = new SequenceNode();
@@ -252,37 +258,25 @@ public class NewickFile {
String nodename = null;
float DefDistance = (float) 0.001; // @param Default distance for a node -
- // very very small
- int DefBootstrap = 0; // @param Default bootstrap for a node
+ // very very small
+ int DefBootstrap = -1; // @param Default bootstrap for a node
float distance = DefDistance;
int bootstrap = DefBootstrap;
boolean ascending = false; // flag indicating that we are leaving the
- // current node
+ // current node
- Pattern majorsyms = Pattern.compile(
- "[(\\['),;]");
+ Pattern majorsyms = Pattern.compile("[(\\['),;]");
Matcher mjsyms = majorsyms.matcher(nf);
+ char schar;
+ int nextcp = 0;
+ int ncp = cp;
while (mjsyms.find(cp) && (Error == null)) {
int fcp = mjsyms.start();
- switch (nf.charAt(fcp)) {
- case '[': // Comment or structured/extended NH format info
-
-
- if (nf.indexOf(']',fcp)>-1) {
- // Skip the comment field
- cp = nf.indexOf(']',fcp);
- } else {
- Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
- }
-
- ;
-
- break;
-
+ switch (schar = nf.charAt(fcp)) {
case '(':
// ascending should not be set
@@ -338,19 +332,42 @@ public class NewickFile {
break;
- case ';':
-
- if (d != -1) {
+ default:
+ // Reached termininating root node label.
+ if (schar == ';' && d != -1) {
Error = ErrorStringrange(Error,
"Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
}
- // cp advanced at the end of default
- default:
+ // Skip Comment or structured/extended NH format info
+ if (schar == '[') {
+ if ((nextcp = nf.indexOf(']', fcp)) > -1) {
+ // verified that comment is properly terminated.
+ // now skip the comment field
+ nextcp++;
+ break; // go and search for the next node separator, leaving ncp at
+ // beginning of node info
+ } else {
+ Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
+ nextcp = 0;
+ break;
+ }
+ }
- // Parse simpler field strings
- String fstring = nf.substring(cp, fcp);
- Matcher uqnodename = Pattern.compile("\\b([^' :;\\](),]+)").matcher(fstring);
+ // Parse simpler field strings from substring between ncp and node
+ // separator
+ String fstring = nf.substring(ncp, fcp);
+ // extract any comments from the nodeinfo.
+ while (fstring.indexOf(']') > -1) {
+ int cstart = fstring.indexOf('[');
+ int cend = fstring.indexOf(']');
+ String comment = fstring.substring(cstart + 1, cend); // TODO: put
+ // this
+ // somewhere ?
+ fstring = fstring.substring(0, cstart) + fstring.substring(cend + 1);
+ }
+ Matcher uqnodename = Pattern.compile("^([^' :;\\](),]+).*").matcher(
+ fstring);
if (uqnodename.matches()
&& ((uqnodename.start(1) == 0) || (fstring.charAt(uqnodename
.start(1) - 1) != ':'))) // JBPNote HACK!
@@ -366,23 +383,27 @@ public class NewickFile {
"File has broken algorithm - overwritten nodename", 10, fcp, nf);
}
}
-
- Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher(fstring);
+ Matcher nbootstrap = Pattern.compile("\\s*([+0-9]+)\\s*:.*").matcher(
+ fstring);
- if (nbootstrap.matches()
- && (nbootstrap.start(1) > uqnodename.end(1))) {
- try {
- bootstrap = (new Integer(nbootstrap.group(1))).intValue();
- HasBootstrap = true;
- } catch (Exception e) {
- Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
- cp + nbootstrap.start(0), nf);
+ if (nbootstrap.matches()) {
+ if (nodename != null && nbootstrap.group(1).equals(nodename)) {
+ nodename = null; // empty nodename - only bootstrap value
+ }
+ if ((nodename == null || nodename.length() == 0)
+ || nbootstrap.start(1) >= uqnodename.end(1)) {
+ try {
+ bootstrap = (new Integer(nbootstrap.group(1))).intValue();
+ HasBootstrap = true;
+ } catch (Exception e) {
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
+ ncp + nbootstrap.start(0), nf);
+ }
}
}
- Matcher ndist = Pattern.compile(
- ":([-0-9Ee.+]+)").matcher(fstring);
+ Matcher ndist = Pattern.compile(".*:([-0-9Ee.+]+)").matcher(fstring);
boolean nodehasdistance = false;
if (ndist.matches()) {
@@ -392,7 +413,7 @@ public class NewickFile {
nodehasdistance = true;
} catch (Exception e) {
Error = ErrorStringrange(Error, "Can't parse node distance value",
- 7, cp + ndist.start(0), nf);
+ 7, ncp + ndist.start(0), nf);
}
}
@@ -405,8 +426,8 @@ public class NewickFile {
c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);
if (c == realroot) {
RootHasDistance = nodehasdistance; // JBPNote This is really
- // UGLY!!! Ensure root node gets
- // its given distance
+ // UGLY!!! Ensure root node gets
+ // its given distance
}
} else {
// Find a place to put the leaf
@@ -456,16 +477,19 @@ public class NewickFile {
}
}
}
-
- // else : We do nothing if ';' is encountered.
}
// Reset new node properties to obvious fakes
nodename = null;
distance = DefDistance;
bootstrap = DefBootstrap;
-
- cp = fcp + 1;
+ }
+ // Advance character pointers if necessary
+ if (nextcp == 0) {
+ ncp = cp = fcp + 1;
+ } else {
+ cp = nextcp;
+ nextcp = 0;
}
}
@@ -488,12 +512,15 @@ public class NewickFile {
public SequenceNode getTree() {
return root;
}
- public uk.ac.vamsas.objects.core.Treenode[] matchTreeNodeNames(String[] names, Vobject[] boundObjects)
- {
+
+ public uk.ac.vamsas.objects.core.Treenode[] matchTreeNodeNames(
+ String[] names, Vobject[] boundObjects) {
// todo!
- // also - need to reconstruct a names object id mapping (or BInaryNode) mapping for the parsed tree file
+ // also - need to reconstruct a names object id mapping (or BInaryNode)
+ // mapping for the parsed tree file
return null;
}
+
/**
* Generate a newick format tree according to internal flags for bootstraps,
* distances and root distances.
@@ -615,7 +642,8 @@ public class NewickFile {
*/
private String nodeName(String name) {
if (NodeSafeName[0].matcher(name).find()) {
- return QuoteChar + NodeSafeName[1].matcher(name).replaceAll("''") + QuoteChar; // quite
+ return QuoteChar + NodeSafeName[1].matcher(name).replaceAll("''")
+ + QuoteChar; // quite
} else {
return NodeSafeName[2].matcher(name).replaceAll("_"); // whitespace
}
@@ -630,10 +658,12 @@ public class NewickFile {
* @return DOCUMENT ME!
*/
private String printNodeField(SequenceNode c) {
- return //c.getNewickNodeName()
+ return // c.getNewickNodeName()
((c.getName() == null) ? "" : nodeName(c.getName()))
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap())
- : "") : "") + ((HasDistances) ? (":" + c.dist) : "");
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (((c.getName() == null) ? " "
+ : "") + c.getBootstrap())
+ : "")
+ : "") + ((HasDistances) ? (":" + c.dist) : "");
}
/**
@@ -647,9 +677,10 @@ public class NewickFile {
private String printRootField(SequenceNode root) {
return (printRootInfo) ? (((root.getName() == null) ? "" : nodeName(root
.getName()))
- + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? (" " + root
- .getBootstrap()) : "") : "") + ((RootHasDistance) ? (":" + root.dist)
- : ""))
+ + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root.getName() != null ? " "
+ : "") + root.getBootstrap())
+ : "")
+ : "") + ((RootHasDistance) ? (":" + root.dist) : ""))
: "";
}
@@ -729,7 +760,9 @@ public class NewickFile {
// trf.parse();
System.out.println("Original file :\n");
- System.out.println(Pattern.compile("\n+").matcher(newickfile.toString()).replaceAll("") + "\n");
+ System.out.println(Pattern.compile("\n+").matcher(newickfile.toString())
+ .replaceAll("")
+ + "\n");
System.out.println("Parsed file.\n");
System.out.println("Default output type for original input.\n");
@@ -744,69 +777,87 @@ public class NewickFile {
System.out.println(trf.print(false, false));
System.out.println("With bootstraps and with distances.\n");
System.out.println(trf.print(true, true));
+ System.out.println("leaves.\n");
+ Vector lvs = new Vector();
+ trf.findLeaves(trf.root, lvs);
+ Enumeration lv = lvs.elements();
+ while (lv.hasMoreElements()) {
+ BinaryNode leave = (BinaryNode) lv.nextElement();
+ if (leave.getName() != null) {
+ System.out.println("Node:'" + leave.getName() + "'");
+ }
+ }
} catch (java.io.IOException e) {
System.err.println("Exception\n" + e);
e.printStackTrace();
}
}
+
/**
* Search for leaf nodes.
- *
- * @param node root node to search from
- * @param leaves Vector of leaves to add leaf node objects too.
- *
+ *
+ * @param node
+ * root node to search from
+ * @param leaves
+ * Vector of leaves to add leaf node objects too.
+ *
* @return Vector of leaf nodes on binary tree
*/
- public Vector findLeaves(SequenceNode node, Vector leaves)
- {
- if (node == null)
- {
+ public Vector findLeaves(SequenceNode node, Vector leaves) {
+ if (node == null) {
return leaves;
}
- if ( (node.left() == null) && (node.right() == null)) // Interior node detection
+ if ((node.left() == null) && (node.right() == null)) // Interior node
+ // detection
{
leaves.addElement(node);
return leaves;
- }
- else
- {
-/* TODO: Identify internal nodes... if (node.isSequenceLabel())
- {
- leaves.addElement(node);
- }*/
- findLeaves( (SequenceNode) node.left(), leaves);
- findLeaves( (SequenceNode) node.right(), leaves);
+ } else {
+ /*
+ * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
+ * leaves.addElement(node); }
+ */
+ findLeaves((SequenceNode) node.left(), leaves);
+ findLeaves((SequenceNode) node.right(), leaves);
}
return leaves;
}
/**
- * make tree node vector from a newick tree structure with associated vamsas objects
+ * make tree node vector from a newick tree structure with associated vamsas
+ * objects
+ *
* @param ntree
- * @return
+ * @return Treenode definitions for nodes with associated objects
+ */
+ public Treenode[] makeTreeNodes() {
+ return makeTreeNodes(true);
+ }
+
+ /**
+ * make treenode vector for a parsed tree with/out leaf node associations
+ *
+ * @param ignoreplaceholders
+ * if true means only associated nodes are returned
+ * @return treenode vector for associated or all leaves
*/
- public Treenode[] makeTreeNodes() {
+ public Treenode[] makeTreeNodes(boolean ignoreplaceholders) {
Vector leaves = new Vector();
findLeaves(root, leaves);
Vector tnv = new Vector();
Enumeration l = leaves.elements();
Hashtable nodespecs = new Hashtable();
- while (l.hasMoreElements())
- {
+ while (l.hasMoreElements()) {
BinaryNode tnode = (BinaryNode) l.nextElement();
- if (tnode instanceof SequenceNode)
- {
- if (!((SequenceNode) tnode).isPlaceholder())
- {
+ if (tnode instanceof SequenceNode) {
+ if (!(ignoreplaceholders && ((SequenceNode) tnode).isPlaceholder())) {
Object assocseq = ((SequenceNode) tnode).element();
- if (assocseq instanceof Vobject)
- {
+ if (assocseq instanceof Vobject) {
Vobject vobj = (Vobject) assocseq;
- if (vobj!=null)
- {
+ if (vobj != null) {
Treenode node = new Treenode();
node.setNodespec(makeNodeSpec(nodespecs, tnode));
node.setName(tnode.getName());
@@ -814,35 +865,35 @@ public class NewickFile {
vr.addRefs(vobj);
node.addVref(vr);
tnv.addElement(node);
- }
- else
- {
- System.err.println("WARNING: Unassociated treeNode "+tnode.element().toString()+" "
- +((tnode.getName()!=null) ? " label "+tnode.getName() : ""));
+ } else {
+ System.err.println("WARNING: Unassociated treeNode "
+ + tnode.element().toString()
+ + " "
+ + ((tnode.getName() != null) ? " label " + tnode.getName()
+ : ""));
}
}
}
}
}
- if (tnv.size()>0)
- {
+ if (tnv.size() > 0) {
Treenode[] tn = new Treenode[tnv.size()];
- tnv.copyInto(tn);
+ tnv.copyInto(tn);
return tn;
}
return new Treenode[] {};
}
- private String makeNodeSpec(Hashtable nodespecs, BinaryNode tnode)
- {
+
+ private String makeNodeSpec(Hashtable nodespecs, BinaryNode tnode) {
String nname = new String(tnode.getName());
Integer nindx = (Integer) nodespecs.get(nname);
- if (nindx==null)
- {
+ if (nindx == null) {
nindx = new Integer(1);
}
- nname = nindx.toString()+" "+nname;
+ nname = nindx.toString() + " " + nname;
return nname;
}
+
/**
* call to match up Treenode specs to NJTree parsed from document object.
*
@@ -851,85 +902,78 @@ public class NewickFile {
* as returned from NJTree.findLeaves( .., ..) ..
* @return
*/
- private BinaryNode findNodeSpec(String nodespec, Vector leaves)
- {
- int occurence=-1;
- String nspec = nodespec.substring(nodespec.indexOf(' ')+1);
+ private BinaryNode findNodeSpec(String nodespec, Vector leaves) {
+ int occurence = -1;
+ String nspec = nodespec.substring(nodespec.indexOf(' ') + 1);
String oval = nodespec.substring(0, nodespec.indexOf(' '));
try {
occurence = new Integer(oval).intValue();
- }
- catch (Exception e)
- {
- System.err.println("Invalid nodespec '"+nodespec+"'");
+ } catch (Exception e) {
+ System.err.println("Invalid nodespec '" + nodespec + "'");
return null;
}
BinaryNode bn = null;
-
+
int nocc = 0;
Enumeration en = leaves.elements();
- while (en.hasMoreElements() && nocc