--- /dev/null
+// NewickFile.java
+// Tree I/O
+// http://evolution.genetics.washington.edu/phylip/newick_doc.html
+package jalview.io;
+
+import java.io.*;
+import java.util.*;
+import jalview.datamodel.*;
+
+public class NewickFile extends FileParse
+{
+ SequenceNode root;
+
+ private boolean HasBootstrap = false;
+ private boolean HasDistances = false;
+ private boolean RootHasDistance = false;
+ private String ErrorStringrange(String Error, String Er, int r, int p, String s) {
+ return ((Error==null) ? "" : Error)
+ + Er +
+ " at position "+p+" ( "
+ + s.substring((p-r)<0 ? 0 : (p-r),
+ (p+r)>s.length() ? s.length() : (p+r))
+ + " )\n";
+ }
+
+ // @tree annotations
+ // These are set automatically by the reader
+ public boolean HasBootstrap() {
+ return HasBootstrap;
+ }
+
+ public boolean HasDistances() {
+ return HasDistances;
+ }
+ public NewickFile(String inStr) throws IOException
+ {
+ super(inStr, "Paste");
+ }
+
+ public NewickFile(String inFile, String type)
+ throws IOException
+ {
+
+ super(inFile, type);
+ }
+
+ public void parse() throws IOException
+ {
+ String nf;
+
+ { // fill nf with complete tree file
+ StringBuffer file = new StringBuffer();
+ while ( (nf = nextLine()) != null)
+ {
+ file.append(nf);
+ }
+ nf = file.toString();
+ }
+
+ root = new SequenceNode();
+ SequenceNode c = root;
+ int d = -1;
+ int cp = 0;
+ int flen = nf.length();
+ String Error = null;
+ String nodename = null;
+ float distance=-99999;
+ int bootstrap=-99999;
+ boolean ascending = false; // flag indicating that we are leaving the current node
+ com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex("[(\\['),;]");
+ while (majorsyms.searchFrom(nf, cp) && Error==null) {
+ int fcp = majorsyms.matchedFrom();
+ switch (nf.charAt(fcp)) {
+
+ case '[': // Comment or structured/extended NH format info
+ com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
+ if (comment.searchFrom(nf, fcp))
+ {
+ // Skip the comment field
+ cp = 1 + comment.matchedFrom();
+ }
+ else
+ {
+ Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
+ }
+ ;
+ break;
+
+ case '(':
+
+ // ascending should not be set
+ // New Internal node
+ if (ascending)
+ {
+ Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
+ continue;
+ }
+ ;
+ d++;
+ if (c.right() == null)
+ {
+ c.setRight(new SequenceNode(null, c, 0, null));
+ c = (SequenceNode) c.right();
+ }
+ else
+ {
+ if (c.left() != null)
+ {
+ // Dummy node for polytomy - keeps c.left free for new node
+ SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
+ tmpn.SetChildren(c.left(), c.right());
+ c.setRight(tmpn);
+ }
+ c.setLeft(new SequenceNode(null, c, 0, null));
+ c = (SequenceNode) c.left();
+ }
+ nodename = null;
+ distance = -99999;
+ bootstrap = -99999;
+ cp = fcp + 1;
+ break;
+
+ // Deal with quoted fields
+ case '\'':
+ com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
+ "([^']|'')+'");
+ if (qnodename.searchFrom(nf, fcp))
+ {
+ int nl = qnodename.stringMatched().length();
+ nodename = new String(qnodename.stringMatched().substring(0, nl - 1));
+ cp = fcp + nl + 1;
+ }
+ else
+ {
+ Error = ErrorStringrange(Error, "Unterminated quotes for nodename",
+ 7, fcp, nf);
+ }
+ break;
+
+ case ';':
+ if (d != -1)
+ {
+ Error = ErrorStringrange(Error,
+ "Wayward semicolon (depth=" + d + ")", 7,
+ fcp, nf);
+ }
+ // cp advanced at the end of default
+
+ default:
+
+ // Parse simpler field strings
+ String fstring = nf.substring(cp, fcp);
+ com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
+ "\\b([^' :;\\](),]+)");
+ com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
+ "\\S+([0-9+]+)\\S*:");
+ com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
+ ":([-0-9.+]+)");
+ if (uqnodename.search(fstring)
+ && (uqnodename.matchedFrom(1) == 0
+ || fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':')) // JBPNote HACK!
+
+ {
+ if (nodename == null)
+ {
+ nodename = uqnodename.stringMatched(1).replace('_', ' ');
+ }
+ else
+ {
+ Error = ErrorStringrange(Error,
+ "File has broken algorithm - overwritten nodename",
+ 10, fcp,
+ nf);
+ }
+ }
+ if (nbootstrap.search(fstring)
+ &&
+ nbootstrap.matchedFrom(1)
+ > uqnodename.matchedFrom(1) + uqnodename.stringMatched().length())
+ {
+ try
+ {
+ bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
+ HasBootstrap = true;
+ }
+ catch (Exception e)
+ {
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
+ cp + nbootstrap.matchedFrom(), nf);
+ }
+ }
+
+ if (ndist.search(fstring))
+ {
+ try
+ {
+ distance = (new Float(ndist.stringMatched(1))).floatValue();
+ HasDistances = true;
+ if (c.parent()==root) {
+ RootHasDistance = true;
+ }
+ }
+ catch (Exception e)
+ {
+ Error = ErrorStringrange(Error, "Can't parse node distance value",
+ 7, cp + ndist.matchedFrom(), nf);
+ }
+ }
+
+ if (ascending)
+ {
+ // Write node info here
+ c.setName(nodename);
+ c.dist = (HasDistances) ? distance : 0;
+ c.setBootstrap((HasBootstrap) ? bootstrap : 0);
+ }
+ else
+ {
+ // Find a place to put the leaf
+ SequenceNode newnode = new SequenceNode(null, c,
+ (HasDistances) ? distance : 0,
+ nodename);
+
+ newnode.setBootstrap(HasBootstrap ? bootstrap : 0);
+
+ if (c.right() == null)
+ {
+ c.setRight(newnode);
+ }
+ else
+ {
+ if (c.left() == null)
+ {
+ c.setLeft(newnode);
+ }
+ else
+ {
+ // Insert a dummy node for polytomy
+ SequenceNode newdummy = new SequenceNode(null, c, null, 0, 0, true);
+ newdummy.SetChildren(c.left(), newnode);
+ c.setLeft(newdummy);
+ }
+ }
+ }
+ if (ascending)
+ {
+ // move back up the tree from preceding closure
+ c = c.AscendTree();
+ if (d > -1 && c == null)
+ {
+ Error = ErrorStringrange(Error,
+ "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
+ 7, fcp, nf);
+ }
+ }
+
+ if (nf.charAt(fcp) == ')') {
+ d--;
+ ascending = true;
+ } else {
+ if (nf.charAt(fcp) == ',') {
+ if (ascending) {
+ ascending = false;
+ } else {
+ // Just advance focus, if we need to
+ if (c.left()!=null && (!c.left().isLeaf())) {
+ c = (SequenceNode) c.left();
+ }
+ }
+ } // else : We do nothing if ';' is encountered.
+ }
+ // Reset new node properties to obvious fakes
+ nodename = null;
+ distance = -99999;
+ bootstrap = -99999;
+
+ cp=fcp+1;
+ }
+ }
+
+ if (Error!=null) {
+ throw(new IOException("NewickFile: "+Error+"\n"));
+ }
+
+ root = (SequenceNode) root.right().detach(); // remove the imaginary root.
+ }
+
+ public NewickFile(SequenceNode newtree) {
+ root = newtree;
+ }
+
+ public NewickFile(SequenceNode newtree, boolean bootstrap) {
+ HasBootstrap = bootstrap;
+ root=newtree;
+ }
+ public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
+ root = newtree;
+ HasBootstrap = bootstrap;
+ HasDistances = distances;
+ }
+
+ public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) {
+ root = newtree;
+ HasBootstrap = bootstrap;
+ HasDistances = distances;
+ RootHasDistance = rootdistance;
+ }
+
+ public SequenceNode getTree() {
+ return root;
+ }
+
+ public String print() {
+ synchronized (this) {
+ StringBuffer tf = new StringBuffer();
+ print(tf, root);
+ return (tf.append(";").toString());
+ }
+ }
+
+ public String print(boolean withbootstraps) {
+ synchronized(this) {
+ boolean boots = this.HasBootstrap;
+ this.HasBootstrap = withbootstraps;
+ String rv = print();
+ this.HasBootstrap = boots;
+ return rv;
+ }
+ }
+
+ public String print(boolean withbootstraps, boolean withdists) {
+ synchronized(this) {
+ boolean dists = this.HasDistances;
+ this.HasDistances = withdists;
+ String rv = print(withbootstraps);
+ this.HasDistances = dists;
+ return rv;
+ }
+ }
+ boolean printRootInfo = false;
+
+
+ public String print(boolean withbootstraps, boolean withdists, boolean printRootInfo) {
+ synchronized(this) {
+ boolean rootinfo = printRootInfo;
+ this.printRootInfo = printRootInfo;
+ String rv = print(withbootstraps, withdists);
+ this.printRootInfo = rootinfo;
+ return rv;
+ }
+ }
+ private com.stevesoft.pat.Regex[] NodeSafeName =
+ new com.stevesoft.pat.Regex[]
+ { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
+ new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
+ new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
+ };
+ char QuoteChar='\'';
+ char getQuoteChar() {
+ return QuoteChar;
+ }
+
+ char setQuoteChar(char c) {
+ char old = QuoteChar;
+ QuoteChar = c;
+ return old;
+ }
+
+ private String nodeName(String name) {
+ if (NodeSafeName[0].search(name)) {
+ return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
+ } else {
+ return NodeSafeName[2].replaceAll(name);
+ }
+ }
+ private String printNodeField(SequenceNode c) {
+
+ return ( (c.getName() == null) ? ""
+ : nodeName(c.getName()))
+ + ( (HasBootstrap)
+ ? ( (c.getBootstrap() > -1)
+ ? " " + c.getBootstrap()
+ : "")
+ : "")
+ + ( (HasDistances)
+ ? ":" + c.dist : "");
+ }
+ private String printRootField(SequenceNode root) {
+
+ return (printRootInfo)
+ ? (( (root.getName() == null) ? ""
+ : nodeName(root.getName()))
+ + ( (HasBootstrap)
+ ? ( (root.getBootstrap() > -1)
+ ? " " + root.getBootstrap()
+ : "")
+ : "")
+ + ( (RootHasDistance)
+ ? ":" + root.dist : ""))
+ : "";
+ }
+
+// Non recursive call deals with root node properties
+ public void print(StringBuffer tf, SequenceNode root) {
+ if (root!=null) {
+ if (root.isLeaf() && printRootInfo) {
+ tf.append(printRootField(root));
+ } else {
+ if (root.isDummy()) {
+ _print(tf, (SequenceNode) root.right());
+ _print(tf, (SequenceNode) root.left());
+ } else {
+ tf.append("(");
+ _print(tf, (SequenceNode) root.right());
+ if (root.left() != null) {
+ tf.append(",");
+ }
+ _print(tf, (SequenceNode) root.left());
+ tf.append(")" + printRootField(root));
+ }
+ }
+ }
+ }
+
+ // Recursive call for non-root nodes
+ public void _print(StringBuffer tf, SequenceNode c) {
+ if (c!=null) {
+ if (c.isLeaf()) {
+ tf.append(printNodeField(c));
+ } else {
+ if (c.isDummy()) {
+ _print(tf, (SequenceNode) c.right());
+ _print(tf, (SequenceNode) c.left());
+ } else {
+ tf.append("(");
+ _print(tf, (SequenceNode) c.right());
+ if (c.left() != null) {
+ tf.append(",");
+ }
+ _print(tf, (SequenceNode) c.left());
+ tf.append(")" + printNodeField(c));
+ }
+ }
+ }
+ }
+
+// Test
+ public static void main(String[] args)
+{
+ try
+ {
+ File fn = new File(args[0]);
+
+ StringBuffer newickfile = new StringBuffer();
+ BufferedReader treefile =
+ new BufferedReader(new FileReader(fn));
+ String l;
+ while ((l = treefile.readLine())!=null) {
+ newickfile.append(l);
+ }
+ treefile.close();
+ System.out.println("Read file :\n");
+ NewickFile trf = new NewickFile(args[0], "File");
+ trf.parse();
+ System.out.println("Original file :\n");
+ com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
+ System.out.println(nonl.replaceAll(newickfile.toString())+"\n");
+
+ System.out.println("Parsed file.\n");
+ System.out.println("Default output type for original input.\n");
+ System.out.println(trf.print());
+ System.out.println("Without bootstraps.\n");
+ System.out.println(trf.print(false));
+ System.out.println("Without distances.\n");
+ System.out.println(trf.print(true,false));
+ System.out.println("Without bootstraps but with distanecs.\n");
+ System.out.println(trf.print(false, true));
+ System.out.println("Without bootstraps or distanecs.\n");
+ System.out.println(trf.print(false, false));
+ System.out.println("With bootstraps and with distances.\n");
+ System.out.println(trf.print(true, true));
+ }
+ catch (java.io.IOException e)
+ {
+ System.out.println("Exception\n" + e);
+ e.printStackTrace();
+ }
+}
+
+}