2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
28 import jalview.datamodel.*;
30 public class NewickFile extends FileParse
34 private boolean HasBootstrap = false;
35 private boolean HasDistances = false;
36 private boolean RootHasDistance = false;
38 private String ErrorStringrange(String Error, String Er, int r, int p, String s) {
39 return ((Error==null) ? "" : Error)
41 " at position "+p+" ( "
42 + s.substring((p-r)<0 ? 0 : (p-r),
43 (p+r)>s.length() ? s.length() : (p+r))
48 // These are set automatically by the reader
49 public boolean HasBootstrap() {
53 public boolean HasDistances() {
56 public NewickFile(String inStr) throws IOException
58 super(inStr, "Paste");
61 public NewickFile(String inFile, String type)
68 boolean ReplaceUnderscores = false;
70 public void parse() throws IOException
74 { // fill nf with complete tree file
75 StringBuffer file = new StringBuffer();
76 while ( (nf = nextLine()) != null)
83 root = new SequenceNode();
84 SequenceNode realroot = null;
85 SequenceNode c = root;
89 int flen = nf.length();
92 String nodename = null;
94 float DefDistance = (float) 0.00001; // @param Default distance for a node - very very small
95 int DefBootstrap = 0; // @param Default bootstrap for a node
97 float distance=DefDistance;
98 int bootstrap=DefBootstrap;
100 boolean ascending = false; // flag indicating that we are leaving the current node
102 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex("[(\\['),;]");
104 while (majorsyms.searchFrom(nf, cp) && Error==null) {
105 int fcp = majorsyms.matchedFrom();
106 switch (nf.charAt(fcp)) {
108 case '[': // Comment or structured/extended NH format info
109 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
110 if (comment.searchFrom(nf, fcp))
112 // Skip the comment field
113 cp = 1 + comment.matchedFrom();
117 Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
124 // ascending should not be set
128 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
133 if (c.right() == null)
135 c.setRight(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false));
136 c = (SequenceNode) c.right();
140 if (c.left() != null)
142 // Dummy node for polytomy - keeps c.left free for new node
143 SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
144 tmpn.SetChildren(c.left(), c.right());
147 c.setLeft(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false));
148 c = (SequenceNode) c.left();
150 if (realroot==null) {
154 distance = DefDistance;
155 bootstrap = DefBootstrap;
159 // Deal with quoted fields
161 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
163 if (qnodename.searchFrom(nf, fcp))
165 int nl = qnodename.stringMatched().length();
166 nodename = new String(qnodename.stringMatched().substring(0, nl - 1));
171 Error = ErrorStringrange(Error, "Unterminated quotes for nodename",
179 Error = ErrorStringrange(Error,
180 "Wayward semicolon (depth=" + d + ")", 7,
183 // cp advanced at the end of default
187 // Parse simpler field strings
188 String fstring = nf.substring(cp, fcp);
189 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
190 "\\b([^' :;\\](),]+)");
191 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
192 "\\S+([0-9+]+)\\S*:");
193 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
195 if (uqnodename.search(fstring)
196 && (uqnodename.matchedFrom(1) == 0
197 || fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':')) // JBPNote HACK!
200 if (nodename == null)
202 if (ReplaceUnderscores) {
203 nodename = uqnodename.stringMatched(1).replace('_', ' ');
205 nodename = uqnodename.stringMatched(1);
210 Error = ErrorStringrange(Error,
211 "File has broken algorithm - overwritten nodename",
216 if (nbootstrap.search(fstring)
218 nbootstrap.matchedFrom(1)
219 > uqnodename.matchedFrom(1) + uqnodename.stringMatched().length())
223 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
228 Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
229 cp + nbootstrap.matchedFrom(), nf);
232 boolean nodehasdistance=false;
233 if (ndist.search(fstring))
237 distance = (new Float(ndist.stringMatched(1))).floatValue();
239 nodehasdistance = true;
243 Error = ErrorStringrange(Error, "Can't parse node distance value",
244 7, cp + ndist.matchedFrom(), nf);
250 // Write node info here
252 c.dist = (HasDistances) ? distance : 0;
253 c.setBootstrap((HasBootstrap) ? bootstrap : 0);
255 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!!
260 // Find a place to put the leaf
261 SequenceNode newnode =
262 new SequenceNode(null, c,
264 (HasDistances) ? distance : DefDistance,
265 (HasBootstrap) ? bootstrap : DefBootstrap,
268 if (c.right() == null)
274 if (c.left() == null)
280 // Insert a dummy node for polytomy
281 SequenceNode newdummy = new SequenceNode(null, c, null, 0, 0, true);
282 newdummy.SetChildren(c.left(), newnode);
289 // move back up the tree from preceding closure
291 if (d > -1 && c == null)
293 Error = ErrorStringrange(Error,
294 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
299 if (nf.charAt(fcp) == ')') {
303 if (nf.charAt(fcp) == ',') {
307 // Just advance focus, if we need to
308 if (c.left()!=null && (!c.left().isLeaf())) {
309 c = (SequenceNode) c.left();
312 } // else : We do nothing if ';' is encountered.
314 // Reset new node properties to obvious fakes
316 distance = DefDistance;
317 bootstrap = DefBootstrap;
324 throw(new IOException("NewickFile: "+Error+"\n"));
327 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
328 if (!RootHasDistance) {
334 public NewickFile(SequenceNode newtree) {
338 public NewickFile(SequenceNode newtree, boolean bootstrap) {
339 HasBootstrap = bootstrap;
342 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
344 HasBootstrap = bootstrap;
345 HasDistances = distances;
348 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) {
350 HasBootstrap = bootstrap;
351 HasDistances = distances;
352 RootHasDistance = rootdistance;
355 public SequenceNode getTree() {
359 public String print() {
360 synchronized (this) {
361 StringBuffer tf = new StringBuffer();
363 return (tf.append(";").toString());
367 public String print(boolean withbootstraps) {
369 boolean boots = this.HasBootstrap;
370 this.HasBootstrap = withbootstraps;
372 this.HasBootstrap = boots;
377 public String print(boolean withbootstraps, boolean withdists) {
379 boolean dists = this.HasDistances;
380 this.HasDistances = withdists;
381 String rv = print(withbootstraps);
382 this.HasDistances = dists;
386 boolean printRootInfo = false;
389 public String print(boolean withbootstraps, boolean withdists, boolean printRootInfo) {
391 boolean rootinfo = printRootInfo;
392 this.printRootInfo = printRootInfo;
393 String rv = print(withbootstraps, withdists);
394 this.printRootInfo = rootinfo;
398 private com.stevesoft.pat.Regex[] NodeSafeName =
399 new com.stevesoft.pat.Regex[]
400 { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
401 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
402 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
405 char getQuoteChar() {
409 char setQuoteChar(char c) {
410 char old = QuoteChar;
415 private String nodeName(String name) {
416 if (NodeSafeName[0].search(name)) {
417 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
419 return NodeSafeName[2].replaceAll(name);
422 private String printNodeField(SequenceNode c) {
424 return ( (c.getName() == null) ? ""
425 : nodeName(c.getName()))
427 ? ( (c.getBootstrap() > -1)
428 ? " " + c.getBootstrap()
432 ? ":" + c.dist : "");
434 private String printRootField(SequenceNode root) {
436 return (printRootInfo)
437 ? (( (root.getName() == null) ? ""
438 : nodeName(root.getName()))
440 ? ( (root.getBootstrap() > -1)
441 ? " " + root.getBootstrap()
444 + ( (RootHasDistance)
445 ? ":" + root.dist : ""))
449 // Non recursive call deals with root node properties
450 public void print(StringBuffer tf, SequenceNode root) {
452 if (root.isLeaf() && printRootInfo) {
453 tf.append(printRootField(root));
455 if (root.isDummy()) {
456 _print(tf, (SequenceNode) root.right());
457 _print(tf, (SequenceNode) root.left());
460 _print(tf, (SequenceNode) root.right());
461 if (root.left() != null) {
464 _print(tf, (SequenceNode) root.left());
465 tf.append(")" + printRootField(root));
471 // Recursive call for non-root nodes
472 public void _print(StringBuffer tf, SequenceNode c) {
475 tf.append(printNodeField(c));
478 _print(tf, (SequenceNode) c.right());
479 _print(tf, (SequenceNode) c.left());
482 _print(tf, (SequenceNode) c.right());
483 if (c.left() != null) {
486 _print(tf, (SequenceNode) c.left());
487 tf.append(")" + printNodeField(c));
494 public static void main(String[] args)
498 File fn = new File(args[0]);
500 StringBuffer newickfile = new StringBuffer();
501 BufferedReader treefile =
502 new BufferedReader(new FileReader(fn));
504 while ((l = treefile.readLine())!=null) {
505 newickfile.append(l);
508 System.out.println("Read file :\n");
509 NewickFile trf = new NewickFile(args[0], "File");
511 System.out.println("Original file :\n");
512 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
513 System.out.println(nonl.replaceAll(newickfile.toString())+"\n");
515 System.out.println("Parsed file.\n");
516 System.out.println("Default output type for original input.\n");
517 System.out.println(trf.print());
518 System.out.println("Without bootstraps.\n");
519 System.out.println(trf.print(false));
520 System.out.println("Without distances.\n");
521 System.out.println(trf.print(true,false));
522 System.out.println("Without bootstraps but with distanecs.\n");
523 System.out.println(trf.print(false, true));
524 System.out.println("Without bootstraps or distanecs.\n");
525 System.out.println(trf.print(false, false));
526 System.out.println("With bootstraps and with distances.\n");
527 System.out.println(trf.print(true, true));
529 catch (java.io.IOException e)
531 System.err.println("Exception\n" + e);