3 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
8 import jalview.datamodel.*;
10 public class NewickFile extends FileParse
14 private boolean HasBootstrap = false;
15 private boolean HasDistances = false;
16 private boolean RootHasDistance = false;
18 private String ErrorStringrange(String Error, String Er, int r, int p, String s) {
19 return ((Error==null) ? "" : Error)
21 " at position "+p+" ( "
22 + s.substring((p-r)<0 ? 0 : (p-r),
23 (p+r)>s.length() ? s.length() : (p+r))
28 // These are set automatically by the reader
29 public boolean HasBootstrap() {
33 public boolean HasDistances() {
36 public NewickFile(String inStr) throws IOException
38 super(inStr, "Paste");
41 public NewickFile(String inFile, String type)
48 boolean ReplaceUnderscores = false;
50 public void parse() throws IOException
54 { // fill nf with complete tree file
55 StringBuffer file = new StringBuffer();
56 while ( (nf = nextLine()) != null)
63 root = new SequenceNode();
64 SequenceNode realroot = null;
65 SequenceNode c = root;
69 int flen = nf.length();
72 String nodename = null;
74 float DefDistance = (float) 0.00001; // @param Default distance for a node - very very small
75 int DefBootstrap = 0; // @param Default bootstrap for a node
77 float distance=DefDistance;
78 int bootstrap=DefBootstrap;
80 boolean ascending = false; // flag indicating that we are leaving the current node
82 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex("[(\\['),;]");
84 while (majorsyms.searchFrom(nf, cp) && Error==null) {
85 int fcp = majorsyms.matchedFrom();
86 switch (nf.charAt(fcp)) {
88 case '[': // Comment or structured/extended NH format info
89 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
90 if (comment.searchFrom(nf, fcp))
92 // Skip the comment field
93 cp = 1 + comment.matchedFrom();
97 Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
104 // ascending should not be set
108 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
113 if (c.right() == null)
115 c.setRight(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false));
116 c = (SequenceNode) c.right();
120 if (c.left() != null)
122 // Dummy node for polytomy - keeps c.left free for new node
123 SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
124 tmpn.SetChildren(c.left(), c.right());
127 c.setLeft(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false));
128 c = (SequenceNode) c.left();
130 if (realroot==null) {
134 distance = DefDistance;
135 bootstrap = DefBootstrap;
139 // Deal with quoted fields
141 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
143 if (qnodename.searchFrom(nf, fcp))
145 int nl = qnodename.stringMatched().length();
146 nodename = new String(qnodename.stringMatched().substring(0, nl - 1));
151 Error = ErrorStringrange(Error, "Unterminated quotes for nodename",
159 Error = ErrorStringrange(Error,
160 "Wayward semicolon (depth=" + d + ")", 7,
163 // cp advanced at the end of default
167 // Parse simpler field strings
168 String fstring = nf.substring(cp, fcp);
169 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
170 "\\b([^' :;\\](),]+)");
171 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
172 "\\S+([0-9+]+)\\S*:");
173 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
175 if (uqnodename.search(fstring)
176 && (uqnodename.matchedFrom(1) == 0
177 || fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':')) // JBPNote HACK!
180 if (nodename == null)
182 if (ReplaceUnderscores) {
183 nodename = uqnodename.stringMatched(1).replace('_', ' ');
185 nodename = uqnodename.stringMatched(1);
190 Error = ErrorStringrange(Error,
191 "File has broken algorithm - overwritten nodename",
196 if (nbootstrap.search(fstring)
198 nbootstrap.matchedFrom(1)
199 > uqnodename.matchedFrom(1) + uqnodename.stringMatched().length())
203 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
208 Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
209 cp + nbootstrap.matchedFrom(), nf);
212 boolean nodehasdistance=false;
213 if (ndist.search(fstring))
217 distance = (new Float(ndist.stringMatched(1))).floatValue();
219 nodehasdistance = true;
223 Error = ErrorStringrange(Error, "Can't parse node distance value",
224 7, cp + ndist.matchedFrom(), nf);
230 // Write node info here
232 c.dist = (HasDistances) ? distance : 0;
233 c.setBootstrap((HasBootstrap) ? bootstrap : 0);
235 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!!
240 // Find a place to put the leaf
241 SequenceNode newnode =
242 new SequenceNode(null, c,
244 (HasDistances) ? distance : DefDistance,
245 (HasBootstrap) ? bootstrap : DefBootstrap,
248 if (c.right() == null)
254 if (c.left() == null)
260 // Insert a dummy node for polytomy
261 SequenceNode newdummy = new SequenceNode(null, c, null, 0, 0, true);
262 newdummy.SetChildren(c.left(), newnode);
269 // move back up the tree from preceding closure
271 if (d > -1 && c == null)
273 Error = ErrorStringrange(Error,
274 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
279 if (nf.charAt(fcp) == ')') {
283 if (nf.charAt(fcp) == ',') {
287 // Just advance focus, if we need to
288 if (c.left()!=null && (!c.left().isLeaf())) {
289 c = (SequenceNode) c.left();
292 } // else : We do nothing if ';' is encountered.
294 // Reset new node properties to obvious fakes
296 distance = DefDistance;
297 bootstrap = DefBootstrap;
304 throw(new IOException("NewickFile: "+Error+"\n"));
307 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
308 if (!RootHasDistance) {
314 public NewickFile(SequenceNode newtree) {
318 public NewickFile(SequenceNode newtree, boolean bootstrap) {
319 HasBootstrap = bootstrap;
322 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
324 HasBootstrap = bootstrap;
325 HasDistances = distances;
328 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) {
330 HasBootstrap = bootstrap;
331 HasDistances = distances;
332 RootHasDistance = rootdistance;
335 public SequenceNode getTree() {
339 public String print() {
340 synchronized (this) {
341 StringBuffer tf = new StringBuffer();
343 return (tf.append(";").toString());
347 public String print(boolean withbootstraps) {
349 boolean boots = this.HasBootstrap;
350 this.HasBootstrap = withbootstraps;
352 this.HasBootstrap = boots;
357 public String print(boolean withbootstraps, boolean withdists) {
359 boolean dists = this.HasDistances;
360 this.HasDistances = withdists;
361 String rv = print(withbootstraps);
362 this.HasDistances = dists;
366 boolean printRootInfo = false;
369 public String print(boolean withbootstraps, boolean withdists, boolean printRootInfo) {
371 boolean rootinfo = printRootInfo;
372 this.printRootInfo = printRootInfo;
373 String rv = print(withbootstraps, withdists);
374 this.printRootInfo = rootinfo;
378 private com.stevesoft.pat.Regex[] NodeSafeName =
379 new com.stevesoft.pat.Regex[]
380 { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
381 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
382 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
385 char getQuoteChar() {
389 char setQuoteChar(char c) {
390 char old = QuoteChar;
395 private String nodeName(String name) {
396 if (NodeSafeName[0].search(name)) {
397 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
399 return NodeSafeName[2].replaceAll(name);
402 private String printNodeField(SequenceNode c) {
404 return ( (c.getName() == null) ? ""
405 : nodeName(c.getName()))
407 ? ( (c.getBootstrap() > -1)
408 ? " " + c.getBootstrap()
412 ? ":" + c.dist : "");
414 private String printRootField(SequenceNode root) {
416 return (printRootInfo)
417 ? (( (root.getName() == null) ? ""
418 : nodeName(root.getName()))
420 ? ( (root.getBootstrap() > -1)
421 ? " " + root.getBootstrap()
424 + ( (RootHasDistance)
425 ? ":" + root.dist : ""))
429 // Non recursive call deals with root node properties
430 public void print(StringBuffer tf, SequenceNode root) {
432 if (root.isLeaf() && printRootInfo) {
433 tf.append(printRootField(root));
435 if (root.isDummy()) {
436 _print(tf, (SequenceNode) root.right());
437 _print(tf, (SequenceNode) root.left());
440 _print(tf, (SequenceNode) root.right());
441 if (root.left() != null) {
444 _print(tf, (SequenceNode) root.left());
445 tf.append(")" + printRootField(root));
451 // Recursive call for non-root nodes
452 public void _print(StringBuffer tf, SequenceNode c) {
455 tf.append(printNodeField(c));
458 _print(tf, (SequenceNode) c.right());
459 _print(tf, (SequenceNode) c.left());
462 _print(tf, (SequenceNode) c.right());
463 if (c.left() != null) {
466 _print(tf, (SequenceNode) c.left());
467 tf.append(")" + printNodeField(c));
474 public static void main(String[] args)
478 File fn = new File(args[0]);
480 StringBuffer newickfile = new StringBuffer();
481 BufferedReader treefile =
482 new BufferedReader(new FileReader(fn));
484 while ((l = treefile.readLine())!=null) {
485 newickfile.append(l);
488 System.out.println("Read file :\n");
489 NewickFile trf = new NewickFile(args[0], "File");
491 System.out.println("Original file :\n");
492 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
493 System.out.println(nonl.replaceAll(newickfile.toString())+"\n");
495 System.out.println("Parsed file.\n");
496 System.out.println("Default output type for original input.\n");
497 System.out.println(trf.print());
498 System.out.println("Without bootstraps.\n");
499 System.out.println(trf.print(false));
500 System.out.println("Without distances.\n");
501 System.out.println(trf.print(true,false));
502 System.out.println("Without bootstraps but with distanecs.\n");
503 System.out.println(trf.print(false, true));
504 System.out.println("Without bootstraps or distanecs.\n");
505 System.out.println(trf.print(false, false));
506 System.out.println("With bootstraps and with distances.\n");
507 System.out.println(trf.print(true, true));
509 catch (java.io.IOException e)
511 System.err.println("Exception\n" + e);