3 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
8 import jalview.datamodel.*;
10 public class NewickFile extends FileParse
14 private boolean HasBootstrap = false;
15 private boolean HasDistances = false;
16 private boolean RootHasDistance = false;
18 private String ErrorStringrange(String Error, String Er, int r, int p, String s) {
19 return ((Error==null) ? "" : Error)
21 " at position "+p+" ( "
22 + s.substring((p-r)<0 ? 0 : (p-r),
23 (p+r)>s.length() ? s.length() : (p+r))
28 // These are set automatically by the reader
29 public boolean HasBootstrap() {
33 public boolean HasDistances() {
36 public NewickFile(String inStr) throws IOException
38 super(inStr, "Paste");
41 public NewickFile(String inFile, String type)
48 boolean ReplaceUnderscores = false;
50 public void parse() throws IOException
54 { // fill nf with complete tree file
55 StringBuffer file = new StringBuffer();
56 while ( (nf = nextLine()) != null)
63 root = new SequenceNode();
64 SequenceNode realroot = null;
65 SequenceNode c = root;
68 int flen = nf.length();
70 String nodename = null;
71 float distance=-99999;
73 boolean ascending = false; // flag indicating that we are leaving the current node
74 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex("[(\\['),;]");
75 while (majorsyms.searchFrom(nf, cp) && Error==null) {
76 int fcp = majorsyms.matchedFrom();
77 switch (nf.charAt(fcp)) {
79 case '[': // Comment or structured/extended NH format info
80 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
81 if (comment.searchFrom(nf, fcp))
83 // Skip the comment field
84 cp = 1 + comment.matchedFrom();
88 Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
95 // ascending should not be set
99 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
104 if (c.right() == null)
106 c.setRight(new SequenceNode(null, c, 0, null));
107 c = (SequenceNode) c.right();
111 if (c.left() != null)
113 // Dummy node for polytomy - keeps c.left free for new node
114 SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
115 tmpn.SetChildren(c.left(), c.right());
118 c.setLeft(new SequenceNode(null, c, 0, null));
119 c = (SequenceNode) c.left();
121 if (realroot==null) {
130 // Deal with quoted fields
132 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
134 if (qnodename.searchFrom(nf, fcp))
136 int nl = qnodename.stringMatched().length();
137 nodename = new String(qnodename.stringMatched().substring(0, nl - 1));
142 Error = ErrorStringrange(Error, "Unterminated quotes for nodename",
150 Error = ErrorStringrange(Error,
151 "Wayward semicolon (depth=" + d + ")", 7,
154 // cp advanced at the end of default
158 // Parse simpler field strings
159 String fstring = nf.substring(cp, fcp);
160 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
161 "\\b([^' :;\\](),]+)");
162 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
163 "\\S+([0-9+]+)\\S*:");
164 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
166 if (uqnodename.search(fstring)
167 && (uqnodename.matchedFrom(1) == 0
168 || fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':')) // JBPNote HACK!
171 if (nodename == null)
173 if (ReplaceUnderscores) {
174 nodename = uqnodename.stringMatched(1).replace('_', ' ');
176 nodename = uqnodename.stringMatched(1);
181 Error = ErrorStringrange(Error,
182 "File has broken algorithm - overwritten nodename",
187 if (nbootstrap.search(fstring)
189 nbootstrap.matchedFrom(1)
190 > uqnodename.matchedFrom(1) + uqnodename.stringMatched().length())
194 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
199 Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
200 cp + nbootstrap.matchedFrom(), nf);
203 boolean nodehasdistance=false;
204 if (ndist.search(fstring))
208 distance = (new Float(ndist.stringMatched(1))).floatValue();
210 nodehasdistance = true;
214 Error = ErrorStringrange(Error, "Can't parse node distance value",
215 7, cp + ndist.matchedFrom(), nf);
221 // Write node info here
223 c.dist = (HasDistances) ? distance : 0;
224 c.setBootstrap((HasBootstrap) ? bootstrap : 0);
226 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!!
231 // Find a place to put the leaf
232 SequenceNode newnode = new SequenceNode(null, c,
233 (HasDistances) ? distance : 0,
236 newnode.setBootstrap(HasBootstrap ? bootstrap : 0);
238 if (c.right() == null)
244 if (c.left() == null)
250 // Insert a dummy node for polytomy
251 SequenceNode newdummy = new SequenceNode(null, c, null, 0, 0, true);
252 newdummy.SetChildren(c.left(), newnode);
259 // move back up the tree from preceding closure
261 if (d > -1 && c == null)
263 Error = ErrorStringrange(Error,
264 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
269 if (nf.charAt(fcp) == ')') {
273 if (nf.charAt(fcp) == ',') {
277 // Just advance focus, if we need to
278 if (c.left()!=null && (!c.left().isLeaf())) {
279 c = (SequenceNode) c.left();
282 } // else : We do nothing if ';' is encountered.
284 // Reset new node properties to obvious fakes
294 throw(new IOException("NewickFile: "+Error+"\n"));
297 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
298 if (!RootHasDistance) {
303 public NewickFile(SequenceNode newtree) {
307 public NewickFile(SequenceNode newtree, boolean bootstrap) {
308 HasBootstrap = bootstrap;
311 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
313 HasBootstrap = bootstrap;
314 HasDistances = distances;
317 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) {
319 HasBootstrap = bootstrap;
320 HasDistances = distances;
321 RootHasDistance = rootdistance;
324 public SequenceNode getTree() {
328 public String print() {
329 synchronized (this) {
330 StringBuffer tf = new StringBuffer();
332 return (tf.append(";").toString());
336 public String print(boolean withbootstraps) {
338 boolean boots = this.HasBootstrap;
339 this.HasBootstrap = withbootstraps;
341 this.HasBootstrap = boots;
346 public String print(boolean withbootstraps, boolean withdists) {
348 boolean dists = this.HasDistances;
349 this.HasDistances = withdists;
350 String rv = print(withbootstraps);
351 this.HasDistances = dists;
355 boolean printRootInfo = false;
358 public String print(boolean withbootstraps, boolean withdists, boolean printRootInfo) {
360 boolean rootinfo = printRootInfo;
361 this.printRootInfo = printRootInfo;
362 String rv = print(withbootstraps, withdists);
363 this.printRootInfo = rootinfo;
367 private com.stevesoft.pat.Regex[] NodeSafeName =
368 new com.stevesoft.pat.Regex[]
369 { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
370 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
371 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
374 char getQuoteChar() {
378 char setQuoteChar(char c) {
379 char old = QuoteChar;
384 private String nodeName(String name) {
385 if (NodeSafeName[0].search(name)) {
386 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
388 return NodeSafeName[2].replaceAll(name);
391 private String printNodeField(SequenceNode c) {
393 return ( (c.getName() == null) ? ""
394 : nodeName(c.getName()))
396 ? ( (c.getBootstrap() > -1)
397 ? " " + c.getBootstrap()
401 ? ":" + c.dist : "");
403 private String printRootField(SequenceNode root) {
405 return (printRootInfo)
406 ? (( (root.getName() == null) ? ""
407 : nodeName(root.getName()))
409 ? ( (root.getBootstrap() > -1)
410 ? " " + root.getBootstrap()
413 + ( (RootHasDistance)
414 ? ":" + root.dist : ""))
418 // Non recursive call deals with root node properties
419 public void print(StringBuffer tf, SequenceNode root) {
421 if (root.isLeaf() && printRootInfo) {
422 tf.append(printRootField(root));
424 if (root.isDummy()) {
425 _print(tf, (SequenceNode) root.right());
426 _print(tf, (SequenceNode) root.left());
429 _print(tf, (SequenceNode) root.right());
430 if (root.left() != null) {
433 _print(tf, (SequenceNode) root.left());
434 tf.append(")" + printRootField(root));
440 // Recursive call for non-root nodes
441 public void _print(StringBuffer tf, SequenceNode c) {
444 tf.append(printNodeField(c));
447 _print(tf, (SequenceNode) c.right());
448 _print(tf, (SequenceNode) c.left());
451 _print(tf, (SequenceNode) c.right());
452 if (c.left() != null) {
455 _print(tf, (SequenceNode) c.left());
456 tf.append(")" + printNodeField(c));
463 public static void main(String[] args)
467 File fn = new File(args[0]);
469 StringBuffer newickfile = new StringBuffer();
470 BufferedReader treefile =
471 new BufferedReader(new FileReader(fn));
473 while ((l = treefile.readLine())!=null) {
474 newickfile.append(l);
477 System.out.println("Read file :\n");
478 NewickFile trf = new NewickFile(args[0], "File");
480 System.out.println("Original file :\n");
481 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
482 System.out.println(nonl.replaceAll(newickfile.toString())+"\n");
484 System.out.println("Parsed file.\n");
485 System.out.println("Default output type for original input.\n");
486 System.out.println(trf.print());
487 System.out.println("Without bootstraps.\n");
488 System.out.println(trf.print(false));
489 System.out.println("Without distances.\n");
490 System.out.println(trf.print(true,false));
491 System.out.println("Without bootstraps but with distanecs.\n");
492 System.out.println(trf.print(false, true));
493 System.out.println("Without bootstraps or distanecs.\n");
494 System.out.println(trf.print(false, false));
495 System.out.println("With bootstraps and with distances.\n");
496 System.out.println(trf.print(true, true));
498 catch (java.io.IOException e)
500 System.out.println("Exception\n" + e);