3 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
8 import jalview.datamodel.*;
10 public class NewickFile extends FileParse
14 private boolean HasBootstrap = false;
15 private boolean HasDistances = false;
16 private boolean RootHasDistance = false;
17 private String ErrorStringrange(String Error, String Er, int r, int p, String s) {
18 return ((Error==null) ? "" : Error)
20 " at position "+p+" ( "
21 + s.substring((p-r)<0 ? 0 : (p-r),
22 (p+r)>s.length() ? s.length() : (p+r))
27 // These are set automatically by the reader
28 public boolean HasBootstrap() {
32 public boolean HasDistances() {
35 public NewickFile(String inStr) throws IOException
37 super(inStr, "Paste");
40 public NewickFile(String inFile, String type)
47 public void parse() throws IOException
51 { // fill nf with complete tree file
52 StringBuffer file = new StringBuffer();
53 while ( (nf = nextLine()) != null)
60 root = new SequenceNode();
61 SequenceNode c = root;
64 int flen = nf.length();
66 String nodename = null;
67 float distance=-99999;
69 boolean ascending = false; // flag indicating that we are leaving the current node
70 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex("[(\\['),;]");
71 while (majorsyms.searchFrom(nf, cp) && Error==null) {
72 int fcp = majorsyms.matchedFrom();
73 switch (nf.charAt(fcp)) {
75 case '[': // Comment or structured/extended NH format info
76 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
77 if (comment.searchFrom(nf, fcp))
79 // Skip the comment field
80 cp = 1 + comment.matchedFrom();
84 Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf);
91 // ascending should not be set
95 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
100 if (c.right() == null)
102 c.setRight(new SequenceNode(null, c, 0, null));
103 c = (SequenceNode) c.right();
107 if (c.left() != null)
109 // Dummy node for polytomy - keeps c.left free for new node
110 SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
111 tmpn.SetChildren(c.left(), c.right());
114 c.setLeft(new SequenceNode(null, c, 0, null));
115 c = (SequenceNode) c.left();
123 // Deal with quoted fields
125 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
127 if (qnodename.searchFrom(nf, fcp))
129 int nl = qnodename.stringMatched().length();
130 nodename = new String(qnodename.stringMatched().substring(0, nl - 1));
135 Error = ErrorStringrange(Error, "Unterminated quotes for nodename",
143 Error = ErrorStringrange(Error,
144 "Wayward semicolon (depth=" + d + ")", 7,
147 // cp advanced at the end of default
151 // Parse simpler field strings
152 String fstring = nf.substring(cp, fcp);
153 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
154 "\\b([^' :;\\](),]+)");
155 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
156 "\\S+([0-9+]+)\\S*:");
157 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
159 if (uqnodename.search(fstring)
160 && (uqnodename.matchedFrom(1) == 0
161 || fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':')) // JBPNote HACK!
164 if (nodename == null)
166 nodename = uqnodename.stringMatched(1).replace('_', ' ');
170 Error = ErrorStringrange(Error,
171 "File has broken algorithm - overwritten nodename",
176 if (nbootstrap.search(fstring)
178 nbootstrap.matchedFrom(1)
179 > uqnodename.matchedFrom(1) + uqnodename.stringMatched().length())
183 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
188 Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4,
189 cp + nbootstrap.matchedFrom(), nf);
193 if (ndist.search(fstring))
197 distance = (new Float(ndist.stringMatched(1))).floatValue();
199 if (c.parent()==root) {
200 RootHasDistance = true;
205 Error = ErrorStringrange(Error, "Can't parse node distance value",
206 7, cp + ndist.matchedFrom(), nf);
212 // Write node info here
214 c.dist = (HasDistances) ? distance : 0;
215 c.setBootstrap((HasBootstrap) ? bootstrap : 0);
219 // Find a place to put the leaf
220 SequenceNode newnode = new SequenceNode(null, c,
221 (HasDistances) ? distance : 0,
224 newnode.setBootstrap(HasBootstrap ? bootstrap : 0);
226 if (c.right() == null)
232 if (c.left() == null)
238 // Insert a dummy node for polytomy
239 SequenceNode newdummy = new SequenceNode(null, c, null, 0, 0, true);
240 newdummy.SetChildren(c.left(), newnode);
247 // move back up the tree from preceding closure
249 if (d > -1 && c == null)
251 Error = ErrorStringrange(Error,
252 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
257 if (nf.charAt(fcp) == ')') {
261 if (nf.charAt(fcp) == ',') {
265 // Just advance focus, if we need to
266 if (c.left()!=null && (!c.left().isLeaf())) {
267 c = (SequenceNode) c.left();
270 } // else : We do nothing if ';' is encountered.
272 // Reset new node properties to obvious fakes
282 throw(new IOException("NewickFile: "+Error+"\n"));
285 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
288 public NewickFile(SequenceNode newtree) {
292 public NewickFile(SequenceNode newtree, boolean bootstrap) {
293 HasBootstrap = bootstrap;
296 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
298 HasBootstrap = bootstrap;
299 HasDistances = distances;
302 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) {
304 HasBootstrap = bootstrap;
305 HasDistances = distances;
306 RootHasDistance = rootdistance;
309 public SequenceNode getTree() {
313 public String print() {
314 synchronized (this) {
315 StringBuffer tf = new StringBuffer();
317 return (tf.append(";").toString());
321 public String print(boolean withbootstraps) {
323 boolean boots = this.HasBootstrap;
324 this.HasBootstrap = withbootstraps;
326 this.HasBootstrap = boots;
331 public String print(boolean withbootstraps, boolean withdists) {
333 boolean dists = this.HasDistances;
334 this.HasDistances = withdists;
335 String rv = print(withbootstraps);
336 this.HasDistances = dists;
340 boolean printRootInfo = false;
343 public String print(boolean withbootstraps, boolean withdists, boolean printRootInfo) {
345 boolean rootinfo = printRootInfo;
346 this.printRootInfo = printRootInfo;
347 String rv = print(withbootstraps, withdists);
348 this.printRootInfo = rootinfo;
352 private com.stevesoft.pat.Regex[] NodeSafeName =
353 new com.stevesoft.pat.Regex[]
354 { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
355 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
356 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
359 char getQuoteChar() {
363 char setQuoteChar(char c) {
364 char old = QuoteChar;
369 private String nodeName(String name) {
370 if (NodeSafeName[0].search(name)) {
371 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
373 return NodeSafeName[2].replaceAll(name);
376 private String printNodeField(SequenceNode c) {
378 return ( (c.getName() == null) ? ""
379 : nodeName(c.getName()))
381 ? ( (c.getBootstrap() > -1)
382 ? " " + c.getBootstrap()
386 ? ":" + c.dist : "");
388 private String printRootField(SequenceNode root) {
390 return (printRootInfo)
391 ? (( (root.getName() == null) ? ""
392 : nodeName(root.getName()))
394 ? ( (root.getBootstrap() > -1)
395 ? " " + root.getBootstrap()
398 + ( (RootHasDistance)
399 ? ":" + root.dist : ""))
403 // Non recursive call deals with root node properties
404 public void print(StringBuffer tf, SequenceNode root) {
406 if (root.isLeaf() && printRootInfo) {
407 tf.append(printRootField(root));
409 if (root.isDummy()) {
410 _print(tf, (SequenceNode) root.right());
411 _print(tf, (SequenceNode) root.left());
414 _print(tf, (SequenceNode) root.right());
415 if (root.left() != null) {
418 _print(tf, (SequenceNode) root.left());
419 tf.append(")" + printRootField(root));
425 // Recursive call for non-root nodes
426 public void _print(StringBuffer tf, SequenceNode c) {
429 tf.append(printNodeField(c));
432 _print(tf, (SequenceNode) c.right());
433 _print(tf, (SequenceNode) c.left());
436 _print(tf, (SequenceNode) c.right());
437 if (c.left() != null) {
440 _print(tf, (SequenceNode) c.left());
441 tf.append(")" + printNodeField(c));
448 public static void main(String[] args)
452 File fn = new File(args[0]);
454 StringBuffer newickfile = new StringBuffer();
455 BufferedReader treefile =
456 new BufferedReader(new FileReader(fn));
458 while ((l = treefile.readLine())!=null) {
459 newickfile.append(l);
462 System.out.println("Read file :\n");
463 NewickFile trf = new NewickFile(args[0], "File");
465 System.out.println("Original file :\n");
466 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
467 System.out.println(nonl.replaceAll(newickfile.toString())+"\n");
469 System.out.println("Parsed file.\n");
470 System.out.println("Default output type for original input.\n");
471 System.out.println(trf.print());
472 System.out.println("Without bootstraps.\n");
473 System.out.println(trf.print(false));
474 System.out.println("Without distances.\n");
475 System.out.println(trf.print(true,false));
476 System.out.println("Without bootstraps but with distanecs.\n");
477 System.out.println(trf.print(false, true));
478 System.out.println("Without bootstraps or distanecs.\n");
479 System.out.println(trf.print(false, false));
480 System.out.println("With bootstraps and with distances.\n");
481 System.out.println(trf.print(true, true));
483 catch (java.io.IOException e)
485 System.out.println("Exception\n" + e);