2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
23 // TODO: Implement Basic NHX tag parsing and preservation
24 // TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html
25 // TODO: Extended SequenceNodeI to hold parsed NHX strings
28 import jalview.datamodel.*;
39 public class NewickFile extends FileParse
// Tree-content flags. The tree-based constructors copy their boolean arguments into
// these fields; parse() assigns RootHasDistance from its per-node 'nodehasdistance'
// flag. HasDistances decides whether ":" + distance is written by printNodeField().
42 private boolean HasBootstrap = false;
43 private boolean HasDistances = false;
44 private boolean RootHasDistance = false;
// When set, parse() rewrites '_' characters found in unquoted node names.
47 boolean ReplaceUnderscores = false;
// Gates printRootField(): while false, that method returns an empty string.
48 boolean printRootInfo = false;
// Regexes applied by nodeName() when a node label is written:
//   [0] tests whether the label contains a character that forces quoting,
//   [1] doubles single quotes inside a label that is going to be quoted,
//   [2] rewrites whitespace in an unquoted label as '_'.
// Entry [2] previously read "s/\\/w/_/", i.e. it replaced the literal two-character
// text "/w" and left whitespace untouched; it now uses the \s whitespace class.
49 private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
51 new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
52 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
53 new com.stevesoft.pat.Regex().perlCode("s/\\s/_/") // unquoted whitespace transformation
// Character placed on both sides of a quoted label by nodeName().
55 char QuoteChar = '\'';
58 * Creates a new NewickFile object.
60 * @param inStr tree text, handed to the superclass constructor with type "Paste"
62 * @throws IOException DOCUMENT ME!
// String-source constructor: forwards inStr to the superclass constructor together
// with the fixed type string "Paste".
64 public NewickFile(String inStr) throws IOException
66 super(inStr, "Paste");
70 * Creates a new NewickFile object.
72 * @param inFile DOCUMENT ME!
73 * @param type DOCUMENT ME!
75 * @throws IOException DOCUMENT ME!
// Source-plus-type constructor; main() calls it as new NewickFile(args[0], "File").
// The body is not part of this listing - NOTE(review): presumably it passes both
// arguments to the superclass constructor, confirm against the full file.
77 public NewickFile(String inFile, String type) throws IOException
83 * Creates a new NewickFile object.
85 * @param newtree DOCUMENT ME!
// Tree-based constructor with no flag arguments. The body is not part of this
// listing - NOTE(review): presumably it stores newtree as the tree to print, confirm.
87 public NewickFile(SequenceNode newtree)
93 * Creates a new NewickFile object.
95 * @param newtree DOCUMENT ME!
96 * @param bootstrap value copied into the HasBootstrap flag
// Tree-based constructor that also copies 'bootstrap' into HasBootstrap.
// (The statement dealing with newtree is not visible in this listing.)
98 public NewickFile(SequenceNode newtree, boolean bootstrap)
100 HasBootstrap = bootstrap;
105 * Creates a new NewickFile object.
107 * @param newtree DOCUMENT ME!
108 * @param bootstrap value copied into the HasBootstrap flag
109 * @param distances value copied into the HasDistances flag
// Tree-based constructor that copies 'bootstrap' and 'distances' into the
// HasBootstrap and HasDistances flags.
// (The statement dealing with newtree is not visible in this listing.)
111 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances)
114 HasBootstrap = bootstrap;
115 HasDistances = distances;
119 * Creates a new NewickFile object.
121 * @param newtree DOCUMENT ME!
122 * @param bootstrap value copied into the HasBootstrap flag
123 * @param distances value copied into the HasDistances flag
124 * @param rootdistance value copied into the RootHasDistance flag
// Tree-based constructor that sets all three content flags from its arguments:
// HasBootstrap, HasDistances and RootHasDistance.
// (The statement dealing with newtree is not visible in this listing.)
126 public NewickFile(SequenceNode newtree, boolean bootstrap,
127 boolean distances, boolean rootdistance)
130 HasBootstrap = bootstrap;
131 HasDistances = distances;
132 RootHasDistance = rootdistance;
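138 * @param Error error text accumulated so far, or null when there is none
139 * @param Er description of the new problem, appended after Error
140 * @param r number of characters of context taken on each side of p
141 * @param p position in s that the message refers to
142 * @param s text being reported on; the excerpt is clamped to its bounds
144 * @return the combined message, ending with the excerpt, " )" and a newline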
// Builds one error message. The result starts with the existing Error text (or nothing
// when Error is null), followed by Er, the text " at position " and p, and then an
// excerpt of s running from (p - r) to (p + r). The excerpt start is clamped to 0 and
// its end to s.length(). The message is closed with " )" and a newline.
146 private String ErrorStringrange(String Error, String Er, int r, int p,
149 return ((Error == null) ? "" : Error) + Er + " at position " + p +
151 s.substring(((p - r) < 0) ? 0 : (p - r),
152 ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";
156 // These are set automatically by the reader
// Query for the bootstrap flag. The body is not part of this listing -
// NOTE(review): presumably returns the HasBootstrap field, confirm.
157 public boolean HasBootstrap()
165 * @return DOCUMENT ME!
// Query for the distances flag. The body is not part of this listing -
// NOTE(review): presumably returns the HasDistances field, confirm.
167 public boolean HasDistances()
// Returns the RootHasDistance flag.
172 public boolean HasRootDistance()
174 return RootHasDistance;
179 * @throws IOException DOCUMENT ME!
// Reads the complete input through nextLine() and scans it one structural symbol at a
// time, building a tree of SequenceNode objects below a temporary root node. Problems
// are accumulated as text in Error (via ErrorStringrange) and reported as an
// IOException whose message starts with "NewickFile: ". At the end the temporary root
// is removed by detaching its right child, which becomes the real root.
181 public void parse() throws IOException
185 { // fill nf with complete tree file
187 StringBuffer file = new StringBuffer();
189 while ((nf = nextLine()) != null)
194 nf = file.toString();
// Temporary root; 'c' is the node currently being filled in.
197 root = new SequenceNode();
199 SequenceNode realroot = null;
200 SequenceNode c = root;
204 //int flen = nf.length();
207 String nodename = null;
// Values used for a node whose field does not supply a distance / bootstrap.
209 float DefDistance = (float) 0.001; // @param Default distance for a node - very very small
210 int DefBootstrap = 0; // @param Default bootstrap for a node
212 float distance = DefDistance;
213 int bootstrap = DefBootstrap;
215 boolean ascending = false; // flag indicating that we are leaving the current node
217 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
// Main scan: each pass looks for the next match of majorsyms at or after cp and
// dispatches on the character found there. The loop also ends as soon as an error
// message has been recorded.
220 while (majorsyms.searchFrom(nf, cp) && (Error == null))
222 int fcp = majorsyms.matchedFrom();
224 switch (nf.charAt(fcp))
226 case '[': // Comment or structured/extended NH format info
228 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
231 if (comment.searchFrom(nf, fcp))
233 // Skip the comment field
234 cp = 1 + comment.matchedFrom();
238 Error = ErrorStringrange(Error, "Unterminated comment", 3,
248 // ascending should not be set
252 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
// Opening a new internal node: when the right slot of c is free a fresh node is
// placed there and becomes the current node.
260 if (c.right() == null)
262 c.setRight(new SequenceNode(null, c, null, DefDistance,
263 DefBootstrap, false));
264 c = (SequenceNode) c.right();
268 if (c.left() != null)
270 // Dummy node for polytomy - keeps c.left free for new node
271 SequenceNode tmpn = new SequenceNode(null, c, null, 0,
273 tmpn.SetChildren(c.left(), c.right());
277 c.setLeft(new SequenceNode(null, c, null, DefDistance,
278 DefBootstrap, false));
279 c = (SequenceNode) c.left();
282 if (realroot == null)
288 distance = DefDistance;
289 bootstrap = DefBootstrap;
294 // Deal with quoted fields
297 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
300 if (qnodename.searchFrom(nf, fcp))
302 int nl = qnodename.stringMatched().length();
303 nodename = new String(qnodename.stringMatched().substring(0,
309 Error = ErrorStringrange(Error,
310 "Unterminated quotes for nodename", 7, fcp, nf);
319 Error = ErrorStringrange(Error,
320 "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
323 // cp advanced at the end of default
326 // Parse simpler field strings
// fstring is the text between the previous scan position and the symbol just found.
327 String fstring = nf.substring(cp, fcp);
328 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
329 "\\b([^' :;\\](),]+)");
// NOTE(review): the pattern below starts with \S+ (one or more NON-whitespace
// characters) directly in front of the captured digits. For a field such as
// "name 100:0.1" that appears to leave only the trailing digit(s) in group 1.
// printNodeField() writes the bootstrap after a single space, so \s looks like what
// was intended - confirm before changing.
330 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
331 "\\S+([0-9+]+)\\S*:");
332 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
// An unquoted name is accepted only when the match is at the start of fstring or is
// not immediately preceded by ':'.
335 if (uqnodename.search(fstring) &&
336 ((uqnodename.matchedFrom(1) == 0) ||
337 (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK!
339 if (nodename == null)
341 if (ReplaceUnderscores)
343 nodename = uqnodename.stringMatched(1).replace('_',
348 nodename = uqnodename.stringMatched(1);
353 Error = ErrorStringrange(Error,
354 "File has broken algorithm - overwritten nodename",
// A bootstrap is taken only when its match starts after the end of the name match.
359 if (nbootstrap.search(fstring) &&
360 (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +
361 uqnodename.stringMatched().length())))
365 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
370 Error = ErrorStringrange(Error,
371 "Can't parse bootstrap value", 4,
372 cp + nbootstrap.matchedFrom(), nf);
// Tracks whether this particular field carried an explicit distance.
376 boolean nodehasdistance = false;
378 if (ndist.search(fstring))
382 distance = (new Float(ndist.stringMatched(1))).floatValue();
384 nodehasdistance = true;
388 Error = ErrorStringrange(Error,
389 "Can't parse node distance value", 7,
390 cp + ndist.matchedFrom(), nf);
396 // Write node info here
398 // Trees without distances still need a render distance
399 c.dist = (HasDistances) ? distance : DefDistance;
400 // be consistent for internal bootstrap defaults too
401 c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);
404 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance
409 // Find a place to put the leaf
410 SequenceNode newnode = new SequenceNode(null, c, nodename,
411 (HasDistances) ? distance : DefDistance,
412 (HasBootstrap) ? bootstrap : DefBootstrap, false);
414 if (c.right() == null)
420 if (c.left() == null)
426 // Insert a dummy node for polytomy
427 // dummy nodes have distances
428 SequenceNode newdummy = new SequenceNode(null, c,
429 null, (HasDistances ? 0 : DefDistance), 0, true);
430 newdummy.SetChildren(c.left(), newnode);
438 // move back up the tree from preceding closure
441 if ((d > -1) && (c == null))
443 Error = ErrorStringrange(Error,
444 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
449 if (nf.charAt(fcp) == ')')
456 if (nf.charAt(fcp) == ',')
464 // Just advance focus, if we need to
465 if ((c.left() != null) && (!c.left().isLeaf()))
467 c = (SequenceNode) c.left();
472 // else : We do nothing if ';' is encountered.
475 // Reset new node properties to obvious fakes
477 distance = DefDistance;
478 bootstrap = DefBootstrap;
486 throw (new IOException("NewickFile: " + Error + "\n"));
489 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
// Without an explicit root distance the root gets 0 (tree has distances) or the default.
491 if (!RootHasDistance)
493 root.dist = (HasDistances) ? 0 : DefDistance;
500 * @return DOCUMENT ME!
// Accessor for the tree. The body is not part of this listing -
// NOTE(review): presumably returns the 'root' node that parse() assigns, confirm.
502 public SequenceNode getTree()
508 * Generate a newick format tree according to internal flags
509 * for bootstraps, distances and root distances.
511 * @return new hampshire tree in a single line
// Produces the tree text using the current flag settings: output is collected in a
// StringBuffer and the returned string is terminated with ';'. (The call that fills
// the buffer is not visible in this listing.)
513 public String print()
517 StringBuffer tf = new StringBuffer();
520 return (tf.append(";").toString());
527 * Generate a newick format tree according to internal flags
528 * for distances and root distances and user specified writing of
530 * @param withbootstraps controls if bootstrap values are explicitly written.
532 * @return new hampshire tree in a single line
// Output with the bootstrap flag chosen by the caller: the current HasBootstrap value
// is saved in 'boots', replaced by 'withbootstraps', and put back afterwards. (The
// statements that generate and return the text are not visible in this listing.)
534 public String print(boolean withbootstraps)
538 boolean boots = this.HasBootstrap;
539 this.HasBootstrap = withbootstraps;
542 this.HasBootstrap = boots;
550 * Generate newick format tree according to internal flags
551 * for writing root node distances.
553 * @param withbootstraps explicitly write bootstrap values
554 * @param withdists explicitly write distances
556 * @return new hampshire tree in a single line
// Output with caller-chosen bootstrap and distance flags: HasDistances is saved in
// 'dists', set to 'withdists' for the duration of the print(withbootstraps) call, and
// then restored. (The return statement is not visible in this listing.)
558 public String print(boolean withbootstraps, boolean withdists)
562 boolean dists = this.HasDistances;
563 this.HasDistances = withdists;
565 String rv = print(withbootstraps);
566 this.HasDistances = dists;
573 * Generate newick format tree according to user specified flags
575 * @param withbootstraps explicitly write bootstrap values
576 * @param withdists explicitly write distances
577 * @param printRootInfo explicitly write root distance
579 * @return new hampshire tree in a single line
// Output with all three options chosen by the caller. The printRootInfo FIELD is
// saved, overridden for the duration of the print(withbootstraps, withdists) call,
// and then restored.
// The parameter has the same name as the field and shadows it, so the save has to go
// through 'this.'. Reading the bare name captured the argument instead, so the
// "restore" wrote the argument back and the field kept the caller's value for every
// later print call.
581 public String print(boolean withbootstraps, boolean withdists,
582 boolean printRootInfo)
586 boolean rootinfo = this.printRootInfo;
587 this.printRootInfo = printRootInfo;
589 String rv = print(withbootstraps, withdists);
590 this.printRootInfo = rootinfo;
599 * @return DOCUMENT ME!
609 * @param c DOCUMENT ME!
611 * @return DOCUMENT ME!
// Copies the current QuoteChar into 'old' first. The rest of the body is not part of
// this listing - NOTE(review): presumably it installs c as the new quote character and
// returns the previous one, confirm.
613 char setQuoteChar(char c)
615 char old = QuoteChar;
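624 * @param name node label to be written
626 * @return the label between two QuoteChar characters when NodeSafeName[0] matches it, otherwise the label after NodeSafeName[2].replaceAll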
// Prepares a node label for output. A label matched by NodeSafeName[0] is run through
// NodeSafeName[1].replaceAll and returned with QuoteChar on both sides; any other
// label is returned after NodeSafeName[2].replaceAll.
628 private String nodeName(String name)
630 if (NodeSafeName[0].search(name))
632 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
636 return NodeSafeName[2].replaceAll(name);
643 * @param c DOCUMENT ME!
645 * @return DOCUMENT ME!
// Text written for a non-root node, built from three parts: the name passed through
// nodeName() (nothing when the name is null); then, under a condition on a line
// missing from this listing, a space and the bootstrap value when it is greater than
// -1; then ':' and c.dist when HasDistances is set.
647 private String printNodeField(SequenceNode c)
649 return ((c.getName() == null) ? "" : nodeName(c.getName())) +
651 ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) : "") : "") +
652 ((HasDistances) ? (":" + c.dist) : "");
658 * @param root DOCUMENT ME!
660 * @return DOCUMENT ME!
// Text written for the root node. Returns an empty string unless printRootInfo is
// set. Otherwise the layout matches printNodeField(): name via nodeName(), an optional
// " " + bootstrap (its outer condition is on a line missing from this listing), and
// the distance. Here the distance is gated on RootHasDistance, not HasDistances.
662 private String printRootField(SequenceNode root)
664 return (printRootInfo)
665 ? (((root.getName() == null) ? "" : nodeName(root.getName())) +
667 ? ((root.getBootstrap() > -1) ? (" " + root.getBootstrap()) : "") : "") +
668 ((RootHasDistance) ? (":" + root.dist) : "")) : "";
671 // Non recursive call deals with root node properties
// Appends the text for the tree rooted at 'root' to tf. A leaf root contributes its
// printRootField() text when printRootInfo is set. Otherwise the children are written
// through _print() and the group is closed with ")" followed by printRootField(root).
// Two child orders appear below (right/left twice, the second with a null check on
// root.left()); the conditions choosing between them are not visible in this listing.
672 public void print(StringBuffer tf, SequenceNode root)
676 if (root.isLeaf() && printRootInfo)
678 tf.append(printRootField(root));
684 _print(tf, (SequenceNode) root.right());
685 _print(tf, (SequenceNode) root.left());
690 _print(tf, (SequenceNode) root.right());
692 if (root.left() != null)
697 _print(tf, (SequenceNode) root.left());
698 tf.append(")" + printRootField(root));
704 // Recursive call for non-root nodes
// Appends the text for the subtree at c to tf. One path writes only
// printNodeField(c); the others recurse into both children and finish with ")"
// followed by printNodeField(c). Both a left-then-right and a right-then-left order
// appear below; the conditions selecting each path are not visible in this listing.
705 public void _print(StringBuffer tf, SequenceNode c)
711 tf.append(printNodeField(c));
717 _print(tf, (SequenceNode) c.left());
718 if (c.left() != null)
722 _print(tf, (SequenceNode) c.right());
727 _print(tf, (SequenceNode) c.right());
729 if (c.left() != null)
734 _print(tf, (SequenceNode) c.left());
735 tf.append(")" + printNodeField(c));
// Command-line check: expects exactly one argument, the name of a Newick tree file.
// The file is read line by line into a buffer for display, parsed through
// new NewickFile(args[0], "File"), and then printed with several combinations of the
// bootstrap and distance options. An IOException is reported on System.err.
742 public static void main(String[] args)
746 if (args==null || args.length!=1) {
747 System.err.println("Takes one argument - file name of a newick tree file.");
751 File fn = new File(args[0]);
753 StringBuffer newickfile = new StringBuffer();
// NOTE(review): no close() call on this reader is visible in the listing - confirm
// that the full file closes it.
754 BufferedReader treefile = new BufferedReader(new FileReader(fn));
757 while ((l = treefile.readLine()) != null)
759 newickfile.append(l);
763 System.out.println("Read file :\n");
765 NewickFile trf = new NewickFile(args[0], "File");
767 System.out.println("Original file :\n");
// Removes runs of newline characters from the buffered text before echoing it.
769 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
770 System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
772 System.out.println("Parsed file.\n");
773 System.out.println("Default output type for original input.\n");
774 System.out.println(trf.print());
775 System.out.println("Without bootstraps.\n");
776 System.out.println(trf.print(false));
777 System.out.println("Without distances.\n");
778 System.out.println(trf.print(true, false));
779 System.out.println("Without bootstraps but with distanecs.\n");
780 System.out.println(trf.print(false, true));
781 System.out.println("Without bootstraps or distanecs.\n");
782 System.out.println(trf.print(false, false));
783 System.out.println("With bootstraps and with distances.\n");
784 System.out.println(trf.print(true, true));
786 catch (java.io.IOException e)
788 System.err.println("Exception\n" + e);