2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
22 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
\r
25 import jalview.datamodel.*;
\r
34 * @version $Revision$
\r
36 public class NewickFile extends FileParse
\r
39 private boolean HasBootstrap = false;
\r
40 private boolean HasDistances = false;
\r
41 private boolean RootHasDistance = false;
\r
44 boolean ReplaceUnderscores = false;
\r
45 boolean printRootInfo = false;
\r
46 private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
\r
48 new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
\r
49 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
\r
50 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
\r
52 char QuoteChar = '\'';
\r
/**
 * Creates a new NewickFile object from a string, which is passed to the
 * superclass constructor together with the source type "Paste".
 *
 * @param inStr text handed to the superclass (presumably the Newick tree
 *              itself - confirm against FileParse)
 *
 * @throws IOException if the superclass constructor fails
 */
public NewickFile(String inStr) throws IOException
{
  super(inStr, "Paste");
}
\r
/**
 * Creates a new NewickFile object; both arguments are forwarded unchanged
 * to the superclass constructor.
 *
 * @param inFile source of the tree data (interpretation depends on type)
 * @param type source type understood by the superclass, e.g. "File"
 *
 * @throws IOException if the superclass constructor fails
 */
public NewickFile(String inFile, String type) throws IOException
{
  super(inFile, type);
}
\r
80 * Creates a new NewickFile object.
\r
82 * @param newtree DOCUMENT ME!
\r
84 public NewickFile(SequenceNode newtree)
\r
90 * Creates a new NewickFile object.
\r
92 * @param newtree DOCUMENT ME!
\r
93 * @param bootstrap DOCUMENT ME!
\r
95 public NewickFile(SequenceNode newtree, boolean bootstrap)
\r
97 HasBootstrap = bootstrap;
\r
102 * Creates a new NewickFile object.
\r
104 * @param newtree DOCUMENT ME!
\r
105 * @param bootstrap DOCUMENT ME!
\r
106 * @param distances DOCUMENT ME!
\r
108 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances)
\r
111 HasBootstrap = bootstrap;
\r
112 HasDistances = distances;
\r
116 * Creates a new NewickFile object.
\r
118 * @param newtree DOCUMENT ME!
\r
119 * @param bootstrap DOCUMENT ME!
\r
120 * @param distances DOCUMENT ME!
\r
121 * @param rootdistance DOCUMENT ME!
\r
123 public NewickFile(SequenceNode newtree, boolean bootstrap,
\r
124 boolean distances, boolean rootdistance)
\r
127 HasBootstrap = bootstrap;
\r
128 HasDistances = distances;
\r
129 RootHasDistance = rootdistance;
\r
135 * @param Error DOCUMENT ME!
\r
136 * @param Er DOCUMENT ME!
\r
137 * @param r DOCUMENT ME!
\r
138 * @param p DOCUMENT ME!
\r
139 * @param s DOCUMENT ME!
\r
141 * @return DOCUMENT ME!
\r
143 private String ErrorStringrange(String Error, String Er, int r, int p,
\r
146 return ((Error == null) ? "" : Error) + Er + " at position " + p +
\r
148 s.substring(((p - r) < 0) ? 0 : (p - r),
\r
149 ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";
\r
152 // @tree annotations
\r
153 // These are set automatically by the reader
\r
154 public boolean HasBootstrap()
\r
156 return HasBootstrap;
\r
162 * @return DOCUMENT ME!
\r
164 public boolean HasDistances()
\r
166 return HasDistances;
\r
169 public boolean HasRootDistance()
\r
171 return RootHasDistance;
\r
176 * @throws IOException DOCUMENT ME!
\r
178 public void parse() throws IOException
\r
182 { // fill nf with complete tree file
\r
184 StringBuffer file = new StringBuffer();
\r
186 while ((nf = nextLine()) != null)
\r
191 nf = file.toString();
\r
194 root = new SequenceNode();
\r
196 SequenceNode realroot = null;
\r
197 SequenceNode c = root;
\r
201 //int flen = nf.length();
\r
203 String Error = null;
\r
204 String nodename = null;
\r
206 float DefDistance = (float) 0.001; // @param Default distance for a node - very very small
\r
207 int DefBootstrap = 0; // @param Default bootstrap for a node
\r
209 float distance = DefDistance;
\r
210 int bootstrap = DefBootstrap;
\r
212 boolean ascending = false; // flag indicating that we are leaving the current node
\r
214 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
\r
217 while (majorsyms.searchFrom(nf, cp) && (Error == null))
\r
219 int fcp = majorsyms.matchedFrom();
\r
221 switch (nf.charAt(fcp))
\r
223 case '[': // Comment or structured/extended NH format info
\r
225 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
\r
228 if (comment.searchFrom(nf, fcp))
\r
230 // Skip the comment field
\r
231 cp = 1 + comment.matchedFrom();
\r
235 Error = ErrorStringrange(Error, "Unterminated comment", 3,
\r
245 // ascending should not be set
\r
246 // New Internal node
\r
249 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
\r
257 if (c.right() == null)
\r
259 c.setRight(new SequenceNode(null, c, null, DefDistance,
\r
260 DefBootstrap, false));
\r
261 c = (SequenceNode) c.right();
\r
265 if (c.left() != null)
\r
267 // Dummy node for polytomy - keeps c.left free for new node
\r
268 SequenceNode tmpn = new SequenceNode(null, c, null, 0,
\r
270 tmpn.SetChildren(c.left(), c.right());
\r
274 c.setLeft(new SequenceNode(null, c, null, DefDistance,
\r
275 DefBootstrap, false));
\r
276 c = (SequenceNode) c.left();
\r
279 if (realroot == null)
\r
285 distance = DefDistance;
\r
286 bootstrap = DefBootstrap;
\r
291 // Deal with quoted fields
\r
294 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
\r
297 if (qnodename.searchFrom(nf, fcp))
\r
299 int nl = qnodename.stringMatched().length();
\r
300 nodename = new String(qnodename.stringMatched().substring(0,
\r
306 Error = ErrorStringrange(Error,
\r
307 "Unterminated quotes for nodename", 7, fcp, nf);
\r
316 Error = ErrorStringrange(Error,
\r
317 "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
\r
320 // cp advanced at the end of default
\r
323 // Parse simpler field strings
\r
324 String fstring = nf.substring(cp, fcp);
\r
325 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
\r
326 "\\b([^' :;\\](),]+)");
\r
327 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
\r
328 "\\S+([0-9+]+)\\S*:");
\r
329 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
\r
332 if (uqnodename.search(fstring) &&
\r
333 ((uqnodename.matchedFrom(1) == 0) ||
\r
334 (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK!
\r
336 if (nodename == null)
\r
338 if (ReplaceUnderscores)
\r
340 nodename = uqnodename.stringMatched(1).replace('_',
\r
345 nodename = uqnodename.stringMatched(1);
\r
350 Error = ErrorStringrange(Error,
\r
351 "File has broken algorithm - overwritten nodename",
\r
356 if (nbootstrap.search(fstring) &&
\r
357 (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +
\r
358 uqnodename.stringMatched().length())))
\r
362 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
\r
363 HasBootstrap = true;
\r
365 catch (Exception e)
\r
367 Error = ErrorStringrange(Error,
\r
368 "Can't parse bootstrap value", 4,
\r
369 cp + nbootstrap.matchedFrom(), nf);
\r
373 boolean nodehasdistance = false;
\r
375 if (ndist.search(fstring))
\r
379 distance = (new Float(ndist.stringMatched(1))).floatValue();
\r
380 HasDistances = true;
\r
381 nodehasdistance = true;
\r
383 catch (Exception e)
\r
385 Error = ErrorStringrange(Error,
\r
386 "Can't parse node distance value", 7,
\r
387 cp + ndist.matchedFrom(), nf);
\r
393 // Write node info here
\r
394 c.setName(nodename);
\r
395 // Trees without distances still need a render distance
\r
396 c.dist = (HasDistances) ? distance : DefDistance;
\r
397 // be consistent for internal bootstrap defaults too
\r
398 c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);
\r
401 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance
\r
406 // Find a place to put the leaf
\r
407 SequenceNode newnode = new SequenceNode(null, c, nodename,
\r
408 (HasDistances) ? distance : DefDistance,
\r
409 (HasBootstrap) ? bootstrap : DefBootstrap, false);
\r
411 if (c.right() == null)
\r
413 c.setRight(newnode);
\r
417 if (c.left() == null)
\r
419 c.setLeft(newnode);
\r
423 // Insert a dummy node for polytomy
\r
424 // dummy nodes have distances
\r
425 SequenceNode newdummy = new SequenceNode(null, c,
\r
426 null, (HasDistances ? 0 : DefDistance), 0, true);
\r
427 newdummy.SetChildren(c.left(), newnode);
\r
428 c.setLeft(newdummy);
\r
435 // move back up the tree from preceding closure
\r
436 c = c.AscendTree();
\r
438 if ((d > -1) && (c == null))
\r
440 Error = ErrorStringrange(Error,
\r
441 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
\r
446 if (nf.charAt(fcp) == ')')
\r
453 if (nf.charAt(fcp) == ',')
\r
461 // Just advance focus, if we need to
\r
462 if ((c.left() != null) && (!c.left().isLeaf()))
\r
464 c = (SequenceNode) c.left();
\r
469 // else : We do nothing if ';' is encountered.
\r
472 // Reset new node properties to obvious fakes
\r
474 distance = DefDistance;
\r
475 bootstrap = DefBootstrap;
\r
483 throw (new IOException("NewickFile: " + Error + "\n"));
\r
486 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
\r
488 if (!RootHasDistance)
\r
490 root.dist = (HasDistances) ? 0 : DefDistance;
\r
497 * @return DOCUMENT ME!
\r
499 public SequenceNode getTree()
\r
505 * Generate a newick format tree according to internal flags
\r
506 * for bootstraps, distances and root distances.
\r
508 * @return new hampshire tree in a single line
\r
510 public String print()
\r
512 synchronized (this)
\r
514 StringBuffer tf = new StringBuffer();
\r
517 return (tf.append(";").toString());
\r
524 * Generate a newick format tree according to internal flags
\r
525 * for distances and root distances and user specificied writing of
\r
527 * @param withbootstraps controls if bootstrap values are explicitly written.
\r
529 * @return new hampshire tree in a single line
\r
531 public String print(boolean withbootstraps)
\r
533 synchronized (this)
\r
535 boolean boots = this.HasBootstrap;
\r
536 this.HasBootstrap = withbootstraps;
\r
538 String rv = print();
\r
539 this.HasBootstrap = boots;
\r
547 * Generate newick format tree according to internal flags
\r
548 * for writing root node distances.
\r
550 * @param withbootstraps explicitly write bootstrap values
\r
551 * @param withdists explicitly write distances
\r
553 * @return new hampshire tree in a single line
\r
555 public String print(boolean withbootstraps, boolean withdists)
\r
557 synchronized (this)
\r
559 boolean dists = this.HasDistances;
\r
560 this.HasDistances = withdists;
\r
562 String rv = print(withbootstraps);
\r
563 this.HasDistances = dists;
\r
570 * Generate newick format tree according to user specified flags
\r
572 * @param withbootstraps explicitly write bootstrap values
\r
573 * @param withdists explicitly write distances
\r
574 * @param printRootInfo explicitly write root distance
\r
576 * @return new hampshire tree in a single line
\r
578 public String print(boolean withbootstraps, boolean withdists,
\r
579 boolean printRootInfo)
\r
581 synchronized (this)
\r
583 boolean rootinfo = printRootInfo;
\r
584 this.printRootInfo = printRootInfo;
\r
586 String rv = print(withbootstraps, withdists);
\r
587 this.printRootInfo = rootinfo;
\r
596 * @return DOCUMENT ME!
\r
598 char getQuoteChar()
\r
606 * @param c DOCUMENT ME!
\r
608 * @return DOCUMENT ME!
\r
610 char setQuoteChar(char c)
\r
612 char old = QuoteChar;
\r
621 * @param name DOCUMENT ME!
\r
623 * @return DOCUMENT ME!
\r
625 private String nodeName(String name)
\r
627 if (NodeSafeName[0].search(name))
\r
629 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
\r
633 return NodeSafeName[2].replaceAll(name);
\r
640 * @param c DOCUMENT ME!
\r
642 * @return DOCUMENT ME!
\r
644 private String printNodeField(SequenceNode c)
\r
646 return ((c.getName() == null) ? "" : nodeName(c.getName())) +
\r
648 ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) : "") : "") +
\r
649 ((HasDistances) ? (":" + c.dist) : "");
\r
655 * @param root DOCUMENT ME!
\r
657 * @return DOCUMENT ME!
\r
659 private String printRootField(SequenceNode root)
\r
661 return (printRootInfo)
\r
662 ? (((root.getName() == null) ? "" : nodeName(root.getName())) +
\r
664 ? ((root.getBootstrap() > -1) ? (" " + root.getBootstrap()) : "") : "") +
\r
665 ((RootHasDistance) ? (":" + root.dist) : "")) : "";
\r
668 // Non recursive call deals with root node properties
\r
669 public void print(StringBuffer tf, SequenceNode root)
\r
673 if (root.isLeaf() && printRootInfo)
\r
675 tf.append(printRootField(root));
\r
679 if (root.isDummy())
\r
681 _print(tf, (SequenceNode) root.right());
\r
682 _print(tf, (SequenceNode) root.left());
\r
687 _print(tf, (SequenceNode) root.right());
\r
689 if (root.left() != null)
\r
694 _print(tf, (SequenceNode) root.left());
\r
695 tf.append(")" + printRootField(root));
\r
701 // Recursive call for non-root nodes
\r
702 public void _print(StringBuffer tf, SequenceNode c)
\r
708 tf.append(printNodeField(c));
\r
714 _print(tf, (SequenceNode) c.left());
\r
715 if (c.left() != null)
\r
719 _print(tf, (SequenceNode) c.right());
\r
724 _print(tf, (SequenceNode) c.right());
\r
726 if (c.left() != null)
\r
731 _print(tf, (SequenceNode) c.left());
\r
732 tf.append(")" + printNodeField(c));
\r
739 public static void main(String[] args)
\r
743 if (args==null || args.length!=1) {
\r
744 System.err.println("Takes one argument - file name of a newick tree file.");
\r
748 File fn = new File(args[0]);
\r
750 StringBuffer newickfile = new StringBuffer();
\r
751 BufferedReader treefile = new BufferedReader(new FileReader(fn));
\r
754 while ((l = treefile.readLine()) != null)
\r
756 newickfile.append(l);
\r
760 System.out.println("Read file :\n");
\r
762 NewickFile trf = new NewickFile(args[0], "File");
\r
764 System.out.println("Original file :\n");
\r
766 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
\r
767 System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
\r
769 System.out.println("Parsed file.\n");
\r
770 System.out.println("Default output type for original input.\n");
\r
771 System.out.println(trf.print());
\r
772 System.out.println("Without bootstraps.\n");
\r
773 System.out.println(trf.print(false));
\r
774 System.out.println("Without distances.\n");
\r
775 System.out.println(trf.print(true, false));
\r
776 System.out.println("Without bootstraps but with distanecs.\n");
\r
777 System.out.println(trf.print(false, true));
\r
778 System.out.println("Without bootstraps or distanecs.\n");
\r
779 System.out.println(trf.print(false, false));
\r
780 System.out.println("With bootstraps and with distances.\n");
\r
781 System.out.println(trf.print(true, true));
\r
783 catch (java.io.IOException e)
\r
785 System.err.println("Exception\n" + e);
\r
786 e.printStackTrace();
\r