2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 // http://evolution.genetics.washington.edu/phylip/newick_doc.html
\r
25 import jalview.datamodel.*;
\r
32 public class NewickFile extends FileParse {
\r
34 private boolean HasBootstrap = false;
\r
35 private boolean HasDistances = false;
\r
36 private boolean RootHasDistance = false;
\r
39 boolean ReplaceUnderscores = false;
\r
40 boolean printRootInfo = false;
\r
41 private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] {
\r
42 new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
\r
43 new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
\r
44 new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation
\r
46 char QuoteChar = '\'';
\r
48 public NewickFile(String inStr) throws IOException {
\r
49 super(inStr, "Paste");
\r
52 public NewickFile(String inFile, String type) throws IOException {
\r
53 super(inFile, type);
\r
56 public NewickFile(SequenceNode newtree) {
\r
60 public NewickFile(SequenceNode newtree, boolean bootstrap) {
\r
61 HasBootstrap = bootstrap;
\r
65 public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) {
\r
67 HasBootstrap = bootstrap;
\r
68 HasDistances = distances;
\r
71 public NewickFile(SequenceNode newtree, boolean bootstrap,
\r
72 boolean distances, boolean rootdistance) {
\r
74 HasBootstrap = bootstrap;
\r
75 HasDistances = distances;
\r
76 RootHasDistance = rootdistance;
\r
79 private String ErrorStringrange(String Error, String Er, int r, int p,
\r
81 return ((Error == null) ? "" : Error) + Er + " at position " + p +
\r
83 s.substring(((p - r) < 0) ? 0 : (p - r),
\r
84 ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";
\r
87 // @tree annotations
\r
88 // These are set automatically by the reader
\r
89 public boolean HasBootstrap() {
\r
90 return HasBootstrap;
\r
93 public boolean HasDistances() {
\r
94 return HasDistances;
\r
97 public void parse() throws IOException {
\r
100 { // fill nf with complete tree file
\r
102 StringBuffer file = new StringBuffer();
\r
104 while ((nf = nextLine()) != null) {
\r
108 nf = file.toString();
\r
111 root = new SequenceNode();
\r
113 SequenceNode realroot = null;
\r
114 SequenceNode c = root;
\r
118 int flen = nf.length();
\r
120 String Error = null;
\r
121 String nodename = null;
\r
123 float DefDistance = (float) 0.00001; // @param Default distance for a node - very very small
\r
124 int DefBootstrap = 0; // @param Default bootstrap for a node
\r
126 float distance = DefDistance;
\r
127 int bootstrap = DefBootstrap;
\r
129 boolean ascending = false; // flag indicating that we are leaving the current node
\r
131 com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
\r
134 while (majorsyms.searchFrom(nf, cp) && (Error == null)) {
\r
135 int fcp = majorsyms.matchedFrom();
\r
137 switch (nf.charAt(fcp)) {
\r
138 case '[': // Comment or structured/extended NH format info
\r
140 com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
\r
143 if (comment.searchFrom(nf, fcp)) {
\r
144 // Skip the comment field
\r
145 cp = 1 + comment.matchedFrom();
\r
147 Error = ErrorStringrange(Error, "Unterminated comment", 3,
\r
157 // ascending should not be set
\r
158 // New Internal node
\r
160 Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
\r
168 if (c.right() == null) {
\r
169 c.setRight(new SequenceNode(null, c, null, DefDistance,
\r
170 DefBootstrap, false));
\r
171 c = (SequenceNode) c.right();
\r
173 if (c.left() != null) {
\r
174 // Dummy node for polytomy - keeps c.left free for new node
\r
175 SequenceNode tmpn = new SequenceNode(null, c, null, 0,
\r
177 tmpn.SetChildren(c.left(), c.right());
\r
181 c.setLeft(new SequenceNode(null, c, null, DefDistance,
\r
182 DefBootstrap, false));
\r
183 c = (SequenceNode) c.left();
\r
186 if (realroot == null) {
\r
191 distance = DefDistance;
\r
192 bootstrap = DefBootstrap;
\r
197 // Deal with quoted fields
\r
200 com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
\r
203 if (qnodename.searchFrom(nf, fcp)) {
\r
204 int nl = qnodename.stringMatched().length();
\r
205 nodename = new String(qnodename.stringMatched().substring(0,
\r
209 Error = ErrorStringrange(Error,
\r
210 "Unterminated quotes for nodename", 7, fcp, nf);
\r
218 Error = ErrorStringrange(Error,
\r
219 "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
\r
222 // cp advanced at the end of default
\r
225 // Parse simpler field strings
\r
226 String fstring = nf.substring(cp, fcp);
\r
227 com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
\r
228 "\\b([^' :;\\](),]+)");
\r
229 com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
\r
230 "\\S+([0-9+]+)\\S*:");
\r
231 com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
\r
234 if (uqnodename.search(fstring) &&
\r
235 ((uqnodename.matchedFrom(1) == 0) ||
\r
236 (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK!
\r
238 if (nodename == null) {
\r
239 if (ReplaceUnderscores) {
\r
240 nodename = uqnodename.stringMatched(1).replace('_',
\r
243 nodename = uqnodename.stringMatched(1);
\r
246 Error = ErrorStringrange(Error,
\r
247 "File has broken algorithm - overwritten nodename",
\r
252 if (nbootstrap.search(fstring) &&
\r
253 (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +
\r
254 uqnodename.stringMatched().length()))) {
\r
256 bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
\r
257 HasBootstrap = true;
\r
258 } catch (Exception e) {
\r
259 Error = ErrorStringrange(Error,
\r
260 "Can't parse bootstrap value", 4,
\r
261 cp + nbootstrap.matchedFrom(), nf);
\r
265 boolean nodehasdistance = false;
\r
267 if (ndist.search(fstring)) {
\r
269 distance = (new Float(ndist.stringMatched(1))).floatValue();
\r
270 HasDistances = true;
\r
271 nodehasdistance = true;
\r
272 } catch (Exception e) {
\r
273 Error = ErrorStringrange(Error,
\r
274 "Can't parse node distance value", 7,
\r
275 cp + ndist.matchedFrom(), nf);
\r
280 // Write node info here
\r
281 c.setName(nodename);
\r
282 c.dist = (HasDistances) ? distance : 0;
\r
283 c.setBootstrap((HasBootstrap) ? bootstrap : 0);
\r
285 if (c == realroot) {
\r
286 RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!!
\r
289 // Find a place to put the leaf
\r
290 SequenceNode newnode = new SequenceNode(null, c, nodename,
\r
291 (HasDistances) ? distance : DefDistance,
\r
292 (HasBootstrap) ? bootstrap : DefBootstrap, false);
\r
294 if (c.right() == null) {
\r
295 c.setRight(newnode);
\r
297 if (c.left() == null) {
\r
298 c.setLeft(newnode);
\r
300 // Insert a dummy node for polytomy
\r
301 SequenceNode newdummy = new SequenceNode(null, c,
\r
303 newdummy.SetChildren(c.left(), newnode);
\r
304 c.setLeft(newdummy);
\r
310 // move back up the tree from preceding closure
\r
311 c = c.AscendTree();
\r
313 if ((d > -1) && (c == null)) {
\r
314 Error = ErrorStringrange(Error,
\r
315 "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
\r
320 if (nf.charAt(fcp) == ')') {
\r
324 if (nf.charAt(fcp) == ',') {
\r
328 // Just advance focus, if we need to
\r
329 if ((c.left() != null) && (!c.left().isLeaf())) {
\r
330 c = (SequenceNode) c.left();
\r
334 // else : We do nothing if ';' is encountered.
\r
337 // Reset new node properties to obvious fakes
\r
339 distance = DefDistance;
\r
340 bootstrap = DefBootstrap;
\r
346 if (Error != null) {
\r
347 throw (new IOException("NewickFile: " + Error + "\n"));
\r
350 root = (SequenceNode) root.right().detach(); // remove the imaginary root.
\r
352 if (!RootHasDistance) {
\r
357 public SequenceNode getTree() {
\r
361 public String print() {
\r
362 synchronized (this) {
\r
363 StringBuffer tf = new StringBuffer();
\r
366 return (tf.append(";").toString());
\r
370 public String print(boolean withbootstraps) {
\r
371 synchronized (this) {
\r
372 boolean boots = this.HasBootstrap;
\r
373 this.HasBootstrap = withbootstraps;
\r
375 String rv = print();
\r
376 this.HasBootstrap = boots;
\r
382 public String print(boolean withbootstraps, boolean withdists) {
\r
383 synchronized (this) {
\r
384 boolean dists = this.HasDistances;
\r
385 this.HasDistances = withdists;
\r
387 String rv = print(withbootstraps);
\r
388 this.HasDistances = dists;
\r
394 public String print(boolean withbootstraps, boolean withdists,
\r
395 boolean printRootInfo) {
\r
396 synchronized (this) {
\r
397 boolean rootinfo = printRootInfo;
\r
398 this.printRootInfo = printRootInfo;
\r
400 String rv = print(withbootstraps, withdists);
\r
401 this.printRootInfo = rootinfo;
\r
407 char getQuoteChar() {
\r
411 char setQuoteChar(char c) {
\r
412 char old = QuoteChar;
\r
418 private String nodeName(String name) {
\r
419 if (NodeSafeName[0].search(name)) {
\r
420 return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
\r
422 return NodeSafeName[2].replaceAll(name);
\r
426 private String printNodeField(SequenceNode c) {
\r
427 return ((c.getName() == null) ? "" : nodeName(c.getName())) +
\r
429 ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) : "") : "") +
\r
430 ((HasDistances) ? (":" + c.dist) : "");
\r
433 private String printRootField(SequenceNode root) {
\r
434 return (printRootInfo)
\r
435 ? (((root.getName() == null) ? "" : nodeName(root.getName())) +
\r
437 ? ((root.getBootstrap() > -1) ? (" " + root.getBootstrap()) : "") : "") +
\r
438 ((RootHasDistance) ? (":" + root.dist) : "")) : "";
\r
441 // Non recursive call deals with root node properties
\r
442 public void print(StringBuffer tf, SequenceNode root) {
\r
443 if (root != null) {
\r
444 if (root.isLeaf() && printRootInfo) {
\r
445 tf.append(printRootField(root));
\r
447 if (root.isDummy()) {
\r
448 _print(tf, (SequenceNode) root.right());
\r
449 _print(tf, (SequenceNode) root.left());
\r
452 _print(tf, (SequenceNode) root.right());
\r
454 if (root.left() != null) {
\r
458 _print(tf, (SequenceNode) root.left());
\r
459 tf.append(")" + printRootField(root));
\r
465 // Recursive call for non-root nodes
\r
466 public void _print(StringBuffer tf, SequenceNode c) {
\r
469 tf.append(printNodeField(c));
\r
472 _print(tf, (SequenceNode) c.right());
\r
473 _print(tf, (SequenceNode) c.left());
\r
476 _print(tf, (SequenceNode) c.right());
\r
478 if (c.left() != null) {
\r
482 _print(tf, (SequenceNode) c.left());
\r
483 tf.append(")" + printNodeField(c));
\r
490 public static void main(String[] args) {
\r
492 File fn = new File(args[0]);
\r
494 StringBuffer newickfile = new StringBuffer();
\r
495 BufferedReader treefile = new BufferedReader(new FileReader(fn));
\r
498 while ((l = treefile.readLine()) != null) {
\r
499 newickfile.append(l);
\r
503 System.out.println("Read file :\n");
\r
505 NewickFile trf = new NewickFile(args[0], "File");
\r
507 System.out.println("Original file :\n");
\r
509 com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
\r
510 System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
\r
512 System.out.println("Parsed file.\n");
\r
513 System.out.println("Default output type for original input.\n");
\r
514 System.out.println(trf.print());
\r
515 System.out.println("Without bootstraps.\n");
\r
516 System.out.println(trf.print(false));
\r
517 System.out.println("Without distances.\n");
\r
518 System.out.println(trf.print(true, false));
\r
519 System.out.println("Without bootstraps but with distanecs.\n");
\r
520 System.out.println(trf.print(false, true));
\r
521 System.out.println("Without bootstraps or distanecs.\n");
\r
522 System.out.println(trf.print(false, false));
\r
523 System.out.println("With bootstraps and with distances.\n");
\r
524 System.out.println(trf.print(true, true));
\r
525 } catch (java.io.IOException e) {
\r
526 System.err.println("Exception\n" + e);
\r
527 e.printStackTrace();
\r