X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=de33266c4fabc49d577428abe890ee1b388cfab7;hb=0d6e36143a2d7471edb7ef386d0b79095e0cd63e;hp=a6f1580efbc6dc6c46e56e171434c0a47d85f8a7;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index a6f1580..de33266 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -1,35 +1,43 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ // NewickFile.java // Tree I/O // http://evolution.genetics.washington.edu/phylip/newick_doc.html +// TODO: Implement Basic NHX tag parsing and preservation +// TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html +// TODO: Extended SequenceNodeI to hold parsed NHX strings package jalview.io; import jalview.datamodel.*; import java.io.*; -import java.util.*; - -public class NewickFile extends FileParse { +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class NewickFile extends FileParse +{ SequenceNode root; private boolean HasBootstrap = false; private boolean HasDistances = false; @@ -38,46 +46,106 @@ public class NewickFile extends FileParse { // File IO Flags boolean ReplaceUnderscores = false; boolean printRootInfo = false; - private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] { + private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] + { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation }; char QuoteChar = '\''; - public NewickFile(String inStr) throws IOException { + /** + * Creates a new NewickFile object. + * + * @param inStr DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public NewickFile(String inStr) throws IOException + { super(inStr, "Paste"); } - public NewickFile(String inFile, String type) throws IOException { + /** + * Creates a new NewickFile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public NewickFile(String inFile, String type) throws IOException + { super(inFile, type); } - public NewickFile(SequenceNode newtree) { + /** + * Creates a new NewickFile object. + * + * @param newtree DOCUMENT ME! + */ + public NewickFile(SequenceNode newtree) + { root = newtree; } - public NewickFile(SequenceNode newtree, boolean bootstrap) { + /** + * Creates a new NewickFile object. + * + * @param newtree DOCUMENT ME! + * @param bootstrap DOCUMENT ME! + */ + public NewickFile(SequenceNode newtree, boolean bootstrap) + { HasBootstrap = bootstrap; root = newtree; } - public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) { + /** + * Creates a new NewickFile object. + * + * @param newtree DOCUMENT ME! + * @param bootstrap DOCUMENT ME! + * @param distances DOCUMENT ME! + */ + public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) + { root = newtree; HasBootstrap = bootstrap; HasDistances = distances; } + /** + * Creates a new NewickFile object. + * + * @param newtree DOCUMENT ME! + * @param bootstrap DOCUMENT ME! + * @param distances DOCUMENT ME! + * @param rootdistance DOCUMENT ME! + */ public NewickFile(SequenceNode newtree, boolean bootstrap, - boolean distances, boolean rootdistance) { + boolean distances, boolean rootdistance) + { root = newtree; HasBootstrap = bootstrap; HasDistances = distances; RootHasDistance = rootdistance; } + /** + * DOCUMENT ME! + * + * @param Error DOCUMENT ME! + * @param Er DOCUMENT ME! + * @param r DOCUMENT ME! + * @param p DOCUMENT ME! + * @param s DOCUMENT ME! + * + * @return DOCUMENT ME! + */ private String ErrorStringrange(String Error, String Er, int r, int p, - String s) { + String s) + { return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( " + s.substring(((p - r) < 0) ? 0 : (p - r), @@ -86,22 +154,40 @@ public class NewickFile extends FileParse { // @tree annotations // These are set automatically by the reader - public boolean HasBootstrap() { + public boolean HasBootstrap() + { return HasBootstrap; } - public boolean HasDistances() { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public boolean HasDistances() + { return HasDistances; } - public void parse() throws IOException { + public boolean HasRootDistance() + { + return RootHasDistance; + } + /** + * DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public void parse() throws IOException + { String nf; { // fill nf with complete tree file StringBuffer file = new StringBuffer(); - while ((nf = nextLine()) != null) { + while ((nf = nextLine()) != null) + { file.append(nf); } @@ -115,12 +201,12 @@ public class NewickFile extends FileParse { int d = -1; int cp = 0; - int flen = nf.length(); + //int flen = nf.length(); String Error = null; String nodename = null; - float DefDistance = (float) 0.00001; // @param Default distance for a node - very very small + float DefDistance = (float) 0.001; // @param Default distance for a node - very very small int DefBootstrap = 0; // @param Default bootstrap for a node float distance = DefDistance; @@ -131,19 +217,24 @@ public class NewickFile extends FileParse { com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( "[(\\['),;]"); - while (majorsyms.searchFrom(nf, cp) && (Error == null)) { + while (majorsyms.searchFrom(nf, cp) && (Error == null)) + { int fcp = majorsyms.matchedFrom(); - switch (nf.charAt(fcp)) { + switch (nf.charAt(fcp)) + { case '[': // Comment or structured/extended NH format info com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex( "]"); - if (comment.searchFrom(nf, fcp)) { + if (comment.searchFrom(nf, fcp)) + { // Skip the comment field cp = 1 + comment.matchedFrom(); - } else { + } + else + { Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); } @@ -156,7 +247,8 @@ public class NewickFile extends FileParse { // ascending should not be set // New Internal node - if (ascending) { + if (ascending) + { Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf); continue; @@ -165,12 +257,16 @@ public class NewickFile extends FileParse { ; d++; - if (c.right() == null) { + if (c.right() == null) + { c.setRight(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false)); c = (SequenceNode) c.right(); - } else { - if (c.left() != null) { + } + else + { + if (c.left() != null) + { // Dummy node for polytomy - keeps c.left free for new node SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true); @@ -183,7 +279,8 @@ public class NewickFile extends FileParse { c = (SequenceNode) c.left(); } - if (realroot == null) { + if (realroot == null) + { realroot = c; } @@ -200,12 +297,15 @@ public class NewickFile extends FileParse { com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( "([^']|'')+'"); - if (qnodename.searchFrom(nf, fcp)) { + if (qnodename.searchFrom(nf, fcp)) + { int nl = qnodename.stringMatched().length(); nodename = new String(qnodename.stringMatched().substring(0, nl - 1)); cp = fcp + nl + 1; - } else { + } + else + { Error = ErrorStringrange(Error, "Unterminated quotes for nodename", 7, fcp, nf); } @@ -214,7 +314,8 @@ public class NewickFile extends FileParse { case ';': - if (d != -1) { + if (d != -1) + { Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); } @@ -229,20 +330,26 @@ public class NewickFile extends FileParse { com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( "\\S+([0-9+]+)\\S*:"); com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9.+]+)"); + ":([-0-9Ee.+]+)"); if (uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK! - { - if (nodename == null) { - if (ReplaceUnderscores) { + { + if (nodename == null) + { + if (ReplaceUnderscores) + { nodename = uqnodename.stringMatched(1).replace('_', ' '); - } else { + } + else + { nodename = uqnodename.stringMatched(1); } - } else { + } + else + { Error = ErrorStringrange(Error, "File has broken algorithm - overwritten nodename", 10, fcp, nf); @@ -251,11 +358,15 @@ public class NewickFile extends FileParse { if (nbootstrap.search(fstring) && (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + - uqnodename.stringMatched().length()))) { - try { + uqnodename.stringMatched().length()))) + { + try + { bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue(); HasBootstrap = true; - } catch (Exception e) { + } + catch (Exception e) + { Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4, cp + nbootstrap.matchedFrom(), nf); @@ -264,74 +375,101 @@ public class NewickFile extends FileParse { boolean nodehasdistance = false; - if (ndist.search(fstring)) { - try { + if (ndist.search(fstring)) + { + try + { distance = (new Float(ndist.stringMatched(1))).floatValue(); HasDistances = true; nodehasdistance = true; - } catch (Exception e) { + } + catch (Exception e) + { Error = ErrorStringrange(Error, "Can't parse node distance value", 7, cp + ndist.matchedFrom(), nf); } } - if (ascending) { + if (ascending) + { // Write node info here c.setName(nodename); - c.dist = (HasDistances) ? distance : 0; - c.setBootstrap((HasBootstrap) ? bootstrap : 0); - - if (c == realroot) { - RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! + // Trees without distances still need a render distance + c.dist = (HasDistances) ? distance : DefDistance; + // be consistent for internal bootstrap defaults too + c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap); + if (c == realroot) + { + RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance } - } else { + } + else + { // Find a place to put the leaf SequenceNode newnode = new SequenceNode(null, c, nodename, (HasDistances) ? distance : DefDistance, (HasBootstrap) ? bootstrap : DefBootstrap, false); - if (c.right() == null) { + if (c.right() == null) + { c.setRight(newnode); - } else { - if (c.left() == null) { + } + else + { + if (c.left() == null) + { c.setLeft(newnode); - } else { + } + else + { // Insert a dummy node for polytomy + // dummy nodes have distances SequenceNode newdummy = new SequenceNode(null, c, - null, 0, 0, true); + null, (HasDistances ? 0 : DefDistance), 0, true); newdummy.SetChildren(c.left(), newnode); c.setLeft(newdummy); } } } - if (ascending) { + if (ascending) + { // move back up the tree from preceding closure c = c.AscendTree(); - if ((d > -1) && (c == null)) { + if ((d > -1) && (c == null)) + { Error = ErrorStringrange(Error, "File broke algorithm: Lost place in tree (is there an extra ')' ?)", 7, fcp, nf); } } - if (nf.charAt(fcp) == ')') { + if (nf.charAt(fcp) == ')') + { d--; ascending = true; - } else { - if (nf.charAt(fcp) == ',') { - if (ascending) { + } + else + { + if (nf.charAt(fcp) == ',') + { + if (ascending) + { ascending = false; - } else { + } + else + { // Just advance focus, if we need to - if ((c.left() != null) && (!c.left().isLeaf())) { + if ((c.left() != null) && (!c.left().isLeaf())) + { c = (SequenceNode) c.left(); } } } - // else : We do nothing if ';' is encountered. + + // else : We do nothing if ';' is encountered. } // Reset new node properties to obvious fakes @@ -343,23 +481,39 @@ public class NewickFile extends FileParse { } } - if (Error != null) { + if (Error != null) + { throw (new IOException("NewickFile: " + Error + "\n")); } root = (SequenceNode) root.right().detach(); // remove the imaginary root. - if (!RootHasDistance) { - root.dist = 0; + if (!RootHasDistance) + { + root.dist = (HasDistances) ? 0 : DefDistance; } } - public SequenceNode getTree() { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceNode getTree() + { return root; } - public String print() { - synchronized (this) { + /** + * Generate a newick format tree according to internal flags + * for bootstraps, distances and root distances. + * + * @return new hampshire tree in a single line + */ + public String print() + { + synchronized (this) + { StringBuffer tf = new StringBuffer(); print(tf, root); @@ -367,8 +521,20 @@ public class NewickFile extends FileParse { } } - public String print(boolean withbootstraps) { - synchronized (this) { + /** + * + * + * Generate a newick format tree according to internal flags + * for distances and root distances and user specificied writing of + * bootstraps. + * @param withbootstraps controls if bootstrap values are explicitly written. + * + * @return new hampshire tree in a single line + */ + public String print(boolean withbootstraps) + { + synchronized (this) + { boolean boots = this.HasBootstrap; this.HasBootstrap = withbootstraps; @@ -379,8 +545,20 @@ public class NewickFile extends FileParse { } } - public String print(boolean withbootstraps, boolean withdists) { - synchronized (this) { + /** + * + * Generate newick format tree according to internal flags + * for writing root node distances. + * + * @param withbootstraps explicitly write bootstrap values + * @param withdists explicitly write distances + * + * @return new hampshire tree in a single line + */ + public String print(boolean withbootstraps, boolean withdists) + { + synchronized (this) + { boolean dists = this.HasDistances; this.HasDistances = withdists; @@ -391,9 +569,20 @@ public class NewickFile extends FileParse { } } + /** + * Generate newick format tree according to user specified flags + * + * @param withbootstraps explicitly write bootstrap values + * @param withdists explicitly write distances + * @param printRootInfo explicitly write root distance + * + * @return new hampshire tree in a single line + */ public String print(boolean withbootstraps, boolean withdists, - boolean printRootInfo) { - synchronized (this) { + boolean printRootInfo) + { + synchronized (this) + { boolean rootinfo = printRootInfo; this.printRootInfo = printRootInfo; @@ -404,33 +593,74 @@ public class NewickFile extends FileParse { } } - char getQuoteChar() { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + char getQuoteChar() + { return QuoteChar; } - char setQuoteChar(char c) { + /** + * DOCUMENT ME! + * + * @param c DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + char setQuoteChar(char c) + { char old = QuoteChar; QuoteChar = c; return old; } - private String nodeName(String name) { - if (NodeSafeName[0].search(name)) { + /** + * DOCUMENT ME! + * + * @param name DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + private String nodeName(String name) + { + if (NodeSafeName[0].search(name)) + { return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; - } else { + } + else + { return NodeSafeName[2].replaceAll(name); } } - private String printNodeField(SequenceNode c) { + /** + * DOCUMENT ME! + * + * @param c DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + private String printNodeField(SequenceNode c) + { return ((c.getName() == null) ? "" : nodeName(c.getName())) + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) : "") : "") + ((HasDistances) ? (":" + c.dist) : ""); } - private String printRootField(SequenceNode root) { + /** + * DOCUMENT ME! + * + * @param root DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + private String printRootField(SequenceNode root) + { return (printRootInfo) ? (((root.getName() == null) ? "" : nodeName(root.getName())) + ((HasBootstrap) @@ -439,19 +669,28 @@ public class NewickFile extends FileParse { } // Non recursive call deals with root node properties - public void print(StringBuffer tf, SequenceNode root) { - if (root != null) { - if (root.isLeaf() && printRootInfo) { + public void print(StringBuffer tf, SequenceNode root) + { + if (root != null) + { + if (root.isLeaf() && printRootInfo) + { tf.append(printRootField(root)); - } else { - if (root.isDummy()) { + } + else + { + if (root.isDummy()) + { _print(tf, (SequenceNode) root.right()); _print(tf, (SequenceNode) root.left()); - } else { + } + else + { tf.append("("); _print(tf, (SequenceNode) root.right()); - if (root.left() != null) { + if (root.left() != null) + { tf.append(","); } @@ -463,19 +702,32 @@ public class NewickFile extends FileParse { } // Recursive call for non-root nodes - public void _print(StringBuffer tf, SequenceNode c) { - if (c != null) { - if (c.isLeaf()) { + public void _print(StringBuffer tf, SequenceNode c) + { + if (c != null) + { + if (c.isLeaf()) + { tf.append(printNodeField(c)); - } else { - if (c.isDummy()) { - _print(tf, (SequenceNode) c.right()); + } + else + { + if (c.isDummy()) + { _print(tf, (SequenceNode) c.left()); - } else { + if (c.left() != null) + { + tf.append(","); + } + _print(tf, (SequenceNode) c.right()); + } + else + { tf.append("("); _print(tf, (SequenceNode) c.right()); - if (c.left() != null) { + if (c.left() != null) + { tf.append(","); } @@ -487,15 +739,23 @@ public class NewickFile extends FileParse { } // Test - public static void main(String[] args) { - try { + public static void main(String[] args) + { + try + { + if (args==null || args.length!=1) { + System.err.println("Takes one argument - file name of a newick tree file."); + System.exit(0); + } + File fn = new File(args[0]); StringBuffer newickfile = new StringBuffer(); BufferedReader treefile = new BufferedReader(new FileReader(fn)); String l; - while ((l = treefile.readLine()) != null) { + while ((l = treefile.readLine()) != null) + { newickfile.append(l); } @@ -522,7 +782,9 @@ public class NewickFile extends FileParse { System.out.println(trf.print(false, false)); System.out.println("With bootstraps and with distances.\n"); System.out.println(trf.print(true, true)); - } catch (java.io.IOException e) { + } + catch (java.io.IOException e) + { System.err.println("Exception\n" + e); e.printStackTrace(); }