updated to jalview 2.1 and begun ArchiveClient/VamsasClient/VamsasStore updates.
[jalview.git] / src / jalview / io / NewickFile.java
index de33266..16ac2c7 100755 (executable)
-/*\r
-* Jalview - A Sequence Alignment Editor and Viewer\r
-* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
-*\r
-* This program is free software; you can redistribute it and/or\r
-* modify it under the terms of the GNU General Public License\r
-* as published by the Free Software Foundation; either version 2\r
-* of the License, or (at your option) any later version.\r
-*\r
-* This program is distributed in the hope that it will be useful,\r
-* but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
-* GNU General Public License for more details.\r
-*\r
-* You should have received a copy of the GNU General Public License\r
-* along with this program; if not, write to the Free Software\r
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA\r
-*/\r
-\r
-// NewickFile.java\r
-// Tree I/O\r
-// http://evolution.genetics.washington.edu/phylip/newick_doc.html\r
-// TODO: Implement Basic NHX tag parsing and preservation\r
-// TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html\r
-// TODO: Extended SequenceNodeI to hold parsed NHX strings\r
-package jalview.io;\r
-\r
-import jalview.datamodel.*;\r
-\r
-import java.io.*;\r
-\r
-\r
-/**\r
- * DOCUMENT ME!\r
- *\r
- * @author $author$\r
- * @version $Revision$\r
- */\r
-public class NewickFile extends FileParse\r
-{\r
-    SequenceNode root;\r
-    private boolean HasBootstrap = false;\r
-    private boolean HasDistances = false;\r
-    private boolean RootHasDistance = false;\r
-\r
-    // File IO Flags\r
-    boolean ReplaceUnderscores = false;\r
-    boolean printRootInfo = false;\r
-    private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]\r
-        {\r
-            new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes\r
-            new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters\r
-            new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation\r
-        };\r
-    char QuoteChar = '\'';\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param inStr DOCUMENT ME!\r
-     *\r
-     * @throws IOException DOCUMENT ME!\r
-     */\r
-    public NewickFile(String inStr) throws IOException\r
-    {\r
-        super(inStr, "Paste");\r
-    }\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param inFile DOCUMENT ME!\r
-     * @param type DOCUMENT ME!\r
-     *\r
-     * @throws IOException DOCUMENT ME!\r
-     */\r
-    public NewickFile(String inFile, String type) throws IOException\r
-    {\r
-        super(inFile, type);\r
-    }\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param newtree DOCUMENT ME!\r
-     */\r
-    public NewickFile(SequenceNode newtree)\r
-    {\r
-        root = newtree;\r
-    }\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param newtree DOCUMENT ME!\r
-     * @param bootstrap DOCUMENT ME!\r
-     */\r
-    public NewickFile(SequenceNode newtree, boolean bootstrap)\r
-    {\r
-        HasBootstrap = bootstrap;\r
-        root = newtree;\r
-    }\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param newtree DOCUMENT ME!\r
-     * @param bootstrap DOCUMENT ME!\r
-     * @param distances DOCUMENT ME!\r
-     */\r
-    public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances)\r
-    {\r
-        root = newtree;\r
-        HasBootstrap = bootstrap;\r
-        HasDistances = distances;\r
-    }\r
-\r
-    /**\r
-     * Creates a new NewickFile object.\r
-     *\r
-     * @param newtree DOCUMENT ME!\r
-     * @param bootstrap DOCUMENT ME!\r
-     * @param distances DOCUMENT ME!\r
-     * @param rootdistance DOCUMENT ME!\r
-     */\r
-    public NewickFile(SequenceNode newtree, boolean bootstrap,\r
-        boolean distances, boolean rootdistance)\r
-    {\r
-        root = newtree;\r
-        HasBootstrap = bootstrap;\r
-        HasDistances = distances;\r
-        RootHasDistance = rootdistance;\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @param Error DOCUMENT ME!\r
-     * @param Er DOCUMENT ME!\r
-     * @param r DOCUMENT ME!\r
-     * @param p DOCUMENT ME!\r
-     * @param s DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    private String ErrorStringrange(String Error, String Er, int r, int p,\r
-        String s)\r
-    {\r
-        return ((Error == null) ? "" : Error) + Er + " at position " + p +\r
-        " ( " +\r
-        s.substring(((p - r) < 0) ? 0 : (p - r),\r
-            ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";\r
-    }\r
-\r
-    // @tree annotations\r
-    // These are set automatically by the reader\r
-    public boolean HasBootstrap()\r
-    {\r
-        return HasBootstrap;\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    public boolean HasDistances()\r
-    {\r
-        return HasDistances;\r
-    }\r
-\r
-    public boolean HasRootDistance()\r
-    {\r
-        return RootHasDistance;\r
-    }\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @throws IOException DOCUMENT ME!\r
-     */\r
-    public void parse() throws IOException\r
-    {\r
-        String nf;\r
-\r
-        { // fill nf with complete tree file\r
-\r
-            StringBuffer file = new StringBuffer();\r
-\r
-            while ((nf = nextLine()) != null)\r
-            {\r
-                file.append(nf);\r
-            }\r
-\r
-            nf = file.toString();\r
-        }\r
-\r
-        root = new SequenceNode();\r
-\r
-        SequenceNode realroot = null;\r
-        SequenceNode c = root;\r
-\r
-        int d = -1;\r
-        int cp = 0;\r
-        //int flen = nf.length();\r
-\r
-        String Error = null;\r
-        String nodename = null;\r
-\r
-        float DefDistance = (float) 0.001; // @param Default distance for a node - very very small\r
-        int DefBootstrap = 0; // @param Default bootstrap for a node\r
-\r
-        float distance = DefDistance;\r
-        int bootstrap = DefBootstrap;\r
-\r
-        boolean ascending = false; // flag indicating that we are leaving the current node\r
-\r
-        com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(\r
-                "[(\\['),;]");\r
-\r
-        while (majorsyms.searchFrom(nf, cp) && (Error == null))\r
-        {\r
-            int fcp = majorsyms.matchedFrom();\r
-\r
-            switch (nf.charAt(fcp))\r
-            {\r
-            case '[': // Comment or structured/extended NH format info\r
-\r
-                com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(\r
-                        "]");\r
-\r
-                if (comment.searchFrom(nf, fcp))\r
-                {\r
-                    // Skip the comment field\r
-                    cp = 1 + comment.matchedFrom();\r
-                }\r
-                else\r
-                {\r
-                    Error = ErrorStringrange(Error, "Unterminated comment", 3,\r
-                            fcp, nf);\r
-                }\r
-\r
-                ;\r
-\r
-                break;\r
-\r
-            case '(':\r
-\r
-                // ascending should not be set\r
-                // New Internal node\r
-                if (ascending)\r
-                {\r
-                    Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);\r
-\r
-                    continue;\r
-                }\r
-\r
-                ;\r
-                d++;\r
-\r
-                if (c.right() == null)\r
-                {\r
-                    c.setRight(new SequenceNode(null, c, null, DefDistance,\r
-                            DefBootstrap, false));\r
-                    c = (SequenceNode) c.right();\r
-                }\r
-                else\r
-                {\r
-                    if (c.left() != null)\r
-                    {\r
-                        // Dummy node for polytomy - keeps c.left free for new node\r
-                        SequenceNode tmpn = new SequenceNode(null, c, null, 0,\r
-                                0, true);\r
-                        tmpn.SetChildren(c.left(), c.right());\r
-                        c.setRight(tmpn);\r
-                    }\r
-\r
-                    c.setLeft(new SequenceNode(null, c, null, DefDistance,\r
-                            DefBootstrap, false));\r
-                    c = (SequenceNode) c.left();\r
-                }\r
-\r
-                if (realroot == null)\r
-                {\r
-                    realroot = c;\r
-                }\r
-\r
-                nodename = null;\r
-                distance = DefDistance;\r
-                bootstrap = DefBootstrap;\r
-                cp = fcp + 1;\r
-\r
-                break;\r
-\r
-            // Deal with quoted fields\r
-            case '\'':\r
-\r
-                com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(\r
-                        "([^']|'')+'");\r
-\r
-                if (qnodename.searchFrom(nf, fcp))\r
-                {\r
-                    int nl = qnodename.stringMatched().length();\r
-                    nodename = new String(qnodename.stringMatched().substring(0,\r
-                                nl - 1));\r
-                    cp = fcp + nl + 1;\r
-                }\r
-                else\r
-                {\r
-                    Error = ErrorStringrange(Error,\r
-                            "Unterminated quotes for nodename", 7, fcp, nf);\r
-                }\r
-\r
-                break;\r
-\r
-            case ';':\r
-\r
-                if (d != -1)\r
-                {\r
-                    Error = ErrorStringrange(Error,\r
-                            "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);\r
-                }\r
-\r
-            // cp advanced at the end of default\r
-            default:\r
-\r
-                // Parse simpler field strings\r
-                String fstring = nf.substring(cp, fcp);\r
-                com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(\r
-                        "\\b([^' :;\\](),]+)");\r
-                com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(\r
-                        "\\S+([0-9+]+)\\S*:");\r
-                com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(\r
-                        ":([-0-9Ee.+]+)");\r
-\r
-                if (uqnodename.search(fstring) &&\r
-                        ((uqnodename.matchedFrom(1) == 0) ||\r
-                        (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK!\r
-                {\r
-                    if (nodename == null)\r
-                    {\r
-                        if (ReplaceUnderscores)\r
-                        {\r
-                            nodename = uqnodename.stringMatched(1).replace('_',\r
-                                    ' ');\r
-                        }\r
-                        else\r
-                        {\r
-                            nodename = uqnodename.stringMatched(1);\r
-                        }\r
-                    }\r
-                    else\r
-                    {\r
-                        Error = ErrorStringrange(Error,\r
-                                "File has broken algorithm - overwritten nodename",\r
-                                10, fcp, nf);\r
-                    }\r
-                }\r
-\r
-                if (nbootstrap.search(fstring) &&\r
-                        (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +\r
-                        uqnodename.stringMatched().length())))\r
-                {\r
-                    try\r
-                    {\r
-                        bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();\r
-                        HasBootstrap = true;\r
-                    }\r
-                    catch (Exception e)\r
-                    {\r
-                        Error = ErrorStringrange(Error,\r
-                                "Can't parse bootstrap value", 4,\r
-                                cp + nbootstrap.matchedFrom(), nf);\r
-                    }\r
-                }\r
-\r
-                boolean nodehasdistance = false;\r
-\r
-                if (ndist.search(fstring))\r
-                {\r
-                    try\r
-                    {\r
-                        distance = (new Float(ndist.stringMatched(1))).floatValue();\r
-                        HasDistances = true;\r
-                        nodehasdistance = true;\r
-                    }\r
-                    catch (Exception e)\r
-                    {\r
-                        Error = ErrorStringrange(Error,\r
-                                "Can't parse node distance value", 7,\r
-                                cp + ndist.matchedFrom(), nf);\r
-                    }\r
-                }\r
-\r
-                if (ascending)\r
-                {\r
-                    // Write node info here\r
-                    c.setName(nodename);\r
-                    // Trees without distances still need a render distance\r
-                    c.dist = (HasDistances) ? distance : DefDistance;\r
-                    // be consistent for internal bootstrap defaults too\r
-                    c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);\r
-                    if (c == realroot)\r
-                    {\r
-                        RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance\r
-                    }\r
-                }\r
-                else\r
-                {\r
-                    // Find a place to put the leaf\r
-                    SequenceNode newnode = new SequenceNode(null, c, nodename,\r
-                            (HasDistances) ? distance : DefDistance,\r
-                            (HasBootstrap) ? bootstrap : DefBootstrap, false);\r
-\r
-                    if (c.right() == null)\r
-                    {\r
-                        c.setRight(newnode);\r
-                    }\r
-                    else\r
-                    {\r
-                        if (c.left() == null)\r
-                        {\r
-                            c.setLeft(newnode);\r
-                        }\r
-                        else\r
-                        {\r
-                            // Insert a dummy node for polytomy\r
-                            // dummy nodes have distances\r
-                            SequenceNode newdummy = new SequenceNode(null, c,\r
-                                    null, (HasDistances ? 0 : DefDistance), 0, true);\r
-                            newdummy.SetChildren(c.left(), newnode);\r
-                            c.setLeft(newdummy);\r
-                        }\r
-                    }\r
-                }\r
-\r
-                if (ascending)\r
-                {\r
-                    // move back up the tree from preceding closure\r
-                    c = c.AscendTree();\r
-\r
-                    if ((d > -1) && (c == null))\r
-                    {\r
-                        Error = ErrorStringrange(Error,\r
-                                "File broke algorithm: Lost place in tree (is there an extra ')' ?)",\r
-                                7, fcp, nf);\r
-                    }\r
-                }\r
-\r
-                if (nf.charAt(fcp) == ')')\r
-                {\r
-                    d--;\r
-                    ascending = true;\r
-                }\r
-                else\r
-                {\r
-                    if (nf.charAt(fcp) == ',')\r
-                    {\r
-                        if (ascending)\r
-                        {\r
-                            ascending = false;\r
-                        }\r
-                        else\r
-                        {\r
-                            // Just advance focus, if we need to\r
-                            if ((c.left() != null) && (!c.left().isLeaf()))\r
-                            {\r
-                                c = (SequenceNode) c.left();\r
-                            }\r
-                        }\r
-                    }\r
-\r
-                    // else : We do nothing if ';' is encountered.\r
-                }\r
-\r
-                // Reset new node properties to obvious fakes\r
-                nodename = null;\r
-                distance = DefDistance;\r
-                bootstrap = DefBootstrap;\r
-\r
-                cp = fcp + 1;\r
-            }\r
-        }\r
-\r
-        if (Error != null)\r
-        {\r
-            throw (new IOException("NewickFile: " + Error + "\n"));\r
-        }\r
-\r
-        root = (SequenceNode) root.right().detach(); // remove the imaginary root.\r
-\r
-        if (!RootHasDistance)\r
-        {\r
-            root.dist = (HasDistances) ? 0 : DefDistance;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    public SequenceNode getTree()\r
-    {\r
-        return root;\r
-    }\r
-\r
-    /**\r
-     * Generate a newick format tree according to internal flags\r
-     * for bootstraps, distances and root distances.\r
-     *\r
-     * @return new hampshire tree in a single line\r
-     */\r
-    public String print()\r
-    {\r
-        synchronized (this)\r
-        {\r
-            StringBuffer tf = new StringBuffer();\r
-            print(tf, root);\r
-\r
-            return (tf.append(";").toString());\r
-        }\r
-    }\r
-\r
-    /**\r
-     *\r
-     *\r
-     * Generate a newick format tree according to internal flags\r
-     * for distances and root distances and user specificied writing of\r
-     * bootstraps.\r
-     * @param withbootstraps controls if bootstrap values are explicitly written.\r
-     *\r
-     * @return new hampshire tree in a single line\r
-     */\r
-    public String print(boolean withbootstraps)\r
-    {\r
-        synchronized (this)\r
-        {\r
-            boolean boots = this.HasBootstrap;\r
-            this.HasBootstrap = withbootstraps;\r
-\r
-            String rv = print();\r
-            this.HasBootstrap = boots;\r
-\r
-            return rv;\r
-        }\r
-    }\r
-\r
-    /**\r
-     *\r
-     * Generate newick format tree according to internal flags\r
-     * for writing root node distances.\r
-     *\r
-     * @param withbootstraps explicitly write bootstrap values\r
-     * @param withdists explicitly write distances\r
-     *\r
-     * @return new hampshire tree in a single line\r
-     */\r
-    public String print(boolean withbootstraps, boolean withdists)\r
-    {\r
-        synchronized (this)\r
-        {\r
-            boolean dists = this.HasDistances;\r
-            this.HasDistances = withdists;\r
-\r
-            String rv = print(withbootstraps);\r
-            this.HasDistances = dists;\r
-\r
-            return rv;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Generate newick format tree according to user specified flags\r
-     *\r
-     * @param withbootstraps explicitly write bootstrap values\r
-     * @param withdists explicitly write distances\r
-     * @param printRootInfo explicitly write root distance\r
-     *\r
-     * @return new hampshire tree in a single line\r
-     */\r
-    public String print(boolean withbootstraps, boolean withdists,\r
-        boolean printRootInfo)\r
-    {\r
-        synchronized (this)\r
-        {\r
-            boolean rootinfo = printRootInfo;\r
-            this.printRootInfo = printRootInfo;\r
-\r
-            String rv = print(withbootstraps, withdists);\r
-            this.printRootInfo = rootinfo;\r
-\r
-            return rv;\r
-        }\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    char getQuoteChar()\r
-    {\r
-        return QuoteChar;\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @param c DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    char setQuoteChar(char c)\r
-    {\r
-        char old = QuoteChar;\r
-        QuoteChar = c;\r
-\r
-        return old;\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @param name DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    private String nodeName(String name)\r
-    {\r
-        if (NodeSafeName[0].search(name))\r
-        {\r
-            return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;\r
-        }\r
-        else\r
-        {\r
-            return NodeSafeName[2].replaceAll(name);\r
-        }\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @param c DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    private String printNodeField(SequenceNode c)\r
-    {\r
-        return ((c.getName() == null) ? "" : nodeName(c.getName())) +\r
-        ((HasBootstrap)\r
-        ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) : "") : "") +\r
-        ((HasDistances) ? (":" + c.dist) : "");\r
-    }\r
-\r
-    /**\r
-     * DOCUMENT ME!\r
-     *\r
-     * @param root DOCUMENT ME!\r
-     *\r
-     * @return DOCUMENT ME!\r
-     */\r
-    private String printRootField(SequenceNode root)\r
-    {\r
-        return (printRootInfo)\r
-        ? (((root.getName() == null) ? "" : nodeName(root.getName())) +\r
-        ((HasBootstrap)\r
-        ? ((root.getBootstrap() > -1) ? (" " + root.getBootstrap()) : "") : "") +\r
-        ((RootHasDistance) ? (":" + root.dist) : "")) : "";\r
-    }\r
-\r
-    // Non recursive call deals with root node properties\r
-    public void print(StringBuffer tf, SequenceNode root)\r
-    {\r
-        if (root != null)\r
-        {\r
-            if (root.isLeaf() && printRootInfo)\r
-            {\r
-                tf.append(printRootField(root));\r
-            }\r
-            else\r
-            {\r
-                if (root.isDummy())\r
-                {\r
-                    _print(tf, (SequenceNode) root.right());\r
-                    _print(tf, (SequenceNode) root.left());\r
-                }\r
-                else\r
-                {\r
-                    tf.append("(");\r
-                    _print(tf, (SequenceNode) root.right());\r
-\r
-                    if (root.left() != null)\r
-                    {\r
-                        tf.append(",");\r
-                    }\r
-\r
-                    _print(tf, (SequenceNode) root.left());\r
-                    tf.append(")" + printRootField(root));\r
-                }\r
-            }\r
-        }\r
-    }\r
-\r
-    // Recursive call for non-root nodes\r
-    public void _print(StringBuffer tf, SequenceNode c)\r
-    {\r
-        if (c != null)\r
-        {\r
-            if (c.isLeaf())\r
-            {\r
-                tf.append(printNodeField(c));\r
-            }\r
-            else\r
-            {\r
-                if (c.isDummy())\r
-                {\r
-                    _print(tf, (SequenceNode) c.left());\r
-                    if (c.left() != null)\r
-                    {\r
-                      tf.append(",");\r
-                    }\r
-                    _print(tf, (SequenceNode) c.right());\r
-                }\r
-                else\r
-                {\r
-                    tf.append("(");\r
-                    _print(tf, (SequenceNode) c.right());\r
-\r
-                    if (c.left() != null)\r
-                    {\r
-                        tf.append(",");\r
-                    }\r
-\r
-                    _print(tf, (SequenceNode) c.left());\r
-                    tf.append(")" + printNodeField(c));\r
-                }\r
-            }\r
-        }\r
-    }\r
-\r
-    // Test\r
-    public static void main(String[] args)\r
-    {\r
-        try\r
-        {\r
-            if (args==null || args.length!=1) {\r
-              System.err.println("Takes one argument - file name of a newick tree file.");\r
-              System.exit(0);\r
-            }\r
-\r
-            File fn = new File(args[0]);\r
-\r
-            StringBuffer newickfile = new StringBuffer();\r
-            BufferedReader treefile = new BufferedReader(new FileReader(fn));\r
-            String l;\r
-\r
-            while ((l = treefile.readLine()) != null)\r
-            {\r
-                newickfile.append(l);\r
-            }\r
-\r
-            treefile.close();\r
-            System.out.println("Read file :\n");\r
-\r
-            NewickFile trf = new NewickFile(args[0], "File");\r
-            trf.parse();\r
-            System.out.println("Original file :\n");\r
-\r
-            com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");\r
-            System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");\r
-\r
-            System.out.println("Parsed file.\n");\r
-            System.out.println("Default output type for original input.\n");\r
-            System.out.println(trf.print());\r
-            System.out.println("Without bootstraps.\n");\r
-            System.out.println(trf.print(false));\r
-            System.out.println("Without distances.\n");\r
-            System.out.println(trf.print(true, false));\r
-            System.out.println("Without bootstraps but with distanecs.\n");\r
-            System.out.println(trf.print(false, true));\r
-            System.out.println("Without bootstraps or distanecs.\n");\r
-            System.out.println(trf.print(false, false));\r
-            System.out.println("With bootstraps and with distances.\n");\r
-            System.out.println(trf.print(true, true));\r
-        }\r
-        catch (java.io.IOException e)\r
-        {\r
-            System.err.println("Exception\n" + e);\r
-            e.printStackTrace();\r
-        }\r
-    }\r
-}\r
+/*
+* Jalview - A Sequence Alignment Editor and Viewer
+* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+*/
+
+// NewickFile.java
+// Tree I/O
+// http://evolution.genetics.washington.edu/phylip/newick_doc.html
+// TODO: Implement Basic NHX tag parsing and preservation
+// TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html
+// TODO: Extended SequenceNodeI to hold parsed NHX strings
+package jalview.io;
+
+import jalview.datamodel.*;
+
+import java.io.*;
+
+
+/**
+ * Reads and writes phylogenetic trees in Newick (New Hampshire) format.
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class NewickFile extends FileParse
+{
+    /** Root of the tree, either supplied to a constructor or built by parse(). */
+    SequenceNode root;
+
+    // Tree annotation flags: supplied through the constructors, or set by
+    // parse() when the corresponding values are found in the input.
+    private boolean HasBootstrap = false;
+    private boolean HasDistances = false;
+    private boolean RootHasDistance = false;
+
+    // File IO Flags
+    /** When true, parse() turns '_' into ' ' in unquoted node names. */
+    boolean ReplaceUnderscores = false;
+    /** When true, print() also writes the root node's own field. */
+    boolean printRootInfo = false;
+
+    /**
+     * Patterns used by nodeName() when writing node labels:
+     * [0] detects characters that force a label to be quoted,
+     * [1] doubles embedded single quotes inside a quoted label,
+     * [2] turns whitespace in an unquoted label into underscores.
+     */
+    private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
+        {
+            new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes
+            new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters
+            // Was "s/\\/w/_/", which matches the literal text "/w" and never whitespace.
+            new com.stevesoft.pat.Regex().perlCode("s/\\s/_/") // unquoted whitespace transformation
+        };
+    /** Quote character wrapped around labels that need quoting. */
+    char QuoteChar = '\'';
+
+    /**
+     * Creates a NewickFile that obtains its input through FileParse using
+     * the "Paste" source type.
+     *
+     * @param inStr handed to FileParse as the data source; presumably the
+     *              Newick text itself rather than a file name (confirm against FileParse)
+     *
+     * @throws IOException propagated from the FileParse constructor
+     */
+    public NewickFile(String inStr) throws IOException
+    {
+        super(inStr, "Paste");
+    }
+
+    /**
+     * Creates a NewickFile that obtains its input through FileParse.
+     * Both arguments are forwarded unchanged to the FileParse constructor.
+     *
+     * @param inFile the data source handed to FileParse
+     * @param type the source type handed to FileParse; this file itself
+     *             uses "Paste" and "File"
+     *
+     * @throws IOException propagated from the FileParse constructor
+     */
+    public NewickFile(String inFile, String type) throws IOException
+    {
+        super(inFile, type);
+    }
+
+    /**
+     * Wraps an existing tree for output, with the bootstrap, distance and
+     * root-distance flags all left false.
+     *
+     * @param newtree root node of the tree to wrap
+     */
+    public NewickFile(SequenceNode newtree)
+    {
+        this(newtree, false, false, false);
+    }
+
+    /**
+     * Wraps an existing tree for output, choosing whether bootstrap values
+     * are written. The distance and root-distance flags are left false.
+     *
+     * @param newtree root node of the tree to wrap
+     * @param bootstrap initial value of the bootstrap flag
+     */
+    public NewickFile(SequenceNode newtree, boolean bootstrap)
+    {
+        this(newtree, bootstrap, false, false);
+    }
+
+    /**
+     * Wraps an existing tree for output, choosing whether bootstrap values
+     * and distances are written. The root-distance flag is left false.
+     *
+     * @param newtree root node of the tree to wrap
+     * @param bootstrap initial value of the bootstrap flag
+     * @param distances initial value of the distances flag
+     */
+    public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances)
+    {
+        this(newtree, bootstrap, distances, false);
+    }
+
+    /**
+     * Wraps an existing tree for output with every annotation flag given
+     * explicitly.
+     *
+     * @param newtree root node of the tree to wrap
+     * @param bootstrap initial value of the bootstrap flag
+     * @param distances initial value of the distances flag
+     * @param rootdistance initial value of the root-distance flag
+     */
+    public NewickFile(SequenceNode newtree, boolean bootstrap,
+        boolean distances, boolean rootdistance)
+    {
+        this.HasBootstrap = bootstrap;
+        this.HasDistances = distances;
+        this.RootHasDistance = rootdistance;
+        this.root = newtree;
+    }
+
+    /**
+     * DOCUMENT ME!
+     *
+     * @param Error DOCUMENT ME!
+     * @param Er DOCUMENT ME!
+     * @param r DOCUMENT ME!
+     * @param p DOCUMENT ME!
+     * @param s DOCUMENT ME!
+     *
+     * @return DOCUMENT ME!
+     */
+    private String ErrorStringrange(String Error, String Er, int r, int p,
+        String s)
+    {
+        return ((Error == null) ? "" : Error) + Er + " at position " + p +
+        " ( " +
+        s.substring(((p - r) < 0) ? 0 : (p - r),
+            ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";
+    }
+
+    // @tree annotations
+    // These are set automatically by the reader
+    public boolean HasBootstrap()
+    {
+        return HasBootstrap;
+    }
+
+    /**
+     * DOCUMENT ME!
+     *
+     * @return DOCUMENT ME!
+     */
+    public boolean HasDistances()
+    {
+        return HasDistances;
+    }
+
+    public boolean HasRootDistance()
+    {
+        return RootHasDistance;
+    }
+    /**
+     * Reads the complete input through nextLine() and parses it as a
+     * newick (new hampshire) tree. Nodes are built beneath a placeholder
+     * root which is detached once the scan has finished, leaving the
+     * parsed tree in the root field. The HasBootstrap, HasDistances and
+     * RootHasDistance flags are set from what is found in the input.
+     *
+     * @throws IOException if a syntax problem is detected; the message
+     *         carries the error text and a snippet of the input around it
+     */
+    public void parse() throws IOException
+    {
+        String nf;
+
+        { // fill nf with complete tree file
+
+            StringBuffer file = new StringBuffer();
+
+            while ((nf = nextLine()) != null)
+            {
+                file.append(nf);
+            }
+
+            nf = file.toString();
+        }
+
+        // Placeholder root: the tree is hung off it while parsing and it is
+        // removed again after the scan loop.
+        root = new SequenceNode();
+
+        // First node opened by a '(' - the top node of the tree in the file
+        SequenceNode realroot = null;
+        // Node currently being filled
+        SequenceNode c = root;
+
+        // Nesting depth: incremented on '(' and decremented on ')'
+        int d = -1;
+        // Position in nf from which the next symbol search / field starts
+        int cp = 0;
+        //int flen = nf.length();
+
+        // Accumulated error text; a non-null value ends the scan loop
+        String Error = null;
+        String nodename = null;
+
+        float DefDistance = (float) 0.001; // @param Default distance for a node - very very small
+        int DefBootstrap = 0; // @param Default bootstrap for a node
+
+        float distance = DefDistance;
+        int bootstrap = DefBootstrap;
+
+        boolean ascending = false; // flag indicating that we are leaving the current node
+
+        // Structural symbols that delimit fields: ( [ ' ) , ;
+        com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
+                "[(\\['),;]");
+
+        // Each pass handles the next structural symbol found at or after cp
+        while (majorsyms.searchFrom(nf, cp) && (Error == null))
+        {
+            // Position of the symbol handled in this pass
+            int fcp = majorsyms.matchedFrom();
+
+            switch (nf.charAt(fcp))
+            {
+            case '[': // Comment or structured/extended NH format info
+
+                com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
+                        "]");
+
+                if (comment.searchFrom(nf, fcp))
+                {
+                    // Skip the comment field
+                    cp = 1 + comment.matchedFrom();
+                }
+                else
+                {
+                    Error = ErrorStringrange(Error, "Unterminated comment", 3,
+                            fcp, nf);
+                }
+
+                ;
+
+                break;
+
+            case '(':
+
+                // ascending should not be set
+                // New Internal node
+                if (ascending)
+                {
+                    Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);
+
+                    // Error is now non-null, so the loop condition ends the scan
+                    continue;
+                }
+
+                ;
+                d++;
+
+                // The right child slot is filled first, then the left one
+                if (c.right() == null)
+                {
+                    c.setRight(new SequenceNode(null, c, null, DefDistance,
+                            DefBootstrap, false));
+                    c = (SequenceNode) c.right();
+                }
+                else
+                {
+                    if (c.left() != null)
+                    {
+                        // Dummy node for polytomy - keeps c.left free for new node
+                        SequenceNode tmpn = new SequenceNode(null, c, null, 0,
+                                0, true);
+                        tmpn.SetChildren(c.left(), c.right());
+                        c.setRight(tmpn);
+                    }
+
+                    c.setLeft(new SequenceNode(null, c, null, DefDistance,
+                            DefBootstrap, false));
+                    c = (SequenceNode) c.left();
+                }
+
+                if (realroot == null)
+                {
+                    realroot = c;
+                }
+
+                nodename = null;
+                distance = DefDistance;
+                bootstrap = DefBootstrap;
+                cp = fcp + 1;
+
+                break;
+
+            // Deal with quoted fields
+            case '\'':
+
+                com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
+                        "([^']|'')+'");
+
+                if (qnodename.searchFrom(nf, fcp))
+                {
+                    // The match ends with the closing quote, which is
+                    // dropped from the stored name.
+                    int nl = qnodename.stringMatched().length();
+                    nodename = new String(qnodename.stringMatched().substring(0,
+                                nl - 1));
+                    // NOTE(review): assumes the match began right after the
+                    // opening quote at fcp - confirm for names containing ''
+                    cp = fcp + nl + 1;
+                }
+                else
+                {
+                    Error = ErrorStringrange(Error,
+                            "Unterminated quotes for nodename", 7, fcp, nf);
+                }
+
+                break;
+
+            case ';':
+
+                if (d != -1)
+                {
+                    Error = ErrorStringrange(Error,
+                            "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
+                }
+
+            // no break: ';' falls through so any pending field is processed
+            // cp advanced at the end of default
+            default:
+
+                // Parse simpler field strings
+                // fstring is the text between cp and the current symbol
+                String fstring = nf.substring(cp, fcp);
+                // Unquoted name: a run without quote, space, ':', ';', ']',
+                // '(', ')' or ','
+                com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
+                        "\\b([^' :;\\](),]+)");
+                // Candidate bootstrap value: digits (or '+') ahead of a ':'
+                com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
+                        "\\S+([0-9+]+)\\S*:");
+                // Distance: ':' followed by a number-like run
+                com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
+                        ":([-0-9Ee.+]+)");
+
+                // A match directly preceded by ':' is not taken as a name
+                if (uqnodename.search(fstring) &&
+                        ((uqnodename.matchedFrom(1) == 0) ||
+                        (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK!
+                {
+                    if (nodename == null)
+                    {
+                        if (ReplaceUnderscores)
+                        {
+                            nodename = uqnodename.stringMatched(1).replace('_',
+                                    ' ');
+                        }
+                        else
+                        {
+                            nodename = uqnodename.stringMatched(1);
+                        }
+                    }
+                    else
+                    {
+                        Error = ErrorStringrange(Error,
+                                "File has broken algorithm - overwritten nodename",
+                                10, fcp, nf);
+                    }
+                }
+
+                // The bootstrap is only used when its digits start beyond the
+                // end of the name match.
+                // NOTE(review): uqnodename is queried here even when its
+                // search above failed - confirm what the Regex returns then
+                if (nbootstrap.search(fstring) &&
+                        (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +
+                        uqnodename.stringMatched().length())))
+                {
+                    try
+                    {
+                        bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue();
+                        HasBootstrap = true;
+                    }
+                    catch (Exception e)
+                    {
+                        Error = ErrorStringrange(Error,
+                                "Can't parse bootstrap value", 4,
+                                cp + nbootstrap.matchedFrom(), nf);
+                    }
+                }
+
+                boolean nodehasdistance = false;
+
+                if (ndist.search(fstring))
+                {
+                    try
+                    {
+                        distance = (new Float(ndist.stringMatched(1))).floatValue();
+                        HasDistances = true;
+                        nodehasdistance = true;
+                    }
+                    catch (Exception e)
+                    {
+                        Error = ErrorStringrange(Error,
+                                "Can't parse node distance value", 7,
+                                cp + ndist.matchedFrom(), nf);
+                    }
+                }
+
+                // When ascending, the field just read belongs to the internal
+                // node c that was closed; otherwise it describes a new leaf.
+                if (ascending)
+                {
+                    // Write node info here
+                    c.setName(nodename);
+                    // Trees without distances still need a render distance
+                    c.dist = (HasDistances) ? distance : DefDistance;
+                    // be consistent for internal bootstrap defaults too
+                    c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);
+                    if (c == realroot)
+                    {
+                        RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance
+                    }
+                }
+                else
+                {
+                    // Find a place to put the leaf
+                    SequenceNode newnode = new SequenceNode(null, c, nodename,
+                            (HasDistances) ? distance : DefDistance,
+                            (HasBootstrap) ? bootstrap : DefBootstrap, false);
+
+                    if (c.right() == null)
+                    {
+                        c.setRight(newnode);
+                    }
+                    else
+                    {
+                        if (c.left() == null)
+                        {
+                            c.setLeft(newnode);
+                        }
+                        else
+                        {
+                            // Insert a dummy node for polytomy
+                            // dummy nodes have distances
+                            SequenceNode newdummy = new SequenceNode(null, c,
+                                    null, (HasDistances ? 0 : DefDistance), 0, true);
+                            newdummy.SetChildren(c.left(), newnode);
+                            c.setLeft(newdummy);
+                        }
+                    }
+                }
+
+                if (ascending)
+                {
+                    // move back up the tree from preceding closure
+                    c = c.AscendTree();
+
+                    if ((d > -1) && (c == null))
+                    {
+                        Error = ErrorStringrange(Error,
+                                "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
+                                7, fcp, nf);
+                    }
+                }
+
+                // A ')' closes the current node: its own field is picked up
+                // on a later pass, with ascending set.
+                if (nf.charAt(fcp) == ')')
+                {
+                    d--;
+                    ascending = true;
+                }
+                else
+                {
+                    if (nf.charAt(fcp) == ',')
+                    {
+                        if (ascending)
+                        {
+                            ascending = false;
+                        }
+                        else
+                        {
+                            // Just advance focus, if we need to
+                            if ((c.left() != null) && (!c.left().isLeaf()))
+                            {
+                                c = (SequenceNode) c.left();
+                            }
+                        }
+                    }
+
+                    // else : We do nothing if ';' is encountered.
+                }
+
+                // Reset new node properties to obvious fakes
+                nodename = null;
+                distance = DefDistance;
+                bootstrap = DefBootstrap;
+
+                cp = fcp + 1;
+            }
+        }
+
+        if (Error != null)
+        {
+            throw (new IOException("NewickFile: " + Error + "\n"));
+        }
+
+        // NOTE(review): root.right() is only assigned inside the loop above;
+        // presumably it is still null for input containing no symbols (e.g.
+        // an empty file), which would raise a NullPointerException here -
+        // confirm and guard if so.
+        root = (SequenceNode) root.right().detach(); // remove the imaginary root.
+
+        // A root without an explicit distance gets 0 when the tree has
+        // distances, otherwise the default render distance.
+        if (!RootHasDistance)
+        {
+            root.dist = (HasDistances) ? 0 : DefDistance;
+        }
+    }
+
+    /**
+     * DOCUMENT ME!
+     *
+     * @return DOCUMENT ME!
+     */
+    public SequenceNode getTree()
+    {
+        return root;
+    }
+
+    /**
+     * Generate a newick format tree according to internal flags
+     * for bootstraps, distances and root distances.
+     *
+     * @return new hampshire tree in a single line
+     */
+    public String print()
+    {
+        synchronized (this)
+        {
+            StringBuffer tf = new StringBuffer();
+            print(tf, root);
+
+            return (tf.append(";").toString());
+        }
+    }
+
+    /**
+     *
+     *
+     * Generate a newick format tree according to internal flags
+     * for distances and root distances and user specificied writing of
+     * bootstraps.
+     * @param withbootstraps controls if bootstrap values are explicitly written.
+     *
+     * @return new hampshire tree in a single line
+     */
+    public String print(boolean withbootstraps)
+    {
+        synchronized (this)
+        {
+            boolean boots = this.HasBootstrap;
+            this.HasBootstrap = withbootstraps;
+
+            String rv = print();
+            this.HasBootstrap = boots;
+
+            return rv;
+        }
+    }
+
+    /**
+     *
+     * Generate newick format tree according to internal flags
+     * for writing root node distances.
+     *
+     * @param withbootstraps explicitly write bootstrap values
+     * @param withdists explicitly write distances
+     *
+     * @return new hampshire tree in a single line
+     */
+    public String print(boolean withbootstraps, boolean withdists)
+    {
+        synchronized (this)
+        {
+            boolean dists = this.HasDistances;
+            this.HasDistances = withdists;
+
+            String rv = print(withbootstraps);
+            this.HasDistances = dists;
+
+            return rv;
+        }
+    }
+
+    /**
+     * Generate newick format tree according to user specified flags
+     *
+     * @param withbootstraps explicitly write bootstrap values
+     * @param withdists explicitly write distances
+     * @param printRootInfo explicitly write root distance
+     *
+     * @return new hampshire tree in a single line
+     */
+    public String print(boolean withbootstraps, boolean withdists,
+        boolean printRootInfo)
+    {
+        synchronized (this)
+        {
+            boolean rootinfo = printRootInfo;
+            this.printRootInfo = printRootInfo;
+
+            String rv = print(withbootstraps, withdists);
+            this.printRootInfo = rootinfo;
+
+            return rv;
+        }
+    }
+
+    /**
+     * Gives the character that nodeName() places around names which need
+     * quoting.
+     *
+     * @return current value of the QuoteChar field
+     */
+    char getQuoteChar()
+    {
+        return this.QuoteChar;
+    }
+
+    /**
+     * Replaces the character that nodeName() places around names which
+     * need quoting.
+     *
+     * @param c the new quote character
+     *
+     * @return the quote character that was in use before this call
+     */
+    char setQuoteChar(char c)
+    {
+        char previous = this.QuoteChar;
+        this.QuoteChar = c;
+
+        return previous;
+    }
+
+    /**
+     * Prepares a node name for output. A name matched by NodeSafeName[0]
+     * (the test for requiring quotes) is rewritten with NodeSafeName[1] and
+     * wrapped in QuoteChar; any other name is rewritten with
+     * NodeSafeName[2] and left unquoted.
+     *
+     * @param name the raw node name
+     *
+     * @return the name in the form written to the newick string
+     */
+    private String nodeName(String name)
+    {
+        boolean needsQuoting = NodeSafeName[0].search(name);
+
+        if (!needsQuoting)
+        {
+            return NodeSafeName[2].replaceAll(name);
+        }
+
+        String quotedBody = NodeSafeName[1].replaceAll(name);
+
+        return QuoteChar + quotedBody + QuoteChar;
+    }
+
+    /**
+     * Builds the text written for a non-root node: its name (if any),
+     * then " bootstrap" when HasBootstrap is set and the value is above -1,
+     * then ":distance" when HasDistances is set.
+     *
+     * @param c the node to describe
+     *
+     * @return the field text, possibly empty
+     */
+    private String printNodeField(SequenceNode c)
+    {
+        StringBuffer field = new StringBuffer();
+        String label = c.getName();
+
+        if (label != null)
+        {
+            field.append(nodeName(label));
+        }
+
+        if (HasBootstrap && (c.getBootstrap() > -1))
+        {
+            field.append(" ").append(c.getBootstrap());
+        }
+
+        if (HasDistances)
+        {
+            field.append(":").append(c.dist);
+        }
+
+        return field.toString();
+    }
+
+    /**
+     * Builds the text written for the root node. Nothing is written unless
+     * printRootInfo is set; otherwise the name (if any) is followed by
+     * " bootstrap" when HasBootstrap is set and the value is above -1, and
+     * by ":distance" when RootHasDistance is set.
+     *
+     * @param root the root node to describe
+     *
+     * @return the field text, possibly empty
+     */
+    private String printRootField(SequenceNode root)
+    {
+        if (!printRootInfo)
+        {
+            return "";
+        }
+
+        StringBuffer field = new StringBuffer();
+        String label = root.getName();
+
+        if (label != null)
+        {
+            field.append(nodeName(label));
+        }
+
+        if (HasBootstrap && (root.getBootstrap() > -1))
+        {
+            field.append(" ").append(root.getBootstrap());
+        }
+
+        if (RootHasDistance)
+        {
+            field.append(":").append(root.dist);
+        }
+
+        return field.toString();
+    }
+
+    // Non recursive call deals with root node properties
+    public void print(StringBuffer tf, SequenceNode root)
+    {
+        if (root != null)
+        {
+            if (root.isLeaf() && printRootInfo)
+            {
+                tf.append(printRootField(root));
+            }
+            else
+            {
+                if (root.isDummy())
+                {
+                    _print(tf, (SequenceNode) root.right());
+                    _print(tf, (SequenceNode) root.left());
+                }
+                else
+                {
+                    tf.append("(");
+                    _print(tf, (SequenceNode) root.right());
+
+                    if (root.left() != null)
+                    {
+                        tf.append(",");
+                    }
+
+                    _print(tf, (SequenceNode) root.left());
+                    tf.append(")" + printRootField(root));
+                }
+            }
+        }
+    }
+
+    // Recursive call for non-root nodes
+    public void _print(StringBuffer tf, SequenceNode c)
+    {
+        if (c != null)
+        {
+            if (c.isLeaf())
+            {
+                tf.append(printNodeField(c));
+            }
+            else
+            {
+                if (c.isDummy())
+                {
+                    _print(tf, (SequenceNode) c.left());
+                    if (c.left() != null)
+                    {
+                      tf.append(",");
+                    }
+                    _print(tf, (SequenceNode) c.right());
+                }
+                else
+                {
+                    tf.append("(");
+                    _print(tf, (SequenceNode) c.right());
+
+                    if (c.left() != null)
+                    {
+                        tf.append(",");
+                    }
+
+                    _print(tf, (SequenceNode) c.left());
+                    tf.append(")" + printNodeField(c));
+                }
+            }
+        }
+    }
+
+    // Test
+    public static void main(String[] args)
+    {
+        try
+        {
+            if (args==null || args.length!=1) {
+              System.err.println("Takes one argument - file name of a newick tree file.");
+              System.exit(0);
+            }
+
+            File fn = new File(args[0]);
+
+            StringBuffer newickfile = new StringBuffer();
+            BufferedReader treefile = new BufferedReader(new FileReader(fn));
+            String l;
+
+            while ((l = treefile.readLine()) != null)
+            {
+                newickfile.append(l);
+            }
+
+            treefile.close();
+            System.out.println("Read file :\n");
+
+            NewickFile trf = new NewickFile(args[0], "File");
+            trf.parse();
+            System.out.println("Original file :\n");
+
+            com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
+            System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
+
+            System.out.println("Parsed file.\n");
+            System.out.println("Default output type for original input.\n");
+            System.out.println(trf.print());
+            System.out.println("Without bootstraps.\n");
+            System.out.println(trf.print(false));
+            System.out.println("Without distances.\n");
+            System.out.println(trf.print(true, false));
+            System.out.println("Without bootstraps but with distanecs.\n");
+            System.out.println(trf.print(false, true));
+            System.out.println("Without bootstraps or distanecs.\n");
+            System.out.println(trf.print(false, false));
+            System.out.println("With bootstraps and with distances.\n");
+            System.out.println(trf.print(true, true));
+        }
+        catch (java.io.IOException e)
+        {
+            System.err.println("Exception\n" + e);
+            e.printStackTrace();
+        }
+    }
+}