X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=5fdcc01e8d5e2cb9b8f36159f4214473ca802426;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=74612a6f7f43ebcefe31153f821eb62ef2d1b529;hpb=3a993bbe274824870c78bd7695c42fa93908cb30;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index 74612a6..5fdcc01 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -1,22 +1,20 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ - // NewickFile.java // Tree I/O // http://evolution.genetics.washington.edu/phylip/newick_doc.html @@ -26,68 +24,114 @@ package jalview.io; import java.io.*; +import java.util.StringTokenizer; import jalview.datamodel.*; /** - * Parse a new hanpshire style tree - * Caveats: NHX files are NOT supported and the tree distances and topology are unreliable when they are parsed. + * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the + * tree distances and topology are unreliable when they are parsed. TODO: on + * this: NHX codes are appended in comments beginning with &&NHX. The codes are + * given below (from http://www.phylosoft.org/forester/NHX.html): Element Type + * Description Corresponding phyloXML element (parent element in parentheses) no + * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) + * () : decimal branch length to parent node (MUST BE SECOND, IF + * ASSIGNED) () :GN= string gene name () + * :AC= string sequence accession () :ND= string node + * identifier - if this is being used, it has to be unique within each phylogeny + * () :B= decimal confidence value for parent branch + * () :D= 'T', 'F', or '?' 'T' if this node represents a + * duplication event - 'F' if this node represents a speciation event, '?' if + * this node represents an unknown event (D= tag should be replaced by Ev= tag) + * n/a :Ev=duplications>speciations>gene losses>event type>duplication type int + * int int string string event (replaces the =D tag), number of duplication, + * speciation, and gene loss events, type of event (transfer, fusion, root, + * unknown, other, speciation_duplication_loss, unassigned) () + * :E= string EC number at this node () :Fu= string + * function at this node () + * :DS=protein-length>from>to>support>name>from>... int int int double string + * int ... domain structure at this node () :S= + * string species name of the species/phylum at this node () + * :T= integer taxonomy ID of the species/phylum at this node () + * :W= integer width of parent branch () :C=rrr.ggg.bbb + * integer.integer.integer color of parent branch () :Co= 'Y' or + * 'N' collapse this node when drawing the tree (default is not to collapse) n/a + * :XB= string custom data associated with a branch () :XN= + * string custom data associated with a node () :O= integer + * orthologous to this external node n/a :SN= integer subtree neighbors n/a :SO= + * integer super orthologous (no duplications on paths) to this external node + * n/a + * * @author Jim Procter * @version $Revision$ */ -public class NewickFile - extends FileParse +public class NewickFile extends FileParse { SequenceNode root; + private boolean HasBootstrap = false; + private boolean HasDistances = false; + private boolean RootHasDistance = false; // File IO Flags boolean ReplaceUnderscores = false; + boolean printRootInfo = true; + private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] - { - new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for requiring quotes - new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote characters - new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace transformation + { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for + // requiring + // quotes + new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote + // characters + new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace + // transformation }; + char QuoteChar = '\''; /** * Creates a new NewickFile object. - * - * @param inStr DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! + * + * @param inStr + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! */ - public NewickFile(String inStr) - throws IOException + public NewickFile(String inStr) throws IOException { super(inStr, "Paste"); } /** * Creates a new NewickFile object. - * - * @param inFile DOCUMENT ME! - * @param type DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! + * + * @param inFile + * DOCUMENT ME! + * @param type + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! */ - public NewickFile(String inFile, String type) - throws IOException + public NewickFile(String inFile, String type) throws IOException { super(inFile, type); } + public NewickFile(FileParse source) throws IOException { super(source); } + /** * Creates a new NewickFile object. - * - * @param newtree DOCUMENT ME! + * + * @param newtree + * DOCUMENT ME! */ public NewickFile(SequenceNode newtree) { @@ -96,9 +140,11 @@ public class NewickFile /** * Creates a new NewickFile object. - * - * @param newtree DOCUMENT ME! - * @param bootstrap DOCUMENT ME! + * + * @param newtree + * DOCUMENT ME! + * @param bootstrap + * DOCUMENT ME! */ public NewickFile(SequenceNode newtree, boolean bootstrap) { @@ -108,12 +154,16 @@ public class NewickFile /** * Creates a new NewickFile object. - * - * @param newtree DOCUMENT ME! - * @param bootstrap DOCUMENT ME! - * @param distances DOCUMENT ME! + * + * @param newtree + * DOCUMENT ME! + * @param bootstrap + * DOCUMENT ME! + * @param distances + * DOCUMENT ME! */ - public NewickFile(SequenceNode newtree, boolean bootstrap, boolean distances) + public NewickFile(SequenceNode newtree, boolean bootstrap, + boolean distances) { root = newtree; HasBootstrap = bootstrap; @@ -122,14 +172,18 @@ public class NewickFile /** * Creates a new NewickFile object. - * - * @param newtree DOCUMENT ME! - * @param bootstrap DOCUMENT ME! - * @param distances DOCUMENT ME! - * @param rootdistance DOCUMENT ME! + * + * @param newtree + * DOCUMENT ME! + * @param bootstrap + * DOCUMENT ME! + * @param distances + * DOCUMENT ME! + * @param rootdistance + * DOCUMENT ME! */ public NewickFile(SequenceNode newtree, boolean bootstrap, - boolean distances, boolean rootdistance) + boolean distances, boolean rootdistance) { root = newtree; HasBootstrap = bootstrap; @@ -139,22 +193,30 @@ public class NewickFile /** * DOCUMENT ME! - * - * @param Error DOCUMENT ME! - * @param Er DOCUMENT ME! - * @param r DOCUMENT ME! - * @param p DOCUMENT ME! - * @param s DOCUMENT ME! - * + * + * @param Error + * DOCUMENT ME! + * @param Er + * DOCUMENT ME! + * @param r + * DOCUMENT ME! + * @param p + * DOCUMENT ME! + * @param s + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private String ErrorStringrange(String Error, String Er, int r, int p, - String s) + String s) { - return ( (Error == null) ? "" : Error) + Er + " at position " + p + - " ( " + - s.substring( ( (p - r) < 0) ? 0 : (p - r), - ( (p + r) > s.length()) ? s.length() : (p + r)) + " )\n"; + return ((Error == null) ? "" : Error) + + Er + + " at position " + + p + + " ( " + + s.substring(((p - r) < 0) ? 0 : (p - r), + ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n"; } // @tree annotations @@ -166,7 +228,7 @@ public class NewickFile /** * DOCUMENT ME! - * + * * @return DOCUMENT ME! */ public boolean HasDistances() @@ -181,11 +243,12 @@ public class NewickFile /** * parse the filesource as a newick file (new hampshire and/or extended) - * - * @throws IOException with a line number and character position for badly formatted NH strings + * + * @throws IOException + * with a line number and character position for badly formatted NH + * strings */ - public void parse() - throws IOException + public void parse() throws IOException { String nf; @@ -193,7 +256,7 @@ public class NewickFile StringBuffer file = new StringBuffer(); - while ( (nf = nextLine()) != null) + while ((nf = nextLine()) != null) { file.append(nf); } @@ -208,314 +271,328 @@ public class NewickFile int d = -1; int cp = 0; - //int flen = nf.length(); + // int flen = nf.length(); String Error = null; String nodename = null; + String commentString2 = null; // comments after simple node props - float DefDistance = (float) 0.001; // @param Default distance for a node - very very small + float DefDistance = (float) 0.001; // @param Default distance for a node - + // very very small int DefBootstrap = -1; // @param Default bootstrap for a node float distance = DefDistance; int bootstrap = DefBootstrap; - boolean ascending = false; // flag indicating that we are leaving the current node + boolean ascending = false; // flag indicating that we are leaving the + // current node com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( - "[(\\['),;]"); + "[(\\['),;]"); - int nextcp=0; + int nextcp = 0; int ncp = cp; while (majorsyms.searchFrom(nf, cp) && (Error == null)) { int fcp = majorsyms.matchedFrom(); char schar; - switch (schar=nf.charAt(fcp)) + switch (schar = nf.charAt(fcp)) { - case '(': + case '(': - // ascending should not be set - // New Internal node - if (ascending) - { - Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf); + // ascending should not be set + // New Internal node + if (ascending) + { + Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf); - continue; - } + continue; + } - ; - d++; + ; + d++; - if (c.right() == null) + if (c.right() == null) + { + c.setRight(new SequenceNode(null, c, null, DefDistance, + DefBootstrap, false)); + c = (SequenceNode) c.right(); + } + else + { + if (c.left() != null) { - c.setRight(new SequenceNode(null, c, null, DefDistance, - DefBootstrap, false)); - c = (SequenceNode) c.right(); + // Dummy node for polytomy - keeps c.left free for new node + SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true); + tmpn.SetChildren(c.left(), c.right()); + c.setRight(tmpn); } - else - { - if (c.left() != null) - { - // Dummy node for polytomy - keeps c.left free for new node - SequenceNode tmpn = new SequenceNode(null, c, null, 0, - 0, true); - tmpn.SetChildren(c.left(), c.right()); - c.setRight(tmpn); - } - c.setLeft(new SequenceNode(null, c, null, DefDistance, - DefBootstrap, false)); - c = (SequenceNode) c.left(); - } + c.setLeft(new SequenceNode(null, c, null, DefDistance, + DefBootstrap, false)); + c = (SequenceNode) c.left(); + } - if (realroot == null) - { - realroot = c; - } + if (realroot == null) + { + realroot = c; + } + + nodename = null; + distance = DefDistance; + bootstrap = DefBootstrap; + cp = fcp + 1; - nodename = null; - distance = DefDistance; - bootstrap = DefBootstrap; - cp = fcp + 1; + break; - break; + // Deal with quoted fields + case '\'': - // Deal with quoted fields - case '\'': + com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( + "([^']|'')+'"); + + if (qnodename.searchFrom(nf, fcp)) + { + int nl = qnodename.stringMatched().length(); + nodename = new String(qnodename.stringMatched().substring(0, + nl - 1)); + cp = fcp + nl + 1; + } + else + { + Error = ErrorStringrange(Error, + "Unterminated quotes for nodename", 7, fcp, nf); + } - com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( - "([^']|'')+'"); + break; - if (qnodename.searchFrom(nf, fcp)) + default: + if (schar == ';') + { + if (d != -1) + { + Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d + + ")", 7, fcp, nf); + } + // cp advanced at the end of default + } + if (schar == '[') + { + // node string contains Comment or structured/extended NH format info + /* + * if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1)) { // will + * process in remains System.err.println("skipped text: + * '"+nf.substring(cp,fcp)+"'"); } + */ + // verify termination. + com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]"); + if (comment.searchFrom(nf, fcp)) { - int nl = qnodename.stringMatched().length(); - nodename = new String(qnodename.stringMatched().substring(0, - nl - 1)); - cp = fcp + nl + 1; + // Skip the comment field + nextcp = comment.matchedFrom() + 1; + warningMessage = "Tree file contained comments which may confuse input algorithm."; + break; + + // cp advanced at the end of default to nextcp, ncp is unchanged so + // any node info can be read. } else { - Error = ErrorStringrange(Error, - "Unterminated quotes for nodename", 7, fcp, - nf); + Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, + nf); } - break; + ; + } + // Parse simpler field strings + String fstring = nf.substring(ncp, fcp); + // remove any comments before we parse the node info + // TODO: test newick file with quoted square brackets in node name (is + // this allowed?) + while (fstring.indexOf(']') > -1) + { + int cstart = fstring.indexOf('['); + int cend = fstring.indexOf(']'); + commentString2 = fstring.substring(cstart + 1, cend); + fstring = fstring.substring(0, cstart) + + fstring.substring(cend + 1); - default: - if (schar==';') + } + com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( + "\\b([^' :;\\](),]+)"); + com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( + "\\s*([0-9+]+)\\s*:"); + com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( + ":([-0-9Ee.+]+)"); + + if (uqnodename.search(fstring) + && ((uqnodename.matchedFrom(1) == 0) || (fstring + .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote + // HACK! + { + if (nodename == null) { - if (d != -1) + if (ReplaceUnderscores) { - Error = ErrorStringrange(Error, - "Wayward semicolon (depth=" + d + ")", 7, - fcp, nf); - } - // cp advanced at the end of default - } - if (schar == '[') - { - // node string contains Comment or structured/extended NH format info - /* if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1)) - { - // will process in remains System.err.println("skipped text: '"+nf.substring(cp,fcp)+"'"); - } - */ - // verify termination. - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex( - "]"); - if (comment.searchFrom(nf, fcp)) - { - // Skip the comment field - nextcp=comment.matchedFrom()+1; - warningMessage = "Tree file contained comments which may confuse input algorithm."; - break; - - // cp advanced at the end of default to nextcp, ncp is unchanged so any node info can be read. + nodename = uqnodename.stringMatched(1).replace('_', ' '); } else { - Error = ErrorStringrange(Error, "Unterminated comment", 3, - fcp, nf); + nodename = uqnodename.stringMatched(1); } - - ; } - // Parse simpler field strings - String fstring = nf.substring(ncp, fcp); - // remove any comments before we parse the node info - // TODO: test newick file with quoted square brackets in node name (is this allowed?) - while (fstring.indexOf(']')>-1) + else { - int cstart=fstring.indexOf('['); - int cend=fstring.indexOf(']'); - String comment = fstring.substring(cstart+1,cend); - fstring = fstring.substring(0, cstart)+fstring.substring(cend+1); - + Error = ErrorStringrange(Error, + "File has broken algorithm - overwritten nodename", 10, + fcp, nf); } - com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( - "\\b([^' :;\\](),]+)"); - com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\s*([0-9+]+)\\s*:"); - com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9Ee.+]+)"); - - if (uqnodename.search(fstring) && - ( (uqnodename.matchedFrom(1) == 0) || - (fstring.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote HACK! + } + // get comment bootstraps + + if (nbootstrap.search(fstring)) + { + if (nbootstrap.stringMatched(1).equals( + uqnodename.stringMatched(1))) { - if (nodename == null) - { - if (ReplaceUnderscores) - { - nodename = uqnodename.stringMatched(1).replace('_', - ' '); - } - else - { - nodename = uqnodename.stringMatched(1); - } - } - else - { - Error = ErrorStringrange(Error, - "File has broken algorithm - overwritten nodename", - 10, fcp, nf); - } + nodename = null; // no nodename here. } - - if (nbootstrap.search(fstring)) - { - if (nbootstrap.stringMatched(1).equals(uqnodename.stringMatched(1))) - { - nodename=null; // no nodename here. - } - if (nodename==null || nodename.length()==0 || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + - uqnodename.stringMatched().length())) + if (nodename == null + || nodename.length() == 0 + || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename + .stringMatched().length())) { try { - bootstrap = (new Integer(nbootstrap.stringMatched(1))).intValue(); + bootstrap = (new Integer(nbootstrap.stringMatched(1))) + .intValue(); HasBootstrap = true; - } - catch (Exception e) + } catch (Exception e) { Error = ErrorStringrange(Error, - "Can't parse bootstrap value", 4, - ncp + nbootstrap.matchedFrom(), nf); + "Can't parse bootstrap value", 4, + ncp + nbootstrap.matchedFrom(), nf); } } - } + } - boolean nodehasdistance = false; + boolean nodehasdistance = false; - if (ndist.search(fstring)) + if (ndist.search(fstring)) + { + try { - try - { - distance = (new Float(ndist.stringMatched(1))).floatValue(); - HasDistances = true; - nodehasdistance = true; - } - catch (Exception e) - { - Error = ErrorStringrange(Error, - "Can't parse node distance value", 7, - ncp + ndist.matchedFrom(), nf); - } + distance = (new Float(ndist.stringMatched(1))).floatValue(); + HasDistances = true; + nodehasdistance = true; + } catch (Exception e) + { + Error = ErrorStringrange(Error, + "Can't parse node distance value", 7, + ncp + ndist.matchedFrom(), nf); } + } - if (ascending) + if (ascending) + { + // Write node info here + c.setName(nodename); + // Trees without distances still need a render distance + c.dist = (HasDistances) ? distance : DefDistance; + // be consistent for internal bootstrap defaults too + c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap); + if (c == realroot) { - // Write node info here - c.setName(nodename); - // Trees without distances still need a render distance - c.dist = (HasDistances) ? distance : DefDistance; - // be consistent for internal bootstrap defaults too - c.setBootstrap( (HasBootstrap) ? bootstrap : DefBootstrap); - if (c == realroot) - { - RootHasDistance = nodehasdistance; // JBPNote This is really UGLY!!! Ensure root node gets its given distance - } + RootHasDistance = nodehasdistance; // JBPNote This is really + // UGLY!!! Ensure root node gets + // its given distance + } + parseNHXNodeProps(c, commentString2); + commentString2 = null; + } + else + { + // Find a place to put the leaf + SequenceNode newnode = new SequenceNode(null, c, nodename, + (HasDistances) ? distance : DefDistance, + (HasBootstrap) ? bootstrap : DefBootstrap, false); + parseNHXNodeProps(c, commentString2); + commentString2 = null; + + if (c.right() == null) + { + c.setRight(newnode); } else { - // Find a place to put the leaf - SequenceNode newnode = new SequenceNode(null, c, nodename, - (HasDistances) ? distance : DefDistance, - (HasBootstrap) ? bootstrap : DefBootstrap, false); - - if (c.right() == null) + if (c.left() == null) { - c.setRight(newnode); + c.setLeft(newnode); } else { - if (c.left() == null) - { - c.setLeft(newnode); - } - else - { - // Insert a dummy node for polytomy - // dummy nodes have distances - SequenceNode newdummy = new SequenceNode(null, c, - null, (HasDistances ? 0 : DefDistance), 0, true); - newdummy.SetChildren(c.left(), newnode); - c.setLeft(newdummy); - } + // Insert a dummy node for polytomy + // dummy nodes have distances + SequenceNode newdummy = new SequenceNode(null, c, null, + (HasDistances ? 0 : DefDistance), 0, true); + newdummy.SetChildren(c.left(), newnode); + c.setLeft(newdummy); } } + } - if (ascending) - { - // move back up the tree from preceding closure - c = c.AscendTree(); - - if ( (d > -1) && (c == null)) - { - Error = ErrorStringrange(Error, - "File broke algorithm: Lost place in tree (is there an extra ')' ?)", - 7, fcp, nf); - } - } + if (ascending) + { + // move back up the tree from preceding closure + c = c.AscendTree(); - if (nf.charAt(fcp) == ')') + if ((d > -1) && (c == null)) { - d--; - ascending = true; + Error = ErrorStringrange( + Error, + "File broke algorithm: Lost place in tree (is there an extra ')' ?)", + 7, fcp, nf); } - else + } + + if (nf.charAt(fcp) == ')') + { + d--; + ascending = true; + } + else + { + if (nf.charAt(fcp) == ',') { - if (nf.charAt(fcp) == ',') + if (ascending) { - if (ascending) - { - ascending = false; - } - else + ascending = false; + } + else + { + // Just advance focus, if we need to + if ((c.left() != null) && (!c.left().isLeaf())) { - // Just advance focus, if we need to - if ( (c.left() != null) && (!c.left().isLeaf())) - { - c = (SequenceNode) c.left(); - } + c = (SequenceNode) c.left(); } } } + } - // Reset new node properties to obvious fakes - nodename = null; - distance = DefDistance; - bootstrap = DefBootstrap; + // Reset new node properties to obvious fakes + nodename = null; + distance = DefDistance; + bootstrap = DefBootstrap; + commentString2 = null; } - if (nextcp==0) + if (nextcp == 0) { ncp = cp = fcp + 1; } - else { - cp=nextcp; - nextcp=0; + else + { + cp = nextcp; + nextcp = 0; } } @@ -523,11 +600,12 @@ public class NewickFile { throw (new IOException("NewickFile: " + Error + "\n")); } - if (root==null) + if (root == null) { throw (new IOException("NewickFile: No Tree read in\n")); } - // THe next line is failing for topali trees - not sure why yet. if (root.right()!=null && root.isDummy()) + // THe next line is failing for topali trees - not sure why yet. if + // (root.right()!=null && root.isDummy()) root = (SequenceNode) root.right().detach(); // remove the imaginary root. if (!RootHasDistance) @@ -537,8 +615,58 @@ public class NewickFile } /** + * parse NHX codes in comment strings and update NewickFile state flags for + * distances and bootstraps, and add any additional properties onto the node. + * + * @param c + * @param commentString + * @param commentString2 + */ + private void parseNHXNodeProps(SequenceNode c, String commentString) + { + // TODO: store raw comment on the sequenceNode so it can be recovered when + // tree is output + if (commentString != null && commentString.startsWith("&&NHX")) + { + StringTokenizer st = new StringTokenizer(commentString.substring(5), + ":"); + while (st.hasMoreTokens()) + { + String tok = st.nextToken(); + int colpos = tok.indexOf("="); + + if (colpos > -1) + { + String code = tok.substring(0, colpos); + String value = tok.substring(colpos + 1); + try + { + // parse out code/value pairs + if (code.toLowerCase().equals("b")) + { + int v = -1; + Float iv = new Float(value); + v = iv.intValue(); // jalview only does integer bootstraps + // currently + c.setBootstrap(v); + HasBootstrap = true; + } + // more codes here. + } catch (Exception e) + { + System.err.println("Couldn't parse code '" + code + "' = '" + + value + "'"); + e.printStackTrace(System.err); + } + } + } + } + + } + + /** * DOCUMENT ME! - * + * * @return DOCUMENT ME! */ public SequenceNode getTree() @@ -547,9 +675,9 @@ public class NewickFile } /** - * Generate a newick format tree according to internal flags - * for bootstraps, distances and root distances. - * + * Generate a newick format tree according to internal flags for bootstraps, + * distances and root distances. + * * @return new hampshire tree in a single line */ public String print() @@ -564,13 +692,14 @@ public class NewickFile } /** - * - * - * Generate a newick format tree according to internal flags - * for distances and root distances and user specificied writing of - * bootstraps. - * @param withbootstraps controls if bootstrap values are explicitly written. - * + * + * + * Generate a newick format tree according to internal flags for distances and + * root distances and user specificied writing of bootstraps. + * + * @param withbootstraps + * controls if bootstrap values are explicitly written. + * * @return new hampshire tree in a single line */ public String print(boolean withbootstraps) @@ -588,13 +717,15 @@ public class NewickFile } /** - * - * Generate newick format tree according to internal flags - * for writing root node distances. - * - * @param withbootstraps explicitly write bootstrap values - * @param withdists explicitly write distances - * + * + * Generate newick format tree according to internal flags for writing root + * node distances. + * + * @param withbootstraps + * explicitly write bootstrap values + * @param withdists + * explicitly write distances + * * @return new hampshire tree in a single line */ public String print(boolean withbootstraps, boolean withdists) @@ -613,15 +744,18 @@ public class NewickFile /** * Generate newick format tree according to user specified flags - * - * @param withbootstraps explicitly write bootstrap values - * @param withdists explicitly write distances - * @param printRootInfo explicitly write root distance - * + * + * @param withbootstraps + * explicitly write bootstrap values + * @param withdists + * explicitly write distances + * @param printRootInfo + * explicitly write root distance + * * @return new hampshire tree in a single line */ public String print(boolean withbootstraps, boolean withdists, - boolean printRootInfo) + boolean printRootInfo) { synchronized (this) { @@ -637,7 +771,7 @@ public class NewickFile /** * DOCUMENT ME! - * + * * @return DOCUMENT ME! */ char getQuoteChar() @@ -647,9 +781,10 @@ public class NewickFile /** * DOCUMENT ME! - * - * @param c DOCUMENT ME! - * + * + * @param c + * DOCUMENT ME! + * * @return DOCUMENT ME! */ char setQuoteChar(char c) @@ -662,9 +797,10 @@ public class NewickFile /** * DOCUMENT ME! - * - * @param name DOCUMENT ME! - * + * + * @param name + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private String nodeName(String name) @@ -681,35 +817,37 @@ public class NewickFile /** * DOCUMENT ME! - * - * @param c DOCUMENT ME! - * + * + * @param c + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private String printNodeField(SequenceNode c) { - return ( (c.getName() == null) ? "" : nodeName(c.getName())) + - ( (HasBootstrap) - ? ( (c.getBootstrap() > -1) ? ((c.getName()!=null ? " " : "")+ c.getBootstrap()) : "") : "") + - ( (HasDistances) ? (":" + c.dist) : ""); + return ((c.getName() == null) ? "" : nodeName(c.getName())) + + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " " + : "") + c.getBootstrap()) + : "") + : "") + ((HasDistances) ? (":" + c.dist) : ""); } /** * DOCUMENT ME! - * - * @param root DOCUMENT ME! - * + * + * @param root + * DOCUMENT ME! + * * @return DOCUMENT ME! */ private String printRootField(SequenceNode root) { - return (printRootInfo) - ? ( ( (root.getName() == null) ? "" : nodeName(root.getName())) + - ( (HasBootstrap) - ? ( (root.getBootstrap() > -1) ? ((root.getName()!=null ? " " : "")+ - + root.getBootstrap()) : "") : - "") + - ( (RootHasDistance) ? (":" + root.dist) : "")) : ""; + return (printRootInfo) ? (((root.getName() == null) ? "" + : nodeName(root.getName())) + + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root + .getName() != null ? " " : "") + +root.getBootstrap()) + : "") : "") + ((RootHasDistance) ? (":" + root.dist) + : "")) : ""; } // Non recursive call deals with root node properties @@ -789,8 +927,8 @@ public class NewickFile { if (args == null || args.length != 1) { - System.err.println( - "Takes one argument - file name of a newick tree file."); + System.err + .println("Takes one argument - file name of a newick tree file."); System.exit(0); } @@ -800,7 +938,7 @@ public class NewickFile BufferedReader treefile = new BufferedReader(new FileReader(fn)); String l; - while ( (l = treefile.readLine()) != null) + while ((l = treefile.readLine()) != null) { newickfile.append(l); } @@ -828,8 +966,7 @@ public class NewickFile System.out.println(trf.print(false, false)); System.out.println("With bootstraps and with distances.\n"); System.out.println(trf.print(true, true)); - } - catch (java.io.IOException e) + } catch (java.io.IOException e) { System.err.println("Exception\n" + e); e.printStackTrace();