X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=b3c0011b52ea2d1ad4284cb7faf4fbc1566d6189;hb=42e022a59de649375c8878c1d26361cb6bc89cb8;hp=9a7f6fb9ff0dae4dccaa96bec65f06b94f141b74;hpb=ab9e5bf849afd2f41102db0bf9893de1df0512f6;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index 9a7f6fb..b3c0011 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -1,22 +1,23 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ - // NewickFile.java // Tree I/O // http://evolution.genetics.washington.edu/phylip/newick_doc.html @@ -25,10 +26,19 @@ // TODO: Extended SequenceNodeI to hold parsed NHX strings package jalview.io; -import java.io.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Locale; import java.util.StringTokenizer; -import jalview.datamodel.*; +import com.stevesoft.pat.Regex; + +import jalview.bin.Jalview; +import jalview.datamodel.BinaryNode; +import jalview.datamodel.SequenceNode; +import jalview.util.MessageManager; /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the @@ -36,10 +46,10 @@ import jalview.datamodel.*; * this: NHX codes are appended in comments beginning with &&NHX. The codes are * given below (from http://www.phylosoft.org/forester/NHX.html): Element Type * Description Corresponding phyloXML element (parent element in parentheses) no - * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) () : - * decimal branch length to parent node (MUST BE SECOND, IF ASSIGNED) - * () :GN= string gene name () :AC= - * string sequence accession () :ND= string node + * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) + * () : decimal branch length to parent node (MUST BE SECOND, IF + * ASSIGNED) () :GN= string gene name () + * :AC= string sequence accession () :ND= string node * identifier - if this is being used, it has to be unique within each phylogeny * () :B= decimal confidence value for parent branch * () :D= 'T', 'F', or '?' 'T' if this node represents a @@ -69,7 +79,7 @@ import jalview.datamodel.*; */ public class NewickFile extends FileParse { - SequenceNode root; + BinaryNode root; private boolean HasBootstrap = false; @@ -82,14 +92,14 @@ public class NewickFile extends FileParse boolean printRootInfo = true; - private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] - { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for - // requiring - // quotes - new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote - // characters - new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation + private Regex[] NodeSafeName = new Regex[] { + new Regex().perlCode("m/[\\[,:'()]/"), // test for + // requiring + // quotes + new Regex().perlCode("s/'/''/"), // escaping quote + // characters + new Regex().perlCode("s/\\/w/_/") // unqoted whitespace + // transformation }; char QuoteChar = '\''; @@ -98,30 +108,31 @@ public class NewickFile extends FileParse * Creates a new NewickFile object. * * @param inStr - * DOCUMENT ME! + * DOCUMENT ME! * * @throws IOException - * DOCUMENT ME! + * DOCUMENT ME! */ public NewickFile(String inStr) throws IOException { - super(inStr, "Paste"); + super(inStr, DataSourceType.PASTE); } /** * Creates a new NewickFile object. * * @param inFile - * DOCUMENT ME! - * @param type - * DOCUMENT ME! + * DOCUMENT ME! + * @param protocol + * DOCUMENT ME! * * @throws IOException - * DOCUMENT ME! + * DOCUMENT ME! */ - public NewickFile(String inFile, String type) throws IOException + public NewickFile(String inFile, DataSourceType protocol) + throws IOException { - super(inFile, type); + super(inFile, protocol); } public NewickFile(FileParse source) throws IOException @@ -133,9 +144,9 @@ public class NewickFile extends FileParse * Creates a new NewickFile object. * * @param newtree - * DOCUMENT ME! + * DOCUMENT ME! */ - public NewickFile(SequenceNode newtree) + public NewickFile(BinaryNode newtree) { root = newtree; } @@ -144,9 +155,9 @@ public class NewickFile extends FileParse * Creates a new NewickFile object. * * @param newtree - * DOCUMENT ME! + * DOCUMENT ME! * @param bootstrap - * DOCUMENT ME! + * DOCUMENT ME! */ public NewickFile(SequenceNode newtree, boolean bootstrap) { @@ -158,13 +169,13 @@ public class NewickFile extends FileParse * Creates a new NewickFile object. * * @param newtree - * DOCUMENT ME! + * DOCUMENT ME! * @param bootstrap - * DOCUMENT ME! + * DOCUMENT ME! * @param distances - * DOCUMENT ME! + * DOCUMENT ME! */ - public NewickFile(SequenceNode newtree, boolean bootstrap, + public NewickFile(BinaryNode newtree, boolean bootstrap, boolean distances) { root = newtree; @@ -176,15 +187,15 @@ public class NewickFile extends FileParse * Creates a new NewickFile object. * * @param newtree - * DOCUMENT ME! + * DOCUMENT ME! * @param bootstrap - * DOCUMENT ME! + * DOCUMENT ME! * @param distances - * DOCUMENT ME! + * DOCUMENT ME! * @param rootdistance - * DOCUMENT ME! + * DOCUMENT ME! */ - public NewickFile(SequenceNode newtree, boolean bootstrap, + public NewickFile(BinaryNode newtree, boolean bootstrap, boolean distances, boolean rootdistance) { root = newtree; @@ -197,28 +208,25 @@ public class NewickFile extends FileParse * DOCUMENT ME! * * @param Error - * DOCUMENT ME! + * DOCUMENT ME! * @param Er - * DOCUMENT ME! + * DOCUMENT ME! * @param r - * DOCUMENT ME! + * DOCUMENT ME! * @param p - * DOCUMENT ME! + * DOCUMENT ME! * @param s - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ private String ErrorStringrange(String Error, String Er, int r, int p, String s) { - return ((Error == null) ? "" : Error) - + Er - + " at position " - + p - + " ( " - + s.substring(((p - r) < 0) ? 0 : (p - r), ((p + r) > s - .length()) ? s.length() : (p + r)) + " )\n"; + return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( " + + s.substring(((p - r) < 0) ? 0 : (p - r), + ((p + r) > s.length()) ? s.length() : (p + r)) + + " )\n"; } // @tree annotations @@ -247,8 +255,8 @@ public class NewickFile extends FileParse * parse the filesource as a newick file (new hampshire and/or extended) * * @throws IOException - * with a line number and character position for badly - * formatted NH strings + * with a line number and character position for badly formatted NH + * strings */ public void parse() throws IOException { @@ -268,8 +276,8 @@ public class NewickFile extends FileParse root = new SequenceNode(); - SequenceNode realroot = null; - SequenceNode c = root; + BinaryNode realroot = null; + BinaryNode c = root; int d = -1; int cp = 0; @@ -279,21 +287,21 @@ public class NewickFile extends FileParse String nodename = null; String commentString2 = null; // comments after simple node props - float DefDistance = (float) 0.001; // @param Default distance for a node - - // very very small + double DefDistance = (float) 0.001; // @param Default distance for a node - + // very very small int DefBootstrap = -1; // @param Default bootstrap for a node - float distance = DefDistance; + double distance = DefDistance; int bootstrap = DefBootstrap; boolean ascending = false; // flag indicating that we are leaving the - // current node + // current node - com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( - "[(\\['),;]"); + Regex majorsyms = new Regex("[(\\['),;]"); int nextcp = 0; int ncp = cp; + boolean parsednodename = false; while (majorsyms.searchFrom(nf, cp) && (Error == null)) { int fcp = majorsyms.matchedFrom(); @@ -310,29 +318,27 @@ public class NewickFile extends FileParse continue; } - - ; d++; if (c.right() == null) { c.setRight(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false)); - c = (SequenceNode) c.right(); + c = (BinaryNode) c.right(); } else { if (c.left() != null) { // Dummy node for polytomy - keeps c.left free for new node - SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true); + BinaryNode tmpn = new SequenceNode(null, c, null, 0, 0, true); tmpn.SetChildren(c.left(), c.right()); c.setRight(tmpn); } c.setLeft(new SequenceNode(null, c, null, DefDistance, DefBootstrap, false)); - c = (SequenceNode) c.left(); + c = (BinaryNode) c.left(); } if (realroot == null) @@ -350,15 +356,20 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( - "([^']|'')+'"); + Regex qnodename = new Regex("'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { int nl = qnodename.stringMatched().length(); - nodename = new String(qnodename.stringMatched().substring(0, - nl - 1)); - cp = fcp + nl + 1; + nodename = new String( + qnodename.stringMatched().substring(1, nl - 1)); + // unpack any escaped colons + Regex xpandquotes = Regex.perlCode("s/''/'/"); + String widernodename = xpandquotes.replaceAll(nodename); + nodename = widernodename; + // jump to after end of quoted nodename + nextcp = fcp + nl + 1; + parsednodename = true; } else { @@ -373,8 +384,8 @@ public class NewickFile extends FileParse { if (d != -1) { - Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d - + ")", 7, fcp, nf); + Error = ErrorStringrange(Error, + "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); } // cp advanced at the end of default } @@ -387,7 +398,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]"); + Regex comment = new Regex("]"); if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -403,8 +414,6 @@ public class NewickFile extends FileParse Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); } - - ; } // Parse simpler field strings String fstring = nf.substring(ncp, fcp); @@ -420,17 +429,14 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( - "\\b([^' :;\\](),]+)"); - com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\s*([0-9+]+)\\s*:"); - com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9Ee.+]+)"); - - if (uqnodename.search(fstring) + Regex uqnodename = new Regex("\\b([^' :;\\](),]+)"); + Regex nbootstrap = new Regex("\\s*([0-9+]+)\\s*:"); + Regex ndist = new Regex(":([-0-9Ee.+]+)"); + + if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote - // HACK! + // HACK! { if (nodename == null) { @@ -454,26 +460,24 @@ public class NewickFile extends FileParse if (nbootstrap.search(fstring)) { - if (nbootstrap.stringMatched(1).equals( - uqnodename.stringMatched(1))) + if (nbootstrap.stringMatched(1) + .equals(uqnodename.stringMatched(1))) { nodename = null; // no nodename here. } - if (nodename == null - || nodename.length() == 0 - || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename - .stringMatched().length())) + if (nodename == null || nodename.length() == 0 + || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + + uqnodename.stringMatched().length())) { try { - bootstrap = (new Integer(nbootstrap.stringMatched(1))) + bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1))) .intValue(); HasBootstrap = true; } catch (Exception e) { - Error = ErrorStringrange(Error, - "Can't parse bootstrap value", 4, ncp - + nbootstrap.matchedFrom(), nf); + Error = ErrorStringrange(Error, "Can't parse bootstrap value", + 4, ncp + nbootstrap.matchedFrom(), nf); } } } @@ -484,14 +488,15 @@ public class NewickFile extends FileParse { try { - distance = (new Float(ndist.stringMatched(1))).floatValue(); + distance = (Double.valueOf(ndist.stringMatched(1))) + .floatValue(); HasDistances = true; nodehasdistance = true; } catch (Exception e) { Error = ErrorStringrange(Error, - "Can't parse node distance value", 7, ncp - + ndist.matchedFrom(), nf); + "Can't parse node distance value", 7, + ncp + ndist.matchedFrom(), nf); } } @@ -506,8 +511,8 @@ public class NewickFile extends FileParse if (c == realroot) { RootHasDistance = nodehasdistance; // JBPNote This is really - // UGLY!!! Ensure root node gets - // its given distance + // UGLY!!! Ensure root node gets + // its given distance } parseNHXNodeProps(c, commentString2); commentString2 = null; @@ -515,7 +520,7 @@ public class NewickFile extends FileParse else { // Find a place to put the leaf - SequenceNode newnode = new SequenceNode(null, c, nodename, + BinaryNode newnode = new SequenceNode(null, c, nodename, (HasDistances) ? distance : DefDistance, (HasBootstrap) ? bootstrap : DefBootstrap, false); parseNHXNodeProps(c, commentString2); @@ -535,7 +540,7 @@ public class NewickFile extends FileParse { // Insert a dummy node for polytomy // dummy nodes have distances - SequenceNode newdummy = new SequenceNode(null, c, null, + BinaryNode newdummy = new SequenceNode(null, c, null, (HasDistances ? 0 : DefDistance), 0, true); newdummy.SetChildren(c.left(), newnode); c.setLeft(newdummy); @@ -550,8 +555,7 @@ public class NewickFile extends FileParse if ((d > -1) && (c == null)) { - Error = ErrorStringrange( - Error, + Error = ErrorStringrange(Error, "File broke algorithm: Lost place in tree (is there an extra ')' ?)", 7, fcp, nf); } @@ -575,7 +579,7 @@ public class NewickFile extends FileParse // Just advance focus, if we need to if ((c.left() != null) && (!c.left().isLeaf())) { - c = (SequenceNode) c.left(); + c = (BinaryNode) c.left(); } } } @@ -585,7 +589,8 @@ public class NewickFile extends FileParse nodename = null; distance = DefDistance; bootstrap = DefBootstrap; - commentString2=null; + commentString2 = null; + parsednodename = false; } if (nextcp == 0) { @@ -600,11 +605,15 @@ public class NewickFile extends FileParse if (Error != null) { - throw (new IOException("NewickFile: " + Error + "\n")); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { Error.toString() }))); } if (root == null) { - throw (new IOException("NewickFile: No Tree read in\n")); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { MessageManager.getString("label.no_tree_read_in") }))); } // THe next line is failing for topali trees - not sure why yet. if // (root.right()!=null && root.isDummy()) @@ -624,45 +633,46 @@ public class NewickFile extends FileParse * @param commentString * @param commentString2 */ - private void parseNHXNodeProps(SequenceNode c, String commentString) + private void parseNHXNodeProps(BinaryNode c, String commentString) { // TODO: store raw comment on the sequenceNode so it can be recovered when // tree is output if (commentString != null && commentString.startsWith("&&NHX")) { - StringTokenizer st = new StringTokenizer(commentString.substring(5),":"); + StringTokenizer st = new StringTokenizer(commentString.substring(5), + ":"); while (st.hasMoreTokens()) { String tok = st.nextToken(); - int colpos=tok.indexOf("="); + int colpos = tok.indexOf("="); - if (colpos>-1) + if (colpos > -1) { String code = tok.substring(0, colpos); - String value = tok.substring(colpos+1); - try { + String value = tok.substring(colpos + 1); + try + { // parse out code/value pairs - if (code.toLowerCase().equals("b")) + if (code.toLowerCase(Locale.ROOT).equals("b")) { - int v=-1; - Float iv = new Float(value); + int v = -1; + Float iv = Float.valueOf(value); v = iv.intValue(); // jalview only does integer bootstraps - // currently + // currently c.setBootstrap(v); HasBootstrap = true; } // more codes here. - } - catch (Exception e) + } catch (Exception e) { - System.err.println("Couldn't parse code '"+code+"' = '"+value+"'"); + System.err.println( + "Couldn't parse code '" + code + "' = '" + value + "'"); e.printStackTrace(System.err); } } } } - - + } /** @@ -670,7 +680,7 @@ public class NewickFile extends FileParse * * @return DOCUMENT ME! */ - public SequenceNode getTree() + public BinaryNode getTree() { return root; } @@ -699,7 +709,7 @@ public class NewickFile extends FileParse * root distances and user specificied writing of bootstraps. * * @param withbootstraps - * controls if bootstrap values are explicitly written. + * controls if bootstrap values are explicitly written. * * @return new hampshire tree in a single line */ @@ -723,9 +733,9 @@ public class NewickFile extends FileParse * node distances. * * @param withbootstraps - * explicitly write bootstrap values + * explicitly write bootstrap values * @param withdists - * explicitly write distances + * explicitly write distances * * @return new hampshire tree in a single line */ @@ -747,11 +757,11 @@ public class NewickFile extends FileParse * Generate newick format tree according to user specified flags * * @param withbootstraps - * explicitly write bootstrap values + * explicitly write bootstrap values * @param withdists - * explicitly write distances + * explicitly write distances * @param printRootInfo - * explicitly write root distance + * explicitly write root distance * * @return new hampshire tree in a single line */ @@ -784,7 +794,7 @@ public class NewickFile extends FileParse * DOCUMENT ME! * * @param c - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ @@ -800,7 +810,7 @@ public class NewickFile extends FileParse * DOCUMENT ME! * * @param name - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ @@ -820,39 +830,43 @@ public class NewickFile extends FileParse * DOCUMENT ME! * * @param c - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ - private String printNodeField(SequenceNode c) + private String printNodeField(BinaryNode c) { return ((c.getName() == null) ? "" : nodeName(c.getName())) - + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " " - : "") + c.getBootstrap()) - : "") - : "") + ((HasDistances) ? (":" + c.dist) : ""); + + ((HasBootstrap) ? ((c.getBootstrap() > -1) + ? ((c.getName() != null ? " " : "") + c.getBootstrap()) + : "") : "") + + ((HasDistances) ? (":" + c.dist) : ""); } /** * DOCUMENT ME! * * @param root - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ - private String printRootField(SequenceNode root) + private String printRootField(BinaryNode root) { - return (printRootInfo) ? (((root.getName() == null) ? "" - : nodeName(root.getName())) - + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root - .getName() != null ? " " : "") + +root.getBootstrap()) - : "") : "") + ((RootHasDistance) ? (":" + root.dist) - : "")) : ""; + return (printRootInfo) + ? (((root.getName() == null) ? "" : nodeName(root.getName())) + + ((HasBootstrap) + ? ((root.getBootstrap() > -1) + ? ((root.getName() != null ? " " : "") + + +root.getBootstrap()) + : "") + : "") + + ((RootHasDistance) ? (":" + root.dist) : "")) + : ""; } // Non recursive call deals with root node properties - public void print(StringBuffer tf, SequenceNode root) + public void print(StringBuffer tf, BinaryNode root) { if (root != null) { @@ -864,20 +878,20 @@ public class NewickFile extends FileParse { if (root.isDummy()) { - _print(tf, (SequenceNode) root.right()); - _print(tf, (SequenceNode) root.left()); + _print(tf, root.right()); + _print(tf, root.left()); } else { tf.append("("); - _print(tf, (SequenceNode) root.right()); + _print(tf, root.right()); if (root.left() != null) { tf.append(","); } - _print(tf, (SequenceNode) root.left()); + _print(tf, root.left()); tf.append(")" + printRootField(root)); } } @@ -885,7 +899,7 @@ public class NewickFile extends FileParse } // Recursive call for non-root nodes - public void _print(StringBuffer tf, SequenceNode c) + public void _print(StringBuffer tf, BinaryNode c) { if (c != null) { @@ -897,40 +911,43 @@ public class NewickFile extends FileParse { if (c.isDummy()) { - _print(tf, (SequenceNode) c.left()); + _print(tf, c.left()); if (c.left() != null) { tf.append(","); } - _print(tf, (SequenceNode) c.right()); + _print(tf, c.right()); } else { tf.append("("); - _print(tf, (SequenceNode) c.right()); + _print(tf, c.right()); if (c.left() != null) { tf.append(","); } - _print(tf, (SequenceNode) c.left()); + _print(tf, c.left()); tf.append(")" + printNodeField(c)); } } } } - // Test + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { try { if (args == null || args.length != 1) { - System.err - .println("Takes one argument - file name of a newick tree file."); - System.exit(0); + Jalview.exit( + "Takes one argument - file name of a newick tree file.", 0); } File fn = new File(args[0]); @@ -947,11 +964,11 @@ public class NewickFile extends FileParse treefile.close(); System.out.println("Read file :\n"); - NewickFile trf = new NewickFile(args[0], "File"); + NewickFile trf = new NewickFile(args[0], DataSourceType.FILE); trf.parse(); System.out.println("Original file :\n"); - com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", ""); + Regex nonl = new Regex("\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");