From 6badd7bce3bb217f0b32e3872da900a935d7daa3 Mon Sep 17 00:00:00 2001 From: jprocter Date: Tue, 11 Sep 2007 20:44:48 +0000 Subject: [PATCH] bootstrap value parsing/recovery only for nodes with unquoted non-negative integer in node name or after a space separated node name field (bugfix for regexes when moved from Jalview to j1.4+ pattern matching) git-svn-id: https://svn.lifesci.dundee.ac.uk/svn/repository/trunk@459 be28352e-c001-0410-b1a7-c7978e42abec --- .../ac/vamsas/objects/utils/trees/BinaryNode.java | 6 +- .../ac/vamsas/objects/utils/trees/NewickFile.java | 96 ++++++++++++-------- 2 files changed, 60 insertions(+), 42 deletions(-) diff --git a/src/uk/ac/vamsas/objects/utils/trees/BinaryNode.java b/src/uk/ac/vamsas/objects/utils/trees/BinaryNode.java index ca0739a..bc41783 100644 --- a/src/uk/ac/vamsas/objects/utils/trees/BinaryNode.java +++ b/src/uk/ac/vamsas/objects/utils/trees/BinaryNode.java @@ -17,15 +17,15 @@ public class BinaryNode { BinaryNode parent; - /** DOCUMENT ME!! */ - public int bootstrap; + /** bootstrap is non-negative integer */ + public int bootstrap=-1; /** * Creates a new BinaryNode object. */ public BinaryNode() { left = right = parent = null; - bootstrap = 0; + bootstrap = -1; } /** diff --git a/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java b/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java index 3c41744..521c616 100644 --- a/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java +++ b/src/uk/ac/vamsas/objects/utils/trees/NewickFile.java @@ -61,7 +61,7 @@ public class NewickFile { private Pattern[] NodeSafeName = new Pattern[] { Pattern.compile("[\\[,:'()]"), // test for requiring quotes Pattern.compile("'"), // escaping quote characters - Pattern.compile("/w") // unqoted whitespace transformation + Pattern.compile("\\s") // unquoted whitespace transformation }; char QuoteChar = '\''; @@ -69,13 +69,13 @@ public class NewickFile { String newickFile = null; /** - * Creates a new NewickFile object.
+ * Creates a new NewickFile object * * @param inStr - * DOCUMENT ME! + * Newick style tree string * * @throws IOException - * DOCUMENT ME! + * if string is not a valid newick file */ public NewickFile(String inStr) throws IOException { newickFile = inStr; @@ -223,6 +223,8 @@ public class NewickFile { /** * call this to convert the newick string into a binary node linked tree + * Note: this is automatically called by the constructors, so you normally + * wouldn't need to use this. * * @throws IOException * if the newick string cannot be parsed. @@ -257,7 +259,7 @@ public class NewickFile { float DefDistance = (float) 0.001; // @param Default distance for a node - // very very small - int DefBootstrap = 0; // @param Default bootstrap for a node + int DefBootstrap = -1; // @param Default bootstrap for a node float distance = DefDistance; int bootstrap = DefBootstrap; @@ -269,7 +271,9 @@ public class NewickFile { Matcher mjsyms = majorsyms.matcher(nf); char schar; - while (mjsyms.find(cp) && (Error == null)) { + int nextcp=0; + int ncp = cp; + while (mjsyms.find(cp) && (Error == null)) { int fcp = mjsyms.start(); switch (schar = nf.charAt(fcp)) { @@ -329,31 +333,36 @@ public class NewickFile { break; default: - int nextcp = 0; - // Skip Comment or structured/extended NH format info + // Reached terminating root node label. + if (schar == ';' && d != -1) { + Error = ErrorStringrange(Error, + "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); + } + + // Skip Comment or structured/extended NH format info if (schar == '[') { if ((nextcp=nf.indexOf(']', fcp)) > -1) { - // Skip the comment field - // should advance fcp too here + // verified that comment is properly terminated.
+ // now skip the comment field nextcp++; - //fcp = nextcp; - //schar = nf.charAt(fcp); + break; // go and search for the next node separator, leaving ncp at beginning of node info } else { Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); nextcp = 0; break; } - ; } - - // Reached termininating root node label. - if (schar == ';' && d != -1) { - Error = ErrorStringrange(Error, - "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); + + // Parse simpler field strings from substring between ncp and node separator + String fstring = nf.substring(ncp, fcp); + // extract any comments from the nodeinfo. + while (fstring.indexOf(']')>-1) + { + int cstart=fstring.indexOf('['); + int cend=fstring.indexOf(']'); + String comment = fstring.substring(cstart+1,cend); // TODO: put this somewhere ? + fstring = fstring.substring(0, cstart)+fstring.substring(cend+1); } - - // Parse simpler field strings - String fstring = nf.substring(cp, fcp); Matcher uqnodename = Pattern.compile("^([^' :;\\](),]+).*").matcher( fstring); if (uqnodename.matches() @@ -372,20 +381,28 @@ public class NewickFile { } } - Matcher nbootstrap = Pattern.compile("\\S+([0-9+]+)\\S*:").matcher( + Matcher nbootstrap = Pattern.compile("\\s*([+0-9]+)\\s*:.*").matcher( fstring); - if (nbootstrap.matches() && (nbootstrap.start(1) > uqnodename.end(1))) { - try { - bootstrap = (new Integer(nbootstrap.group(1))).intValue(); - HasBootstrap = true; - } catch (Exception e) { - Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4, - cp + nbootstrap.start(0), nf); + if (nbootstrap.matches()) + { + if (nodename!=null && nbootstrap.group(1).equals(nodename)) + { + nodename=null; // empty nodename - only bootstrap value + } + if ((nodename==null || nodename.length()==0) || nbootstrap.start(1)>=uqnodename.end(1)) + { + try { + bootstrap = (new Integer(nbootstrap.group(1))).intValue(); + HasBootstrap = true; + } catch (Exception e) { + Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4, 
+ ncp + nbootstrap.start(0), nf); + } } } - - Matcher ndist = Pattern.compile(":([-0-9Ee.+]+)").matcher(fstring); + + Matcher ndist = Pattern.compile(".*:([-0-9Ee.+]+)").matcher(fstring); boolean nodehasdistance = false; if (ndist.matches()) { @@ -395,7 +412,7 @@ public class NewickFile { nodehasdistance = true; } catch (Exception e) { Error = ErrorStringrange(Error, "Can't parse node distance value", - 7, cp + ndist.start(0), nf); + 7, ncp + ndist.start(0), nf); } } @@ -459,18 +476,19 @@ public class NewickFile { } } } - - // else : We do nothing if ';' is encountered. } // Reset new node properties to obvious fakes nodename = null; distance = DefDistance; bootstrap = DefBootstrap; - if (nextcp == 0) - cp = fcp + 1; - else - cp = nextcp; + } + // Advance character pointers if necessary + if (nextcp == 0) { + ncp = cp = fcp + 1; + } else { + cp = nextcp; + nextcp = 0; } } @@ -640,7 +658,7 @@ public class NewickFile { private String printNodeField(SequenceNode c) { return //c.getNewickNodeName() ((c.getName() == null) ? "" : nodeName(c.getName())) - + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (" " + c.getBootstrap()) + + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? (((c.getName()==null) ? " " : "") + c.getBootstrap()) : "") : "") + ((HasDistances) ? (":" + c.dist) : ""); } -- 1.7.10.2