From: jprocter Date: Thu, 6 Sep 2007 17:32:59 +0000 (+0000) Subject: bootstrap regex 'fixed' and ugly hack to try and get comment skip working correctly... X-Git-Tag: Release_2_4~282 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=8734c40ed208a27c7c2f5d4a56b658339e9d52ff;p=jalview.git bootstrap regex 'fixed' and ugly hack to try and get comment skip working correctly for chunglong chen .branch file (sent to help at jalview) --- diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index a5d20fb..5775072 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -221,6 +221,8 @@ public class NewickFile com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( "[(\\['),;]"); + int nextcp=0; + int ncp = cp; while (majorsyms.searchFrom(nf, cp) && (Error == null)) { int fcp = majorsyms.matchedFrom(); @@ -308,7 +310,6 @@ public class NewickFile } // cp advanced at the end of default } - int nextcp=0; if (schar == '[') { // node string contains Comment or structured/extended NH format info @@ -325,12 +326,9 @@ public class NewickFile // Skip the comment field nextcp=comment.matchedFrom()+1; warningMessage = "Tree file contained comments which may confuse input algorithm."; - // Skip the comment field - // should advance fcp too here - // fcp = nextcp+1; - // schar = nf.charAt(nextcp+1); + break; - // cp advanced at the end of default to nextcp + // cp advanced at the end of default to nextcp, ncp is unchanged so any node info can be read. } else { @@ -341,11 +339,21 @@ public class NewickFile ; } // Parse simpler field strings - String fstring = nf.substring(cp, fcp); + String fstring = nf.substring(ncp, fcp); + // remove any comments before we parse the node info + // TODO: test newick file with quoted square brackets in node name (is this allowed?) + while (fstring.indexOf(']')>-1) + { + int cstart=fstring.indexOf('['); + int cend=fstring.indexOf(']'); + String comment = fstring.substring(cstart+1,cend); + fstring = fstring.substring(0, cstart)+fstring.substring(cend+1); + + } com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( "\\b([^' :;\\](),]+)"); com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\S+([0-9+]+)\\S*:"); + "\\s*([0-9+]+)\\s*:"); com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( ":([-0-9Ee.+]+)"); @@ -373,9 +381,14 @@ public class NewickFile } } - if (nbootstrap.search(fstring) && - (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + - uqnodename.stringMatched().length()))) + if (nbootstrap.search(fstring)) + { + if (nbootstrap.stringMatched(1).equals(uqnodename.stringMatched(1))) + { + nodename=""; // no nodename here. + } + if (nodename==null || nodename.length()==0 || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + + uqnodename.stringMatched().length())) { try { @@ -386,9 +399,10 @@ public class NewickFile { Error = ErrorStringrange(Error, "Can't parse bootstrap value", 4, - cp + nbootstrap.matchedFrom(), nf); + ncp + nbootstrap.matchedFrom(), nf); } } + } boolean nodehasdistance = false; @@ -404,7 +418,7 @@ public class NewickFile { Error = ErrorStringrange(Error, "Can't parse node distance value", 7, - cp + ndist.matchedFrom(), nf); + ncp + ndist.matchedFrom(), nf); } } @@ -499,11 +513,14 @@ public class NewickFile nodename = null; distance = DefDistance; bootstrap = DefBootstrap; - - if (nextcp==0) - cp = fcp + 1; - else - cp=nextcp; + } + if (nextcp==0) + { + ncp = cp = fcp + 1; + } + else { + cp=nextcp; + nextcp=0; } }