bootstrap regex 'fixed' and ugly hack to try and get comment skip working correctly...
author jprocter <Jim Procter>
Thu, 6 Sep 2007 17:32:59 +0000 (17:32 +0000)
committer jprocter <Jim Procter>
Thu, 6 Sep 2007 17:32:59 +0000 (17:32 +0000)
src/jalview/io/NewickFile.java

index a5d20fb..5775072 100755 (executable)
@@ -221,6 +221,8 @@ public class NewickFile
     com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
         "[(\\['),;]");
 
+    int nextcp=0;
+    int ncp = cp;
     while (majorsyms.searchFrom(nf, cp) && (Error == null))
     {
       int fcp = majorsyms.matchedFrom();
@@ -308,7 +310,6 @@ public class NewickFile
             }
             // cp advanced at the end of default
           }
-          int nextcp=0;
           if (schar == '[')
           { 
             // node string contains Comment or structured/extended NH format info
@@ -325,12 +326,9 @@ public class NewickFile
               // Skip the comment field
               nextcp=comment.matchedFrom()+1;
               warningMessage = "Tree file contained comments which may confuse input algorithm.";
-              // Skip the comment field
-              // should advance fcp too here
-              // fcp = nextcp+1;
-              // schar = nf.charAt(nextcp+1); 
+              break;
               
-              // cp advanced at the end of default to nextcp
+              // cp advanced at the end of default to nextcp, ncp is unchanged so any node info can be read.
             }
             else
             {
@@ -341,11 +339,21 @@ public class NewickFile
             ;
           }
           // Parse simpler field strings
-          String fstring = nf.substring(cp, fcp);
+          String fstring = nf.substring(ncp, fcp);
+          // remove any comments before we parse the node info
+          // TODO: test newick file with quoted square brackets in node name (is this allowed?)
+          while (fstring.indexOf(']')>-1)
+          {
+            int cstart=fstring.indexOf('[');
+            int cend=fstring.indexOf(']');
+            String comment =  fstring.substring(cstart+1,cend);
+            fstring = fstring.substring(0, cstart)+fstring.substring(cend+1);
+            
+          }
           com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
               "\\b([^' :;\\](),]+)");
           com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
-              "\\S+([0-9+]+)\\S*:");
+              "\\s*([0-9+]+)\\s*:");
           com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
               ":([-0-9Ee.+]+)");
 
@@ -373,9 +381,14 @@ public class NewickFile
             }
           }
 
-          if (nbootstrap.search(fstring) &&
-              (nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) +
-                                            uqnodename.stringMatched().length())))
+          if (nbootstrap.search(fstring))
+            {
+            if (nbootstrap.stringMatched(1).equals(uqnodename.stringMatched(1)))
+              {
+                nodename=""; // no nodename here.
+              }
+            if (nodename==null || nodename.length()==0 || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + 
+                      uqnodename.stringMatched().length()))
           {
             try
             {
@@ -386,9 +399,10 @@ public class NewickFile
             {
               Error = ErrorStringrange(Error,
                                        "Can't parse bootstrap value", 4,
-                                       cp + nbootstrap.matchedFrom(), nf);
+                                       ncp + nbootstrap.matchedFrom(), nf);
             }
           }
+            }
 
           boolean nodehasdistance = false;
 
@@ -404,7 +418,7 @@ public class NewickFile
             {
               Error = ErrorStringrange(Error,
                                        "Can't parse node distance value", 7,
-                                       cp + ndist.matchedFrom(), nf);
+                                       ncp + ndist.matchedFrom(), nf);
             }
           }
 
@@ -499,11 +513,14 @@ public class NewickFile
           nodename = null;
           distance = DefDistance;
           bootstrap = DefBootstrap;
-
-          if (nextcp==0)
-            cp = fcp + 1;
-          else
-            cp=nextcp;
+      }
+      if (nextcp==0)
+      {
+        ncp = cp = fcp + 1;
+      }
+      else {
+        cp=nextcp;
+        nextcp=0;
       }
     }