introduce beginning of bug fix for parsing atv style newick tree files where the...
author jprocter <Jim Procter>
Wed, 22 Aug 2007 13:28:02 +0000 (13:28 +0000)
committer jprocter <Jim Procter>
Wed, 22 Aug 2007 13:28:02 +0000 (13:28 +0000)
src/jalview/io/NewickFile.java

index 4c5de3a..a5d20fb 100755 (executable)
@@ -30,9 +30,9 @@ import java.io.*;
 import jalview.datamodel.*;
 
 /**
- * DOCUMENT ME!
- *
- * @author $author$
+ * Parse a New Hampshire (Newick) style tree
+ * Caveats: NHX files are NOT supported and the tree distances and topology are unreliable when they are parsed.
+ * @author Jim Procter
  * @version $Revision$
  */
 public class NewickFile
@@ -224,29 +224,9 @@ public class NewickFile
     while (majorsyms.searchFrom(nf, cp) && (Error == null))
     {
       int fcp = majorsyms.matchedFrom();
-
-      switch (nf.charAt(fcp))
+      char schar;
+      switch (schar=nf.charAt(fcp))
       {
-        case '[': // Comment or structured/extended NH format info
-
-          com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
-              "]");
-
-          if (comment.searchFrom(nf, fcp))
-          {
-            // Skip the comment field
-            cp = 1 + comment.matchedFrom();
-          }
-          else
-          {
-            Error = ErrorStringrange(Error, "Unterminated comment", 3,
-                                     fcp, nf);
-          }
-
-          ;
-
-          break;
-
         case '(':
 
           // ascending should not be set
@@ -317,18 +297,49 @@ public class NewickFile
 
           break;
 
-        case ';':
-
-          if (d != -1)
+        default:
+          if (schar==';')
           {
-            Error = ErrorStringrange(Error,
+            if (d != -1)
+            {
+              Error = ErrorStringrange(Error,
                                      "Wayward semicolon (depth=" + d + ")", 7,
                                      fcp, nf);
+            }
+            // cp advanced at the end of default
           }
+          int nextcp=0;
+          if (schar == '[')
+          { 
+            // node string contains Comment or structured/extended NH format info
+            /* if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1))
+              {
+                // will process in remains System.err.println("skipped text: '"+nf.substring(cp,fcp)+"'");
+              }
+             */
+            // verify termination.
+            com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
+              "]");
+            if (comment.searchFrom(nf, fcp))
+            {
+              // Skip the comment field
+              nextcp=comment.matchedFrom()+1;
+              warningMessage = "Tree file contained comments which may confuse input algorithm.";
+              // Skip the comment field
+              // should advance fcp too here
+              // fcp = nextcp+1;
+              // schar = nf.charAt(nextcp+1); 
+              
+              // cp advanced at the end of default to nextcp
+            }
+            else
+            {
+              Error = ErrorStringrange(Error, "Unterminated comment", 3,
+                                     fcp, nf);
+            }
 
-          // cp advanced at the end of default
-        default:
-
+            ;
+          }
           // Parse simpler field strings
           String fstring = nf.substring(cp, fcp);
           com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
@@ -474,8 +485,14 @@ public class NewickFile
                 }
               }
             }
-
-            // else : We do nothing if ';' is encountered.
+            else
+            {
+              if (nf.charAt(fcp)=='[') {
+                
+              }
+            
+                // else : We do nothing if ';' is encountered.
+            }
           }
 
           // Reset new node properties to obvious fakes
@@ -483,7 +500,10 @@ public class NewickFile
           distance = DefDistance;
           bootstrap = DefBootstrap;
 
-          cp = fcp + 1;
+          if (nextcp==0)
+            cp = fcp + 1;
+          else
+            cp=nextcp;
       }
     }