From: jprocter Date: Wed, 22 Aug 2007 13:28:02 +0000 (+0000) Subject: introduce beginning of bug fix for parsing atv style newick tree files where the... X-Git-Tag: Release_2_4~313 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b61fccc43c50b8311348a5225e5af29befffb845;hp=1233e68299e9a3c8b2a075f18e5179d8480e2aee;p=jalview.git introduce beginning of bug fix for parsing atv style newick tree files where the NHX comment appears between the node info (label bootstrap:distance[nhx&...]). Comments now raise a non-fatal warning message about incorrect tree parsing. --- diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index 4c5de3a..a5d20fb 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -30,9 +30,9 @@ import java.io.*; import jalview.datamodel.*; /** - * DOCUMENT ME! - * - * @author $author$ + * Parse a New Hampshire style tree + * Caveats: NHX files are NOT supported and the tree distances and topology are unreliable when they are parsed. 
+ * @author Jim Procter * @version $Revision$ */ public class NewickFile @@ -224,29 +224,9 @@ public class NewickFile while (majorsyms.searchFrom(nf, cp) && (Error == null)) { int fcp = majorsyms.matchedFrom(); - - switch (nf.charAt(fcp)) + char schar; + switch (schar=nf.charAt(fcp)) { - case '[': // Comment or structured/extended NH format info - - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex( - "]"); - - if (comment.searchFrom(nf, fcp)) - { - // Skip the comment field - cp = 1 + comment.matchedFrom(); - } - else - { - Error = ErrorStringrange(Error, "Unterminated comment", 3, - fcp, nf); - } - - ; - - break; - case '(': // ascending should not be set @@ -317,18 +297,49 @@ public class NewickFile break; - case ';': - - if (d != -1) + default: + if (schar==';') { - Error = ErrorStringrange(Error, + if (d != -1) + { + Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); + } + // cp advanced at the end of default } + int nextcp=0; + if (schar == '[') + { + // node string contains Comment or structured/extended NH format info + /* if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1)) + { + // will process in remains System.err.println("skipped text: '"+nf.substring(cp,fcp)+"'"); + } + */ + // verify termination. 
+ com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex( + "]"); + if (comment.searchFrom(nf, fcp)) + { + // Skip the comment field + nextcp=comment.matchedFrom()+1; + warningMessage = "Tree file contained comments which may confuse input algorithm."; + // Skip the comment field + // should advance fcp too here + // fcp = nextcp+1; + // schar = nf.charAt(nextcp+1); + + // cp advanced at the end of default to nextcp + } + else + { + Error = ErrorStringrange(Error, "Unterminated comment", 3, + fcp, nf); + } - // cp advanced at the end of default - default: - + ; + } // Parse simpler field strings String fstring = nf.substring(cp, fcp); com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( @@ -474,8 +485,14 @@ public class NewickFile } } } - - // else : We do nothing if ';' is encountered. + else + { + if (nf.charAt(fcp)=='[') { + + } + + // else : We do nothing if ';' is encountered. + } } // Reset new node properties to obvious fakes @@ -483,7 +500,10 @@ public class NewickFile distance = DefDistance; bootstrap = DefBootstrap; - cp = fcp + 1; + if (nextcp==0) + cp = fcp + 1; + else + cp=nextcp; } }