X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;fp=src%2Fjalview%2Fio%2FNewickFile.java;h=ea31e67471c723166f8d4a11fe93fc9fde62aeb3;hb=586ade46bdcd05ff028a1cff82c3c527326d28ec;hp=f3eaa45e2882fd08445dbbc85fbb8848bc46de6d;hpb=adcef27f5747b4e70e89a56c3735bc3afb8ce9bf;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index f3eaa45..ea31e67 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -28,6 +28,7 @@ package jalview.io; import jalview.datamodel.SequenceNode; import jalview.util.MessageManager; +import jalview.util.Platform; import java.io.BufferedReader; import java.io.File; @@ -37,6 +38,8 @@ import java.util.StringTokenizer; import com.stevesoft.pat.Regex; +// TODO This class does not conform to Java standards for field name capitalization. + /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the * tree distances and topology are unreliable when they are parsed. TODO: on @@ -76,7 +79,7 @@ import com.stevesoft.pat.Regex; */ public class NewickFile extends FileParse { - SequenceNode root; + private SequenceNode root; private boolean HasBootstrap = false; @@ -85,21 +88,90 @@ public class NewickFile extends FileParse private boolean RootHasDistance = false; // File IO Flags - boolean ReplaceUnderscores = false; + private boolean ReplaceUnderscores = false; + + private boolean printRootInfo = true; + + private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0; + + private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1; + + private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2; + + private static final int REGEX_MAJOR_SYMS = 3; + + private static final int REGEX_QNODE_NAME = 4; + + private static final int REGEX_COMMENT = 5; + + private static final int REGEX_UQNODE_NAME = 6; - boolean printRootInfo = true; + private static final int REGEX_NBOOTSTRAP = 7; + + private static final int REGEX_NDIST = 8; + + private static final int REGEX_NO_LINES = 9; + + private static final int REGEX_PERL_EXPAND_QUOTES = 10; + + private static final int REGEX_MAX = 11; + + private static final Regex[] REGEX = new Regex[REGEX_MAX]; + + private static Regex getRegex(int id) + { + if (REGEX[id] == null) + { + String code = null; + String code2 = null; + String codePerl = null; + switch (id) + { + case REGEX_PERL_NODE_REQUIRE_QUOTE: + codePerl = "m/[\\[,:'()]/"; + break; + case REGEX_PERL_NODE_ESCAPE_QUOTE: + codePerl = "s/'/''/"; + break; + case REGEX_PERL_NODE_UNQUOTED_WHITESPACE: + codePerl = "s/\\/w/_/"; + break; + case REGEX_PERL_EXPAND_QUOTES: + codePerl = "s/''/'/"; + break; + case REGEX_MAJOR_SYMS: + code = "[(\\['),;]"; + break; + case REGEX_QNODE_NAME: + code = "'([^']|'')+'"; + break; + case REGEX_COMMENT: + code = "]"; + break; + case REGEX_UQNODE_NAME: + code = "\\b([^' :;\\](),]+)"; + break; + case REGEX_NBOOTSTRAP: + code = "\\s*([0-9+]+)\\s*:"; + break; + case REGEX_NDIST: + code = ":([-0-9Ee.+]+)"; + break; + case REGEX_NO_LINES: + code = "\n+"; + code2 = ""; + break; + default: + return null; + } + return codePerl == null ? Platform.newRegex(code, code2) + : Platform.newRegexPerl(codePerl); + } + return REGEX[id]; + } - private Regex[] NodeSafeName = new Regex[] { - new Regex().perlCode("m/[\\[,:'()]/"), // test for - // requiring - // quotes - new Regex().perlCode("s/'/''/"), // escaping quote - // characters - new Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation - }; - char QuoteChar = '\''; + private char quoteChar = '\''; /** * Creates a new NewickFile object. @@ -257,6 +329,7 @@ public class NewickFile extends FileParse */ public void parse() throws IOException { + Platform.ensureRegex(); String nf; { // fill nf with complete tree file @@ -294,8 +367,7 @@ public class NewickFile extends FileParse boolean ascending = false; // flag indicating that we are leaving the // current node - Regex majorsyms = new Regex( - "[(\\['),;]"); + Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]" int nextcp = 0; int ncp = cp; @@ -354,8 +426,7 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - Regex qnodename = new Regex( - "'([^']|'')+'"); + Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { @@ -363,8 +434,7 @@ public class NewickFile extends FileParse nodename = new String( qnodename.stringMatched().substring(1, nl - 1)); // unpack any escaped colons - Regex xpandquotes = Regex - .perlCode("s/''/'/"); + Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES); String widernodename = xpandquotes.replaceAll(nodename); nodename = widernodename; // jump to after end of quoted nodename @@ -398,8 +468,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - Regex comment = new Regex( - "]"); + Regex comment = getRegex(REGEX_COMMENT); // "]" if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -430,12 +499,9 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - Regex uqnodename = new Regex( - "\\b([^' :;\\](),]+)"); - Regex nbootstrap = new Regex( - "\\s*([0-9+]+)\\s*:"); - Regex ndist = new Regex( - ":([-0-9Ee.+]+)"); + Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)" + Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:"); + Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)"); if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring @@ -790,7 +856,7 @@ public class NewickFile extends FileParse */ char getQuoteChar() { - return QuoteChar; + return quoteChar; } /** @@ -803,8 +869,8 @@ public class NewickFile extends FileParse */ char setQuoteChar(char c) { - char old = QuoteChar; - QuoteChar = c; + char old = quoteChar; + quoteChar = c; return old; } @@ -819,13 +885,15 @@ public class NewickFile extends FileParse */ private String nodeName(String name) { - if (NodeSafeName[0].search(name)) + if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name)) { - return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; + return quoteChar + + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name) + + quoteChar; } else { - return NodeSafeName[2].replaceAll(name); + return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name); } } @@ -972,7 +1040,7 @@ public class NewickFile extends FileParse trf.parse(); System.out.println("Original file :\n"); - Regex nonl = new Regex("\n+", ""); + Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");