X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=8a464a4c6143fb1341b7c2f7c7ed0331b3c5a165;hb=c0501eaa85c0594f9275766f64de8ea44a59c368;hp=027390a1e8fc6facd4f7ae1e5852df515acacf38;hpb=04c8f7bff663aa469127e9eed4164e02933782f1;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index 027390a..8a464a4 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -30,6 +30,7 @@ import java.util.Locale; import jalview.datamodel.SequenceNode; import jalview.util.MessageManager; +import jalview.util.Platform; import java.io.BufferedReader; import java.io.File; @@ -39,6 +40,7 @@ import java.util.StringTokenizer; import com.stevesoft.pat.Regex; +// TODO This class does not conform to Java standards for field name capitalization. /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the * tree distances and topology are unreliable when they are parsed. TODO: on @@ -78,7 +80,7 @@ import com.stevesoft.pat.Regex; */ public class NewickFile extends FileParse { - SequenceNode root; + private SequenceNode root; private boolean HasBootstrap = false; @@ -87,21 +89,90 @@ public class NewickFile extends FileParse private boolean RootHasDistance = false; // File IO Flags - boolean ReplaceUnderscores = false; + private boolean ReplaceUnderscores = false; - boolean printRootInfo = true; + private boolean printRootInfo = true; - private Regex[] NodeSafeName = new Regex[] { - new Regex().perlCode("m/[\\[,:'()]/"), // test for - // requiring - // quotes - new Regex().perlCode("s/'/''/"), // escaping quote - // characters - new Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation - }; + private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0; - char QuoteChar = '\''; + private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1; + + private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2; + + private static final int REGEX_MAJOR_SYMS = 3; + + private static final int REGEX_QNODE_NAME = 4; + + private static final int REGEX_COMMENT = 5; + + private static final int REGEX_UQNODE_NAME = 6; + + private static final int REGEX_NBOOTSTRAP = 7; + + private static final int REGEX_NDIST = 8; + + private static final int REGEX_NO_LINES = 9; + + private static final int REGEX_PERL_EXPAND_QUOTES = 10; + + private static final int REGEX_MAX = 11; + + private static final Regex[] REGEX = new Regex[REGEX_MAX]; + + private static Regex getRegex(int id) + { + if (REGEX[id] == null) + { + String code = null; + String code2 = null; + String codePerl = null; + switch (id) + { + case REGEX_PERL_NODE_REQUIRE_QUOTE: + codePerl = "m/[\\[,:'()]/"; + break; + case REGEX_PERL_NODE_ESCAPE_QUOTE: + codePerl = "s/'/''/"; + break; + case REGEX_PERL_NODE_UNQUOTED_WHITESPACE: + codePerl = "s/\\/w/_/"; + break; + case REGEX_PERL_EXPAND_QUOTES: + codePerl = "s/''/'/"; + break; + case REGEX_MAJOR_SYMS: + code = "[(\\['),;]"; + break; + case REGEX_QNODE_NAME: + code = "'([^']|'')+'"; + break; + case REGEX_COMMENT: + code = "]"; + break; + case REGEX_UQNODE_NAME: + code = "\\b([^' :;\\](),]+)"; + break; + case REGEX_NBOOTSTRAP: + code = "\\s*([0-9+]+)\\s*:"; + break; + case REGEX_NDIST: + code = ":([-0-9Ee.+]+)"; + break; + case REGEX_NO_LINES: + code = "\n+"; + code2 = ""; + break; + default: + return null; + } + return codePerl == null ? Platform.newRegex(code, code2) + : Platform.newRegexPerl(codePerl); + } + return REGEX[id]; + } + + + private char quoteChar = '\''; /** * Creates a new NewickFile object. @@ -259,6 +330,7 @@ public class NewickFile extends FileParse */ public void parse() throws IOException { + Platform.ensureRegex(); String nf; { // fill nf with complete tree file @@ -296,7 +368,7 @@ public class NewickFile extends FileParse boolean ascending = false; // flag indicating that we are leaving the // current node - Regex majorsyms = new Regex("[(\\['),;]"); + Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]" int nextcp = 0; int ncp = cp; @@ -355,7 +427,7 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - Regex qnodename = new Regex("'([^']|'')+'"); + Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { @@ -363,7 +435,7 @@ public class NewickFile extends FileParse nodename = new String( qnodename.stringMatched().substring(1, nl - 1)); // unpack any escaped colons - Regex xpandquotes = Regex.perlCode("s/''/'/"); + Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES); String widernodename = xpandquotes.replaceAll(nodename); nodename = widernodename; // jump to after end of quoted nodename @@ -397,7 +469,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - Regex comment = new Regex("]"); + Regex comment = getRegex(REGEX_COMMENT); // "]" if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -428,9 +500,9 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - Regex uqnodename = new Regex("\\b([^' :;\\](),]+)"); - Regex nbootstrap = new Regex("\\s*([0-9+]+)\\s*:"); - Regex ndist = new Regex(":([-0-9Ee.+]+)"); + Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)" + Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:"); + Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)"); if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring @@ -785,7 +857,7 @@ public class NewickFile extends FileParse */ char getQuoteChar() { - return QuoteChar; + return quoteChar; } /** @@ -798,8 +870,8 @@ public class NewickFile extends FileParse */ char setQuoteChar(char c) { - char old = QuoteChar; - QuoteChar = c; + char old = quoteChar; + quoteChar = c; return old; } @@ -814,13 +886,15 @@ public class NewickFile extends FileParse */ private String nodeName(String name) { - if (NodeSafeName[0].search(name)) + if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name)) { - return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; + return quoteChar + + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name) + + quoteChar; } else { - return NodeSafeName[2].replaceAll(name); + return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name); } } @@ -967,7 +1041,7 @@ public class NewickFile extends FileParse trf.parse(); System.out.println("Original file :\n"); - Regex nonl = new Regex("\n+", ""); + Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");