X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=db888b7716c53df25d9f5660c7c7ff0852aebd39;hb=304e64fb34b32659be1bbfd39fb4e15b2f79586e;hp=2221f0090a29830112daf03331f4650e49dcac5c;hpb=3027bfb4b1b13af8686abcd419888e03141bd672;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index 2221f00..db888b7 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -26,8 +26,11 @@ // TODO: Extended SequenceNodeI to hold parsed NHX strings package jalview.io; +import java.util.Locale; + import jalview.datamodel.SequenceNode; import jalview.util.MessageManager; +import jalview.util.Platform; import java.io.BufferedReader; import java.io.File; @@ -35,6 +38,10 @@ import java.io.FileReader; import java.io.IOException; import java.util.StringTokenizer; +import com.stevesoft.pat.Regex; + +// TODO This class does not conform to Java standards for field name capitalization. + /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the * tree distances and topology are unreliable when they are parsed. TODO: on @@ -74,7 +81,7 @@ import java.util.StringTokenizer; */ public class NewickFile extends FileParse { - SequenceNode root; + private SequenceNode root; private boolean HasBootstrap = false; @@ -83,21 +90,90 @@ public class NewickFile extends FileParse private boolean RootHasDistance = false; // File IO Flags - boolean ReplaceUnderscores = false; + private boolean ReplaceUnderscores = false; + + private boolean printRootInfo = true; + + private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0; + + private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1; + + private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2; + + private static final int REGEX_MAJOR_SYMS = 3; + + private static final int REGEX_QNODE_NAME = 4; + + private static final int REGEX_COMMENT = 5; - boolean printRootInfo = true; + private static final int REGEX_UQNODE_NAME = 6; + + private static final int REGEX_NBOOTSTRAP = 7; + + private static final int REGEX_NDIST = 8; + + private static final int REGEX_NO_LINES = 9; + + private static final int REGEX_PERL_EXPAND_QUOTES = 10; + + private static final int REGEX_MAX = 11; + + private static final Regex[] REGEX = new Regex[REGEX_MAX]; + + private static Regex getRegex(int id) + { + if (REGEX[id] == null) + { + String code = null; + String code2 = null; + String codePerl = null; + switch (id) + { + case REGEX_PERL_NODE_REQUIRE_QUOTE: + codePerl = "m/[\\[,:'()]/"; + break; + case REGEX_PERL_NODE_ESCAPE_QUOTE: + codePerl = "s/'/''/"; + break; + case REGEX_PERL_NODE_UNQUOTED_WHITESPACE: + codePerl = "s/\\/w/_/"; + break; + case REGEX_PERL_EXPAND_QUOTES: + codePerl = "s/''/'/"; + break; + case REGEX_MAJOR_SYMS: + code = "[(\\['),;]"; + break; + case REGEX_QNODE_NAME: + code = "'([^']|'')+'"; + break; + case REGEX_COMMENT: + code = "]"; + break; + case REGEX_UQNODE_NAME: + code = "\\b([^' :;\\](),]+)"; + break; + case REGEX_NBOOTSTRAP: + code = "\\s*([0-9+]+)\\s*:"; + break; + case REGEX_NDIST: + code = ":([-0-9Ee.+]+)"; + break; + case REGEX_NO_LINES: + code = "\n+"; + code2 = ""; + break; + default: + return null; + } + return codePerl == null ? Platform.newRegex(code, code2) + : Platform.newRegexPerl(codePerl); + } + return REGEX[id]; + } - private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] { - new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for - // requiring - // quotes - new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote - // characters - new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation - }; - char QuoteChar = '\''; + private char quoteChar = '\''; /** * Creates a new NewickFile object. @@ -255,6 +331,7 @@ public class NewickFile extends FileParse */ public void parse() throws IOException { + Platform.ensureRegex(); String nf; { // fill nf with complete tree file @@ -292,8 +369,7 @@ public class NewickFile extends FileParse boolean ascending = false; // flag indicating that we are leaving the // current node - com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( - "[(\\['),;]"); + Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]" int nextcp = 0; int ncp = cp; @@ -314,8 +390,6 @@ public class NewickFile extends FileParse continue; } - - ; d++; if (c.right() == null) @@ -354,8 +428,7 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( - "'([^']|'')+'"); + Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { @@ -363,8 +436,7 @@ public class NewickFile extends FileParse nodename = new String( qnodename.stringMatched().substring(1, nl - 1)); // unpack any escaped colons - com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex - .perlCode("s/''/'/"); + Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES); String widernodename = xpandquotes.replaceAll(nodename); nodename = widernodename; // jump to after end of quoted nodename @@ -398,8 +470,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex( - "]"); + Regex comment = getRegex(REGEX_COMMENT); // "]" if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -415,8 +486,6 @@ public class NewickFile extends FileParse Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); } - - ; } // Parse simpler field strings String fstring = nf.substring(ncp, fcp); @@ -432,12 +501,9 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( - "\\b([^' :;\\](),]+)"); - com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\s*([0-9+]+)\\s*:"); - com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9Ee.+]+)"); + Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)" + Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:"); + Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)"); if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring @@ -658,7 +724,7 @@ public class NewickFile extends FileParse try { // parse out code/value pairs - if (code.toLowerCase().equals("b")) + if (code.toLowerCase(Locale.ROOT).equals("b")) { int v = -1; Float iv = Float.valueOf(value); @@ -792,7 +858,7 @@ public class NewickFile extends FileParse */ char getQuoteChar() { - return QuoteChar; + return quoteChar; } /** @@ -805,8 +871,8 @@ public class NewickFile extends FileParse */ char setQuoteChar(char c) { - char old = QuoteChar; - QuoteChar = c; + char old = quoteChar; + quoteChar = c; return old; } @@ -821,13 +887,15 @@ public class NewickFile extends FileParse */ private String nodeName(String name) { - if (NodeSafeName[0].search(name)) + if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name)) { - return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; + return quoteChar + + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name) + + quoteChar; } else { - return NodeSafeName[2].replaceAll(name); + return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name); } } @@ -940,7 +1008,11 @@ public class NewickFile extends FileParse } } - // Test + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { try @@ -970,7 +1042,7 @@ public class NewickFile extends FileParse trf.parse(); System.out.println("Original file :\n"); - com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", ""); + Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");