import jalview.datamodel.SequenceNode;
import jalview.util.MessageManager;
+import jalview.util.Platform;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.StringTokenizer;
+import com.stevesoft.pat.Regex;
+
+// TODO This class does not conform to Java standards for field name capitalization.
+
/**
* Parse a New Hampshire style tree. Caveats: NHX files are NOT supported and the
* tree distances and topology are unreliable when they are parsed. TODO: on
*/
public class NewickFile extends FileParse
{
- SequenceNode root;
+ private SequenceNode root;
private boolean HasBootstrap = false;
private boolean RootHasDistance = false;
// File IO Flags
- boolean ReplaceUnderscores = false;
+ private boolean ReplaceUnderscores = false;
+
+ private boolean printRootInfo = true;
+
+ private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0;
+
+ private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1;
+
+ private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2;
+
+ private static final int REGEX_MAJOR_SYMS = 3;
+
+ private static final int REGEX_QNODE_NAME = 4;
+
+ private static final int REGEX_COMMENT = 5;
+
+ private static final int REGEX_UQNODE_NAME = 6;
- boolean printRootInfo = true;
+ private static final int REGEX_NBOOTSTRAP = 7;
- private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] {
- new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
- // requiring
- // quotes
- new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote
- // characters
- new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace
- // transformation
- };
+ private static final int REGEX_NDIST = 8;
- char QuoteChar = '\'';
+ private static final int REGEX_NO_LINES = 9;
+
+ private static final int REGEX_PERL_EXPAND_QUOTES = 10;
+
+ private static final int REGEX_MAX = 11; // one more than the highest REGEX_* id above; sizes the REGEX array
+
+ private static final Regex[] REGEX = new Regex[REGEX_MAX]; // one slot per REGEX_* id, read by getRegex(int)
+
+ /**
+  * Returns the regular expression registered under the given REGEX_* id. The
+  * expression is built on first request and stored in the REGEX array, so
+  * later calls with the same id return the same instance instead of
+  * constructing a new one each time.
+  *
+  * NOTE(review): the stored instances are shared class-wide; confirm that no
+  * caller relies on getting a private Regex per call (e.g. when several
+  * files are parsed at once) - TODO confirm
+  *
+  * @param id
+  *          one of the REGEX_* constants
+  * @return the Regex for that id, or null if the id is not recognised
+  */
+ private static Regex getRegex(int id)
+ {
+   // treat an id outside the table like any other unknown id
+   if (id < 0 || id >= REGEX.length)
+   {
+     return null;
+   }
+   if (REGEX[id] == null)
+   {
+     String code = null;
+     String code2 = null;
+     String codePerl = null;
+     switch (id)
+     {
+     case REGEX_PERL_NODE_REQUIRE_QUOTE:
+       codePerl = "m/[\\[,:'()]/";
+       break;
+     case REGEX_PERL_NODE_ESCAPE_QUOTE:
+       codePerl = "s/'/''/";
+       break;
+     case REGEX_PERL_NODE_UNQUOTED_WHITESPACE:
+       // NOTE(review): this appears to match the literal text "/w" rather
+       // than whitespace; presumably \s was intended. Left unchanged -
+       // confirm before altering what gets written to files.
+       codePerl = "s/\\/w/_/";
+       break;
+     case REGEX_PERL_EXPAND_QUOTES:
+       codePerl = "s/''/'/";
+       break;
+     case REGEX_MAJOR_SYMS:
+       code = "[(\\['),;]";
+       break;
+     case REGEX_QNODE_NAME:
+       code = "'([^']|'')+'";
+       break;
+     case REGEX_COMMENT:
+       code = "]";
+       break;
+     case REGEX_UQNODE_NAME:
+       code = "\\b([^' :;\\](),]+)";
+       break;
+     case REGEX_NBOOTSTRAP:
+       code = "\\s*([0-9+]+)\\s*:";
+       break;
+     case REGEX_NDIST:
+       code = ":([-0-9Ee.+]+)";
+       break;
+     case REGEX_NO_LINES:
+       // the only entry that carries a replacement string (code2)
+       code = "\n+";
+       code2 = "";
+       break;
+     default:
+       return null;
+     }
+     // remember the instance; without this the null check above never
+     // finds anything and a new Regex is built on every call
+     REGEX[id] = codePerl == null ? Platform.newRegex(code, code2)
+             : Platform.newRegexPerl(codePerl);
+   }
+   return REGEX[id];
+ }
+
+
+ private char quoteChar = '\'';
/**
* Creates a new NewickFile object.
*/
public void parse() throws IOException
{
+ Platform.ensureRegex();
String nf;
{ // fill nf with complete tree file
boolean ascending = false; // flag indicating that we are leaving the
// current node
- com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
- "[(\\['),;]");
+ Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]"
int nextcp = 0;
int ncp = cp;
continue;
}
-
- ;
d++;
if (c.right() == null)
// Deal with quoted fields
case '\'':
- com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
- "'([^']|'')+'");
+ Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'");
if (qnodename.searchFrom(nf, fcp))
{
nodename = new String(
qnodename.stringMatched().substring(1, nl - 1));
// unpack any escaped colons
- com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex
- .perlCode("s/''/'/");
+ Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES);
String widernodename = xpandquotes.replaceAll(nodename);
nodename = widernodename;
// jump to after end of quoted nodename
* '"+nf.substring(cp,fcp)+"'"); }
*/
// verify termination.
- com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
- "]");
+ Regex comment = getRegex(REGEX_COMMENT); // "]"
if (comment.searchFrom(nf, fcp))
{
// Skip the comment field
Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp,
nf);
}
-
- ;
}
// Parse simpler field strings
String fstring = nf.substring(ncp, fcp);
+ fstring.substring(cend + 1);
}
- com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
- "\\b([^' :;\\](),]+)");
- com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
- "\\s*([0-9+]+)\\s*:");
- com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
- ":([-0-9Ee.+]+)");
+ Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)"
+ Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:");
+ Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)");
if (!parsednodename && uqnodename.search(fstring)
&& ((uqnodename.matchedFrom(1) == 0) || (fstring
*/
char getQuoteChar()
{
- return QuoteChar;
+ return quoteChar; // the character nodeName() wraps around names that need quoting
}
/**
*/
char setQuoteChar(char c)
{
- char old = QuoteChar;
- QuoteChar = c;
+ char old = quoteChar; // keep the previous quote character so it can be handed back
+ quoteChar = c;
return old;
}
*/
private String nodeName(String name)
{
- if (NodeSafeName[0].search(name))
+ if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name)) // true when name contains any of [ , : ' ( )
{
- return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
+ return quoteChar
+ + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name) // each ' in the name is written as ''
+ + quoteChar;
}
else
{
- return NodeSafeName[2].replaceAll(name);
+ return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name); // unquoted: only the UNQUOTED_WHITESPACE substitution is applied
}
}
}
}
- // Test
+ /**
+ *
+ * @param args
+ * @j2sIgnore
+ */
public static void main(String[] args)
{
try
trf.parse();
System.out.println("Original file :\n");
- com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
+ Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", "");
System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
System.out.println("Parsed file.\n");