import jalview.datamodel.SequenceNode;
import jalview.util.MessageManager;
-import jalview.util.Platform;
import java.io.BufferedReader;
import java.io.File;
import com.stevesoft.pat.Regex;
-// TODO This class does not conform to Java standards for field name capitalization.
-
/**
* Parse a New Hampshire style tree. Caveats: NHX files are NOT supported and the
* tree distances and topology are unreliable when they are parsed. TODO: on
*/
public class NewickFile extends FileParse
{
- private SequenceNode root;
+ SequenceNode root;
private boolean HasBootstrap = false;
private boolean RootHasDistance = false;
// File IO Flags
- private boolean ReplaceUnderscores = false;
-
- private boolean printRootInfo = true;
-
- private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0;
-
- private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1;
-
- private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2;
-
- private static final int REGEX_MAJOR_SYMS = 3;
-
- private static final int REGEX_QNODE_NAME = 4;
-
- private static final int REGEX_COMMENT = 5;
-
- private static final int REGEX_UQNODE_NAME = 6;
+ boolean ReplaceUnderscores = false;
- private static final int REGEX_NBOOTSTRAP = 7;
-
- private static final int REGEX_NDIST = 8;
-
- private static final int REGEX_NO_LINES = 9;
-
- private static final int REGEX_PERL_EXPAND_QUOTES = 10;
-
- private static final int REGEX_MAX = 11;
-
- private static final Regex[] REGEX = new Regex[REGEX_MAX];
-
- private static Regex getRegex(int id) // maps one of the REGEX_* ids to its Regex object
- {
- if (REGEX[id] == null) // NOTE(review): nothing visible in this method stores into REGEX[id], so this branch appears to run on every call - confirm whether caching was intended
- {
- String code = null;
- String code2 = null;
- String codePerl = null;
- switch (id)
- {
- case REGEX_PERL_NODE_REQUIRE_QUOTE:
- codePerl = "m/[\\[,:'()]/";
- break;
- case REGEX_PERL_NODE_ESCAPE_QUOTE:
- codePerl = "s/'/''/";
- break;
- case REGEX_PERL_NODE_UNQUOTED_WHITESPACE:
- codePerl = "s/\\/w/_/"; // NOTE(review): as written this matches the literal "/w"; confirm whether whitespace (\s) was intended
- break;
- case REGEX_PERL_EXPAND_QUOTES:
- codePerl = "s/''/'/";
- break;
- case REGEX_MAJOR_SYMS:
- code = "[(\\['),;]";
- break;
- case REGEX_QNODE_NAME:
- code = "'([^']|'')+'";
- break;
- case REGEX_COMMENT:
- code = "]";
- break;
- case REGEX_UQNODE_NAME:
- code = "\\b([^' :;\\](),]+)";
- break;
- case REGEX_NBOOTSTRAP:
- code = "\\s*([0-9+]+)\\s*:";
- break;
- case REGEX_NDIST:
- code = ":([-0-9Ee.+]+)";
- break;
- case REGEX_NO_LINES:
- code = "\n+";
- code2 = ""; // presumably the replacement text passed to Platform.newRegex - verify against that helper
- break;
- default:
- return null; // no pattern is defined for this id
- }
- return codePerl == null ? Platform.newRegex(code, code2) // plain pattern; code2 stays null for every id except REGEX_NO_LINES
- : Platform.newRegexPerl(codePerl); // Perl-style m/.../ or s/.../.../ expression
- }
- return REGEX[id];
- }
+ boolean printRootInfo = true;
+ private Regex[] NodeSafeName = new Regex[] { // indexed 0..2 by nodeName(String)
+ new Regex().perlCode("m/[\\[,:'()]/"), // [0] test for a character
+ // that requires the name
+ // to be quoted
+ new Regex().perlCode("s/'/''/"), // [1] escaping quote
+ // characters by doubling them
+ new Regex().perlCode("s/\\/w/_/") // [2] unquoted whitespace
+ // transformation. NOTE(review): as written this matches the literal "/w"; confirm whether \s was intended
+ };
- private char quoteChar = '\'';
+ char QuoteChar = '\'';
/**
* Creates a new NewickFile object.
*/
public void parse() throws IOException
{
- Platform.ensureRegex();
String nf;
{ // fill nf with complete tree file
boolean ascending = false; // flag indicating that we are leaving the
// current node
- Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]"
+ Regex majorsyms = new Regex(
+ "[(\\['),;]");
int nextcp = 0;
int ncp = cp;
// Deal with quoted fields
case '\'':
- Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'");
+ Regex qnodename = new Regex(
+ "'([^']|'')+'");
if (qnodename.searchFrom(nf, fcp))
{
nodename = new String(
qnodename.stringMatched().substring(1, nl - 1));
// unpack any escaped colons
- Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES);
+ Regex xpandquotes = Regex
+ .perlCode("s/''/'/");
String widernodename = xpandquotes.replaceAll(nodename);
nodename = widernodename;
// jump to after end of quoted nodename
* '"+nf.substring(cp,fcp)+"'"); }
*/
// verify termination.
- Regex comment = getRegex(REGEX_COMMENT); // "]"
+ Regex comment = new Regex(
+ "]");
if (comment.searchFrom(nf, fcp))
{
// Skip the comment field
+ fstring.substring(cend + 1);
}
- Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)"
- Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:");
- Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)");
+ Regex uqnodename = new Regex(
+ "\\b([^' :;\\](),]+)");
+ Regex nbootstrap = new Regex(
+ "\\s*([0-9+]+)\\s*:");
+ Regex ndist = new Regex(
+ ":([-0-9Ee.+]+)");
if (!parsednodename && uqnodename.search(fstring)
&& ((uqnodename.matchedFrom(1) == 0) || (fstring
{
try
{
- bootstrap = (new Integer(nbootstrap.stringMatched(1)))
+ bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1)))
.intValue();
HasBootstrap = true;
} catch (Exception e)
{
try
{
- distance = (new Float(ndist.stringMatched(1))).floatValue();
+ distance = (Float.valueOf(ndist.stringMatched(1))).floatValue();
HasDistances = true;
nodehasdistance = true;
} catch (Exception e)
if (code.toLowerCase().equals("b"))
{
int v = -1;
- Float iv = new Float(value);
+ Float iv = Float.valueOf(value);
v = iv.intValue(); // jalview only does integer bootstraps
// currently
c.setBootstrap(v);
*/
char getQuoteChar() // package-private accessor for the character nodeName(String) wraps quoted names in
{
- return quoteChar;
+ return QuoteChar;
}
/**
*/
char setQuoteChar(char c) // replaces the quote character with c
{
- char old = quoteChar;
- quoteChar = c;
+ char old = QuoteChar;
+ QuoteChar = c;
return old; // hands back the quote character that was in effect before this call
}
*/
private String nodeName(String name) // returns name in the form used when writing it out: quoted if needed, otherwise transformed
{
- if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name))
+ if (NodeSafeName[0].search(name)) // name contains one of [ , : ' ( )
{
- return quoteChar
- + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name)
- + quoteChar;
+ return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; // NOTE(review): the escape pattern is hard-coded to ' even though the quote character can be changed via setQuoteChar - confirm
}
else
{
- return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name);
+ return NodeSafeName[2].replaceAll(name); // unquoted path: applies the third substitution to the name
}
}
trf.parse();
System.out.println("Original file :\n");
- Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", "");
+ Regex nonl = new Regex("\n+", "");
System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
System.out.println("Parsed file.\n");