/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
// NewickFile.java
// TODO: Extended SequenceNodeI to hold parsed NHX strings
package jalview.io;
-import java.io.*;
+import jalview.datamodel.SequenceNode;
+import jalview.util.MessageManager;
+import jalview.util.Platform;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
import java.util.StringTokenizer;
-import jalview.datamodel.*;
+import com.stevesoft.pat.Regex;
+
+// TODO This class does not conform to Java standards for field name capitalization.
/**
* Parse a New Hampshire style tree. Caveats: NHX files are NOT supported and the
*/
public class NewickFile extends FileParse
{
- SequenceNode root;
+ private SequenceNode root;
// Set to true by parse() when a bootstrap value is read; printNodeField() and
// printRootField() only emit bootstrap values while it is true.
private boolean HasBootstrap = false;
// Controls whether printRootField() appends ":" and the root's dist value.
private boolean RootHasDistance = false;
// File IO Flags
- boolean ReplaceUnderscores = false;
+ private boolean ReplaceUnderscores = false;
+
// When false, printRootField() returns an empty string.
+ private boolean printRootInfo = true;
+
// Identifiers for the patterns built by getRegex(int). Each value is also the
// index of that pattern's slot in the REGEX array declared below.
+ private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0;
+
+ private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1;
+
+ private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2;
+
+ private static final int REGEX_MAJOR_SYMS = 3;
+
+ private static final int REGEX_QNODE_NAME = 4;
+
+ private static final int REGEX_COMMENT = 5;
+
+ private static final int REGEX_UQNODE_NAME = 6;
+
+ private static final int REGEX_NBOOTSTRAP = 7;
- boolean printRootInfo = true;
+ private static final int REGEX_NDIST = 8;
- private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
- { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
- // requiring
- // quotes
- new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote
- // characters
- new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace
- // transformation
- };
+ private static final int REGEX_NO_LINES = 9;
- char QuoteChar = '\'';
+ private static final int REGEX_PERL_EXPAND_QUOTES = 10;
+
// Number of pattern identifiers above; used as the length of the REGEX array.
+ private static final int REGEX_MAX = 11;
+
// One slot per REGEX_* identifier, consulted by getRegex(int).
+ private static final Regex[] REGEX = new Regex[REGEX_MAX];
+
+ /**
+  * Returns the pattern registered under the given identifier. The pattern is
+  * built on first use and stored in {@code REGEX}, so later calls with the
+  * same identifier return the same instance instead of rebuilding it.
+  * <p>
+  * The returned object is shared by all callers. Callers read match results
+  * back from the Regex object itself (e.g. {@code matchedFrom()},
+  * {@code stringMatched()}), so one instance should not be used by several
+  * threads at the same time.
+  *
+  * @param id
+  *          one of the {@code REGEX_*} identifiers; must be a valid index
+  *          into {@code REGEX}
+  * @return the pattern for {@code id}, or null if no pattern is defined for
+  *         that identifier
+  */
+ private static Regex getRegex(int id)
+ {
+   Regex cached = REGEX[id];
+   if (cached != null)
+   {
+     return cached;
+   }
+   String code = null;
+   String code2 = null;
+   String codePerl = null;
+   switch (id)
+   {
+   case REGEX_PERL_NODE_REQUIRE_QUOTE:
+     codePerl = "m/[\\[,:'()]/";
+     break;
+   case REGEX_PERL_NODE_ESCAPE_QUOTE:
+     codePerl = "s/'/''/";
+     break;
+   case REGEX_PERL_NODE_UNQUOTED_WHITESPACE:
+     // NOTE(review): as written this substitution targets the literal text
+     // "/w", not whitespace - left unchanged, confirm the intended pattern
+     codePerl = "s/\\/w/_/";
+     break;
+   case REGEX_PERL_EXPAND_QUOTES:
+     codePerl = "s/''/'/";
+     break;
+   case REGEX_MAJOR_SYMS:
+     code = "[(\\['),;]";
+     break;
+   case REGEX_QNODE_NAME:
+     code = "'([^']|'')+'";
+     break;
+   case REGEX_COMMENT:
+     code = "]";
+     break;
+   case REGEX_UQNODE_NAME:
+     code = "\\b([^' :;\\](),]+)";
+     break;
+   case REGEX_NBOOTSTRAP:
+     code = "\\s*([0-9+]+)\\s*:";
+     break;
+   case REGEX_NDIST:
+     code = ":([-0-9Ee.+]+)";
+     break;
+   case REGEX_NO_LINES:
+     code = "\n+";
+     code2 = "";
+     break;
+   default:
+     return null;
+   }
+   cached = (codePerl == null) ? Platform.newRegex(code, code2)
+           : Platform.newRegexPerl(codePerl);
+   // remember the pattern so that it is only built once per identifier
+   REGEX[id] = cached;
+   return cached;
+ }
+
+
+ private char quoteChar = '\'';
/**
* Creates a new NewickFile object from a string, using the "paste" data
* source type.
*
* @param inStr
*          the string handed to the FileParse constructor as the paste source
*          (presumably the Newick text itself - confirm against FileParse)
* @throws IOException
*           declared because the superclass constructor call may throw it
*/
public NewickFile(String inStr) throws IOException
{
- super(inStr, "Paste");
+ super(inStr, DataSourceType.PASTE);
}
/**
* Creates a new NewickFile object by passing both arguments straight to the
* FileParse constructor.
*
* @param inFile
* the data source to read (main() passes a file name here)
- * @param type
+ * @param protocol
* how inFile is to be interpreted (main() passes the file source type)
*
* @throws IOException
* declared because the superclass constructor call may throw it
*/
- public NewickFile(String inFile, String type) throws IOException
+ public NewickFile(String inFile, DataSourceType protocol)
+ throws IOException
{
- super(inFile, type);
+ super(inFile, protocol);
}
public NewickFile(FileParse source) throws IOException
/**
* Appends a positioned error message to the error text collected so far.
* The result is Error (or the empty string when Error is null), followed by
* Er, the text " at position " and p, and then an excerpt of s enclosed in
* " ( " and " )" with a trailing newline.
*
* @param Error
*          error text collected so far, may be null
* @param Er
*          the message to append
* @param r
*          number of characters of s to show on each side of p
* @param p
*          position in s that the message refers to
* @param s
*          the string being parsed; the excerpt runs from p - r (not below 0)
*          to p + r (not beyond the length of s)
* @return the combined error text
*/
private String ErrorStringrange(String Error, String Er, int r, int p,
String s)
{
- return ((Error == null) ? "" : Error)
- + Er
- + " at position "
- + p
- + " ( "
+ return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( "
+ s.substring(((p - r) < 0) ? 0 : (p - r),
- ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n";
+ ((p + r) > s.length()) ? s.length() : (p + r))
+ + " )\n";
}
// @tree annotations
*/
public void parse() throws IOException
{
+ Platform.ensureRegex();
String nf;
{ // fill nf with complete tree file
boolean ascending = false; // flag indicating that we are leaving the
// current node
- com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
- "[(\\['),;]");
+ Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]"
int nextcp = 0;
int ncp = cp;
- boolean parsednodename=false;
+ boolean parsednodename = false;
while (majorsyms.searchFrom(nf, cp) && (Error == null))
{
int fcp = majorsyms.matchedFrom();
continue;
}
-
- ;
d++;
if (c.right() == null)
// Deal with quoted fields
case '\'':
- com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
- "'([^']|'')+'");
+ Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'");
if (qnodename.searchFrom(nf, fcp))
{
int nl = qnodename.stringMatched().length();
- nodename = new String(qnodename.stringMatched().substring(1,
- nl - 1));
+ nodename = new String(
+ qnodename.stringMatched().substring(1, nl - 1));
// unpack any escaped quotes ('' becomes ')
- com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex.perlCode("s/''/'/");
+ Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES);
String widernodename = xpandquotes.replaceAll(nodename);
- nodename=widernodename;
+ nodename = widernodename;
// jump to after end of quoted nodename
nextcp = fcp + nl + 1;
- parsednodename=true;
+ parsednodename = true;
}
else
{
{
if (d != -1)
{
- Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d
- + ")", 7, fcp, nf);
+ Error = ErrorStringrange(Error,
+ "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
}
// cp advanced at the end of default
}
* '"+nf.substring(cp,fcp)+"'"); }
*/
// verify termination.
- com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
+ Regex comment = getRegex(REGEX_COMMENT); // "]"
if (comment.searchFrom(nf, fcp))
{
// Skip the comment field
Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp,
nf);
}
-
- ;
}
// Parse simpler field strings
String fstring = nf.substring(ncp, fcp);
+ fstring.substring(cend + 1);
}
- com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
- "\\b([^' :;\\](),]+)");
- com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
- "\\s*([0-9+]+)\\s*:");
- com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
- ":([-0-9Ee.+]+)");
+ Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)"
+ Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:");
+ Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)");
if (!parsednodename && uqnodename.search(fstring)
&& ((uqnodename.matchedFrom(1) == 0) || (fstring
if (nbootstrap.search(fstring))
{
- if (nbootstrap.stringMatched(1).equals(
- uqnodename.stringMatched(1)))
+ if (nbootstrap.stringMatched(1)
+ .equals(uqnodename.stringMatched(1)))
{
nodename = null; // no nodename here.
}
- if (nodename == null
- || nodename.length() == 0
- || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename
- .stringMatched().length()))
+ if (nodename == null || nodename.length() == 0
+ || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1)
+ + uqnodename.stringMatched().length()))
{
try
{
HasBootstrap = true;
} catch (Exception e)
{
- Error = ErrorStringrange(Error,
- "Can't parse bootstrap value", 4,
- ncp + nbootstrap.matchedFrom(), nf);
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value",
+ 4, ncp + nbootstrap.matchedFrom(), nf);
}
}
}
if ((d > -1) && (c == null))
{
- Error = ErrorStringrange(
- Error,
+ Error = ErrorStringrange(Error,
"File broke algorithm: Lost place in tree (is there an extra ')' ?)",
7, fcp, nf);
}
distance = DefDistance;
bootstrap = DefBootstrap;
commentString2 = null;
- parsednodename=false;
+ parsednodename = false;
}
if (nextcp == 0)
{
if (Error != null)
{
- throw (new IOException("NewickFile: " + Error + "\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { Error.toString() })));
}
if (root == null)
{
- throw (new IOException("NewickFile: No Tree read in\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { MessageManager.getString("label.no_tree_read_in") })));
}
// The next line is failing for topali trees - not sure why yet. if
// (root.right()!=null && root.isDummy())
// more codes here.
} catch (Exception e)
{
- System.err.println("Couldn't parse code '" + code + "' = '"
- + value + "'");
+ System.err.println(
+ "Couldn't parse code '" + code + "' = '" + value + "'");
e.printStackTrace(System.err);
}
}
*/
char getQuoteChar()
{
// the character nodeName() places around names that need quoting
- return QuoteChar;
+ return quoteChar;
}
/**
*/
char setQuoteChar(char c)
{
// keep the previous quote character so it can be handed back to the caller
- char old = QuoteChar;
- QuoteChar = c;
+ char old = quoteChar;
+ quoteChar = c;
return old;
}
*/
private String nodeName(String name)
{
// names containing any of the characters [ , : ' ( ) must be quoted
- if (NodeSafeName[0].search(name))
+ if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name))
{
// the s/'/''/ substitution is applied to the name, which is then wrapped
// in the current quote character
- return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
+ return quoteChar
+ + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name)
+ + quoteChar;
}
else
{
// NOTE(review): the substitution used here is written "s/\\/w/_/", which
// looks like it targets the literal text "/w" rather than whitespace -
// confirm the intended pattern
- return NodeSafeName[2].replaceAll(name);
+ return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name);
}
}
/**
* Builds the label text for a node from up to three parts, in this order:
* the node's name passed through nodeName() (empty when the name is null);
* the bootstrap value, only when HasBootstrap is set and the value is greater
* than -1, preceded by a space if the node has a name; and ":" followed by
* the node's dist value, only when HasDistances is set.
*
* @param c
*          the node to describe
* @return the concatenated label text
*/
private String printNodeField(SequenceNode c)
{
return ((c.getName() == null) ? "" : nodeName(c.getName()))
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " "
- : "") + c.getBootstrap())
- : "")
- : "") + ((HasDistances) ? (":" + c.dist) : "");
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1)
+ ? ((c.getName() != null ? " " : "") + c.getBootstrap())
+ : "") : "")
+ + ((HasDistances) ? (":" + c.dist) : "");
}
/**
*/
private String printRootField(SequenceNode root)
{
// Returns "" when printRootInfo is false. Otherwise concatenates, in order:
// the root's name via nodeName() (empty when the name is null); the bootstrap
// value when HasBootstrap is set and the value is greater than -1, preceded
// by a space if the root has a name; and ":" plus root.dist when
// RootHasDistance is set. The doubled "+ +" before getBootstrap() is a
// string concatenation followed by a unary plus on the value.
- return (printRootInfo) ? (((root.getName() == null) ? ""
- : nodeName(root.getName()))
- + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root
- .getName() != null ? " " : "") + +root.getBootstrap())
- : "") : "") + ((RootHasDistance) ? (":" + root.dist)
- : "")) : "";
+ return (printRootInfo)
+ ? (((root.getName() == null) ? "" : nodeName(root.getName()))
+ + ((HasBootstrap)
+ ? ((root.getBootstrap() > -1)
+ ? ((root.getName() != null ? " " : "")
+ + +root.getBootstrap())
+ : "")
+ : "")
+ + ((RootHasDistance) ? (":" + root.dist) : ""))
+ : "";
}
// Non recursive call deals with root node properties
}
}
- // Test
+ /**
+ *
+ * @param args
+ * @j2sIgnore
+ */
public static void main(String[] args)
{
try
{
if (args == null || args.length != 1)
{
- System.err
- .println("Takes one argument - file name of a newick tree file.");
+ System.err.println(
+ "Takes one argument - file name of a newick tree file.");
System.exit(0);
}
treefile.close();
System.out.println("Read file :\n");
- NewickFile trf = new NewickFile(args[0], "File");
+ NewickFile trf = new NewickFile(args[0], DataSourceType.FILE);
trf.parse();
System.out.println("Original file :\n");
- com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
+ Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", "");
System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
System.out.println("Parsed file.\n");