/*
- * Jalview - A Sequence Alignment Editor and Viewer
- * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
*/
-
// NewickFile.java
// Tree I/O
// http://evolution.genetics.washington.edu/phylip/newick_doc.html
// TODO: Extended SequenceNodeI to hold parsed NHX strings
package jalview.io;
-import java.io.*;
+import jalview.datamodel.SequenceNode;
+import jalview.util.MessageManager;
+import jalview.util.Platform;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
import java.util.StringTokenizer;
-import jalview.datamodel.*;
+import com.stevesoft.pat.Regex;
+
+// TODO This class does not conform to Java standards for field name capitalization.
/**
* Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the
* this: NHX codes are appended in comments beginning with &&NHX. The codes are
* given below (from http://www.phylosoft.org/forester/NHX.html): Element Type
* Description Corresponding phyloXML element (parent element in parentheses) no
- * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) <name>(<clade>) :
- * decimal branch length to parent node (MUST BE SECOND, IF ASSIGNED)
- * <branch_length>(<clade>) :GN= string gene name <name>(<sequence>) :AC=
- * string sequence accession <accession>(<sequence>) :ND= string node
+ * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED)
+ * <name>(<clade>) : decimal branch length to parent node (MUST BE SECOND, IF
+ * ASSIGNED) <branch_length>(<clade>) :GN= string gene name <name>(<sequence>)
+ * :AC= string sequence accession <accession>(<sequence>) :ND= string node
* identifier - if this is being used, it has to be unique within each phylogeny
* <node_id>(<clade>) :B= decimal confidence value for parent branch
* <confidence>(<clade>) :D= 'T', 'F', or '?' 'T' if this node represents a
*/
public class NewickFile extends FileParse
{
- SequenceNode root;
+ private SequenceNode root;
private boolean HasBootstrap = false;
private boolean RootHasDistance = false;
// File IO Flags
- boolean ReplaceUnderscores = false;
+ private boolean ReplaceUnderscores = false;
+
+ private boolean printRootInfo = true;
+
+ private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0;
+
+ private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1;
+
+ private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2;
+
+ private static final int REGEX_MAJOR_SYMS = 3;
+
+ private static final int REGEX_QNODE_NAME = 4;
+
+ private static final int REGEX_COMMENT = 5;
- boolean printRootInfo = true;
+ private static final int REGEX_UQNODE_NAME = 6;
- private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
- { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
- // requiring
- // quotes
- new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote
- // characters
- new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace
- // transformation
- };
+ private static final int REGEX_NBOOTSTRAP = 7;
- char QuoteChar = '\'';
+ private static final int REGEX_NDIST = 8;
+
+ private static final int REGEX_NO_LINES = 9;
+
+ private static final int REGEX_PERL_EXPAND_QUOTES = 10;
+
+ private static final int REGEX_MAX = 11;
+
+ private static final Regex[] REGEX = new Regex[REGEX_MAX];
+
+ /**
+  * Returns the regular expression registered for one of the REGEX_* ids,
+  * creating it on first request and storing it in REGEX so that later calls
+  * reuse the same instance.
+  *
+  * NOTE(review): the cache is a plain static array filled without
+  * synchronisation, and the cached instances are shared by all callers -
+  * confirm that is acceptable for the Regex objects Platform returns.
+  *
+  * @param id
+  *          one of the REGEX_* constants (used directly as an index into
+  *          REGEX)
+  * @return the regex for the given id, or null if no pattern is defined for
+  *         it
+  */
+ private static Regex getRegex(int id)
+ {
+   if (REGEX[id] == null)
+   {
+     String code = null; // plain pattern text
+     String code2 = null; // second argument handed to Platform.newRegex
+     String codePerl = null; // perl-style m/.../ or s/.../.../ expression
+     switch (id)
+     {
+     case REGEX_PERL_NODE_REQUIRE_QUOTE:
+       codePerl = "m/[\\[,:'()]/";
+       break;
+     case REGEX_PERL_NODE_ESCAPE_QUOTE:
+       codePerl = "s/'/''/";
+       break;
+     case REGEX_PERL_NODE_UNQUOTED_WHITESPACE:
+       // NOTE(review): as written this matches the literal text "/w"; it
+       // looks like a whitespace class was intended - confirm before
+       // changing, since it alters how names are written.
+       codePerl = "s/\\/w/_/";
+       break;
+     case REGEX_PERL_EXPAND_QUOTES:
+       codePerl = "s/''/'/";
+       break;
+     case REGEX_MAJOR_SYMS:
+       code = "[(\\['),;]";
+       break;
+     case REGEX_QNODE_NAME:
+       code = "'([^']|'')+'";
+       break;
+     case REGEX_COMMENT:
+       code = "]";
+       break;
+     case REGEX_UQNODE_NAME:
+       code = "\\b([^' :;\\](),]+)";
+       break;
+     case REGEX_NBOOTSTRAP:
+       code = "\\s*([0-9+]+)\\s*:";
+       break;
+     case REGEX_NDIST:
+       code = ":([-0-9Ee.+]+)";
+       break;
+     case REGEX_NO_LINES:
+       code = "\n+";
+       code2 = "";
+       break;
+     default:
+       return null;
+     }
+     // Perl-style expressions have to be built from codePerl itself (code2
+     // is always null for them), and the result is stored so the null check
+     // above short-circuits on subsequent calls.
+     REGEX[id] = (codePerl == null) ? Platform.newRegex(code, code2)
+             : Platform.newRegexPerl(codePerl);
+   }
+   return REGEX[id];
+ }
+
+
+ private char quoteChar = '\'';
/**
* Creates a new NewickFile object.
*
* @param inStr
- * DOCUMENT ME!
+ * text handed to the FileParse constructor as a pasted
+ * (DataSourceType.PASTE) data source
*
* @throws IOException
- * DOCUMENT ME!
+ * if the FileParse constructor reports a problem with the source
*/
public NewickFile(String inStr) throws IOException
{
- super(inStr, "Paste");
+ super(inStr, DataSourceType.PASTE);
}
/**
* Creates a new NewickFile object.
*
* @param inFile
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
+ * identifies the data to read; passed unchanged to the FileParse
+ * constructor and interpreted according to protocol
+ * @param protocol
+ * the kind of data source inFile refers to (for example
+ * DataSourceType.FILE); passed unchanged to the FileParse constructor
*
* @throws IOException
- * DOCUMENT ME!
+ * if the FileParse constructor reports a problem with the source
*/
- public NewickFile(String inFile, String type) throws IOException
+ public NewickFile(String inFile, DataSourceType protocol)
+ throws IOException
{
- super(inFile, type);
+ super(inFile, protocol);
}
public NewickFile(FileParse source) throws IOException
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree)
{
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap)
{
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param distances
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap,
boolean distances)
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param distances
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param rootdistance
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap,
boolean distances, boolean rootdistance)
* DOCUMENT ME!
*
* @param Error
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param Er
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param r
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param p
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param s
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String ErrorStringrange(String Error, String Er, int r, int p,
String s)
{
// Appends one line of the form "<Er> at position <p> ( <excerpt> )\n" to the
// accumulated Error text (a null Error is treated as empty). The excerpt is
// the part of s from p - r to p + r, with the start limited to 0 and the end
// limited to s.length().
- return ((Error == null) ? "" : Error)
- + Er
- + " at position "
- + p
- + " ( "
- + s.substring(((p - r) < 0) ? 0 : (p - r), ((p + r) > s
- .length()) ? s.length() : (p + r)) + " )\n";
+ return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( "
+ + s.substring(((p - r) < 0) ? 0 : (p - r),
+ ((p + r) > s.length()) ? s.length() : (p + r))
+ + " )\n";
}
// @tree annotations
* parse the filesource as a newick file (new hampshire and/or extended)
*
* @throws IOException
- * with a line number and character position for badly
- * formatted NH strings
+ * with a line number and character position for badly formatted NH
+ * strings
*/
public void parse() throws IOException
{
+ Platform.ensureRegex();
String nf;
{ // fill nf with complete tree file
String commentString2 = null; // comments after simple node props
float DefDistance = (float) 0.001; // @param Default distance for a node -
- // very very small
+ // very very small
int DefBootstrap = -1; // @param Default bootstrap for a node
float distance = DefDistance;
int bootstrap = DefBootstrap;
boolean ascending = false; // flag indicating that we are leaving the
- // current node
+ // current node
- com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
- "[(\\['),;]");
+ Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]"
int nextcp = 0;
int ncp = cp;
+ boolean parsednodename = false;
while (majorsyms.searchFrom(nf, cp) && (Error == null))
{
int fcp = majorsyms.matchedFrom();
continue;
}
-
- ;
d++;
if (c.right() == null)
// Deal with quoted fields
case '\'':
- com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
- "([^']|'')+'");
+ Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'");
if (qnodename.searchFrom(nf, fcp))
{
int nl = qnodename.stringMatched().length();
- nodename = new String(qnodename.stringMatched().substring(0,
- nl - 1));
- cp = fcp + nl + 1;
+ nodename = new String(
+ qnodename.stringMatched().substring(1, nl - 1));
+ // unpack any escaped quotes (a doubled '' inside the name becomes ')
+ Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES);
+ String widernodename = xpandquotes.replaceAll(nodename);
+ nodename = widernodename;
+ // jump to after end of quoted nodename
+ nextcp = fcp + nl + 1;
+ parsednodename = true;
}
else
{
{
if (d != -1)
{
- Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d
- + ")", 7, fcp, nf);
+ Error = ErrorStringrange(Error,
+ "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
}
// cp advanced at the end of default
}
* '"+nf.substring(cp,fcp)+"'"); }
*/
// verify termination.
- com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
+ Regex comment = getRegex(REGEX_COMMENT); // "]"
if (comment.searchFrom(nf, fcp))
{
// Skip the comment field
Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp,
nf);
}
-
- ;
}
// Parse simpler field strings
String fstring = nf.substring(ncp, fcp);
+ fstring.substring(cend + 1);
}
- com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
- "\\b([^' :;\\](),]+)");
- com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
- "\\s*([0-9+]+)\\s*:");
- com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
- ":([-0-9Ee.+]+)");
-
- if (uqnodename.search(fstring)
+ Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)"
+ Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:");
+ Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)");
+
+ if (!parsednodename && uqnodename.search(fstring)
&& ((uqnodename.matchedFrom(1) == 0) || (fstring
.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote
- // HACK!
+ // HACK!
{
if (nodename == null)
{
if (nbootstrap.search(fstring))
{
- if (nbootstrap.stringMatched(1).equals(
- uqnodename.stringMatched(1)))
+ if (nbootstrap.stringMatched(1)
+ .equals(uqnodename.stringMatched(1)))
{
nodename = null; // no nodename here.
}
- if (nodename == null
- || nodename.length() == 0
- || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename
- .stringMatched().length()))
+ if (nodename == null || nodename.length() == 0
+ || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1)
+ + uqnodename.stringMatched().length()))
{
try
{
HasBootstrap = true;
} catch (Exception e)
{
- Error = ErrorStringrange(Error,
- "Can't parse bootstrap value", 4, ncp
- + nbootstrap.matchedFrom(), nf);
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value",
+ 4, ncp + nbootstrap.matchedFrom(), nf);
}
}
}
} catch (Exception e)
{
Error = ErrorStringrange(Error,
- "Can't parse node distance value", 7, ncp
- + ndist.matchedFrom(), nf);
+ "Can't parse node distance value", 7,
+ ncp + ndist.matchedFrom(), nf);
}
}
if (c == realroot)
{
RootHasDistance = nodehasdistance; // JBPNote This is really
- // UGLY!!! Ensure root node gets
- // its given distance
+ // UGLY!!! Ensure root node gets
+ // its given distance
}
parseNHXNodeProps(c, commentString2);
commentString2 = null;
if ((d > -1) && (c == null))
{
- Error = ErrorStringrange(
- Error,
+ Error = ErrorStringrange(Error,
"File broke algorithm: Lost place in tree (is there an extra ')' ?)",
7, fcp, nf);
}
nodename = null;
distance = DefDistance;
bootstrap = DefBootstrap;
- commentString2=null;
+ commentString2 = null;
+ parsednodename = false;
}
if (nextcp == 0)
{
if (Error != null)
{
- throw (new IOException("NewickFile: " + Error + "\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { Error.toString() })));
}
if (root == null)
{
- throw (new IOException("NewickFile: No Tree read in\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { MessageManager.getString("label.no_tree_read_in") })));
}
// THe next line is failing for topali trees - not sure why yet. if
// (root.right()!=null && root.isDummy())
// tree is output
if (commentString != null && commentString.startsWith("&&NHX"))
{
- StringTokenizer st = new StringTokenizer(commentString.substring(5),":");
+ StringTokenizer st = new StringTokenizer(commentString.substring(5),
+ ":");
while (st.hasMoreTokens())
{
String tok = st.nextToken();
- int colpos=tok.indexOf("=");
+ int colpos = tok.indexOf("=");
- if (colpos>-1)
+ if (colpos > -1)
{
String code = tok.substring(0, colpos);
- String value = tok.substring(colpos+1);
- try {
+ String value = tok.substring(colpos + 1);
+ try
+ {
// parse out code/value pairs
if (code.toLowerCase().equals("b"))
{
- int v=-1;
+ int v = -1;
Float iv = new Float(value);
v = iv.intValue(); // jalview only does integer bootstraps
- // currently
+ // currently
c.setBootstrap(v);
HasBootstrap = true;
}
// more codes here.
- }
- catch (Exception e)
+ } catch (Exception e)
{
- System.err.println("Couldn't parse code '"+code+"' = '"+value+"'");
+ System.err.println(
+ "Couldn't parse code '" + code + "' = '" + value + "'");
e.printStackTrace(System.err);
}
}
}
}
-
-
+
}
/**
* root distances and user specificied writing of bootstraps.
*
* @param withbootstraps
- * controls if bootstrap values are explicitly written.
+ * controls if bootstrap values are explicitly written.
*
* @return new hampshire tree in a single line
*/
* node distances.
*
* @param withbootstraps
- * explicitly write bootstrap values
+ * explicitly write bootstrap values
* @param withdists
- * explicitly write distances
+ * explicitly write distances
*
* @return new hampshire tree in a single line
*/
* Generate newick format tree according to user specified flags
*
* @param withbootstraps
- * explicitly write bootstrap values
+ * explicitly write bootstrap values
* @param withdists
- * explicitly write distances
+ * explicitly write distances
* @param printRootInfo
- * explicitly write root distance
+ * explicitly write root distance
*
* @return new hampshire tree in a single line
*/
*/
char getQuoteChar()
{
// Plain accessor: returns the current value of the quote-character field.
- return QuoteChar;
+ return quoteChar;
}
/**
- * DOCUMENT ME!
+ * Replaces the quote character that nodeName() wraps around names which
+ * need quoting.
*
* @param c
- * DOCUMENT ME!
+ * the new quote character
*
- * @return DOCUMENT ME!
+ * @return the quote character that was in effect before this call
*/
char setQuoteChar(char c)
{
- char old = QuoteChar;
- QuoteChar = c;
+ char old = quoteChar;
+ quoteChar = c;
return old;
}
* DOCUMENT ME!
*
* @param name
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String nodeName(String name)
{
// A name containing any of [ , : ' ( ) is returned wrapped in the quote
// character, after the escape-quote substitution has been applied to it.
// Any other name is returned after the unquoted-whitespace substitution only.
// NOTE(review): that last pattern is written as s/\\/w/_/, which matches the
// literal text "/w" rather than whitespace - confirm the intended pattern.
- if (NodeSafeName[0].search(name))
+ if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name))
{
- return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
+ return quoteChar
+ + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name)
+ + quoteChar;
}
else
{
- return NodeSafeName[2].replaceAll(name);
+ return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name);
}
}
* DOCUMENT ME!
*
* @param c
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String printNodeField(SequenceNode c)
{
// Builds the text for one node as up to three parts, in this order:
// 1. the node name passed through nodeName(), or nothing if the name is null;
// 2. when HasBootstrap is set and the node's bootstrap is greater than -1,
// the bootstrap value, preceded by a space if the name is not null;
// 3. when HasDistances is set, ":" followed by c.dist.
return ((c.getName() == null) ? "" : nodeName(c.getName()))
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " "
- : "") + c.getBootstrap())
- : "")
- : "") + ((HasDistances) ? (":" + c.dist) : "");
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1)
+ ? ((c.getName() != null ? " " : "") + c.getBootstrap())
+ : "") : "")
+ + ((HasDistances) ? (":" + c.dist) : "");
}
/**
- * DOCUMENT ME!
+ * Builds the text written for the root node. Nothing is written unless
+ * printRootInfo is set. Otherwise the result is the node name (passed
+ * through nodeName(), omitted when null), then the bootstrap value when
+ * HasBootstrap is set and the value is greater than -1 (preceded by a space
+ * if the name is not null), then ":" followed by root.dist when
+ * RootHasDistance is set.
*
* @param root
- * DOCUMENT ME!
+ * the node whose name, bootstrap and distance are written
*
- * @return DOCUMENT ME!
+ * @return the formatted root field, or the empty string when printRootInfo
+ * is false
*/
private String printRootField(SequenceNode root)
{
- return (printRootInfo) ? (((root.getName() == null) ? ""
- : nodeName(root.getName()))
- + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root
- .getName() != null ? " " : "") + +root.getBootstrap())
- : "") : "") + ((RootHasDistance) ? (":" + root.dist)
- : "")) : "";
+ return (printRootInfo)
+ ? (((root.getName() == null) ? "" : nodeName(root.getName()))
+ + ((HasBootstrap)
+ ? ((root.getBootstrap() > -1)
+ ? ((root.getName() != null ? " " : "")
+ + +root.getBootstrap())
+ : "")
+ : "")
+ + ((RootHasDistance) ? (":" + root.dist) : ""))
+ : "";
}
// Non recursive call deals with root node properties
}
}
- // Test
+ /**
+ *
+ * @param args
+ * @j2sIgnore
+ */
public static void main(String[] args)
{
try
{
if (args == null || args.length != 1)
{
- System.err
- .println("Takes one argument - file name of a newick tree file.");
+ System.err.println(
+ "Takes one argument - file name of a newick tree file.");
System.exit(0);
}
treefile.close();
System.out.println("Read file :\n");
- NewickFile trf = new NewickFile(args[0], "File");
+ NewickFile trf = new NewickFile(args[0], DataSourceType.FILE);
trf.parse();
System.out.println("Original file :\n");
- com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
+ Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", "");
System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");
System.out.println("Parsed file.\n");