X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=2221f0090a29830112daf03331f4650e49dcac5c;hb=c6018dc0dc12720e13b75850a5303279ac7094b7;hp=8dc23891bb712243e645ccf9cf1569bff0142ec4;hpb=506d60f0e188723ddc91c26824b41ac7034df3fe;p=jalview.git
diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java
index 8dc2389..2221f00 100755
--- a/src/jalview/io/NewickFile.java
+++ b/src/jalview/io/NewickFile.java
@@ -1,20 +1,22 @@
/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
- * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
+ * This file is part of Jalview.
*
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
*/
// NewickFile.java
// Tree I/O
@@ -24,10 +26,14 @@
// TODO: Extended SequenceNodeI to hold parsed NHX strings
package jalview.io;
-import java.io.*;
-import java.util.StringTokenizer;
+import jalview.datamodel.SequenceNode;
+import jalview.util.MessageManager;
-import jalview.datamodel.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.StringTokenizer;
/**
* Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the
@@ -35,10 +41,10 @@ import jalview.datamodel.*;
* this: NHX codes are appended in comments beginning with &&NHX. The codes are
* given below (from http://www.phylosoft.org/forester/NHX.html): Element Type
* Description Corresponding phyloXML element (parent element in parentheses) no
- * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) <name>(<clade>) :
- * decimal branch length to parent node (MUST BE SECOND, IF ASSIGNED)
- * <branch_length>(<clade>) :GN= string gene name <name>(<sequence>) :AC=
- * string sequence accession <accession>(<sequence>) :ND= string node
+ * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED)
+ * <name>(<clade>) : decimal branch length to parent node (MUST BE SECOND, IF
+ * ASSIGNED) <branch_length>(<clade>) :GN= string gene name <name>(<sequence>)
+ * :AC= string sequence accession <accession>(<sequence>) :ND= string node
* identifier - if this is being used, it has to be unique within each phylogeny
* <node_id>(<clade>) :B= decimal confidence value for parent branch
* <confidence>(<clade>) :D= 'T', 'F', or '?' 'T' if this node represents a
@@ -81,14 +87,14 @@ public class NewickFile extends FileParse
boolean printRootInfo = true;
- private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[]
- { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
+ private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] {
+ new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
// requiring
// quotes
new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote
// characters
new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace
- // transformation
+ // transformation
};
char QuoteChar = '\'';
@@ -97,30 +103,31 @@ public class NewickFile extends FileParse
* Creates a new NewickFile object.
*
* @param inStr
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @throws IOException
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(String inStr) throws IOException
{
- super(inStr, "Paste");
+ super(inStr, DataSourceType.PASTE);
}
/**
* Creates a new NewickFile object.
*
* @param inFile
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
+ * DOCUMENT ME!
+ * @param protocol
+ * DOCUMENT ME!
*
* @throws IOException
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
- public NewickFile(String inFile, String type) throws IOException
+ public NewickFile(String inFile, DataSourceType protocol)
+ throws IOException
{
- super(inFile, type);
+ super(inFile, protocol);
}
public NewickFile(FileParse source) throws IOException
@@ -132,7 +139,7 @@ public class NewickFile extends FileParse
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree)
{
@@ -143,9 +150,9 @@ public class NewickFile extends FileParse
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap)
{
@@ -157,11 +164,11 @@ public class NewickFile extends FileParse
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param distances
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap,
boolean distances)
@@ -175,13 +182,13 @@ public class NewickFile extends FileParse
* Creates a new NewickFile object.
*
* @param newtree
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param bootstrap
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param distances
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param rootdistance
- * DOCUMENT ME!
+ * DOCUMENT ME!
*/
public NewickFile(SequenceNode newtree, boolean bootstrap,
boolean distances, boolean rootdistance)
@@ -196,28 +203,25 @@ public class NewickFile extends FileParse
* DOCUMENT ME!
*
* @param Error
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param Er
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param r
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param p
- * DOCUMENT ME!
+ * DOCUMENT ME!
* @param s
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String ErrorStringrange(String Error, String Er, int r, int p,
String s)
{
- return ((Error == null) ? "" : Error)
- + Er
- + " at position "
- + p
- + " ( "
- + s.substring(((p - r) < 0) ? 0 : (p - r), ((p + r) > s
- .length()) ? s.length() : (p + r)) + " )\n";
+ return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( "
+ + s.substring(((p - r) < 0) ? 0 : (p - r),
+ ((p + r) > s.length()) ? s.length() : (p + r))
+ + " )\n";
}
// @tree annotations
@@ -246,8 +250,8 @@ public class NewickFile extends FileParse
* parse the filesource as a newick file (new hampshire and/or extended)
*
* @throws IOException
- * with a line number and character position for badly
- * formatted NH strings
+ * with a line number and character position for badly formatted NH
+ * strings
*/
public void parse() throws IOException
{
@@ -293,6 +297,7 @@ public class NewickFile extends FileParse
int nextcp = 0;
int ncp = cp;
+ boolean parsednodename = false;
while (majorsyms.searchFrom(nf, cp) && (Error == null))
{
int fcp = majorsyms.matchedFrom();
@@ -350,14 +355,21 @@ public class NewickFile extends FileParse
case '\'':
com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
- "([^']|'')+'");
+ "'([^']|'')+'");
if (qnodename.searchFrom(nf, fcp))
{
int nl = qnodename.stringMatched().length();
- nodename = new String(qnodename.stringMatched().substring(0,
- nl - 1));
- cp = fcp + nl + 1;
+ nodename = new String(
+ qnodename.stringMatched().substring(1, nl - 1));
+ // unpack any escaped colons
+ com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex
+ .perlCode("s/''/'/");
+ String widernodename = xpandquotes.replaceAll(nodename);
+ nodename = widernodename;
+ // jump to after end of quoted nodename
+ nextcp = fcp + nl + 1;
+ parsednodename = true;
}
else
{
@@ -372,8 +384,8 @@ public class NewickFile extends FileParse
{
if (d != -1)
{
- Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d
- + ")", 7, fcp, nf);
+ Error = ErrorStringrange(Error,
+ "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
}
// cp advanced at the end of default
}
@@ -386,7 +398,8 @@ public class NewickFile extends FileParse
* '"+nf.substring(cp,fcp)+"'"); }
*/
// verify termination.
- com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]");
+ com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
+ "]");
if (comment.searchFrom(nf, fcp))
{
// Skip the comment field
@@ -426,7 +439,7 @@ public class NewickFile extends FileParse
com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
":([-0-9Ee.+]+)");
- if (uqnodename.search(fstring)
+ if (!parsednodename && uqnodename.search(fstring)
&& ((uqnodename.matchedFrom(1) == 0) || (fstring
.charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote
// HACK!
@@ -453,26 +466,24 @@ public class NewickFile extends FileParse
if (nbootstrap.search(fstring))
{
- if (nbootstrap.stringMatched(1).equals(
- uqnodename.stringMatched(1)))
+ if (nbootstrap.stringMatched(1)
+ .equals(uqnodename.stringMatched(1)))
{
nodename = null; // no nodename here.
}
- if (nodename == null
- || nodename.length() == 0
- || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename
- .stringMatched().length()))
+ if (nodename == null || nodename.length() == 0
+ || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1)
+ + uqnodename.stringMatched().length()))
{
try
{
- bootstrap = (new Integer(nbootstrap.stringMatched(1)))
+ bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1)))
.intValue();
HasBootstrap = true;
} catch (Exception e)
{
- Error = ErrorStringrange(Error,
- "Can't parse bootstrap value", 4, ncp
- + nbootstrap.matchedFrom(), nf);
+ Error = ErrorStringrange(Error, "Can't parse bootstrap value",
+ 4, ncp + nbootstrap.matchedFrom(), nf);
}
}
}
@@ -483,14 +494,14 @@ public class NewickFile extends FileParse
{
try
{
- distance = (new Float(ndist.stringMatched(1))).floatValue();
+ distance = (Float.valueOf(ndist.stringMatched(1))).floatValue();
HasDistances = true;
nodehasdistance = true;
} catch (Exception e)
{
Error = ErrorStringrange(Error,
- "Can't parse node distance value", 7, ncp
- + ndist.matchedFrom(), nf);
+ "Can't parse node distance value", 7,
+ ncp + ndist.matchedFrom(), nf);
}
}
@@ -549,8 +560,7 @@ public class NewickFile extends FileParse
if ((d > -1) && (c == null))
{
- Error = ErrorStringrange(
- Error,
+ Error = ErrorStringrange(Error,
"File broke algorithm: Lost place in tree (is there an extra ')' ?)",
7, fcp, nf);
}
@@ -585,6 +595,7 @@ public class NewickFile extends FileParse
distance = DefDistance;
bootstrap = DefBootstrap;
commentString2 = null;
+ parsednodename = false;
}
if (nextcp == 0)
{
@@ -599,11 +610,15 @@ public class NewickFile extends FileParse
if (Error != null)
{
- throw (new IOException("NewickFile: " + Error + "\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { Error.toString() })));
}
if (root == null)
{
- throw (new IOException("NewickFile: No Tree read in\n"));
+ throw (new IOException(
+ MessageManager.formatMessage("exception.newfile", new String[]
+ { MessageManager.getString("label.no_tree_read_in") })));
}
// THe next line is failing for topali trees - not sure why yet. if
// (root.right()!=null && root.isDummy())
@@ -646,7 +661,7 @@ public class NewickFile extends FileParse
if (code.toLowerCase().equals("b"))
{
int v = -1;
- Float iv = new Float(value);
+ Float iv = Float.valueOf(value);
v = iv.intValue(); // jalview only does integer bootstraps
// currently
c.setBootstrap(v);
@@ -655,8 +670,8 @@ public class NewickFile extends FileParse
// more codes here.
} catch (Exception e)
{
- System.err.println("Couldn't parse code '" + code + "' = '"
- + value + "'");
+ System.err.println(
+ "Couldn't parse code '" + code + "' = '" + value + "'");
e.printStackTrace(System.err);
}
}
@@ -699,7 +714,7 @@ public class NewickFile extends FileParse
* root distances and user specificied writing of bootstraps.
*
* @param withbootstraps
- * controls if bootstrap values are explicitly written.
+ * controls if bootstrap values are explicitly written.
*
* @return new hampshire tree in a single line
*/
@@ -723,9 +738,9 @@ public class NewickFile extends FileParse
* node distances.
*
* @param withbootstraps
- * explicitly write bootstrap values
+ * explicitly write bootstrap values
* @param withdists
- * explicitly write distances
+ * explicitly write distances
*
* @return new hampshire tree in a single line
*/
@@ -747,11 +762,11 @@ public class NewickFile extends FileParse
* Generate newick format tree according to user specified flags
*
* @param withbootstraps
- * explicitly write bootstrap values
+ * explicitly write bootstrap values
* @param withdists
- * explicitly write distances
+ * explicitly write distances
* @param printRootInfo
- * explicitly write root distance
+ * explicitly write root distance
*
* @return new hampshire tree in a single line
*/
@@ -784,7 +799,7 @@ public class NewickFile extends FileParse
* DOCUMENT ME!
*
* @param c
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
@@ -800,7 +815,7 @@ public class NewickFile extends FileParse
* DOCUMENT ME!
*
* @param name
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
@@ -820,35 +835,39 @@ public class NewickFile extends FileParse
* DOCUMENT ME!
*
* @param c
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String printNodeField(SequenceNode c)
{
return ((c.getName() == null) ? "" : nodeName(c.getName()))
- + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " "
- : "") + c.getBootstrap())
- : "")
- : "") + ((HasDistances) ? (":" + c.dist) : "");
+ + ((HasBootstrap) ? ((c.getBootstrap() > -1)
+ ? ((c.getName() != null ? " " : "") + c.getBootstrap())
+ : "") : "")
+ + ((HasDistances) ? (":" + c.dist) : "");
}
/**
* DOCUMENT ME!
*
* @param root
- * DOCUMENT ME!
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
private String printRootField(SequenceNode root)
{
- return (printRootInfo) ? (((root.getName() == null) ? ""
- : nodeName(root.getName()))
- + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root
- .getName() != null ? " " : "") + +root.getBootstrap())
- : "") : "") + ((RootHasDistance) ? (":" + root.dist)
- : "")) : "";
+ return (printRootInfo)
+ ? (((root.getName() == null) ? "" : nodeName(root.getName()))
+ + ((HasBootstrap)
+ ? ((root.getBootstrap() > -1)
+ ? ((root.getName() != null ? " " : "")
+ + +root.getBootstrap())
+ : "")
+ : "")
+ + ((RootHasDistance) ? (":" + root.dist) : ""))
+ : "";
}
// Non recursive call deals with root node properties
@@ -928,8 +947,8 @@ public class NewickFile extends FileParse
{
if (args == null || args.length != 1)
{
- System.err
- .println("Takes one argument - file name of a newick tree file.");
+ System.err.println(
+ "Takes one argument - file name of a newick tree file.");
System.exit(0);
}
@@ -947,7 +966,7 @@ public class NewickFile extends FileParse
treefile.close();
System.out.println("Read file :\n");
- NewickFile trf = new NewickFile(args[0], "File");
+ NewickFile trf = new NewickFile(args[0], DataSourceType.FILE);
trf.parse();
System.out.println("Original file :\n");