X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=5a986056d2f909a103028a21bcc13945dd4468bd;hb=990ed4ffbaa7a95b2ebb6bf6ab0440310f6e83ab;hp=b7df38f1fc3e1d9590e9b84c66eed69ce9efe2ac;hpb=8a6fa9ea9900d0f106529c3f6283e7f9d76dd2cb;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index b7df38f..5a98605 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -1,19 +1,22 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6) - * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. 
*/ // NewickFile.java // Tree I/O @@ -23,10 +26,19 @@ // TODO: Extended SequenceNodeI to hold parsed NHX strings package jalview.io; -import java.io.*; +import jalview.datamodel.SequenceNode; +import jalview.util.MessageManager; +import jalview.util.Platform; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; import java.util.StringTokenizer; -import jalview.datamodel.*; +import com.stevesoft.pat.Regex; + +// TODO This class does not conform to Java standards for field name capitalization. /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the @@ -67,7 +79,7 @@ import jalview.datamodel.*; */ public class NewickFile extends FileParse { - SequenceNode root; + private SequenceNode root; private boolean HasBootstrap = false; @@ -76,21 +88,90 @@ public class NewickFile extends FileParse private boolean RootHasDistance = false; // File IO Flags - boolean ReplaceUnderscores = false; + private boolean ReplaceUnderscores = false; + + private boolean printRootInfo = true; + + private static final int REGEX_PERL_NODE_REQUIRE_QUOTE = 0; + + private static final int REGEX_PERL_NODE_ESCAPE_QUOTE = 1; + + private static final int REGEX_PERL_NODE_UNQUOTED_WHITESPACE = 2; + + private static final int REGEX_MAJOR_SYMS = 3; + + private static final int REGEX_QNODE_NAME = 4; + + private static final int REGEX_COMMENT = 5; + + private static final int REGEX_UQNODE_NAME = 6; - boolean printRootInfo = true; + private static final int REGEX_NBOOTSTRAP = 7; - private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] - { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for - // requiring - // quotes - new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote - // characters - new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation - }; + private static final int REGEX_NDIST = 8; - char QuoteChar = '\''; + private 
static final int REGEX_NO_LINES = 9; + + private static final int REGEX_PERL_EXPAND_QUOTES = 10; + + private static final int REGEX_MAX = 11; + + private static final Regex[] REGEX = new Regex[REGEX_MAX]; + + private static Regex getRegex(int id) /* Returns the Regex for one of the REGEX_* ids: an entry already present in REGEX[id] is returned as-is, otherwise one is built from the pattern selected below; returns null if the switch does not recognise id. */ + { + if (REGEX[id] == null) + { + String code = null; + String code2 = null; + String codePerl = null; + switch (id) + { + case REGEX_PERL_NODE_REQUIRE_QUOTE: + codePerl = "m/[\\[,:'()]/"; + break; + case REGEX_PERL_NODE_ESCAPE_QUOTE: + codePerl = "s/'/''/"; + break; + case REGEX_PERL_NODE_UNQUOTED_WHITESPACE: + codePerl = "s/\\/w/_/"; + break; + case REGEX_PERL_EXPAND_QUOTES: + codePerl = "s/''/'/"; + break; + case REGEX_MAJOR_SYMS: + code = "[(\\['),;]"; + break; + case REGEX_QNODE_NAME: + code = "'([^']|'')+'"; + break; + case REGEX_COMMENT: + code = "]"; + break; + case REGEX_UQNODE_NAME: + code = "\\b([^' :;\\](),]+)"; + break; + case REGEX_NBOOTSTRAP: + code = "\\s*([0-9+]+)\\s*:"; + break; + case REGEX_NDIST: + code = ":([-0-9Ee.+]+)"; + break; + case REGEX_NO_LINES: + code = "\n+"; + code2 = ""; + break; + default: + return null; + } + return codePerl == null ? Platform.newRegex(code, code2) + : Platform.newRegexPerl(codePerl); /* the perl-style cases set only codePerl (code2 stays null for them), so codePerl is the string to compile. NOTE(review): nothing in this method stores the result into REGEX[id], so as shown a new Regex is built on each call - confirm whether caching was intended before adding it. */ + } + return REGEX[id]; + } + + + private char quoteChar = '\''; /** * Creates a new NewickFile object. @@ -103,7 +184,7 @@ public class NewickFile extends FileParse */ public NewickFile(String inStr) throws IOException { - super(inStr, "Paste"); + super(inStr, DataSourceType.PASTE); } /** @@ -111,15 +192,16 @@ public class NewickFile extends FileParse * * @param inFile * DOCUMENT ME! - * @param type + * @param protocol * DOCUMENT ME! * * @throws IOException * DOCUMENT ME! 
*/ - public NewickFile(String inFile, String type) throws IOException + public NewickFile(String inFile, DataSourceType protocol) + throws IOException { - super(inFile, type); + super(inFile, protocol); } public NewickFile(FileParse source) throws IOException @@ -210,13 +292,10 @@ public class NewickFile extends FileParse private String ErrorStringrange(String Error, String Er, int r, int p, String s) { - return ((Error == null) ? "" : Error) - + Er - + " at position " - + p - + " ( " + return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( " + s.substring(((p - r) < 0) ? 0 : (p - r), - ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n"; + ((p + r) > s.length()) ? s.length() : (p + r)) + + " )\n"; } // @tree annotations @@ -250,6 +329,7 @@ public class NewickFile extends FileParse */ public void parse() throws IOException { + Platform.ensureRegex(); String nf; { // fill nf with complete tree file @@ -287,11 +367,11 @@ public class NewickFile extends FileParse boolean ascending = false; // flag indicating that we are leaving the // current node - com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( - "[(\\['),;]"); + Regex majorsyms = getRegex(REGEX_MAJOR_SYMS); // "[(\\['),;]" int nextcp = 0; int ncp = cp; + boolean parsednodename = false; while (majorsyms.searchFrom(nf, cp) && (Error == null)) { int fcp = majorsyms.matchedFrom(); @@ -308,8 +388,6 @@ public class NewickFile extends FileParse continue; } - - ; d++; if (c.right() == null) @@ -348,15 +426,20 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( - "([^']|'')+'"); + Regex qnodename = getRegex(REGEX_QNODE_NAME);// "'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { int nl = qnodename.stringMatched().length(); - nodename = new String(qnodename.stringMatched().substring(0, - nl - 1)); - cp = fcp + nl + 1; + nodename = new String( + qnodename.stringMatched().substring(1, nl - 
1)); + // unpack any escaped colons + Regex xpandquotes = getRegex(REGEX_PERL_EXPAND_QUOTES); + String widernodename = xpandquotes.replaceAll(nodename); + nodename = widernodename; + // jump to after end of quoted nodename + nextcp = fcp + nl + 1; + parsednodename = true; } else { @@ -371,8 +454,8 @@ public class NewickFile extends FileParse { if (d != -1) { - Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d - + ")", 7, fcp, nf); + Error = ErrorStringrange(Error, + "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); } // cp advanced at the end of default } @@ -385,7 +468,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]"); + Regex comment = getRegex(REGEX_COMMENT); // "]" if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -401,8 +484,6 @@ public class NewickFile extends FileParse Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); } - - ; } // Parse simpler field strings String fstring = nf.substring(ncp, fcp); @@ -418,14 +499,11 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( - "\\b([^' :;\\](),]+)"); - com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\s*([0-9+]+)\\s*:"); - com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9Ee.+]+)"); - - if (uqnodename.search(fstring) + Regex uqnodename = getRegex(REGEX_UQNODE_NAME);// "\\b([^' :;\\](),]+)" + Regex nbootstrap = getRegex(REGEX_NBOOTSTRAP);// "\\s*([0-9+]+)\\s*:"); + Regex ndist = getRegex(REGEX_NDIST);// ":([-0-9Ee.+]+)"); + + if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote // HACK! 
@@ -452,15 +530,14 @@ public class NewickFile extends FileParse if (nbootstrap.search(fstring)) { - if (nbootstrap.stringMatched(1).equals( - uqnodename.stringMatched(1))) + if (nbootstrap.stringMatched(1) + .equals(uqnodename.stringMatched(1))) { nodename = null; // no nodename here. } - if (nodename == null - || nodename.length() == 0 - || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename - .stringMatched().length())) + if (nodename == null || nodename.length() == 0 + || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + + uqnodename.stringMatched().length())) { try { @@ -469,9 +546,8 @@ public class NewickFile extends FileParse HasBootstrap = true; } catch (Exception e) { - Error = ErrorStringrange(Error, - "Can't parse bootstrap value", 4, - ncp + nbootstrap.matchedFrom(), nf); + Error = ErrorStringrange(Error, "Can't parse bootstrap value", + 4, ncp + nbootstrap.matchedFrom(), nf); } } } @@ -548,8 +624,7 @@ public class NewickFile extends FileParse if ((d > -1) && (c == null)) { - Error = ErrorStringrange( - Error, + Error = ErrorStringrange(Error, "File broke algorithm: Lost place in tree (is there an extra ')' ?)", 7, fcp, nf); } @@ -584,6 +659,7 @@ public class NewickFile extends FileParse distance = DefDistance; bootstrap = DefBootstrap; commentString2 = null; + parsednodename = false; } if (nextcp == 0) { @@ -598,11 +674,15 @@ public class NewickFile extends FileParse if (Error != null) { - throw (new IOException("NewickFile: " + Error + "\n")); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { Error.toString() }))); } if (root == null) { - throw (new IOException("NewickFile: No Tree read in\n")); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { MessageManager.getString("label.no_tree_read_in") }))); } // THe next line is failing for topali trees - not sure why yet. 
if // (root.right()!=null && root.isDummy()) @@ -654,8 +734,8 @@ public class NewickFile extends FileParse // more codes here. } catch (Exception e) { - System.err.println("Couldn't parse code '" + code + "' = '" - + value + "'"); + System.err.println( + "Couldn't parse code '" + code + "' = '" + value + "'"); e.printStackTrace(System.err); } } @@ -776,7 +856,7 @@ public class NewickFile extends FileParse */ char getQuoteChar() { - return QuoteChar; + return quoteChar; } /** @@ -789,8 +869,8 @@ public class NewickFile extends FileParse */ char setQuoteChar(char c) { - char old = QuoteChar; - QuoteChar = c; + char old = quoteChar; + quoteChar = c; return old; } @@ -805,13 +885,15 @@ public class NewickFile extends FileParse */ private String nodeName(String name) { - if (NodeSafeName[0].search(name)) + if (getRegex(REGEX_PERL_NODE_REQUIRE_QUOTE).search(name)) { - return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; + return quoteChar + + getRegex(REGEX_PERL_NODE_ESCAPE_QUOTE).replaceAll(name) + + quoteChar; } else { - return NodeSafeName[2].replaceAll(name); + return getRegex(REGEX_PERL_NODE_UNQUOTED_WHITESPACE).replaceAll(name); } } @@ -826,10 +908,10 @@ public class NewickFile extends FileParse private String printNodeField(SequenceNode c) { return ((c.getName() == null) ? "" : nodeName(c.getName())) - + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " " - : "") + c.getBootstrap()) - : "") - : "") + ((HasDistances) ? (":" + c.dist) : ""); + + ((HasBootstrap) ? ((c.getBootstrap() > -1) + ? ((c.getName() != null ? " " : "") + c.getBootstrap()) + : "") : "") + + ((HasDistances) ? (":" + c.dist) : ""); } /** @@ -842,12 +924,16 @@ public class NewickFile extends FileParse */ private String printRootField(SequenceNode root) { - return (printRootInfo) ? (((root.getName() == null) ? "" - : nodeName(root.getName())) - + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root - .getName() != null ? 
" " : "") + +root.getBootstrap()) - : "") : "") + ((RootHasDistance) ? (":" + root.dist) - : "")) : ""; + return (printRootInfo) + ? (((root.getName() == null) ? "" : nodeName(root.getName())) + + ((HasBootstrap) + ? ((root.getBootstrap() > -1) + ? ((root.getName() != null ? " " : "") + + +root.getBootstrap()) + : "") + : "") + + ((RootHasDistance) ? (":" + root.dist) : "")) + : ""; } // Non recursive call deals with root node properties @@ -920,15 +1006,19 @@ public class NewickFile extends FileParse } } - // Test + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { try { if (args == null || args.length != 1) { - System.err - .println("Takes one argument - file name of a newick tree file."); + System.err.println( + "Takes one argument - file name of a newick tree file."); System.exit(0); } @@ -946,11 +1036,11 @@ public class NewickFile extends FileParse treefile.close(); System.out.println("Read file :\n"); - NewickFile trf = new NewickFile(args[0], "File"); + NewickFile trf = new NewickFile(args[0], DataSourceType.FILE); trf.parse(); System.out.println("Original file :\n"); - com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", ""); + Regex nonl = getRegex(REGEX_NO_LINES);// "\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");