X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FNewickFile.java;h=d3f833f7cb2868813bd79602f64e1b0f02d6cd67;hb=2604ed9c9ad23a7c97b5d885249d81948cbac130;hp=ab76ab9a22f5308d5363de46edafcf2ceec55890;hpb=ab43013b7e357b84b4abade0dba949668dfb2a0e;p=jalview.git diff --git a/src/jalview/io/NewickFile.java b/src/jalview/io/NewickFile.java index ab76ab9..d3f833f 100755 --- a/src/jalview/io/NewickFile.java +++ b/src/jalview/io/NewickFile.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2b1) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -26,12 +26,19 @@ // TODO: Extended SequenceNodeI to hold parsed NHX strings package jalview.io; -import java.io.*; -import java.util.StringTokenizer; +import java.util.Locale; -import jalview.datamodel.*; +import jalview.datamodel.SequenceNode; import jalview.util.MessageManager; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.StringTokenizer; + +import com.stevesoft.pat.Regex; + /** * Parse a new hanpshire style tree Caveats: NHX files are NOT supported and the * tree distances and topology are unreliable when they are parsed. TODO: on @@ -73,25 +80,25 @@ public class NewickFile extends FileParse { SequenceNode root; - private boolean HasBootstrap = false; + private boolean hasBootstrap = false; - private boolean HasDistances = false; + private boolean hasDistances = false; - private boolean RootHasDistance = false; + private boolean rootHasDistance = false; // File IO Flags - boolean ReplaceUnderscores = false; + boolean replaceUnderscores = false; boolean printRootInfo = true; - private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] - { new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for + private Regex[] NodeSafeName = new Regex[] { + new Regex().perlCode("m/[\\[,:'()]/"), // test for // requiring // quotes - new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote + new Regex().perlCode("s/'/''/"), // escaping quote // characters - new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace - // transformation + new Regex().perlCode("s/\\/w/_/") // unqoted whitespace + // transformation }; char QuoteChar = '\''; @@ -107,7 +114,7 @@ public class NewickFile extends FileParse */ public NewickFile(String inStr) throws IOException { - super(inStr, "Paste"); + super(inStr, DataSourceType.PASTE); } /** @@ -115,15 +122,16 @@ public class NewickFile extends FileParse * * @param inFile * DOCUMENT ME! - * @param type + * @param protocol * DOCUMENT ME! * * @throws IOException * DOCUMENT ME! */ - public NewickFile(String inFile, String type) throws IOException + public NewickFile(String inFile, DataSourceType protocol) + throws IOException { - super(inFile, type); + super(inFile, protocol); } public NewickFile(FileParse source) throws IOException @@ -152,7 +160,7 @@ public class NewickFile extends FileParse */ public NewickFile(SequenceNode newtree, boolean bootstrap) { - HasBootstrap = bootstrap; + hasBootstrap = bootstrap; root = newtree; } @@ -170,8 +178,8 @@ public class NewickFile extends FileParse boolean distances) { root = newtree; - HasBootstrap = bootstrap; - HasDistances = distances; + hasBootstrap = bootstrap; + hasDistances = distances; } /** @@ -190,9 +198,9 @@ public class NewickFile extends FileParse boolean distances, boolean rootdistance) { root = newtree; - HasBootstrap = bootstrap; - HasDistances = distances; - RootHasDistance = rootdistance; + hasBootstrap = bootstrap; + hasDistances = distances; + rootHasDistance = rootdistance; } /** @@ -214,20 +222,17 @@ public class NewickFile extends FileParse private String ErrorStringrange(String Error, String Er, int r, int p, String s) { - return ((Error == null) ? "" : Error) - + Er - + " at position " - + p - + " ( " + return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( " + s.substring(((p - r) < 0) ? 0 : (p - r), - ((p + r) > s.length()) ? s.length() : (p + r)) + " )\n"; + ((p + r) > s.length()) ? s.length() : (p + r)) + + " )\n"; } // @tree annotations // These are set automatically by the reader - public boolean HasBootstrap() + public boolean hasBootstrap() { - return HasBootstrap; + return hasBootstrap; } /** @@ -235,14 +240,14 @@ public class NewickFile extends FileParse * * @return DOCUMENT ME! */ - public boolean HasDistances() + public boolean hasDistances() { - return HasDistances; + return hasDistances; } - public boolean HasRootDistance() + public boolean hasRootDistance() { - return RootHasDistance; + return rootHasDistance; } /** @@ -291,8 +296,7 @@ public class NewickFile extends FileParse boolean ascending = false; // flag indicating that we are leaving the // current node - com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex( - "[(\\['),;]"); + Regex majorsyms = new Regex("[(\\['),;]"); int nextcp = 0; int ncp = cp; @@ -313,8 +317,6 @@ public class NewickFile extends FileParse continue; } - - ; d++; if (c.right() == null) @@ -353,17 +355,15 @@ public class NewickFile extends FileParse // Deal with quoted fields case '\'': - com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex( - "'([^']|'')+'"); + Regex qnodename = new Regex("'([^']|'')+'"); if (qnodename.searchFrom(nf, fcp)) { int nl = qnodename.stringMatched().length(); - nodename = new String(qnodename.stringMatched().substring(1, - nl - 1)); + nodename = new String( + qnodename.stringMatched().substring(1, nl - 1)); // unpack any escaped colons - com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex - .perlCode("s/''/'/"); + Regex xpandquotes = Regex.perlCode("s/''/'/"); String widernodename = xpandquotes.replaceAll(nodename); nodename = widernodename; // jump to after end of quoted nodename @@ -383,8 +383,8 @@ public class NewickFile extends FileParse { if (d != -1) { - Error = ErrorStringrange(Error, "Wayward semicolon (depth=" + d - + ")", 7, fcp, nf); + Error = ErrorStringrange(Error, + "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); } // cp advanced at the end of default } @@ -397,7 +397,7 @@ public class NewickFile extends FileParse * '"+nf.substring(cp,fcp)+"'"); } */ // verify termination. - com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex("]"); + Regex comment = new Regex("]"); if (comment.searchFrom(nf, fcp)) { // Skip the comment field @@ -413,8 +413,6 @@ public class NewickFile extends FileParse Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, nf); } - - ; } // Parse simpler field strings String fstring = nf.substring(ncp, fcp); @@ -430,22 +428,18 @@ public class NewickFile extends FileParse + fstring.substring(cend + 1); } - com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex( - "\\b([^' :;\\](),]+)"); - com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex( - "\\s*([0-9+]+)\\s*:"); - com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex( - ":([-0-9Ee.+]+)"); - - if (!parsednodename - && uqnodename.search(fstring) + Regex uqnodename = new Regex("\\b([^' :;\\](),]+)"); + Regex nbootstrap = new Regex("\\s*([0-9+]+)\\s*:"); + Regex ndist = new Regex(":([-0-9Ee.+]+)"); + + if (!parsednodename && uqnodename.search(fstring) && ((uqnodename.matchedFrom(1) == 0) || (fstring .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote // HACK! { if (nodename == null) { - if (ReplaceUnderscores) + if (replaceUnderscores) { nodename = uqnodename.stringMatched(1).replace('_', ' '); } @@ -465,26 +459,24 @@ public class NewickFile extends FileParse if (nbootstrap.search(fstring)) { - if (nbootstrap.stringMatched(1).equals( - uqnodename.stringMatched(1))) + if (nbootstrap.stringMatched(1) + .equals(uqnodename.stringMatched(1))) { nodename = null; // no nodename here. } - if (nodename == null - || nodename.length() == 0 - || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + uqnodename - .stringMatched().length())) + if (nodename == null || nodename.length() == 0 + || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) + + uqnodename.stringMatched().length())) { try { - bootstrap = (new Integer(nbootstrap.stringMatched(1))) + bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1))) .intValue(); - HasBootstrap = true; + hasBootstrap = true; } catch (Exception e) { - Error = ErrorStringrange(Error, - "Can't parse bootstrap value", 4, - ncp + nbootstrap.matchedFrom(), nf); + Error = ErrorStringrange(Error, "Can't parse bootstrap value", + 4, ncp + nbootstrap.matchedFrom(), nf); } } } @@ -495,8 +487,8 @@ public class NewickFile extends FileParse { try { - distance = (new Float(ndist.stringMatched(1))).floatValue(); - HasDistances = true; + distance = (Float.valueOf(ndist.stringMatched(1))).floatValue(); + hasDistances = true; nodehasdistance = true; } catch (Exception e) { @@ -511,12 +503,12 @@ public class NewickFile extends FileParse // Write node info here c.setName(nodename); // Trees without distances still need a render distance - c.dist = (HasDistances) ? distance : DefDistance; + c.dist = (hasDistances) ? distance : DefDistance; // be consistent for internal bootstrap defaults too - c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap); + c.setBootstrap((hasBootstrap) ? bootstrap : DefBootstrap); if (c == realroot) { - RootHasDistance = nodehasdistance; // JBPNote This is really + rootHasDistance = nodehasdistance; // JBPNote This is really // UGLY!!! Ensure root node gets // its given distance } @@ -527,8 +519,8 @@ public class NewickFile extends FileParse { // Find a place to put the leaf SequenceNode newnode = new SequenceNode(null, c, nodename, - (HasDistances) ? distance : DefDistance, - (HasBootstrap) ? bootstrap : DefBootstrap, false); + (hasDistances) ? distance : DefDistance, + (hasBootstrap) ? bootstrap : DefBootstrap, false); parseNHXNodeProps(c, commentString2); commentString2 = null; @@ -547,7 +539,7 @@ public class NewickFile extends FileParse // Insert a dummy node for polytomy // dummy nodes have distances SequenceNode newdummy = new SequenceNode(null, c, null, - (HasDistances ? 0 : DefDistance), 0, true); + (hasDistances ? 0 : DefDistance), 0, true); newdummy.SetChildren(c.left(), newnode); c.setLeft(newdummy); } @@ -561,8 +553,7 @@ public class NewickFile extends FileParse if ((d > -1) && (c == null)) { - Error = ErrorStringrange( - Error, + Error = ErrorStringrange(Error, "File broke algorithm: Lost place in tree (is there an extra ')' ?)", 7, fcp, nf); } @@ -612,19 +603,23 @@ public class NewickFile extends FileParse if (Error != null) { - throw (new IOException(MessageManager.formatMessage("exception.newfile", new String[]{Error.toString()}))); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { Error.toString() }))); } if (root == null) { - throw (new IOException(MessageManager.formatMessage("exception.newfile", new String[]{MessageManager.getString("label.no_tree_read_in")}))); + throw (new IOException( + MessageManager.formatMessage("exception.newfile", new String[] + { MessageManager.getString("label.no_tree_read_in") }))); } // THe next line is failing for topali trees - not sure why yet. if // (root.right()!=null && root.isDummy()) root = (SequenceNode) root.right().detach(); // remove the imaginary root. - if (!RootHasDistance) + if (!rootHasDistance) { - root.dist = (HasDistances) ? 0 : DefDistance; + root.dist = (hasDistances) ? 0 : DefDistance; } } @@ -656,20 +651,20 @@ public class NewickFile extends FileParse try { // parse out code/value pairs - if (code.toLowerCase().equals("b")) + if (code.toLowerCase(Locale.ROOT).equals("b")) { int v = -1; - Float iv = new Float(value); + Float iv = Float.valueOf(value); v = iv.intValue(); // jalview only does integer bootstraps // currently c.setBootstrap(v); - HasBootstrap = true; + hasBootstrap = true; } // more codes here. } catch (Exception e) { - System.err.println("Couldn't parse code '" + code + "' = '" - + value + "'"); + System.err.println( + "Couldn't parse code '" + code + "' = '" + value + "'"); e.printStackTrace(System.err); } } @@ -720,11 +715,11 @@ public class NewickFile extends FileParse { synchronized (this) { - boolean boots = this.HasBootstrap; - this.HasBootstrap = withbootstraps; + boolean boots = this.hasBootstrap; + this.hasBootstrap = withbootstraps; String rv = print(); - this.HasBootstrap = boots; + this.hasBootstrap = boots; return rv; } @@ -746,11 +741,11 @@ public class NewickFile extends FileParse { synchronized (this) { - boolean dists = this.HasDistances; - this.HasDistances = withdists; + boolean dists = this.hasDistances; + this.hasDistances = withdists; String rv = print(withbootstraps); - this.HasDistances = dists; + this.hasDistances = dists; return rv; } @@ -840,10 +835,10 @@ public class NewickFile extends FileParse private String printNodeField(SequenceNode c) { return ((c.getName() == null) ? "" : nodeName(c.getName())) - + ((HasBootstrap) ? ((c.getBootstrap() > -1) ? ((c.getName() != null ? " " - : "") + c.getBootstrap()) - : "") - : "") + ((HasDistances) ? (":" + c.dist) : ""); + + ((hasBootstrap) ? ((c.getBootstrap() > -1) + ? ((c.getName() != null ? " " : "") + c.getBootstrap()) + : "") : "") + + ((hasDistances) ? (":" + c.dist) : ""); } /** @@ -856,12 +851,16 @@ public class NewickFile extends FileParse */ private String printRootField(SequenceNode root) { - return (printRootInfo) ? (((root.getName() == null) ? "" - : nodeName(root.getName())) - + ((HasBootstrap) ? ((root.getBootstrap() > -1) ? ((root - .getName() != null ? " " : "") + +root.getBootstrap()) - : "") : "") + ((RootHasDistance) ? (":" + root.dist) - : "")) : ""; + return (printRootInfo) + ? (((root.getName() == null) ? "" : nodeName(root.getName())) + + ((hasBootstrap) + ? ((root.getBootstrap() > -1) + ? ((root.getName() != null ? " " : "") + + +root.getBootstrap()) + : "") + : "") + + ((rootHasDistance) ? (":" + root.dist) : "")) + : ""; } // Non recursive call deals with root node properties @@ -934,15 +933,19 @@ public class NewickFile extends FileParse } } - // Test + /** + * + * @param args + * @j2sIgnore + */ public static void main(String[] args) { try { if (args == null || args.length != 1) { - System.err - .println("Takes one argument - file name of a newick tree file."); + System.err.println( + "Takes one argument - file name of a newick tree file."); System.exit(0); } @@ -960,11 +963,11 @@ public class NewickFile extends FileParse treefile.close(); System.out.println("Read file :\n"); - NewickFile trf = new NewickFile(args[0], "File"); + NewickFile trf = new NewickFile(args[0], DataSourceType.FILE); trf.parse(); System.out.println("Original file :\n"); - com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", ""); + Regex nonl = new Regex("\n+", ""); System.out.println(nonl.replaceAll(newickfile.toString()) + "\n"); System.out.println("Parsed file.\n");