From: Daniel Barton Date: Thu, 15 Aug 2013 17:13:25 +0000 (+0100) Subject: Alifold results are now parsed and stored in a ScoreManager object X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=f687d44d69ce305901cc18305332ae07f69c7845;p=jabaws.git Alifold results are now parsed and stored in a ScoreManager object --- diff --git a/datamodel/compbio/data/sequence/RNAStruct.java b/datamodel/compbio/data/sequence/RNAStruct.java new file mode 100644 index 0000000..65bca75 --- /dev/null +++ b/datamodel/compbio/data/sequence/RNAStruct.java @@ -0,0 +1,119 @@ +package compbio.data.sequence; + +import java.util.List; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; +import java.util.List; +import java.util.ArrayList; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; + +import compbio.util.annotation.Immutable; +import compbio.util.SysPrefs; +import compbio.data.sequence.Score; + +/* + * RNA secondary structure + * A class which is essentially just a Pair(List, List>) + * For the purpose of creating and retrieving data from ScoreManager Objects + * which are being used to store RNA folding output + */ + +@XmlAccessorType(XmlAccessType.FIELD) +public final class RNAStruct { + + private List structs = new ArrayList(); + private List> data = new ArrayList>(); + + + public RNAStruct() { + // default JaxB Constructor + } + + public RNAStruct(List structs, List> data) { + assert(structs.size() == data.size()); + this.structs = structs; + this.data = data; + } + + public List getStructs() { + return structs; + } + + public List> getData() { + return data; + } + + // Send this data Structure back to something approximating the stdoutFile + // with extra information from alifold.out + @Override + public String toString() { + String out = ""; + // The first objects hold the Consensus Alignment and the alifold.out info + out += structs.get(0) + SysPrefs.newlinechar; + + // Now the rest of the structures with energies/frequencies + for (int i = 1; i < structs.size(); i++) { + out = out + structs.get(i).toString(); + + if (data.get(i).first().getScores().size() > 0) { + List scores = data.get(i).first().getScores(); + if (scores.size() >= 3) { + out = out + " (" + scores.get(0).toString() + " = " + + scores.get(1).toString() + " + " + scores.get(2).toString() + + ")" + SysPrefs.newlinechar; + } + else if (data.get(i).first().getMethod().equals("alifoldMEA")) { + out = out + " { " + scores.get(0).toString() + " MEA=" + + scores.get(1).toString() + "}" + SysPrefs.newlinechar; + } + else if (scores.size() >= 2) { + out = out + " [" + scores.get(0).toString() + ", " + + scores.get(1).toString() + "]" + SysPrefs.newlinechar; + + } + } else out += SysPrefs.newlinechar; + } + if (data.get(0).first().getScores().size() > 0) { + Iterator iScores = data.get(0).iterator(); + out += "Base Pairings followed by probability" + SysPrefs.newlinechar; + for (int i = 0; i < data.get(0).size(); i++) { + Score s = iScores.next(); + Range r = s.getRanges().first(); + Float score = s.getScores().get(0); + out += String.format("%4d %4d %.1f%n", r.getFrom(), r.getTo(), + score); + } + } + + return out; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (!(obj instanceof RNAStruct)) { + return false; + } + RNAStruct other = (RNAStruct) obj; + if (structs == null) { + if (other.structs != null) { + return false; + } + } else if (!structs.equals(other.structs)) + return false; + if (data == null) { + if (other.data != null) + return false; + } else if (!data.equals(other.data)) + return false; + + return true; + } +} + + diff --git a/datamodel/compbio/data/sequence/RNAStructReader.java b/datamodel/compbio/data/sequence/RNAStructReader.java new file mode 100644 index 0000000..cfc83ff --- /dev/null +++ b/datamodel/compbio/data/sequence/RNAStructReader.java @@ -0,0 +1,292 @@ +package compbio.data.sequence; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Scanner; +import java.util.TreeSet; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; + +import compbio.runner.structure.RNAalifold; + +// Utility class for reading alifold output + +public class RNAStructReader { + + private static Logger log = Logger.getLogger(RNAStructReader.class); + + // Whitespace patterns + static String s = "[+\\s=]+"; + static String bracket = "\\(|\\)|\\{|\\}|\\[|\\]"; + static String notData = "[\\s=+]+"; + + // RNAOut data type patterns + static String seqP = "[_\\-a-zA-Z]{2,}"; // Has to match --mis output aswell (not just ACGU_) + static String structP = "[\\.)({}\\[\\],]{2,}"; + static String floatP = "-?\\d+\\.\\d*(e[+\\-]\\d+)?"; + static String energyP = "-?[0-9]*\\.?[0-9]{2}"; + static String freqP = "^-?\\d\\.\\d{6,}(e[+\\-]\\d+)?$"; + + // alifold out line patterns + static String ps = "\\s*"; + static String alignmentP = "^"+seqP+ps+"$"; + static String stdStructP = "^"+structP+s+"\\("+ps+floatP+s+floatP+s+floatP+ps+"\\)"+ps+"$"; + static String justStructP = "^"+structP+ps+"$"; + static String stochBTStructP = "^"+structP+s+floatP+s+floatP+ps+"$"; + static String PStructP = "^"+structP+s+"\\["+ps+floatP+ps+"\\]"+ps+"$"; + static String centStructP = "^"+structP+s+floatP+ps+"\\{"+ps+floatP+s+floatP+ps+"\\}"+ps+"$"; + static String MEAStructP = "^"+structP+s+"\\{"+ps+floatP+s+"MEA="+floatP+ps+"\\}"+ps+"$"; + static String freeEnergyP = "^"+ps+"free energy of ensemble"+ps+"="+ps+floatP+ps+"kcal/mol"+ps+"$"; + static String ensembleFreqP = "^"+ps+"frequency of mfe structure in ensemble "+floatP+ps+"$"; + + public static ScoreManager readRNAStructStream(InputStream stdout) + throws IOException { + + String error = "Error in parsing alifold stdout file: "; + // The Lists required to construct a ScoreManager Using the new constructor + List structs = new ArrayList(); + List> data = new ArrayList>(); + + // Allocate necessry data structures for creating Score objects + ArrayList scores = new ArrayList(); + + BufferedReader reader = new BufferedReader(new InputStreamReader(stdout)); + // The first 2 lines of the alifold stdout file are always the same format + String fline = reader.readLine(); + assert (Pattern.matches(AlifoldLine.alignment.regex, fline)) : + error + "Sequence Alignment Expected"; + structs.add(fline.trim()); + data.add(newEmptyScore(AlifoldResult.alifoldSeq)); + + fline = reader.readLine(); + assert (Pattern.matches(AlifoldLine.stdStruct.regex, fline)) : + error + "Consensus Structure and Energy Expected"; + Scanner sc = new Scanner(fline); + structs.add(sc.next()); + for (int i = 0; i < 3; i++) { + scores.add(Float.parseFloat(sc.findInLine(floatP))); + } + data.add(newSetScore(AlifoldResult.alifold, scores)); + + // Now the alifold stdout file formats diverge based on arguments + fline = reader.readLine(); + String sline; + Scanner nsc = null; + while ( fline != null) { + scores.clear(); + AlifoldLine ftype = identifyLine(fline); + sline = reader.readLine(); // Look ahead + sc = new Scanner(fline); + if (sline != null) nsc = new Scanner(sline); + + if (ftype.equals(AlifoldLine.PStruct)) { + // The -p or --MEA option is specified + // The next line should always be frequency of mfe structure + assert ( sline != null && Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) : + error + "Expected frequency of mfe structure"; + structs.add(sc.next()); + scores.add(Float.parseFloat(sc.findInLine(floatP))); + scores.add(Float.parseFloat(nsc.findInLine(floatP))); + data.add(newSetScore(AlifoldResult.alifoldP, scores)); + // Jump line + sline = reader.readLine(); + } + else if (ftype.equals(AlifoldLine.centStruct)) { + structs.add(sc.next()); + for (int i = 0; i < 3; i++) { + scores.add(Float.parseFloat(sc.findInLine(floatP))); + } + data.add(newSetScore(AlifoldResult.alifoldCentroid, scores)); + } + else if (ftype.equals(AlifoldLine.MEAStruct)) { + structs.add(sc.next()); + for (int i = 0; i < 2; i++) { + scores.add(Float.parseFloat(sc.findInLine(floatP))); + } + data.add(newSetScore(AlifoldResult.alifoldMEA, scores)); + } + else if (ftype.equals(AlifoldLine.justStruct)) { + structs.add(sc.next()); + data.add(newEmptyScore(AlifoldResult.alifoldStochBT)); + } + else if (ftype.equals(AlifoldLine.stochBTStruct)) { + structs.add(sc.next()); + scores.add(sc.nextFloat()); + scores.add(sc.nextFloat()); + data.add(newSetScore(AlifoldResult.alifoldStochBT, scores)); + } + else if (ftype.equals(AlifoldLine.freeEnergy)) { + assert (sline != null + && Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) : + error + "Found 'freeEnergy' line on its own"; + structs.add("Free energy of ensemble (kcal/mol) followed by " + + "frequency of mfe structure in ensemble"); + scores.add(Float.parseFloat(sc.findInLine(floatP))); + scores.add(Float.parseFloat(nsc.findInLine(floatP))); + data.add(newSetScore(AlifoldResult.alifoldMetadata, scores)); + // jump line + sline = reader.readLine(); + } + + + assert(!ftype.equals(AlifoldLine.ensembleFreq)) : + error + "Wasn't expecting 'frequency of mfe structure'!"; + assert(!ftype.equals(AlifoldLine.stdStruct)) : + error + "'Standard output' line at a place other than line 2!"; + assert(!ftype.equals(AlifoldLine.alignment)) : + error + "Wasn't expecting an alignment sequence!"; + assert(!ftype.equals(AlifoldLine.OTHER)) : + error + "Wasn't expecting this whatever it is: " + fline; + if (Pattern.matches("^\\s*$", fline)) { + log.warn("While parsing alifold stdout: A line is either empty or" + + " contains only whitespace"); + } + + fline = sline; + } + + sc.close(); + if (nsc != null) nsc.close(); + + return new ScoreManager(new RNAStruct(structs, data)); + } + + // Just for the purpose of creating nee TreeSet objects of length one + // for adding to a 'data' list to make a ScoreManager + private static TreeSet newSetScore(Enum res, List scores) { + // first convert List to float[] + float[] scoresf = new float[scores.size()]; + Float f; + for (int i = 0; i < scoresf.length; i++) { + f = scores.get(i); + scoresf[i] = ( f != null ? f : Float.NaN); + } + return new TreeSet(Arrays.asList(new Score(res, scoresf))); + } + + // A method just for the purpose of neatly creating Almost Empty score objects + // that can't be null + public static TreeSet newEmptyScore(Enum res) { + return new TreeSet(Arrays.asList(new Score(res, new float[0]))); + } + + public static ScoreManager readRNAStructStream(InputStream stdout, + InputStream alifold) throws IOException { + + // The Lists required to construct a ScoreManager Using the new constructor + List structs; + List> data; + + // Get a ScoreManager that takes the std output but ignores alifold.out (-p) + ScoreManager stdSM = readRNAStructStream(stdout); + + // Unpack this into the structs and data lists + structs = stdSM.asRNAStruct().getStructs(); + data = stdSM.asRNAStruct().getData(); + + // Now parse alifold.out + Scanner sc = new Scanner(alifold); + sc.useDelimiter("[\\s%]+"); + + // jump two lines to the data + sc.nextLine(); sc.nextLine(); + + // Read the first, second and fourth columns. Ignoring everything else. + // Allocate necessry data structures for creating Score objects + ArrayList scores = new ArrayList(); + List rangeHolder = new ArrayList(); + String s = "null"; + while (true) { + s = sc.next(); + if (java.util.regex.Pattern.matches("^[\\.)(]{2,}$", s)) break; + if (!sc.hasNextLine()) break; + int t = sc.nextInt(); + rangeHolder.add(new Range(Integer.parseInt(s), t)); + sc.next(); + scores.add(sc.nextFloat()); + sc.nextLine(); + } + sc.close(); + + // Update the first ScoreHolder TreeSet element + assert (rangeHolder.size() == scores.size()); + TreeSet sHolder = new TreeSet(); + for (int i = 0; i < rangeHolder.size(); i++) { + ArrayList singleS = new ArrayList(Arrays.asList(scores.get(i))); + TreeSet singleR = new TreeSet(Arrays.asList(rangeHolder.get(i))); + sHolder.add(new Score(AlifoldResult.alifoldSeq, singleS, singleR)); + } + + data.set(0, sHolder); + + return new ScoreManager(new RNAStruct(structs, data)); + } + + private static RNAOut identify(String token) { + if (Pattern.matches(seqP, token)) { + return RNAOut.SEQ; + } else if (Pattern.matches(structP, token)) { + return RNAOut.STRUCT; + } else if (Pattern.matches(energyP, token)) { + return RNAOut.ENERGY; + } else if (Pattern.matches(freqP, token)) { + return RNAOut.FREQ; + } + + return RNAOut.OTHER; + } + + private static AlifoldLine identifyLine(String line) { + + for (AlifoldLine il : AlifoldLine.values()) { + if (Pattern.matches(il.regex, line)) return il; + } + return AlifoldLine.OTHER; + } + + static enum AlifoldLine { + stdStruct (stdStructP), + justStruct (justStructP), + stochBTStruct (stochBTStructP), + PStruct (PStructP), + centStruct (centStructP), + MEAStruct (MEAStructP), + freeEnergy (freeEnergyP), + ensembleFreq (ensembleFreqP), + alignment (alignmentP), + OTHER (".*"); + + String regex; + AlifoldLine(String regex) { this.regex = regex; } + + } + + //The types of data in an RNAalifold stdout file + static enum RNAOut { + SEQ, STRUCT, ENERGY, FREQ, OTHER + } + + //Something to put in the Score objects of the alifold result which gives information + //about what kind of sequence it is holding in its String Id. + static enum AlifoldResult { + alifold, alifoldP, alifoldMEA, alifoldCentroid, alifoldStochBT, alifoldSeq, alifoldMetadata + } + + + + // Print the full regex Strings for testing + public static void main(String[] args) { + for (AlifoldLine l : AlifoldLine.values()) { + System.out.println(l.toString() + ": " + l.regex.replace("^","").replace("$","")); + } + } + + + +} diff --git a/datamodel/compbio/data/sequence/RNAstruct.java b/datamodel/compbio/data/sequence/RNAstruct.java deleted file mode 100644 index 1e36064..0000000 --- a/datamodel/compbio/data/sequence/RNAstruct.java +++ /dev/null @@ -1,76 +0,0 @@ -package compbio.data.sequence; - -import java.util.List; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; - -import compbio.util.annotation.Immutable; - -/* - * RNA secondary structure - */ - -@XmlAccessorType(XmlAccessType.FIELD) -public final class RNAstruct { - - private String sequence; - private String structure; // needs to be array to deal with all output - private Float minEnergy; - private Float energySum1; private Float energySum2; - - - public RNAstruct() { - // default JaxB Constructor - } - - public RNAstruct(String sequence, String structure, Float minEnergy - , Float energySum1, Float energySum2) { - this.sequence = sequence; - this.structure = structure; - this.minEnergy = minEnergy; - this.energySum1 = energySum1; - this.energySum2 = energySum2; - } - - public String getSequence() { - return sequence; - } - - public String getStructure() { - return structure; - } - - public Float getEnergy() { - return minEnergy; - } - - - @Override - public String toString() { - String newLine = System.getProperty("line.separator","."); - return sequence + newLine + structure + " (" + minEnergy.toString() - + " = " + energySum1.toString() + " + " + energySum2.toString() - + ")"; - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (!(obj instanceof RNAstruct)) { - return false; - } - RNAstruct st = (RNAstruct) obj; - if (!(this.getSequence() == st.getSequence() && - this.getStructure() == st.getStructure() && - this.getEnergy() == st.getEnergy())) { - return false; - } - - return true; - } -} - - diff --git a/datamodel/compbio/data/sequence/Range.java b/datamodel/compbio/data/sequence/Range.java index c2ccc43..9ade348 100644 --- a/datamodel/compbio/data/sequence/Range.java +++ b/datamodel/compbio/data/sequence/Range.java @@ -40,11 +40,17 @@ public class Range implements Comparable { this.from = Integer.parseInt(twoElementAr[0].trim()); this.to = Integer.parseInt(twoElementAr[1].trim()); } - + + // Daniel Check these get methods are ok with Jim + public int getFrom() { return from; } + public int getTo() { return to; } + + @Override public String toString() { return from + "-" + to; } + @Override public int hashCode() { @@ -70,11 +76,24 @@ public class Range implements Comparable { return true; } + // daniel wants to mess with method. this is how it was +// @Override +// public int compareTo(Range o) { +// if (o == null) +// return 1; +// return new Integer(this.from).compareTo(new Integer(o.from)); +// } + @Override public int compareTo(Range o) { if (o == null) return 1; - return new Integer(this.from).compareTo(new Integer(o.from)); + if (new Integer(this.from).compareTo(new Integer(o.from)) != 0) { + return new Integer(this.from).compareTo(new Integer(o.from)); + } + else { + return new Integer(this.to).compareTo(new Integer(o.to)); + } } } diff --git a/datamodel/compbio/data/sequence/Score.java b/datamodel/compbio/data/sequence/Score.java index fed0ef0..b886590 100644 --- a/datamodel/compbio/data/sequence/Score.java +++ b/datamodel/compbio/data/sequence/Score.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.Writer; import java.text.NumberFormat; import java.util.ArrayList; +import java.util.Iterator; import java.util.Locale; import java.util.TreeSet; @@ -231,8 +232,53 @@ public class Score implements Comparable { writer.flush(); } +// @Override +// public int compareTo(Score o) { +// return this.method.compareTo(o.method); +// } + + /* daniel wants to mess with this method and, while preserving the + * ordering when the method Enumerations are different, add additional + * constraints on how equal Score objects must be to be considered equal + * + * It is necessary to distinguish Score objects by their ranges in order + * to use a Set of Score objects to represent the alifold.out information + * + * It is necessary to distinguish Score objects by their scores in order + * to return Stochastic Backtrack information reliably + */ + + @Override public int compareTo(Score o) { - return this.method.compareTo(o.method); + if (this.method.compareTo(o.method) != 0) { + return this.method.compareTo(o.method); + } + int pass; + pass = new Integer(this.scores.size()).compareTo( + new Integer(o.scores.size())); + if (pass != 0) return pass; + for (int i = 0; i < this.scores.size(); i++) { + pass = this.scores.get(i).compareTo(o.scores.get(i)); + if (pass != 0) { + return pass*-1; // descending order + } + } + + pass = new Integer(this.ranges.size()).compareTo( + new Integer(o.ranges.size())); + if (pass != 0) return pass; + Iterator thisRange = this.ranges.iterator(); + Iterator oRange = o.ranges.iterator(); + for (int i = 0; i < this.ranges.size(); i++) { + Range tR = thisRange.next(); + Range oR = oRange.next(); + + if (tR.compareTo(oR) != 0) { + return tR.compareTo(oR); + } + } + + return 0; } } diff --git a/datamodel/compbio/data/sequence/ScoreManager.java b/datamodel/compbio/data/sequence/ScoreManager.java index d16a059..c3b6a85 100644 --- a/datamodel/compbio/data/sequence/ScoreManager.java +++ b/datamodel/compbio/data/sequence/ScoreManager.java @@ -56,6 +56,27 @@ public class ScoreManager { this.seqScores = seqScores; } + /* + * Next constructor added by Daniel + * a constructor for using ScoreManager to represent RNAalifold output + + */ + + public ScoreManager(RNAStruct rnaStruct) { + assert(rnaStruct.getStructs().size() == rnaStruct.getData().size()); + // This had better be used to create a RNAalifold output structure + assert(java.util.regex.Pattern.matches( + "^[\\.)(]*$", rnaStruct.getStructs().get(1))); + List seqScores = new ArrayList(); + + for (int i = 0; i < rnaStruct.getStructs().size(); i++) { + seqScores.add(new ScoreHolder(rnaStruct.getStructs().get(i), + rnaStruct.getData().get(i))); + } + this.seqScores = seqScores; + } + + public static ScoreManager newInstance(Map> data) { return new ScoreManager(data); } @@ -76,6 +97,19 @@ public class ScoreManager { new TreeSet(data)); } + // Also Daniel's. Check this with Jim to make sure that I am not compromising + // the security of this class + public RNAStruct asRNAStruct() { + List structs = new ArrayList(); + List> data = new ArrayList>(); + for (ScoreHolder sch : this.seqScores) { + structs.add(sch.id); + data.add(sch.scores); + } + return new RNAStruct(structs, data); + } + + public Map> asMap() { Map> seqScoresMap = new TreeMap>(); for (ScoreHolder sch : this.seqScores) { diff --git a/datamodel/compbio/data/sequence/SequenceUtil.java b/datamodel/compbio/data/sequence/SequenceUtil.java index 884315d..3a84873 100644 --- a/datamodel/compbio/data/sequence/SequenceUtil.java +++ b/datamodel/compbio/data/sequence/SequenceUtil.java @@ -23,18 +23,21 @@ import java.io.BufferedWriter; import java.io.Closeable; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.logging.Level; import java.util.regex.Matcher; @@ -757,32 +760,7 @@ public final class SequenceUtil { } return annotations; } - - /* - * Reads and parses the output of an RNAalifold job - * - * OUT OF USE - */ - public static RNAstruct readRNAalifoldResults(InputStream results) { - if (results == null) { - throw new NullPointerException( - "InputStream with results must be provided"); - } - Scanner sc = new Scanner(results); - sc.useDelimiter("\\s+"); - String sequence = sc.next(); - String structure = sc.next(); - // now get energy values out of the brackets - String regex = "-?[0-9]*\\.?[0-9]+"; - Float minEnergy = Float.parseFloat(sc.findInLine(regex)); - - RNAstruct rnastruct = new RNAstruct(sequence, structure, minEnergy, - Float.parseFloat(sc.findInLine(regex)), - Float.parseFloat(sc.findInLine(regex))); - - sc.close(); - return rnastruct; - } + @@ -889,4 +867,4 @@ enum IUPredResult { + "\nFile must ends with one of [glob, long or short]" + "\n but given file name was: " + file.getName()); } -} \ No newline at end of file +} diff --git a/runner/compbio/runner/Util.java b/runner/compbio/runner/Util.java index c6cb33f..f9106ca 100644 --- a/runner/compbio/runner/Util.java +++ b/runner/compbio/runner/Util.java @@ -25,16 +25,29 @@ import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.util.Arrays; import java.util.List; +import java.util.ArrayList; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.TreeMap; +import java.util.regex.Pattern; +import java.util.regex.Matcher; +import java.util.Scanner; + + import org.apache.log4j.Logger; import compbio.data.sequence.Alignment; import compbio.data.sequence.ClustalAlignmentUtil; import compbio.data.sequence.FastaSequence; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.RNAStruct; import compbio.data.sequence.Score; +import compbio.data.sequence.ScoreManager; +import compbio.data.sequence.Range; +import compbio.data.sequence.RNAStructReader; import compbio.data.sequence.SequenceUtil; import compbio.data.sequence.UnknownFileFormatException; import compbio.engine.client.ConfExecutable; @@ -152,38 +165,39 @@ public final class Util { + e.getLocalizedMessage(), e); } } + - // OUT OF USE - -// public static final RNAstruct readRNAStructFile(String workDirectory, -// String structFile) throws IOException, FileNotFoundException { -// assert !compbio.util.Util.isEmpty(workDirectory); -// assert !compbio.util.Util.isEmpty(structFile); -// File sfile = new File(compbio.engine.client.Util.getFullPath( -// workDirectory, structFile)); -// if(!(sfile.exists() && sfile.length() > 0)) { -// throw new FileNotFoundException("Result for the jobId " -// + workDirectory + "with file name " + structFile -// + " is not found!"); -// } -// return compbio.data.sequence.SequenceUtil.readRNAalifoldResults( -// new FileInputStream(sfile)); -// } - - public static String readRNAStruct(String workDirectory, + public static ScoreManager readRNAStruct(String workDirectory, String structFile) throws IOException, FileNotFoundException { + assert !compbio.util.Util.isEmpty(workDirectory); assert !compbio.util.Util.isEmpty(structFile); - File sfile = new File(compbio.engine.client.Util.getFullPath( + // The stdout from RNAalifold + File sFile = new File(compbio.engine.client.Util.getFullPath( workDirectory, structFile)); - if(!(sfile.exists() && sfile.length() > 0)) { + // Base pair probability matrix (-p option) + File aliFile = new File(compbio.engine.client.Util.getFullPath( + workDirectory, "alifold.out")); + // Check that stdout file exists + if(!(sFile.exists() && sFile.length() > 0)) { throw new FileNotFoundException("Result for the jobId " + workDirectory + "with file name " + structFile + " is not found!"); } - return readFile(sfile); + // Check that base pair probability file exists + if(!aliFile.exists()) { + log.warn("The file alifold.out is not found for the jobId " + + workDirectory + "Is the -p or --MEA option not specified?"); + return RNAStructReader.readRNAStructStream(new FileInputStream(sFile)); + + } else { + return RNAStructReader.readRNAStructStream(new FileInputStream(sFile), + new FileInputStream(aliFile)); + } } + + public static String readFile(File inputFile) throws FileNotFoundException, IOException { @@ -203,7 +217,6 @@ public final class Util { input.close(); return file; } - - - } + + diff --git a/runner/compbio/runner/structure/RNAalifold.java b/runner/compbio/runner/structure/RNAalifold.java index 52a784c..0e9e20f 100644 --- a/runner/compbio/runner/structure/RNAalifold.java +++ b/runner/compbio/runner/structure/RNAalifold.java @@ -12,7 +12,8 @@ import java.util.List; import org.apache.log4j.Logger; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.ScoreManager; +import compbio.data.sequence.RNAStruct; import compbio.data.sequence.UnknownFileFormatException; import compbio.engine.client.PipedExecutable; import compbio.engine.client.SkeletalExecutable; @@ -58,7 +59,7 @@ public class RNAalifold extends SkeletalExecutable @SuppressWarnings("unchecked") @Override // PlaceHolder method - public String getResults(String workDirectory) + public ScoreManager getResults(String workDirectory) throws ResultNotAvailableException { try { return Util.readRNAStruct(workDirectory, getOutput()); diff --git a/testsrc/compbio/runner/structure/RNAalifoldParametersTester.java b/testsrc/compbio/runner/structure/RNAalifoldParametersTester.java index b93cd18..2168a6c 100644 --- a/testsrc/compbio/runner/structure/RNAalifoldParametersTester.java +++ b/testsrc/compbio/runner/structure/RNAalifoldParametersTester.java @@ -22,6 +22,7 @@ import org.apache.log4j.Logger; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import compbio.data.sequence.ScoreManager; import compbio.engine.Configurator; import compbio.engine.client.ConfiguredExecutable; import compbio.engine.client.Executable.ExecProvider; @@ -280,7 +281,7 @@ public class RNAalifoldParametersTester { List> options = rnaalifoldOpc.getAllOptions(); // take out -p options so it can be added to all commands later - options = removeParam(options, "Partition Function"); + // options = removeParam(options, "Partition Function"); options.addAll(rnaalifoldOpc.getAllParameters()); List> failedOptionPairs = new ArrayList>(); @@ -293,7 +294,7 @@ public class RNAalifoldParametersTester { pair.add(options.get(i)); pair.add(options.get(j)); List args = rnaalifoldOpc.argumentsToCommandString(pair); // add -p - args.add("-p"); + // args.add("-p"); try { failed = singleRun(args); } catch (ResultNotAvailableException e) { @@ -356,10 +357,10 @@ public class RNAalifoldParametersTester { //List args = rnaalifoldOpc.argumentsToCommandString(params); List args = new ArrayList(); - args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10"); + //args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10"); // replace "=" with " " to fail test args.add("--MEA=1"); - System.out.println("special: args: " + args); + args.add("-p"); singleTest(args); } @@ -379,7 +380,8 @@ public class RNAalifoldParametersTester { confRNAalifold = (ConfiguredExecutable) lr.waitForResult(); assertNotNull(confRNAalifold.getResults(), "results is null"); - //System.out.println("Results: \n" + confRNAalifold.getResults()); + System.out.println("Results: \n" + + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString()); File errors = new File(confRNAalifold.getWorkDirectory(), ExecutableWrapper.PROC_ERR_FILE); @@ -429,8 +431,8 @@ public class RNAalifoldParametersTester { lr.executeJob(); confRNAalifold = (ConfiguredExecutable) lr.waitForResult(); - //System.out.println("Results: \n" + confRNAalifold.getResults()); - + System.out.println("Results: \n" + + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString()); if (confRNAalifold.getResults() != null) fail = false; File errors = new File(confRNAalifold.getWorkDirectory(), ExecutableWrapper.PROC_ERR_FILE); diff --git a/testsrc/compbio/runner/structure/RNAalifoldTester.java b/testsrc/compbio/runner/structure/RNAalifoldTester.java index 4363e8d..4f5b508 100644 --- a/testsrc/compbio/runner/structure/RNAalifoldTester.java +++ b/testsrc/compbio/runner/structure/RNAalifoldTester.java @@ -62,10 +62,11 @@ public class RNAalifoldTester { + "RNAalifoldParameters.xml"; public static String test_outfile = "rnaalifold.out"; + @Test(groups = { AllTestSuit.test_group_runner }) public void testRunLocally() { RNAalifold rnaalifold = new RNAalifold(); - rnaalifold.setInput(AllTestSuit.test_alignment_input).setOutput(test_outfile); + rnaalifold.setInput(AllTestSuit.test_input_aln).setOutput(test_outfile); try{ ConfiguredExecutable confRNAalifold = Configurator @@ -73,6 +74,10 @@ public class RNAalifoldTester { LocalRunner lr = new LocalRunner(confRNAalifold); lr.executeJob(); confRNAalifold = (ConfiguredExecutable) lr.waitForResult(); + + System.out.println("TEST"); + System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString()); + assertNotNull(confRNAalifold.getResults()); } catch (JobSubmissionException e) { e.printStackTrace(); @@ -88,7 +93,7 @@ public class RNAalifoldTester { public static void main(String[] args) throws JobSubmissionException, - JobExecutionException, InterruptedException { + JobExecutionException, InterruptedException, ResultNotAvailableException { log.warn("Logger test :- Run RNAalifold.main()"); @@ -101,6 +106,10 @@ public class RNAalifoldTester { AsyncExecutor lr = new AsyncLocalRunner(); lr.submitJob(confRNAalifold); + + System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString()); + + Thread.sleep(3000); LocalExecutorService.shutDown(); diff --git a/testsrc/compbio/ws/client/TestRNAalifoldWS.java b/testsrc/compbio/ws/client/TestRNAalifoldWS.java index b061fcc..5e66f43 100644 --- a/testsrc/compbio/ws/client/TestRNAalifoldWS.java +++ b/testsrc/compbio/ws/client/TestRNAalifoldWS.java @@ -22,7 +22,7 @@ import compbio.data.msa.FoldWS; import compbio.data.msa.JABAService; import compbio.data.msa.SequenceAnnotation; import compbio.data.sequence.Alignment; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.RNAStruct; import compbio.data.sequence.ClustalAlignmentUtil; import compbio.data.sequence.ConservationMethod; import compbio.data.sequence.FastaSequence; diff --git a/webservices/compbio/data/msa/FoldWS.java b/webservices/compbio/data/msa/FoldWS.java index 4a99b9f..19282d6 100644 --- a/webservices/compbio/data/msa/FoldWS.java +++ b/webservices/compbio/data/msa/FoldWS.java @@ -9,7 +9,7 @@ import javax.jws.WebService; import javax.naming.ldap.UnsolicitedNotificationEvent; import compbio.data.sequence.Alignment; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.RNAStruct; import compbio.metadata.JobSubmissionException; import compbio.metadata.LimitExceededException; import compbio.metadata.Option; diff --git a/webservices/compbio/ws/client/Jws2Client.java b/webservices/compbio/ws/client/Jws2Client.java index a06a6fa..aa048b0 100644 --- a/webservices/compbio/ws/client/Jws2Client.java +++ b/webservices/compbio/ws/client/Jws2Client.java @@ -50,7 +50,7 @@ import compbio.data.msa.RegistryWS; import compbio.data.msa.SequenceAnnotation; import compbio.data.sequence.Alignment; import compbio.data.sequence.FastaSequence; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.RNAStruct; import compbio.data.sequence.ScoreManager; import compbio.data.sequence.ClustalAlignmentUtil; import compbio.data.sequence.SequenceUtil; diff --git a/webservices/compbio/ws/server/RNAalifoldWS.java b/webservices/compbio/ws/server/RNAalifoldWS.java index cafb173..7ff54e7 100644 --- a/webservices/compbio/ws/server/RNAalifoldWS.java +++ b/webservices/compbio/ws/server/RNAalifoldWS.java @@ -13,7 +13,7 @@ import compbio.data.msa.JABAService; import compbio.data.msa.JManagement; import compbio.data.msa.Metadata; import compbio.data.sequence.Alignment; -import compbio.data.sequence.RNAstruct; +import compbio.data.sequence.RNAStruct; import compbio.data.sequence.FastaSequence; import compbio.engine.AsyncExecutor; import compbio.engine.Configurator;