--- /dev/null
+package compbio.data.sequence;
+
+import java.util.List;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.List;
+import java.util.ArrayList;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+
+import compbio.util.annotation.Immutable;
+import compbio.util.SysPrefs;
+import compbio.data.sequence.Score;
+
+/**
+ * RNA secondary structure.
+ * <p>
+ * Essentially a pair of parallel lists, (List&lt;String&gt;, List&lt;TreeSet&lt;Score&gt;&gt;),
+ * used for creating and retrieving data from ScoreManager objects which store
+ * RNA folding output. The constructor asserts that both lists have the same
+ * length; element {@code i} of the data list is printed next to element
+ * {@code i} of the structure list by {@link #toString()}.
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+public final class RNAStruct {
+
+    private List<String> structs = new ArrayList<String>();
+    private List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();
+
+
+    public RNAStruct() {
+        // default JaxB Constructor
+    }
+
+    /**
+     * @param structs the structure (and header) strings
+     * @param data    the score sets; asserted to have the same length as
+     *                {@code structs}. Both lists are stored as given, not copied.
+     */
+    public RNAStruct(List<String> structs, List<TreeSet<Score>> data) {
+        assert(structs.size() == data.size());
+        this.structs = structs;
+        this.data = data;
+    }
+
+    /** @return the list of structure strings held by this object (not a copy) */
+    public List<String> getStructs() {
+        return structs;
+    }
+
+    /** @return the list of score sets held by this object (not a copy) */
+    public List<TreeSet<Score>> getData() {
+        return data;
+    }
+
+    /**
+     * Sends this data structure back to something approximating the stdout
+     * file, with extra information from alifold.out.
+     * <p>
+     * The first structure string is printed on its own line. Every further
+     * string is followed by the scores of the first Score in the matching
+     * data set: three or more scores print as {@code (a = b + c)}, two scores
+     * of the "alifoldMEA" method as <code>{ a MEA=b}</code>, two scores of any
+     * other method as {@code [a, b]}. Every structure line is terminated, even
+     * when it has no printable scores. If the first data set starts with a
+     * Score that has scores, all of its elements are listed as base pairings.
+     *
+     * @return the text form, or an empty string when there are no structures
+     */
+    @Override
+    public String toString() {
+        // An instance created by the default constructor has nothing to print
+        if (structs == null || structs.isEmpty()) {
+            return "";
+        }
+        StringBuilder out = new StringBuilder();
+        // The first objects hold the Consensus Alignment and the alifold.out info
+        out.append(structs.get(0)).append(SysPrefs.newlinechar);
+
+        // Now the rest of the structures with energies/frequencies
+        for (int i = 1; i < structs.size(); i++) {
+            out.append(structs.get(i));
+            appendScores(out, firstScoreAt(i));
+            out.append(SysPrefs.newlinechar);
+        }
+
+        Score firstPair = firstScoreAt(0);
+        if (firstPair != null && firstPair.getScores().size() > 0) {
+            out.append("Base Pairings followed by probability")
+               .append(SysPrefs.newlinechar);
+            for (Score s : data.get(0)) {
+                Range r = s.getRanges().first();
+                Float score = s.getScores().get(0);
+                out.append(String.format("%4d %4d %.1f%n", r.getFrom(),
+                        r.getTo(), score));
+            }
+        }
+
+        return out.toString();
+    }
+
+    // Returns the first Score of the data set at the given index, or null when
+    // there is no such set or the set is empty.
+    private Score firstScoreAt(int index) {
+        if (data == null || index >= data.size()) {
+            return null;
+        }
+        TreeSet<Score> set = data.get(index);
+        return (set == null || set.isEmpty()) ? null : set.first();
+    }
+
+    // Appends the bracketed score summary of one structure line (no line
+    // terminator). Appends nothing when there are fewer than two scores.
+    private static void appendScores(StringBuilder out, Score score) {
+        if (score == null) {
+            return;
+        }
+        List<Float> scores = score.getScores();
+        if (scores.size() >= 3) {
+            out.append(" (").append(scores.get(0)).append(" = ")
+               .append(scores.get(1)).append(" + ").append(scores.get(2))
+               .append(")");
+        } else if (scores.size() >= 2
+                && "alifoldMEA".equals(score.getMethod())) {
+            out.append(" { ").append(scores.get(0)).append(" MEA=")
+               .append(scores.get(1)).append("}");
+        } else if (scores.size() >= 2) {
+            out.append(" [").append(scores.get(0)).append(", ")
+               .append(scores.get(1)).append("]");
+        }
+    }
+
+    /**
+     * Two RNAStruct objects are equal when both their structure lists and
+     * their data lists are equal (or both null).
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        // instanceof is false for null, so this also rejects null
+        if (!(obj instanceof RNAStruct)) {
+            return false;
+        }
+        RNAStruct other = (RNAStruct) obj;
+        return nullSafeEquals(structs, other.structs)
+                && nullSafeEquals(data, other.data);
+    }
+
+    /** Hash code derived from the same two fields that equals() compares. */
+    @Override
+    public int hashCode() {
+        int result = 17;
+        result = 31 * result + (structs == null ? 0 : structs.hashCode());
+        result = 31 * result + (data == null ? 0 : data.hashCode());
+        return result;
+    }
+
+    private static boolean nullSafeEquals(Object a, Object b) {
+        return (a == null) ? (b == null) : a.equals(b);
+    }
+}
+
+
--- /dev/null
+package compbio.data.sequence;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Scanner;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+
+import compbio.runner.structure.RNAalifold;
+
+// Utility class for reading alifold output
+
+public class RNAStructReader {
+
+ private static Logger log = Logger.getLogger(RNAStructReader.class);
+
+ // Separator patterns
+ // s: a run of one or more whitespace, '+' or '=' characters; used between
+ // the fields of the line patterns below
+ static String s = "[+\\s=]+";
+ // bracket: a single round, curly or square bracket character
+ // (not referenced elsewhere in this class)
+ static String bracket = "\\(|\\)|\\{|\\}|\\[|\\]";
+ // notData: same character set as 's' (not referenced elsewhere in this class)
+ static String notData = "[\\s=+]+";
+
+ // RNAOut data type patterns (used by identify() and to build the line patterns)
+ static String seqP = "[_\\-a-zA-Z]{2,}"; // Has to match --mis output as well (not just ACGU_)
+ // structP: two or more of the characters . ) ( { } [ ] ,
+ static String structP = "[\\.)({}\\[\\],]{2,}";
+ // floatP: optional minus sign, digits, a mandatory decimal point, optional
+ // fraction digits and an optional exponent such as e-05
+ static String floatP = "-?\\d+\\.\\d*(e[+\\-]\\d+)?";
+ // energyP: optional minus sign, optional digits, optional point, exactly two digits
+ static String energyP = "-?[0-9]*\\.?[0-9]{2}";
+ // freqP: anchored; one digit, a point, six or more digits, optional exponent
+ static String freqP = "^-?\\d\\.\\d{6,}(e[+\\-]\\d+)?$";
+
+ // alifold out line patterns; each one is anchored to match a whole line
+ // ps: optional whitespace
+ static String ps = "\\s*";
+ // a sequence token alone on the line
+ static String alignmentP = "^"+seqP+ps+"$";
+ // structure followed by three numbers inside round brackets
+ static String stdStructP = "^"+structP+s+"\\("+ps+floatP+s+floatP+s+floatP+ps+"\\)"+ps+"$";
+ // structure alone on the line
+ static String justStructP = "^"+structP+ps+"$";
+ // structure followed by two numbers
+ static String stochBTStructP = "^"+structP+s+floatP+s+floatP+ps+"$";
+ // structure followed by one number inside square brackets
+ static String PStructP = "^"+structP+s+"\\["+ps+floatP+ps+"\\]"+ps+"$";
+ // structure, one number, then two numbers inside curly brackets
+ static String centStructP = "^"+structP+s+floatP+ps+"\\{"+ps+floatP+s+floatP+ps+"\\}"+ps+"$";
+ // structure, then curly brackets holding a number and "MEA=" plus a number
+ static String MEAStructP = "^"+structP+s+"\\{"+ps+floatP+s+"MEA="+floatP+ps+"\\}"+ps+"$";
+ // the "free energy of ensemble = <number> kcal/mol" line
+ static String freeEnergyP = "^"+ps+"free energy of ensemble"+ps+"="+ps+floatP+ps+"kcal/mol"+ps+"$";
+ // the "frequency of mfe structure in ensemble <number>" line
+ static String ensembleFreqP = "^"+ps+"frequency of mfe structure in ensemble "+floatP+ps+"$";
+
+ /**
+  * Parses the standard output of an RNAalifold run.
+  * <p>
+  * Line 1 is stored as the alignment line, line 2 as the consensus structure
+  * with its three numbers. Every later line is classified with identifyLine
+  * and stored according to its type. Lines that are empty or contain only
+  * whitespace are logged and skipped.
+  * <p>
+  * The stream is read to its end but is not closed by this method.
+  *
+  * @param stdout stream over the RNAalifold stdout file
+  * @return a ScoreManager wrapping the parsed structures and their scores
+  * @throws IOException if the stream cannot be read, if one of the first two
+  *         lines is missing, if a line that needs the following "frequency of
+  *         mfe structure" line is the last line, or if an expected number is
+  *         absent from a line
+  */
+ public static ScoreManager readRNAStructStream(InputStream stdout)
+         throws IOException {
+
+     String error = "Error in parsing alifold stdout file: ";
+     // The Lists required to construct a ScoreManager Using the new constructor
+     List<String> structs = new ArrayList<String>();
+     List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();
+
+     // Allocate necessary data structures for creating Score objects.
+     // newSetScore copies the values, so this list is reused for every line.
+     ArrayList<Float> scores = new ArrayList<Float>();
+
+     BufferedReader reader = new BufferedReader(new InputStreamReader(stdout));
+     // The first 2 lines of the alifold stdout file are always the same format
+     String fline = reader.readLine();
+     if (fline == null) {
+         throw new IOException(error + "Sequence Alignment Expected");
+     }
+     assert (Pattern.matches(AlifoldLine.alignment.regex, fline)) :
+         error + "Sequence Alignment Expected";
+     structs.add(fline.trim());
+     data.add(newEmptyScore(AlifoldResult.alifoldSeq));
+
+     fline = reader.readLine();
+     if (fline == null) {
+         throw new IOException(error + "Consensus Structure and Energy Expected");
+     }
+     assert (Pattern.matches(AlifoldLine.stdStruct.regex, fline)) :
+         error + "Consensus Structure and Energy Expected";
+     Scanner sc = new Scanner(fline);
+     try {
+         structs.add(sc.next());
+         for (int i = 0; i < 3; i++) {
+             scores.add(nextFloatInLine(sc, fline));
+         }
+     } finally {
+         sc.close();
+     }
+     data.add(newSetScore(AlifoldResult.alifold, scores));
+
+     // Now the alifold stdout file formats diverge based on arguments
+     fline = reader.readLine();
+     while (fline != null) {
+         scores.clear();
+         String sline = reader.readLine(); // Look ahead
+
+         // Tolerate blank lines before classifying, so that the warning is
+         // reached whether or not assertions are enabled
+         if (Pattern.matches("^\\s*$", fline)) {
+             log.warn("While parsing alifold stdout: A line is either empty or"
+                     + " contains only whitespace");
+             fline = sline;
+             continue;
+         }
+
+         AlifoldLine ftype = identifyLine(fline);
+         sc = new Scanner(fline);
+         // Fresh per iteration: never reuse the scanner of an earlier line
+         Scanner nsc = (sline != null) ? new Scanner(sline) : null;
+         try {
+             if (ftype == AlifoldLine.PStruct) {
+                 // The -p or --MEA option is specified
+                 // The next line should always be frequency of mfe structure
+                 if (nsc == null) {
+                     throw new IOException(error
+                             + "Expected frequency of mfe structure");
+                 }
+                 assert (Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) :
+                     error + "Expected frequency of mfe structure";
+                 structs.add(sc.next());
+                 scores.add(nextFloatInLine(sc, fline));
+                 scores.add(nextFloatInLine(nsc, sline));
+                 data.add(newSetScore(AlifoldResult.alifoldP, scores));
+                 // Jump line
+                 sline = reader.readLine();
+             }
+             else if (ftype == AlifoldLine.centStruct) {
+                 structs.add(sc.next());
+                 for (int i = 0; i < 3; i++) {
+                     scores.add(nextFloatInLine(sc, fline));
+                 }
+                 data.add(newSetScore(AlifoldResult.alifoldCentroid, scores));
+             }
+             else if (ftype == AlifoldLine.MEAStruct) {
+                 structs.add(sc.next());
+                 for (int i = 0; i < 2; i++) {
+                     scores.add(nextFloatInLine(sc, fline));
+                 }
+                 data.add(newSetScore(AlifoldResult.alifoldMEA, scores));
+             }
+             else if (ftype == AlifoldLine.justStruct) {
+                 structs.add(sc.next());
+                 data.add(newEmptyScore(AlifoldResult.alifoldStochBT));
+             }
+             else if (ftype == AlifoldLine.stochBTStruct) {
+                 structs.add(sc.next());
+                 // Parsed like every other number here, independent of the
+                 // default locale (Scanner.nextFloat is locale-sensitive)
+                 scores.add(nextFloatInLine(sc, fline));
+                 scores.add(nextFloatInLine(sc, fline));
+                 data.add(newSetScore(AlifoldResult.alifoldStochBT, scores));
+             }
+             else if (ftype == AlifoldLine.freeEnergy) {
+                 if (nsc == null) {
+                     throw new IOException(error
+                             + "Found 'freeEnergy' line on its own");
+                 }
+                 assert (Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) :
+                     error + "Found 'freeEnergy' line on its own";
+                 structs.add("Free energy of ensemble (kcal/mol) followed by "
+                         + "frequency of mfe structure in ensemble");
+                 scores.add(nextFloatInLine(sc, fline));
+                 scores.add(nextFloatInLine(nsc, sline));
+                 data.add(newSetScore(AlifoldResult.alifoldMetadata, scores));
+                 // jump line
+                 sline = reader.readLine();
+             }
+         } finally {
+             sc.close();
+             if (nsc != null) nsc.close();
+         }
+
+         assert(!ftype.equals(AlifoldLine.ensembleFreq)) :
+             error + "Wasn't expecting 'frequency of mfe structure'!";
+         assert(!ftype.equals(AlifoldLine.stdStruct)) :
+             error + "'Standard output' line at a place other than line 2!";
+         assert(!ftype.equals(AlifoldLine.alignment)) :
+             error + "Wasn't expecting an alignment sequence!";
+         assert(!ftype.equals(AlifoldLine.OTHER)) :
+             error + "Wasn't expecting this whatever it is: " + fline;
+
+         fline = sline;
+     }
+
+     return new ScoreManager(new RNAStruct(structs, data));
+ }
+
+ // Returns the next number matching floatP in the scanner's current line;
+ // throws IOException instead of a NullPointerException when there is none.
+ private static float nextFloatInLine(Scanner sc, String line)
+         throws IOException {
+     String token = sc.findInLine(floatP);
+     if (token == null) {
+         throw new IOException("Error in parsing alifold stdout file: "
+                 + "expected a number in line: " + line);
+     }
+     return Float.parseFloat(token);
+ }
+
+ // Wraps one new Score, built from the given values, in a TreeSet<Score> of
+ // length one, ready to be added to a 'data' list for making a ScoreManager
+ private static TreeSet<Score> newSetScore(Enum<?> res, List<Float> scores) {
+     // unbox List<Float> into float[]; a null entry becomes NaN
+     float[] values = new float[scores.size()];
+     int idx = 0;
+     for (Float boxed : scores) {
+         if (boxed == null) {
+             values[idx] = Float.NaN;
+         } else {
+             values[idx] = boxed.floatValue();
+         }
+         idx++;
+     }
+     TreeSet<Score> single = new TreeSet<Score>();
+     single.add(new Score(res, values));
+     return single;
+ }
+
+ // Builds a set holding a single Score that carries no score values, so
+ // that a 'data' entry is never null
+ public static TreeSet<Score> newEmptyScore(Enum<?> res) {
+     Score placeholder = new Score(res, new float[0]);
+     TreeSet<Score> holder = new TreeSet<Score>();
+     holder.add(placeholder);
+     return holder;
+ }
+
+ /**
+  * Parses RNAalifold stdout together with the alifold.out file.
+  * <p>
+  * The stdout stream is parsed by the one-argument overload. From
+  * alifold.out the first two lines are skipped, then the first, second and
+  * fourth columns of every row are read (two positions and a number; '%' is
+  * treated as a separator) until a dot-bracket token or the end of input is
+  * reached. When at least one row was read, the resulting Score objects
+  * replace the first data set of the result; otherwise the first data set
+  * is left as parsed from stdout.
+  * <p>
+  * The alifold stream is closed by this method; stdout is not.
+  *
+  * @param stdout  stream over the RNAalifold stdout file
+  * @param alifold stream over the alifold.out file
+  * @return a ScoreManager holding the combined information
+  * @throws IOException if stdout cannot be parsed or a row of alifold.out is
+  *         incomplete or not numeric
+  */
+ public static ScoreManager readRNAStructStream(InputStream stdout,
+         InputStream alifold) throws IOException {
+
+     // Get a ScoreManager that takes the std output but ignores alifold.out (-p)
+     // and unpack it once into the structs and data lists
+     RNAStruct parsed = readRNAStructStream(stdout).asRNAStruct();
+     List<String> structs = parsed.getStructs();
+     List<TreeSet<Score>> data = parsed.getData();
+
+     String error = "Error in parsing alifold.out: ";
+     TreeSet<Score> sHolder = new TreeSet<Score>();
+
+     // Now parse alifold.out
+     Scanner sc = new Scanner(alifold);
+     try {
+         sc.useDelimiter("[\\s%]+");
+
+         // jump two lines to the data (fewer if the file is shorter)
+         for (int skipped = 0; skipped < 2 && sc.hasNextLine(); skipped++) {
+             sc.nextLine();
+         }
+
+         // Read the first, second and fourth columns. Ignoring everything else.
+         while (sc.hasNext()) {
+             String first = sc.next();
+             // a dot-bracket string marks the end of the table
+             if (Pattern.matches("^[\\.)(]{2,}$", first)) break;
+             if (!sc.hasNextLine()) break;
+
+             int from;
+             int to;
+             float probability;
+             try {
+                 // parsed with parseInt/parseFloat so that the result does
+                 // not depend on the default locale
+                 from = Integer.parseInt(first);
+                 to = Integer.parseInt(sc.next());
+                 sc.next(); // third column is ignored
+                 probability = Float.parseFloat(sc.next());
+             } catch (NumberFormatException e) {
+                 throw new IOException(error + "non-numeric value in row"
+                         + " starting with '" + first + "'", e);
+             } catch (java.util.NoSuchElementException e) {
+                 throw new IOException(error + "incomplete row starting"
+                         + " with '" + first + "'", e);
+             }
+
+             ArrayList<Float> singleS = new ArrayList<Float>(1);
+             singleS.add(Float.valueOf(probability));
+             TreeSet<Range> singleR = new TreeSet<Range>();
+             singleR.add(new Range(from, to));
+             sHolder.add(new Score(AlifoldResult.alifoldSeq, singleS, singleR));
+
+             // drop the rest of the row
+             if (sc.hasNextLine()) sc.nextLine();
+         }
+     } finally {
+         sc.close();
+     }
+
+     // Update the first TreeSet<Score> element, but never with an empty set:
+     // consumers call first() on it
+     if (!sHolder.isEmpty()) {
+         data.set(0, sHolder);
+     }
+
+     return new ScoreManager(new RNAStruct(structs, data));
+ }
+
+ // Classifies a single token by testing it against seqP, structP, energyP
+ // and freqP, in that order; the first pattern that matches the whole
+ // token decides, and OTHER is the answer when none does.
+ private static RNAOut identify(String token) {
+     RNAOut kind = RNAOut.OTHER;
+     if (Pattern.matches(seqP, token)) {
+         kind = RNAOut.SEQ;
+     } else if (Pattern.matches(structP, token)) {
+         kind = RNAOut.STRUCT;
+     } else if (Pattern.matches(energyP, token)) {
+         kind = RNAOut.ENERGY;
+     } else if (Pattern.matches(freqP, token)) {
+         kind = RNAOut.FREQ;
+     }
+     return kind;
+ }
+
+ // Returns the first AlifoldLine constant, in declaration order, whose
+ // regex matches the whole line; OTHER when none matches.
+ private static AlifoldLine identifyLine(String line) {
+     AlifoldLine[] candidates = AlifoldLine.values();
+     int i = 0;
+     while (i < candidates.length
+             && !Pattern.matches(candidates[i].regex, line)) {
+         i++;
+     }
+     return (i < candidates.length) ? candidates[i] : AlifoldLine.OTHER;
+ }
+
+ // The kinds of line found in an RNAalifold stdout file, each paired with
+ // the regular expression that recognises a whole line of that kind.
+ // Declaration order matters: identifyLine() returns the first constant
+ // whose regex matches, so OTHER (which matches any line) must stay last.
+ static enum AlifoldLine {
+     stdStruct (stdStructP),
+     justStruct (justStructP),
+     stochBTStruct (stochBTStructP),
+     PStruct (PStructP),
+     centStruct (centStructP),
+     MEAStruct (MEAStructP),
+     freeEnergy (freeEnergyP),
+     ensembleFreq (ensembleFreqP),
+     alignment (alignmentP),
+     OTHER (".*");
+
+     // final: the pattern of a constant must not be reassigned after creation
+     final String regex;
+     AlifoldLine(String regex) { this.regex = regex; }
+
+ }
+
+ // The types of data in an RNAalifold stdout file.
+ // These are the values returned by identify(): SEQ, STRUCT, ENERGY and FREQ
+ // for tokens matching seqP, structP, energyP and freqP respectively, and
+ // OTHER for everything else.
+ static enum RNAOut {
+ SEQ, STRUCT, ENERGY, FREQ, OTHER
+ }
+
+ // Something to put in the Score objects of the alifold result which gives
+ // information about what kind of sequence it is holding in its String Id.
+ // As used by the readers in this class:
+ //   alifoldSeq      - the alignment line and the alifold.out base-pair scores
+ //   alifold         - the consensus structure on line 2
+ //   alifoldP        - a PStruct line plus the following frequency line
+ //   alifoldMEA      - an MEAStruct line
+ //   alifoldCentroid - a centStruct line
+ //   alifoldStochBT  - justStruct and stochBTStruct lines
+ //   alifoldMetadata - the free energy line plus the following frequency line
+ static enum AlifoldResult {
+ alifold, alifoldP, alifoldMEA, alifoldCentroid, alifoldStochBT, alifoldSeq, alifoldMetadata
+ }
+
+
+
+ // Testing aid: writes every line type and its regex, with the '^' and '$'
+ // characters removed, to standard output
+ public static void main(String[] args) {
+     AlifoldLine[] all = AlifoldLine.values();
+     for (int i = 0; i < all.length; i++) {
+         String bare = all[i].regex.replace("^","").replace("$","");
+         System.out.println(all[i].toString() + ": " + bare);
+     }
+ }
+
+
+
+}
+++ /dev/null
-package compbio.data.sequence;
-
-import java.util.List;
-
-import javax.xml.bind.annotation.XmlAccessType;
-import javax.xml.bind.annotation.XmlAccessorType;
-
-import compbio.util.annotation.Immutable;
-
-/*
- * RNA secondary structure
- */
-
-@XmlAccessorType(XmlAccessType.FIELD)
-public final class RNAstruct {
-
- private String sequence;
- private String structure; // needs to be array to deal with all output
- private Float minEnergy;
- private Float energySum1; private Float energySum2;
-
-
- public RNAstruct() {
- // default JaxB Constructor
- }
-
- public RNAstruct(String sequence, String structure, Float minEnergy
- , Float energySum1, Float energySum2) {
- this.sequence = sequence;
- this.structure = structure;
- this.minEnergy = minEnergy;
- this.energySum1 = energySum1;
- this.energySum2 = energySum2;
- }
-
- public String getSequence() {
- return sequence;
- }
-
- public String getStructure() {
- return structure;
- }
-
- public Float getEnergy() {
- return minEnergy;
- }
-
-
- @Override
- public String toString() {
- String newLine = System.getProperty("line.separator",".");
- return sequence + newLine + structure + " (" + minEnergy.toString()
- + " = " + energySum1.toString() + " + " + energySum2.toString()
- + ")";
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == null) {
- return false;
- }
- if (!(obj instanceof RNAstruct)) {
- return false;
- }
- RNAstruct st = (RNAstruct) obj;
- if (!(this.getSequence() == st.getSequence() &&
- this.getStructure() == st.getStructure() &&
- this.getEnergy() == st.getEnergy())) {
- return false;
- }
-
- return true;
- }
-}
-
-
this.from = Integer.parseInt(twoElementAr[0].trim());\r
this.to = Integer.parseInt(twoElementAr[1].trim());\r
}\r
-\r
+ \r
+ // Daniel Check these get methods are ok with Jim\r
+ /** @return the value of the {@code from} field of this range */\r
+ public int getFrom() { return from; }\r
+ /** @return the value of the {@code to} field of this range */\r
+ public int getTo() { return to; }\r
+ \r
+ \r
@Override\r
public String toString() {\r
return from + "-" + to;\r
}\r
+ \r
\r
@Override\r
public int hashCode() {\r
return true;\r
}\r
\r
+ // daniel wants to mess with method. this is how it was\r
+// @Override\r
+// public int compareTo(Range o) {\r
+// if (o == null)\r
+// return 1;\r
+// return new Integer(this.from).compareTo(new Integer(o.from));\r
+// }\r
+ \r
@Override\r
public int compareTo(Range o) {\r
if (o == null)\r
return 1;\r
- return new Integer(this.from).compareTo(new Integer(o.from));\r
+ // Order by 'from' first and by 'to' second. The int fields are compared\r
+ // directly: no Integer objects are allocated and each comparison is\r
+ // evaluated once. The result is -1, 0 or 1 as before.\r
+ if (this.from != o.from) {\r
+     return (this.from < o.from) ? -1 : 1;\r
+ }\r
+ if (this.to != o.to) {\r
+     return (this.to < o.to) ? -1 : 1;\r
+ }\r
+ return 0;\r
}\r
\r
}\r
import java.io.Writer;\r
import java.text.NumberFormat;\r
import java.util.ArrayList;\r
+import java.util.Iterator;\r
import java.util.Locale;\r
import java.util.TreeSet;\r
\r
writer.flush();\r
}\r
\r
+// @Override\r
+// public int compareTo(Score o) {\r
+// return this.method.compareTo(o.method);\r
+// }\r
+ \r
+ /* daniel wants to mess with this method and, while preserving the \r
+ * ordering when the method Enumerations are different, add additional\r
+ * constraints on how equal Score objects must be to be considered equal\r
+ * \r
+ * It is necessary to distinguish Score objects by their ranges in order\r
+ * to use a Set of Score objects to represent the alifold.out information\r
+ * \r
+ * It is necessary to distinguish Score objects by their scores in order\r
+ * to return Stochastic Backtrack information reliably\r
+ */\r
+ \r
+ \r
@Override\r
public int compareTo(Score o) {\r
- return this.method.compareTo(o.method);\r
+ // 1. Different methods: keep the ordering of the methods\r
+ int pass = this.method.compareTo(o.method);\r
+ if (pass != 0) {\r
+     return pass;\r
+ }\r
+ // 2. Fewer scores sort first\r
+ int scoreCount = this.scores.size();\r
+ int otherScoreCount = o.scores.size();\r
+ if (scoreCount != otherScoreCount) {\r
+     return (scoreCount < otherScoreCount) ? -1 : 1;\r
+ }\r
+ // 3. Same number of scores: first differing value decides\r
+ for (int i = 0; i < scoreCount; i++) {\r
+     pass = this.scores.get(i).compareTo(o.scores.get(i));\r
+     if (pass != 0) {\r
+         return -pass; // descending order\r
+     }\r
+ }\r
+ // 4. Fewer ranges sort first\r
+ int rangeCount = this.ranges.size();\r
+ int otherRangeCount = o.ranges.size();\r
+ if (rangeCount != otherRangeCount) {\r
+     return (rangeCount < otherRangeCount) ? -1 : 1;\r
+ }\r
+ // 5. Same number of ranges: walk both in step, first difference decides\r
+ Iterator<Range> thisRange = this.ranges.iterator();\r
+ Iterator<Range> oRange = o.ranges.iterator();\r
+ while (thisRange.hasNext()) {\r
+     pass = thisRange.next().compareTo(oRange.next());\r
+     if (pass != 0) {\r
+         return pass;\r
+     }\r
+ }\r
+ \r
+ return 0; \r
}\r
}\r
this.seqScores = seqScores;\r
}\r
\r
+ /**\r
+  * Constructor added by Daniel, for using ScoreManager to represent\r
+  * RNAalifold output: one ScoreHolder is created per structure string of\r
+  * the given RNAStruct, paired with the data set at the same position.\r
+  */\r
+ public ScoreManager(RNAStruct rnaStruct) {\r
+     List<String> ids = rnaStruct.getStructs();\r
+     List<TreeSet<Score>> scoreSets = rnaStruct.getData();\r
+     assert(ids.size() == scoreSets.size());\r
+     // This had better be used to create a RNAalifold output structure\r
+     assert(java.util.regex.Pattern.matches(\r
+             "^[\\.)(]*$", ids.get(1)));\r
+     List<ScoreHolder> holders = new ArrayList<ScoreHolder>();\r
+     \r
+     int position = 0;\r
+     for (String id : ids) {\r
+         holders.add(new ScoreHolder(id, scoreSets.get(position)));\r
+         position++;\r
+     }\r
+     this.seqScores = holders;\r
+ }\r
+ \r
+ \r
public static ScoreManager newInstance(Map<String, Set<Score>> data) {\r
return new ScoreManager(data);\r
}\r
new TreeSet(data));\r
}\r
\r
+ // Also Daniel's. Check this with Jim to make sure that I am not compromising\r
+ // the security of this class\r
+ /**\r
+  * Unpacks the score holders into an RNAStruct: the ids become the\r
+  * structure strings and the score sets become the data.\r
+  * <p>\r
+  * Both lists are new, and each score set is a copy, so adding to or\r
+  * removing from the returned collections does not change this ScoreManager.\r
+  * The Score objects themselves are shared, not copied.\r
+  */\r
+ public RNAStruct asRNAStruct() {\r
+     List<String> structs = new ArrayList<String>();\r
+     List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();\r
+     for (ScoreHolder sch : this.seqScores) {\r
+         structs.add(sch.id);\r
+         // defensive copy: do not hand out the internal set\r
+         data.add(sch.scores == null ? null : new TreeSet<Score>(sch.scores));\r
+     }\r
+     return new RNAStruct(structs, data);\r
+ }\r
+ \r
+ \r
public Map<String, TreeSet<Score>> asMap() {\r
Map<String, TreeSet<Score>> seqScoresMap = new TreeMap<String, TreeSet<Score>>();\r
for (ScoreHolder sch : this.seqScores) {\r
import java.io.Closeable;\r
import java.io.File;\r
import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
import java.io.IOException;\r
import java.io.InputStream;\r
import java.io.InputStreamReader;\r
import java.io.OutputStream;\r
import java.io.OutputStreamWriter;\r
import java.util.ArrayList;\r
+import java.util.Arrays;\r
import java.util.HashMap;\r
import java.util.HashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Scanner;\r
import java.util.Set;\r
+import java.util.TreeMap;\r
import java.util.TreeSet;\r
import java.util.logging.Level;\r
import java.util.regex.Matcher;\r
}\r
return annotations;\r
}\r
- \r
- /*\r
- * Reads and parses the output of an RNAalifold job \r
- * \r
- * OUT OF USE\r
- */\r
- public static RNAstruct readRNAalifoldResults(InputStream results) {\r
- if (results == null) {\r
- throw new NullPointerException(\r
- "InputStream with results must be provided");\r
- }\r
- Scanner sc = new Scanner(results);\r
- sc.useDelimiter("\\s+");\r
- String sequence = sc.next();\r
- String structure = sc.next();\r
- // now get energy values out of the brackets\r
- String regex = "-?[0-9]*\\.?[0-9]+";\r
- Float minEnergy = Float.parseFloat(sc.findInLine(regex));\r
- \r
- RNAstruct rnastruct = new RNAstruct(sequence, structure, minEnergy,\r
- Float.parseFloat(sc.findInLine(regex)), \r
- Float.parseFloat(sc.findInLine(regex)));\r
- \r
- sc.close();\r
- return rnastruct;\r
- }\r
+\r
\r
\r
\r
+ "\nFile must ends with one of [glob, long or short]"\r
+ "\n but given file name was: " + file.getName());\r
}\r
-}
\ No newline at end of file
+}\r
import java.io.FileNotFoundException;\r
import java.io.FileOutputStream;\r
import java.io.IOException;\r
+import java.util.Arrays;\r
import java.util.List;\r
+import java.util.ArrayList;\r
import java.util.Map;\r
+import java.util.Set;\r
+import java.util.TreeSet;\r
+import java.util.TreeMap;\r
+import java.util.regex.Pattern;\r
+import java.util.regex.Matcher;\r
+import java.util.Scanner;\r
+\r
+\r
\r
import org.apache.log4j.Logger;\r
\r
import compbio.data.sequence.Alignment;\r
import compbio.data.sequence.ClustalAlignmentUtil;\r
import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.RNAstruct;\r
+import compbio.data.sequence.RNAStruct;\r
import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
+import compbio.data.sequence.Range;\r
+import compbio.data.sequence.RNAStructReader;\r
import compbio.data.sequence.SequenceUtil;\r
import compbio.data.sequence.UnknownFileFormatException;\r
import compbio.engine.client.ConfExecutable;\r
+ e.getLocalizedMessage(), e);\r
}\r
}\r
+\r
\r
- // OUT OF USE\r
- \r
-// public static final RNAstruct readRNAStructFile(String workDirectory,\r
-// String structFile) throws IOException, FileNotFoundException {\r
-// assert !compbio.util.Util.isEmpty(workDirectory);\r
-// assert !compbio.util.Util.isEmpty(structFile);\r
-// File sfile = new File(compbio.engine.client.Util.getFullPath(\r
-// workDirectory, structFile));\r
-// if(!(sfile.exists() && sfile.length() > 0)) {\r
-// throw new FileNotFoundException("Result for the jobId "\r
-// + workDirectory + "with file name " + structFile\r
-// + " is not found!");\r
-// }\r
-// return compbio.data.sequence.SequenceUtil.readRNAalifoldResults(\r
-// new FileInputStream(sfile));\r
-// }\r
- \r
- public static String readRNAStruct(String workDirectory,\r
+ /**\r
+  * Reads the RNAalifold result files of a job into a ScoreManager.\r
+  * <p>\r
+  * The stdout file must exist and be non-empty. If "alifold.out" exists in\r
+  * the same directory it is parsed as well; otherwise a warning is logged\r
+  * and only the stdout file is used. The streams opened here are closed\r
+  * before this method returns.\r
+  *\r
+  * @param workDirectory directory holding the result files\r
+  * @param structFile    name of the file holding the RNAalifold stdout\r
+  * @throws FileNotFoundException if the stdout file is missing or empty\r
+  * @throws IOException if a file cannot be read or parsed\r
+  */\r
+ public static ScoreManager readRNAStruct(String workDirectory,\r
String structFile) throws IOException, FileNotFoundException {\r
+ \r
assert !compbio.util.Util.isEmpty(workDirectory);\r
assert !compbio.util.Util.isEmpty(structFile);\r
- File sfile = new File(compbio.engine.client.Util.getFullPath(\r
+ // The stdout from RNAalifold\r
+ File sFile = new File(compbio.engine.client.Util.getFullPath(\r
workDirectory, structFile));\r
- if(!(sfile.exists() && sfile.length() > 0)) {\r
+ // Base pair probability matrix (-p option)\r
+ File aliFile = new File(compbio.engine.client.Util.getFullPath(\r
+ workDirectory, "alifold.out"));\r
+ // Check that stdout file exists\r
+ if(!(sFile.exists() && sFile.length() > 0)) {\r
throw new FileNotFoundException("Result for the jobId "\r
+ workDirectory + "with file name " + structFile\r
+ " is not found!");\r
}\r
- return readFile(sfile);\r
+ FileInputStream stdoutIn = new FileInputStream(sFile);\r
+ try {\r
+     // Check that base pair probability file exists\r
+     if(!aliFile.exists()) {\r
+         log.warn("The file alifold.out is not found for the jobId "\r
+             + workDirectory + ". Is the -p or --MEA option not specified?");\r
+         return RNAStructReader.readRNAStructStream(stdoutIn);\r
+     }\r
+     FileInputStream aliIn = new FileInputStream(aliFile);\r
+     try {\r
+         return RNAStructReader.readRNAStructStream(stdoutIn, aliIn);\r
+     } finally {\r
+         // closing again after the reader has closed it is harmless\r
+         aliIn.close();\r
+     }\r
+ } finally {\r
+     // always release the file handle, also when parsing fails\r
+     stdoutIn.close();\r
+ }\r
}\r
\r
+\r
+ \r
public static String readFile(File inputFile) throws \r
FileNotFoundException, IOException {\r
\r
input.close();\r
return file;\r
}\r
- \r
- \r
-\r
}\r
+\r
+\r
import org.apache.log4j.Logger;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.ScoreManager;
+import compbio.data.sequence.RNAStruct;
import compbio.data.sequence.UnknownFileFormatException;
import compbio.engine.client.PipedExecutable;
import compbio.engine.client.SkeletalExecutable;
@SuppressWarnings("unchecked")
@Override
// PlaceHolder method
- public String getResults(String workDirectory)
+ public ScoreManager getResults(String workDirectory)
throws ResultNotAvailableException {
try {
return Util.readRNAStruct(workDirectory, getOutput());
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import compbio.data.sequence.ScoreManager;
import compbio.engine.Configurator;
import compbio.engine.client.ConfiguredExecutable;
import compbio.engine.client.Executable.ExecProvider;
List<Option<?>> options = rnaalifoldOpc.getAllOptions();
// take out -p options so it can be added to all commands later
- options = removeParam(options, "Partition Function");
+ // options = removeParam(options, "Partition Function");
options.addAll(rnaalifoldOpc.getAllParameters());
List<List<String>> failedOptionPairs = new ArrayList<List<String>>();
pair.add(options.get(i)); pair.add(options.get(j));
List<String> args = rnaalifoldOpc.argumentsToCommandString(pair);
// add -p
- args.add("-p");
+ // args.add("-p");
try {
failed = singleRun(args);
} catch (ResultNotAvailableException e) {
//List<String> args = rnaalifoldOpc.argumentsToCommandString(params);
List<String> args = new ArrayList<String>();
- args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10");
+ //args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10");
// replace "=" with " " to fail test
args.add("--MEA=1");
- System.out.println("special: args: " + args);
+ args.add("-p");
singleTest(args);
}
confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
assertNotNull(confRNAalifold.getResults(), "results is null");
- //System.out.println("Results: \n" + confRNAalifold.getResults());
+ System.out.println("Results: \n"
+ + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
File errors = new File(confRNAalifold.getWorkDirectory(),
ExecutableWrapper.PROC_ERR_FILE);
lr.executeJob();
confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
- //System.out.println("Results: \n" + confRNAalifold.getResults());
-
+ System.out.println("Results: \n"
+ + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
if (confRNAalifold.getResults() != null) fail = false;
File errors = new File(confRNAalifold.getWorkDirectory(),
ExecutableWrapper.PROC_ERR_FILE);
+ "RNAalifoldParameters.xml";
public static String test_outfile = "rnaalifold.out";
+
@Test(groups = { AllTestSuit.test_group_runner })
public void testRunLocally() {
RNAalifold rnaalifold = new RNAalifold();
- rnaalifold.setInput(AllTestSuit.test_alignment_input).setOutput(test_outfile);
+ rnaalifold.setInput(AllTestSuit.test_input_aln).setOutput(test_outfile);
try{
ConfiguredExecutable<RNAalifold> confRNAalifold = Configurator
LocalRunner lr = new LocalRunner(confRNAalifold);
lr.executeJob();
confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
+
+ System.out.println("TEST");
+ System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
+
assertNotNull(confRNAalifold.getResults());
} catch (JobSubmissionException e) {
e.printStackTrace();
public static void main(String[] args) throws JobSubmissionException,
- JobExecutionException, InterruptedException {
+ JobExecutionException, InterruptedException, ResultNotAvailableException {
log.warn("Logger test :- Run RNAalifold.main()");
AsyncExecutor lr = new AsyncLocalRunner();
lr.submitJob(confRNAalifold);
+
+ System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
+
+
Thread.sleep(3000);
LocalExecutorService.shutDown();
import compbio.data.msa.JABAService;
import compbio.data.msa.SequenceAnnotation;
import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
import compbio.data.sequence.ClustalAlignmentUtil;
import compbio.data.sequence.ConservationMethod;
import compbio.data.sequence.FastaSequence;
import javax.naming.ldap.UnsolicitedNotificationEvent;
import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
import compbio.metadata.JobSubmissionException;
import compbio.metadata.LimitExceededException;
import compbio.metadata.Option;
import compbio.data.msa.SequenceAnnotation;\r
import compbio.data.sequence.Alignment;\r
import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.RNAstruct;\r
+import compbio.data.sequence.RNAStruct;\r
import compbio.data.sequence.ScoreManager;\r
import compbio.data.sequence.ClustalAlignmentUtil;\r
import compbio.data.sequence.SequenceUtil;\r
import compbio.data.msa.JManagement;
import compbio.data.msa.Metadata;
import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
import compbio.data.sequence.FastaSequence;
import compbio.engine.AsyncExecutor;
import compbio.engine.Configurator;