Alifold results are now parsed and stored in a ScoreManager object
author Daniel Barton <daluke.barton@gmail.com>
Thu, 15 Aug 2013 17:13:25 +0000 (18:13 +0100)
committer Daniel Barton <daluke.barton@gmail.com>
Thu, 15 Aug 2013 17:13:25 +0000 (18:13 +0100)
15 files changed:
datamodel/compbio/data/sequence/RNAStruct.java [new file with mode: 0644]
datamodel/compbio/data/sequence/RNAStructReader.java [new file with mode: 0644]
datamodel/compbio/data/sequence/RNAstruct.java [deleted file]
datamodel/compbio/data/sequence/Range.java
datamodel/compbio/data/sequence/Score.java
datamodel/compbio/data/sequence/ScoreManager.java
datamodel/compbio/data/sequence/SequenceUtil.java
runner/compbio/runner/Util.java
runner/compbio/runner/structure/RNAalifold.java
testsrc/compbio/runner/structure/RNAalifoldParametersTester.java
testsrc/compbio/runner/structure/RNAalifoldTester.java
testsrc/compbio/ws/client/TestRNAalifoldWS.java
webservices/compbio/data/msa/FoldWS.java
webservices/compbio/ws/client/Jws2Client.java
webservices/compbio/ws/server/RNAalifoldWS.java

diff --git a/datamodel/compbio/data/sequence/RNAStruct.java b/datamodel/compbio/data/sequence/RNAStruct.java
new file mode 100644 (file)
index 0000000..65bca75
--- /dev/null
@@ -0,0 +1,119 @@
+package compbio.data.sequence;
+
+import java.util.List;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.List;
+import java.util.ArrayList;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+
+import compbio.util.annotation.Immutable;
+import compbio.util.SysPrefs;
+import compbio.data.sequence.Score;
+
+/*
+ * RNA secondary structure
+ * A class which is essentially just a pair (List<String>, List<TreeSet<Score>>),
+ * used for creating and retrieving data from ScoreManager objects
+ * which are being used to store RNA folding output.
+ */
+
+@XmlAccessorType(XmlAccessType.FIELD)
+public final class RNAStruct {
+       
+       private List<String> structs = new ArrayList<String>();
+       private List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();
+       
+       /**
+        * No-argument constructor; both lists keep their empty initial values.
+        */
+       public RNAStruct() {
+               // default JaxB Constructor
+       }
+       /**
+        * Creates an instance backed by the given lists. The lists are stored by
+        * reference, not copied. Entry i of structs is paired with entry i of
+        * data; the equal-size check is an assert, so it only runs when
+        * assertions are enabled.
+        */
+       public RNAStruct(List<String> structs, List<TreeSet<Score>> data) {
+               assert(structs.size() == data.size());
+               this.structs = structs;
+               this.data = data;
+       }
+       /**
+        * Returns the internal list of structure strings (not a copy).
+        */
+       public List<String> getStructs() {
+               return structs;
+       }
+       /**
+        * Returns the internal list of score sets (not a copy).
+        */
+       public List<TreeSet<Score>> getData() {
+               return data;
+       }
+       
+       // Send this data Structure back to something approximating the stdoutFile
+       // with extra information from alifold.out
+       @Override
+       public String toString() {
+               String out = "";
+               // The first objects hold the Consensus Alignment and the alifold.out info
+               out += structs.get(0) + SysPrefs.newlinechar;
+               
+               // Now the rest of the structures with energies/frequencies
+               for (int i = 1; i < structs.size(); i++) {
+                       out = out + structs.get(i).toString();
+                       
+                       if (data.get(i).first().getScores().size() > 0) {
+                               List<Float> scores = data.get(i).first().getScores();
+                               if (scores.size() >= 3) {
+                                       out = out + " (" + scores.get(0).toString() + " = " 
+                                                       + scores.get(1).toString() + " + " + scores.get(2).toString()
+                                                       + ")" + SysPrefs.newlinechar;
+                               }
+                               else if (data.get(i).first().getMethod().equals("alifoldMEA")) {
+                                       out = out + " { " + scores.get(0).toString() + " MEA=" 
+                                                       + scores.get(1).toString() + "}" + SysPrefs.newlinechar;
+                               }
+                               else if (scores.size() >= 2) {
+                                       out = out + " [" + scores.get(0).toString() + ", " 
+                                                       + scores.get(1).toString() + "]" + SysPrefs.newlinechar;
+                                       
+                               }
+                       } else out += SysPrefs.newlinechar; 
+               }
+               if (data.get(0).first().getScores().size() > 0) {
+                       Iterator<Score> iScores = data.get(0).iterator();
+                       out += "Base Pairings followed by probability" + SysPrefs.newlinechar;
+                       for (int i = 0; i < data.get(0).size(); i++) {
+                               Score s = iScores.next();
+                               Range r = s.getRanges().first();
+                               Float score = s.getScores().get(0);
+                               out += String.format("%4d  %4d    %.1f%n", r.getFrom(), r.getTo(),
+                                               score);
+                       }
+               }
+               
+               return out;
+       }
+       
+       @Override 
+       public boolean equals(Object obj) {
+               if (obj == null) {
+                       return false;
+               }
+               if (!(obj instanceof RNAStruct)) {
+                       return false;
+               }
+               RNAStruct other = (RNAStruct) obj;
+               if (structs == null) {
+                       if (other.structs != null) {
+                               return false;
+                       }
+               } else if (!structs.equals(other.structs))
+                       return false;
+               if (data == null) {
+                       if (other.data != null)
+                               return false;
+               } else if (!data.equals(other.data))
+                       return false;
+
+               return true;
+       }
+}
+
+
diff --git a/datamodel/compbio/data/sequence/RNAStructReader.java b/datamodel/compbio/data/sequence/RNAStructReader.java
new file mode 100644 (file)
index 0000000..cfc83ff
--- /dev/null
@@ -0,0 +1,292 @@
+package compbio.data.sequence;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Scanner;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+
+import compbio.runner.structure.RNAalifold;
+
+// Utility class for reading alifold output
+
+public class RNAStructReader {
+
+       private static Logger log = Logger.getLogger(RNAStructReader.class);
+       
+       // Separator patterns: s and notData are the same character class (runs of whitespace, '+' and '='); bracket matches any one bracket character
+       static String s = "[+\\s=]+";
+       static String bracket = "\\(|\\)|\\{|\\}|\\[|\\]";
+       static String notData = "[\\s=+]+";
+
+       // Token patterns: one per kind of value that appears in RNAalifold stdout
+       static String seqP = "[_\\-a-zA-Z]{2,}"; // Has to match --mis output as well (not just ACGU_)
+       static String structP = "[\\.)({}\\[\\],]{2,}";
+       static String floatP = "-?\\d+\\.\\d*(e[+\\-]\\d+)?";
+       static String energyP = "-?[0-9]*\\.?[0-9]{2}";
+       static String freqP = "^-?\\d\\.\\d{6,}(e[+\\-]\\d+)?$";
+       
+       // Whole-line patterns, assembled from the token and separator patterns above (ps = optional whitespace)
+       static String ps = "\\s*";
+       static String alignmentP = "^"+seqP+ps+"$";
+       static String stdStructP = "^"+structP+s+"\\("+ps+floatP+s+floatP+s+floatP+ps+"\\)"+ps+"$";
+       static String justStructP = "^"+structP+ps+"$";
+       static String stochBTStructP = "^"+structP+s+floatP+s+floatP+ps+"$";
+       static String PStructP = "^"+structP+s+"\\["+ps+floatP+ps+"\\]"+ps+"$";
+       static String centStructP = "^"+structP+s+floatP+ps+"\\{"+ps+floatP+s+floatP+ps+"\\}"+ps+"$";
+       static String MEAStructP = "^"+structP+s+"\\{"+ps+floatP+s+"MEA="+floatP+ps+"\\}"+ps+"$";
+       static String freeEnergyP = "^"+ps+"free energy of ensemble"+ps+"="+ps+floatP+ps+"kcal/mol"+ps+"$";
+       static String ensembleFreqP = "^"+ps+"frequency of mfe structure in ensemble "+floatP+ps+"$";
+
+       public static ScoreManager readRNAStructStream(InputStream stdout)
+                       throws IOException {
+               
+               String error = "Error in parsing alifold stdout file: ";
+               // The Lists required to construct a ScoreManager Using the new constructor
+               List<String> structs = new ArrayList<String>();
+               List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();
+
+               // Allocate necessry data structures for creating Score objects
+               ArrayList<Float> scores = new ArrayList<Float>();
+
+               BufferedReader reader = new BufferedReader(new InputStreamReader(stdout));
+               // The first 2 lines of the alifold stdout file are always the same format
+               String fline = reader.readLine();
+               assert (Pattern.matches(AlifoldLine.alignment.regex, fline)) :
+                       error + "Sequence Alignment Expected";
+               structs.add(fline.trim());
+               data.add(newEmptyScore(AlifoldResult.alifoldSeq));
+               
+               fline = reader.readLine();
+               assert (Pattern.matches(AlifoldLine.stdStruct.regex, fline)) :
+                       error + "Consensus Structure and Energy Expected";
+               Scanner sc = new Scanner(fline);
+               structs.add(sc.next());
+               for (int i = 0; i < 3; i++) {
+                       scores.add(Float.parseFloat(sc.findInLine(floatP)));
+               }
+               data.add(newSetScore(AlifoldResult.alifold, scores));
+               
+               // Now the alifold stdout file formats diverge based on arguments
+               fline = reader.readLine();
+               String sline;
+               Scanner nsc = null;
+               while ( fline != null) {
+                       scores.clear();
+                       AlifoldLine ftype = identifyLine(fline);
+                       sline = reader.readLine(); // Look ahead
+                       sc = new Scanner(fline);
+                       if (sline != null) nsc = new Scanner(sline);
+
+                       if (ftype.equals(AlifoldLine.PStruct)) {
+                               // The -p or --MEA option is specified
+                               // The next line should always be frequency of mfe structure
+                               assert ( sline != null && Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) :
+                                       error + "Expected frequency of mfe structure";
+                               structs.add(sc.next());
+                               scores.add(Float.parseFloat(sc.findInLine(floatP)));
+                               scores.add(Float.parseFloat(nsc.findInLine(floatP)));
+                               data.add(newSetScore(AlifoldResult.alifoldP, scores));
+                               // Jump line
+                               sline = reader.readLine();
+                       }
+                       else if (ftype.equals(AlifoldLine.centStruct)) {
+                               structs.add(sc.next());
+                               for (int i = 0; i < 3; i++) {
+                                       scores.add(Float.parseFloat(sc.findInLine(floatP)));
+                               }
+                               data.add(newSetScore(AlifoldResult.alifoldCentroid, scores));
+                       }
+                       else if (ftype.equals(AlifoldLine.MEAStruct)) {
+                               structs.add(sc.next());
+                               for (int i = 0; i < 2; i++) {
+                                       scores.add(Float.parseFloat(sc.findInLine(floatP)));
+                               }
+                               data.add(newSetScore(AlifoldResult.alifoldMEA, scores));
+                       }
+                       else if (ftype.equals(AlifoldLine.justStruct)) {
+                               structs.add(sc.next());
+                               data.add(newEmptyScore(AlifoldResult.alifoldStochBT));
+                       }
+                       else if (ftype.equals(AlifoldLine.stochBTStruct)) {
+                               structs.add(sc.next());
+                               scores.add(sc.nextFloat());
+                               scores.add(sc.nextFloat());
+                               data.add(newSetScore(AlifoldResult.alifoldStochBT, scores));
+                       }
+                       else if (ftype.equals(AlifoldLine.freeEnergy)) {
+                               assert (sline != null 
+                                               && Pattern.matches(AlifoldLine.ensembleFreq.regex, sline)) :
+                                               error + "Found 'freeEnergy' line on its own";
+                               structs.add("Free energy of ensemble (kcal/mol) followed by "
+                                               + "frequency of mfe structure in ensemble");
+                               scores.add(Float.parseFloat(sc.findInLine(floatP)));
+                               scores.add(Float.parseFloat(nsc.findInLine(floatP)));
+                               data.add(newSetScore(AlifoldResult.alifoldMetadata, scores));
+                               // jump line
+                               sline = reader.readLine();
+                       }
+                       
+
+                       assert(!ftype.equals(AlifoldLine.ensembleFreq)) :
+                               error + "Wasn't expecting 'frequency of mfe structure'!";
+                       assert(!ftype.equals(AlifoldLine.stdStruct)) :
+                               error + "'Standard output' line at a place other than line 2!";
+                       assert(!ftype.equals(AlifoldLine.alignment)) :
+                               error + "Wasn't expecting an alignment sequence!";
+                       assert(!ftype.equals(AlifoldLine.OTHER)) :
+                               error + "Wasn't expecting this whatever it is: " + fline;
+                       if (Pattern.matches("^\\s*$", fline)) {
+                               log.warn("While parsing alifold stdout: A line is either empty or"
+                                               + " contains only whitespace");
+                       }
+                       
+                       fline = sline;
+               }
+                               
+               sc.close();
+               if (nsc != null) nsc.close();
+               
+               return new ScoreManager(new RNAStruct(structs, data));
+       }
+       
+       /**
+        * Builds a one-element TreeSet holding a single Score made from the given
+        * values, ready to be added to a 'data' list for a ScoreManager.
+        * A null entry in the list is stored as Float.NaN.
+        */
+       private static TreeSet<Score> newSetScore(Enum<?> res, List<Float> scores) {
+               // Unbox into the primitive array that the Score constructor takes
+               float[] unboxed = new float[scores.size()];
+               int idx = 0;
+               for (Float value : scores) {
+                       if (value == null) {
+                               unboxed[idx] = Float.NaN;
+                       } else {
+                               unboxed[idx] = value;
+                       }
+                       idx++;
+               }
+               TreeSet<Score> single = new TreeSet<Score>();
+               single.add(new Score(res, unboxed));
+               return single;
+       }
+
+       // A method just for the purpose of neatly creating Almost Empty score objects
+       // that can't be null
+       public static TreeSet<Score> newEmptyScore(Enum<?> res) {
+               return new TreeSet<Score>(Arrays.asList(new Score(res, new float[0])));
+       }
+
+       public static ScoreManager readRNAStructStream(InputStream stdout, 
+                       InputStream alifold) throws IOException {
+               
+               // The Lists required to construct a ScoreManager Using the new constructor
+               List<String> structs;
+               List<TreeSet<Score>> data; 
+               
+               // Get a ScoreManager that takes the std output but ignores alifold.out (-p)
+               ScoreManager stdSM = readRNAStructStream(stdout);
+               
+               // Unpack this into the structs and data lists
+               structs = stdSM.asRNAStruct().getStructs();
+               data = stdSM.asRNAStruct().getData();
+               
+               // Now parse alifold.out
+               Scanner sc = new Scanner(alifold);
+               sc.useDelimiter("[\\s%]+");
+               
+               // jump two lines to the data 
+               sc.nextLine(); sc.nextLine();
+               
+               // Read the first, second and fourth columns. Ignoring everything else.
+               // Allocate necessry data structures for creating Score objects
+               ArrayList<Float> scores = new ArrayList<Float>();
+               List<Range> rangeHolder = new ArrayList<Range>();
+               String s = "null";
+               while (true) {
+                       s = sc.next();
+                       if (java.util.regex.Pattern.matches("^[\\.)(]{2,}$", s)) break;
+                       if (!sc.hasNextLine()) break;
+                       int t = sc.nextInt();
+                       rangeHolder.add(new Range(Integer.parseInt(s), t));
+                       sc.next();
+                       scores.add(sc.nextFloat());
+                       sc.nextLine();
+               }
+               sc.close();
+               
+               // Update the first ScoreHolder TreeSet<Score> element
+               assert (rangeHolder.size() == scores.size());
+               TreeSet<Score> sHolder = new TreeSet<Score>();
+               for (int i = 0; i < rangeHolder.size(); i++) {
+                       ArrayList<Float> singleS = new ArrayList<Float>(Arrays.asList(scores.get(i)));
+                       TreeSet<Range> singleR = new TreeSet<Range>(Arrays.asList(rangeHolder.get(i)));
+                       sHolder.add(new Score(AlifoldResult.alifoldSeq, singleS, singleR));
+               }
+               
+               data.set(0, sHolder);
+               
+               return new ScoreManager(new RNAStruct(structs, data));
+       }
+
+       /**
+        * Classifies a single token by trying the sequence, structure, energy and
+        * frequency patterns in that order; the first match wins and anything
+        * else is reported as OTHER.
+        */
+       private static RNAOut identify(String token) {
+               RNAOut kind = RNAOut.OTHER;
+               if (Pattern.matches(seqP, token)) {
+                       kind = RNAOut.SEQ;
+               } else if (Pattern.matches(structP, token)) {
+                       kind = RNAOut.STRUCT;
+               } else if (Pattern.matches(energyP, token)) {
+                       kind = RNAOut.ENERGY;
+               } else if (Pattern.matches(freqP, token)) {
+                       kind = RNAOut.FREQ;
+               }
+               return kind;
+       }
+       
+       /**
+        * Returns the first AlifoldLine constant, in declaration order, whose
+        * regex matches the whole line; OTHER if none does.
+        */
+       private static AlifoldLine identifyLine(String line) {
+               AlifoldLine[] candidates = AlifoldLine.values();
+               int idx = 0;
+               // Advance past every constant whose pattern does not match
+               while (idx < candidates.length
+                               && !Pattern.matches(candidates[idx].regex, line)) {
+                       idx++;
+               }
+               return (idx < candidates.length) ? candidates[idx] : AlifoldLine.OTHER;
+       }
+       /**
+        * The kinds of line recognised in RNAalifold stdout, each paired with the
+        * regex that a whole line must match. identifyLine returns the first
+        * constant that matches, so the declaration order below is significant.
+        */
+       static enum AlifoldLine {
+               stdStruct (stdStructP),
+               justStruct (justStructP),
+               stochBTStruct (stochBTStructP),
+               PStruct (PStructP),
+               centStruct (centStructP),
+               MEAStruct (MEAStructP),
+               freeEnergy (freeEnergyP),
+               ensembleFreq (ensembleFreqP),
+               alignment (alignmentP), 
+               OTHER (".*"); // catch-all: matches any line, so it must stay last
+               // Pattern that a whole line must match to be of this kind
+               String regex;
+               AlifoldLine(String regex) { this.regex = regex; }
+
+       }
+       
+       // The kinds of token that identify(String) can report for a piece of RNAalifold stdout
+       static enum RNAOut {
+               SEQ, STRUCT, ENERGY, FREQ, OTHER
+       }
+
+       // Tags passed as the first constructor argument of every Score this reader creates,
+       // recording which kind of alifold result the paired structure string holds.
+       static enum AlifoldResult {
+               alifold, alifoldP, alifoldMEA, alifoldCentroid, alifoldStochBT, alifoldSeq, alifoldMetadata
+       }
+       
+       
+
+       // Print the full regex Strings for testing 
+       public static void main(String[] args) {
+               for (AlifoldLine l : AlifoldLine.values()) {
+                       System.out.println(l.toString() + ": " + l.regex.replace("^","").replace("$",""));
+               }
+       }
+       
+
+       
+}      
diff --git a/datamodel/compbio/data/sequence/RNAstruct.java b/datamodel/compbio/data/sequence/RNAstruct.java
deleted file mode 100644 (file)
index 1e36064..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-package compbio.data.sequence;
-
-import java.util.List;
-
-import javax.xml.bind.annotation.XmlAccessType;
-import javax.xml.bind.annotation.XmlAccessorType;
-
-import compbio.util.annotation.Immutable;
-
-/*
- * RNA secondary structure
- */
-
-@XmlAccessorType(XmlAccessType.FIELD)
-public final class RNAstruct {
-       
-       private String sequence;
-       private String structure; // needs to be array to deal with all output
-       private Float minEnergy;
-       private Float energySum1; private Float energySum2;
-       
-       
-       public RNAstruct() {
-               // default JaxB Constructor
-       }
-       
-       public RNAstruct(String sequence, String structure, Float minEnergy
-                       , Float energySum1, Float energySum2) {
-               this.sequence = sequence;
-               this.structure = structure;
-               this.minEnergy = minEnergy;
-               this.energySum1 = energySum1;
-               this.energySum2 = energySum2;
-       }
-       
-       public String getSequence() {
-               return sequence;
-       }
-       
-       public String getStructure() {
-               return structure;
-       }
-       
-       public Float getEnergy() {
-               return minEnergy;
-       }
-       
-       
-       @Override
-       public String toString() {
-               String newLine = System.getProperty("line.separator",".");
-               return sequence + newLine + structure + " (" + minEnergy.toString() 
-                               + " = " + energySum1.toString() + " +  " + energySum2.toString()
-                               + ")"; 
-       }
-       
-       @Override 
-       public boolean equals(Object obj) {
-               if (obj == null) {
-                       return false;
-               }
-               if (!(obj instanceof RNAstruct)) {
-                       return false;
-               }
-               RNAstruct st = (RNAstruct) obj;
-               if (!(this.getSequence() == st.getSequence() &&
-                               this.getStructure() == st.getStructure() &&
-                               this.getEnergy() == st.getEnergy())) {
-                       return false;
-               }
-               
-               return true;
-       }
-}
-
-
index c2ccc43..9ade348 100644 (file)
@@ -40,11 +40,17 @@ public class Range implements Comparable<Range> {
                this.from = Integer.parseInt(twoElementAr[0].trim());\r
                this.to = Integer.parseInt(twoElementAr[1].trim());\r
        }\r
-\r
+       \r
+       // Read accessors for the two bounds. TODO (Daniel): check with Jim that exposing these is OK\r
+       public int getFrom() { return from; }\r
+       public int getTo() { return to; }\r
+       \r
+       \r
        @Override\r
        public String toString() {\r
                return from + "-" + to;\r
        }\r
+       \r
 \r
        @Override\r
        public int hashCode() {\r
@@ -70,11 +76,24 @@ public class Range implements Comparable<Range> {
                return true;\r
        }\r
 \r
+       // daniel wants to mess with method. this is how it was\r
+//     @Override\r
+//     public int compareTo(Range o) {\r
+//             if (o == null)\r
+//                     return 1;\r
+//             return new Integer(this.from).compareTo(new Integer(o.from));\r
+//     }\r
+       \r
        @Override\r
        public int compareTo(Range o) {\r
                if (o == null)\r
                        return 1;\r
-               return new Integer(this.from).compareTo(new Integer(o.from));\r
+               if (new Integer(this.from).compareTo(new Integer(o.from)) != 0) {\r
+                       return new Integer(this.from).compareTo(new Integer(o.from));\r
+               }\r
+               else {\r
+                       return new Integer(this.to).compareTo(new Integer(o.to));\r
+               }\r
        }\r
 \r
 }\r
index fed0ef0..b886590 100644 (file)
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.Writer;\r
 import java.text.NumberFormat;\r
 import java.util.ArrayList;\r
+import java.util.Iterator;\r
 import java.util.Locale;\r
 import java.util.TreeSet;\r
 \r
@@ -231,8 +232,53 @@ public class Score implements Comparable<Score> {
                writer.flush();\r
        }\r
 \r
+//     @Override\r
+//     public int compareTo(Score o) {\r
+//             return this.method.compareTo(o.method);\r
+//     }\r
+       \r
+       /* daniel wants to mess with this method and, while preserving the \r
+        * ordering when the method Enumerations are different, add additional\r
+        * constraints on how equal Score objects must be to be considered equal\r
+        * \r
+        * It is necessary to distinguish Score objects by their ranges in order\r
+        * to use a Set of Score objects to represent the alifold.out information\r
+        * \r
+        * It is necessary to distinguish Score objects by their scores in order\r
+        * to return Stochastic Backtrack information reliably\r
+        */\r
+       \r
+       \r
        @Override\r
        public int compareTo(Score o) {\r
-               return this.method.compareTo(o.method);\r
+               if (this.method.compareTo(o.method) != 0) {\r
+                       return this.method.compareTo(o.method);\r
+               }\r
+               int pass;\r
+               pass = new Integer(this.scores.size()).compareTo(\r
+                               new Integer(o.scores.size())); \r
+               if (pass != 0) return pass;\r
+               for (int i = 0; i < this.scores.size(); i++) {\r
+                       pass = this.scores.get(i).compareTo(o.scores.get(i));\r
+                       if (pass != 0) {\r
+                               return pass*-1; // descending order\r
+                       }\r
+               }\r
+               \r
+               pass = new Integer(this.ranges.size()).compareTo(\r
+                               new Integer(o.ranges.size())); \r
+               if (pass != 0) return pass; \r
+               Iterator<Range> thisRange = this.ranges.iterator();\r
+               Iterator<Range> oRange = o.ranges.iterator();\r
+               for (int i = 0; i < this.ranges.size(); i++) {\r
+                       Range tR = thisRange.next();\r
+                       Range oR = oRange.next();\r
+                       \r
+                       if (tR.compareTo(oR) != 0) {\r
+                               return tR.compareTo(oR);\r
+                       }\r
+               }\r
+               \r
+               return 0;       \r
        }\r
 }\r
index d16a059..c3b6a85 100644 (file)
@@ -56,6 +56,27 @@ public class ScoreManager {
                this.seqScores = seqScores;\r
        }\r
 \r
+       /*\r
+        *  Next constructor added by Daniel\r
+        *  A constructor for using ScoreManager to represent RNAalifold output.\r
+        *  Entry i of rnaStruct.getStructs() and entry i of rnaStruct.getData()\r
+        *  become one ScoreHolder. Both checks below are asserts, so they only\r
+        *  run when assertions are enabled; the second one reads entry 1 and\r
+        *  therefore needs at least two entries.\r
+        */\r
+       \r
+       public ScoreManager(RNAStruct rnaStruct) {\r
+               assert(rnaStruct.getStructs().size() == rnaStruct.getData().size());\r
+               // Entry 1 must consist only of '.', '(' and ')' characters, i.e. look like an RNAalifold structure\r
+               assert(java.util.regex.Pattern.matches(\r
+                               "^[\\.)(]*$", rnaStruct.getStructs().get(1)));\r
+               List<ScoreHolder> seqScores = new ArrayList<ScoreHolder>();\r
+               \r
+               for (int i = 0; i < rnaStruct.getStructs().size(); i++) {\r
+                       seqScores.add(new ScoreHolder(rnaStruct.getStructs().get(i),\r
+                                       rnaStruct.getData().get(i)));\r
+               }\r
+               this.seqScores = seqScores;\r
+       }\r
+       \r
+       \r
        public static ScoreManager newInstance(Map<String, Set<Score>> data) {\r
                return new ScoreManager(data);\r
        }\r
@@ -76,6 +97,19 @@ public class ScoreManager {
                                new TreeSet(data));\r
        }\r
 \r
+       // Also Daniel's. Returns the held (id, scores) pairs as an RNAStruct; the score sets are the same objects, not copies.\r
+       // TODO: check this with Jim to make sure that I am not compromising the security of this class\r
+               public RNAStruct asRNAStruct() {\r
+                       List<String> structs = new ArrayList<String>();\r
+                       List<TreeSet<Score>> data = new ArrayList<TreeSet<Score>>();\r
+                       for (ScoreHolder sch : this.seqScores) {\r
+                               structs.add(sch.id);\r
+                               data.add(sch.scores);\r
+                       }\r
+                       return new RNAStruct(structs, data);\r
+               }\r
+                \r
+       \r
        public Map<String, TreeSet<Score>> asMap() {\r
                Map<String, TreeSet<Score>> seqScoresMap = new TreeMap<String, TreeSet<Score>>();\r
                for (ScoreHolder sch : this.seqScores) {\r
index 884315d..3a84873 100644 (file)
@@ -23,18 +23,21 @@ import java.io.BufferedWriter;
 import java.io.Closeable;\r
 import java.io.File;\r
 import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.io.InputStreamReader;\r
 import java.io.OutputStream;\r
 import java.io.OutputStreamWriter;\r
 import java.util.ArrayList;\r
+import java.util.Arrays;\r
 import java.util.HashMap;\r
 import java.util.HashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Scanner;\r
 import java.util.Set;\r
+import java.util.TreeMap;\r
 import java.util.TreeSet;\r
 import java.util.logging.Level;\r
 import java.util.regex.Matcher;\r
@@ -757,32 +760,7 @@ public final class SequenceUtil {
                }\r
                return annotations;\r
        }\r
-       \r
-       /*\r
-        * Reads and parses the output of an RNAalifold job \r
-        * \r
-        * OUT OF USE\r
-        */\r
-       public static RNAstruct readRNAalifoldResults(InputStream results) {\r
-               if (results == null) {\r
-                       throw new NullPointerException(\r
-                                       "InputStream with results must be provided");\r
-               }\r
-               Scanner sc = new Scanner(results);\r
-               sc.useDelimiter("\\s+");\r
-               String sequence = sc.next();\r
-               String structure = sc.next();\r
-               // now get energy values out of the brackets\r
-               String regex = "-?[0-9]*\\.?[0-9]+";\r
-               Float minEnergy = Float.parseFloat(sc.findInLine(regex));\r
-               \r
-               RNAstruct rnastruct = new RNAstruct(sequence, structure, minEnergy,\r
-                               Float.parseFloat(sc.findInLine(regex)), \r
-                               Float.parseFloat(sc.findInLine(regex)));\r
-               \r
-               sc.close();\r
-               return rnastruct;\r
-       }\r
+\r
        \r
        \r
 \r
@@ -889,4 +867,4 @@ enum IUPredResult {
                                                + "\nFile must ends with one of [glob, long or short]"\r
                                                + "\n but given file name was: " + file.getName());\r
        }\r
-}
\ No newline at end of file
+}\r
index c6cb33f..f9106ca 100644 (file)
@@ -25,16 +25,29 @@ import java.io.BufferedReader;
 import java.io.FileNotFoundException;\r
 import java.io.FileOutputStream;\r
 import java.io.IOException;\r
+import java.util.Arrays;\r
 import java.util.List;\r
+import java.util.ArrayList;\r
 import java.util.Map;\r
+import java.util.Set;\r
+import java.util.TreeSet;\r
+import java.util.TreeMap;\r
+import java.util.regex.Pattern;\r
+import java.util.regex.Matcher;\r
+import java.util.Scanner;\r
+\r
+\r
 \r
 import org.apache.log4j.Logger;\r
 \r
 import compbio.data.sequence.Alignment;\r
 import compbio.data.sequence.ClustalAlignmentUtil;\r
 import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.RNAstruct;\r
+import compbio.data.sequence.RNAStruct;\r
 import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
+import compbio.data.sequence.Range;\r
+import compbio.data.sequence.RNAStructReader;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
 import compbio.engine.client.ConfExecutable;\r
@@ -152,38 +165,59 @@ public final class Util {
                                                + e.getLocalizedMessage(), e);\r
                }\r
        }\r
+\r
        \r
-       // OUT OF USE\r
-       \r
-//     public static final RNAstruct readRNAStructFile(String workDirectory,\r
-//                     String structFile) throws IOException, FileNotFoundException {\r
-//             assert !compbio.util.Util.isEmpty(workDirectory);\r
-//             assert !compbio.util.Util.isEmpty(structFile);\r
-//             File sfile = new File(compbio.engine.client.Util.getFullPath(\r
-//                             workDirectory, structFile));\r
-//             if(!(sfile.exists() && sfile.length() > 0)) {\r
-//                     throw new FileNotFoundException("Result for the jobId "\r
-//                                     + workDirectory + "with file name " + structFile\r
-//                                     + " is not found!");\r
-//             }\r
-//             return compbio.data.sequence.SequenceUtil.readRNAalifoldResults(\r
-//                             new FileInputStream(sfile));\r
-//     }\r
-       \r
-       public static String readRNAStruct(String workDirectory,\r
+       /**\r
+        * Reads the output of an RNAalifold job from its work directory.\r
+        * The stdout file is always parsed; the base pair probability file\r
+        * "alifold.out" is parsed together with it when that file exists.\r
+        *\r
+        * @param workDirectory the job work directory\r
+        * @param structFile name of the file holding the RNAalifold stdout\r
+        * @return the ScoreManager built by RNAStructReader\r
+        * @throws FileNotFoundException if structFile is missing or empty\r
+        * @throws IOException if reading or closing a result stream fails\r
+        */\r
+       public static ScoreManager readRNAStruct(String workDirectory,\r
                        String structFile) throws IOException, FileNotFoundException {\r
                assert !compbio.util.Util.isEmpty(workDirectory);\r
                assert !compbio.util.Util.isEmpty(structFile);\r
-               File sfile = new File(compbio.engine.client.Util.getFullPath(\r
+               // The stdout from RNAalifold\r
+               File sFile = new File(compbio.engine.client.Util.getFullPath(\r
                                workDirectory, structFile));\r
-               if(!(sfile.exists() && sfile.length() > 0)) {\r
+               // Base pair probability matrix (-p option)\r
+               File aliFile = new File(compbio.engine.client.Util.getFullPath(\r
+                               workDirectory, "alifold.out"));\r
+               // Check that stdout file exists\r
+               if(!(sFile.exists() && sFile.length() > 0)) {\r
                        throw new FileNotFoundException("Result for the jobId "\r
                                        + workDirectory + "with file name " + structFile\r
                                        + " is not found!");\r
                }\r
-               return readFile(sfile);\r
+               // Streams are closed here so they cannot leak on any exit path\r
+               FileInputStream structStream = new FileInputStream(sFile);\r
+               try {\r
+                       // Check that base pair probability file exists\r
+                       if(!aliFile.exists()) {\r
+                               log.warn("The file alifold.out is not found for the jobId "\r
+                                               + workDirectory\r
+                                               + ". Is the -p or --MEA option not specified?");\r
+                               return RNAStructReader.readRNAStructStream(structStream);\r
+                       }\r
+                       FileInputStream aliStream = new FileInputStream(aliFile);\r
+                       try {\r
+                               return RNAStructReader.readRNAStructStream(structStream,\r
+                                               aliStream);\r
+                       } finally {\r
+                               aliStream.close();\r
+                       }\r
+               } finally {\r
+                       structStream.close();\r
+               }\r
        }\r
        \r
+\r
+       \r
        public static String readFile(File inputFile) throws \r
                        FileNotFoundException, IOException {\r
 \r
@@ -203,7 +237,6 @@ public final class Util {
                input.close();\r
                return file;\r
        }\r
-       \r
-       \r
-\r
 }\r
+\r
+\r
index 52a784c..0e9e20f 100644 (file)
@@ -12,7 +12,8 @@ import java.util.List;
 
 import org.apache.log4j.Logger;
 
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.ScoreManager;
+import compbio.data.sequence.RNAStruct;
 import compbio.data.sequence.UnknownFileFormatException;
 import compbio.engine.client.PipedExecutable;
 import compbio.engine.client.SkeletalExecutable;
@@ -58,7 +59,7 @@ public class RNAalifold extends SkeletalExecutable<RNAalifold>
        @SuppressWarnings("unchecked")
        @Override
        // PlaceHolder method
-       public String getResults(String workDirectory)
+       public ScoreManager getResults(String workDirectory)
                        throws ResultNotAvailableException {
                try {
                        return Util.readRNAStruct(workDirectory, getOutput());
index b93cd18..2168a6c 100644 (file)
@@ -22,6 +22,7 @@ import org.apache.log4j.Logger;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import compbio.data.sequence.ScoreManager;
 import compbio.engine.Configurator;
 import compbio.engine.client.ConfiguredExecutable;
 import compbio.engine.client.Executable.ExecProvider;
@@ -280,7 +281,7 @@ public class RNAalifoldParametersTester {
                List<Option<?>> options = rnaalifoldOpc.getAllOptions();
                
                // take out -p options so it can be added to all commands later
-               options = removeParam(options, "Partition Function");
+               // options = removeParam(options, "Partition Function");
                
                options.addAll(rnaalifoldOpc.getAllParameters());
                List<List<String>> failedOptionPairs = new ArrayList<List<String>>();
@@ -293,7 +294,7 @@ public class RNAalifoldParametersTester {
                                        pair.add(options.get(i)); pair.add(options.get(j));
                                        List<String> args = rnaalifoldOpc.argumentsToCommandString(pair);
                                        // add -p
-                                       args.add("-p");
+                                       // args.add("-p");
                                        try { 
                                                failed = singleRun(args);
                                        } catch (ResultNotAvailableException e) {
@@ -356,10 +357,10 @@ public class RNAalifoldParametersTester {
                
                //List<String> args = rnaalifoldOpc.argumentsToCommandString(params);
                List<String> args = new ArrayList<String>();
-               args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10");
+               //args.add("-T 37"); args.add("-S 1.07"); args.add("--stochBT_en 10");
                // replace "=" with " " to fail test
                args.add("--MEA=1");
-               System.out.println("special: args: " + args);
+               args.add("-p");
                singleTest(args);
                
        }
@@ -379,7 +380,8 @@ public class RNAalifoldParametersTester {
                        confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
                        assertNotNull(confRNAalifold.getResults(), "results is null");
                        
-                       //System.out.println("Results: \n" + confRNAalifold.getResults());
+                       System.out.println("Results: \n" 
+                       + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
                        
                        File errors = new File(confRNAalifold.getWorkDirectory(),
                                        ExecutableWrapper.PROC_ERR_FILE);
@@ -429,8 +431,8 @@ public class RNAalifoldParametersTester {
                lr.executeJob();
                confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
                
-               //System.out.println("Results: \n" + confRNAalifold.getResults());
-               
+               System.out.println("Results: \n" 
+                               + ((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
                if (confRNAalifold.getResults() != null) fail = false;
                File errors = new File(confRNAalifold.getWorkDirectory(),
                                ExecutableWrapper.PROC_ERR_FILE);
index 4363e8d..4f5b508 100644 (file)
@@ -62,10 +62,11 @@ public class RNAalifoldTester {
                        + "RNAalifoldParameters.xml";
        public static String test_outfile = "rnaalifold.out";
        
+       
        @Test(groups = { AllTestSuit.test_group_runner })
        public void testRunLocally() {
                RNAalifold rnaalifold = new RNAalifold();
-               rnaalifold.setInput(AllTestSuit.test_alignment_input).setOutput(test_outfile);
+               rnaalifold.setInput(AllTestSuit.test_input_aln).setOutput(test_outfile);
                try{
                        
                        ConfiguredExecutable<RNAalifold> confRNAalifold = Configurator
@@ -73,6 +74,10 @@ public class RNAalifoldTester {
                        LocalRunner lr = new LocalRunner(confRNAalifold);
                        lr.executeJob();
                        confRNAalifold = (ConfiguredExecutable<RNAalifold>) lr.waitForResult();
+                       
+                       System.out.println("TEST");
+                       System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
+                       
                        assertNotNull(confRNAalifold.getResults()); 
                } catch (JobSubmissionException e) {
                        e.printStackTrace();
@@ -88,7 +93,7 @@ public class RNAalifoldTester {
        
        
        public static void main(String[] args) throws JobSubmissionException,
-                       JobExecutionException, InterruptedException {
+                       JobExecutionException, InterruptedException, ResultNotAvailableException {
 
                
                log.warn("Logger test :- Run RNAalifold.main()");
@@ -101,6 +106,10 @@ public class RNAalifoldTester {
                AsyncExecutor lr = new AsyncLocalRunner();
                lr.submitJob(confRNAalifold);
                
+               
+               System.out.println(((ScoreManager) confRNAalifold.getResults()).asRNAStruct().toString());
+
+               
                Thread.sleep(3000);
                LocalExecutorService.shutDown();
                
index b061fcc..5e66f43 100644 (file)
@@ -22,7 +22,7 @@ import compbio.data.msa.FoldWS;
 import compbio.data.msa.JABAService;
 import compbio.data.msa.SequenceAnnotation;
 import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
 import compbio.data.sequence.ClustalAlignmentUtil;
 import compbio.data.sequence.ConservationMethod;
 import compbio.data.sequence.FastaSequence;
index 4a99b9f..19282d6 100644 (file)
@@ -9,7 +9,7 @@ import javax.jws.WebService;
 import javax.naming.ldap.UnsolicitedNotificationEvent;
 
 import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
 import compbio.metadata.JobSubmissionException;
 import compbio.metadata.LimitExceededException;
 import compbio.metadata.Option;
index a06a6fa..aa048b0 100644 (file)
@@ -50,7 +50,7 @@ import compbio.data.msa.RegistryWS;
 import compbio.data.msa.SequenceAnnotation;\r
 import compbio.data.sequence.Alignment;\r
 import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.RNAstruct;\r
+import compbio.data.sequence.RNAStruct;\r
 import compbio.data.sequence.ScoreManager;\r
 import compbio.data.sequence.ClustalAlignmentUtil;\r
 import compbio.data.sequence.SequenceUtil;\r
index cafb173..7ff54e7 100644 (file)
@@ -13,7 +13,7 @@ import compbio.data.msa.JABAService;
 import compbio.data.msa.JManagement;
 import compbio.data.msa.Metadata;
 import compbio.data.sequence.Alignment;
-import compbio.data.sequence.RNAstruct;
+import compbio.data.sequence.RNAStruct;
 import compbio.data.sequence.FastaSequence;
 import compbio.engine.AsyncExecutor;
 import compbio.engine.Configurator;