Category.java updated to include new category for RNA folding. RNAalifoldParameters...
authorDaniel Barton <daluke.barton@gmail.com>
Wed, 28 Aug 2013 21:17:49 +0000 (22:17 +0100)
committerDaniel Barton <daluke.barton@gmail.com>
Wed, 28 Aug 2013 21:17:49 +0000 (22:17 +0100)
conf/settings/RNAalifoldParameters.xml
datamodel/compbio/data/sequence/RNAStructReader.java
datamodel/compbio/data/sequence/Score.java
datamodel/compbio/data/sequence/ScoreManager.java
webservices/compbio/data/msa/Category.java

index 5ee3fc2..a730f64 100644 (file)
@@ -5,8 +5,6 @@
                <name>Endgaps</name>
                <description>Score pairs with endgaps same as gap-gap pairs</description>
                <optionNames>-E</optionNames>
-               <optionNames>--endgaps</optionNames>
-               <defaultValue>-E</defaultValue>
        </options>
        <options isRequired='false'>
                <name>Most Informative Sequence</name>
                <name>Partition Function</name>
                <description>Output partition function and probability matrix</description>
                <optionNames>-p</optionNames>
-               <optionNames>--partfunc</optionNames>
-               <defaultValue>-p</defaultValue>
-       </options>
-       <options isRequired='false'>
-               <name>p0</name>
-               <description>deactivates the calculation of the pair probabilities</description>
-               <optionNames>-p0</optionNames>
-       </options>
-       <options isRequired='false'>
-               <name>color</name>
-               <description>Consensus structure plot is colored</description>
-               <optionNames>--color</optionNames>
-       </options>
-       <options isRequired='false'>
-               <name>Alignment</name>
-               <description>Output structure annotated alignment</description>
-               <optionNames>--aln</optionNames>
-       </options>
-       <options isRequired='false'>
-               <name>No Postscript</name>      
-               <description>Do not produce postscript output</description>
-               <optionNames>--noPS</optionNames>
        </options>
        <options isRequired='false'>
                <name>Circular</name>
                <description>Assume circular RNA molecule</description>
                <optionNames>-c</optionNames>
-               <optionNames>--circ</optionNames>
-               <defaultValue>-c</defaultValue>
        </options>
        <!--  Not Currently available with circular structures (-c) -->
        <options isRequired='false'>
                <name>G-Quadruplex</name>
                <description>Incorporate G-Quadruplex formation into prediction algorithm</description>
                <optionNames>-g</optionNames>
-               <optionNames>--gquad</optionNames>
-               <defaultValue>-g</defaultValue>
        </options>
        <options isRequired='false'>
                <name>d2</name>
@@ -84,8 +56,6 @@
                <name>Ribosum Scoring</name>
                <description>Use Ribosum Scoring Matrix</description>
                <optionNames>-r</optionNames>
-               <optionNames>--ribosum_scoring</optionNames>
-               <defaultValue>-r</defaultValue>
        </options>
        <options isRequired='false'>
                <name>d2</name>
                <description>Maximum Expected Accuracy Structure</description>
                <optionNames>--MEA</optionNames>
        </options>
-<!-- Constraints are entered on the command line followed by the input .aln file
-        Causing problems for a batch RNAalifold execution system -->
-<!--  
-       <option isRequired='false'>
-               <name>Constraints</name>
-               <description>Structures calculated subject to constraints</description>
-               <optionNames>-C</optionNames>
-               <validValue>
-                       <type>String</type>
-               </validValue>
-       </option> -->
        
        <prmSeparator> </prmSeparator>
-       <parameters isRequired='false'>
-               <name>Stochastic Backtrack</name>
-               <description>Compute a number of random structures</description>
-               <optionNames>-s</optionNames>
-<!--           Having multiple optionNames requires a default value but -->
-<!--                   in a parameter defaultValue refers to the argument -->
-<!--           <optionNames>-stochBT</optionNames> -->
-               <defaultValue>5</defaultValue>  
-               <validValue>
-                       <type>Integer</type>
-                       <min>1</min>
-                       <max>100000</max>
-               </validValue>
-       </parameters>
-       <parameters isRequired='false'>
-               <name>stochBT_en</name>
-               <description>Print Backtrack structures</description>
-               <optionNames>--stochBT_en</optionNames>
-               <defaultValue>5</defaultValue>  <!-- arbitrary -->
-               <validValue>
-                       <type>Integer</type>
-                       <min>1</min>
-                       <max>100000</max>
-               </validValue>
-       </parameters>
+       
        <parameters isRequired='false'>
                <name>scaling factor</name>
                <description>In calculating pf use scale*mfe as estimate for ensemble free energy</description>
                <optionNames>-S</optionNames>
-<!--           <optionNames>-pfScale</optionNames> -->
                <defaultValue>1.07</defaultValue>
                <validValue>
                        <type>Float</type>
        </parameters>
        <parameters isRequired='false'>
                <name>Temperature</name>
-               <description>Rescale Energy parameterss to Temperature</description>
+               <description>Rescale Energy parameters to Temperature</description>
                <optionNames>-T</optionNames>
-<!--           <optionNames>-temp</optionNames> -->
                <defaultValue>37</defaultValue>
                <validValue>
                        <type>Float</type>
                </validValue>
        </parameters>
        <parameters isRequired='false'>
-               <name>Dangling End</name>
-               <description>How to treat Dangling End energies for bases adjacent to helices</description>
-               <optionNames>-d</optionNames>
-<!--           <optionNames>-dangles</optionNames> -->
-               <defaultValue>2</defaultValue>
-               <validValue>
-                       <type>Integer</type>
-                       <min>0</min>
-                       <max>100000</max>
-               </validValue>
-       </parameters>
-       <parameters isRequired='false'>
                <name>cfactor</name>
                <description>weight of covariance term</description>
                <optionNames>--cfactor</optionNames>
                </validValue>
        </parameters>
        
-<!--    How to deal with default/possible values for parameter files? -->
-<!--   <parameters isRequired='false'> -->
-<!--           <name>Ribosum File</name> -->
-<!--           <description>Use Specified Ribosum Matrix</description> -->
-<!--           <optionNames>-R</optionNames> -->
-<!--           <optionNames>-ribosum_file</optionNames>  -->
-<!--   </parameters> -->
-<!--   <parameters isRequired='false'> -->
-<!--           <name>Paramfile</name> -->
-<!--           <description>Use Energy parameters from a file</description> -->
-<!--           <optionNames>-P</optionNames> -->
-<!--           <optionNames>-paramFile</optionNames>  -->
-<!--           <validValue> -->
-<!--                   <type>String</type> -->
-<!--           </validValue> -->
-<!--   </parameters> -->
-       
-       <!-- The values of this parameter are in the form of a comma separated
-                       list of allowed pairs. This makes a complete parameter list
-                       too large to be represented as a list of possible values
-                       How to deal with this? -->
-                       <!--  just support the most biologically viable pairs? -->
-       
-       <parameters isRequired='false'>
-               <name>Allow Pairs</name>
-               <description>allow pairs in addition to AU, GC and GU</description>
-               <optionNames>--nsp</optionNames>
-               <defaultValue>-GA</defaultValue>
-               <possibleValues>-GA</possibleValues>
-               <possibleValues>-AG</possibleValues>
-       </parameters>
-       <!--  Is dependant on -p (partfunc) -->
-       <parameters isRequired='false'>
-               <name>betaScale</name>
-               <description>Set scaling of Boltzmann factors</description>
-               <optionNames>--betaScale</optionNames>
-               <defaultValue>1.0</defaultValue>
-               <validValue>
-                       <type>Float</type>
-                       <min>0</min>
-                       <max>100000</max>
-               </validValue>
-       </parameters>
 </runnerConfig>
\ No newline at end of file
index 5eec46f..4292554 100644 (file)
@@ -34,7 +34,7 @@ public class RNAStructReader {
        // alifold out line patterns\r
        static String ps = "\\s*";\r
        static String alignmentP = "^"+seqP+ps+"$";\r
-       static String stdStructP = "^"+structP+s+"\\("+ps+floatP+s+floatP+s+floatP+ps+"\\)"+ps+"$";\r
+       static String mfeStructP = "^"+structP+s+"\\("+ps+floatP+s+floatP+s+floatP+ps+"\\)"+ps+"$";\r
        static String justStructP = "^"+structP+ps+"$";\r
        static String stochBTStructP = "^"+structP+s+floatP+s+floatP+ps+"$";\r
        static String PStructP = "^"+structP+s+"\\["+ps+floatP+ps+"\\]"+ps+"$";\r
@@ -42,7 +42,8 @@ public class RNAStructReader {
        static String MEAStructP = "^"+structP+s+"\\{"+ps+floatP+s+"MEA="+floatP+ps+"\\}"+ps+"$";\r
        static String freeEnergyP = "^"+ps+"free energy of ensemble"+ps+"="+ps+floatP+ps+"kcal/mol"+ps+"$";\r
        static String ensembleFreqP = "^"+ps+"frequency of mfe structure in ensemble "+floatP+ps+"$";\r
-\r
+       \r
+       \r
        public static RNAStructScoreManager readRNAStructStream(InputStream stdout)\r
                        throws IOException {\r
                \r
@@ -60,17 +61,17 @@ public class RNAStructReader {
                assert (Pattern.matches(AlifoldLine.alignment.regex, fline)) :\r
                        error + "Sequence Alignment Expected";\r
                structs.add(fline.trim());\r
-               data.add(newEmptyScore(AlifoldResult.alifoldSeq));\r
+               data.add(newEmptyScore(AlifoldResult.consensusAlignment));\r
                \r
                fline = reader.readLine();\r
-               assert (Pattern.matches(AlifoldLine.stdStruct.regex, fline)) :\r
+               assert (Pattern.matches(AlifoldLine.mfeStruct.regex, fline)) :\r
                        error + "Consensus Structure and Energy Expected";\r
                Scanner sc = new Scanner(fline);\r
                structs.add(sc.next());\r
                for (int i = 0; i < 3; i++) {\r
                        scores.add(Float.parseFloat(sc.findInLine(floatP)));\r
                }\r
-               data.add(newSetScore(AlifoldResult.alifold, scores));\r
+               data.add(newSetScore(AlifoldResult.mfeStructure, scores));\r
                \r
                // Now the alifold stdout file formats diverge based on arguments\r
                fline = reader.readLine();\r
@@ -91,7 +92,7 @@ public class RNAStructReader {
                                structs.add(sc.next());\r
                                scores.add(Float.parseFloat(sc.findInLine(floatP)));\r
                                scores.add(Float.parseFloat(nsc.findInLine(floatP)));\r
-                               data.add(newSetScore(AlifoldResult.alifoldP, scores));\r
+                               data.add(newSetScore(AlifoldResult.contactProbabilityStructure, scores));\r
                                // Jump line\r
                                sline = reader.readLine();\r
                        }\r
@@ -100,24 +101,24 @@ public class RNAStructReader {
                                for (int i = 0; i < 3; i++) {\r
                                        scores.add(Float.parseFloat(sc.findInLine(floatP)));\r
                                }\r
-                               data.add(newSetScore(AlifoldResult.alifoldCentroid, scores));\r
+                               data.add(newSetScore(AlifoldResult.centroidStructure, scores));\r
                        }\r
                        else if (ftype.equals(AlifoldLine.MEAStruct)) {\r
                                structs.add(sc.next());\r
                                for (int i = 0; i < 2; i++) {\r
                                        scores.add(Float.parseFloat(sc.findInLine(floatP)));\r
                                }\r
-                               data.add(newSetScore(AlifoldResult.alifoldMEA, scores));\r
+                               data.add(newSetScore(AlifoldResult.MEAStucture, scores));\r
                        }\r
                        else if (ftype.equals(AlifoldLine.justStruct)) {\r
                                structs.add(sc.next());\r
-                               data.add(newEmptyScore(AlifoldResult.alifoldStochBT));\r
+                               data.add(newEmptyScore(AlifoldResult.stochBTStructure));\r
                        }\r
                        else if (ftype.equals(AlifoldLine.stochBTStruct)) {\r
                                structs.add(sc.next());\r
                                scores.add(sc.nextFloat());\r
                                scores.add(sc.nextFloat());\r
-                               data.add(newSetScore(AlifoldResult.alifoldStochBT, scores));\r
+                               data.add(newSetScore(AlifoldResult.stochBTStructure, scores));\r
                        }\r
                        else if (ftype.equals(AlifoldLine.freeEnergy)) {\r
                                assert (sline != null \r
@@ -127,7 +128,7 @@ public class RNAStructReader {
                                                + "frequency of mfe structure in ensemble");\r
                                scores.add(Float.parseFloat(sc.findInLine(floatP)));\r
                                scores.add(Float.parseFloat(nsc.findInLine(floatP)));\r
-                               data.add(newSetScore(AlifoldResult.alifoldMetadata, scores));\r
+                               data.add(newSetScore(AlifoldResult.ensembleValues, scores));\r
                                // jump line\r
                                sline = reader.readLine();\r
                        }\r
@@ -135,7 +136,7 @@ public class RNAStructReader {
 \r
                        assert(!ftype.equals(AlifoldLine.ensembleFreq)) :\r
                                error + "Wasn't expecting 'frequency of mfe structure'!";\r
-                       assert(!ftype.equals(AlifoldLine.stdStruct)) :\r
+                       assert(!ftype.equals(AlifoldLine.mfeStruct)) :\r
                                error + "'Standard output' line at a place other than line 2!";\r
                        assert(!ftype.equals(AlifoldLine.alignment)) :\r
                                error + "Wasn't expecting an alignment sequence!";\r
@@ -218,7 +219,7 @@ public class RNAStructReader {
                for (int i = 0; i < rangeHolder.size(); i++) {\r
                        ArrayList<Float> singleS = new ArrayList<Float>(Arrays.asList(scores.get(i)));\r
                        TreeSet<Range> singleR = new TreeSet<Range>(Arrays.asList(rangeHolder.get(i)));\r
-                       sHolder.add(new Score(AlifoldResult.alifoldSeq, singleS, singleR));\r
+                       sHolder.add(new Score(AlifoldResult.contactProbabilities, singleS, singleR));\r
                }\r
                \r
                data.set(0, sHolder);\r
@@ -249,7 +250,7 @@ public class RNAStructReader {
        }\r
        \r
        static enum AlifoldLine {\r
-               stdStruct (stdStructP),\r
+               mfeStruct (mfeStructP),\r
                justStruct (justStructP),\r
                stochBTStruct (stochBTStructP),\r
                PStruct (PStructP),\r
@@ -272,11 +273,11 @@ public class RNAStructReader {
 \r
        //Tag stored in each Score object of an alifold result; it indicates\r
        //what kind of sequence that Score is holding in its String Id.\r
-       static enum AlifoldResult {\r
-               alifold, alifoldP, alifoldMEA, alifoldCentroid, alifoldStochBT, alifoldSeq, alifoldMetadata\r
+\r
+       public static enum AlifoldResult {\r
+               mfeStructure, contactProbabilityStructure, MEAStucture, centroidStructure, stochBTStructure, consensusAlignment, ensembleValues, contactProbabilities\r
        }\r
        \r
-       \r
 \r
        // Print the full regex Strings for testing \r
        public static void main(String[] args) {\r
index b886590..1cac69c 100644 (file)
@@ -249,6 +249,8 @@ public class Score implements Comparable<Score> {
         */\r
        \r
        \r
+\r
+       \r
        @Override\r
        public int compareTo(Score o) {\r
                if (this.method.compareTo(o.method) != 0) {\r
index 3c1bc21..61a8927 100644 (file)
@@ -51,9 +51,7 @@ public class ScoreManager {
        }\r
 \r
        private ScoreManager(Map<String, Set<Score>> data) {\r
-               System.out.println("A ScoreManager was created!!");\r
 \r
-               \r
                List<ScoreHolder> seqScores = new ArrayList<ScoreHolder>();\r
                for (Map.Entry<String, Set<Score>> singleSeqScores : data.entrySet()) {\r
                        seqScores.add(new ScoreHolder(singleSeqScores.getKey(),\r
@@ -63,8 +61,6 @@ public class ScoreManager {
        }\r
 \r
        public static ScoreManager newInstance(Map<String, Set<Score>> data) {\r
-               System.out.println("A ScoreManager was created!!");\r
-\r
                \r
                return new ScoreManager(data);\r
        }\r
@@ -72,9 +68,6 @@ public class ScoreManager {
        public static ScoreManager newInstanceSingleScore(\r
                        Map<String, Score> seqScoresMap) {\r
                \r
-               System.out.println("A ScoreManager was created!!");\r
-\r
-               \r
                Map<String, Set<Score>> multipleScoresMap = new TreeMap<String, Set<Score>>();\r
                for (Map.Entry<String, Score> seqScore : seqScoresMap.entrySet()) {\r
                        Set<Score> scores = new TreeSet<Score>();\r
@@ -85,9 +78,6 @@ public class ScoreManager {
        }\r
 \r
        public static ScoreManager newInstanceSingleSequence(Set<Score> data) {\r
-               System.out.println("A ScoreManager was created!!");\r
-\r
-               \r
                return new ScoreManager(ScoreManager.SINGLE_ENTRY_KEY,\r
                                new TreeSet(data));\r
        }\r
index 56bf1de..3de1b31 100644 (file)
@@ -34,6 +34,7 @@ public class Category {
        public static final String CATEGORY_ALIGNMENT = "Alignment";\r
        public static final String CATEGORY_DISORDER = "Protein Disorder";\r
        public static final String CATEGORY_CONSERVATION = "Conservation";\r
+       public static final String CATEGORY_RNASTRUCT = "RNA Structure Prediction";\r
 \r
        public String name;\r
        Set<Services> services;\r
@@ -78,10 +79,17 @@ public class Category {
                Category conservation = new Category(CATEGORY_CONSERVATION,\r
                                conservation_services);\r
 \r
+               Set<Services> rnastruct_services = new HashSet<Services>();\r
+               rnastruct_services.add(Services.RNAalifoldWS);\r
+               \r
+               Category rnastruct = new Category(CATEGORY_RNASTRUCT,\r
+                               rnastruct_services);\r
+               \r
                Set<Category> categories = new HashSet<Category>();\r
                categories.add(alignment);\r
                categories.add(disorder);\r
                categories.add(conservation);\r
+               categories.add(rnastruct);\r
 \r
                return categories;\r
        }\r