Simple datamodel for RNAalifold.exe output and method to convert from
authorDaniel Barton <daluke.barton@gmail.com>
Mon, 5 Aug 2013 16:51:47 +0000 (17:51 +0100)
committerDaniel Barton <daluke.barton@gmail.com>
Mon, 5 Aug 2013 16:51:47 +0000 (17:51 +0100)
fasta to clustal format and write clustal input files.

datamodel/compbio/data/sequence/RNAstruct.java [new file with mode: 0644]
datamodel/compbio/data/sequence/SequenceUtil.java
runner/compbio/runner/Util.java
runner/compbio/runner/structure/RNAalifold.java
testsrc/compbio/data/sequence/SequenceUtilTester.java
testsrc/compbio/runner/structure/RNAalifoldTester.java
webservices/compbio/data/structure/FoldWS.java [new file with mode: 0644]
webservices/compbio/ws/server/RNAalifoldWS.java
webservices/compbio/ws/server/WSUtil.java

diff --git a/datamodel/compbio/data/sequence/RNAstruct.java b/datamodel/compbio/data/sequence/RNAstruct.java
new file mode 100644 (file)
index 0000000..1e36064
--- /dev/null
@@ -0,0 +1,76 @@
+package compbio.data.sequence;
+
+import java.util.List;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+
+import compbio.util.annotation.Immutable;
+
+/*
+ * RNA secondary structure
+ */
+
+@XmlAccessorType(XmlAccessType.FIELD)
+public final class RNAstruct {
+       
+       private String sequence;
+       private String structure; // needs to be array to deal with all output
+       private Float minEnergy;
+       private Float energySum1; private Float energySum2;
+       
+       
+       public RNAstruct() {
+               // default JaxB Constructor
+       }
+       
+       public RNAstruct(String sequence, String structure, Float minEnergy
+                       , Float energySum1, Float energySum2) {
+               this.sequence = sequence;
+               this.structure = structure;
+               this.minEnergy = minEnergy;
+               this.energySum1 = energySum1;
+               this.energySum2 = energySum2;
+       }
+       
+       public String getSequence() {
+               return sequence;
+       }
+       
+       public String getStructure() {
+               return structure;
+       }
+       
+       public Float getEnergy() {
+               return minEnergy;
+       }
+       
+       
+       @Override
+       public String toString() {
+               String newLine = System.getProperty("line.separator",".");
+               return sequence + newLine + structure + " (" + minEnergy.toString() 
+                               + " = " + energySum1.toString() + " +  " + energySum2.toString()
+                               + ")"; 
+       }
+       
+       @Override 
+       public boolean equals(Object obj) {
+               if (obj == null) {
+                       return false;
+               }
+               if (!(obj instanceof RNAstruct)) {
+                       return false;
+               }
+               RNAstruct st = (RNAstruct) obj;
+               if (!(this.getSequence() == st.getSequence() &&
+                               this.getStructure() == st.getStructure() &&
+                               this.getEnergy() == st.getEnergy())) {
+                       return false;
+               }
+               
+               return true;
+       }
+}
+
+
index 14dedf4..548e57d 100644 (file)
@@ -757,6 +757,34 @@ public final class SequenceUtil {
                }\r
                return annotations;\r
        }\r
+       \r
+       /*\r
+        * Reads and parses the output of an RNAalifold job \r
+        * \r
+        * Currently only in the defualt no args format\r
+        */\r
+       public static RNAstruct readRNAalifoldResults(InputStream results) {\r
+               if (results == null) {\r
+                       throw new NullPointerException(\r
+                                       "InputStream with results must be provided");\r
+               }\r
+               Scanner sc = new Scanner(results);\r
+               sc.useDelimiter("\\s+");\r
+               String sequence = sc.next();\r
+               String structure = sc.next();\r
+               // now get energy values out of the brackets\r
+               String regex = "-?[0-9]*\\.?[0-9]+";\r
+               Float minEnergy = Float.parseFloat(sc.findInLine(regex));\r
+               \r
+               RNAstruct rnastruct = new RNAstruct(sequence, structure, minEnergy,\r
+                               Float.parseFloat(sc.findInLine(regex)), \r
+                               Float.parseFloat(sc.findInLine(regex)));\r
+               \r
+               sc.close();\r
+               return rnastruct;\r
+       }\r
+       \r
+       \r
 \r
        /**\r
         * Reads and parses Fasta or Clustal formatted file into a list of\r
@@ -791,6 +819,26 @@ public final class SequenceUtil {
                return fastaSeqs;\r
        }\r
 \r
+       // This can't possibly be right for all cases!\r
+       // but it will do for now\r
+       \r
+       // As for the metadata: this function doesn't know which program\r
+       // generated the alignment. How should the metadata be handled?\r
+       \r
+       public static void writeClustal(OutputStream outStream,\r
+                       List<FastaSequence> sequences, char gapChar) \r
+                       throws IOException {\r
+               \r
+               BufferedWriter writer = new BufferedWriter(\r
+                               new OutputStreamWriter(outStream));\r
+               // will give AlignmentMetadata default type of CLUSTAL for now\r
+               AlignmentMetadata al = new AlignmentMetadata(Program.CLUSTAL, gapChar);\r
+               \r
+               ClustalAlignmentUtil.writeClustalAlignment(writer, \r
+                               new Alignment(sequences, al));\r
+               \r
+       }\r
+\r
 }\r
 \r
 enum DisemblResult {\r
index 8d77864..f033bf0 100644 (file)
@@ -19,6 +19,7 @@
 package compbio.runner;\r
 \r
 import java.io.File;\r
+import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
 import java.io.FileOutputStream;\r
 import java.io.IOException;\r
@@ -30,6 +31,7 @@ import org.apache.log4j.Logger;
 import compbio.data.sequence.Alignment;\r
 import compbio.data.sequence.ClustalAlignmentUtil;\r
 import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.RNAstruct;\r
 import compbio.data.sequence.Score;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
@@ -130,5 +132,41 @@ public final class Util {
                                                        + e.getLocalizedMessage(), e);\r
                }\r
        }\r
+       \r
+       public static void writeClustalInput(List<FastaSequence> sequences,\r
+                       ConfiguredExecutable<?> exec, char gapChar) throws JobSubmissionException {\r
+               \r
+               try {\r
+                       File filein = new File(exec.getInput());\r
+                       FileOutputStream fout = new FileOutputStream(filein);\r
+                       log.debug("File path: " + filein.getAbsolutePath());\r
+                       SequenceUtil.writeClustal(fout, sequences, gapChar);\r
+                       fout.close();\r
+               } catch (IOException e) {\r
+                       log.error("IOException while writing input file into the disk: "\r
+                                       + e.getLocalizedMessage(), e);\r
+                       throw new JobSubmissionException(\r
+                                       "We are sorry but JABAWS server seems to have a problem! "\r
+                                               + e.getLocalizedMessage(), e);\r
+               }\r
+       }\r
+       \r
+       public static final RNAstruct readRNAStructFile(String workDirectory,\r
+                       String structFile) throws IOException, FileNotFoundException {\r
+               assert !compbio.util.Util.isEmpty(workDirectory);\r
+               assert !compbio.util.Util.isEmpty(structFile);\r
+               File sfile = new File(compbio.engine.client.Util.getFullPath(\r
+                               workDirectory, structFile));\r
+               log.trace("RNAALIFOLD OUTPUT FILE PATH: " + sfile.getAbsolutePath());\r
+               if(!(sfile.exists() && sfile.length() > 0)) {\r
+                       throw new FileNotFoundException("Result for the jobId "\r
+                                       + workDirectory + "with file name " + structFile\r
+                                       + " is not found!");\r
+               }\r
+               return compbio.data.sequence.SequenceUtil.readRNAalifoldResults(\r
+                               new FileInputStream(sfile));\r
+       }\r
+       \r
+       \r
 \r
 }\r
index 4ec1ec7..acb4a9e 100644 (file)
@@ -11,9 +11,11 @@ import java.io.FileReader;
 import java.io.BufferedReader;
 import java.io.File;
 
+
+
 import org.apache.log4j.Logger;
 
-import compbio.data.sequence.Alignment;
+import compbio.data.sequence.RNAstruct;
 import compbio.data.sequence.UnknownFileFormatException;
 import compbio.engine.client.PipedExecutable;
 import compbio.engine.client.SkeletalExecutable;
@@ -54,11 +56,12 @@ public class RNAalifold extends SkeletalExecutable<RNAalifold>
        @SuppressWarnings("unchecked")
        @Override
        // PlaceHolder method
-       public String getResults(String workDirectory)
+       public RNAstruct getResults(String workDirectory)
                        throws ResultNotAvailableException {
                try {
-                       // System.out.print(readRNAStruct(workDirectory, getOutput()));
-                       return readRNAStruct(workDirectory, getOutput());
+                       // System.out.println("Specialread " + Util.readRNAStructFile(workDirectory, getOutput()).toString());
+                       return Util.readRNAStructFile(workDirectory, getOutput());
+                       
                } catch (FileNotFoundException e) {
                        log.error(e.getMessage(), e.getCause());
                        throw new ResultNotAvailableException(e);
@@ -68,43 +71,47 @@ public class RNAalifold extends SkeletalExecutable<RNAalifold>
                }
        }
 
+
+       // OLD :- the new methods for reading are found in 
+       // - compbio.data.sequence.SequenceUtil and 
+       // - compbio.runner.Util
        
        // Simple and generic methods for reading a whole file
        // Should surfice until a more detailed datamodel and parser are developed
-       private static String readRNAStruct(String workDirectory,
-                       String structFile) throws IOException, FileNotFoundException {
-               assert !compbio.util.Util.isEmpty(workDirectory);
-               assert !compbio.util.Util.isEmpty(structFile);
-               File sfile = new File(compbio.engine.client.Util.getFullPath(
-                               workDirectory, structFile));
-               log.trace("RNAALIFOLD OUTPUT FILE PATH: " + sfile.getAbsolutePath());
-               if(!(sfile.exists() && sfile.length() > 0)) {
-                       throw new FileNotFoundException("Result for the jobId "
-                                       + workDirectory + "with file name " + structFile
-                                       + " is not found!");
-               }
-               return readFile(sfile);
-       }
-       
-       private static BufferedReader input;
-       public static String readFile(File inputFile) throws 
-                       FileNotFoundException, IOException {
-               
-               input   = new BufferedReader(new FileReader(inputFile));
-               
-               String file = new String();
-               String line = new String();
-               
-               while (true) {
-                       line = input.readLine();
-                       
-                       if (line != null) {
-                               file = file + line + "\r\n";
-                       } else break;
-               }
-               // Close file
-               input.close();
-               return file;
-       }
+//     private static String readRNAStruct(String workDirectory,
+//                     String structFile) throws IOException, FileNotFoundException {
+//             assert !compbio.util.Util.isEmpty(workDirectory);
+//             assert !compbio.util.Util.isEmpty(structFile);
+//             File sfile = new File(compbio.engine.client.Util.getFullPath(
+//                             workDirectory, structFile));
+//             log.trace("RNAALIFOLD OUTPUT FILE PATH: " + sfile.getAbsolutePath());
+//             if(!(sfile.exists() && sfile.length() > 0)) {
+//                     throw new FileNotFoundException("Result for the jobId "
+//                                     + workDirectory + "with file name " + structFile
+//                                     + " is not found!");
+//             }
+//             return readFile(sfile);
+//     }
+//     
+//     private static BufferedReader input;
+//     public static String readFile(File inputFile) throws 
+//                     FileNotFoundException, IOException {
+//             
+//             input   = new BufferedReader(new FileReader(inputFile));
+//             
+//             String file = new String();
+//             String line = new String();
+//             
+//             while (true) {
+//                     line = input.readLine();
+//                     
+//                     if (line != null) {
+//                             file = file + line + "\r\n";
+//                     } else break;
+//             }
+//             // Close file
+//             input.close();
+//             return file;
+//     }
        
 }
index e646c1e..25c81a1 100644 (file)
@@ -140,6 +140,37 @@ public class SequenceUtilTester {
                }\r
        }\r
 \r
+       // Potential Bug :- Sequence names are shortened to 2-3 letters\r
+       @Test\r
+       public void testReadFastaWriteClustal() {\r
+               \r
+               try {\r
+                       FileInputStream fio = new FileInputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
+                       assertNotNull(fio);\r
+                       List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
+                       assertNotNull(fseqs);\r
+                       fio.close();\r
+                       \r
+                       char gapChar = '-';\r
+                       FileOutputStream fou = new FileOutputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "TO1381.aln.written");\r
+                       SequenceUtil.writeClustal(fou, fseqs, gapChar);\r
+                       fou.close();\r
+                       \r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }               \r
+               \r
+                       \r
+\r
+                       \r
+       \r
        /**\r
         * This test tests the loading of horizontally formatted Jronn output file\r
         */\r
@@ -409,3 +440,4 @@ public class SequenceUtilTester {
                }\r
        }\r
 }\r
+\r
index 2ce3d5f..4363e8d 100644 (file)
@@ -65,7 +65,7 @@ public class RNAalifoldTester {
        @Test(groups = { AllTestSuit.test_group_runner })
        public void testRunLocally() {
                RNAalifold rnaalifold = new RNAalifold();
-               rnaalifold.setInput(AllTestSuit.test_input_aln).setOutput(test_outfile);
+               rnaalifold.setInput(AllTestSuit.test_alignment_input).setOutput(test_outfile);
                try{
                        
                        ConfiguredExecutable<RNAalifold> confRNAalifold = Configurator
diff --git a/webservices/compbio/data/structure/FoldWS.java b/webservices/compbio/data/structure/FoldWS.java
new file mode 100644 (file)
index 0000000..7d5238f
--- /dev/null
@@ -0,0 +1,41 @@
+package compbio.data.structure;
+
+import java.security.InvalidParameterException;
+import java.util.List;
+
+import javax.jws.WebMethod;
+import javax.jws.WebParam;
+import javax.jws.WebService;
+
+import compbio.data.msa.JABAService;
+import compbio.data.msa.JManagement;
+import compbio.data.msa.Metadata;
+import compbio.metadata.JobSubmissionException;
+import compbio.metadata.LimitExceededException;
+import compbio.metadata.Option;
+import compbio.metadata.Preset;
+import compbio.metadata.ResultNotAvailableException;
+import compbio.metadata.UnsupportedRuntimeException;
+import compbio.metadata.WrongParameterException;
+
+/*
+ * Interface for tools that return RNA secondary structure information
+ * 
+ * Still unsure whether a new interface is needed or whether secondary structure
+ * information could be expressed as a sequence annotation
+ */
+
+// effect of a different targetNamespace?
+// Web service interface combining JABAService, JManagement and Metadata<T>
+// with a single fold operation.
+@WebService(targetNamespace = JABAService.V2_SERVICE_NAMESPACE)
+public interface FoldWS<T> 
+               extends 
+                       JABAService,
+                       JManagement,
+                       Metadata<T> {
+
+       
+       
+       // NOTE(review): the declaration below is unfinished and does not
+       // compile as committed: the @WebParam name has no value, and the
+       // parameter type, closing parenthesis and semicolon are missing.
+       // The intended signature cannot be determined from this file - confirm
+       // with the author before completing it.
+       @WebMethod
+       String fold(
+                       @WebParam(name = )
+}
index 4a1d5a1..83d7c93 100644 (file)
@@ -47,6 +47,9 @@ public class RNAalifoldWS implements JABAService, JManagement, Metadata<RNAalifo
        private static final LimitsManager<RNAalifold> limitMan = compbio.engine.client.Util
                        .getLimits(new RNAalifold().getType());
        
+       //public String fold()
+       
+       
        ConfiguredExecutable<RNAalifold> init() throws JobSubmissionException {
                RNAalifold rnaalifold = new RNAalifold();
                rnaalifold.setInput(SkeletalExecutable.INPUT)
@@ -115,12 +118,12 @@ public class RNAalifoldWS implements JABAService, JManagement, Metadata<RNAalifo
        // PlaceHolder 
        public ChunkHolder pullExecStatistics(String jobId, long position) {
 
-               WSUtil.validateJobId(jobId);
+//             WSUtil.validateJobId(jobId);
 //             String file = Configurator.getWorkDirectory(jobId) + File.separator
 //                             + RNAalifold.getStatFile();
 //             ChunkHolder cholder = WSUtil.pullFile(file, position);
 //             return cholder;
-               return null;
+               return new ChunkHolder("", -1);
        }
 }
        
index 7341989..71da240 100644 (file)
@@ -111,6 +111,23 @@ public final class WSUtil {
                reportUsage(confExec, logger);\r
                return jobId;\r
        }\r
+       \r
+       // Hardcoded gapchar '-'\r
+       \r
+       public static <T> String fold(List<FastaSequence> sequences,\r
+                       ConfiguredExecutable<T> confExec, Logger logger,\r
+                       String callingMethod, Limit<T> limit)\r
+                       throws LimitExceededException, JobSubmissionException {\r
+               \r
+               if (limit != null && limit.isExceeded(sequences)) {\r
+                       throw LimitExceededException.newLimitExceeded(limit, sequences);\r
+               }\r
+               compbio.runner.Util.writeClustalInput(sequences, confExec, '-');\r
+               AsyncExecutor engine = Configurator.getAsyncEngine(confExec);\r
+               String jobId = engine.submitJob(confExec);\r
+               reportUsage(confExec, logger);\r
+               return jobId;\r
+       }\r
 \r
        static <T> void reportUsage(ConfiguredExecutable<T> confExec, Logger logger) {\r
                if (GAUtils.IS_GA_ENABLED) {\r