From a966c1a63f4365f34a0567514dee6d1bc4d3af91 Mon Sep 17 00:00:00 2001 From: Daniel Barton Date: Mon, 5 Aug 2013 17:51:47 +0100 Subject: [PATCH] Simple datamodel for RNAalifold.exe output and method to convert from fasta to clustal format and write clustal input files. --- datamodel/compbio/data/sequence/RNAstruct.java | 76 +++++++++++++++++ datamodel/compbio/data/sequence/SequenceUtil.java | 48 +++++++++++ runner/compbio/runner/Util.java | 38 +++++++++ runner/compbio/runner/structure/RNAalifold.java | 85 +++++++++++--------- .../compbio/data/sequence/SequenceUtilTester.java | 32 ++++++++ .../compbio/runner/structure/RNAalifoldTester.java | 2 +- webservices/compbio/data/structure/FoldWS.java | 41 ++++++++++ webservices/compbio/ws/server/RNAalifoldWS.java | 7 +- webservices/compbio/ws/server/WSUtil.java | 17 ++++ 9 files changed, 304 insertions(+), 42 deletions(-) create mode 100644 datamodel/compbio/data/sequence/RNAstruct.java create mode 100644 webservices/compbio/data/structure/FoldWS.java diff --git a/datamodel/compbio/data/sequence/RNAstruct.java b/datamodel/compbio/data/sequence/RNAstruct.java new file mode 100644 index 0000000..1e36064 --- /dev/null +++ b/datamodel/compbio/data/sequence/RNAstruct.java @@ -0,0 +1,76 @@ +package compbio.data.sequence; + +import java.util.List; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; + +import compbio.util.annotation.Immutable; + +/* + * RNA secondary structure as reported by RNAalifold: holds the sequence and structure strings together with the three energy values read from the output + */ + +@XmlAccessorType(XmlAccessType.FIELD) +public final class RNAstruct { + + private String sequence; + private String structure; // needs to be array to deal with all output + private Float minEnergy; + private Float energySum1; private Float energySum2; + + + public RNAstruct() { + // default JaxB Constructor + } + + public RNAstruct(String sequence, String structure, Float minEnergy + , Float energySum1, Float energySum2) { + this.sequence = sequence; + this.structure = structure; + this.minEnergy = 
minEnergy; + this.energySum1 = energySum1; + this.energySum2 = energySum2; + } + + public String getSequence() { + return sequence; + } + + public String getStructure() { + return structure; + } + + public Float getEnergy() { + return minEnergy; + } + + /** Renders the sequence, a line separator, then the structure followed by the three energy values in brackets; unset values print as "null" instead of throwing. */ + @Override + public String toString() { + String newLine = System.getProperty("line.separator","."); + return sequence + newLine + structure + " (" + minEnergy + + " = " + energySum1 + " + " + energySum2 + + ")"; + } + /** Value equality on sequence, structure and minimum energy, the same three values the previous reference comparison looked at; null values are tolerated. */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RNAstruct)) { + return false; + } + RNAstruct st = (RNAstruct) obj; + return same(sequence, st.sequence) && same(structure, st.structure) + && same(minEnergy, st.minEnergy); + } + + @Override + public int hashCode() { + return java.util.Arrays.hashCode(new Object[] { sequence, structure, minEnergy }); + } + + private static boolean same(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } +} + diff --git a/datamodel/compbio/data/sequence/SequenceUtil.java b/datamodel/compbio/data/sequence/SequenceUtil.java index 14dedf4..548e57d 100644 --- a/datamodel/compbio/data/sequence/SequenceUtil.java +++ b/datamodel/compbio/data/sequence/SequenceUtil.java @@ -757,6 +757,34 @@ public final class SequenceUtil { } return annotations; } + + /* + * Reads and parses the output of an RNAalifold job: the first two whitespace-separated tokens are the sequence and the structure, then three numbers are matched on the rest of that line + * The scanner, and with it the supplied stream, is closed whether parsing succeeds or fails + * Currently only in the default no args format + */ + public static RNAstruct readRNAalifoldResults(InputStream results) { + if (results == null) { + throw new NullPointerException( + "InputStream with results must be provided"); + } + Scanner sc = new Scanner(results); + try { + sc.useDelimiter("\\s+"); + String sequence = sc.next(); + String structure = sc.next(); + // now get energy values out of the brackets + String regex = "-?[0-9]*\\.?[0-9]+"; + Float minEnergy = Float.parseFloat(sc.findInLine(regex)); + return new RNAstruct(sequence, structure, minEnergy, + Float.parseFloat(sc.findInLine(regex)), + Float.parseFloat(sc.findInLine(regex))); + } finally { + // release the scanner and the wrapped stream even when a token is missing or malformed + sc.close(); + } + } + + /** * Reads and 
parses Fasta or Clustal formatted file into a list of @@ -791,6 +819,26 @@ public final class SequenceUtil { return fastaSeqs; } + // This can't possibly be right for all cases! + // but it will do for now + + // As for the metadata. This function does not know what program + // generated it. How to handle the metadata!? + + public static void writeClustal(OutputStream outStream, + List sequences, char gapChar) + throws IOException { + + BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(outStream)); + // will give AlignmentMetadata default type of CLUSTAL for now + AlignmentMetadata al = new AlignmentMetadata(Program.CLUSTAL, gapChar); + + ClustalAlignmentUtil.writeClustalAlignment(writer, + new Alignment(sequences, al)); + // NOTE(review): writer is neither flushed nor closed here - confirm that writeClustalAlignment does it, otherwise buffered output can be lost + } + + } enum DisemblResult { diff --git a/runner/compbio/runner/Util.java b/runner/compbio/runner/Util.java index 8d77864..f033bf0 100644 --- a/runner/compbio/runner/Util.java +++ b/runner/compbio/runner/Util.java @@ -19,6 +19,7 @@ package compbio.runner; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -30,6 +31,7 @@ import org.apache.log4j.Logger; import compbio.data.sequence.Alignment; import compbio.data.sequence.ClustalAlignmentUtil; import compbio.data.sequence.FastaSequence; +import compbio.data.sequence.RNAstruct; import compbio.data.sequence.Score; import compbio.data.sequence.SequenceUtil; import compbio.data.sequence.UnknownFileFormatException; @@ -130,5 +132,41 @@ public final class Util { + e.getLocalizedMessage(), e); } } + /** Writes the sequences in Clustal format to the input file of exec; the stream is closed even when writing fails. */ + public static void writeClustalInput(List sequences, + ConfiguredExecutable exec, char gapChar) throws JobSubmissionException { + try { + File filein = new File(exec.getInput()); + FileOutputStream fout = new FileOutputStream(filein); + try { + log.debug("File path: " + filein.getAbsolutePath()); + SequenceUtil.writeClustal(fout, sequences, gapChar); + } finally { + fout.close(); + } + } catch (IOException e) { 
+ log.error("IOException while writing input file into the disk: " + + e.getLocalizedMessage(), e); + throw new JobSubmissionException( + "We are sorry but JABAWS server seems to have a problem! " + + e.getLocalizedMessage(), e); + } + } + /** Parses the RNAalifold output file structFile found under workDirectory; a missing or empty file is reported as FileNotFoundException. */ + public static final RNAstruct readRNAStructFile(String workDirectory, + String structFile) throws IOException, FileNotFoundException { + assert !compbio.util.Util.isEmpty(workDirectory); + assert !compbio.util.Util.isEmpty(structFile); + File sfile = new File(compbio.engine.client.Util.getFullPath( + workDirectory, structFile)); + log.trace("RNAALIFOLD OUTPUT FILE PATH: " + sfile.getAbsolutePath()); + if(!(sfile.exists() && sfile.length() > 0)) { + throw new FileNotFoundException("Result for the jobId " + + workDirectory + " with file name " + structFile + + " is not found!"); + } + return compbio.data.sequence.SequenceUtil.readRNAalifoldResults( + new FileInputStream(sfile)); + } + } diff --git a/runner/compbio/runner/structure/RNAalifold.java b/runner/compbio/runner/structure/RNAalifold.java index 4ec1ec7..acb4a9e 100644 --- a/runner/compbio/runner/structure/RNAalifold.java +++ b/runner/compbio/runner/structure/RNAalifold.java @@ -11,9 +11,11 @@ import java.io.FileReader; import java.io.BufferedReader; import java.io.File; + + import org.apache.log4j.Logger; -import compbio.data.sequence.Alignment; +import compbio.data.sequence.RNAstruct; import compbio.data.sequence.UnknownFileFormatException; import compbio.engine.client.PipedExecutable; import compbio.engine.client.SkeletalExecutable; @@ -54,11 +56,12 @@ public class RNAalifold extends SkeletalExecutable @SuppressWarnings("unchecked") @Override // PlaceHolder method - public String getResults(String workDirectory) + public RNAstruct getResults(String workDirectory) throws ResultNotAvailableException { try { - // System.out.print(readRNAStruct(workDirectory, getOutput())); - return readRNAStruct(workDirectory, getOutput()); + // System.out.println("Specialread " + 
Util.readRNAStructFile(workDirectory, getOutput()).toString()); + return Util.readRNAStructFile(workDirectory, getOutput()); + } catch (FileNotFoundException e) { log.error(e.getMessage(), e.getCause()); throw new ResultNotAvailableException(e); @@ -68,43 +71,47 @@ public class RNAalifold extends SkeletalExecutable } } + + // OLD (kept below only as commented-out reference) :- the new methods for reading are found in + // - compbio.data.sequence.SequenceUtil and + // - compbio.runner.Util // Simple and generic methods for reading a whole file // Should surfice until a more detailed datamodel and parser are developed - private static String readRNAStruct(String workDirectory, - String structFile) throws IOException, FileNotFoundException { - assert !compbio.util.Util.isEmpty(workDirectory); - assert !compbio.util.Util.isEmpty(structFile); - File sfile = new File(compbio.engine.client.Util.getFullPath( - workDirectory, structFile)); - log.trace("RNAALIFOLD OUTPUT FILE PATH: " + sfile.getAbsolutePath()); - if(!(sfile.exists() && sfile.length() > 0)) { - throw new FileNotFoundException("Result for the jobId " - + workDirectory + "with file name " + structFile - + " is not found!"); - } - return readFile(sfile); - } - - private static BufferedReader input; - public static String readFile(File inputFile) throws - FileNotFoundException, IOException { - - input = new BufferedReader(new FileReader(inputFile)); - - String file = new String(); - String line = new String(); - - while (true) { - line = input.readLine(); - - if (line != null) { - file = file + line + "\r\n"; - } else break; - } - // Close file - input.close(); - return file; - } +// private static String readRNAStruct(String workDirectory, +// String structFile) throws IOException, FileNotFoundException { +// assert !compbio.util.Util.isEmpty(workDirectory); +// assert !compbio.util.Util.isEmpty(structFile); +// File sfile = new File(compbio.engine.client.Util.getFullPath( +// workDirectory, structFile)); +// log.trace("RNAALIFOLD OUTPUT FILE PATH: " + 
sfile.getAbsolutePath()); +// if(!(sfile.exists() && sfile.length() > 0)) { +// throw new FileNotFoundException("Result for the jobId " +// + workDirectory + "with file name " + structFile +// + " is not found!"); +// } +// return readFile(sfile); +// } +// +// private static BufferedReader input; +// public static String readFile(File inputFile) throws +// FileNotFoundException, IOException { +// +// input = new BufferedReader(new FileReader(inputFile)); +// +// String file = new String(); +// String line = new String(); +// +// while (true) { +// line = input.readLine(); +// +// if (line != null) { +// file = file + line + "\r\n"; +// } else break; +// } +// // Close file +// input.close(); +// return file; +// } } diff --git a/testsrc/compbio/data/sequence/SequenceUtilTester.java b/testsrc/compbio/data/sequence/SequenceUtilTester.java index e646c1e..25c81a1 100644 --- a/testsrc/compbio/data/sequence/SequenceUtilTester.java +++ b/testsrc/compbio/data/sequence/SequenceUtilTester.java @@ -140,6 +140,37 @@ public class SequenceUtilTester { } } + // Round trip: reads TO1381.fasta and writes it back as TO1381.aln.written in Clustal format. Potential Bug :- Sequence names are shortened to 2-3 letters + @Test + public void testReadFastaWriteClustal() { + + try { + FileInputStream fio = new FileInputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); + assertNotNull(fio); + List fseqs = SequenceUtil.readFasta(fio); + assertNotNull(fseqs); + fio.close(); + + char gapChar = '-'; + FileOutputStream fou = new FileOutputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.aln.written"); + SequenceUtil.writeClustal(fou, fseqs, gapChar); + fou.close(); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + + + + /** * This test tests the loading of horizontally formatted Jronn output file */ @@ -409,3 +440,4 @@ public class SequenceUtilTester { } } } + diff --git a/testsrc/compbio/runner/structure/RNAalifoldTester.java 
b/testsrc/compbio/runner/structure/RNAalifoldTester.java index 2ce3d5f..4363e8d 100644 --- a/testsrc/compbio/runner/structure/RNAalifoldTester.java +++ b/testsrc/compbio/runner/structure/RNAalifoldTester.java @@ -65,7 +65,7 @@ public class RNAalifoldTester { @Test(groups = { AllTestSuit.test_group_runner }) public void testRunLocally() { RNAalifold rnaalifold = new RNAalifold(); - rnaalifold.setInput(AllTestSuit.test_input_aln).setOutput(test_outfile); + rnaalifold.setInput(AllTestSuit.test_alignment_input).setOutput(test_outfile); try{ ConfiguredExecutable confRNAalifold = Configurator diff --git a/webservices/compbio/data/structure/FoldWS.java b/webservices/compbio/data/structure/FoldWS.java new file mode 100644 index 0000000..7d5238f --- /dev/null +++ b/webservices/compbio/data/structure/FoldWS.java @@ -0,0 +1,41 @@ +package compbio.data.structure; + +import java.security.InvalidParameterException; +import java.util.List; + +import javax.jws.WebMethod; +import javax.jws.WebParam; +import javax.jws.WebService; + +import compbio.data.msa.JABAService; +import compbio.data.msa.JManagement; +import compbio.data.msa.Metadata; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.LimitExceededException; +import compbio.metadata.Option; +import compbio.metadata.Preset; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.UnsupportedRuntimeException; +import compbio.metadata.WrongParameterException; + +/* + * Interface for tools that return RNA secondary structure information + * + * Still unsure whether a new interface is needed or whether secondary structure + * information could be expressed as a sequence annotation + */ + +// effect of a different targetNamespace? 
+@WebService(targetNamespace = JABAService.V2_SERVICE_NAMESPACE) +public interface FoldWS + extends + JABAService, + JManagement, + Metadata { + + + // NOTE(review): the fold declaration below is unfinished as shown - @WebParam has no name value and the parameter list is never closed - confirm the intended signature + @WebMethod + String fold( + @WebParam(name = ) +} diff --git a/webservices/compbio/ws/server/RNAalifoldWS.java b/webservices/compbio/ws/server/RNAalifoldWS.java index 4a1d5a1..83d7c93 100644 --- a/webservices/compbio/ws/server/RNAalifoldWS.java +++ b/webservices/compbio/ws/server/RNAalifoldWS.java @@ -47,6 +47,9 @@ public class RNAalifoldWS implements JABAService, JManagement, Metadata limitMan = compbio.engine.client.Util .getLimits(new RNAalifold().getType()); + //public String fold() + + ConfiguredExecutable init() throws JobSubmissionException { RNAalifold rnaalifold = new RNAalifold(); rnaalifold.setInput(SkeletalExecutable.INPUT) @@ -115,12 +118,12 @@ public class RNAalifoldWS implements JABAService, JManagement, Metadata String fold(List sequences, + ConfiguredExecutable confExec, Logger logger, + String callingMethod, Limit limit) + throws LimitExceededException, JobSubmissionException { + // refuse input that exceeds the limit before the Clustal input file is written + if (limit != null && limit.isExceeded(sequences)) { + throw LimitExceededException.newLimitExceeded(limit, sequences); + } + compbio.runner.Util.writeClustalInput(sequences, confExec, '-'); + AsyncExecutor engine = Configurator.getAsyncEngine(confExec); + String jobId = engine.submitJob(confExec); + reportUsage(confExec, logger); + return jobId; + } static void reportUsage(ConfiguredExecutable confExec, Logger logger) { if (GAUtils.IS_GA_ENABLED) { -- 1.7.10.2