From: pvtroshin Date: Mon, 22 Nov 2010 15:09:13 +0000 (+0000) Subject: Adding AAConWS X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;ds=sidebyside;h=fab6bab770b548a0c99ed6f5dfb46f5aa99f67c7;p=jabaws.git Adding AAConWS git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@3343 e3abac25-378b-4346-85de-24260fe3988d --- diff --git a/.classpath b/.classpath index 9107de9..cff7667 100644 --- a/.classpath +++ b/.classpath @@ -11,6 +11,7 @@ - + + diff --git a/TODO.txt b/TODO.txt index 79f6ca1..6e70077 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,5 +1,6 @@ TODO: +Add iupred ws http://iupred.enzim.hu/ Add globprot ws Add ronn ws diff --git a/WEB-INF/lib/compbio-util-1.2.jar b/WEB-INF/lib/compbio-util-1.3.jar similarity index 80% rename from WEB-INF/lib/compbio-util-1.2.jar rename to WEB-INF/lib/compbio-util-1.3.jar index a8e20be..9042b5a 100644 Binary files a/WEB-INF/lib/compbio-util-1.2.jar and b/WEB-INF/lib/compbio-util-1.3.jar differ diff --git a/binaries/aaconservation.jar b/binaries/aaconservation.jar new file mode 100644 index 0000000..375b8e9 Binary files /dev/null and b/binaries/aaconservation.jar differ diff --git a/binaries/manual.txt b/binaries/manual.txt new file mode 100644 index 0000000..ff7d66f --- /dev/null +++ b/binaries/manual.txt @@ -0,0 +1,90 @@ + +AA Conservation version 1.0b (2 September 2010) + +This program allows calculation of conservation of amino acids in +multiple sequence alignments. +It implements 17 different conservation scores as described by Valdar in +his paper (Scoring Residue Conservation, PROTEINS: Structure, Function +and Bioinformatics 48:227-241 (2002)) and SMERFS scoring algorithm as described +by Manning, Jefferson and Barton (The contrasting properties of conservation +and correlated phylogeny in protein functional residue prediction, +BMC Bioinformatics (2008)). 
+ +The conservation algorithms supported are: + +KABAT, JORES, SCHNEIDER, SHENKIN, GERSTEIN, TAYLOR_GAPS, TAYLOR_NO_GAPS, +ZVELIBIL, KARLIN, ARMON, THOMPSON, NOT_LANCET, MIRNY, WILLIAMSON, +LANDGRAF, SANDER, VALDAR, SMERFS + +Input format is either a FASTA formatted file containing aligned sequences with +gaps or a Clustal alignment. The valid gap characters are *, -, space character, +X and . (a dot). By default the program prints the results to the command window. +If the output file is provided the results are printed to the file in two +possible formats with or without an alignment. +If format is not specified, the program outputs conservation scores without +alignment. The scores are not normalized by default but they can be (see below). +SMERFS default parameters are window width of 7, column score is set to +the middle column, gap% cutoff of 0.1. If different values for SMERFS parameters +are required, then all three parameters must be provided. Details of the program +execution can be recorded to a separate file if an appropriate file path is +provided. + +List of command line arguments: + +-m= precedes a comma separated list of method names + EXAMPLE: -m=KABAT,JORES,GERSTEIN + Optional, if no method is specified request for all is assumed. + +-i= precedes a full path to the input FASTA file, required + +-o= precedes a full path to the output file, optional, if no output file is + provided the program will output to the standard out. + +-t= precedes the number of CPUs (CPU cores more precisely) to use. Optional, + defaults to all processors available on the machine. + +-f= precedes the format of the results in the output file + two different formats are possible: + RESULT_WITH_ALIGNMENT + RESULT_NO_ALIGNMENT + Optional, if not specified RESULT_NO_ALIGNMENT is assumed + +-s= precedes a list of three comma separated parameters for SMERFS + the order of parameters is as follows: + 1. window width - an integer and an odd number + 2. 
how to allocate window scores to columns, two ways are possible: + MID_SCORE - gives the window score to the middle column + MAX_SCORE - gives the column the highest score of all the windows it + belongs to + 3. gap percentage cutoff - a float greater than 0 and smaller than or equal to 1 + EXAMPLE: -s=5,MID_SCORE,0.1 + Optional, default values are 7,MID_SCORE,0.1 + +-d= precedes a full path to a file where program execution details are to be + listed. Optional, if not provided, no execution statistics are produced. + +-g= precedes comma separated list of gap characters provided by the user, if + you're using an unusual gap character (not a -,., ,*,X) you have to + provide it. If you provide this list you have to list all the gaps + accepted, including those that were previously treated as a default. + Optional. + +-n using this key causes the results to be normalized. + Normalized results have values between 0 and 1. Please note however, that + some results cannot be normalized. In such a case, the system returns the + non-normalized value and logs the issue to the standard error stream. + The following formula is used for normalization + n = (d - dmin)/(dmax - dmin) + Negative results are first converted to positive by adding an absolute value of + the most negative result. Optional. + +EXAMPLE HOW TO RUN THE PROGRAM: +java -jar aaconservation.jar -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n + +As a result of the execution KABAT and SMERFS scores will be calculated. +Input comes from prot1 file and an output without an alignment is recorded to +prot1_results file. + +Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton. +Please visit http://www.compbio.dundee.ac.uk for further information. 
+ \ No newline at end of file diff --git a/build.xml b/build.xml index 724776f..05137d6 100644 --- a/build.xml +++ b/build.xml @@ -153,7 +153,7 @@ - + @@ -233,7 +233,7 @@ - + @@ -248,7 +248,8 @@ - + Packing binaries, and configuration files @@ -264,7 +265,7 @@ - + diff --git a/conf/Executable.properties b/conf/Executable.properties index 0a9b5cd..247c9f5 100644 --- a/conf/Executable.properties +++ b/conf/Executable.properties @@ -43,6 +43,8 @@ cluster.tcoffee.bin=/homes/pvtroshin/workspace/jaba2/binaries/src/tcoffee/t_coff #/sw/bin/t_coffee # Sub matrix support does not work #tcoffee.-matrix.path=binaries/matrices +# This variable is required by tcoffee +tcoffee.bin.env=HOME_4_TCOFFEE#jobsout; tcoffee.presets.file=conf/settings/TcoffeePresets.xml tcoffee.parameters.file=conf/settings/TcoffeeParameters.xml tcoffee.limits.file=conf/settings/TcoffeeLimits.xml @@ -75,5 +77,14 @@ jronn.cluster.settings=-q 64bit-pri.q -pe smp 4 -l h_vmem=1700M -l ram=1700M -l local.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py cluster.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py #disembl.parameters.file=conf/settings/JronnParameters.xml -disembl.limits.file=conf/settings/JronnLimits.xml +disembl.limits.file=conf/settings/DisemblLimits.xml disembl.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M + +local.aacon.bin.windows=D:\\Java\\jdk1.6.0_14\\bin\\java.exe +local.aacon.bin=/sw/java/latest/bin/java +cluster.aacon.bin=/sw/java/latest/bin/java +aacon.jar.file=binaries/aaconservation.jar +aacon.parameters.file=conf/settings/AAConParameters.xml +aacon.limits.file=conf/settings/AAConLimits.xml +#TODO jronn.jvm.options=-Xms32M -Xmx512M +aacon.cluster.cpunum=4 diff --git a/conf/settings/AAConLimits.xml b/conf/settings/AAConLimits.xml new file mode 100644 index 0000000..a3e4935 --- /dev/null +++ b/conf/settings/AAConLimits.xml @@ -0,0 +1,13 @@ + + + compbio.runner.conservation.AACon + + 100000 + 100000 + + + # LocalEngineExecutionLimit # + 1000 + 1000 
+ + diff --git a/conf/settings/AAConParameters.xml b/conf/settings/AAConParameters.xml new file mode 100644 index 0000000..9f5da9f --- /dev/null +++ b/conf/settings/AAConParameters.xml @@ -0,0 +1,36 @@ + + + compbio.runner.conservation.AACon + + Normalize + Normalize the results. The results of the calculation by different methods will all be scaled to the range between 0 and 1, so that they are comparable + -n + http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt + + = + + Calculation method + The method of the calculation to use + -m + http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt + SHENKIN + KABAT + JORES + SCHNEIDER + SHENKIN + GERSTEIN + TAYLOR_GAPS + TAYLOR_NO_GAPS + ZVELIBIL + KARLIN + ARMON + THOMPSON + NOT_LANCET + MIRNY + WILLIAMSON + LANDGRAF + SANDER + VALDAR + SMERFS + + diff --git a/conf/settings/AAConPresets.xml b/conf/settings/AAConPresets.xml new file mode 100644 index 0000000..cdf8d37 --- /dev/null +++ b/conf/settings/AAConPresets.xml @@ -0,0 +1,30 @@ + + + compbio.runner.conservation.AACon + + Quick conservation + Collection of fast conservation methods + + + + + + Slow conservation + Collection of most expensive (slow) conservation methods + + + + + + Complete conservation + Calculate conservation with all supported methods + + + + + + + + + + diff --git a/datamodel/compbio/data/sequence/ClustalAlignmentUtil.java b/datamodel/compbio/data/sequence/ClustalAlignmentUtil.java index 5fce997..b6076a4 100644 --- a/datamodel/compbio/data/sequence/ClustalAlignmentUtil.java +++ b/datamodel/compbio/data/sequence/ClustalAlignmentUtil.java @@ -1,19 +1,15 @@ -/* Copyright (c) 2009 Peter Troshin - * - * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 - * - * This library is free software; you can redistribute it and/or modify it under the terms of the - * Apache License version 2 as published by the Apache Software Foundation - * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache - * License for more details. - * - * A copy of the license is in apache_license.txt. It is also available here: - * @see: http://www.apache.org/licenses/LICENSE-2.0.txt - * - * Any republication or derived work distributed in source code form - * must include this copyright and license notice. +/* + * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 1.0 This library is free software; you can redistribute it + * and/or modify it under the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or + * derived work distributed in source code form must include this copyright and + * license notice. */ package compbio.data.sequence; @@ -45,246 +41,247 @@ import java.util.logging.Logger; */ public final class ClustalAlignmentUtil { - private static final Logger log = Logger - .getLogger(ClustalAlignmentUtil.class.getCanonicalName()); - - /** - * Dash char to be used as gap char in the alignments - */ - public static final char gapchar = '-'; - - /* - * Number of spaces separating the name and the sequence - */ - private static final String spacer = " "; // 6 space characters - /* - * name length limit is 30 characters! 
2.0.7 - 2.0.12 clustalw /* if name is - * longer than that it gets trimmed in the end - */ - private static final int maxNameLength = 30; // Maximum name length - /* - * If all sequences names in the alignment is shorter than - * minNameHolderLength than spaces are added to complete the name up to - * minNameHolderLength - */ - private static final int minNameHolderLength = 10; // Minimum number of - - // TODO check whether clustal still loads data if length is 60! - private static final int oneLineAlignmentLength = 60; // this could in fact - - // be 50 - - // for long names ~30 chars - - /** - * Read Clustal formatted alignment. Limitations: Does not read consensus - * - * Sequence names as well as the sequences are not guaranteed to be unique! - * - * @throws {@link IOException} - * @throws {@link UnknownFileFormatException} - */ - public static Alignment readClustalFile(InputStream instream) - throws IOException, UnknownFileFormatException { - - boolean flag = false; - - List headers = new ArrayList(); - Map seqhash = new HashMap(); - FastaSequence[] seqs = null; - - String line; - - BufferedReader breader = new BufferedReader(new InputStreamReader( - instream)); - while ((line = breader.readLine()) != null) { - if (line.indexOf(" ") != 0) { - java.util.StringTokenizer str = new StringTokenizer(line, " "); - String id = ""; - - if (str.hasMoreTokens()) { - id = str.nextToken(); - // PROBCONS output clustal formatted file with not mention - // of CLUSTAL (:-)) - if (id.equals("CLUSTAL") || id.equals("PROBCONS")) { - flag = true; - } else { - if (flag) { - StringBuffer tempseq; - if (seqhash.containsKey(id)) { - tempseq = seqhash.get(id); - } else { - tempseq = new StringBuffer(); - seqhash.put(id, tempseq); - } - - if (!(headers.contains(id))) { - headers.add(id); - } - - tempseq.append(str.nextToken()); + private static final Logger log = Logger + .getLogger(ClustalAlignmentUtil.class.getCanonicalName()); + + /** + * Dash char to be used as gap char in the 
alignments + */ + public static final char gapchar = '-'; + + /* + * Number of spaces separating the name and the sequence + */ + private static final String spacer = " "; // 6 space characters + /* + * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is + * longer than that it gets trimmed in the end + */ + private static final int maxNameLength = 30; // Maximum name length + /* + * If all sequences names in the alignment is shorter than + * minNameHolderLength than spaces are added to complete the name up to + * minNameHolderLength + */ + private static final int minNameHolderLength = 10; // Minimum number of + + // TODO check whether clustal still loads data if length is 60! + private static final int oneLineAlignmentLength = 60; // this could in fact + + // be 50 + + // for long names ~30 chars + + /** + * Read Clustal formatted alignment. Limitations: Does not read consensus + * + * Sequence names as well as the sequences are not guaranteed to be unique! + * + * @throws {@link IOException} + * @throws {@link UnknownFileFormatException} + */ + public static Alignment readClustalFile(InputStream instream) + throws IOException, UnknownFileFormatException { + + boolean flag = false; + + List headers = new ArrayList(); + Map seqhash = new HashMap(); + FastaSequence[] seqs = null; + + String line; + + BufferedReader breader = new BufferedReader(new InputStreamReader( + instream)); + while ((line = breader.readLine()) != null) { + if (line.indexOf(" ") != 0) { + java.util.StringTokenizer str = new StringTokenizer(line, " "); + String id = ""; + + if (str.hasMoreTokens()) { + id = str.nextToken(); + // PROBCONS output clustal formatted file with not mention + // of CLUSTAL (:-)) + if (id.equals("CLUSTAL") || id.equals("PROBCONS")) { + flag = true; + } else { + if (flag) { + StringBuffer tempseq; + if (seqhash.containsKey(id)) { + tempseq = seqhash.get(id); + } else { + tempseq = new StringBuffer(); + seqhash.put(id, tempseq); + } + + if 
(!(headers.contains(id))) { + headers.add(id); + } + + tempseq.append(str.nextToken()); + } + } + } } - } } - } - } - breader.close(); + breader.close(); - // TODO improve this bit - if (flag) { + // TODO improve this bit + if (flag) { - // Add sequences to the hash - seqs = new FastaSequence[headers.size()]; - for (int i = 0; i < headers.size(); i++) { - if (seqhash.get(headers.get(i)) != null) { + // Add sequences to the hash + seqs = new FastaSequence[headers.size()]; + for (int i = 0; i < headers.size(); i++) { + if (seqhash.get(headers.get(i)) != null) { - FastaSequence newSeq = new FastaSequence(headers.get(i), - seqhash.get(headers.get(i)).toString()); + FastaSequence newSeq = new FastaSequence(headers.get(i), + seqhash.get(headers.get(i)).toString()); - seqs[i] = newSeq; + seqs[i] = newSeq; - } else { - // should not happened - throw new AssertionError( - "Bizarreness! Can't find sequence for " - + headers.get(i)); + } else { + // should not happened + throw new AssertionError( + "Bizarreness! Can't find sequence for " + + headers.get(i)); + } + } } - } - } - if (seqs == null || seqs.length == 0) { - throw new UnknownFileFormatException( - "Input does not appear to be a clustal file! 
"); - } - return new Alignment(Arrays.asList(seqs), new AlignmentMetadata( - Program.CLUSTAL, gapchar)); - } - - /** - * - * @param input - * @return true if the file is recognised as Clustal formatted alignment, - * false otherwise - */ - public static boolean isValidClustalFile(InputStream input) { - if (input == null) { - throw new NullPointerException("Input is expected!"); - } - BufferedReader breader = new BufferedReader( - new InputStreamReader(input)); - try { - if (input.available() < 10) { - return false; - } - // read first 10 lines to find "Clustal" - for (int i = 0; i < 10; i++) { - String line = breader.readLine(); - if (line != null) { - line = line.toUpperCase().trim(); - if (line.contains("CLUSTAL") || line.contains("PROBCONS")) { - return true; - } + if (seqs == null || seqs.length == 0) { + throw new UnknownFileFormatException( + "Input does not appear to be a clustal file! "); } - } - - breader.close(); - } catch (IOException e) { - log.severe("Could not read from the stream! " - + e.getLocalizedMessage() + e.getCause()); - } finally { - SequenceUtil.closeSilently(log, breader); - } - return false; - } - - /** - * Write Clustal formatted alignment Limitations: does not record the - * consensus. Potential bug - records 60 chars length alignment where - * Clustal would have recorded 50 chars. 
- * - * @param outStream - * - * @param alignment - * @throws IOException - */ - public static void writeClustalAlignment(final OutputStream outStream, - final Alignment alignment) throws IOException { - List seqs = alignment.getSequences(); - - PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream)); - - out.write("CLUSTAL\n\n\n"); - - int max = 0; - int maxidLength = 0; - - int i = 0; - // Find the longest sequence name - for (FastaSequence fs : seqs) { - String tmp = fs.getId(); - - if (fs.getSequence().length() > max) { - max = fs.getSequence().length(); - } - if (tmp.length() > maxidLength) { - maxidLength = tmp.length(); - } - i++; - } - if (maxidLength < minNameHolderLength) { - maxidLength = minNameHolderLength; - } - if (maxidLength > maxNameLength) { - maxidLength = 30; // the rest will be trimmed + return new Alignment(Arrays.asList(seqs), new AlignmentMetadata( + Program.CLUSTAL, gapchar)); } - int oneLineAlignmentLength = 60; - int nochunks = max / oneLineAlignmentLength + 1; - - for (i = 0; i < nochunks; i++) { - int j = 0; - for (FastaSequence fs : seqs) { - - String name = fs.getId(); - // display at most 30 characters in the name, keep the names - // 6 spaces away from the alignment for longest sequence names, - // and more than this for shorter names - out.format("%-" + maxidLength + "s" + spacer, - (name.length() > maxNameLength ? 
name.substring(0, - maxidLength) : name)); - int start = i * oneLineAlignmentLength; - int end = start + oneLineAlignmentLength; - - if (end < fs.getSequence().length() - && start < fs.getSequence().length()) { - out.write(fs.getSequence().substring(start, end) + "\n"); - } else { - if (start < fs.getSequence().length()) { - out.write(fs.getSequence().substring(start) + "\n"); - } + /** + * Please note this method closes the input stream provided as a parameter + * + * @param input + * @return true if the file is recognised as Clustal formatted alignment, + * false otherwise + */ + public static boolean isValidClustalFile(InputStream input) { + if (input == null) { + throw new NullPointerException("Input is expected!"); } - j++; - } - out.write("\n"); - } - try { - out.close(); - } finally { - SequenceUtil.closeSilently(log, out); + BufferedReader breader = new BufferedReader( + new InputStreamReader(input)); + try { + if (input.available() < 10) { + return false; + } + // read first 10 lines to find "Clustal" + for (int i = 0; i < 10; i++) { + String line = breader.readLine(); + if (line != null) { + line = line.toUpperCase().trim(); + if (line.contains("CLUSTAL") || line.contains("PROBCONS")) { + return true; + } + } + } + + breader.close(); + } catch (IOException e) { + log.severe("Could not read from the stream! " + + e.getLocalizedMessage() + e.getCause()); + } finally { + SequenceUtil.closeSilently(log, breader); + } + return false; } - } - public static Alignment readClustalFile(File file) - throws UnknownFileFormatException, IOException { - if (file == null) { - throw new NullPointerException("File is expected!"); + /** + * Write Clustal formatted alignment Limitations: does not record the + * consensus. Potential bug - records 60 chars length alignment where + * Clustal would have recorded 50 chars. 
+ * + * @param outStream + * + * @param alignment + * @throws IOException + */ + public static void writeClustalAlignment(final OutputStream outStream, + final Alignment alignment) throws IOException { + List seqs = alignment.getSequences(); + + PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream)); + + out.write("CLUSTAL\n\n\n"); + + int max = 0; + int maxidLength = 0; + + int i = 0; + // Find the longest sequence name + for (FastaSequence fs : seqs) { + String tmp = fs.getId(); + + if (fs.getSequence().length() > max) { + max = fs.getSequence().length(); + } + if (tmp.length() > maxidLength) { + maxidLength = tmp.length(); + } + i++; + } + if (maxidLength < minNameHolderLength) { + maxidLength = minNameHolderLength; + } + if (maxidLength > maxNameLength) { + maxidLength = 30; // the rest will be trimmed + } + + int oneLineAlignmentLength = 60; + int nochunks = max / oneLineAlignmentLength + 1; + + for (i = 0; i < nochunks; i++) { + int j = 0; + for (FastaSequence fs : seqs) { + + String name = fs.getId(); + // display at most 30 characters in the name, keep the names + // 6 spaces away from the alignment for longest sequence names, + // and more than this for shorter names + out.format("%-" + maxidLength + "s" + spacer, + (name.length() > maxNameLength ? 
name.substring(0, + maxidLength) : name)); + int start = i * oneLineAlignmentLength; + int end = start + oneLineAlignmentLength; + + if (end < fs.getSequence().length() + && start < fs.getSequence().length()) { + out.write(fs.getSequence().substring(start, end) + "\n"); + } else { + if (start < fs.getSequence().length()) { + out.write(fs.getSequence().substring(start) + "\n"); + } + } + j++; + } + out.write("\n"); + } + try { + out.close(); + } finally { + SequenceUtil.closeSilently(log, out); + } } - FileInputStream fio = new FileInputStream(file); - Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio); - try { - fio.close(); - } finally { - SequenceUtil.closeSilently(log, fio); + + public static Alignment readClustalFile(File file) + throws UnknownFileFormatException, IOException { + if (file == null) { + throw new NullPointerException("File is expected!"); + } + FileInputStream fio = new FileInputStream(file); + Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio); + try { + fio.close(); + } finally { + SequenceUtil.closeSilently(log, fio); + } + return seqAl; } - return seqAl; - } } diff --git a/datamodel/compbio/data/sequence/DisemblResultAnnot.java b/datamodel/compbio/data/sequence/DisemblResultAnnot.java new file mode 100644 index 0000000..c5f026c --- /dev/null +++ b/datamodel/compbio/data/sequence/DisemblResultAnnot.java @@ -0,0 +1,5 @@ +package compbio.data.sequence; + +public enum DisemblResultAnnot { + COILS, REM465, HOTLOOPS +} diff --git a/datamodel/compbio/data/sequence/FastaSequence.java b/datamodel/compbio/data/sequence/FastaSequence.java index 6072d29..2032fec 100644 --- a/datamodel/compbio/data/sequence/FastaSequence.java +++ b/datamodel/compbio/data/sequence/FastaSequence.java @@ -1,19 +1,15 @@ -/* Copyright (c) 2009 Peter Troshin - * - * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 - * - * This library is free software; you can redistribute it and/or modify it under the terms of the - * Apache License version 2 
as published by the Apache Software Foundation - * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache - * License for more details. - * - * A copy of the license is in apache_license.txt. It is also available here: - * @see: http://www.apache.org/licenses/LICENSE-2.0.txt - * - * Any republication or derived work distributed in source code form - * must include this copyright and license notice. +/* + * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 1.0 This library is free software; you can redistribute it + * and/or modify it under the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or + * derived work distributed in source code form must include this copyright and + * license notice. */ package compbio.data.sequence; @@ -42,151 +38,155 @@ import compbio.util.annotation.Immutable; @Immutable public class FastaSequence { - /** - * Sequence id - */ - private String id; - - // TODO what about gapped sequence here! 
should be indicated - /** - * Returns the string representation of sequence - */ - private String sequence; - - private FastaSequence() { - // Default constructor for JaxB - } - - /** - * Upon construction the any whitespace characters are removed from the - * sequence - * - * @param id - * @param sequence - */ - public FastaSequence(String id, String sequence) { - this.id = id; - this.sequence = SequenceUtil.cleanSequence(sequence); - } - - /** - * Gets the value of id - * - * @return the value of id - */ - public String getId() { - return this.id; - } - - /** - * Gets the value of sequence - * - * @return the value of sequence - */ - public String getSequence() { - return this.sequence; - } - - public static int countMatchesInSequence(final String theString, - final String theRegExp) { - final Pattern p = Pattern.compile(theRegExp); - final Matcher m = p.matcher(theString); - int cnt = 0; - while (m.find()) { - cnt++; + /** + * Sequence id + */ + private String id; + + // TODO what about gapped sequence here! should be indicated + /** + * Returns the string representation of sequence + */ + private String sequence; + + private FastaSequence() { + // Default constructor for JaxB } - return cnt; - } - - public String getFormattedFasta() { - return getFormatedSequence(80); - } - - /** - * - * @return one line name, next line sequence, no matter what the sequence - * length is - */ - public String getOnelineFasta() { - String fasta = ">" + getId() + SysPrefs.newlinechar; - fasta += getSequence() + SysPrefs.newlinechar; - return fasta; - } - - /** - * Format sequence per width letter in one string. Without spaces. 
- * - * @return multiple line formated sequence, one line width letters length - * - */ - public String getFormatedSequence(final int width) { - if (sequence == null) { - return ""; + + /** + * Upon construction the any whitespace characters are removed from the + * sequence + * + * @param id + * @param sequence + */ + public FastaSequence(String id, String sequence) { + this.id = id; + this.sequence = SequenceUtil.cleanSequence(sequence); } - assert width >= 0 : "Wrong width parameter "; - - final StringBuilder sb = new StringBuilder(sequence); - int nchunks = sequence.length() / width; - // add up inserted new line chars - nchunks = (nchunks + sequence.length()) / width; - int nlineCharcounter = 0; - for (int i = 1; i <= nchunks; i++) { - int insPos = width * i + nlineCharcounter; - // to prevent inserting new line in the very end of a sequence then - // it would have failed. - // Also covers the case when the sequences shorter than width - if (sb.length() <= insPos) { - break; - } - sb.insert(insPos, "\n"); - nlineCharcounter++; + /** + * Gets the value of id + * + * @return the value of id + */ + public String getId() { + return this.id; } - return sb.toString(); - } - - /** - * - * @return sequence length - */ - public int getLength() { - return this.sequence.length(); - } - - /** - * Same as oneLineFasta - */ - @Override - public String toString() { - return this.getOnelineFasta(); - } - - @Override - public int hashCode() { - final int prime = 17; - int result = 1; - result = prime * result + ((id == null) ? 0 : id.hashCode()); - result = prime * result - + ((sequence == null) ? 
0 : sequence.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; + + /** + * Gets the value of sequence + * + * @return the value of sequence + */ + public String getSequence() { + return this.sequence; } - if (!(obj instanceof FastaSequence)) { - return false; + + public static int countMatchesInSequence(final String theString, + final String theRegExp) { + final Pattern p = Pattern.compile(theRegExp); + final Matcher m = p.matcher(theString); + int cnt = 0; + while (m.find()) { + cnt++; + } + return cnt; } - FastaSequence fs = (FastaSequence) obj; - if (!fs.getId().equals(this.getId())) { - return false; + + public String getFormattedFasta() { + return getFormatedSequence(80); } - if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) { - return false; + + /** + * + * @return one line name, next line sequence, no matter what the sequence + * length is + */ + public String getOnelineFasta() { + String fasta = ">" + getId() + SysPrefs.newlinechar; + fasta += getSequence() + SysPrefs.newlinechar; + return fasta; + } + + /** + * Format sequence per width letter in one string. Without spaces. + * + * @return multiple line formated sequence, one line width letters length + * + */ + public String getFormatedSequence(final int width) { + if (sequence == null) { + return ""; + } + + assert width >= 0 : "Wrong width parameter "; + + final StringBuilder sb = new StringBuilder(sequence); + // int tail = nrOfWindows % WIN_SIZE; + // final int turns = (nrOfWindows - tail) / WIN_SIZE; + + int tailLen = sequence.length() % width; + // add up inserted new line chars + int nchunks = (sequence.length() - tailLen) / width; + int nlineCharcounter = 0; + int insPos = 0; + for (int i = 1; i <= nchunks; i++) { + insPos = width * i + nlineCharcounter; + // to prevent inserting new line in the very end of a sequence then + // it would have failed. 
+ if (sb.length() <= insPos) { + break; + } + sb.insert(insPos, "\n"); + nlineCharcounter++; + } + // sb.insert(insPos + tailLen, "\n"); + return sb.toString(); + } + + /** + * + * @return sequence length + */ + public int getLength() { + return this.sequence.length(); + } + + /** + * Same as oneLineFasta + */ + @Override + public String toString() { + return this.getOnelineFasta(); + } + + @Override + public int hashCode() { + final int prime = 17; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + result = prime * result + + ((sequence == null) ? 0 : sequence.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (!(obj instanceof FastaSequence)) { + return false; + } + FastaSequence fs = (FastaSequence) obj; + if (!fs.getId().equals(this.getId())) { + return false; + } + if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) { + return false; + } + return true; } - return true; - } } diff --git a/datamodel/compbio/data/sequence/JalviewAnnotation.java b/datamodel/compbio/data/sequence/JalviewAnnotation.java new file mode 100644 index 0000000..cf19937 --- /dev/null +++ b/datamodel/compbio/data/sequence/JalviewAnnotation.java @@ -0,0 +1,7 @@ +package compbio.data.sequence; + +public class JalviewAnnotation { + + String annotation; + +} diff --git a/datamodel/compbio/data/sequence/MultiAnnotatedSequence.java b/datamodel/compbio/data/sequence/MultiAnnotatedSequence.java index 580a22e..1a889e3 100644 --- a/datamodel/compbio/data/sequence/MultiAnnotatedSequence.java +++ b/datamodel/compbio/data/sequence/MultiAnnotatedSequence.java @@ -2,6 +2,9 @@ package compbio.data.sequence; import java.util.EnumMap; import java.util.List; +import java.util.Map; + +import compbio.util.annotation.NotThreadSafe; /** * TODO complete @@ -11,23 +14,67 @@ import java.util.List; * @param * enum type */ +@NotThreadSafe public class MultiAnnotatedSequence> { - private final 
EnumMap> annotation; + private final Map> annotations; + + public MultiAnnotatedSequence(Class enumeration) { + this.annotations = new EnumMap>(enumeration); + } + + public void addAnnotation(T type, List annotation) { + assert type != null : "Type is expected"; + assert annotation != null : "Not empty value is expected!"; + if (!annotations.isEmpty()) { + assert annotations.values().iterator().next().size() == annotation + .size() : "Annotations must contain the same number of elements!"; + } + this.annotations.put(type, annotation); + } + + public Map> getAnnotations() { + return new EnumMap>(this.annotations); + } - private MultiAnnotatedSequence(Class type) { - this.annotation = new EnumMap>(type); - } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((annotations == null) ? 0 : annotations.hashCode()); + return result; + } - // public MultiAnnotatedSequence getFloatInstance(FastaSequence fsequence) { - // return null; - //} + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + MultiAnnotatedSequence other = (MultiAnnotatedSequence) obj; + if (annotations == null) { + if (other.annotations != null) + return false; + } else if (!annotations.equals(other.annotations)) + return false; + return true; + } - public EnumMap> getIntegerInstance(Class enumeration) { - return new EnumMap>(enumeration); - } + @Override + public String toString() { + String value = ""; + for (Map.Entry> annt : annotations.entrySet()) { + value += annt.getKey() + " "; + value += annt.getValue() + "\n"; + } + return value; + } - public EnumMap> getFloatInstance(Class enumeration) { - return new EnumMap>(enumeration); - } + public JalviewAnnotation toJalviewAnnotation() { + // TODO Auto-generated method stub + return null; + } } diff --git a/datamodel/compbio/data/sequence/SequenceUtil.java 
b/datamodel/compbio/data/sequence/SequenceUtil.java index 99a8147..149e0e0 100644 --- a/datamodel/compbio/data/sequence/SequenceUtil.java +++ b/datamodel/compbio/data/sequence/SequenceUtil.java @@ -1,22 +1,15 @@ -/* - * @(#)SequenceUtil.java 1.0 September 2009 - * - * Copyright (c) 2009 Peter Troshin - * - * Jalview Web Services version: 2.0 - * - * This library is free software; you can redistribute it and/or modify it under the terms of the - * Apache License version 2 as published by the Apache Software Foundation - * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache - * License for more details. - * - * A copy of the license is in apache_license.txt. It is also available here: - * see: http://www.apache.org/licenses/LICENSE-2.0.txt - * - * Any republication or derived work distributed in source code form - * must include this copyright and license notice. +/* + * @(#)SequenceUtil.java 1.0 September 2009 Copyright (c) 2009 Peter Troshin + * Jalview Web Services version: 2.0 This library is free software; you can + * redistribute it and/or modify it under the terms of the Apache License + * version 2 as published by the Apache Software Foundation This library is + * distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the Apache License for more details. A copy of the + * license is in apache_license.txt. It is also available here: see: + * http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or derived + * work distributed in source code form must include this copyright and license + * notice. 
*/ package compbio.data.sequence; @@ -33,6 +26,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.List; +import java.util.Scanner; import java.util.logging.Level; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -45,362 +39,415 @@ import java.util.regex.Pattern; */ public final class SequenceUtil { - /** - * A whitespace character: [\t\n\x0B\f\r] - */ - public static final Pattern WHITE_SPACE = Pattern.compile("\\s"); - - /** - * A digit - */ - public static final Pattern DIGIT = Pattern.compile("\\d"); - - /** - * Non word - */ - public static final Pattern NONWORD = Pattern.compile("\\W"); - - /** - * Valid Amino acids - */ - public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+", - Pattern.CASE_INSENSITIVE); - - /** - * inversion of AA pattern - */ - public static final Pattern NON_AA = Pattern.compile( - "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE); - - /** - * Same as AA pattern but with two additional letters - XU - */ - public static final Pattern AMBIGUOUS_AA = Pattern.compile( - "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE); - - /** - * Nucleotides a, t, g, c, u - */ - public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+", - Pattern.CASE_INSENSITIVE); - - /** - * Ambiguous nucleotide - */ - public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile( - "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC - /** - * Non nucleotide - */ - public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+", - Pattern.CASE_INSENSITIVE); - - private SequenceUtil() { - } // utility class, no instantiation - - /* - * public static void write_PirSeq(OutputStream os, FastaSequence seq) - * throws IOException { BufferedWriter pir_out = new BufferedWriter(new - * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() + - * SysPrefs.newlinechar); pir_out.write(seq.getSequence() + - * SysPrefs.newlinechar); 
pir_out.close(); } - * - * public static void write_FastaSeq(OutputStream os, FastaSequence seq) - * throws IOException { BufferedWriter fasta_out = new BufferedWriter( new - * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() + - * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() + - * SysPrefs.newlinechar); fasta_out.close(); } - */ - - /** - * @return true is the sequence contains only letters a,c, t, g, u - */ - public static boolean isNucleotideSequence(final FastaSequence s) { - return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence()); - } - - /** - * Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one - * (!) - B char - */ - public static boolean isNonAmbNucleotideSequence(String sequence) { - sequence = SequenceUtil.cleanSequence(sequence); - if (SequenceUtil.DIGIT.matcher(sequence).find()) { - return false; + /** + * A whitespace character: [\t\n\x0B\f\r] + */ + public static final Pattern WHITE_SPACE = Pattern.compile("\\s"); + + /** + * A digit + */ + public static final Pattern DIGIT = Pattern.compile("\\d"); + + /** + * Non word + */ + public static final Pattern NONWORD = Pattern.compile("\\W"); + + /** + * Valid Amino acids + */ + public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+", + Pattern.CASE_INSENSITIVE); + + /** + * inversion of AA pattern + */ + public static final Pattern NON_AA = Pattern.compile( + "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE); + + /** + * Same as AA pattern but with two additional letters - XU + */ + public static final Pattern AMBIGUOUS_AA = Pattern.compile( + "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE); + + /** + * Nucleotides a, t, g, c, u + */ + public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+", + Pattern.CASE_INSENSITIVE); + + /** + * Ambiguous nucleotide + */ + public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile( + "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC + /** + * Non 
nucleotide + */ + public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+", + Pattern.CASE_INSENSITIVE); + + private SequenceUtil() { + } // utility class, no instantiation + + /* + * public static void write_PirSeq(OutputStream os, FastaSequence seq) + * throws IOException { BufferedWriter pir_out = new BufferedWriter(new + * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() + + * SysPrefs.newlinechar); pir_out.write(seq.getSequence() + + * SysPrefs.newlinechar); pir_out.close(); } public static void + * write_FastaSeq(OutputStream os, FastaSequence seq) throws IOException { + * BufferedWriter fasta_out = new BufferedWriter( new + * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() + + * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() + + * SysPrefs.newlinechar); fasta_out.close(); } + */ + + /** + * @return true is the sequence contains only letters a,c, t, g, u + */ + public static boolean isNucleotideSequence(final FastaSequence s) { + return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence()); } - if (SequenceUtil.NON_NUCLEOTIDE.matcher(sequence).find()) { - return false; - /* - * System.out.format("I found the text starting at " + - * "index %d and ending at index %d.%n", nonDNAmatcher .start(), - * nonDNAmatcher.end()); - */ + + /** + * Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one + * (!) 
- B char + */ + public static boolean isNonAmbNucleotideSequence(String sequence) { + sequence = SequenceUtil.cleanSequence(sequence); + if (SequenceUtil.DIGIT.matcher(sequence).find()) { + return false; + } + if (SequenceUtil.NON_NUCLEOTIDE.matcher(sequence).find()) { + return false; + /* + * System.out.format("I found the text starting at " + + * "index %d and ending at index %d.%n", nonDNAmatcher .start(), + * nonDNAmatcher.end()); + */ + } + final Matcher DNAmatcher = SequenceUtil.NUCLEOTIDE.matcher(sequence); + return DNAmatcher.find(); } - final Matcher DNAmatcher = SequenceUtil.NUCLEOTIDE.matcher(sequence); - return DNAmatcher.find(); - } - - /** - * Removes all whitespace chars in the sequence string - * - * @param sequence - * @return cleaned up sequence - */ - public static String cleanSequence(String sequence) { - assert sequence != null; - final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence); - sequence = m.replaceAll("").toUpperCase(); - return sequence; - } - - /** - * Removes all special characters and digits as well as whitespace chars - * from the sequence - * - * @param sequence - * @return cleaned up sequence - */ - public static String deepCleanSequence(String sequence) { - sequence = SequenceUtil.cleanSequence(sequence); - sequence = SequenceUtil.DIGIT.matcher(sequence).replaceAll(""); - sequence = SequenceUtil.NONWORD.matcher(sequence).replaceAll(""); - final Pattern othernonSeqChars = Pattern.compile("[_-]+"); - sequence = othernonSeqChars.matcher(sequence).replaceAll(""); - return sequence; - } - - /** - * - * @param sequence - * @return true is the sequence is a protein sequence, false overwise - */ - public static boolean isProteinSequence(String sequence) { - sequence = SequenceUtil.cleanSequence(sequence); - if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) { - return false; + + /** + * Removes all whitespace chars in the sequence string + * + * @param sequence + * @return cleaned up sequence + */ + public static String 
cleanSequence(String sequence) { + assert sequence != null; + final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence); + sequence = m.replaceAll("").toUpperCase(); + return sequence; } - if (SequenceUtil.DIGIT.matcher(sequence).find()) { - return false; + + /** + * Removes all special characters and digits as well as whitespace chars + * from the sequence + * + * @param sequence + * @return cleaned up sequence + */ + public static String deepCleanSequence(String sequence) { + sequence = SequenceUtil.cleanSequence(sequence); + sequence = SequenceUtil.DIGIT.matcher(sequence).replaceAll(""); + sequence = SequenceUtil.NONWORD.matcher(sequence).replaceAll(""); + final Pattern othernonSeqChars = Pattern.compile("[_-]+"); + sequence = othernonSeqChars.matcher(sequence).replaceAll(""); + return sequence; } - if (SequenceUtil.NON_AA.matcher(sequence).find()) { - return false; + + /** + * @param sequence + * @return true is the sequence is a protein sequence, false overwise + */ + public static boolean isProteinSequence(String sequence) { + sequence = SequenceUtil.cleanSequence(sequence); + if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) { + return false; + } + if (SequenceUtil.DIGIT.matcher(sequence).find()) { + return false; + } + if (SequenceUtil.NON_AA.matcher(sequence).find()) { + return false; + } + final Matcher protmatcher = SequenceUtil.AA.matcher(sequence); + return protmatcher.find(); } - final Matcher protmatcher = SequenceUtil.AA.matcher(sequence); - return protmatcher.find(); - } - - /** - * Check whether the sequence confirms to amboguous protein sequence - * - * @param sequence - * @return return true only if the sequence if ambiguous protein sequence - * Return false otherwise. e.g. 
if the sequence is non-ambiguous - * protein or DNA - */ - public static boolean isAmbiguosProtein(String sequence) { - sequence = SequenceUtil.cleanSequence(sequence); - if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) { - return false; + + /** + * Check whether the sequence confirms to amboguous protein sequence + * + * @param sequence + * @return return true only if the sequence if ambiguous protein sequence + * Return false otherwise. e.g. if the sequence is non-ambiguous + * protein or DNA + */ + public static boolean isAmbiguosProtein(String sequence) { + sequence = SequenceUtil.cleanSequence(sequence); + if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) { + return false; + } + if (SequenceUtil.DIGIT.matcher(sequence).find()) { + return false; + } + if (SequenceUtil.NON_AA.matcher(sequence).find()) { + return false; + } + if (SequenceUtil.AA.matcher(sequence).find()) { + return false; + } + final Matcher amb_prot = SequenceUtil.AMBIGUOUS_AA.matcher(sequence); + return amb_prot.find(); } - if (SequenceUtil.DIGIT.matcher(sequence).find()) { - return false; + + /** + * Writes list of FastaSequeces into the outstream formatting the sequence + * so that it contains width chars on each line + * + * @param outstream + * @param sequences + * @param width + * - the maximum number of characters to write in one line + * @throws IOException + */ + public static void writeFasta(final OutputStream outstream, + final List sequences, final int width) + throws IOException { + writeFastaKeepTheStream(outstream, sequences, width); + outstream.close(); } - if (SequenceUtil.NON_AA.matcher(sequence).find()) { - return false; + + public static void writeFastaKeepTheStream(final OutputStream outstream, + final List sequences, final int width) + throws IOException { + final OutputStreamWriter writer = new OutputStreamWriter(outstream); + final BufferedWriter fastawriter = new BufferedWriter(writer); + for (final FastaSequence fs : sequences) { + fastawriter.write(">" + 
fs.getId() + "\n"); + fastawriter.write(fs.getFormatedSequence(width)); + fastawriter.write("\n"); + } + fastawriter.flush(); + writer.flush(); + } + + /** + * Reads fasta sequences from inStream into the list of FastaSequence + * objects + * + * @param inStream + * from + * @return list of FastaSequence objects + * @throws IOException + */ + public static List readFasta(final InputStream inStream) + throws IOException { + final List seqs = new ArrayList(); + + final BufferedReader infasta = new BufferedReader( + new InputStreamReader(inStream, "UTF8"), 16000); + final Pattern pattern = Pattern.compile("//s+"); + + String line; + String sname = "", seqstr = null; + do { + line = infasta.readLine(); + if ((line == null) || line.startsWith(">")) { + if (seqstr != null) { + seqs.add(new FastaSequence(sname.substring(1), seqstr)); + } + sname = line; // remove > + seqstr = ""; + } else { + final String subseq = pattern.matcher(line).replaceAll(""); + seqstr += subseq; + } + } while (line != null); + + infasta.close(); + return seqs; } - if (SequenceUtil.AA.matcher(sequence).find()) { - return false; + + /** + * Writes FastaSequence in the file, each sequence will take one line only + * + * @param os + * @param sequences + * @throws IOException + */ + public static void writeFasta(final OutputStream os, + final List sequences) throws IOException { + final OutputStreamWriter outWriter = new OutputStreamWriter(os); + final BufferedWriter fasta_out = new BufferedWriter(outWriter); + for (final FastaSequence fs : sequences) { + fasta_out.write(fs.getOnelineFasta()); + } + fasta_out.close(); + outWriter.close(); + } + + public static List readJRonn(final File result) + throws IOException, UnknownFileFormatException { + InputStream input = new FileInputStream(result); + List sequences = readJRonn(input); + input.close(); + return sequences; } - final Matcher amb_prot = SequenceUtil.AMBIGUOUS_AA.matcher(sequence); - return amb_prot.find(); - } - - /** - * Writes list of 
FastaSequeces into the outstream formatting the sequence - * so that it contains width chars on each line - * - * @param outstream - * @param sequences - * @param width - * - the maximum number of characters to write in one line - * @throws IOException - */ - public static void writeFasta(final OutputStream outstream, - final List sequences, final int width) - throws IOException { - final OutputStreamWriter writer = new OutputStreamWriter(outstream); - final BufferedWriter fastawriter = new BufferedWriter(writer); - for (final FastaSequence fs : sequences) { - fastawriter.write(fs.getFormatedSequence(width)); + + /** + * Reader for JRonn horizontal file format >Foobar M G D T T A G 0.48 0.42 + * 0.42 0.48 0.52 0.53 0.54 All values are tab delimited + * + * @param inStream + * @return + * @throws IOException + * @throws UnknownFileFormatException + */ + public static List readJRonn(final InputStream inStream) + throws IOException, UnknownFileFormatException { + final List seqs = new ArrayList(); + + final BufferedReader infasta = new BufferedReader( + new InputStreamReader(inStream, "UTF8"), 16000); + + String line; + String sname = ""; + do { + line = infasta.readLine(); + if (line == null || line.isEmpty()) { + // skip empty lines + continue; + } + if (line.startsWith(">")) { + // read name + sname = line.trim().substring(1); + // read sequence line + line = infasta.readLine(); + final String sequence = line.replace("\t", ""); + // read annotation line + line = infasta.readLine(); + String[] annotValues = line.split("\t"); + float[] annotation = convertToNumber(annotValues); + if (annotation.length != sequence.length()) { + throw new UnknownFileFormatException( + "File does not look like Jronn horizontally formatted output file!\n" + + JRONN_WRONG_FORMAT_MESSAGE); + } + seqs.add(new AnnotatedSequence(sname, sequence, annotation)); + } + } while (line != null); + + infasta.close(); + return seqs; } - outstream.flush(); - fastawriter.close(); - writer.close(); - } - 
- /** - * Reads fasta sequences from inStream into the list of FastaSequence - * objects - * - * @param inStream - * from - * @return list of FastaSequence objects - * @throws IOException - */ - public static List readFasta(final InputStream inStream) - throws IOException { - final List seqs = new ArrayList(); - - final BufferedReader infasta = new BufferedReader( - new InputStreamReader(inStream, "UTF8"), 16000); - final Pattern pattern = Pattern.compile("//s+"); - - String line; - String sname = "", seqstr = null; - do { - line = infasta.readLine(); - if ((line == null) || line.startsWith(">")) { - if (seqstr != null) { - seqs.add(new FastaSequence(sname.substring(1), seqstr)); + + private static float[] convertToNumber(String[] annotValues) + throws UnknownFileFormatException { + float[] annotation = new float[annotValues.length]; + try { + for (int i = 0; i < annotation.length; i++) { + annotation[i] = Float.parseFloat(annotValues[i]); + } + } catch (NumberFormatException e) { + throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, + e.getCause()); } - sname = line; // remove > - seqstr = ""; - } else { - final String subseq = pattern.matcher(line).replaceAll(""); - seqstr += subseq; - } - } while (line != null); - - infasta.close(); - return seqs; - } - - /** - * Writes FastaSequence in the file, each sequence will take one line only - * - * @param os - * @param sequences - * @throws IOException - */ - public static void writeFasta(final OutputStream os, - final List sequences) throws IOException { - final OutputStreamWriter outWriter = new OutputStreamWriter(os); - final BufferedWriter fasta_out = new BufferedWriter(outWriter); - for (final FastaSequence fs : sequences) { - fasta_out.write(fs.getOnelineFasta()); + return annotation; } - fasta_out.close(); - outWriter.close(); - } - - public static List readJRonn(final File result) - throws IOException, UnknownFileFormatException { - InputStream input = new FileInputStream(result); - List sequences 
= readJRonn(input); - input.close(); - return sequences; - } - - /** - * Reader for JRonn horizontal file format - * - * >Foobar - * - * M G D T T A G - * - * 0.48 0.42 0.42 0.48 0.52 0.53 0.54 - * - * All values are tab delimited - * - * @param inStream - * @return - * @throws IOException - * @throws UnknownFileFormatException - */ - public static List readJRonn(final InputStream inStream) - throws IOException, UnknownFileFormatException { - final List seqs = new ArrayList(); - - final BufferedReader infasta = new BufferedReader( - new InputStreamReader(inStream, "UTF8"), 16000); - - String line; - String sname = ""; - do { - line = infasta.readLine(); - if (line == null || line.isEmpty()) { - // skip empty lines - continue; - } - if (line.startsWith(">")) { - // read name - sname = line.trim().substring(1); - // read sequence line - line = infasta.readLine(); - final String sequence = line.replace("\t", ""); - // read annotation line - line = infasta.readLine(); - String[] annotValues = line.split("\t"); - float[] annotation = convertToNumber(annotValues); - if (annotation.length != sequence.length()) { - throw new UnknownFileFormatException( - "File does not look like Jronn horizontally formatted output file!\n" - + JRONN_WRONG_FORMAT_MESSAGE); + + private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n" + + ">sequence_name\n " + + "M V S\n" + + "0.43 0.22 0.65\n" + + "Where first line is the sequence name,\n" + + "second line is the tab delimited sequence,\n" + + "third line contains tab delimited disorder prediction values.\n" + + "No lines are allowed between these three. 
Additionally, the number of " + + "sequence residues must be equal to the number of the disorder values."; + + /** + * Closes the Closable and logs the exception if any + * + * @param log + * @param stream + */ + public final static void closeSilently(java.util.logging.Logger log, + Closeable stream) { + if (stream != null) { + try { + stream.close(); + } catch (IOException e) { + log.log(Level.WARNING, e.getLocalizedMessage(), e.getCause()); + } } - seqs.add(new AnnotatedSequence(sname, sequence, annotation)); - } - } while (line != null); - - infasta.close(); - return seqs; - } - - private static float[] convertToNumber(String[] annotValues) - throws UnknownFileFormatException { - float[] annotation = new float[annotValues.length]; - try { - for (int i = 0; i < annotation.length; i++) { - annotation[i] = Float.parseFloat(annotValues[i]); - } - } catch (NumberFormatException e) { - throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e - .getCause()); } - return annotation; - } - - private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n" - + ">sequence_name\n " - + "M V S\n" - + "0.43 0.22 0.65\n" - + "Where first line is the sequence name,\n" - + "second line is the tab delimited sequence,\n" - + "third line contains tab delimited disorder prediction values.\n" - + "No lines are allowed between these three. Additionally, the number of " - + "sequence residues must be equal to the number of the disorder values."; - - /** - * Closes the Closable and logs the exception if any - * - * @param log - * @param stream - */ - public final static void closeSilently(java.util.logging.Logger log, - Closeable stream) { - if (stream != null) { - try { - stream.close(); - } catch (IOException e) { - log.log(Level.WARNING, e.getLocalizedMessage(), e.getCause()); - } + + /** + * + * TODO complete! + * + * # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 0.37094 T 0.79983 + * 0.85864 0.44331 .... 
# RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 + * 0.37094 + * + * @param input + * @return + * @throws IOException + * @throws UnknownFileFormatException + */ + public static List> readDisembl( + final InputStream input) throws IOException, + UnknownFileFormatException { + Scanner scan = new Scanner(input); + scan.useDelimiter("# RESIDUE COILS REM465 HOTLOOPS\n"); + if (!scan.hasNext()) { + throw new UnknownFileFormatException( + "In Disembl score format each seqeunce score is expected to start from the line: " + + "'# RESIDUE COILS REM465 HOTLOOPS\\n'." + + " No such line was found!"); + } + + List> results = new ArrayList>(); + int seqCounter = 0; + while (scan.hasNext()) { + seqCounter++; + String singleSeq = scan.next(); + Scanner scansingle = new Scanner(singleSeq); + StringBuffer seqbuffer = new StringBuffer(); + List coils = new ArrayList(); + List rem = new ArrayList(); + List hotloops = new ArrayList(); + + MultiAnnotatedSequence disemblRes = new MultiAnnotatedSequence( + DisemblResultAnnot.class); + + while (scansingle.hasNextLine()) { + String valueLine = scansingle.nextLine(); + Scanner values = new Scanner(valueLine); + seqbuffer.append(values.next()); + coils.add(values.nextFloat()); + rem.add(values.nextFloat()); + hotloops.add(values.nextFloat()); + values.close(); + } + disemblRes.addAnnotation(DisemblResultAnnot.COILS, coils); + disemblRes.addAnnotation(DisemblResultAnnot.REM465, rem); + disemblRes.addAnnotation(DisemblResultAnnot.HOTLOOPS, hotloops); + // TODO + // disemblRes.sequence = seqbuffer.toString(); + scansingle.close(); + results.add(disemblRes); + } + + input.close(); + return results; } - } - - public static List readDisembl(final File result) - throws IOException, UnknownFileFormatException { - InputStream input = new FileInputStream(result); - List sequences = readJRonn(input); - input.close(); - return sequences; - } + } diff --git a/runner/compbio/runner/conservation/AACon.java b/runner/compbio/runner/conservation/AACon.java 
new file mode 100644 index 0000000..5d20d5d --- /dev/null +++ b/runner/compbio/runner/conservation/AACon.java @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 1.0 This library is free software; you can redistribute it + * and/or modify it under the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or + * derived work distributed in source code form must include this copyright and + * license notice. + */ + +package compbio.runner.conservation; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +import org.apache.log4j.Logger; + +import compbio.conservation.Method; +import compbio.conservation.ResultReader; +import compbio.data.sequence.MultiAnnotatedSequence; +import compbio.engine.client.CommandBuilder; +import compbio.engine.client.Executable; +import compbio.engine.client.SkeletalExecutable; +import compbio.metadata.Limit; +import compbio.metadata.LimitsManager; +import compbio.metadata.ResultNotAvailableException; +import compbio.runner.Util; + +/** + * Command line + * + * java -Xmx512 -jar jronn_v3.jar -i=test_seq.txt -n=1 -o=out.txt -s=stat.out + * + * @author pvtroshin + * + */ +public class AACon extends SkeletalExecutable { + + private static Logger log = Logger.getLogger(AACon.class); + + /** + * Number of cores to use, defaults to 1 for local execution or the value of + * "jronn.cluster.cpunum" property for 
cluster execution + */ + private int ncoreNumber = 0; + + private final String ncorePrm = "-n="; + + // Cache for Limits information + private static LimitsManager limits; + + public static final String KEY_VALUE_SEPARATOR = Util.SPACE; + public static final String STAT_FILE = "stat.txt"; + + public AACon() { + addParameters(Arrays.asList("-jar", getLibPath(), "-d=" + STAT_FILE, + "-f=RESULT_NO_ALIGNMENT")); + } + + @SuppressWarnings("unchecked") + @Override + public MultiAnnotatedSequence getResults(String workDirectory) + throws ResultNotAvailableException { + MultiAnnotatedSequence annotations = null; + try { + InputStream inStream = new FileInputStream(new File(workDirectory, + getOutput())); + annotations = ResultReader.readResults(inStream); + inStream.close(); + } catch (FileNotFoundException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } catch (IOException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } catch (NullPointerException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } + return annotations; + } + + private static String getLibPath() { + + String settings = ph.getProperty("aacon.jar.file"); + if (compbio.util.Util.isEmpty(settings)) { + throw new NullPointerException( + "Please define aacon.jar.file property in Executable.properties file" + + "and initialize it with the location of jronn jar file"); + } + if (new File(settings).isAbsolute()) { + // Jronn jar can be found so no actions necessary + // no further actions is necessary + return settings; + } + return compbio.engine.client.Util.convertToAbsolute(settings); + } + + @Override + public List getCreatedFiles() { + return Arrays.asList(getOutput(), getError()); + } + + @Override + public AACon setInput(String inFile) { + super.setInput(inFile); + cbuilder.setParam("-i=" + inFile); + return this; + } + + @Override + public AACon setOutput(String 
outFile) { + super.setOutput(outFile); + cbuilder.setParam("-o=" + outFile); + return this; + } + + @Override + public Limit getLimit(String presetName) { + if (limits == null) { + limits = getLimits(); + } + Limit limit = null; + if (limits != null) { + // this returns default limit if preset is undefined! + limit = limits.getLimitByName(presetName); + } + // If limit is not defined for a particular preset, then return default + // limit + if (limit == null) { + log.debug("Limit for the preset " + presetName + + " is not found. Using default"); + limit = limits.getDefaultLimit(); + } + return limit; + } + + @Override + public LimitsManager getLimits() { + // synchronise on static field + synchronized (log) { + if (limits == null) { + limits = Util.getLimits(this.getClass()); + } + } + return limits; + } + + @Override + public Class> getType() { + return this.getClass(); + } + + public static String getStatFile() { + return STAT_FILE; + } + + public void setNCore(int ncoreNumber) { + if (ncoreNumber < 1 || ncoreNumber > 100) { + throw new IndexOutOfBoundsException( + "Number of cores must be within 1 and 100 "); + } + this.ncoreNumber = ncoreNumber; + cbuilder.setParam(ncorePrm + Integer.toString(getNCore())); + } + + int getNCore() { + return ncoreNumber; + } + + @Override + public CommandBuilder getParameters(ExecProvider provider) { + // If number of cores is provided, set it for the cluster execution + // only! + if (provider == Executable.ExecProvider.Cluster) { + int cpunum = SkeletalExecutable.getClusterCpuNum(getType()); + cpunum = (cpunum == 0) ? 
1 : cpunum; + setNCore(cpunum); + } else { + // Limit number of cores to 1 for ANY execution which does not set + // Ncores explicitly using setNCore method or is run on local VM + if (ncoreNumber == 0) { + setNCore(1); + } + } + return super.getParameters(provider); + } + +} diff --git a/runner/compbio/runner/disorder/Disembl.java b/runner/compbio/runner/disorder/Disembl.java index 4c76854..c6b278f 100644 --- a/runner/compbio/runner/disorder/Disembl.java +++ b/runner/compbio/runner/disorder/Disembl.java @@ -1,19 +1,15 @@ -/* Copyright (c) 2009 Peter Troshin - * - * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 - * - * This library is free software; you can redistribute it and/or modify it under the terms of the - * Apache License version 2 as published by the Apache Software Foundation - * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache - * License for more details. - * - * A copy of the license is in apache_license.txt. It is also available here: - * @see: http://www.apache.org/licenses/LICENSE-2.0.txt - * - * Any republication or derived work distributed in source code form - * must include this copyright and license notice. +/* + * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 1.0 This library is free software; you can redistribute it + * and/or modify it under the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. 
It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or + * derived work distributed in source code form must include this copyright and + * license notice. */ package compbio.runner.disorder; @@ -24,7 +20,6 @@ import java.util.Arrays; import org.apache.log4j.Logger; -import com.sun.xml.internal.bind.api.impl.NameConverter.Standard; import compbio.data.sequence.Alignment; import compbio.data.sequence.UnknownFileFormatException; import compbio.engine.client.Executable; @@ -36,98 +31,101 @@ import compbio.metadata.ResultNotAvailableException; import compbio.runner.Util; /** - * @see Standard DisEMBL DisEMBL.py smooth_frame peak_frame join_frame - * fold_coils fold_hotloops fold_rem465 sequence_file print 'A default run - * would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 fasta_file > out' + * @see DisEMBL * - * new DisEMBL is at /homes/pvtroshin/soft/DisEMBL-1.4raw This is not a - * standard DisEMBL! The script has been modified! DisEMBL.py smooth_frame - * peak_frame join_frame fold_coils fold_hotloops fold_rem465 [mode] < - * fasta_file > out print 'A default run would be: ./DisEMBL.py 8 8 4 1.2 - * 1.4 1.2 < fasta_file' print 'Mode: "default"(nothing) or "scores" which - * will give scores per residue in TAB separated format' + * DisEMBL.py smooth_frame peak_frame join_frame fold_coils fold_hotloops + * fold_rem465 sequence_file print 'A default run would be: ./DisEMBL.py 8 + * 8 4 1.2 1.4 1.2 fasta_file > out' new DisEMBL is at + * /homes/pvtroshin/soft/DisEMBL-1.4raw * + * This is not a standard DisEMBL! The script has been modified! 
DisEMBL.py + * smooth_frame peak_frame join_frame fold_coils fold_hotloops fold_rem465 + * [mode] < fasta_file > out print + * + * 'A default run would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 < fasta_file' + * print 'Mode: "default"(nothing) or "scores" which will give scores per + * residue in TAB separated format' */ public class Disembl extends SkeletalExecutable implements - PipedExecutable { - - private static Logger log = Logger.getLogger(Disembl.class); - - // Cache for Limits information - private static LimitsManager limits; - - public static final String KEY_VALUE_SEPARATOR = Util.SPACE; - - public Disembl() { - // remove default input to prevent it to appear in the parameters list - // that could happen if the parameters are set first - // super.setInput(""); - addParameters(Arrays.asList("8", "8", "4", "1.2", "1.4", "1.2", - "scores")); - } - - @SuppressWarnings("unchecked") - public Alignment getResults(String workDirectory) - throws ResultNotAvailableException { - try { - return Util.readClustalFile(workDirectory, getOutput()); - } catch (FileNotFoundException e) { - log.error(e.getMessage(), e.getCause()); - throw new ResultNotAvailableException(e); - } catch (IOException e) { - log.error(e.getMessage(), e.getCause()); - throw new ResultNotAvailableException(e); - } catch (UnknownFileFormatException e) { - log.error(e.getMessage(), e.getCause()); - throw new ResultNotAvailableException(e); - } catch (NullPointerException e) { - log.error(e.getMessage(), e.getCause()); - throw new ResultNotAvailableException(e); + PipedExecutable { + + private static Logger log = Logger.getLogger(Disembl.class); + + // Cache for Limits information + private static LimitsManager limits; + + public static final String KEY_VALUE_SEPARATOR = Util.SPACE; + + public Disembl() { + // remove default input to prevent it to appear in the parameters list + // that could happen if the parameters are set first + // super.setInput(""); + addParameters(Arrays.asList("8", "8", "4", "1.2", 
"1.4", "1.2", + "scores")); } - } - - @Override - public Disembl setInput(String inFile) { - super.setInput(inFile); - cbuilder.setLast(inFile); - return this; - } - - @Override - public Limit getLimit(String presetName) { - if (limits == null) { - limits = getLimits(); + + @SuppressWarnings("unchecked") + public Alignment getResults(String workDirectory) + throws ResultNotAvailableException { + try { + return Util.readClustalFile(workDirectory, getOutput()); + } catch (FileNotFoundException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } catch (IOException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } catch (UnknownFileFormatException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } catch (NullPointerException e) { + log.error(e.getMessage(), e.getCause()); + throw new ResultNotAvailableException(e); + } } - Limit limit = null; - if (limits != null) { - // this returns default limit if preset is undefined! - limit = limits.getLimitByName(presetName); + @Override + public Disembl setInput(String inFile) { + super.setInput(inFile); + cbuilder.setLast(inFile); + return this; } - // If limit is not defined for a particular preset, then return default - // limit - if (limit == null) { - log.debug("Limit for the preset " + presetName - + " is not found. Using default"); - limit = limits.getDefaultLimit(); + + @Override + public Limit getLimit(String presetName) { + if (limits == null) { + limits = getLimits(); + } + + Limit limit = null; + if (limits != null) { + // this returns default limit if preset is undefined! + limit = limits.getLimitByName(presetName); + } + // If limit is not defined for a particular preset, then return default + // limit + if (limit == null) { + log.debug("Limit for the preset " + presetName + + " is not found. 
Using default"); + limit = limits.getDefaultLimit(); + } + return limit; } - return limit; - } - - @Override - public LimitsManager getLimits() { - // synchronise on static field - synchronized (log) { - if (limits == null) { - limits = Util.getLimits(this.getClass()); - } + + @Override + public LimitsManager getLimits() { + // synchronise on static field + synchronized (log) { + if (limits == null) { + limits = Util.getLimits(this.getClass()); + } + } + return limits; } - return limits; - } - @Override - public Class> getType() { - return this.getClass(); - } + @Override + public Class> getType() { + return this.getClass(); + } } diff --git a/testsrc/compbio/data/sequence/SequenceUtilTester.java b/testsrc/compbio/data/sequence/SequenceUtilTester.java index f2af670..720037b 100644 --- a/testsrc/compbio/data/sequence/SequenceUtilTester.java +++ b/testsrc/compbio/data/sequence/SequenceUtilTester.java @@ -1,21 +1,16 @@ -/* Copyright (c) 2009 Peter Troshin - * - * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 - * - * This library is free software; you can redistribute it and/or modify it under the terms of the - * Apache License version 2 as published by the Apache Software Foundation - * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache - * License for more details. - * - * A copy of the license is in apache_license.txt. It is also available here: - * @see: http://www.apache.org/licenses/LICENSE-2.0.txt - * - * Any republication or derived work distributed in source code form - * must include this copyright and license notice. 
+/* + * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 1.0 This library is free software; you can redistribute it + * and/or modify it under the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or + * derived work distributed in source code form must include this copyright and + * license notice. */ - package compbio.data.sequence; import static org.testng.AssertJUnit.assertEquals; @@ -36,112 +31,152 @@ import compbio.metadata.AllTestSuit; public class SequenceUtilTester { - @Test() - public void testisNonAmbNucleotideSequence() { - String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga"; - assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq)); - String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA "; - assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq)); - String nonDna = "atgfctgatgcatgcatgatgctga"; - assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); - - nonDna = "atgc1tgatgcatgcatgatgctga"; - assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); - - nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; - assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); - // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code - assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); - - } - - @Test() - public void testCleanSequence() { - String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; - assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), - SequenceUtil.cleanSequence(dirtySeq)); - } - - 
@Test() - public void testDeepCleanSequence() { - String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA "; - assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), - SequenceUtil.deepCleanSequence(dirtySeq)); - } - - @Test() - public void testisProteinSequence() { - String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; - assertFalse(SequenceUtil.isProteinSequence(dirtySeq)); - String notaSeq = "atgc1tgatgcatgcatgatgctga"; - assertFalse(SequenceUtil.isProteinSequence(notaSeq)); - String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; - assertTrue(SequenceUtil.isProteinSequence(AAseq)); - AAseq += "XU"; - assertFalse(SequenceUtil.isProteinSequence(AAseq)); - - } - - @Test() - public void testReadWriteFasta() { - - try { - FileInputStream fio = new FileInputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); - assertNotNull(fio); - List fseqs = SequenceUtil.readFasta(fio); - assertNotNull(fseqs); - assertEquals(3, fseqs.size()); - assertEquals(3, fseqs.size()); - fio.close(); - FileOutputStream fou = new FileOutputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written"); - SequenceUtil.writeFasta(fou, fseqs); - fou.close(); - FileOutputStream fou20 = new FileOutputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written"); - SequenceUtil.writeFasta(fou20, fseqs, 20); - fou20.close(); - - } catch (FileNotFoundException e) { - e.printStackTrace(); - fail(e.getLocalizedMessage()); - } catch (IOException e) { - e.printStackTrace(); - fail(e.getLocalizedMessage()); + @Test() + public void testisNonAmbNucleotideSequence() { + String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga"; + assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq)); + String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA "; + assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq)); + String nonDna = "atgfctgatgcatgcatgatgctga"; + 
assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); + + nonDna = "atgc1tgatgcatgcatgatgctga"; + assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); + + nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; + assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); + // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code + assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); + + } + + @Test() + public void testCleanSequence() { + String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; + assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), + SequenceUtil.cleanSequence(dirtySeq)); + } + + @Test() + public void testDeepCleanSequence() { + String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA "; + assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), + SequenceUtil.deepCleanSequence(dirtySeq)); } - } - - /** - * This test tests the loading of horizontally formatted Jronn output file - */ - @Test - public void loadJronnFile() { - - FileInputStream fio; - try { - fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out"); - List aseqs = SequenceUtil.readJRonn(fio); - assertNotNull(aseqs); - assertEquals(aseqs.size(), 3); - AnnotatedSequence aseq = aseqs.get(0); - assertNotNull(aseq); - assertNotNull(aseq.getAnnotation()); - //System.out.println(aseq); - assertEquals(aseq.getAnnotation().length, aseq.getSequence() - .length()); - fio.close(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - fail(e.getLocalizedMessage()); - } catch (IOException e) { - e.printStackTrace(); - fail(e.getLocalizedMessage()); - } catch (UnknownFileFormatException e) { - e.printStackTrace(); - fail(e.getLocalizedMessage()); + + @Test() + public void testisProteinSequence() { + String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; + assertFalse(SequenceUtil.isProteinSequence(dirtySeq)); + String notaSeq = 
"atgc1tgatgcatgcatgatgctga"; + assertFalse(SequenceUtil.isProteinSequence(notaSeq)); + String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; + assertTrue(SequenceUtil.isProteinSequence(AAseq)); + AAseq += "XU"; + assertFalse(SequenceUtil.isProteinSequence(AAseq)); + + } + + @Test() + public void testReadWriteFasta() { + + try { + FileInputStream fio = new FileInputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); + assertNotNull(fio); + List fseqs = SequenceUtil.readFasta(fio); + assertNotNull(fseqs); + assertEquals(3, fseqs.size()); + assertEquals(3, fseqs.size()); + fio.close(); + FileOutputStream fou = new FileOutputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written"); + SequenceUtil.writeFasta(fou, fseqs); + fou.close(); + FileOutputStream fou20 = new FileOutputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written"); + SequenceUtil.writeFasta(fou20, fseqs, 21); + fou20.close(); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } } - } + /** + * This test tests the loading of horizontally formatted Jronn output file + */ + @Test + public void loadJronnFile() { + + FileInputStream fio; + try { + fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out"); + List aseqs = SequenceUtil.readJRonn(fio); + assertNotNull(aseqs); + assertEquals(aseqs.size(), 3); + AnnotatedSequence aseq = aseqs.get(0); + assertNotNull(aseq); + assertNotNull(aseq.getAnnotation()); + // System.out.println(aseq); + assertEquals(aseq.getAnnotation().length, aseq.getSequence() + .length()); + fio.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (UnknownFileFormatException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + enum Trial { + 
one, two, three + }; + + /** + * This test tests the loading of horizontally formatted Jronn output file + */ + @SuppressWarnings("unchecked") + @Test + public void testMultiAnnotatedSequence() { + + FileInputStream fio; + try { + fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + + "disembl.out"); + List> aseqs = SequenceUtil + .readDisembl(fio); + assertNotNull(aseqs); + + /* + * MultiAnnotatedSequence ma = new MultiAnnotatedSequence(); + * Map> val = ma.getInstance(Trial.class); + * List list = new ArrayList(); list.add(new + * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one, + * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f)); + * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0); + */ + fio.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (UnknownFileFormatException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + + } } diff --git a/testsrc/compbio/metadata/AllTestSuit.java b/testsrc/compbio/metadata/AllTestSuit.java index 2c045cc..633e30b 100644 --- a/testsrc/compbio/metadata/AllTestSuit.java +++ b/testsrc/compbio/metadata/AllTestSuit.java @@ -59,8 +59,7 @@ public class AllTestSuit { * For this to work execution must start from the project directory! 
*/ public static final String CURRENT_DIRECTORY = SysPrefs - .getCurrentDirectory() - + File.separator; + .getCurrentDirectory() + File.separator; public static final String TEST_DATA_PATH = "testsrc" + File.separator + "testdata" + File.separator; @@ -78,6 +77,9 @@ public class AllTestSuit { public static final String test_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE + "TO1381.fasta"; + public static final String test_alignment_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE + + "TO1381.fasta.aln"; + public static final String test_input_real = AllTestSuit.TEST_DATA_PATH_ABSOLUTE + "50x500Protein.fasta"; diff --git a/testsrc/compbio/runner/conservation/AAConTester.java b/testsrc/compbio/runner/conservation/AAConTester.java new file mode 100644 index 0000000..033555a --- /dev/null +++ b/testsrc/compbio/runner/conservation/AAConTester.java @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2010 Peter Troshin JAva Bioinformatics Analysis Web Services + * (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the Apache License version 2 as published + * by the Apache Software Foundation This library is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details. A copy of the license is in + * apache_license.txt. It is also available here: + * + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form must include + * this copyright and license notice. 
+ */ +package compbio.runner.conservation; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.text.ParseException; + +import javax.xml.bind.ValidationException; + +import org.ggf.drmaa.DrmaaException; +import org.ggf.drmaa.JobInfo; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import compbio.conservation.Method; +import compbio.data.sequence.MultiAnnotatedSequence; +import compbio.engine.AsyncExecutor; +import compbio.engine.Configurator; +import compbio.engine.FilePuller; +import compbio.engine.SyncExecutor; +import compbio.engine.client.ConfExecutable; +import compbio.engine.client.ConfiguredExecutable; +import compbio.engine.client.Executable; +import compbio.engine.client.RunConfiguration; +import compbio.engine.cluster.drmaa.ClusterUtil; +import compbio.engine.cluster.drmaa.JobRunner; +import compbio.engine.cluster.drmaa.StatisticManager; +import compbio.engine.local.LocalRunner; +import compbio.metadata.ChunkHolder; +import compbio.metadata.JobExecutionException; +import compbio.metadata.JobStatus; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.LimitsManager; +import compbio.metadata.PresetManager; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.RunnerConfig; +import compbio.util.FileWatcher; +import compbio.util.SysPrefs; + +public class AAConTester { + + public static final String CURRENT_DIRECTORY = SysPrefs + .getCurrentDirectory() + File.separator; + + public static String test_outfile = "TO1381.aacon.out"; // "/homes/pvtroshin/TO1381.clustal.cluster.out + public static String test_alignment_input = 
CURRENT_DIRECTORY + "testsrc" + + File.separator + "testdata" + File.separator + "TO1381.fasta.aln"; + private AACon aacon; + + @BeforeMethod(alwaysRun = true) + void init() { + aacon = new AACon(); + aacon.setInput(test_alignment_input).setOutput(test_outfile); + } + + @Test() + public void testRunOnCluster() { + assertFalse(SysPrefs.isWindows, + "Cluster execution can only be in unix environment"); + try { + ConfiguredExecutable confAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Cluster); + JobRunner runner = JobRunner.getInstance(confAAcon); + + assertNotNull(runner, "Runner is NULL"); + runner.executeJob(); + // assertNotNull("JobId is null", jobId1); + JobStatus status = runner.getJobStatus(); + assertTrue(status == JobStatus.PENDING + || status == JobStatus.RUNNING, + "Status of the process is wrong!"); + JobInfo info = runner.getJobInfo(); + assertNotNull(info, "JobInfo is null"); + StatisticManager sm = new StatisticManager(info); + assertNotNull(sm, "Statictic manager is null"); + try { + + String exits = sm.getExitStatus(); + assertNotNull("Exit status is null", exits); + // cut 4 trailing zeros from the number + int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits) + .intValue(); + assertEquals(0, exitsInt); + System.out.println(sm.getAllStats()); + + } catch (ParseException e) { + e.printStackTrace(); + fail("Parse Exception: " + e.getMessage()); + } + // assertFalse(runner.cleanup()); + assertTrue(sm.hasExited()); + assertFalse(sm.wasAborted()); + assertFalse(sm.hasDump()); + assertFalse(sm.hasSignaled()); + + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (DrmaaException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } + } + + /** + * This tests fails from time to time depending on the cluster load or 
some + * other factors. Any client code has to adjust for this issue + */ + @Test() + public void testRunOnClusterAsync() { + assertFalse(SysPrefs.isWindows, + "Cluster execution can only be in unix environment"); + try { + ConfiguredExecutable confAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Cluster); + AsyncExecutor aengine = Configurator.getAsyncEngine(confAAcon); + String jobId = aengine.submitJob(confAAcon); + assertNotNull(jobId, "Runner is NULL"); + // let drmaa to start + Thread.sleep(500); + JobStatus status = aengine.getJobStatus(jobId); + while (status != JobStatus.FINISHED) { + System.out.println("Job Status: " + status); + Thread.sleep(1000); + status = aengine.getJobStatus(jobId); + ConfiguredExecutable result = (ConfiguredExecutable) aengine + .getResults(jobId); + assertNotNull(result); + System.out.println("RES:" + result); + // Some times the job could be removed from the cluster + // accounting + // before it has been reported to finish. Make sure + // to stop waiting in such case + if (status == JobStatus.UNDEFINED) { + break; + } + } + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (InterruptedException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test() + public void testRunLocally() { + try { + ConfiguredExecutable confAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Local); + + // For local execution use relative + LocalRunner lr = new LocalRunner(confAAcon); + lr.executeJob(); + ConfiguredExecutable al1 = lr.waitForResult(); + assertNotNull(al1.getResults()); + MultiAnnotatedSequence annotations = confAAcon.getResults(); + assertNotNull(annotations); + assertEquals(annotations.getAnnotations().size(), 18); + assertEquals(al1.getResults(), annotations); + } catch (JobSubmissionException e) { + 
e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + @Test() + public void testRunLocallyOnTwoCpu() { + try { + aacon.setNCore(2); + ConfiguredExecutable confAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Local); + + // For local execution use relative + LocalRunner lr = new LocalRunner(confAAcon); + lr.executeJob(); + ConfiguredExecutable al1 = lr.waitForResult(); + assertNotNull(al1.getResults()); + MultiAnnotatedSequence annotations = confAAcon.getResults(); + assertNotNull(annotations); + assertEquals(annotations.getAnnotations().size(), 18); + assertEquals(al1.getResults(), annotations); + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + @Test() + public void readStatistics() { + try { + ConfiguredExecutable confAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Local); + // For local execution use relative + + AsyncExecutor sexec = Configurator.getAsyncEngine(confAAcon); + String jobId = sexec.submitJob(confAAcon); + FilePuller fw = FilePuller.newFilePuller( + confAAcon.getWorkDirectory() + File.separator + + AACon.getStatFile(), + FileWatcher.MIN_CHUNK_SIZE_BYTES); + int count = 0; + long position = 0; + fw.waitForFile(2); + JobStatus status = sexec.getJobStatus(jobId); + do { + ChunkHolder ch = fw.pull(position); + String chunk = ch.getChunk(); + position = ch.getNextPosition(); + // System.out.println(chunk); + count++; + // Make sure the loop is terminated if the job fails + if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) { + fail("job 
failed!"); + break; + } + Thread.sleep(300); + status = sexec.getJobStatus(jobId); + } while (status != JobStatus.FINISHED || fw.hasMoreData()); + + assertTrue(count >= 1); + ConfiguredExecutable al = sexec.getResults(jobId); + assertNotNull(al.getResults()); + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (InterruptedException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test() + public void testPersistance() { + try { + AACon aacon = new AACon(); + aacon.setError("errrr.txt").setInput(test_alignment_input) + .setOutput("outtt.txt"); + assertEquals(aacon.getInput(), test_alignment_input); + assertEquals(aacon.getError(), "errrr.txt"); + assertEquals(aacon.getOutput(), "outtt.txt"); + ConfiguredExecutable cAAcon = Configurator + .configureExecutable(aacon, Executable.ExecProvider.Local); + + SyncExecutor sexec = Configurator.getSyncEngine(cAAcon); + sexec.executeJob(); + ConfiguredExecutable al = sexec.waitForResult(); + assertNotNull(al.getResults()); + // Save run configuration + assertTrue(cAAcon.saveRunConfiguration()); + + // See if loaded configuration is the same as saved + RunConfiguration loadedRun = RunConfiguration + .load(new FileInputStream(new File(cAAcon + .getWorkDirectory(), RunConfiguration.rconfigFile))); + assertEquals( + ((ConfExecutable) cAAcon).getRunConfiguration(), + loadedRun); + // Load run configuration as ConfExecutable + ConfiguredExecutable resurrectedCAAcon = (ConfiguredExecutable) cAAcon + .loadRunConfiguration(new FileInputStream(new File(cAAcon + .getWorkDirectory(), RunConfiguration.rconfigFile))); + assertNotNull(resurrectedCAAcon); + assertEquals(resurrectedCAAcon.getExecutable().getInput(), + test_alignment_input); + assertEquals(resurrectedCAAcon.getExecutable().getError(), + "errrr.txt"); + 
assertEquals(resurrectedCAAcon.getExecutable().getOutput(), + "outtt.txt"); + // See in details whether executables are the same + assertEquals(resurrectedCAAcon.getExecutable(), aacon); + + ConfiguredExecutable resAAcon = Configurator + .configureExecutable(resurrectedCAAcon.getExecutable(), + Executable.ExecProvider.Local); + + sexec = Configurator.getSyncEngine(resAAcon, + Executable.ExecProvider.Local); + sexec.executeJob(); + al = sexec.waitForResult(); + assertNotNull(al); + + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test() + public void testConfigurationLoading() { + try { + RunnerConfig aaconConfig = ConfExecutable + .getRunnerOptions(AACon.class); + assertNotNull(aaconConfig); + assertTrue(aaconConfig.getArguments().size() > 0); + + PresetManager aaconPresets = ConfExecutable + .getRunnerPresets(AACon.class); + assertNull(aaconPresets); // there is no presets + + LimitsManager jronnLimits = ConfExecutable + .getRunnerLimits(AACon.class); + assertNotNull(jronnLimits); + assertTrue(jronnLimits.getLimits().size() > 0); + jronnLimits.validate(aaconPresets); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (ValidationException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + +} diff --git a/testsrc/compbio/runner/disorder/DisemblTester.java b/testsrc/compbio/runner/disorder/DisemblTester.java new file mode 100644 index 0000000..85d5e91 --- /dev/null +++ b/testsrc/compbio/runner/disorder/DisemblTester.java @@ -0,0 
+1,352 @@ +/* Copyright (c) 2009 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. + */ + +package compbio.runner.disorder; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.text.ParseException; +import java.util.List; + +import javax.xml.bind.ValidationException; + +import org.ggf.drmaa.DrmaaException; +import org.ggf.drmaa.JobInfo; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import compbio.data.sequence.AnnotatedSequence; +import compbio.engine.AsyncExecutor; +import compbio.engine.Configurator; +import compbio.engine.FilePuller; +import compbio.engine.SyncExecutor; +import compbio.engine.client.ConfExecutable; +import compbio.engine.client.ConfiguredExecutable; +import compbio.engine.client.Executable; +import compbio.engine.client.RunConfiguration; +import compbio.engine.cluster.drmaa.ClusterUtil; +import compbio.engine.cluster.drmaa.JobRunner; +import 
compbio.engine.cluster.drmaa.StatisticManager; +import compbio.engine.local.LocalRunner; +import compbio.metadata.AllTestSuit; +import compbio.metadata.ChunkHolder; +import compbio.metadata.JobExecutionException; +import compbio.metadata.JobStatus; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.LimitsManager; +import compbio.metadata.PresetManager; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.RunnerConfig; +import compbio.util.FileWatcher; +import compbio.util.SysPrefs; + +public class DisemblTester { + + public static String test_outfile = "TO1381.disembl.out"; + + private Disembl disembl; + + @BeforeMethod(alwaysRun = true) + void init() { + disembl = new Disembl(); + disembl.setInput(AllTestSuit.test_input).setOutput(test_outfile); + } + + @Test(groups = { AllTestSuit.test_group_cluster, + AllTestSuit.test_group_runner }) + public void testRunOnCluster() { + assertFalse(SysPrefs.isWindows, + "Cluster execution can only be in unix environment"); + try { + ConfiguredExecutable confDisembl = Configurator + .configureExecutable(disembl, + Executable.ExecProvider.Cluster); + JobRunner runner = JobRunner.getInstance(confDisembl); + + assertNotNull(runner, "Runner is NULL"); + runner.executeJob(); + // assertNotNull("JobId is null", jobId1); + JobStatus status = runner.getJobStatus(); + assertTrue(status == JobStatus.PENDING + || status == JobStatus.RUNNING, + "Status of the process is wrong!"); + JobInfo info = runner.getJobInfo(); + assertNotNull(info, "JobInfo is null"); + StatisticManager sm = new StatisticManager(info); + assertNotNull(sm, "Statictic manager is null"); + try { + + String exits = sm.getExitStatus(); + assertNotNull("Exit status is null", exits); + // cut 4 trailing zeros from the number + int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits) + .intValue(); + assertEquals(0, exitsInt); + System.out.println(sm.getAllStats()); + + } catch (ParseException e) { + e.printStackTrace(); + 
fail("Parse Exception: " + e.getMessage()); + } + //assertFalse(runner.cleanup()); + assertTrue(sm.hasExited()); + assertFalse(sm.wasAborted()); + assertFalse(sm.hasDump()); + assertFalse(sm.hasSignaled()); + + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (DrmaaException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } + } + + /** + * This tests fails from time to time depending on the cluster load or some + * other factors. Any client code has to adjust for this issue + */ + @Test(groups = { AllTestSuit.test_group_cluster, + AllTestSuit.test_group_runner }) + public void testRunOnClusterAsync() { + assertFalse(SysPrefs.isWindows, + "Cluster execution can only be in unix environment"); + try { + ConfiguredExecutable confDisembl = Configurator + .configureExecutable(disembl, + Executable.ExecProvider.Cluster); + AsyncExecutor aengine = Configurator.getAsyncEngine(confDisembl); + String jobId = aengine.submitJob(confDisembl); + assertNotNull(jobId, "Runner is NULL"); + // let drmaa to start + Thread.sleep(500); + JobStatus status = aengine.getJobStatus(jobId); + while (status != JobStatus.FINISHED) { + System.out.println("Job Status: " + status); + Thread.sleep(1000); + status = aengine.getJobStatus(jobId); + ConfiguredExecutable result = (ConfiguredExecutable) aengine + .getResults(jobId); + assertNotNull(result); + System.out.println("RES:" + result); + // Some times the job could be removed from the cluster accounting + // before it has been reported to finish. 
Make sure + // to stop waiting in such case + if (status == JobStatus.UNDEFINED) { + break; + } + } + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail("DrmaaException caught:" + e.getMessage()); + } catch (InterruptedException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test(groups = { AllTestSuit.test_group_runner }) + public void testRunLocally() { + try { + ConfiguredExecutable confDisembl = Configurator + .configureExecutable(disembl, Executable.ExecProvider.Local); + + // For local execution use relative + LocalRunner lr = new LocalRunner(confDisembl); + lr.executeJob(); + ConfiguredExecutable al1 = lr.waitForResult(); + assertNotNull(al1.getResults()); + List al2 = confDisembl.getResults(); + assertNotNull(al2); + assertEquals(al2.size(), 3); + assertEquals(al1.getResults(), al2); + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + @Test(groups = { AllTestSuit.test_group_runner }) + public void readStatistics() { + try { + ConfiguredExecutable confDisembl = Configurator + .configureExecutable(disembl, Executable.ExecProvider.Local); + // For local execution use relavive + + AsyncExecutor sexec = Configurator.getAsyncEngine(confDisembl); + String jobId = sexec.submitJob(confDisembl); + FilePuller fw = FilePuller.newFilePuller(confDisembl + .getWorkDirectory() + + File.separator + Jronn.getStatFile(), + FileWatcher.MIN_CHUNK_SIZE_BYTES); + int count = 0; + long position = 0; + fw.waitForFile(4); + JobStatus status = sexec.getJobStatus(jobId); + while (status != JobStatus.FINISHED) { + if (fw.hasMoreData()) { + ChunkHolder ch = fw.pull(position); + String chunk = ch.getChunk(); + position = 
ch.getNextPosition(); + } + count++; + // Make sure the loop is terminated if the job fails + if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) { + break; + } + Thread.sleep(300); + status = sexec.getJobStatus(jobId); + } + assertTrue(count > 1); + ConfiguredExecutable al = sexec.getResults(jobId); + assertNotNull(al.getResults()); + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (InterruptedException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test(groups = { AllTestSuit.test_group_runner }) + public void testPersistance() { + try { + Disembl disembl = new Disembl(); + disembl.setError("errrr.txt").setInput(AllTestSuit.test_input) + .setOutput("outtt.txt"); + assertEquals(disembl.getInput(), AllTestSuit.test_input); + assertEquals(disembl.getError(), "errrr.txt"); + assertEquals(disembl.getOutput(), "outtt.txt"); + ConfiguredExecutable cDisembl = Configurator + .configureExecutable(disembl, Executable.ExecProvider.Local); + + SyncExecutor sexec = Configurator.getSyncEngine(cDisembl); + sexec.executeJob(); + ConfiguredExecutable al = sexec.waitForResult(); + assertNotNull(al.getResults()); + // Save run configuration + assertTrue(cDisembl.saveRunConfiguration()); + + // See if loaded configuration is the same as saved + RunConfiguration loadedRun = RunConfiguration + .load(new FileInputStream(new File(cDisembl + .getWorkDirectory(), RunConfiguration.rconfigFile))); + assertEquals(((ConfExecutable) cDisembl) + .getRunConfiguration(), loadedRun); + // Load run configuration as ConfExecutable + ConfiguredExecutable resurrectedCDisembl = (ConfiguredExecutable) cDisembl + .loadRunConfiguration(new FileInputStream(new File(cDisembl + .getWorkDirectory(), RunConfiguration.rconfigFile))); + assertNotNull(resurrectedCDisembl); 
+ assertEquals(resurrectedCDisembl.getExecutable().getInput(), + AllTestSuit.test_input); + assertEquals(resurrectedCDisembl.getExecutable().getError(), + "errrr.txt"); + assertEquals(resurrectedCDisembl.getExecutable().getOutput(), + "outtt.txt"); + // See in details whether executables are the same + assertEquals(resurrectedCDisembl.getExecutable(), disembl); + + ConfiguredExecutable resJronn = Configurator + .configureExecutable(resurrectedCDisembl.getExecutable(), + Executable.ExecProvider.Local); + + sexec = Configurator.getSyncEngine(resJronn, + Executable.ExecProvider.Local); + sexec.executeJob(); + al = sexec.waitForResult(); + assertNotNull(al); + + } catch (JobSubmissionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (JobExecutionException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } catch (ResultNotAvailableException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test(groups = { AllTestSuit.test_group_runner }) + public void testConfigurationLoading() { + try { + RunnerConfig disemblConfig = ConfExecutable + .getRunnerOptions(Disembl.class); + assertNotNull(disemblConfig); + assertTrue(disemblConfig.getArguments().size() > 0); + + PresetManager disemblPresets = ConfExecutable + .getRunnerPresets(Disembl.class); + assertNull(disemblPresets); // there is no presets + + LimitsManager disemblLimits = ConfExecutable + .getRunnerLimits(Disembl.class); + assertNotNull(disemblLimits); + assertTrue(disemblLimits.getLimits().size() > 0); + disemblLimits.validate(disemblPresets); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (ValidationException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + 
+} diff --git a/testsrc/testdata/TO1381.fasta.aln b/testsrc/testdata/TO1381.fasta.aln new file mode 100644 index 0000000..f6da7b8 --- /dev/null +++ b/testsrc/testdata/TO1381.fasta.aln @@ -0,0 +1,35 @@ +>Foobar_dundeefriends +MTADGPRELLQLRAAVRHRPQDFVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV +ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL +LPEEPYITAQLLNWRRRLCDWRALDVLSAQVRAAVAQGVGAVEPFAFLSEDASAAEQLAC +ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM +HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV +FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR +VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA +RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL +TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES +GVFEMDGFADDFGALLQALARRHGWLGI + +>Foobar +-----------------------------------MGDTTAGEMAVQRGLALH------- +---------QQRHAEAAVLLQQASDAAPEHPGIALWL-HALEDAGQAEAAAA-YTRAHQL +LPEEPYITAQLLN--------------------AVAQGVGAVEPFAFLSEDASAAE---- +----------SVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM +HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV +FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR +VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA +RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL +TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES +GVFEMDGFADDFGALLQALARRHGWLGI + +>dundeefriends +-MTADGPRELLQLRAAVRHRPQDVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV +ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALED--------------HQL +LPEEPYITAQLDVLSAQVR-------------AAVAQGVGAVEPFAFLSEDASAAEQLAC +ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM +HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV +FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR +VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA +RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL +TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES 
+I--------------------------- \ No newline at end of file diff --git a/webservices/compbio/data/msa/Annotation.java b/webservices/compbio/data/msa/Annotation.java new file mode 100644 index 0000000..c73c0bc --- /dev/null +++ b/webservices/compbio/data/msa/Annotation.java @@ -0,0 +1,175 @@ +package compbio.data.msa; + +import java.security.InvalidParameterException; +import java.util.List; + +import javax.jws.WebParam; +import javax.jws.WebService; + +import compbio.data.sequence.FastaSequence; +import compbio.data.sequence.MultiAnnotatedSequence; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.LimitExceededException; +import compbio.metadata.Option; +import compbio.metadata.Preset; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.UnsupportedRuntimeException; +import compbio.metadata.WrongParameterException; + +/** + * Interface for tools that results to one or more annotation to sequence(s) + * + * @author pvtroshin + * + * Date November 2010 + * + * @param + * executable type / web service type + */ +@WebService(targetNamespace = "http://a.data.compbio/01/12/2010/") +public interface Annotation extends JManagement, Metadata { + + /** + * + * Any dataset containing a greater number of sequences or the average + * length of the sequences are greater then defined in the default Limit + * will not be accepted for an alignment operation and + * JobSubmissionException will be thrown. + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it checks whether the + * sequences names are unique. 
It is the responsibility of the caller + * to validate this information + * @return jobId - unique identifier for the job + * @throws JobSubmissionException + * is thrown when the job could not be submitted due to the + * following reasons: 1) The number of sequences in the + * submission or their average length is greater than defined by + * the default Limit. 2) Any problems on the server side e.g. it + * is misconfigured or malfunctioning, are reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws InvalidParameterException + * thrown if the input list of fasta sequences is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is thrown if the input sequences number or average length + * exceeds what is defined by the limit + */ + String analize( + @WebParam(name = "fastaSequences") List sequences) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException; + + /** + * + * @see Option + * + * Default Limit is used to decide whether the calculation will be + * permitted or denied + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it check whether the + * sequence names are unique. It is the responsibility of the caller + * to validate this information + * @param options + * A list of Options + * @return jobId - unique identifier for the job + * @throws JobSubmissionException This + * exception is thrown when the job could not be submitted due + * to the following reasons: 1) The number of sequences in the + * submission or their average length is greater than defined by + * the default Limit. 2) Any problems on the server side e.g. 
it + * is misconfigured or malfunctioning, are reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws WrongParameterException + * is thrown when 1) One of the Options provided is not + * supported, 2) The value of the option is defined outside the + * boundaries. In both cases the exception object contains the + * information on the violating Option. + * @throws InvalidParameterException + * thrown if the input list of fasta sequences is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is thrown if the input sequences number or average length + * exceeds what is defined by the limit + */ + String customAnalize( + @WebParam(name = "fastaSequences") List sequences, + @WebParam(name = "options") List> options) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException, WrongParameterException; + + /** + * + * + * Limit for a presetName is used to decide whether the calculation will be + * permitted or denied. If no Limit was defined for a presetName, then the + * default limit is used. + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it check whether the + * sequence names are unique. It is the responsibility of the caller + * to validate this information + * @param preset + * the Preset to use for the calculation + * @return String - jobId - unique identifier for the job + * @throws JobSubmissionException This + * exception is thrown when the job could not be submitted due + * to the following reasons: 1) The number of sequences in the + * submission or their average length is greater than defined by + * the default Limit. 2) Any problems on the server side e.g. 
it + * is misconfigured or malfunctioning, are reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws WrongParameterException + * is thrown when 1) One of the Options provided is not + * supported, 2) The value of the option is defined outside the + * boundaries. In both cases the exception object contains the + * information on the violating Option. + * @throws InvalidParameterException + * thrown if the input list of fasta sequences is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is thrown if the input sequences number or average length + * exceeds what is defined by the limit + */ + String presetAnalize( + @WebParam(name = "fastaSequences") List sequences, + @WebParam(name = "preset") Preset preset) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException, WrongParameterException; + + /** + * Return the result of the job. + * + * @param jobId + * a unique job identifier + * @return MultiAnnotatedSequence - the result of the job + * @throws ResultNotAvailableException + * this exception is thrown if the job execution was not + * successful or the result of the execution could not be found + * (e.g. removed). The exception could also be thrown due to + * lower level problems on the server i.e. IOException, + * FileNotFoundException problems as well as + * UnknownFileFormatException. + * @throws InvalidParameterException + * thrown if jobId is empty or cannot be recognised e.g. 
in + * invalid format + */ + MultiAnnotatedSequence getResult(@WebParam(name = "jobId") String jobId) + throws ResultNotAvailableException; +} diff --git a/webservices/compbio/data/msa/JManagement.java b/webservices/compbio/data/msa/JManagement.java new file mode 100644 index 0000000..b9b2f01 --- /dev/null +++ b/webservices/compbio/data/msa/JManagement.java @@ -0,0 +1,54 @@ +package compbio.data.msa; + +import java.security.InvalidParameterException; + +import javax.jws.WebParam; + +import compbio.metadata.ChunkHolder; +import compbio.metadata.JobStatus; + +public interface JManagement { + + /** + * Stop a running job but leave its output untouched + * + * @return true if job was cancelled successfully, false otherwise + * @throws InvalidParameterException + * thrown if jobId is empty or cannot be recognised e.g. in + * invalid format + */ + boolean cancelJob(@WebParam(name = "jobId") String jobId); + + /** + * Return the status of the job. See {@link JobStatus} + * + * @param jobId + * - unique job identifier + * @return JobStatus - status of the job + * @throws InvalidParameterException + * thrown if jobId is empty or cannot be recognised e.g. in + * invalid format + */ + JobStatus getJobStatus(@WebParam(name = "jobId") String jobId); + + /** + * Reads a 1kb chunk from the statistics file which is specific to a given web + * service from the position. If at the time of a request less than 1kb of data is + * available from the position to the end of the file, then it returns all + * the data available from the position to the end of the file. + * + * @param jobId + * - unique job identifier + * @param position + * - next position within the file to read + * @return ChunkHolder - see {@link ChunkHolder}, which contains a chunk of data + * and a next position within the file from which no data has been + * read + * @throws InvalidParameterException + * thrown if jobId is empty or cannot be recognised e.g. 
in + * invalid format and also if the position value is negative + */ + ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId, + @WebParam(name = "position") long position); + +} diff --git a/webservices/compbio/data/msa/Metadata.java b/webservices/compbio/data/msa/Metadata.java new file mode 100644 index 0000000..67abb5d --- /dev/null +++ b/webservices/compbio/data/msa/Metadata.java @@ -0,0 +1,50 @@ +package compbio.data.msa; + +import javax.jws.WebParam; + +import compbio.metadata.Limit; +import compbio.metadata.LimitsManager; +import compbio.metadata.PresetManager; +import compbio.metadata.RunnerConfig; + +public interface Metadata { + + /** + * Get options supported by a web service + * + * @return RunnerConfig the list of options and parameters supported by a + * web service. + */ + RunnerConfig getRunnerOptions(); + + /** + * Get presets supported by a web service + * + * @return PresetManager the object which contains information about presets + * supported by a web service + */ + PresetManager getPresets(); + + /** + * Get a Limit for a preset. + * + * @param presetName + * the name of the preset. If no name is provided, then the + * default limit is returned. If no limit for a particular + * preset is defined then the default limit is returned + * @return Limit + */ + Limit getLimit(@WebParam(name = "presetName") String presetName); + + /** + * List Limits supported by a web service. + * + * @param presetName + * the name of the preset. if no name is provided, then the + * default preset is returned. 
If no limit for a particular + * preset is defined then the default preset is returned + * @return LimitManager + */ + LimitsManager getLimits(); + +} diff --git a/webservices/compbio/data/msa/MsaWS.java b/webservices/compbio/data/msa/MsaWS.java index 4a22009..b1735fb 100644 --- a/webservices/compbio/data/msa/MsaWS.java +++ b/webservices/compbio/data/msa/MsaWS.java @@ -26,17 +26,11 @@ import javax.jws.WebService; import compbio.data.sequence.Alignment; import compbio.data.sequence.FastaSequence; -import compbio.metadata.ChunkHolder; -import compbio.metadata.JobStatus; import compbio.metadata.JobSubmissionException; -import compbio.metadata.Limit; import compbio.metadata.LimitExceededException; -import compbio.metadata.LimitsManager; import compbio.metadata.Option; import compbio.metadata.Preset; -import compbio.metadata.PresetManager; import compbio.metadata.ResultNotAvailableException; -import compbio.metadata.RunnerConfig; import compbio.metadata.UnsupportedRuntimeException; import compbio.metadata.WrongParameterException; @@ -45,248 +39,158 @@ import compbio.metadata.WrongParameterException; * * @author pvtroshin * - * Date September 2009 + * Date November 2010 * * @param * executable type / web service type */ -@WebService(targetNamespace = "http://msa.data.compbio/01/01/2010/") -public interface MsaWS { - - /** - * Align a list of sequences with default settings. - * - * Any dataset containing a greater number of sequences or the average - * length of the sequences are greater then defined in the default Limit - * will not be accepted for an alignment operation and - * JobSubmissionException will be thrown. - * - * @param sequences - * List of FastaSequence objects. The programme does not perform - * any sequence validity checks. Nor does it checks whether the - * sequences names are unique. It is responsibility of the caller - * to validate this information - * @return jobId - unique identifier for the job - * @throws JobSubmissionException. 
This - * exception is thrown when the job could not be submitted due - * to the following reasons: 1) The number of sequences in the - * submission or their average length is greater then defined by - * the default Limit. 2) Any problems on the server side e.g. it - * is misconfigured or malfunction, is reported via this - * exception. In the first case the information on the limit - * could be obtained from an exception. - * @throws InvalidParameterException - * thrown if input list of fasta sequence is null or empty - * @throws UnsupportedRuntimeException - * thrown if server OS does not support native executables for a - * given web service, e.g. JWS2 is deployed on Windows and Mafft - * service is called - * @throws LimitExceededException - * is throw if the input sequences number or average length - * exceeds what is defined by the limit - */ - String align( - @WebParam(name = "fastaSequences") List sequences) - throws UnsupportedRuntimeException, LimitExceededException, - JobSubmissionException; - - /** - * Align a list of sequences with options. - * - * @see Option - * - * Default Limit is used to decide whether the calculation will be - * permitted or denied - * - * @param sequences - * List of FastaSequence objects. The programme does not perform - * any sequence validity checks. Nor does it checks whether the - * sequences names are unique. It is responsibility of the caller - * to validate this information - * @param options - * A list of Options - * @return jobId - unique identifier for the job - * @throws JobSubmissionException. This - * exception is thrown when the job could not be submitted due - * to the following reasons: 1) The number of sequences in the - * submission or their average length is greater then defined by - * the default Limit. 2) Any problems on the server side e.g. it - * is misconfigured or malfunction, is reported via this - * exception. In the first case the information on the limit - * could be obtained from an exception. 
- * @throws WrongParameterException - * is throws when 1) One of the Options provided is not - * supported, 2) The value of the option is defined outside the - * boundaries. In both cases exception object contain the - * information on the violating Option. - * @throws InvalidParameterException - * thrown if input list of fasta sequence is null or empty - * @throws UnsupportedRuntimeException - * thrown if server OS does not support native executables for a - * given web service, e.g. JWS2 is deployed on Windows and Mafft - * service is called - * @throws LimitExceededException - * is throw if the input sequences number or average length - * exceeds what is defined by the limit - */ - String customAlign( - @WebParam(name = "fastaSequences") List sequences, - @WebParam(name = "options") List> options) - throws UnsupportedRuntimeException, LimitExceededException, - JobSubmissionException, WrongParameterException; - - /** - * Align a list of sequences with preset. @see Preset - * - * Limit for a presetName is used whether the calculation will be permitted - * or denied. If no Limit was defined for a presetName, than default limit - * is used. - * - * @param sequences - * List of FastaSequence objects. The programme does not perform - * any sequence validity checks. Nor does it checks whether the - * sequences names are unique. It is responsibility of the caller - * to validate this information - * @param preset - * A list of Options - * @return String - jobId - unique identifier for the job - * @throws JobSubmissionException. This - * exception is thrown when the job could not be submitted due - * to the following reasons: 1) The number of sequences in the - * submission or their average length is greater then defined by - * the default Limit. 2) Any problems on the server side e.g. it - * is misconfigured or malfunction, is reported via this - * exception. In the first case the information on the limit - * could be obtained from an exception. 
- * @throws WrongParameterException - * is throws when 1) One of the Options provided is not - * supported, 2) The value of the option is defined outside the - * boundaries. In both cases exception object contain the - * information on the violating Option. - * @throws InvalidParameterException - * thrown if input list of fasta sequence is null or empty - * @throws UnsupportedRuntimeException - * thrown if server OS does not support native executables for a - * given web service, e.g. JWS2 is deployed on Windows and Mafft - * service is called - * @throws LimitExceededException - * is throw if the input sequences number or average length - * exceeds what is defined by the limit - */ - String presetAlign( - @WebParam(name = "fastaSequences") List sequences, - @WebParam(name = "preset") Preset preset) - throws UnsupportedRuntimeException, LimitExceededException, - JobSubmissionException, WrongParameterException; - - /** - * Return the result of the job. - * - * @param jobId - * a unique job identifier - * @return Alignment - * @throws ResultNotAvailableException - * this exception is throw if the job execution was not - * successful or the result of the execution could not be found. - * (e.g. removed). Exception could also be thrown is dues to the - * lower level problems on the server i.e. IOException, - * FileNotFoundException problems as well as - * UnknownFileFormatException. - * @throws InvalidParameterException - * thrown if jobId is empty or cannot be recognised e.g. in - * invalid format - */ - Alignment getResult(@WebParam(name = "jobId") String jobId) - throws ResultNotAvailableException; - - /** - * Stop running job but leave its output untouched - * - * @return true if job was cancelled successfully, false otherwise - * @throws InvalidParameterException - * thrown if jobId is empty or cannot be recognised e.g. in - * invalid format - */ - boolean cancelJob(@WebParam(name = "jobId") String jobId); - - /** - * Return the status of the job. 
@see JobStatus - * - * @param jobId - * - unique job identifier - * @return JobStatus - status of the job - * @throws InvalidParameterException - * thrown if jobId is empty or cannot be recognised e.g. in - * invalid format - */ - JobStatus getJobStatus(@WebParam(name = "jobId") String jobId); - - /** - * Reads 1kb chunk from the statistics file which is specific to a given web - * service from the position. If in time of a request less then 1kb data is - * available from the position to the end of the file, then it returns all - * the data available from the position to the end of the file. - * - * @param jobId - * - unique job identifier - * @param position - * - next position within the file to read - * @return ChunkHolder - @see ChunkHolder which contains a chuink of data - * and a next position within the file from which no data has been - * read - * @throws InvalidParameterException - * thrown if jobId is empty or cannot be recognised e.g. in - * invalid format and also if the position value is negative - */ - ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId, - @WebParam(name = "position") long position); - - /* - * TODO - * - * @param jobId - * - * @return - * - * byte getProgress(@WebParam(name = "jobId") String jobId); - */ - - /** - * Get options supported by a web service - * - * @return RunnerConfig the list of options and parameters supported by a - * web service. - */ - RunnerConfig getRunnerOptions(); - - /** - * Get presets supported by a web service - * - * @return PresetManager the object contains information about presets - * supported by a web service - */ - PresetManager getPresets(); - - /** - * Get a Limit for a preset. - * - * @param presetName - * the name of the preset. if no name is provided, then the - * default preset is returned. 
If no limit for a particular - * preset is defined then the default preset is returned - * @return Limit - */ - Limit getLimit(@WebParam(name = "presetName") String presetName); - - /** - * List Limits supported by a web service. - * - * @param presetName - * the name of the preset. if no name is provided, then the - * default preset is returned. If no limit for a particular - * preset is defined then the default preset is returned - * @return LimitManager - */ - LimitsManager getLimits(); +@WebService(targetNamespace = "http://msa.data.compbio/01/12/2010/") +public interface MsaWS extends JManagement, Metadata { + + /** + * Align a list of sequences with default settings. + * + * Any dataset containing a greater number of sequences or the average + * length of the sequences are greater then defined in the default Limit + * will not be accepted for an alignment operation and + * JobSubmissionException will be thrown. + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it checks whether the + * sequences names are unique. It is responsibility of the caller + * to validate this information + * @return jobId - unique identifier for the job + * @throws JobSubmissionException. This + * exception is thrown when the job could not be submitted due + * to the following reasons: 1) The number of sequences in the + * submission or their average length is greater then defined by + * the default Limit. 2) Any problems on the server side e.g. it + * is misconfigured or malfunction, is reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws InvalidParameterException + * thrown if input list of fasta sequence is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. 
JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is throw if the input sequences number or average length + * exceeds what is defined by the limit + */ + String align( + @WebParam(name = "fastaSequences") List sequences) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException; + + /** + * Align a list of sequences with options. + * + * @see Option + * + * Default Limit is used to decide whether the calculation will be + * permitted or denied + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it checks whether the + * sequences names are unique. It is responsibility of the caller + * to validate this information + * @param options + * A list of Options + * @return jobId - unique identifier for the job + * @throws JobSubmissionException. This + * exception is thrown when the job could not be submitted due + * to the following reasons: 1) The number of sequences in the + * submission or their average length is greater then defined by + * the default Limit. 2) Any problems on the server side e.g. it + * is misconfigured or malfunction, is reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws WrongParameterException + * is throws when 1) One of the Options provided is not + * supported, 2) The value of the option is defined outside the + * boundaries. In both cases exception object contain the + * information on the violating Option. + * @throws InvalidParameterException + * thrown if input list of fasta sequence is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. 
JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is throw if the input sequences number or average length + * exceeds what is defined by the limit + */ + String customAlign( + @WebParam(name = "fastaSequences") List sequences, + @WebParam(name = "options") List> options) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException, WrongParameterException; + + /** + * Align a list of sequences with preset. @see Preset + * + * Limit for a presetName is used whether the calculation will be permitted + * or denied. If no Limit was defined for a presetName, than default limit + * is used. + * + * @param sequences + * List of FastaSequence objects. The programme does not perform + * any sequence validity checks. Nor does it checks whether the + * sequences names are unique. It is responsibility of the caller + * to validate this information + * @param preset + * A list of Options + * @return String - jobId - unique identifier for the job + * @throws JobSubmissionException. This + * exception is thrown when the job could not be submitted due + * to the following reasons: 1) The number of sequences in the + * submission or their average length is greater then defined by + * the default Limit. 2) Any problems on the server side e.g. it + * is misconfigured or malfunction, is reported via this + * exception. In the first case the information on the limit + * could be obtained from an exception. + * @throws WrongParameterException + * is throws when 1) One of the Options provided is not + * supported, 2) The value of the option is defined outside the + * boundaries. In both cases exception object contain the + * information on the violating Option. + * @throws InvalidParameterException + * thrown if input list of fasta sequence is null or empty + * @throws UnsupportedRuntimeException + * thrown if server OS does not support native executables for a + * given web service, e.g. 
JWS2 is deployed on Windows and Mafft + * service is called + * @throws LimitExceededException + * is throw if the input sequences number or average length + * exceeds what is defined by the limit + */ + String presetAlign( + @WebParam(name = "fastaSequences") List sequences, + @WebParam(name = "preset") Preset preset) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException, WrongParameterException; + + /** + * Return the result of the job. + * + * @param jobId + * a unique job identifier + * @return Alignment + * @throws ResultNotAvailableException + * this exception is throw if the job execution was not + * successful or the result of the execution could not be found. + * (e.g. removed). Exception could also be thrown is dues to the + * lower level problems on the server i.e. IOException, + * FileNotFoundException problems as well as + * UnknownFileFormatException. + * @throws InvalidParameterException + * thrown if jobId is empty or cannot be recognised e.g. 
in + * invalid format + */ + Alignment getResult(@WebParam(name = "jobId") String jobId) + throws ResultNotAvailableException; } diff --git a/webservices/compbio/ws/server/AAConWS.java b/webservices/compbio/ws/server/AAConWS.java new file mode 100644 index 0000000..cf382df --- /dev/null +++ b/webservices/compbio/ws/server/AAConWS.java @@ -0,0 +1,140 @@ +package compbio.ws.server; + +import java.io.File; +import java.util.List; + +import javax.annotation.Resource; +import javax.jws.WebService; +import javax.xml.ws.WebServiceContext; + +import org.apache.log4j.Logger; + +import compbio.conservation.Method; +import compbio.data.msa.Annotation; +import compbio.data.sequence.FastaSequence; +import compbio.data.sequence.JalviewAnnotation; +import compbio.data.sequence.MultiAnnotatedSequence; +import compbio.engine.AsyncExecutor; +import compbio.engine.Configurator; +import compbio.engine.client.ConfiguredExecutable; +import compbio.metadata.ChunkHolder; +import compbio.metadata.JobStatus; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.Limit; +import compbio.metadata.LimitExceededException; +import compbio.metadata.LimitsManager; +import compbio.metadata.Option; +import compbio.metadata.Preset; +import compbio.metadata.PresetManager; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.RunnerConfig; +import compbio.metadata.UnsupportedRuntimeException; +import compbio.metadata.WrongParameterException; +import compbio.runner.Util; +import compbio.runner.conservation.AACon; + +@WebService(endpointInterface = "compbio.data.msa.MsaWS", targetNamespace = "http://msa.data.compbio/01/01/2010/", serviceName = "MuscleWS") +public class AAConWS implements Annotation { + + // Ask for resource injection + @Resource + WebServiceContext wsContext; + + private static Logger statLog = Logger.getLogger("AAConWS-stats"); + + private static Logger log = Logger.getLogger(AAConWS.class); + + private static final RunnerConfig aaconOptions = 
Util + .getSupportedOptions(AACon.class); + + private static final PresetManager aaconPresets = Util + .getPresets(AACon.class); + + ConfiguredExecutable init(List sequences) + throws JobSubmissionException { + AACon aacon = new AACon(); + aacon.setInput("fasta.in").setOutput("fasta.out"); + return Configurator.configureExecutable(aacon, sequences); + } + + @SuppressWarnings("unchecked") + public MultiAnnotatedSequence getResult(String jobId) + throws ResultNotAvailableException { + WSUtil.validateJobId(jobId); + AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId); + ConfiguredExecutable aacon = (ConfiguredExecutable) asyncEngine + .getResults(jobId); + MultiAnnotatedSequence mas = aacon.getResults(); + // log(jobId, "getResults"); + return mas; + } + + @SuppressWarnings("unchecked") + public JalviewAnnotation getJalviewAnnotation(String jobId) + throws ResultNotAvailableException { + MultiAnnotatedSequence result = getResult(jobId); + + // log(jobId, "getResults"); + return result.toJalviewAnnotation(); + } + + public Limit getLimit(String presetName) { + return new AACon().getLimit(presetName); + } + + public LimitsManager getLimits() { + return new AACon().getLimits(); + } + + public ChunkHolder pullExecStatistics(String jobId, long position) { + WSUtil.validateJobId(jobId); + String file = Configurator.getWorkDirectory(jobId) + File.separator + + AACon.getStatFile(); + return WSUtil.pullFile(file, position); + } + + public boolean cancelJob(String jobId) { + WSUtil.validateJobId(jobId); + return WSUtil.cancelJob(jobId); + } + + public JobStatus getJobStatus(String jobId) { + WSUtil.validateJobId(jobId); + return WSUtil.getJobStatus(jobId); + } + + public PresetManager getPresets() { + return aaconPresets; + } + + public RunnerConfig getRunnerOptions() { + return aaconOptions; + } + + @Override + public String analize(List sequences) + throws UnsupportedRuntimeException, LimitExceededException, + JobSubmissionException { + // TODO Auto-generated 
method stub + return null; + } + + @Override + public String customAnalize(List sequences, + List> options) throws UnsupportedRuntimeException, + LimitExceededException, JobSubmissionException, + WrongParameterException { + // TODO Auto-generated method stub + return null; + } + + @Override + public String presetAnalize(List sequences, + Preset preset) throws UnsupportedRuntimeException, + LimitExceededException, JobSubmissionException, + WrongParameterException { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/webservices/compbio/ws/server/WSUtil.java b/webservices/compbio/ws/server/WSUtil.java index dd1051f..62996e5 100644 --- a/webservices/compbio/ws/server/WSUtil.java +++ b/webservices/compbio/ws/server/WSUtil.java @@ -36,73 +36,73 @@ import compbio.util.Timer; public final class WSUtil { - public static final void validateJobId(String jobId) - throws InvalidParameterException { - if (!compbio.engine.client.Util.isValidJobId(jobId)) { - throw new InvalidParameterException( - "JobId is not provided or cannot be recognised! Given value: " - + jobId); + public static final void validateJobId(String jobId) + throws InvalidParameterException { + if (!compbio.engine.client.Util.isValidJobId(jobId)) { + throw new InvalidParameterException( + "JobId is not provided or cannot be recognised! Given value: " + + jobId); + } } - } - public static final void validateFastaInput(List sequences) - throws InvalidParameterException { - if (sequences == null || sequences.isEmpty()) { - throw new InvalidParameterException( - "List of fasta sequences required but not provided! "); + public static final void validateFastaInput(List sequences) + throws InvalidParameterException { + if (sequences == null || sequences.isEmpty()) { + throw new InvalidParameterException( + "List of fasta sequences required but not provided! 
"); + } } - } - public static JobStatus getJobStatus(String jobId) { - AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId); - return asyncEngine.getJobStatus(jobId); - } - - public static ChunkHolder pullFile(String file, long position) { - return ProgressGetter.pull(file, position); - } + public static JobStatus getJobStatus(String jobId) { + AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId); + return asyncEngine.getJobStatus(jobId); + } - public static byte getProgress(String jobId) { - throw new UnsupportedOperationException(); - } + public static ChunkHolder pullFile(String file, long position) { + return ProgressGetter.pull(file, position); + } - public static AsyncExecutor getEngine(ConfiguredExecutable confClustal) { - assert confClustal != null; - return Configurator.getAsyncEngine(confClustal); - } + public static byte getProgress(String jobId) { + throw new UnsupportedOperationException(); + } - public static boolean cancelJob(String jobId) { - AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId); - return asyncEngine.cancelJob(jobId); - } + public static AsyncExecutor getEngine(ConfiguredExecutable confClustal) { + assert confClustal != null; + return Configurator.getAsyncEngine(confClustal); + } - public static String align(List sequences, - ConfiguredExecutable confExec, WSLogger logger, - String callingMethod, Limit limit) - throws LimitExceededException, JobSubmissionException { - Timer timer = Timer.getMilliSecondsTimer(); - if (limit.isExceeded(sequences)) { - throw LimitExceededException.newLimitExceeded(limit, sequences); + public static boolean cancelJob(String jobId) { + AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId); + return asyncEngine.cancelJob(jobId); } - compbio.runner.Util.writeInput(sequences, confExec); - AsyncExecutor engine = Configurator.getAsyncEngine(confExec); - String jobId = engine.submitJob(confExec); - if (logger != null) { - logger.log(timer, callingMethod, jobId); + + public 
static String align(List sequences, + ConfiguredExecutable confExec, WSLogger logger, + String callingMethod, Limit limit) + throws LimitExceededException, JobSubmissionException { + Timer timer = Timer.getMilliSecondsTimer(); + if (limit != null && limit.isExceeded(sequences)) { + throw LimitExceededException.newLimitExceeded(limit, sequences); + } + compbio.runner.Util.writeInput(sequences, confExec); + AsyncExecutor engine = Configurator.getAsyncEngine(confExec); + String jobId = engine.submitJob(confExec); + if (logger != null) { + logger.log(timer, callingMethod, jobId); + } + return jobId; } - return jobId; - } - /* - * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation - */ - public static final List getCommands(List> options, - String keyValueSeparator) { - List oList = new ArrayList(); - for (Option o : options) { - oList.add(o.toCommand(keyValueSeparator)); + /* + * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation + */ + public static final List getCommands(List> options, + String keyValueSeparator) { + List oList = new ArrayList(); + for (Option o : options) { + oList.add(o.toCommand(keyValueSeparator)); + } + return oList; } - return oList; - } }