<classpathentry kind="lib" path="testsrc/lib/testng-5.10-jdk15.jar"/>\r
<classpathentry kind="lib" path="lib/servlet-api.jar"/>\r
<classpathentry kind="lib" path="WEB-INF/lib/drmaa.jar"/>\r
- <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.2.jar"/>\r
+ <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.3.jar"/>\r
+ <classpathentry kind="lib" path="binaries/aaconservation.jar"/>\r
<classpathentry kind="output" path="WEB-INF/classes"/>\r
</classpath>\r
TODO: \r
\r
+Add iupred ws http://iupred.enzim.hu/\r
Add globprot ws \r
Add ronn ws\r
\r
--- /dev/null
+
+AA Conservation version 1.0b (2 September 2010)
+
+This program allows calculation of conservation of amino acids in
+multiple sequence alignments.
+It implements 17 different conservation scores as described by Valdar in
+his paper (Scoring Residue Conservation, PROTEINS: Structure, Function
+and Bioinformatics 48:227-241 (2002)) and SMERFS scoring algorithm as described
+by Manning, Jefferson and Barton (The contrasting properties of conservation
+and correlated phylogeny in protein functional residue prediction,
+BMC Bioinformatics (2008)).
+
+The conservation algorithms supported are:
+
+KABAT, JORES, SCHNEIDER, SHENKIN, GERSTEIN, TAYLOR_GAPS, TAYLOR_NO_GAPS,
+ZVELIBIL, KARLIN, ARMON, THOMPSON, NOT_LANCET, MIRNY, WILLIAMSON,
+LANDGRAF, SANDER, VALDAR, SMERFS
+
+Input format is either a FASTA formatted file containing aligned sequences with
+gaps or a Clustal alignment. The valid gap characters are *, -, space character,
+X and . (a dot). By default the program prints the results to the command window.
+If the output file is provided the results are printed to the file in two
+possible formats with or without an alignment.
+If format is not specified, the program outputs conservation scores without
+alignment. The scores are not normalized by default but they can be (see below).
+SMERFS default parameters are window width of 7, column score is set to
+the middle column, gap% cutoff of 0.1. If different values for SMERFS parameters
+are required, then all three parameters must be provided. Details of the program
+execution can be recorded to a separate file if an appropriate file path is
+provided.
+
+List of command line arguments:
+
+-m= precedes a comma separated list of method names
+ EXAMPLE: -m=KABAT,JORES,GERSTEIN
+ Optional, if no method is specified request for all is assumed.
+
+-i= precedes a full path to the input file (FASTA or Clustal alignment), required
+
+-o= precedes a full path to the output file, optional, if no output file is
+ provided the program will output to the standard out.
+
+-t= precedes the number of CPUs (CPU cores more precisely) to use. Optional,
+ defaults to all processors available on the machine.
+
+-f= precedes the format of the results in the output file
+ two different formats are possible:
+ RESULT_WITH_ALIGNMENT
+ RESULT_NO_ALIGNMENT
+ Optional, if not specified RESULT_NO_ALIGNMENT is assumed
+
+-s= precedes a list of three comma separated parameters for SMERFS
+ the order of parameters is as follows:
+ 1. window width - an integer and an odd number
+ 2. how to allocate window scores to columns, two ways are possible:
+ MID_SCORE - gives the window score to the middle column
+ MAX_SCORE - gives the column the highest score of all the windows it
+ belongs to
+ 3. gap percentage cutoff - a float greater than 0 and less than or equal to 1
+ EXAMPLE: -s=5,MID_SCORE,0.1
+ Optional, default values are 7,MID_SCORE,0.1
+
+-d= precedes a full path to a file where program execution details are to be
+ listed. Optional, if not provided, no execution statistics are produced.
+
+-g= precedes a comma separated list of gap characters provided by the user. If
+ you're using an unusual gap character (not a -,., ,*,X) you have to
+ provide it. If you provide this list you have to list all the gap
+ characters accepted, including those that are otherwise treated as defaults.
+ Optional.
+
+-n using this key causes the results to be normalized.
+ Normalized results have values between 0 and 1. Please note however, that
+ some results cannot be normalized. In such a case, the system returns the
+ non-normalized value, and logs the issue to the standard error stream.
+ The following formula is used for normalization
+ n = (d - dmin)/(dmax - dmin)
+ Negative results are first converted to positive by adding the absolute value
+ of the most negative result. Optional.
+
+EXAMPLE HOW TO RUN THE PROGRAM:
+java -jar <jar name> -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n
+
+As a result of the execution KABAT and SMERFS scores will be calculated.
+Input comes from prot1 file and an output without an alignment is recorded to
+prot1_results file.
+
+Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton.
+Please visit http://www.compbio.dundee.ac.uk for further information.
+
\ No newline at end of file
<delete file="${basedir}/${datamodel}"></delete>\r
<jar jarfile="${basedir}/${datamodel}">\r
<zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
- <include name="compbio-util-1.2.jar"/>\r
+ <include name="compbio-util-1.3.jar"/>\r
</zipgroupfileset>\r
<fileset dir="${basedir}/WEB-INF/classes/"> \r
<include name="compbio/data/sequence/*.class"/>\r
<jar jarfile="${full-jaba-client}">\r
<zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
<include name="log4j-1.2.15.jar"/>\r
- <include name="compbio-util-1.2.jar"/>\r
+ <include name="compbio-util-1.3.jar"/>\r
<include name="compbio-annotations-1.0.jar"/>\r
<include name="drmaa.jar"/>\r
</zipgroupfileset>\r
</jar>\r
</target>\r
\r
- <target name="full-jaba-client" description="Pack binaries with sources and configuration files" depends="full-jaba-client-jar, pack-binaries">\r
+ <target name="full-jaba-client" description="Pack binaries with sources and configuration files" \r
+ depends="full-jaba-client-jar, pack-binaries">\r
<echo>Packing binaries, and configuration files</echo>\r
<zip destfile="${full-jaba-client-pack}" >\r
<zipgroupfileset file="${binaries}" />\r
<jar jarfile="${full-jaba-client}">\r
<zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
<include name="log4j-1.2.15.jar"/>\r
- <include name="compbio-util-1.2.jar"/>\r
+ <include name="compbio-util-1.3.jar"/>\r
<include name="compbio-annotations-1.0.jar"/>\r
<include name="drmaa.jar"/>\r
</zipgroupfileset>\r
#/sw/bin/t_coffee\r
# Sub matrix support does not work \r
#tcoffee.-matrix.path=binaries/matrices\r
+# This variable is required by tcoffee\r
+tcoffee.bin.env=HOME_4_TCOFFEE#jobsout;\r
tcoffee.presets.file=conf/settings/TcoffeePresets.xml\r
tcoffee.parameters.file=conf/settings/TcoffeeParameters.xml\r
tcoffee.limits.file=conf/settings/TcoffeeLimits.xml\r
local.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py\r
cluster.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py\r
#disembl.parameters.file=conf/settings/JronnParameters.xml\r
-disembl.limits.file=conf/settings/JronnLimits.xml\r
+disembl.limits.file=conf/settings/DisemblLimits.xml\r
disembl.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M\r
+\r
+local.aacon.bin.windows=D:\\Java\\jdk1.6.0_14\\bin\\java.exe \r
+local.aacon.bin=/sw/java/latest/bin/java\r
+cluster.aacon.bin=/sw/java/latest/bin/java\r
+aacon.jar.file=binaries/aaconservation.jar\r
+aacon.parameters.file=conf/settings/AAConParameters.xml\r
+aacon.limits.file=conf/settings/AAConLimits.xml\r
+#TODO jronn.jvm.options=-Xms32M -Xmx512M\r
+aacon.cluster.cpunum=4\r
--- /dev/null
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<limits>\r
+ <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+ <limit isDefault="true">\r
+ <seqNumber>100000</seqNumber>\r
+ <seqLength>100000</seqLength>\r
+ </limit>\r
+ <limit isDefault="false">\r
+ <preset># LocalEngineExecutionLimit #</preset>\r
+ <seqNumber>1000</seqNumber>\r
+ <seqLength>1000</seqLength>\r
+ </limit>\r
+</limits>\r
--- /dev/null
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<runnerConfig>\r
+ <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+ <options>\r
+ <name>Normalize</name>\r
+ <description>Normalize the results. The results of the calculation by different methods will all be scaled to the range between 0 and 1, so that they are comparable</description>\r
+ <optionNames>-n</optionNames>\r
+ <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+ </options>\r
+ <prmSeparator>=</prmSeparator>\r
+ <parameters>\r
+ <name>Calculation method</name>\r
+ <description>The method of the calculation to use</description>\r
+ <optionNames>-m</optionNames>\r
+ <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+ <defaultValue>SHENKIN</defaultValue>\r
+ <possibleValues>KABAT</possibleValues>\r
+ <possibleValues>JORES</possibleValues>\r
+ <possibleValues>SCHNEIDER</possibleValues>\r
+ <possibleValues>SHENKIN</possibleValues>\r
+ <possibleValues>GERSTEIN</possibleValues>\r
+ <possibleValues>TAYLOR_GAPS</possibleValues>\r
+ <possibleValues>TAYLOR_NO_GAPS</possibleValues> \r
+ <possibleValues>ZVELIBIL</possibleValues>\r
+ <possibleValues>KARLIN</possibleValues>\r
+ <possibleValues>ARMON</possibleValues>\r
+ <possibleValues>THOMPSON</possibleValues>\r
+ <possibleValues>NOT_LANCET</possibleValues>\r
+ <possibleValues>MIRNY</possibleValues>\r
+ <possibleValues>WILLIAMSON</possibleValues> \r
+ <possibleValues>LANDGRAF</possibleValues>\r
+ <possibleValues>SANDER</possibleValues>\r
+ <possibleValues>VALDAR</possibleValues>\r
+ <possibleValues>SMERFS</possibleValues>\r
+ </parameters>\r
+</runnerConfig>\r
--- /dev/null
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<presets>\r
+ <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+ <preset>\r
+ <name>Quick conservation</name>\r
+ <description>Collection of fast conservation methods</description>\r
+ <optlist>\r
+ <option>-m=KABAT,JORES,SCHNEIDER,SHENKIN,GERSTEIN,TAYLOR_GAPS,TAYLOR_NO_GAPS,ZVELIBIL,ARMON,THOMPSON,NOT_LANCET,MIRNY,WILLIAMSON</option>\r
+ </optlist>\r
+ </preset>\r
+ <preset>\r
+ <name>Slow conservation</name>\r
+ <description>Collection of most expensive (slow) conservation methods</description>\r
+ <optlist>\r
+ <option>-m=LANDGRAF,KARLIN,SANDER,VALDAR,SMERFS</option>\r
+ </optlist>\r
+ </preset>\r
+ <preset>\r
+ <name>Complete conservation</name>\r
+ <description>Calculate conservation with all supported methods</description>\r
+ <optlist>\r
+ <option>-m=KABAT,JORES,SCHNEIDER,SHENKIN,GERSTEIN,TAYLOR_GAPS,TAYLOR_NO_GAPS,ZVELIBIL,ARMON,THOMPSON,NOT_LANCET,MIRNY,WILLIAMSON,LANDGRAF,KARLIN,SANDER,VALDAR,SMERFS</option>\r
+ </optlist>\r
+ </preset>\r
+</presets>\r
+\r
+\r
+\r
+ \r
+\r
-/* Copyright (c) 2009 Peter Troshin\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
*/\r
\r
package compbio.data.sequence;\r
*/\r
public final class ClustalAlignmentUtil {\r
\r
- private static final Logger log = Logger\r
- .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
-\r
- /**\r
- * Dash char to be used as gap char in the alignments\r
- */\r
- public static final char gapchar = '-';\r
-\r
- /*\r
- * Number of spaces separating the name and the sequence\r
- */\r
- private static final String spacer = " "; // 6 space characters\r
- /*\r
- * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is\r
- * longer than that it gets trimmed in the end\r
- */\r
- private static final int maxNameLength = 30; // Maximum name length\r
- /*\r
- * If all sequences names in the alignment is shorter than\r
- * minNameHolderLength than spaces are added to complete the name up to\r
- * minNameHolderLength\r
- */\r
- private static final int minNameHolderLength = 10; // Minimum number of\r
-\r
- // TODO check whether clustal still loads data if length is 60!\r
- private static final int oneLineAlignmentLength = 60; // this could in fact\r
-\r
- // be 50\r
-\r
- // for long names ~30 chars\r
-\r
- /**\r
- * Read Clustal formatted alignment. Limitations: Does not read consensus\r
- * \r
- * Sequence names as well as the sequences are not guaranteed to be unique!\r
- * \r
- * @throws {@link IOException}\r
- * @throws {@link UnknownFileFormatException}\r
- */\r
- public static Alignment readClustalFile(InputStream instream)\r
- throws IOException, UnknownFileFormatException {\r
-\r
- boolean flag = false;\r
-\r
- List<String> headers = new ArrayList<String>();\r
- Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
- FastaSequence[] seqs = null;\r
-\r
- String line;\r
-\r
- BufferedReader breader = new BufferedReader(new InputStreamReader(\r
- instream));\r
- while ((line = breader.readLine()) != null) {\r
- if (line.indexOf(" ") != 0) {\r
- java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
- String id = "";\r
-\r
- if (str.hasMoreTokens()) {\r
- id = str.nextToken();\r
- // PROBCONS output clustal formatted file with not mention\r
- // of CLUSTAL (:-))\r
- if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
- flag = true;\r
- } else {\r
- if (flag) {\r
- StringBuffer tempseq;\r
- if (seqhash.containsKey(id)) {\r
- tempseq = seqhash.get(id);\r
- } else {\r
- tempseq = new StringBuffer();\r
- seqhash.put(id, tempseq);\r
- }\r
-\r
- if (!(headers.contains(id))) {\r
- headers.add(id);\r
- }\r
-\r
- tempseq.append(str.nextToken());\r
+ private static final Logger log = Logger\r
+ .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
+\r
+ /**\r
+ * Dash char to be used as gap char in the alignments\r
+ */\r
+ public static final char gapchar = '-';\r
+\r
+ /*\r
+ * Number of spaces separating the name and the sequence\r
+ */\r
+ private static final String spacer = " "; // 6 space characters\r
+ /*\r
+ * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is\r
+ * longer than that it gets trimmed in the end\r
+ */\r
+ private static final int maxNameLength = 30; // Maximum name length\r
+ /*\r
+ * If all sequences names in the alignment is shorter than\r
+ * minNameHolderLength than spaces are added to complete the name up to\r
+ * minNameHolderLength\r
+ */\r
+ private static final int minNameHolderLength = 10; // Minimum number of\r
+\r
+ // TODO check whether clustal still loads data if length is 60!\r
+ private static final int oneLineAlignmentLength = 60; // this could in fact\r
+\r
+ // be 50\r
+\r
+ // for long names ~30 chars\r
+\r
+ /**\r
+ * Read Clustal formatted alignment. Limitations: Does not read consensus\r
+ * \r
+ * Sequence names as well as the sequences are not guaranteed to be unique!\r
+ * \r
+ * @throws {@link IOException}\r
+ * @throws {@link UnknownFileFormatException}\r
+ */\r
+ public static Alignment readClustalFile(InputStream instream)\r
+ throws IOException, UnknownFileFormatException {\r
+\r
+ boolean flag = false;\r
+\r
+ List<String> headers = new ArrayList<String>();\r
+ Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
+ FastaSequence[] seqs = null;\r
+\r
+ String line;\r
+\r
+ BufferedReader breader = new BufferedReader(new InputStreamReader(\r
+ instream));\r
+ while ((line = breader.readLine()) != null) {\r
+ if (line.indexOf(" ") != 0) {\r
+ java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
+ String id = "";\r
+\r
+ if (str.hasMoreTokens()) {\r
+ id = str.nextToken();\r
+ // PROBCONS output clustal formatted file with not mention\r
+ // of CLUSTAL (:-))\r
+ if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
+ flag = true;\r
+ } else {\r
+ if (flag) {\r
+ StringBuffer tempseq;\r
+ if (seqhash.containsKey(id)) {\r
+ tempseq = seqhash.get(id);\r
+ } else {\r
+ tempseq = new StringBuffer();\r
+ seqhash.put(id, tempseq);\r
+ }\r
+\r
+ if (!(headers.contains(id))) {\r
+ headers.add(id);\r
+ }\r
+\r
+ tempseq.append(str.nextToken());\r
+ }\r
+ }\r
+ }\r
}\r
- }\r
}\r
- }\r
- }\r
- breader.close();\r
+ breader.close();\r
\r
- // TODO improve this bit\r
- if (flag) {\r
+ // TODO improve this bit\r
+ if (flag) {\r
\r
- // Add sequences to the hash\r
- seqs = new FastaSequence[headers.size()];\r
- for (int i = 0; i < headers.size(); i++) {\r
- if (seqhash.get(headers.get(i)) != null) {\r
+ // Add sequences to the hash\r
+ seqs = new FastaSequence[headers.size()];\r
+ for (int i = 0; i < headers.size(); i++) {\r
+ if (seqhash.get(headers.get(i)) != null) {\r
\r
- FastaSequence newSeq = new FastaSequence(headers.get(i),\r
- seqhash.get(headers.get(i)).toString());\r
+ FastaSequence newSeq = new FastaSequence(headers.get(i),\r
+ seqhash.get(headers.get(i)).toString());\r
\r
- seqs[i] = newSeq;\r
+ seqs[i] = newSeq;\r
\r
- } else {\r
- // should not happened\r
- throw new AssertionError(\r
- "Bizarreness! Can't find sequence for "\r
- + headers.get(i));\r
+ } else {\r
+ // should not happened\r
+ throw new AssertionError(\r
+ "Bizarreness! Can't find sequence for "\r
+ + headers.get(i));\r
+ }\r
+ }\r
}\r
- }\r
- }\r
- if (seqs == null || seqs.length == 0) {\r
- throw new UnknownFileFormatException(\r
- "Input does not appear to be a clustal file! ");\r
- }\r
- return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
- Program.CLUSTAL, gapchar));\r
- }\r
-\r
- /**\r
- * \r
- * @param input\r
- * @return true if the file is recognised as Clustal formatted alignment,\r
- * false otherwise\r
- */\r
- public static boolean isValidClustalFile(InputStream input) {\r
- if (input == null) {\r
- throw new NullPointerException("Input is expected!");\r
- }\r
- BufferedReader breader = new BufferedReader(\r
- new InputStreamReader(input));\r
- try {\r
- if (input.available() < 10) {\r
- return false;\r
- }\r
- // read first 10 lines to find "Clustal"\r
- for (int i = 0; i < 10; i++) {\r
- String line = breader.readLine();\r
- if (line != null) {\r
- line = line.toUpperCase().trim();\r
- if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
- return true;\r
- }\r
+ if (seqs == null || seqs.length == 0) {\r
+ throw new UnknownFileFormatException(\r
+ "Input does not appear to be a clustal file! ");\r
}\r
- }\r
-\r
- breader.close();\r
- } catch (IOException e) {\r
- log.severe("Could not read from the stream! "\r
- + e.getLocalizedMessage() + e.getCause());\r
- } finally {\r
- SequenceUtil.closeSilently(log, breader);\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * Write Clustal formatted alignment Limitations: does not record the\r
- * consensus. Potential bug - records 60 chars length alignment where\r
- * Clustal would have recorded 50 chars.\r
- * \r
- * @param outStream\r
- * \r
- * @param alignment\r
- * @throws IOException\r
- */\r
- public static void writeClustalAlignment(final OutputStream outStream,\r
- final Alignment alignment) throws IOException {\r
- List<FastaSequence> seqs = alignment.getSequences();\r
-\r
- PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream));\r
-\r
- out.write("CLUSTAL\n\n\n");\r
-\r
- int max = 0;\r
- int maxidLength = 0;\r
-\r
- int i = 0;\r
- // Find the longest sequence name\r
- for (FastaSequence fs : seqs) {\r
- String tmp = fs.getId();\r
-\r
- if (fs.getSequence().length() > max) {\r
- max = fs.getSequence().length();\r
- }\r
- if (tmp.length() > maxidLength) {\r
- maxidLength = tmp.length();\r
- }\r
- i++;\r
- }\r
- if (maxidLength < minNameHolderLength) {\r
- maxidLength = minNameHolderLength;\r
- }\r
- if (maxidLength > maxNameLength) {\r
- maxidLength = 30; // the rest will be trimmed\r
+ return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
+ Program.CLUSTAL, gapchar));\r
}\r
\r
- int oneLineAlignmentLength = 60;\r
- int nochunks = max / oneLineAlignmentLength + 1;\r
-\r
- for (i = 0; i < nochunks; i++) {\r
- int j = 0;\r
- for (FastaSequence fs : seqs) {\r
-\r
- String name = fs.getId();\r
- // display at most 30 characters in the name, keep the names\r
- // 6 spaces away from the alignment for longest sequence names,\r
- // and more than this for shorter names\r
- out.format("%-" + maxidLength + "s" + spacer,\r
- (name.length() > maxNameLength ? name.substring(0,\r
- maxidLength) : name));\r
- int start = i * oneLineAlignmentLength;\r
- int end = start + oneLineAlignmentLength;\r
-\r
- if (end < fs.getSequence().length()\r
- && start < fs.getSequence().length()) {\r
- out.write(fs.getSequence().substring(start, end) + "\n");\r
- } else {\r
- if (start < fs.getSequence().length()) {\r
- out.write(fs.getSequence().substring(start) + "\n");\r
- }\r
+ /**\r
+ * Please note this method closes the input stream provided as a parameter\r
+ * \r
+ * @param input\r
+ * @return true if the file is recognised as Clustal formatted alignment,\r
+ * false otherwise\r
+ */\r
+ public static boolean isValidClustalFile(InputStream input) {\r
+ if (input == null) {\r
+ throw new NullPointerException("Input is expected!");\r
}\r
- j++;\r
- }\r
- out.write("\n");\r
- }\r
- try {\r
- out.close();\r
- } finally {\r
- SequenceUtil.closeSilently(log, out);\r
+ BufferedReader breader = new BufferedReader(\r
+ new InputStreamReader(input));\r
+ try {\r
+ if (input.available() < 10) {\r
+ return false;\r
+ }\r
+ // read first 10 lines to find "Clustal"\r
+ for (int i = 0; i < 10; i++) {\r
+ String line = breader.readLine();\r
+ if (line != null) {\r
+ line = line.toUpperCase().trim();\r
+ if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
+ return true;\r
+ }\r
+ }\r
+ }\r
+\r
+ breader.close();\r
+ } catch (IOException e) {\r
+ log.severe("Could not read from the stream! "\r
+ + e.getLocalizedMessage() + e.getCause());\r
+ } finally {\r
+ SequenceUtil.closeSilently(log, breader);\r
+ }\r
+ return false;\r
}\r
- }\r
\r
- public static Alignment readClustalFile(File file)\r
- throws UnknownFileFormatException, IOException {\r
- if (file == null) {\r
- throw new NullPointerException("File is expected!");\r
+ /**\r
+ * Write Clustal formatted alignment Limitations: does not record the\r
+ * consensus. Potential bug - records 60 chars length alignment where\r
+ * Clustal would have recorded 50 chars.\r
+ * \r
+ * @param outStream\r
+ * \r
+ * @param alignment\r
+ * @throws IOException\r
+ */\r
+ public static void writeClustalAlignment(final OutputStream outStream,\r
+ final Alignment alignment) throws IOException {\r
+ List<FastaSequence> seqs = alignment.getSequences();\r
+\r
+ PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream));\r
+\r
+ out.write("CLUSTAL\n\n\n");\r
+\r
+ int max = 0;\r
+ int maxidLength = 0;\r
+\r
+ int i = 0;\r
+ // Find the longest sequence name\r
+ for (FastaSequence fs : seqs) {\r
+ String tmp = fs.getId();\r
+\r
+ if (fs.getSequence().length() > max) {\r
+ max = fs.getSequence().length();\r
+ }\r
+ if (tmp.length() > maxidLength) {\r
+ maxidLength = tmp.length();\r
+ }\r
+ i++;\r
+ }\r
+ if (maxidLength < minNameHolderLength) {\r
+ maxidLength = minNameHolderLength;\r
+ }\r
+ if (maxidLength > maxNameLength) {\r
+ maxidLength = 30; // the rest will be trimmed\r
+ }\r
+\r
+ int oneLineAlignmentLength = 60;\r
+ int nochunks = max / oneLineAlignmentLength + 1;\r
+\r
+ for (i = 0; i < nochunks; i++) {\r
+ int j = 0;\r
+ for (FastaSequence fs : seqs) {\r
+\r
+ String name = fs.getId();\r
+ // display at most 30 characters in the name, keep the names\r
+ // 6 spaces away from the alignment for longest sequence names,\r
+ // and more than this for shorter names\r
+ out.format("%-" + maxidLength + "s" + spacer,\r
+ (name.length() > maxNameLength ? name.substring(0,\r
+ maxidLength) : name));\r
+ int start = i * oneLineAlignmentLength;\r
+ int end = start + oneLineAlignmentLength;\r
+\r
+ if (end < fs.getSequence().length()\r
+ && start < fs.getSequence().length()) {\r
+ out.write(fs.getSequence().substring(start, end) + "\n");\r
+ } else {\r
+ if (start < fs.getSequence().length()) {\r
+ out.write(fs.getSequence().substring(start) + "\n");\r
+ }\r
+ }\r
+ j++;\r
+ }\r
+ out.write("\n");\r
+ }\r
+ try {\r
+ out.close();\r
+ } finally {\r
+ SequenceUtil.closeSilently(log, out);\r
+ }\r
}\r
- FileInputStream fio = new FileInputStream(file);\r
- Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
- try {\r
- fio.close();\r
- } finally {\r
- SequenceUtil.closeSilently(log, fio);\r
+\r
+ public static Alignment readClustalFile(File file)\r
+ throws UnknownFileFormatException, IOException {\r
+ if (file == null) {\r
+ throw new NullPointerException("File is expected!");\r
+ }\r
+ FileInputStream fio = new FileInputStream(file);\r
+ Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
+ try {\r
+ fio.close();\r
+ } finally {\r
+ SequenceUtil.closeSilently(log, fio);\r
+ }\r
+ return seqAl;\r
}\r
- return seqAl;\r
- }\r
}\r
--- /dev/null
+package compbio.data.sequence;\r
+\r
+public enum DisemblResultAnnot {\r
+ COILS, REM465, HOTLOOPS\r
+}\r
-/* Copyright (c) 2009 Peter Troshin\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0\r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
*/\r
\r
package compbio.data.sequence;\r
@Immutable\r
public class FastaSequence {\r
\r
- /**\r
- * Sequence id\r
- */\r
- private String id;\r
-\r
- // TODO what about gapped sequence here! should be indicated\r
- /**\r
- * Returns the string representation of sequence\r
- */\r
- private String sequence;\r
-\r
- private FastaSequence() {\r
- // Default constructor for JaxB\r
- }\r
-\r
- /**\r
- * Upon construction the any whitespace characters are removed from the\r
- * sequence\r
- * \r
- * @param id\r
- * @param sequence\r
- */\r
- public FastaSequence(String id, String sequence) {\r
- this.id = id;\r
- this.sequence = SequenceUtil.cleanSequence(sequence);\r
- }\r
-\r
- /**\r
- * Gets the value of id\r
- * \r
- * @return the value of id\r
- */\r
- public String getId() {\r
- return this.id;\r
- }\r
-\r
- /**\r
- * Gets the value of sequence\r
- * \r
- * @return the value of sequence\r
- */\r
- public String getSequence() {\r
- return this.sequence;\r
- }\r
-\r
- public static int countMatchesInSequence(final String theString,\r
- final String theRegExp) {\r
- final Pattern p = Pattern.compile(theRegExp);\r
- final Matcher m = p.matcher(theString);\r
- int cnt = 0;\r
- while (m.find()) {\r
- cnt++;\r
+ /**\r
+ * Sequence id\r
+ */\r
+ private String id;\r
+\r
+ // TODO what about gapped sequence here! should be indicated\r
+ /**\r
+ * Returns the string representation of sequence\r
+ */\r
+ private String sequence;\r
+\r
+ private FastaSequence() {\r
+ // Default constructor for JaxB\r
}\r
- return cnt;\r
- }\r
-\r
- public String getFormattedFasta() {\r
- return getFormatedSequence(80);\r
- }\r
-\r
- /**\r
- * \r
- * @return one line name, next line sequence, no matter what the sequence\r
- * length is\r
- */\r
- public String getOnelineFasta() {\r
- String fasta = ">" + getId() + SysPrefs.newlinechar;\r
- fasta += getSequence() + SysPrefs.newlinechar;\r
- return fasta;\r
- }\r
-\r
- /**\r
- * Format sequence per width letter in one string. Without spaces.\r
- * \r
- * @return multiple line formated sequence, one line width letters length\r
- * \r
- */\r
- public String getFormatedSequence(final int width) {\r
- if (sequence == null) {\r
- return "";\r
+\r
+ /**\r
+ * Upon construction the any whitespace characters are removed from the\r
+ * sequence\r
+ * \r
+ * @param id\r
+ * @param sequence\r
+ */\r
+ public FastaSequence(String id, String sequence) {\r
+ this.id = id;\r
+ this.sequence = SequenceUtil.cleanSequence(sequence);\r
}\r
\r
- assert width >= 0 : "Wrong width parameter ";\r
-\r
- final StringBuilder sb = new StringBuilder(sequence);\r
- int nchunks = sequence.length() / width;\r
- // add up inserted new line chars\r
- nchunks = (nchunks + sequence.length()) / width;\r
- int nlineCharcounter = 0;\r
- for (int i = 1; i <= nchunks; i++) {\r
- int insPos = width * i + nlineCharcounter;\r
- // to prevent inserting new line in the very end of a sequence then\r
- // it would have failed.\r
- // Also covers the case when the sequences shorter than width\r
- if (sb.length() <= insPos) {\r
- break;\r
- }\r
- sb.insert(insPos, "\n");\r
- nlineCharcounter++;\r
+ /**\r
+ * Gets the value of id\r
+ * \r
+ * @return the value of id\r
+ */\r
+ public String getId() {\r
+ return this.id;\r
}\r
- return sb.toString();\r
- }\r
-\r
- /**\r
- * \r
- * @return sequence length\r
- */\r
- public int getLength() {\r
- return this.sequence.length();\r
- }\r
-\r
- /**\r
- * Same as oneLineFasta\r
- */\r
- @Override\r
- public String toString() {\r
- return this.getOnelineFasta();\r
- }\r
-\r
- @Override\r
- public int hashCode() {\r
- final int prime = 17;\r
- int result = 1;\r
- result = prime * result + ((id == null) ? 0 : id.hashCode());\r
- result = prime * result\r
- + ((sequence == null) ? 0 : sequence.hashCode());\r
- return result;\r
- }\r
-\r
- @Override\r
- public boolean equals(Object obj) {\r
- if (obj == null) {\r
- return false;\r
+\r
+ /**\r
+ * Gets the value of sequence\r
+ * \r
+ * @return the value of sequence\r
+ */\r
+ public String getSequence() {\r
+ return this.sequence;\r
}\r
- if (!(obj instanceof FastaSequence)) {\r
- return false;\r
+\r
+ public static int countMatchesInSequence(final String theString,\r
+ final String theRegExp) {\r
+ final Pattern p = Pattern.compile(theRegExp);\r
+ final Matcher m = p.matcher(theString);\r
+ int cnt = 0;\r
+ while (m.find()) {\r
+ cnt++;\r
+ }\r
+ return cnt;\r
}\r
- FastaSequence fs = (FastaSequence) obj;\r
- if (!fs.getId().equals(this.getId())) {\r
- return false;\r
+\r
+ public String getFormattedFasta() {\r
+ return getFormatedSequence(80);\r
}\r
- if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {\r
- return false;\r
+\r
+ /**\r
+ * \r
+ * @return one line name, next line sequence, no matter what the sequence\r
+ * length is\r
+ */\r
+ public String getOnelineFasta() {\r
+ String fasta = ">" + getId() + SysPrefs.newlinechar;\r
+ fasta += getSequence() + SysPrefs.newlinechar;\r
+ return fasta;\r
+ }\r
+\r
+ /**\r
+ * Format sequence per width letter in one string. Without spaces.\r
+ * \r
+ * @return multiple line formated sequence, one line width letters length\r
+ * \r
+ */\r
+ public String getFormatedSequence(final int width) {\r
+ if (sequence == null) {\r
+ return "";\r
+ }\r
+\r
+ assert width >= 0 : "Wrong width parameter ";\r
+\r
+ final StringBuilder sb = new StringBuilder(sequence);\r
+ // int tail = nrOfWindows % WIN_SIZE;\r
+ // final int turns = (nrOfWindows - tail) / WIN_SIZE;\r
+\r
+ int tailLen = sequence.length() % width;\r
+ // add up inserted new line chars\r
+ int nchunks = (sequence.length() - tailLen) / width;\r
+ int nlineCharcounter = 0;\r
+ int insPos = 0;\r
+ for (int i = 1; i <= nchunks; i++) {\r
+ insPos = width * i + nlineCharcounter;\r
+ // to prevent inserting new line in the very end of a sequence then\r
+ // it would have failed.\r
+ if (sb.length() <= insPos) {\r
+ break;\r
+ }\r
+ sb.insert(insPos, "\n");\r
+ nlineCharcounter++;\r
+ }\r
+ // sb.insert(insPos + tailLen, "\n");\r
+ return sb.toString();\r
+ }\r
+\r
+ /**\r
+ * \r
+ * @return sequence length\r
+ */\r
+ public int getLength() {\r
+ return this.sequence.length();\r
+ }\r
+\r
+ /**\r
+ * Same as oneLineFasta\r
+ */\r
+ @Override\r
+ public String toString() {\r
+ return this.getOnelineFasta();\r
+ }\r
+\r
+ @Override\r
+ public int hashCode() {\r
+ final int prime = 17;\r
+ int result = 1;\r
+ result = prime * result + ((id == null) ? 0 : id.hashCode());\r
+ result = prime * result\r
+ + ((sequence == null) ? 0 : sequence.hashCode());\r
+ return result;\r
+ }\r
+\r
+ @Override\r
+ public boolean equals(Object obj) {\r
+ if (obj == null) {\r
+ return false;\r
+ }\r
+ if (!(obj instanceof FastaSequence)) {\r
+ return false;\r
+ }\r
+ FastaSequence fs = (FastaSequence) obj;\r
+ if (!fs.getId().equals(this.getId())) {\r
+ return false;\r
+ }\r
+ if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {\r
+ return false;\r
+ }\r
+ return true;\r
}\r
- return true;\r
- }\r
\r
}\r
--- /dev/null
+package compbio.data.sequence;\r
+\r
+public class JalviewAnnotation {\r
+\r
+ String annotation;\r
+\r
+}\r
\r
import java.util.EnumMap;\r
import java.util.List;\r
+import java.util.Map;\r
+\r
+import compbio.util.annotation.NotThreadSafe;\r
\r
/**\r
* TODO complete\r
* @param <T>\r
* enum type\r
*/\r
+@NotThreadSafe\r
public class MultiAnnotatedSequence<T extends Enum<T>> {\r
\r
- private final EnumMap<T, List<Float>> annotation;\r
+ // Annotation values keyed by annotation type; created as an EnumMap in the\r
+ // constructor\r
+ private final Map<T, List<Float>> annotations;\r
+\r
+ /**\r
+ * Creates an empty container.\r
+ * \r
+ * @param enumeration\r
+ * the enum class whose constants are used as annotation types\r
+ */\r
+ public MultiAnnotatedSequence(Class<T> enumeration) {\r
+ this.annotations = new EnumMap<T, List<Float>>(enumeration);\r
+ }\r
+\r
+    /**\r
+     * Stores the values for one annotation type, replacing values stored\r
+     * earlier for the same type. The arguments are validated with assertions\r
+     * only: both must be non-null, and the new list must have as many elements\r
+     * as the first list already stored.\r
+     * \r
+     * @param type\r
+     *            annotation type\r
+     * @param annotation\r
+     *            values for that type\r
+     */\r
+    public void addAnnotation(T type, List<Float> annotation) {\r
+        assert type != null : "Type is expected";\r
+        assert annotation != null : "Not empty value is expected!";\r
+        assert annotations.isEmpty()\r
+                || annotations.values().iterator().next().size() == annotation\r
+                        .size() : "Annotations must contain the same number of elements!";\r
+        annotations.put(type, annotation);\r
+    }\r
+\r
+ /**\r
+ * Returns a copy of the annotation map. Only the map is copied; the value\r
+ * lists are the same objects that are held by this instance.\r
+ * \r
+ * @return a new EnumMap with the current annotations\r
+ */\r
+ public Map<T, List<Float>> getAnnotations() {\r
+ return new EnumMap<T, List<Float>>(this.annotations);\r
+ }\r
\r
- private MultiAnnotatedSequence(Class<T> type) {\r
- this.annotation = new EnumMap<T, List<Float>>(type);\r
- }\r
+    /**\r
+     * Hash code derived from the annotation map only.\r
+     * \r
+     * @return 31 plus the hash of the annotation map (0 for a null map)\r
+     */\r
+    @Override\r
+    public int hashCode() {\r
+        final int mapHash = (annotations == null) ? 0 : annotations.hashCode();\r
+        return 31 + mapHash;\r
+    }\r
\r
- // public MultiAnnotatedSequence getFloatInstance(FastaSequence fsequence) {\r
- // return null;\r
- //}\r
+    /**\r
+     * Two instances are equal when they are of exactly the same class and\r
+     * their annotation maps are equal.\r
+     * \r
+     * @param obj\r
+     *            the object to compare with\r
+     * @return true if obj holds an equal annotation map\r
+     */\r
+    @Override\r
+    public boolean equals(Object obj) {\r
+        if (this == obj) {\r
+            return true;\r
+        }\r
+        if (obj == null || getClass() != obj.getClass()) {\r
+            return false;\r
+        }\r
+        final MultiAnnotatedSequence<?> that = (MultiAnnotatedSequence<?>) obj;\r
+        return (annotations == null)\r
+                ? that.annotations == null\r
+                : annotations.equals(that.annotations);\r
+    }\r
\r
- public EnumMap<T, List<Integer>> getIntegerInstance(Class<T> enumeration) {\r
- return new EnumMap<T, List<Integer>>(enumeration);\r
- }\r
+    /**\r
+     * Lists the annotations, one per line, as the annotation type, a space\r
+     * and the list of values.\r
+     * \r
+     * @return text form of all annotations; empty when there are none\r
+     */\r
+    @Override\r
+    public String toString() {\r
+        final StringBuilder text = new StringBuilder();\r
+        for (Map.Entry<T, List<Float>> entry : annotations.entrySet()) {\r
+            text.append(entry.getKey()).append(" ");\r
+            text.append(entry.getValue()).append("\n");\r
+        }\r
+        return text.toString();\r
+    }\r
\r
- public EnumMap<T, List<Float>> getFloatInstance(Class<T> enumeration) {\r
- return new EnumMap<T, List<Float>>(enumeration);\r
- }\r
+ /**\r
+ * Not implemented yet.\r
+ * \r
+ * @return always null for now\r
+ */\r
+ public JalviewAnnotation toJalviewAnnotation() {\r
+ // TODO Auto-generated method stub\r
+ return null;\r
+ }\r
}\r
-/* \r
- * @(#)SequenceUtil.java 1.0 September 2009\r
- * \r
- * Copyright (c) 2009 Peter Troshin\r
- * \r
- * Jalview Web Services version: 2.0 \r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * @(#)SequenceUtil.java 1.0 September 2009 Copyright (c) 2009 Peter Troshin\r
+ * Jalview Web Services version: 2.0 This library is free software; you can\r
+ * redistribute it and/or modify it under the terms of the Apache License\r
+ * version 2 as published by the Apache Software Foundation This library is\r
+ * distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;\r
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\r
+ * PARTICULAR PURPOSE. See the Apache License for more details. A copy of the\r
+ * license is in apache_license.txt. It is also available here: see:\r
+ * http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or derived\r
+ * work distributed in source code form must include this copyright and license\r
+ * notice.\r
*/\r
\r
package compbio.data.sequence;\r
import java.io.OutputStreamWriter;\r
import java.util.ArrayList;\r
import java.util.List;\r
+import java.util.Scanner;\r
import java.util.logging.Level;\r
import java.util.regex.Matcher;\r
import java.util.regex.Pattern;\r
*/\r
public final class SequenceUtil {\r
\r
- /**\r
- * A whitespace character: [\t\n\x0B\f\r]\r
- */\r
- public static final Pattern WHITE_SPACE = Pattern.compile("\\s");\r
-\r
- /**\r
- * A digit\r
- */\r
- public static final Pattern DIGIT = Pattern.compile("\\d");\r
-\r
- /**\r
- * Non word\r
- */\r
- public static final Pattern NONWORD = Pattern.compile("\\W");\r
-\r
- /**\r
- * Valid Amino acids\r
- */\r
- public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+",\r
- Pattern.CASE_INSENSITIVE);\r
-\r
- /**\r
- * inversion of AA pattern\r
- */\r
- public static final Pattern NON_AA = Pattern.compile(\r
- "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);\r
-\r
- /**\r
- * Same as AA pattern but with two additional letters - XU\r
- */\r
- public static final Pattern AMBIGUOUS_AA = Pattern.compile(\r
- "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);\r
-\r
- /**\r
- * Nucleotides a, t, g, c, u\r
- */\r
- public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+",\r
- Pattern.CASE_INSENSITIVE);\r
-\r
- /**\r
- * Ambiguous nucleotide\r
- */\r
- public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile(\r
- "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC\r
- /**\r
- * Non nucleotide\r
- */\r
- public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+",\r
- Pattern.CASE_INSENSITIVE);\r
-\r
- private SequenceUtil() {\r
- } // utility class, no instantiation\r
-\r
- /*\r
- * public static void write_PirSeq(OutputStream os, FastaSequence seq)\r
- * throws IOException { BufferedWriter pir_out = new BufferedWriter(new\r
- * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() +\r
- * SysPrefs.newlinechar); pir_out.write(seq.getSequence() +\r
- * SysPrefs.newlinechar); pir_out.close(); }\r
- * \r
- * public static void write_FastaSeq(OutputStream os, FastaSequence seq)\r
- * throws IOException { BufferedWriter fasta_out = new BufferedWriter( new\r
- * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() +\r
- * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() +\r
- * SysPrefs.newlinechar); fasta_out.close(); }\r
- */\r
-\r
- /**\r
- * @return true is the sequence contains only letters a,c, t, g, u\r
- */\r
- public static boolean isNucleotideSequence(final FastaSequence s) {\r
- return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence());\r
- }\r
-\r
- /**\r
- * Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one\r
- * (!) - B char\r
- */\r
- public static boolean isNonAmbNucleotideSequence(String sequence) {\r
- sequence = SequenceUtil.cleanSequence(sequence);\r
- if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
- return false;\r
+ /**\r
+ * A whitespace character: [ \t\n\x0B\f\r]\r
+ */\r
+ public static final Pattern WHITE_SPACE = Pattern.compile("\\s");\r
+\r
+ /**\r
+ * A digit\r
+ */\r
+ public static final Pattern DIGIT = Pattern.compile("\\d");\r
+\r
+ /**\r
+ * Non word\r
+ */\r
+ public static final Pattern NONWORD = Pattern.compile("\\W");\r
+\r
+ /**\r
+ * Valid Amino acids\r
+ */\r
+ public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+",\r
+ Pattern.CASE_INSENSITIVE);\r
+\r
+ /**\r
+ * inversion of AA pattern\r
+ */\r
+ public static final Pattern NON_AA = Pattern.compile(\r
+ "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);\r
+\r
+ /**\r
+ * Same as AA pattern but with two additional letters - XU\r
+ */\r
+ public static final Pattern AMBIGUOUS_AA = Pattern.compile(\r
+ "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);\r
+\r
+ /**\r
+ * Nucleotides a, t, g, c, u\r
+ */\r
+ public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+",\r
+ Pattern.CASE_INSENSITIVE);\r
+\r
+ /**\r
+ * Ambiguous nucleotide\r
+ */\r
+ public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile(\r
+ "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC\r
+ /**\r
+ * Non nucleotide\r
+ */\r
+ public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+",\r
+ Pattern.CASE_INSENSITIVE);\r
+\r
+ private SequenceUtil() {\r
+ } // utility class, no instantiation\r
+\r
+ /*\r
+ * public static void write_PirSeq(OutputStream os, FastaSequence seq)\r
+ * throws IOException { BufferedWriter pir_out = new BufferedWriter(new\r
+ * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() +\r
+ * SysPrefs.newlinechar); pir_out.write(seq.getSequence() +\r
+ * SysPrefs.newlinechar); pir_out.close(); } public static void\r
+ * write_FastaSeq(OutputStream os, FastaSequence seq) throws IOException {\r
+ * BufferedWriter fasta_out = new BufferedWriter( new\r
+ * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() +\r
+ * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() +\r
+ * SysPrefs.newlinechar); fasta_out.close(); }\r
+ */\r
+\r
+ /**\r
+ * Tests the sequence of the given FastaSequence with\r
+ * isNonAmbNucleotideSequence(String).\r
+ * \r
+ * @param s\r
+ * the sequence to test\r
+ * @return true if the sequence contains only letters a, c, t, g, u\r
+ */\r
+ public static boolean isNucleotideSequence(final FastaSequence s) {\r
+ return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence());\r
}\r
- if (SequenceUtil.NON_NUCLEOTIDE.matcher(sequence).find()) {\r
- return false;\r
- /*\r
- * System.out.format("I found the text starting at " +\r
- * "index %d and ending at index %d.%n", nonDNAmatcher .start(),\r
- * nonDNAmatcher.end());\r
- */\r
+\r
+    /**\r
+     * Tests whether the sequence, once whitespace is removed, consists only of\r
+     * the non-ambiguous nucleotide letters A, G, T, C, U (in any case).\r
+     * <p>\r
+     * For reference, the ambiguous DNA alphabet is AGTCRYMKSWHBVDN; it differs\r
+     * from the protein alphabet in only one character - B.\r
+     * \r
+     * @param sequence\r
+     *            the sequence to test\r
+     * @return true for a non-empty sequence of non-ambiguous nucleotides only\r
+     */\r
+    public static boolean isNonAmbNucleotideSequence(String sequence) {\r
+        final String cleaned = SequenceUtil.cleanSequence(sequence);\r
+        final boolean hasForeignChars = SequenceUtil.DIGIT.matcher(cleaned)\r
+                .find()\r
+                || SequenceUtil.NON_NUCLEOTIDE.matcher(cleaned).find();\r
+        if (hasForeignChars) {\r
+            return false;\r
+        }\r
+        // an empty sequence contains no nucleotide run and is reported false\r
+        return SequenceUtil.NUCLEOTIDE.matcher(cleaned).find();\r
}\r
- final Matcher DNAmatcher = SequenceUtil.NUCLEOTIDE.matcher(sequence);\r
- return DNAmatcher.find();\r
- }\r
-\r
- /**\r
- * Removes all whitespace chars in the sequence string\r
- * \r
- * @param sequence\r
- * @return cleaned up sequence\r
- */\r
- public static String cleanSequence(String sequence) {\r
- assert sequence != null;\r
- final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence);\r
- sequence = m.replaceAll("").toUpperCase();\r
- return sequence;\r
- }\r
-\r
- /**\r
- * Removes all special characters and digits as well as whitespace chars\r
- * from the sequence\r
- * \r
- * @param sequence\r
- * @return cleaned up sequence\r
- */\r
- public static String deepCleanSequence(String sequence) {\r
- sequence = SequenceUtil.cleanSequence(sequence);\r
- sequence = SequenceUtil.DIGIT.matcher(sequence).replaceAll("");\r
- sequence = SequenceUtil.NONWORD.matcher(sequence).replaceAll("");\r
- final Pattern othernonSeqChars = Pattern.compile("[_-]+");\r
- sequence = othernonSeqChars.matcher(sequence).replaceAll("");\r
- return sequence;\r
- }\r
-\r
- /**\r
- * \r
- * @param sequence\r
- * @return true is the sequence is a protein sequence, false overwise\r
- */\r
- public static boolean isProteinSequence(String sequence) {\r
- sequence = SequenceUtil.cleanSequence(sequence);\r
- if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
- return false;\r
+\r
+    /**\r
+     * Removes all whitespace chars in the sequence string and converts the\r
+     * result to upper case.\r
+     * \r
+     * @param sequence\r
+     *            the sequence to clean, must not be null\r
+     * @return cleaned up, upper-cased sequence\r
+     */\r
+    public static String cleanSequence(String sequence) {\r
+        assert sequence != null;\r
+        final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence);\r
+        // Locale.ROOT: the default-locale toUpperCase() maps 'i' to a dotted\r
+        // capital I under e.g. a Turkish locale, a character that none of the\r
+        // residue patterns of this class accepts.\r
+        return m.replaceAll("").toUpperCase(java.util.Locale.ROOT);\r
}\r
- if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
- return false;\r
+\r
+    /**\r
+     * Strips whitespace, digits, non-word characters, underscores and hyphens\r
+     * from the sequence; the result is upper-cased by cleanSequence.\r
+     * \r
+     * @param sequence\r
+     *            the sequence to clean\r
+     * @return cleaned up sequence\r
+     */\r
+    public static String deepCleanSequence(String sequence) {\r
+        String cleaned = SequenceUtil.cleanSequence(sequence);\r
+        cleaned = SequenceUtil.DIGIT.matcher(cleaned).replaceAll("");\r
+        cleaned = SequenceUtil.NONWORD.matcher(cleaned).replaceAll("");\r
+        return Pattern.compile("[_-]+").matcher(cleaned).replaceAll("");\r
}\r
- if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
- return false;\r
+\r
+    /**\r
+     * Tests whether the sequence is a protein sequence: after whitespace\r
+     * removal it must not qualify as a non-ambiguous nucleotide sequence and\r
+     * must consist of the 20 standard amino acid letters only.\r
+     * \r
+     * @param sequence\r
+     *            the sequence to test\r
+     * @return true if the sequence is a protein sequence, false otherwise\r
+     */\r
+    public static boolean isProteinSequence(String sequence) {\r
+        final String cleaned = SequenceUtil.cleanSequence(sequence);\r
+        final boolean rejected = SequenceUtil\r
+                .isNonAmbNucleotideSequence(cleaned)\r
+                || SequenceUtil.DIGIT.matcher(cleaned).find()\r
+                || SequenceUtil.NON_AA.matcher(cleaned).find();\r
+        return !rejected && SequenceUtil.AA.matcher(cleaned).find();\r
}\r
- final Matcher protmatcher = SequenceUtil.AA.matcher(sequence);\r
- return protmatcher.find();\r
- }\r
-\r
- /**\r
- * Check whether the sequence confirms to amboguous protein sequence\r
- * \r
- * @param sequence\r
- * @return return true only if the sequence if ambiguous protein sequence\r
- * Return false otherwise. e.g. if the sequence is non-ambiguous\r
- * protein or DNA\r
- */\r
- public static boolean isAmbiguosProtein(String sequence) {\r
- sequence = SequenceUtil.cleanSequence(sequence);\r
- if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
- return false;\r
+\r
+    /**\r
+     * Checks whether the sequence conforms to an ambiguous protein sequence,\r
+     * i.e. it is made of the standard amino acid letters plus X and U and\r
+     * contains at least one X or U.\r
+     * <p>\r
+     * The previous implementation could never return true: the NON_AA check\r
+     * rejected every sequence containing X or U, and the AA check rejected\r
+     * every remaining non-empty sequence.\r
+     * \r
+     * @param sequence\r
+     *            the sequence to test\r
+     * @return true only if the sequence is an ambiguous protein sequence,\r
+     *         false otherwise, e.g. if the sequence is a non-ambiguous protein\r
+     *         or DNA\r
+     */\r
+    public static boolean isAmbiguosProtein(String sequence) {\r
+        sequence = SequenceUtil.cleanSequence(sequence);\r
+        if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
+            return false;\r
+        }\r
+        // Every character must belong to the ambiguous alphabet. matches()\r
+        // tests the whole string, so digits, symbols and the empty string\r
+        // are all rejected here.\r
+        if (!SequenceUtil.AMBIGUOUS_AA.matcher(sequence).matches()) {\r
+            return false;\r
+        }\r
+        // At this point a character outside the standard alphabet can only be\r
+        // X or U; without one the sequence is a plain protein, not ambiguous.\r
+        return SequenceUtil.NON_AA.matcher(sequence).find();\r
}\r
- if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
- return false;\r
+\r
+    /**\r
+     * Writes list of FastaSequences into the outstream formatting the sequence\r
+     * so that it contains width chars on each line, then closes the stream.\r
+     * The stream is closed even when writing fails.\r
+     * \r
+     * @param outstream\r
+     *            the stream to write to; closed by this method\r
+     * @param sequences\r
+     *            the sequences to write\r
+     * @param width\r
+     *            - the maximum number of characters to write in one line\r
+     * @throws IOException\r
+     *             if writing or closing fails\r
+     */\r
+    public static void writeFasta(final OutputStream outstream,\r
+            final List<FastaSequence> sequences, final int width)\r
+            throws IOException {\r
+        try {\r
+            writeFastaKeepTheStream(outstream, sequences, width);\r
+        } finally {\r
+            // previously skipped when the write threw, leaking the stream\r
+            outstream.close();\r
+        }\r
}\r
- if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
- return false;\r
+\r
+    /**\r
+     * Writes the sequences in FASTA format, every sequence wrapped to width\r
+     * chars per line, and flushes the output. Unlike writeFasta the stream is\r
+     * left open.\r
+     * \r
+     * @param outstream\r
+     *            the stream to write to\r
+     * @param sequences\r
+     *            the sequences to write\r
+     * @param width\r
+     *            the maximum number of characters to write in one line\r
+     * @throws IOException\r
+     *             if writing fails\r
+     */\r
+    public static void writeFastaKeepTheStream(final OutputStream outstream,\r
+            final List<FastaSequence> sequences, final int width)\r
+            throws IOException {\r
+        final OutputStreamWriter streamWriter = new OutputStreamWriter(\r
+                outstream);\r
+        final BufferedWriter out = new BufferedWriter(streamWriter);\r
+        for (final FastaSequence record : sequences) {\r
+            final String header = ">" + record.getId() + "\n";\r
+            out.write(header);\r
+            out.write(record.getFormatedSequence(width));\r
+            out.write("\n");\r
+        }\r
+        out.flush();\r
+        streamWriter.flush();\r
+    }\r
+\r
+    /**\r
+     * Reads fasta sequences from inStream into the list of FastaSequence\r
+     * objects. A record starts at a line beginning with '>'; the lines that\r
+     * follow, with all whitespace removed, are joined into its sequence. Lines\r
+     * before the first '>' line are ignored. The stream is closed on return,\r
+     * also when reading fails.\r
+     * \r
+     * @param inStream\r
+     *            the stream to read from; closed by this method\r
+     * @return list of FastaSequence objects\r
+     * @throws IOException\r
+     *             if reading fails\r
+     */\r
+    public static List<FastaSequence> readFasta(final InputStream inStream)\r
+            throws IOException {\r
+        final List<FastaSequence> seqs = new ArrayList<FastaSequence>();\r
+\r
+        final BufferedReader infasta = new BufferedReader(\r
+                new InputStreamReader(inStream, "UTF8"), 16000);\r
+        // was "//s+", which matches a literal "//s" and never any whitespace\r
+        final Pattern pattern = Pattern.compile("\\s+");\r
+\r
+        try {\r
+            String line;\r
+            String sname = null;\r
+            StringBuilder seqstr = null; // null until the first header is seen\r
+            do {\r
+                line = infasta.readLine();\r
+                if ((line == null) || line.startsWith(">")) {\r
+                    // a header or the end of input completes the open record\r
+                    if (seqstr != null) {\r
+                        // substring(1) drops the leading '>'\r
+                        seqs.add(new FastaSequence(sname.substring(1), seqstr\r
+                                .toString()));\r
+                    }\r
+                    sname = line;\r
+                    seqstr = new StringBuilder();\r
+                } else if (seqstr != null) {\r
+                    seqstr.append(pattern.matcher(line).replaceAll(""));\r
+                }\r
+                // else: text before the first header; it used to end in a\r
+                // StringIndexOutOfBoundsException and is now skipped\r
+            } while (line != null);\r
+        } finally {\r
+            infasta.close();\r
+        }\r
+        return seqs;\r
}\r
- if (SequenceUtil.AA.matcher(sequence).find()) {\r
- return false;\r
+\r
+    /**\r
+     * Writes FastaSequence in the file, each sequence will take one line only.\r
+     * The stream is closed on return, also when writing fails.\r
+     * \r
+     * @param os\r
+     *            the stream to write to; closed by this method\r
+     * @param sequences\r
+     *            the sequences to write\r
+     * @throws IOException\r
+     *             if writing or closing fails\r
+     */\r
+    public static void writeFasta(final OutputStream os,\r
+            final List<FastaSequence> sequences) throws IOException {\r
+        final BufferedWriter fasta_out = new BufferedWriter(\r
+                new OutputStreamWriter(os));\r
+        try {\r
+            for (final FastaSequence fs : sequences) {\r
+                fasta_out.write(fs.getOnelineFasta());\r
+            }\r
+        } finally {\r
+            // closing the buffered writer flushes it and closes the wrapped\r
+            // writer and stream; this was skipped when a write threw\r
+            fasta_out.close();\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Reads a JRonn horizontal format file, see readJRonn(InputStream).\r
+     * \r
+     * @param result\r
+     *            the file to read\r
+     * @return the sequences with their annotations\r
+     * @throws IOException\r
+     *             if the file cannot be opened or read\r
+     * @throws UnknownFileFormatException\r
+     *             if the content is not in the expected format\r
+     */\r
+    public static List<AnnotatedSequence> readJRonn(final File result)\r
+            throws IOException, UnknownFileFormatException {\r
+        final InputStream input = new FileInputStream(result);\r
+        try {\r
+            return readJRonn(input);\r
+        } finally {\r
+            // also reached when parsing throws, so the file is always released\r
+            input.close();\r
+        }\r
}\r
- final Matcher amb_prot = SequenceUtil.AMBIGUOUS_AA.matcher(sequence);\r
- return amb_prot.find();\r
- }\r
-\r
- /**\r
- * Writes list of FastaSequeces into the outstream formatting the sequence\r
- * so that it contains width chars on each line\r
- * \r
- * @param outstream\r
- * @param sequences\r
- * @param width\r
- * - the maximum number of characters to write in one line\r
- * @throws IOException\r
- */\r
- public static void writeFasta(final OutputStream outstream,\r
- final List<FastaSequence> sequences, final int width)\r
- throws IOException {\r
- final OutputStreamWriter writer = new OutputStreamWriter(outstream);\r
- final BufferedWriter fastawriter = new BufferedWriter(writer);\r
- for (final FastaSequence fs : sequences) {\r
- fastawriter.write(fs.getFormatedSequence(width));\r
+\r
+    /**\r
+     * Reader for JRonn horizontal file format:\r
+     * \r
+     * <pre>\r
+     * >Foobar\r
+     * M G D T T A G\r
+     * 0.48 0.42 0.42 0.48 0.52 0.53 0.54\r
+     * </pre>\r
+     * \r
+     * All values are tab delimited. Lines outside such three line records are\r
+     * skipped. The stream is closed on return, also when reading fails.\r
+     * \r
+     * @param inStream\r
+     *            the stream to read from; closed by this method\r
+     * @return the sequences with their annotations, in input order\r
+     * @throws IOException\r
+     *             if reading fails\r
+     * @throws UnknownFileFormatException\r
+     *             if a record is incomplete, a value is not a number, or the\r
+     *             number of values differs from the number of residues\r
+     */\r
+    public static List<AnnotatedSequence> readJRonn(final InputStream inStream)\r
+            throws IOException, UnknownFileFormatException {\r
+        final List<AnnotatedSequence> seqs = new ArrayList<AnnotatedSequence>();\r
+\r
+        final BufferedReader infasta = new BufferedReader(\r
+                new InputStreamReader(inStream, "UTF8"), 16000);\r
+        try {\r
+            String line;\r
+            while ((line = infasta.readLine()) != null) {\r
+                if (!line.startsWith(">")) {\r
+                    // empty lines and anything outside a record are skipped\r
+                    continue;\r
+                }\r
+                final String sname = line.trim().substring(1);\r
+                final String sequenceLine = infasta.readLine();\r
+                final String annotationLine = infasta.readLine();\r
+                // a record cut short by the end of input used to fail with a\r
+                // NullPointerException\r
+                if (sequenceLine == null || annotationLine == null) {\r
+                    throw new UnknownFileFormatException(\r
+                            "File does not look like Jronn horizontally formatted output file!\n"\r
+                                    + JRONN_WRONG_FORMAT_MESSAGE);\r
+                }\r
+                final String sequence = sequenceLine.replace("\t", "");\r
+                final float[] annotation = convertToNumber(annotationLine\r
+                        .split("\t"));\r
+                if (annotation.length != sequence.length()) {\r
+                    throw new UnknownFileFormatException(\r
+                            "File does not look like Jronn horizontally formatted output file!\n"\r
+                                    + JRONN_WRONG_FORMAT_MESSAGE);\r
+                }\r
+                seqs.add(new AnnotatedSequence(sname, sequence, annotation));\r
+            }\r
+        } finally {\r
+            infasta.close();\r
+        }\r
+        return seqs;\r
}\r
- outstream.flush();\r
- fastawriter.close();\r
- writer.close();\r
- }\r
-\r
- /**\r
- * Reads fasta sequences from inStream into the list of FastaSequence\r
- * objects\r
- * \r
- * @param inStream\r
- * from\r
- * @return list of FastaSequence objects\r
- * @throws IOException\r
- */\r
- public static List<FastaSequence> readFasta(final InputStream inStream)\r
- throws IOException {\r
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();\r
-\r
- final BufferedReader infasta = new BufferedReader(\r
- new InputStreamReader(inStream, "UTF8"), 16000);\r
- final Pattern pattern = Pattern.compile("//s+");\r
-\r
- String line;\r
- String sname = "", seqstr = null;\r
- do {\r
- line = infasta.readLine();\r
- if ((line == null) || line.startsWith(">")) {\r
- if (seqstr != null) {\r
- seqs.add(new FastaSequence(sname.substring(1), seqstr));\r
+\r
+    /**\r
+     * Parses every element of the array as a float.\r
+     * \r
+     * @param annotValues\r
+     *            textual values\r
+     * @return the parsed values, in the same order\r
+     * @throws UnknownFileFormatException\r
+     *             if an element is not a valid float; the\r
+     *             NumberFormatException is attached as the cause\r
+     */\r
+    private static float[] convertToNumber(String[] annotValues)\r
+            throws UnknownFileFormatException {\r
+        float[] annotation = new float[annotValues.length];\r
+        try {\r
+            for (int i = 0; i < annotation.length; i++) {\r
+                annotation[i] = Float.parseFloat(annotValues[i]);\r
+            }\r
+        } catch (NumberFormatException e) {\r
+            // pass the exception itself; e.getCause() is normally null, so\r
+            // the offending value was lost from the stack trace\r
+            throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e);\r
+        }\r
- sname = line; // remove >\r
- seqstr = "";\r
- } else {\r
- final String subseq = pattern.matcher(line).replaceAll("");\r
- seqstr += subseq;\r
- }\r
- } while (line != null);\r
-\r
- infasta.close();\r
- return seqs;\r
- }\r
-\r
- /**\r
- * Writes FastaSequence in the file, each sequence will take one line only\r
- * \r
- * @param os\r
- * @param sequences\r
- * @throws IOException\r
- */\r
- public static void writeFasta(final OutputStream os,\r
- final List<FastaSequence> sequences) throws IOException {\r
- final OutputStreamWriter outWriter = new OutputStreamWriter(os);\r
- final BufferedWriter fasta_out = new BufferedWriter(outWriter);\r
- for (final FastaSequence fs : sequences) {\r
- fasta_out.write(fs.getOnelineFasta());\r
+        return annotation;\r
}\r
- fasta_out.close();\r
- outWriter.close();\r
- }\r
-\r
- public static List<AnnotatedSequence> readJRonn(final File result)\r
- throws IOException, UnknownFileFormatException {\r
- InputStream input = new FileInputStream(result);\r
- List<AnnotatedSequence> sequences = readJRonn(input);\r
- input.close();\r
- return sequences;\r
- }\r
-\r
- /**\r
- * Reader for JRonn horizontal file format\r
- * \r
- * >Foobar\r
- * \r
- * M G D T T A G\r
- * \r
- * 0.48 0.42 0.42 0.48 0.52 0.53 0.54\r
- * \r
- * All values are tab delimited\r
- * \r
- * @param inStream\r
- * @return\r
- * @throws IOException\r
- * @throws UnknownFileFormatException\r
- */\r
- public static List<AnnotatedSequence> readJRonn(final InputStream inStream)\r
- throws IOException, UnknownFileFormatException {\r
- final List<AnnotatedSequence> seqs = new ArrayList<AnnotatedSequence>();\r
-\r
- final BufferedReader infasta = new BufferedReader(\r
- new InputStreamReader(inStream, "UTF8"), 16000);\r
-\r
- String line;\r
- String sname = "";\r
- do {\r
- line = infasta.readLine();\r
- if (line == null || line.isEmpty()) {\r
- // skip empty lines\r
- continue;\r
- }\r
- if (line.startsWith(">")) {\r
- // read name\r
- sname = line.trim().substring(1);\r
- // read sequence line\r
- line = infasta.readLine();\r
- final String sequence = line.replace("\t", "");\r
- // read annotation line\r
- line = infasta.readLine();\r
- String[] annotValues = line.split("\t");\r
- float[] annotation = convertToNumber(annotValues);\r
- if (annotation.length != sequence.length()) {\r
- throw new UnknownFileFormatException(\r
- "File does not look like Jronn horizontally formatted output file!\n"\r
- + JRONN_WRONG_FORMAT_MESSAGE);\r
+\r
+ private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n"\r
+ + ">sequence_name\n "\r
+ + "M V S\n"\r
+ + "0.43 0.22 0.65\n"\r
+ + "Where first line is the sequence name,\n"\r
+ + "second line is the tab delimited sequence,\n"\r
+ + "third line contains tab delimited disorder prediction values.\n"\r
+ + "No lines are allowed between these three. Additionally, the number of "\r
+ + "sequence residues must be equal to the number of the disorder values.";\r
+\r
+    /**\r
+     * Closes the Closeable and logs the exception if any. A null stream is\r
+     * ignored.\r
+     * \r
+     * @param log\r
+     *            the logger that receives a WARNING when closing fails\r
+     * @param stream\r
+     *            the resource to close, may be null\r
+     */\r
+    public final static void closeSilently(java.util.logging.Logger log,\r
+            Closeable stream) {\r
+        if (stream != null) {\r
+            try {\r
+                stream.close();\r
+            } catch (IOException e) {\r
+                // log the exception itself; e.getCause() is usually null and\r
+                // would drop the stack trace from the log record\r
+                log.log(Level.WARNING, e.getLocalizedMessage(), e);\r
+            }\r
+        }\r
- seqs.add(new AnnotatedSequence(sname, sequence, annotation));\r
- }\r
- } while (line != null);\r
-\r
- infasta.close();\r
- return seqs;\r
- }\r
-\r
- private static float[] convertToNumber(String[] annotValues)\r
- throws UnknownFileFormatException {\r
- float[] annotation = new float[annotValues.length];\r
- try {\r
- for (int i = 0; i < annotation.length; i++) {\r
- annotation[i] = Float.parseFloat(annotValues[i]);\r
- }\r
- } catch (NumberFormatException e) {\r
- throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e\r
- .getCause());\r
}\r
- return annotation;\r
- }\r
-\r
- private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n"\r
- + ">sequence_name\n "\r
- + "M V S\n"\r
- + "0.43 0.22 0.65\n"\r
- + "Where first line is the sequence name,\n"\r
- + "second line is the tab delimited sequence,\n"\r
- + "third line contains tab delimited disorder prediction values.\n"\r
- + "No lines are allowed between these three. Additionally, the number of "\r
- + "sequence residues must be equal to the number of the disorder values.";\r
-\r
- /**\r
- * Closes the Closable and logs the exception if any\r
- * \r
- * @param log\r
- * @param stream\r
- */\r
- public final static void closeSilently(java.util.logging.Logger log,\r
- Closeable stream) {\r
- if (stream != null) {\r
- try {\r
- stream.close();\r
- } catch (IOException e) {\r
- log.log(Level.WARNING, e.getLocalizedMessage(), e.getCause());\r
- }\r
+\r
+ /**\r
+ * Reads Disembl scores for one or more sequences. TODO complete!\r
+ * <p>\r
+ * The input is split into chunks on the header line\r
+ * "# RESIDUE COILS REM465 HOTLOOPS" followed by a line feed. Every line of a\r
+ * chunk is read as a residue followed by three numbers, for example:\r
+ * \r
+ * <pre>\r
+ * # RESIDUE COILS REM465 HOTLOOPS\r
+ * M 0.86010 0.88512 0.37094\r
+ * T 0.79983 0.85864 0.44331\r
+ * ....\r
+ * # RESIDUE COILS REM465 HOTLOOPS\r
+ * M 0.86010 0.88512 0.37094\r
+ * </pre>\r
+ * \r
+ * The three numbers are stored as the COILS, REM465 and HOTLOOPS\r
+ * annotations, in that order. The residue letters are collected but not\r
+ * stored yet. The input stream is closed before a normal return.\r
+ * \r
+ * @param input\r
+ * stream with Disembl output\r
+ * @return one MultiAnnotatedSequence per chunk, in input order\r
+ * @throws IOException\r
+ * if closing the input stream fails\r
+ * @throws UnknownFileFormatException\r
+ * if the scanner finds no token in the input\r
+ */\r
+ public static List<MultiAnnotatedSequence<DisemblResultAnnot>> readDisembl(\r
+ final InputStream input) throws IOException,\r
+ UnknownFileFormatException {\r
+ Scanner scan = new Scanner(input);\r
+ // each token is the text between two header lines\r
+ scan.useDelimiter("# RESIDUE COILS REM465 HOTLOOPS\n");\r
+ // NOTE(review): this only detects input without any token; input that\r
+ // lacks the header but has other content still passes - confirm intent\r
+ if (!scan.hasNext()) {\r
+ throw new UnknownFileFormatException(\r
+ "In Disembl score format each seqeunce score is expected to start from the line: "\r
+ + "'# RESIDUE COILS REM465 HOTLOOPS\\n'."\r
+ + " No such line was found!");\r
+ }\r
+\r
+ List<MultiAnnotatedSequence<DisemblResultAnnot>> results = new ArrayList<MultiAnnotatedSequence<DisemblResultAnnot>>();\r
+ // incremented per chunk but never read\r
+ int seqCounter = 0;\r
+ while (scan.hasNext()) {\r
+ seqCounter++;\r
+ String singleSeq = scan.next();\r
+ Scanner scansingle = new Scanner(singleSeq);\r
+ // residue letters of this chunk; only used by the commented-out TODO below\r
+ StringBuffer seqbuffer = new StringBuffer();\r
+ List<Float> coils = new ArrayList<Float>();\r
+ List<Float> rem = new ArrayList<Float>();\r
+ List<Float> hotloops = new ArrayList<Float>();\r
+\r
+ MultiAnnotatedSequence<DisemblResultAnnot> disemblRes = new MultiAnnotatedSequence<DisemblResultAnnot>(\r
+ DisemblResultAnnot.class);\r
+\r
+ while (scansingle.hasNextLine()) {\r
+ String valueLine = scansingle.nextLine();\r
+ // one line: residue, then the three scores in column order\r
+ Scanner values = new Scanner(valueLine);\r
+ seqbuffer.append(values.next());\r
+ coils.add(values.nextFloat());\r
+ rem.add(values.nextFloat());\r
+ hotloops.add(values.nextFloat());\r
+ values.close();\r
+ }\r
+ disemblRes.addAnnotation(DisemblResultAnnot.COILS, coils);\r
+ disemblRes.addAnnotation(DisemblResultAnnot.REM465, rem);\r
+ disemblRes.addAnnotation(DisemblResultAnnot.HOTLOOPS, hotloops);\r
+ // TODO\r
+ // disemblRes.sequence = seqbuffer.toString();\r
+ scansingle.close();\r
+ results.add(disemblRes);\r
+ }\r
+\r
+ input.close();\r
+ return results;\r
}\r
- }\r
-\r
- public static List<AnnotatedSequence> readDisembl(final File result)\r
- throws IOException, UnknownFileFormatException {\r
- InputStream input = new FileInputStream(result);\r
- List<AnnotatedSequence> sequences = readJRonn(input);\r
- input.close();\r
- return sequences;\r
- }\r
+\r
}\r
--- /dev/null
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
+ */\r
+\r
+package compbio.runner.conservation;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.Arrays;\r
+import java.util.List;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.conservation.ResultReader;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.client.CommandBuilder;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.SkeletalExecutable;\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.runner.Util;\r
+\r
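+/**\r
+ * Runner for the AA Conservation (AACon) executable jar.\r
+ * \r
+ * Parameters assembled by this class: -jar followed by the value of the\r
+ * aacon.jar.file property, -d=stat.txt, -f=RESULT_NO_ALIGNMENT, -i= with the\r
+ * input file and -o= with the output file.\r
+ * \r
+ * The command line previously documented here refers to the JRonn jar and\r
+ * appears to have been copied from the JRonn runner:\r
+ * \r
+ * java -Xmx512 -jar jronn_v3.jar -i=test_seq.txt -n=1 -o=out.txt -s=stat.out\r
+ * \r
+ * @author pvtroshin\r
+ * \r
+ */\r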
+public class AACon extends SkeletalExecutable<AACon> {\r
+\r
+ private static Logger log = Logger.getLogger(AACon.class);\r
+\r
+ /**\r
+ * Number of cores to use, defaults to 1 for local execution or the value of\r
+ * "jronn.cluster.cpunum" property for cluster execution\r
+ */\r
+ private int ncoreNumber = 0;\r
+\r
+ private final String ncorePrm = "-n=";\r
+\r
+ // Cache for Limits information\r
+ private static LimitsManager<AACon> limits;\r
+\r
+ public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
+ public static final String STAT_FILE = "stat.txt";\r
+\r
+ /**\r
+ * Creates the runner with its fixed parameters: -jar with the path returned\r
+ * by getLibPath(), -d= with STAT_FILE and -f=RESULT_NO_ALIGNMENT.\r
+ */\r
+ public AACon() {\r
+ addParameters(Arrays.asList("-jar", getLibPath(), "-d=" + STAT_FILE,\r
+ "-f=RESULT_NO_ALIGNMENT"));\r
+ }\r
+\r
+ @SuppressWarnings("unchecked")\r
+ @Override\r
+ public MultiAnnotatedSequence<Method> getResults(String workDirectory)\r
+ throws ResultNotAvailableException {\r
+ MultiAnnotatedSequence<Method> annotations = null;\r
+ try {\r
+ InputStream inStream = new FileInputStream(new File(workDirectory,\r
+ getOutput()));\r
+ annotations = ResultReader.readResults(inStream);\r
+ inStream.close();\r
+ } catch (FileNotFoundException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ } catch (IOException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ } catch (NullPointerException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ }\r
+ return annotations;\r
+ }\r
+\r
+ private static String getLibPath() {\r
+\r
+ String settings = ph.getProperty("aacon.jar.file");\r
+ if (compbio.util.Util.isEmpty(settings)) {\r
+ throw new NullPointerException(\r
+ "Please define aacon.jar.file property in Executable.properties file"\r
+ + "and initialize it with the location of jronn jar file");\r
+ }\r
+ if (new File(settings).isAbsolute()) {\r
+ // Jronn jar can be found so no actions necessary\r
+ // no further actions is necessary\r
+ return settings;\r
+ }\r
+ return compbio.engine.client.Util.convertToAbsolute(settings);\r
+ }\r
+\r
+ @Override\r
+ public List<String> getCreatedFiles() {\r
+ return Arrays.asList(getOutput(), getError());\r
+ }\r
+\r
+ @Override\r
+ public AACon setInput(String inFile) {\r
+ super.setInput(inFile);\r
+ cbuilder.setParam("-i=" + inFile);\r
+ return this;\r
+ }\r
+\r
+ @Override\r
+ public AACon setOutput(String outFile) {\r
+ super.setOutput(outFile);\r
+ cbuilder.setParam("-o=" + outFile);\r
+ return this;\r
+ }\r
+\r
+ @Override\r
+ public Limit<AACon> getLimit(String presetName) {\r
+ if (limits == null) {\r
+ limits = getLimits();\r
+ }\r
+ Limit<AACon> limit = null;\r
+ if (limits != null) {\r
+ // this returns default limit if preset is undefined!\r
+ limit = limits.getLimitByName(presetName);\r
+ }\r
+ // If limit is not defined for a particular preset, then return default\r
+ // limit\r
+ if (limit == null) {\r
+ log.debug("Limit for the preset " + presetName\r
+ + " is not found. Using default");\r
+ limit = limits.getDefaultLimit();\r
+ }\r
+ return limit;\r
+ }\r
+\r
+ @Override\r
+ public LimitsManager<AACon> getLimits() {\r
+ // synchronise on static field\r
+ synchronized (log) {\r
+ if (limits == null) {\r
+ limits = Util.getLimits(this.getClass());\r
+ }\r
+ }\r
+ return limits;\r
+ }\r
+\r
+ @Override\r
+ public Class<? extends Executable<?>> getType() {\r
+ return this.getClass();\r
+ }\r
+\r
+ public static String getStatFile() {\r
+ return STAT_FILE;\r
+ }\r
+\r
+ public void setNCore(int ncoreNumber) {\r
+ if (ncoreNumber < 1 || ncoreNumber > 100) {\r
+ throw new IndexOutOfBoundsException(\r
+ "Number of cores must be within 1 and 100 ");\r
+ }\r
+ this.ncoreNumber = ncoreNumber;\r
+ cbuilder.setParam(ncorePrm + Integer.toString(getNCore()));\r
+ }\r
+\r
+ int getNCore() {\r
+ return ncoreNumber;\r
+ }\r
+\r
+ @Override\r
+ public CommandBuilder<AACon> getParameters(ExecProvider provider) {\r
+ // If number of cores is provided, set it for the cluster execution\r
+ // only!\r
+ if (provider == Executable.ExecProvider.Cluster) {\r
+ int cpunum = SkeletalExecutable.getClusterCpuNum(getType());\r
+ cpunum = (cpunum == 0) ? 1 : cpunum;\r
+ setNCore(cpunum);\r
+ } else {\r
+ // Limit number of cores to 1 for ANY execution which does not set\r
+ // Ncores explicitly using setNCore method or is run on local VM\r
+ if (ncoreNumber == 0) {\r
+ setNCore(1);\r
+ }\r
+ }\r
+ return super.getParameters(provider);\r
+ }\r
+\r
+}\r
-/* Copyright (c) 2009 Peter Troshin\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
*/\r
\r
package compbio.runner.disorder;\r
\r
import org.apache.log4j.Logger;\r
\r
-import com.sun.xml.internal.bind.api.impl.NameConverter.Standard;\r
import compbio.data.sequence.Alignment;\r
import compbio.data.sequence.UnknownFileFormatException;\r
import compbio.engine.client.Executable;\r
import compbio.runner.Util;\r
\r
/**\r
- * @see Standard DisEMBL DisEMBL.py smooth_frame peak_frame join_frame\r
- * fold_coils fold_hotloops fold_rem465 sequence_file print 'A default run\r
- * would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 fasta_file > out'\r
+ * @see DisEMBL\r
* \r
- * new DisEMBL is at /homes/pvtroshin/soft/DisEMBL-1.4raw This is not a\r
- * standard DisEMBL! The script has been modified! DisEMBL.py smooth_frame\r
- * peak_frame join_frame fold_coils fold_hotloops fold_rem465 [mode] <\r
- * fasta_file > out print 'A default run would be: ./DisEMBL.py 8 8 4 1.2\r
- * 1.4 1.2 < fasta_file' print 'Mode: "default"(nothing) or "scores" which\r
- * will give scores per residue in TAB separated format'\r
+ * DisEMBL.py smooth_frame peak_frame join_frame fold_coils fold_hotloops\r
+ * fold_rem465 sequence_file print 'A default run would be: ./DisEMBL.py 8\r
+ * 8 4 1.2 1.4 1.2 fasta_file > out' new DisEMBL is at\r
+ * /homes/pvtroshin/soft/DisEMBL-1.4raw\r
* \r
+ * This is not a standard DisEMBL! The script has been modified! DisEMBL.py\r
+ * smooth_frame peak_frame join_frame fold_coils fold_hotloops fold_rem465\r
+ * [mode] < fasta_file > out print\r
+ * \r
+ * 'A default run would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 < fasta_file'\r
+ * print 'Mode: "default"(nothing) or "scores" which will give scores per\r
+ * residue in TAB separated format'\r
*/\r
public class Disembl extends SkeletalExecutable<Disembl> implements\r
- PipedExecutable<Disembl> {\r
-\r
- private static Logger log = Logger.getLogger(Disembl.class);\r
-\r
- // Cache for Limits information\r
- private static LimitsManager<Disembl> limits;\r
-\r
- public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
-\r
- public Disembl() {\r
- // remove default input to prevent it to appear in the parameters list\r
- // that could happen if the parameters are set first\r
- // super.setInput("");\r
- addParameters(Arrays.asList("8", "8", "4", "1.2", "1.4", "1.2",\r
- "scores"));\r
- }\r
-\r
- @SuppressWarnings("unchecked")\r
- public Alignment getResults(String workDirectory)\r
- throws ResultNotAvailableException {\r
- try {\r
- return Util.readClustalFile(workDirectory, getOutput());\r
- } catch (FileNotFoundException e) {\r
- log.error(e.getMessage(), e.getCause());\r
- throw new ResultNotAvailableException(e);\r
- } catch (IOException e) {\r
- log.error(e.getMessage(), e.getCause());\r
- throw new ResultNotAvailableException(e);\r
- } catch (UnknownFileFormatException e) {\r
- log.error(e.getMessage(), e.getCause());\r
- throw new ResultNotAvailableException(e);\r
- } catch (NullPointerException e) {\r
- log.error(e.getMessage(), e.getCause());\r
- throw new ResultNotAvailableException(e);\r
+ PipedExecutable<Disembl> {\r
+\r
+ private static Logger log = Logger.getLogger(Disembl.class);\r
+\r
+ // Cache for Limits information\r
+ private static LimitsManager<Disembl> limits;\r
+\r
+ public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
+\r
+ /**\r
+ * Creates the executable with the parameter list "8 8 4 1.2 1.4 1.2 scores"\r
+ * already added.\r
+ */\r
+ public Disembl() {\r
+ // remove default input to prevent it to appear in the parameters list\r
+ // that could happen if the parameters are set first\r
+ // super.setInput("");\r
+ addParameters(Arrays.asList("8", "8", "4", "1.2", "1.4", "1.2",\r
+ "scores"));\r
 }\r
- }\r
-\r
- @Override\r
- public Disembl setInput(String inFile) {\r
- super.setInput(inFile);\r
- cbuilder.setLast(inFile);\r
- return this;\r
- }\r
-\r
- @Override\r
- public Limit<Disembl> getLimit(String presetName) {\r
- if (limits == null) {\r
- limits = getLimits();\r
+\r
+ @SuppressWarnings("unchecked")\r
+ public Alignment getResults(String workDirectory)\r
+ throws ResultNotAvailableException {\r
+ try {\r
+ return Util.readClustalFile(workDirectory, getOutput());\r
+ } catch (FileNotFoundException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ } catch (IOException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ } catch (UnknownFileFormatException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ } catch (NullPointerException e) {\r
+ log.error(e.getMessage(), e.getCause());\r
+ throw new ResultNotAvailableException(e);\r
+ }\r
}\r
\r
- Limit<Disembl> limit = null;\r
- if (limits != null) {\r
- // this returns default limit if preset is undefined!\r
- limit = limits.getLimitByName(presetName);\r
+ /**\r
+ * Registers the input file with the superclass and also hands it to\r
+ * cbuilder.setLast.\r
+ */\r
+ @Override\r
+ public Disembl setInput(String inFile) {\r
+ super.setInput(inFile);\r
+ cbuilder.setLast(inFile);\r
+ return this;\r
 }\r
- // If limit is not defined for a particular preset, then return default\r
- // limit\r
- if (limit == null) {\r
- log.debug("Limit for the preset " + presetName\r
- + " is not found. Using default");\r
- limit = limits.getDefaultLimit();\r
+\r
+ @Override\r
+ public Limit<Disembl> getLimit(String presetName) {\r
+ if (limits == null) {\r
+ limits = getLimits();\r
+ }\r
+\r
+ Limit<Disembl> limit = null;\r
+ if (limits != null) {\r
+ // this returns default limit if preset is undefined!\r
+ limit = limits.getLimitByName(presetName);\r
+ }\r
+ // If limit is not defined for a particular preset, then return default\r
+ // limit\r
+ if (limit == null) {\r
+ log.debug("Limit for the preset " + presetName\r
+ + " is not found. Using default");\r
+ limit = limits.getDefaultLimit();\r
+ }\r
+ return limit;\r
}\r
- return limit;\r
- }\r
-\r
- @Override\r
- public LimitsManager<Disembl> getLimits() {\r
- // synchronise on static field\r
- synchronized (log) {\r
- if (limits == null) {\r
- limits = Util.getLimits(this.getClass());\r
- }\r
+\r
+ /**\r
+ * Loads the limits with Util.getLimits on first use and caches them in the\r
+ * static "limits" field; later calls return the cached value.\r
+ */\r
+ @Override\r
+ public LimitsManager<Disembl> getLimits() {\r
+ // synchronise on static field\r
+ synchronized (log) {\r
+ if (limits == null) {\r
+ limits = Util.getLimits(this.getClass());\r
+ }\r
+ }\r
+ return limits;\r
 }\r
- return limits;\r
- }\r
\r
- @Override\r
- public Class<? extends Executable<?>> getType() {\r
- return this.getClass();\r
- }\r
+ /**\r
+ * @return the runtime class of this executable\r
+ */\r
+ @Override\r
+ public Class<? extends Executable<?>> getType() {\r
+ return this.getClass();\r
+ }\r
\r
}\r
-/* Copyright (c) 2009 Peter Troshin\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
*/\r
-\r
package compbio.data.sequence;\r
\r
import static org.testng.AssertJUnit.assertEquals;\r
\r
public class SequenceUtilTester {\r
\r
- @Test()\r
- public void testisNonAmbNucleotideSequence() {\r
- String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
- assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
- String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
- assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
- String nonDna = "atgfctgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
- nonDna = "atgc1tgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
- nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
- // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
- }\r
-\r
- @Test()\r
- public void testCleanSequence() {\r
- String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
- assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
- SequenceUtil.cleanSequence(dirtySeq));\r
- }\r
-\r
- @Test()\r
- public void testDeepCleanSequence() {\r
- String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
- assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
- SequenceUtil.deepCleanSequence(dirtySeq));\r
- }\r
-\r
- @Test()\r
- public void testisProteinSequence() {\r
- String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
- assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
- String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
- String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
- assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
- AAseq += "XU";\r
- assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
-\r
- }\r
-\r
- @Test()\r
- public void testReadWriteFasta() {\r
-\r
- try {\r
- FileInputStream fio = new FileInputStream(\r
- AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
- assertNotNull(fio);\r
- List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
- assertNotNull(fseqs);\r
- assertEquals(3, fseqs.size());\r
- assertEquals(3, fseqs.size());\r
- fio.close();\r
- FileOutputStream fou = new FileOutputStream(\r
- AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
- SequenceUtil.writeFasta(fou, fseqs);\r
- fou.close();\r
- FileOutputStream fou20 = new FileOutputStream(\r
- AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
- SequenceUtil.writeFasta(fou20, fseqs, 20);\r
- fou20.close();\r
-\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
+    /**\r
+     * Checks which inputs SequenceUtil.isNonAmbNucleotideSequence accepts.\r
+     */\r
+    @Test()\r
+    public void testisNonAmbNucleotideSequence() {\r
+        // Mixed case nucleotides are expected to be accepted\r
+        String plainDna = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
+        assertTrue(SequenceUtil.isNonAmbNucleotideSequence(plainDna));\r
+\r
+        // Tabs, new lines and spaces are expected to be tolerated\r
+        String noisyDna = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
+        assertTrue(SequenceUtil.isNonAmbNucleotideSequence(noisyDna));\r
+\r
+        // A letter which is not a nucleotide\r
+        String rejected = "atgfctgatgcatgcatgatgctga";\r
+        assertFalse(SequenceUtil.isNonAmbNucleotideSequence(rejected));\r
+\r
+        // A digit inside the sequence\r
+        rejected = "atgc1tgatgcatgcatgatgctga";\r
+        assertFalse(SequenceUtil.isNonAmbNucleotideSequence(rejected));\r
+\r
+        // A protein sequence\r
+        rejected = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+        assertFalse(SequenceUtil.isNonAmbNucleotideSequence(rejected));\r
+        // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
+        assertFalse(SequenceUtil.isNonAmbNucleotideSequence(rejected));\r
+\r
+    }\r
+\r
+ @Test()\r
+ public void testCleanSequence() {\r
+ String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+ assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
+ SequenceUtil.cleanSequence(dirtySeq));\r
+ }\r
+\r
+ @Test()\r
+ public void testDeepCleanSequence() {\r
+ String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
+ assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
+ SequenceUtil.deepCleanSequence(dirtySeq));\r
}\r
- }\r
-\r
- /**\r
- * This test tests the loading of horizontally formatted Jronn output file\r
- */\r
- @Test\r
- public void loadJronnFile() {\r
-\r
- FileInputStream fio;\r
- try {\r
- fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
- List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
- assertNotNull(aseqs);\r
- assertEquals(aseqs.size(), 3);\r
- AnnotatedSequence aseq = aseqs.get(0);\r
- assertNotNull(aseq);\r
- assertNotNull(aseq.getAnnotation());\r
- //System.out.println(aseq);\r
- assertEquals(aseq.getAnnotation().length, aseq.getSequence()\r
- .length());\r
- fio.close();\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
+\r
+    /**\r
+     * Checks which inputs SequenceUtil.isProteinSequence accepts.\r
+     */\r
+    @Test()\r
+    public void testisProteinSequence() {\r
+        // A nucleotide sequence with whitespace is expected to be rejected\r
+        String noisyDna = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+        assertFalse(SequenceUtil.isProteinSequence(noisyDna));\r
+\r
+        // So is a sequence which contains a digit\r
+        String withDigit = "atgc1tgatgcatgcatgatgctga";\r
+        assertFalse(SequenceUtil.isProteinSequence(withDigit));\r
+\r
+        String protein = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+        assertTrue(SequenceUtil.isProteinSequence(protein));\r
+\r
+        // Appending "XU" is expected to make the sequence invalid\r
+        String extended = protein + "XU";\r
+        assertFalse(SequenceUtil.isProteinSequence(extended));\r
+\r
+    }\r
+\r
+ @Test()\r
+ public void testReadWriteFasta() {\r
+\r
+ try {\r
+ FileInputStream fio = new FileInputStream(\r
+ AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
+ assertNotNull(fio);\r
+ List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
+ assertNotNull(fseqs);\r
+ assertEquals(3, fseqs.size());\r
+ assertEquals(3, fseqs.size());\r
+ fio.close();\r
+ FileOutputStream fou = new FileOutputStream(\r
+ AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
+ SequenceUtil.writeFasta(fou, fseqs);\r
+ fou.close();\r
+ FileOutputStream fou20 = new FileOutputStream(\r
+ AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
+ SequenceUtil.writeFasta(fou20, fseqs, 21);\r
+ fou20.close();\r
+\r
+ } catch (FileNotFoundException e) {\r
+ e.printStackTrace();\r
+ fail(e.getLocalizedMessage());\r
+ } catch (IOException e) {\r
+ e.printStackTrace();\r
+ fail(e.getLocalizedMessage());\r
+ }\r
}\r
\r
- }\r
+    /**\r
+     * This test tests the loading of horizontally formatted Jronn output file\r
+     */\r
+    @Test\r
+    public void loadJronnFile() {\r
+\r
+        try {\r
+            FileInputStream fio = new FileInputStream(\r
+                    AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
+            try {\r
+                List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
+                assertNotNull(aseqs);\r
+                // Expected value goes first, as in the other assertions of\r
+                // this class\r
+                assertEquals(3, aseqs.size());\r
+                AnnotatedSequence aseq = aseqs.get(0);\r
+                assertNotNull(aseq);\r
+                assertNotNull(aseq.getAnnotation());\r
+                // System.out.println(aseq);\r
+                // One annotation value is expected per residue\r
+                assertEquals(aseq.getSequence().length(),\r
+                        aseq.getAnnotation().length);\r
+            } finally {\r
+                // Close the file even if one of the assertions has failed\r
+                fio.close();\r
+            }\r
+        } catch (FileNotFoundException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (IOException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (UnknownFileFormatException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        }\r
 \r
+    }\r
+\r
+ // Sample enum; within the visible code it is referenced only from the\r
+ // commented-out snippet in testMultiAnnotatedSequence\r
+ enum Trial {\r
+ one, two, three\r
+ };\r
+\r
+    /**\r
+     * This test tests the loading of the Disembl output file (disembl.out)\r
+     * with SequenceUtil.readDisembl\r
+     */\r
+    @SuppressWarnings("unchecked")\r
+    @Test\r
+    public void testMultiAnnotatedSequence() {\r
+\r
+        try {\r
+            FileInputStream fio = new FileInputStream(\r
+                    AllTestSuit.TEST_DATA_PATH + "disembl.out");\r
+            try {\r
+                List<MultiAnnotatedSequence<DisemblResultAnnot>> aseqs = SequenceUtil\r
+                        .readDisembl(fio);\r
+                assertNotNull(aseqs);\r
+\r
+                /*\r
+                 * MultiAnnotatedSequence ma = new MultiAnnotatedSequence();\r
+                 * Map<Trial, List<Number>> val = ma.getInstance(Trial.class);\r
+                 * List<Number> list = new ArrayList<Number>(); list.add(new\r
+                 * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one,\r
+                 * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f));\r
+                 * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0);\r
+                 */\r
+            } finally {\r
+                // Close the file even if reading or the assertion has failed\r
+                fio.close();\r
+            }\r
+        } catch (FileNotFoundException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (IOException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (UnknownFileFormatException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        }\r
+\r
+    }\r
}\r
* For this to work execution must start from the project directory!\r
*/\r
public static final String CURRENT_DIRECTORY = SysPrefs\r
- .getCurrentDirectory()\r
- + File.separator;\r
+ .getCurrentDirectory() + File.separator;\r
\r
public static final String TEST_DATA_PATH = "testsrc" + File.separator\r
+ "testdata" + File.separator;\r
public static final String test_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
+ "TO1381.fasta";\r
\r
+ public static final String test_alignment_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
+ + "TO1381.fasta.aln";\r
+\r
public static final String test_input_real = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
+ "50x500Protein.fasta";\r
\r
--- /dev/null
+/*\r
+ * Copyright (c) 2010 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 2.0 \r
+ * \r
+ * This library is free software; you can redistribute it and/or modify it under \r
+ * the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * \r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt \r
+ * \r
+ * Any republication or derived work distributed in source code form must include \r
+ * this copyright and license notice.\r
+ */\r
+package compbio.runner.conservation;\r
+\r
+import static org.testng.Assert.assertEquals;\r
+import static org.testng.Assert.assertFalse;\r
+import static org.testng.Assert.assertNotNull;\r
+import static org.testng.Assert.assertNull;\r
+import static org.testng.Assert.assertTrue;\r
+import static org.testng.Assert.fail;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.text.ParseException;\r
+\r
+import javax.xml.bind.ValidationException;\r
+\r
+import org.ggf.drmaa.DrmaaException;\r
+import org.ggf.drmaa.JobInfo;\r
+import org.testng.annotations.BeforeMethod;\r
+import org.testng.annotations.Test;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.FilePuller;\r
+import compbio.engine.SyncExecutor;\r
+import compbio.engine.client.ConfExecutable;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.RunConfiguration;\r
+import compbio.engine.cluster.drmaa.ClusterUtil;\r
+import compbio.engine.cluster.drmaa.JobRunner;\r
+import compbio.engine.cluster.drmaa.StatisticManager;\r
+import compbio.engine.local.LocalRunner;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobExecutionException;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.util.FileWatcher;\r
+import compbio.util.SysPrefs;\r
+\r
+public class AAConTester {\r
+\r
+ public static final String CURRENT_DIRECTORY = SysPrefs\r
+ .getCurrentDirectory() + File.separator;\r
+\r
+ public static String test_outfile = "TO1381.aacon.out"; // "/homes/pvtroshin/TO1381.clustal.cluster.out\r
+ public static String test_alignment_input = CURRENT_DIRECTORY + "testsrc"\r
+ + File.separator + "testdata" + File.separator + "TO1381.fasta.aln";\r
+ private AACon aacon;\r
+\r
+ /**\r
+ * Creates a fresh AACon instance before every test method, with\r
+ * test_alignment_input as the input and test_outfile as the output.\r
+ */\r
+ @BeforeMethod(alwaysRun = true)\r
+ void init() {\r
+ aacon = new AACon();\r
+ aacon.setInput(test_alignment_input).setOutput(test_outfile);\r
+ }\r
+\r
+ @Test()\r
+ public void testRunOnCluster() {\r
+ assertFalse(SysPrefs.isWindows,\r
+ "Cluster execution can only be in unix environment");\r
+ try {\r
+ ConfiguredExecutable<AACon> confAAcon = Configurator\r
+ .configureExecutable(aacon, Executable.ExecProvider.Cluster);\r
+ JobRunner runner = JobRunner.getInstance(confAAcon);\r
+\r
+ assertNotNull(runner, "Runner is NULL");\r
+ runner.executeJob();\r
+ // assertNotNull("JobId is null", jobId1);\r
+ JobStatus status = runner.getJobStatus();\r
+ assertTrue(status == JobStatus.PENDING\r
+ || status == JobStatus.RUNNING,\r
+ "Status of the process is wrong!");\r
+ JobInfo info = runner.getJobInfo();\r
+ assertNotNull(info, "JobInfo is null");\r
+ StatisticManager sm = new StatisticManager(info);\r
+ assertNotNull(sm, "Statictic manager is null");\r
+ try {\r
+\r
+ String exits = sm.getExitStatus();\r
+ assertNotNull("Exit status is null", exits);\r
+ // cut 4 trailing zeros from the number\r
+ int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits)\r
+ .intValue();\r
+ assertEquals(0, exitsInt);\r
+ System.out.println(sm.getAllStats());\r
+\r
+ } catch (ParseException e) {\r
+ e.printStackTrace();\r
+ fail("Parse Exception: " + e.getMessage());\r
+ }\r
+ // assertFalse(runner.cleanup());\r
+ assertTrue(sm.hasExited());\r
+ assertFalse(sm.wasAborted());\r
+ assertFalse(sm.hasDump());\r
+ assertFalse(sm.hasSignaled());\r
+\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ } catch (JobExecutionException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ } catch (DrmaaException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * This tests fails from time to time depending on the cluster load or some\r
+ * other factors. Any client code has to adjust for this issue\r
+ */\r
+ @Test()\r
+ public void testRunOnClusterAsync() {\r
+ assertFalse(SysPrefs.isWindows,\r
+ "Cluster execution can only be in unix environment");\r
+ try {\r
+ ConfiguredExecutable<AACon> confAAcon = Configurator\r
+ .configureExecutable(aacon, Executable.ExecProvider.Cluster);\r
+ AsyncExecutor aengine = Configurator.getAsyncEngine(confAAcon);\r
+ String jobId = aengine.submitJob(confAAcon);\r
+ assertNotNull(jobId, "Runner is NULL");\r
+ // let drmaa to start\r
+ Thread.sleep(500);\r
+ JobStatus status = aengine.getJobStatus(jobId);\r
+ while (status != JobStatus.FINISHED) {\r
+ System.out.println("Job Status: " + status);\r
+ Thread.sleep(1000);\r
+ status = aengine.getJobStatus(jobId);\r
+ ConfiguredExecutable<AACon> result = (ConfiguredExecutable<AACon>) aengine\r
+ .getResults(jobId);\r
+ assertNotNull(result);\r
+ System.out.println("RES:" + result);\r
+ // Some times the job could be removed from the cluster\r
+ // accounting\r
+ // before it has been reported to finish. Make sure\r
+ // to stop waiting in such case\r
+ if (status == JobStatus.UNDEFINED) {\r
+ break;\r
+ }\r
+ }\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ } catch (InterruptedException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (ResultNotAvailableException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ }\r
+ }\r
+\r
+    /**\r
+     * Runs AACon with the local provider and compares the results obtained\r
+     * from the runner with the ones read from the configured executable.\r
+     */\r
+    @Test()\r
+    public void testRunLocally() {\r
+        try {\r
+            ConfiguredExecutable<AACon> configured = Configurator\r
+                    .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+            // For local execution use relative\r
+            LocalRunner runner = new LocalRunner(configured);\r
+            runner.executeJob();\r
+            ConfiguredExecutable<?> finished = runner.waitForResult();\r
+            assertNotNull(finished.getResults());\r
+\r
+            MultiAnnotatedSequence<Method> scores = configured.getResults();\r
+            assertNotNull(scores);\r
+            // 18 annotation sets are expected in the output\r
+            assertEquals(scores.getAnnotations().size(), 18);\r
+            assertEquals(finished.getResults(), scores);\r
+        } catch (JobSubmissionException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (ResultNotAvailableException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (JobExecutionException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Same as the plain local run, but with the number of cores set to 2\r
+     * before the executable is configured.\r
+     */\r
+    @Test()\r
+    public void testRunLocallyOnTwoCpu() {\r
+        try {\r
+            aacon.setNCore(2);\r
+            ConfiguredExecutable<AACon> configured = Configurator\r
+                    .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+            // For local execution use relative\r
+            LocalRunner runner = new LocalRunner(configured);\r
+            runner.executeJob();\r
+            ConfiguredExecutable<?> finished = runner.waitForResult();\r
+            assertNotNull(finished.getResults());\r
+\r
+            MultiAnnotatedSequence<Method> scores = configured.getResults();\r
+            assertNotNull(scores);\r
+            // 18 annotation sets are expected in the output\r
+            assertEquals(scores.getAnnotations().size(), 18);\r
+            assertEquals(finished.getResults(), scores);\r
+        } catch (JobSubmissionException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (ResultNotAvailableException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        } catch (JobExecutionException e) {\r
+            e.printStackTrace();\r
+            fail(e.getLocalizedMessage());\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Submits a local asynchronous job and pulls the statistics file in\r
+     * chunks until the job is finished and no more data is available.\r
+     */\r
+    @Test()\r
+    public void readStatistics() {\r
+        try {\r
+            ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                    .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+            // For local execution use relative\r
+\r
+            AsyncExecutor sexec = Configurator.getAsyncEngine(confAAcon);\r
+            String jobId = sexec.submitJob(confAAcon);\r
+            FilePuller fw = FilePuller.newFilePuller(\r
+                    confAAcon.getWorkDirectory() + File.separator\r
+                            + AACon.getStatFile(),\r
+                    FileWatcher.MIN_CHUNK_SIZE_BYTES);\r
+            int count = 0;\r
+            long position = 0;\r
+            fw.waitForFile(2);\r
+            JobStatus status = sexec.getJobStatus(jobId);\r
+            do {\r
+                ChunkHolder ch = fw.pull(position);\r
+                // Only the position is needed, the chunk content is not\r
+                // checked by this test\r
+                position = ch.getNextPosition();\r
+                count++;\r
+                // Make sure the loop is terminated if the job fails;\r
+                // fail() throws, so no explicit break is required\r
+                if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) {\r
+                    fail("job failed!");\r
+                }\r
+                Thread.sleep(300);\r
+                status = sexec.getJobStatus(jobId);\r
+            } while (status != JobStatus.FINISHED || fw.hasMoreData());\r
+\r
+            assertTrue(count >= 1);\r
+            ConfiguredExecutable<?> al = sexec.getResults(jobId);\r
+            assertNotNull(al.getResults());\r
+        } catch (JobSubmissionException e) {\r
+            e.printStackTrace();\r
+            fail(e.getMessage());\r
+        } catch (ResultNotAvailableException e) {\r
+            e.printStackTrace();\r
+            fail(e.getMessage());\r
+        } catch (IOException e) {\r
+            e.printStackTrace();\r
+            fail(e.getMessage());\r
+        } catch (InterruptedException e) {\r
+            e.printStackTrace();\r
+            fail(e.getMessage());\r
+        }\r
+    }\r
+\r
+ @Test()\r
+ public void testPersistance() {\r
+ try {\r
+ AACon aacon = new AACon();\r
+ aacon.setError("errrr.txt").setInput(test_alignment_input)\r
+ .setOutput("outtt.txt");\r
+ assertEquals(aacon.getInput(), test_alignment_input);\r
+ assertEquals(aacon.getError(), "errrr.txt");\r
+ assertEquals(aacon.getOutput(), "outtt.txt");\r
+ ConfiguredExecutable<AACon> cAAcon = Configurator\r
+ .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+ SyncExecutor sexec = Configurator.getSyncEngine(cAAcon);\r
+ sexec.executeJob();\r
+ ConfiguredExecutable<?> al = sexec.waitForResult();\r
+ assertNotNull(al.getResults());\r
+ // Save run configuration\r
+ assertTrue(cAAcon.saveRunConfiguration());\r
+\r
+ // See if loaded configuration is the same as saved\r
+ RunConfiguration loadedRun = RunConfiguration\r
+ .load(new FileInputStream(new File(cAAcon\r
+ .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+ assertEquals(\r
+ ((ConfExecutable<AACon>) cAAcon).getRunConfiguration(),\r
+ loadedRun);\r
+ // Load run configuration as ConfExecutable\r
+ ConfiguredExecutable<AACon> resurrectedCAAcon = (ConfiguredExecutable<AACon>) cAAcon\r
+ .loadRunConfiguration(new FileInputStream(new File(cAAcon\r
+ .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+ assertNotNull(resurrectedCAAcon);\r
+ assertEquals(resurrectedCAAcon.getExecutable().getInput(),\r
+ test_alignment_input);\r
+ assertEquals(resurrectedCAAcon.getExecutable().getError(),\r
+ "errrr.txt");\r
+ assertEquals(resurrectedCAAcon.getExecutable().getOutput(),\r
+ "outtt.txt");\r
+ // See in details whether executables are the same\r
+ assertEquals(resurrectedCAAcon.getExecutable(), aacon);\r
+\r
+ ConfiguredExecutable<AACon> resAAcon = Configurator\r
+ .configureExecutable(resurrectedCAAcon.getExecutable(),\r
+ Executable.ExecProvider.Local);\r
+\r
+ sexec = Configurator.getSyncEngine(resAAcon,\r
+ Executable.ExecProvider.Local);\r
+ sexec.executeJob();\r
+ al = sexec.waitForResult();\r
+ assertNotNull(al);\r
+\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (JobExecutionException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (FileNotFoundException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (IOException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (ResultNotAvailableException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Checks that options and limits can be loaded for AACon, that no presets\r
+ * are defined for it and that the limits validate against the presets.\r
+ */\r
+ @Test()\r
+ public void testConfigurationLoading() {\r
+ try {\r
+ RunnerConfig<AACon> options = ConfExecutable\r
+ .getRunnerOptions(AACon.class);\r
+ assertNotNull(options);\r
+ assertTrue(options.getArguments().size() > 0);\r
+\r
+ // AACon does not define any presets\r
+ PresetManager<AACon> presets = ConfExecutable\r
+ .getRunnerPresets(AACon.class);\r
+ assertNull(presets);\r
+\r
+ LimitsManager<AACon> limits = ConfExecutable\r
+ .getRunnerLimits(AACon.class);\r
+ assertNotNull(limits);\r
+ assertTrue(limits.getLimits().size() > 0);\r
+ limits.validate(presets);\r
+\r
+ } catch (IOException ioProblem) {\r
+ // Also covers FileNotFoundException, which was handled identically\r
+ ioProblem.printStackTrace();\r
+ fail(ioProblem.getLocalizedMessage());\r
+ } catch (ValidationException invalid) {\r
+ invalid.printStackTrace();\r
+ fail(invalid.getLocalizedMessage());\r
+ }\r
+ }\r
+\r
+}\r
--- /dev/null
+/* Copyright (c) 2009 Peter Troshin\r
+ * \r
+ * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
+ * \r
+ * This library is free software; you can redistribute it and/or modify it under the terms of the\r
+ * Apache License version 2 as published by the Apache Software Foundation\r
+ * \r
+ * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
+ * License for more details.\r
+ * \r
+ * A copy of the license is in apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
+ * \r
+ * Any republication or derived work distributed in source code form\r
+ * must include this copyright and license notice.\r
+ */\r
+\r
+package compbio.runner.disorder;\r
+\r
+import static org.testng.Assert.assertEquals;\r
+import static org.testng.Assert.assertFalse;\r
+import static org.testng.Assert.assertNotNull;\r
+import static org.testng.Assert.assertNull;\r
+import static org.testng.Assert.assertTrue;\r
+import static org.testng.Assert.fail;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.text.ParseException;\r
+import java.util.List;\r
+\r
+import javax.xml.bind.ValidationException;\r
+\r
+import org.ggf.drmaa.DrmaaException;\r
+import org.ggf.drmaa.JobInfo;\r
+import org.testng.annotations.BeforeMethod;\r
+import org.testng.annotations.Test;\r
+\r
+import compbio.data.sequence.AnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.FilePuller;\r
+import compbio.engine.SyncExecutor;\r
+import compbio.engine.client.ConfExecutable;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.RunConfiguration;\r
+import compbio.engine.cluster.drmaa.ClusterUtil;\r
+import compbio.engine.cluster.drmaa.JobRunner;\r
+import compbio.engine.cluster.drmaa.StatisticManager;\r
+import compbio.engine.local.LocalRunner;\r
+import compbio.metadata.AllTestSuit;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobExecutionException;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.util.FileWatcher;\r
+import compbio.util.SysPrefs;\r
+\r
+public class DisemblTester {\r
+\r
+ /** Output file name passed to the Disembl executable by {@link #init()} */\r
+ public static String test_outfile = "TO1381.disembl.out";\r
+\r
+ /** Executable under test, re-created before every test method */\r
+ private Disembl disembl;\r
+\r
+ /**\r
+ * Creates a new Disembl executable and points it to the shared test input\r
+ * and to the test output file.\r
+ */\r
+ @BeforeMethod(alwaysRun = true)\r
+ void init() {\r
+ this.disembl = new Disembl();\r
+ this.disembl.setInput(AllTestSuit.test_input).setOutput(test_outfile);\r
+ }\r
+\r
+ @Test(groups = { AllTestSuit.test_group_cluster,\r
+ AllTestSuit.test_group_runner })\r
+ public void testRunOnCluster() {\r
+ assertFalse(SysPrefs.isWindows,\r
+ "Cluster execution can only be in unix environment");\r
+ try {\r
+ ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+ .configureExecutable(disembl,\r
+ Executable.ExecProvider.Cluster);\r
+ JobRunner runner = JobRunner.getInstance(confDisembl);\r
+\r
+ assertNotNull(runner, "Runner is NULL");\r
+ runner.executeJob();\r
+ // assertNotNull("JobId is null", jobId1);\r
+ JobStatus status = runner.getJobStatus();\r
+ assertTrue(status == JobStatus.PENDING\r
+ || status == JobStatus.RUNNING,\r
+ "Status of the process is wrong!");\r
+ JobInfo info = runner.getJobInfo();\r
+ assertNotNull(info, "JobInfo is null");\r
+ StatisticManager sm = new StatisticManager(info);\r
+ assertNotNull(sm, "Statictic manager is null");\r
+ try {\r
+\r
+ String exits = sm.getExitStatus();\r
+ assertNotNull("Exit status is null", exits);\r
+ // cut 4 trailing zeros from the number\r
+ int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits)\r
+ .intValue();\r
+ assertEquals(0, exitsInt);\r
+ System.out.println(sm.getAllStats());\r
+\r
+ } catch (ParseException e) {\r
+ e.printStackTrace();\r
+ fail("Parse Exception: " + e.getMessage());\r
+ }\r
+ //assertFalse(runner.cleanup());\r
+ assertTrue(sm.hasExited());\r
+ assertFalse(sm.wasAborted());\r
+ assertFalse(sm.hasDump());\r
+ assertFalse(sm.hasSignaled());\r
+\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ } catch (JobExecutionException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ } catch (DrmaaException e) {\r
+ e.printStackTrace();\r
+ fail("DrmaaException caught:" + e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Submits Disembl to the cluster through the asynchronous engine and polls\r
+ * the job status until it is FINISHED or UNDEFINED, requesting the results\r
+ * on every iteration.\r
+ * \r
+ * This test fails from time to time depending on the cluster load or some\r
+ * other factors. Any client code has to adjust for this issue\r
+ */\r
+ @Test(groups = { AllTestSuit.test_group_cluster,\r
+ AllTestSuit.test_group_runner })\r
+ public void testRunOnClusterAsync() {\r
+ assertFalse(SysPrefs.isWindows,\r
+ "Cluster execution can only be in unix environment");\r
+ try {\r
+ ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+ .configureExecutable(disembl,\r
+ Executable.ExecProvider.Cluster);\r
+ AsyncExecutor aengine = Configurator.getAsyncEngine(confDisembl);\r
+ String jobId = aengine.submitJob(confDisembl);\r
+ assertNotNull(jobId, "JobId is null");\r
+ // let drmaa to start\r
+ Thread.sleep(500);\r
+ JobStatus status = aengine.getJobStatus(jobId);\r
+ while (status != JobStatus.FINISHED) {\r
+ System.out.println("Job Status: " + status);\r
+ Thread.sleep(1000);\r
+ status = aengine.getJobStatus(jobId);\r
+ // No cast is needed: the result is only checked for null and\r
+ // printed, and the former cast to Jronn was a copy-paste leftover\r
+ ConfiguredExecutable<?> result = aengine.getResults(jobId);\r
+ assertNotNull(result);\r
+ System.out.println("RES:" + result);\r
+ // Some times the job could be removed from the cluster accounting \r
+ // before it has been reported to finish. Make sure \r
+ // to stop waiting in such case\r
+ if (status == JobStatus.UNDEFINED) {\r
+ break;\r
+ }\r
+ }\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail("JobSubmissionException caught:" + e.getMessage());\r
+ } catch (InterruptedException e) {\r
+ // Restore the interrupt flag so that the caller can still observe it\r
+ Thread.currentThread().interrupt();\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (ResultNotAvailableException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Executes Disembl with the local runner and verifies that the results\r
+ * returned by the runner and by the configured executable are the same.\r
+ */\r
+ @Test(groups = { AllTestSuit.test_group_runner })\r
+ public void testRunLocally() {\r
+ try {\r
+ ConfiguredExecutable<Disembl> configured = Configurator\r
+ .configureExecutable(disembl, Executable.ExecProvider.Local);\r
+\r
+ LocalRunner localRunner = new LocalRunner(configured);\r
+ localRunner.executeJob();\r
+ ConfiguredExecutable<?> finished = localRunner.waitForResult();\r
+ assertNotNull(finished.getResults());\r
+\r
+ List<AnnotatedSequence> annotated = configured.getResults();\r
+ assertNotNull(annotated);\r
+ assertEquals(annotated.size(), 3);\r
+ assertEquals(finished.getResults(), annotated);\r
+ } catch (JobSubmissionException submitProblem) {\r
+ submitProblem.printStackTrace();\r
+ fail(submitProblem.getLocalizedMessage());\r
+ } catch (ResultNotAvailableException noResult) {\r
+ noResult.printStackTrace();\r
+ fail(noResult.getLocalizedMessage());\r
+ } catch (JobExecutionException execProblem) {\r
+ execProblem.printStackTrace();\r
+ fail(execProblem.getLocalizedMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Submits a Disembl job to the local asynchronous engine and keeps pulling\r
+ * the statistics file while the job is not FINISHED, then checks that the\r
+ * results can be retrieved.\r
+ */\r
+ @Test(groups = { AllTestSuit.test_group_runner })\r
+ public void readStatistics() {\r
+ try {\r
+ ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+ .configureExecutable(disembl, Executable.ExecProvider.Local);\r
+\r
+ AsyncExecutor sexec = Configurator.getAsyncEngine(confDisembl);\r
+ String jobId = sexec.submitJob(confDisembl);\r
+ // NOTE(review): the statistics file name is taken from Jronn although\r
+ // the submitted job is a Disembl one - looks like a copy-paste\r
+ // leftover, confirm that both executables write the same file\r
+ FilePuller fw = FilePuller.newFilePuller(confDisembl\r
+ .getWorkDirectory()\r
+ + File.separator + Jronn.getStatFile(),\r
+ FileWatcher.MIN_CHUNK_SIZE_BYTES);\r
+ int count = 0;\r
+ long position = 0;\r
+ fw.waitForFile(4);\r
+ JobStatus status = sexec.getJobStatus(jobId);\r
+ while (status != JobStatus.FINISHED) {\r
+ if (fw.hasMoreData()) {\r
+ // Only the next read position is needed, the chunk text\r
+ // itself is not inspected\r
+ ChunkHolder ch = fw.pull(position);\r
+ position = ch.getNextPosition();\r
+ }\r
+ count++;\r
+ // Make sure the loop is terminated if the job fails\r
+ if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) {\r
+ break;\r
+ }\r
+ Thread.sleep(300);\r
+ status = sexec.getJobStatus(jobId);\r
+ }\r
+ assertTrue(count > 1);\r
+ ConfiguredExecutable<?> al = sexec.getResults(jobId);\r
+ assertNotNull(al.getResults());\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (ResultNotAvailableException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (IOException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (InterruptedException e) {\r
+ // Restore the interrupt flag so that the caller can still observe it\r
+ Thread.currentThread().interrupt();\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Runs Disembl locally, saves the run configuration, reloads it from the\r
+ * work directory and checks that the reloaded executable equals the\r
+ * original one and can be executed again.\r
+ */\r
+ @Test(groups = { AllTestSuit.test_group_runner })\r
+ public void testPersistance() {\r
+ try {\r
+ // Separate instance with its own file names; named differently from\r
+ // the class field to avoid shadowing it\r
+ Disembl persisted = new Disembl();\r
+ persisted.setError("errrr.txt").setInput(AllTestSuit.test_input)\r
+ .setOutput("outtt.txt");\r
+ assertEquals(persisted.getInput(), AllTestSuit.test_input);\r
+ assertEquals(persisted.getError(), "errrr.txt");\r
+ assertEquals(persisted.getOutput(), "outtt.txt");\r
+ ConfiguredExecutable<Disembl> cDisembl = Configurator\r
+ .configureExecutable(persisted, Executable.ExecProvider.Local);\r
+\r
+ SyncExecutor sexec = Configurator.getSyncEngine(cDisembl);\r
+ sexec.executeJob();\r
+ ConfiguredExecutable<?> al = sexec.waitForResult();\r
+ assertNotNull(al.getResults());\r
+ // Save run configuration\r
+ assertTrue(cDisembl.saveRunConfiguration());\r
+\r
+ File savedConfig = new File(cDisembl.getWorkDirectory(),\r
+ RunConfiguration.rconfigFile);\r
+\r
+ // See if loaded configuration is the same as saved\r
+ RunConfiguration loadedRun;\r
+ FileInputStream configStream = new FileInputStream(savedConfig);\r
+ try {\r
+ loadedRun = RunConfiguration.load(configStream);\r
+ } finally {\r
+ // Close explicitly, otherwise the file handle is leaked\r
+ configStream.close();\r
+ }\r
+ assertEquals(((ConfExecutable<Disembl>) cDisembl)\r
+ .getRunConfiguration(), loadedRun);\r
+\r
+ // Load run configuration as ConfExecutable\r
+ ConfiguredExecutable<Disembl> resurrectedCDisembl;\r
+ configStream = new FileInputStream(savedConfig);\r
+ try {\r
+ resurrectedCDisembl = (ConfiguredExecutable<Disembl>) cDisembl\r
+ .loadRunConfiguration(configStream);\r
+ } finally {\r
+ configStream.close();\r
+ }\r
+ assertNotNull(resurrectedCDisembl);\r
+ assertEquals(resurrectedCDisembl.getExecutable().getInput(),\r
+ AllTestSuit.test_input);\r
+ assertEquals(resurrectedCDisembl.getExecutable().getError(),\r
+ "errrr.txt");\r
+ assertEquals(resurrectedCDisembl.getExecutable().getOutput(),\r
+ "outtt.txt");\r
+ // See in details whether executables are the same\r
+ assertEquals(resurrectedCDisembl.getExecutable(), persisted);\r
+\r
+ // The reloaded executable must be runnable again\r
+ ConfiguredExecutable<Disembl> resDisembl = Configurator\r
+ .configureExecutable(resurrectedCDisembl.getExecutable(),\r
+ Executable.ExecProvider.Local);\r
+\r
+ sexec = Configurator.getSyncEngine(resDisembl,\r
+ Executable.ExecProvider.Local);\r
+ sexec.executeJob();\r
+ al = sexec.waitForResult();\r
+ assertNotNull(al);\r
+\r
+ } catch (JobSubmissionException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (JobExecutionException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (FileNotFoundException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (IOException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ } catch (ResultNotAvailableException e) {\r
+ e.printStackTrace();\r
+ fail(e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * Checks that options and limits can be loaded for Disembl, that no presets\r
+ * are defined for it and that the limits validate against the presets.\r
+ */\r
+ @Test(groups = { AllTestSuit.test_group_runner })\r
+ public void testConfigurationLoading() {\r
+ try {\r
+ RunnerConfig<Disembl> options = ConfExecutable\r
+ .getRunnerOptions(Disembl.class);\r
+ assertNotNull(options);\r
+ assertTrue(options.getArguments().size() > 0);\r
+\r
+ // Disembl does not define any presets\r
+ PresetManager<Disembl> presets = ConfExecutable\r
+ .getRunnerPresets(Disembl.class);\r
+ assertNull(presets);\r
+\r
+ LimitsManager<Disembl> limits = ConfExecutable\r
+ .getRunnerLimits(Disembl.class);\r
+ assertNotNull(limits);\r
+ assertTrue(limits.getLimits().size() > 0);\r
+ limits.validate(presets);\r
+\r
+ } catch (IOException ioProblem) {\r
+ // Also covers FileNotFoundException, which was handled identically\r
+ ioProblem.printStackTrace();\r
+ fail(ioProblem.getLocalizedMessage());\r
+ } catch (ValidationException invalid) {\r
+ invalid.printStackTrace();\r
+ fail(invalid.getLocalizedMessage());\r
+ }\r
+ }\r
+\r
+}\r
--- /dev/null
+>Foobar_dundeefriends \r
+MTADGPRELLQLRAAVRHRPQDFVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r
+ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL\r
+LPEEPYITAQLLNWRRRLCDWRALDVLSAQVRAAVAQGVGAVEPFAFLSEDASAAEQLAC\r
+ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+GVFEMDGFADDFGALLQALARRHGWLGI\r
+\r
+>Foobar \r
+-----------------------------------MGDTTAGEMAVQRGLALH-------\r
+---------QQRHAEAAVLLQQASDAAPEHPGIALWL-HALEDAGQAEAAAA-YTRAHQL\r
+LPEEPYITAQLLN--------------------AVAQGVGAVEPFAFLSEDASAAE----\r
+----------SVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+GVFEMDGFADDFGALLQALARRHGWLGI\r
+\r
+>dundeefriends \r
+-MTADGPRELLQLRAAVRHRPQDVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r
+ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALED--------------HQL\r
+LPEEPYITAQLDVLSAQVR-------------AAVAQGVGAVEPFAFLSEDASAAEQLAC\r
+ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+I---------------------------
\ No newline at end of file
--- /dev/null
+package compbio.data.msa;\r
+\r
+import java.security.InvalidParameterException;\r
+import java.util.List;\r
+\r
+import javax.jws.WebParam;\r
+import javax.jws.WebService;\r
+\r
+import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitExceededException;\r
+import compbio.metadata.Option;\r
+import compbio.metadata.Preset;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.UnsupportedRuntimeException;\r
+import compbio.metadata.WrongParameterException;\r
+\r
+/**\r
+ * Interface for tools that produce one or more annotations for the\r
+ * sequence(s) submitted to them.\r
+ * \r
+ * @author pvtroshin\r
+ * \r
+ * Date November 2010\r
+ * \r
+ * @param <T>\r
+ * executable type / web service type\r
+ */\r
+@WebService(targetNamespace = "http://a.data.compbio/01/12/2010/")\r
+public interface Annotation<T> extends JManagement, Metadata<T> {\r
+\r
+ /**\r
+ * Analyse a list of sequences with default settings.\r
+ * \r
+ * Any dataset containing a greater number of sequences, or whose average\r
+ * sequence length is greater than defined in the default Limit, will not\r
+ * be accepted for the operation and JobSubmissionException will be thrown.\r
+ * \r
+ * @param sequences\r
+ * List of FastaSequence objects. The programme does not perform\r
+ * any sequence validity checks. Nor does it check whether the\r
+ * sequence names are unique. It is the responsibility of the\r
+ * caller to validate this information\r
+ * @return jobId - unique identifier for the job\r
+ * @throws JobSubmissionException\r
+ * is thrown when the job could not be submitted due to the\r
+ * following reasons: 1) The number of sequences in the\r
+ * submission or their average length is greater than defined by\r
+ * the default Limit. 2) Any problems on the server side e.g. it\r
+ * is misconfigured or malfunctions, are reported via this\r
+ * exception. In the first case the information on the limit\r
+ * could be obtained from the exception.\r
+ * @throws InvalidParameterException\r
+ * thrown if input list of fasta sequences is null or empty\r
+ * @throws UnsupportedRuntimeException\r
+ * thrown if server OS does not support native executables for a\r
+ * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+ * service is called\r
+ * @throws LimitExceededException\r
+ * is thrown if the number of input sequences or their average\r
+ * length exceeds what is defined by the limit\r
+ */\r
+ String analize(\r
+ @WebParam(name = "fastaSequences") List<FastaSequence> sequences)\r
+ throws UnsupportedRuntimeException, LimitExceededException,\r
+ JobSubmissionException;\r
+\r
+ /**\r
+ * Analyse a list of sequences with the options provided.\r
+ * \r
+ * Default Limit is used to decide whether the calculation will be\r
+ * permitted or denied\r
+ * \r
+ * @param sequences\r
+ * List of FastaSequence objects. The programme does not perform\r
+ * any sequence validity checks. Nor does it check whether the\r
+ * sequence names are unique. It is the responsibility of the\r
+ * caller to validate this information\r
+ * @param options\r
+ * A list of Options\r
+ * @return jobId - unique identifier for the job\r
+ * @throws JobSubmissionException\r
+ * is thrown when the job could not be submitted due to the\r
+ * following reasons: 1) The number of sequences in the\r
+ * submission or their average length is greater than defined by\r
+ * the default Limit. 2) Any problems on the server side e.g. it\r
+ * is misconfigured or malfunctions, are reported via this\r
+ * exception. In the first case the information on the limit\r
+ * could be obtained from the exception.\r
+ * @throws WrongParameterException\r
+ * is thrown when 1) One of the Options provided is not\r
+ * supported, 2) The value of the option is defined outside the\r
+ * boundaries. In both cases the exception object contains the\r
+ * information on the violating Option.\r
+ * @throws InvalidParameterException\r
+ * thrown if input list of fasta sequences is null or empty\r
+ * @throws UnsupportedRuntimeException\r
+ * thrown if server OS does not support native executables for a\r
+ * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+ * service is called\r
+ * @throws LimitExceededException\r
+ * is thrown if the number of input sequences or their average\r
+ * length exceeds what is defined by the limit\r
+ * @see Option\r
+ */\r
+ String customAnalize(\r
+ @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+ @WebParam(name = "options") List<Option<T>> options)\r
+ throws UnsupportedRuntimeException, LimitExceededException,\r
+ JobSubmissionException, WrongParameterException;\r
+\r
+ /**\r
+ * Analyse a list of sequences with the settings defined by a preset.\r
+ * \r
+ * The Limit defined for the preset is used to decide whether the\r
+ * calculation will be permitted or denied. If no Limit was defined for the\r
+ * preset, then the default Limit is used.\r
+ * \r
+ * @param sequences\r
+ * List of FastaSequence objects. The programme does not perform\r
+ * any sequence validity checks. Nor does it check whether the\r
+ * sequence names are unique. It is the responsibility of the\r
+ * caller to validate this information\r
+ * @param preset\r
+ * the Preset to run the calculation with\r
+ * @return String - jobId - unique identifier for the job\r
+ * @throws JobSubmissionException\r
+ * is thrown when the job could not be submitted due to the\r
+ * following reasons: 1) The number of sequences in the\r
+ * submission or their average length is greater than defined by\r
+ * the Limit. 2) Any problems on the server side e.g. it\r
+ * is misconfigured or malfunctions, are reported via this\r
+ * exception. In the first case the information on the limit\r
+ * could be obtained from the exception.\r
+ * @throws WrongParameterException\r
+ * is thrown when 1) One of the Options provided is not\r
+ * supported, 2) The value of the option is defined outside the\r
+ * boundaries. In both cases the exception object contains the\r
+ * information on the violating Option.\r
+ * @throws InvalidParameterException\r
+ * thrown if input list of fasta sequences is null or empty\r
+ * @throws UnsupportedRuntimeException\r
+ * thrown if server OS does not support native executables for a\r
+ * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+ * service is called\r
+ * @throws LimitExceededException\r
+ * is thrown if the number of input sequences or their average\r
+ * length exceeds what is defined by the limit\r
+ */\r
+ String presetAnalize(\r
+ @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+ @WebParam(name = "preset") Preset<T> preset)\r
+ throws UnsupportedRuntimeException, LimitExceededException,\r
+ JobSubmissionException, WrongParameterException;\r
+\r
+ /**\r
+ * Return the result of the job.\r
+ * \r
+ * @param jobId\r
+ * a unique job identifier\r
+ * @return the MultiAnnotatedSequence produced by the job\r
+ * @throws ResultNotAvailableException\r
+ * this exception is thrown if the job execution was not\r
+ * successful or the result of the execution could not be found\r
+ * (e.g. removed). The exception could also be thrown due to\r
+ * lower level problems on the server i.e. IOException,\r
+ * FileNotFoundException problems as well as\r
+ * UnknownFileFormatException.\r
+ * @throws InvalidParameterException\r
+ * thrown if jobId is empty or cannot be recognised e.g. in\r
+ * invalid format\r
+ */\r
+ MultiAnnotatedSequence<?> getResult(@WebParam(name = "jobId") String jobId)\r
+ throws ResultNotAvailableException;\r
+}\r
--- /dev/null
+package compbio.data.msa;\r
+\r
+import java.security.InvalidParameterException;\r
+\r
+import javax.jws.WebParam;\r
+\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobStatus;\r
+\r
+public interface JManagement {\r
+\r
+ /**\r
+ * Stop running job but leave its output untouched\r
+ * \r
+ * @param jobId\r
+ * - unique job identifier\r
+ * @return true if job was cancelled successfully, false otherwise\r
+ * @throws InvalidParameterException\r
+ * thrown if jobId is empty or cannot be recognised e.g. in\r
+ * invalid format\r
+ */\r
+ boolean cancelJob(@WebParam(name = "jobId") String jobId);\r
+\r
+ /**\r
+ * Return the status of the job.\r
+ * \r
+ * @param jobId\r
+ * - unique job identifier\r
+ * @return JobStatus - status of the job\r
+ * @throws InvalidParameterException\r
+ * thrown if jobId is empty or cannot be recognised e.g. in\r
+ * invalid format\r
+ * @see JobStatus\r
+ */\r
+ JobStatus getJobStatus(@WebParam(name = "jobId") String jobId);\r
+\r
+ /**\r
+ * Reads 1kb chunk from the statistics file which is specific to a given web\r
+ * service from the position. If at the time of a request less than 1kb of\r
+ * data is available from the position to the end of the file, then it\r
+ * returns all the data available from the position to the end of the file.\r
+ * \r
+ * @param jobId\r
+ * - unique job identifier\r
+ * @param position\r
+ * - next position within the file to read\r
+ * @return ChunkHolder - which contains a chunk of data and the next\r
+ * position within the file from which no data has been read\r
+ * @throws InvalidParameterException\r
+ * thrown if jobId is empty or cannot be recognised e.g. in\r
+ * invalid format and also if the position value is negative\r
+ * @see ChunkHolder\r
+ */\r
+ ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId,\r
+ @WebParam(name = "position") long position);\r
+\r
+}\r
--- /dev/null
+package compbio.data.msa;\r
+\r
+import javax.jws.WebParam;\r
+\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.RunnerConfig;\r
+\r
+public interface Metadata<T> {\r
+\r
+ /**\r
+ * Get options supported by a web service\r
+ * \r
+ * @return RunnerConfig the list of options and parameters supported by a\r
+ * web service.\r
+ */\r
+ RunnerConfig<T> getRunnerOptions();\r
+\r
+ /**\r
+ * Get presets supported by a web service\r
+ * \r
+ * @return PresetManager the object contains information about presets\r
+ * supported by a web service\r
+ */\r
+ PresetManager<T> getPresets();\r
+\r
+ /**\r
+ * Get a Limit for a preset.\r
+ * \r
+ * @param presetName\r
+ * the name of the preset. If no name is provided, or if no\r
+ * limit is defined for the particular preset, the default\r
+ * limit is returned (the earlier wording said "default preset"\r
+ * although the method returns a Limit - TODO confirm)\r
+ * @return Limit\r
+ */\r
+ Limit<T> getLimit(@WebParam(name = "presetName") String presetName);\r
+\r
+ /**\r
+ * List Limits supported by a web service.\r
+ * \r
+ * @return LimitsManager\r
+ */\r
+ LimitsManager<T> getLimits();\r
+\r
+}\r
\r
import compbio.data.sequence.Alignment;\r
import compbio.data.sequence.FastaSequence;\r
-import compbio.metadata.ChunkHolder;\r
-import compbio.metadata.JobStatus;\r
import compbio.metadata.JobSubmissionException;\r
-import compbio.metadata.Limit;\r
import compbio.metadata.LimitExceededException;\r
-import compbio.metadata.LimitsManager;\r
import compbio.metadata.Option;\r
import compbio.metadata.Preset;\r
-import compbio.metadata.PresetManager;\r
import compbio.metadata.ResultNotAvailableException;\r
-import compbio.metadata.RunnerConfig;\r
import compbio.metadata.UnsupportedRuntimeException;\r
import compbio.metadata.WrongParameterException;\r
\r
* \r
* @author pvtroshin\r
* \r
- * Date September 2009\r
+ * Date November 2010\r
* \r
* @param <T>\r
* executable type / web service type\r
*/\r
-@WebService(targetNamespace = "http://msa.data.compbio/01/01/2010/")\r
-public interface MsaWS<T> {\r
-\r
- /**\r
- * Align a list of sequences with default settings.\r
- * \r
- * Any dataset containing a greater number of sequences or the average\r
- * length of the sequences are greater then defined in the default Limit\r
- * will not be accepted for an alignment operation and\r
- * JobSubmissionException will be thrown.\r
- * \r
- * @param sequences\r
- * List of FastaSequence objects. The programme does not perform\r
- * any sequence validity checks. Nor does it checks whether the\r
- * sequences names are unique. It is responsibility of the caller\r
- * to validate this information\r
- * @return jobId - unique identifier for the job\r
- * @throws JobSubmissionException. This\r
- * exception is thrown when the job could not be submitted due\r
- * to the following reasons: 1) The number of sequences in the\r
- * submission or their average length is greater then defined by\r
- * the default Limit. 2) Any problems on the server side e.g. it\r
- * is misconfigured or malfunction, is reported via this\r
- * exception. In the first case the information on the limit\r
- * could be obtained from an exception.\r
- * @throws InvalidParameterException\r
- * thrown if input list of fasta sequence is null or empty\r
- * @throws UnsupportedRuntimeException\r
- * thrown if server OS does not support native executables for a\r
- * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
- * service is called\r
- * @throws LimitExceededException\r
- * is throw if the input sequences number or average length\r
- * exceeds what is defined by the limit\r
- */\r
- String align(\r
- @WebParam(name = "fastaSequences") List<FastaSequence> sequences)\r
- throws UnsupportedRuntimeException, LimitExceededException,\r
- JobSubmissionException;\r
-\r
- /**\r
- * Align a list of sequences with options.\r
- * \r
- * @see Option\r
- * \r
- * Default Limit is used to decide whether the calculation will be\r
- * permitted or denied\r
- * \r
- * @param sequences\r
- * List of FastaSequence objects. The programme does not perform\r
- * any sequence validity checks. Nor does it checks whether the\r
- * sequences names are unique. It is responsibility of the caller\r
- * to validate this information\r
- * @param options\r
- * A list of Options\r
- * @return jobId - unique identifier for the job\r
- * @throws JobSubmissionException. This\r
- * exception is thrown when the job could not be submitted due\r
- * to the following reasons: 1) The number of sequences in the\r
- * submission or their average length is greater then defined by\r
- * the default Limit. 2) Any problems on the server side e.g. it\r
- * is misconfigured or malfunction, is reported via this\r
- * exception. In the first case the information on the limit\r
- * could be obtained from an exception.\r
- * @throws WrongParameterException\r
- * is throws when 1) One of the Options provided is not\r
- * supported, 2) The value of the option is defined outside the\r
- * boundaries. In both cases exception object contain the\r
- * information on the violating Option.\r
- * @throws InvalidParameterException\r
- * thrown if input list of fasta sequence is null or empty\r
- * @throws UnsupportedRuntimeException\r
- * thrown if server OS does not support native executables for a\r
- * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
- * service is called\r
- * @throws LimitExceededException\r
- * is throw if the input sequences number or average length\r
- * exceeds what is defined by the limit\r
- */\r
- String customAlign(\r
- @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
- @WebParam(name = "options") List<Option<T>> options)\r
- throws UnsupportedRuntimeException, LimitExceededException,\r
- JobSubmissionException, WrongParameterException;\r
-\r
- /**\r
- * Align a list of sequences with preset. @see Preset\r
- * \r
- * Limit for a presetName is used whether the calculation will be permitted\r
- * or denied. If no Limit was defined for a presetName, than default limit\r
- * is used.\r
- * \r
- * @param sequences\r
- * List of FastaSequence objects. The programme does not perform\r
- * any sequence validity checks. Nor does it checks whether the\r
- * sequences names are unique. It is responsibility of the caller\r
- * to validate this information\r
- * @param preset\r
- * A list of Options\r
- * @return String - jobId - unique identifier for the job\r
- * @throws JobSubmissionException. This\r
- * exception is thrown when the job could not be submitted due\r
- * to the following reasons: 1) The number of sequences in the\r
- * submission or their average length is greater then defined by\r
- * the default Limit. 2) Any problems on the server side e.g. it\r
- * is misconfigured or malfunction, is reported via this\r
- * exception. In the first case the information on the limit\r
- * could be obtained from an exception.\r
- * @throws WrongParameterException\r
- * is throws when 1) One of the Options provided is not\r
- * supported, 2) The value of the option is defined outside the\r
- * boundaries. In both cases exception object contain the\r
- * information on the violating Option.\r
- * @throws InvalidParameterException\r
- * thrown if input list of fasta sequence is null or empty\r
- * @throws UnsupportedRuntimeException\r
- * thrown if server OS does not support native executables for a\r
- * given web service, e.g. JWS2 is deployed on Windows and Mafft\r
- * service is called\r
- * @throws LimitExceededException\r
- * is throw if the input sequences number or average length\r
- * exceeds what is defined by the limit\r
- */\r
- String presetAlign(\r
- @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
- @WebParam(name = "preset") Preset<T> preset)\r
- throws UnsupportedRuntimeException, LimitExceededException,\r
- JobSubmissionException, WrongParameterException;\r
-\r
- /**\r
- * Return the result of the job.\r
- * \r
- * @param jobId\r
- * a unique job identifier\r
- * @return Alignment\r
- * @throws ResultNotAvailableException\r
- * this exception is throw if the job execution was not\r
- * successful or the result of the execution could not be found.\r
- * (e.g. removed). Exception could also be thrown is dues to the\r
- * lower level problems on the server i.e. IOException,\r
- * FileNotFoundException problems as well as\r
- * UnknownFileFormatException.\r
- * @throws InvalidParameterException\r
- * thrown if jobId is empty or cannot be recognised e.g. in\r
- * invalid format\r
- */\r
- Alignment getResult(@WebParam(name = "jobId") String jobId)\r
- throws ResultNotAvailableException;\r
-\r
- /**\r
- * Stop running job but leave its output untouched\r
- * \r
- * @return true if job was cancelled successfully, false otherwise\r
- * @throws InvalidParameterException\r
- * thrown if jobId is empty or cannot be recognised e.g. in\r
- * invalid format\r
- */\r
- boolean cancelJob(@WebParam(name = "jobId") String jobId);\r
-\r
- /**\r
- * Return the status of the job. @see JobStatus\r
- * \r
- * @param jobId\r
- * - unique job identifier\r
- * @return JobStatus - status of the job\r
- * @throws InvalidParameterException\r
- * thrown if jobId is empty or cannot be recognised e.g. in\r
- * invalid format\r
- */\r
- JobStatus getJobStatus(@WebParam(name = "jobId") String jobId);\r
-\r
- /**\r
- * Reads 1kb chunk from the statistics file which is specific to a given web\r
- * service from the position. If in time of a request less then 1kb data is\r
- * available from the position to the end of the file, then it returns all\r
- * the data available from the position to the end of the file.\r
- * \r
- * @param jobId\r
- * - unique job identifier\r
- * @param position\r
- * - next position within the file to read\r
- * @return ChunkHolder - @see ChunkHolder which contains a chuink of data\r
- * and a next position within the file from which no data has been\r
- * read\r
- * @throws InvalidParameterException\r
- * thrown if jobId is empty or cannot be recognised e.g. in\r
- * invalid format and also if the position value is negative\r
- */\r
- ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId,\r
- @WebParam(name = "position") long position);\r
-\r
- /*\r
- * TODO\r
- * \r
- * @param jobId\r
- * \r
- * @return\r
- * \r
- * byte getProgress(@WebParam(name = "jobId") String jobId);\r
- */\r
-\r
- /**\r
- * Get options supported by a web service\r
- * \r
- * @return RunnerConfig the list of options and parameters supported by a\r
- * web service.\r
- */\r
- RunnerConfig<T> getRunnerOptions();\r
-\r
- /**\r
- * Get presets supported by a web service\r
- * \r
- * @return PresetManager the object contains information about presets\r
- * supported by a web service\r
- */\r
- PresetManager<T> getPresets();\r
-\r
- /**\r
- * Get a Limit for a preset.\r
- * \r
- * @param presetName\r
- * the name of the preset. if no name is provided, then the\r
- * default preset is returned. If no limit for a particular\r
- * preset is defined then the default preset is returned\r
- * @return Limit\r
- */\r
- Limit<T> getLimit(@WebParam(name = "presetName") String presetName);\r
-\r
- /**\r
- * List Limits supported by a web service.\r
- * \r
- * @param presetName\r
- * the name of the preset. if no name is provided, then the\r
- * default preset is returned. If no limit for a particular\r
- * preset is defined then the default preset is returned\r
- * @return LimitManager\r
- */\r
- LimitsManager<T> getLimits();\r
+@WebService(targetNamespace = "http://msa.data.compbio/01/12/2010/")\r
+public interface MsaWS<T> extends JManagement, Metadata<T> {\r
+\r
+    /**
+     * Align a list of sequences with default settings.
+     * 
+     * Any dataset in which the number of sequences or the average length of
+     * the sequences is greater than defined in the default Limit will not be
+     * accepted for an alignment operation and JobSubmissionException will be
+     * thrown.
+     * 
+     * @param sequences
+     *            List of FastaSequence objects. The programme does not perform
+     *            any sequence validity checks. Nor does it check whether the
+     *            sequence names are unique. It is the responsibility of the
+     *            caller to validate this information
+     * @return jobId - unique identifier for the job
+     * @throws JobSubmissionException
+     *             thrown when the job could not be submitted due to the
+     *             following reasons: 1) The number of sequences in the
+     *             submission or their average length is greater than defined
+     *             by the default Limit. 2) Any problem on the server side,
+     *             e.g. it is misconfigured or malfunctioning, is reported via
+     *             this exception. In the first case the information on the
+     *             limit could be obtained from the exception.
+     * @throws InvalidParameterException
+     *             thrown if input list of fasta sequence is null or empty
+     * @throws UnsupportedRuntimeException
+     *             thrown if server OS does not support native executables for
+     *             a given web service, e.g. JWS2 is deployed on Windows and
+     *             Mafft service is called
+     * @throws LimitExceededException
+     *             thrown if the number of input sequences or their average
+     *             length exceeds what is defined by the limit
+     */
+    String align(
+            @WebParam(name = "fastaSequences") List<FastaSequence> sequences)
+            throws UnsupportedRuntimeException, LimitExceededException,
+            JobSubmissionException;
+\r
+    /**
+     * Align a list of sequences with options (see {@link Option}).
+     * 
+     * Default Limit is used to decide whether the calculation will be
+     * permitted or denied
+     * 
+     * @param sequences
+     *            List of FastaSequence objects. The programme does not perform
+     *            any sequence validity checks. Nor does it check whether the
+     *            sequence names are unique. It is the responsibility of the
+     *            caller to validate this information
+     * @param options
+     *            A list of Options
+     * @return jobId - unique identifier for the job
+     * @throws JobSubmissionException
+     *             thrown when the job could not be submitted due to the
+     *             following reasons: 1) The number of sequences in the
+     *             submission or their average length is greater than defined
+     *             by the default Limit. 2) Any problem on the server side,
+     *             e.g. it is misconfigured or malfunctioning, is reported via
+     *             this exception. In the first case the information on the
+     *             limit could be obtained from the exception.
+     * @throws WrongParameterException
+     *             thrown when 1) One of the Options provided is not
+     *             supported, 2) The value of the option is defined outside the
+     *             boundaries. In both cases the exception object contains the
+     *             information on the violating Option.
+     * @throws InvalidParameterException
+     *             thrown if input list of fasta sequence is null or empty
+     * @throws UnsupportedRuntimeException
+     *             thrown if server OS does not support native executables for
+     *             a given web service, e.g. JWS2 is deployed on Windows and
+     *             Mafft service is called
+     * @throws LimitExceededException
+     *             thrown if the number of input sequences or their average
+     *             length exceeds what is defined by the limit
+     */
+    String customAlign(
+            @WebParam(name = "fastaSequences") List<FastaSequence> sequences,
+            @WebParam(name = "options") List<Option<T>> options)
+            throws UnsupportedRuntimeException, LimitExceededException,
+            JobSubmissionException, WrongParameterException;
+\r
+    /**
+     * Align a list of sequences with a preset (see {@link Preset}).
+     * 
+     * The Limit for the preset is used to decide whether the calculation will
+     * be permitted or denied. If no Limit was defined for the preset, then the
+     * default limit is used.
+     * 
+     * @param sequences
+     *            List of FastaSequence objects. The programme does not perform
+     *            any sequence validity checks. Nor does it check whether the
+     *            sequence names are unique. It is the responsibility of the
+     *            caller to validate this information
+     * @param preset
+     *            the Preset to run the alignment with
+     * @return String - jobId - unique identifier for the job
+     * @throws JobSubmissionException
+     *             thrown when the job could not be submitted due to the
+     *             following reasons: 1) The number of sequences in the
+     *             submission or their average length is greater than defined
+     *             by the default Limit. 2) Any problem on the server side,
+     *             e.g. it is misconfigured or malfunctioning, is reported via
+     *             this exception. In the first case the information on the
+     *             limit could be obtained from the exception.
+     * @throws WrongParameterException
+     *             thrown when 1) One of the Options provided is not
+     *             supported, 2) The value of the option is defined outside the
+     *             boundaries. In both cases the exception object contains the
+     *             information on the violating Option.
+     * @throws InvalidParameterException
+     *             thrown if input list of fasta sequence is null or empty
+     * @throws UnsupportedRuntimeException
+     *             thrown if server OS does not support native executables for
+     *             a given web service, e.g. JWS2 is deployed on Windows and
+     *             Mafft service is called
+     * @throws LimitExceededException
+     *             thrown if the number of input sequences or their average
+     *             length exceeds what is defined by the limit
+     */
+    String presetAlign(
+            @WebParam(name = "fastaSequences") List<FastaSequence> sequences,
+            @WebParam(name = "preset") Preset<T> preset)
+            throws UnsupportedRuntimeException, LimitExceededException,
+            JobSubmissionException, WrongParameterException;
+\r
+    /**
+     * Return the result of the job.
+     * 
+     * @param jobId
+     *            a unique job identifier
+     * @return Alignment
+     * @throws ResultNotAvailableException
+     *             thrown if the job execution was not successful or the
+     *             result of the execution could not be found (e.g. it was
+     *             removed). The exception could also be thrown due to lower
+     *             level problems on the server, i.e. IOException,
+     *             FileNotFoundException problems as well as
+     *             UnknownFileFormatException.
+     * @throws InvalidParameterException
+     *             thrown if jobId is empty or cannot be recognised e.g. in
+     *             invalid format
+     */
+    Alignment getResult(@WebParam(name = "jobId") String jobId)
+            throws ResultNotAvailableException;
\r
}\r
--- /dev/null
+package compbio.ws.server;\r
+\r
+import java.io.File;\r
+import java.util.List;\r
+\r
+import javax.annotation.Resource;\r
+import javax.jws.WebService;\r
+import javax.xml.ws.WebServiceContext;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.data.msa.Annotation;\r
+import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.JalviewAnnotation;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitExceededException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.Option;\r
+import compbio.metadata.Preset;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.metadata.UnsupportedRuntimeException;\r
+import compbio.metadata.WrongParameterException;\r
+import compbio.runner.Util;\r
+import compbio.runner.conservation.AACon;\r
+\r
+/**
+ * Web service endpoint for the AACon (AA Conservation) runner.
+ * 
+ * The job management and metadata operations delegate to {@link WSUtil},
+ * {@link Configurator} and {@link AACon}. The three job submission operations
+ * (analize, customAnalize, presetAnalize) are not implemented yet and fail
+ * explicitly instead of returning a null job identifier.
+ * 
+ * NOTE(review): the endpoint interface and service name previously pointed at
+ * the MsaWS interface and "MuscleWS", which this class does not implement.
+ * They now name the interface the class implements; confirm that
+ * compbio.data.msa.Annotation carries a WebService annotation and that the
+ * target namespace below is the intended one for this service.
+ */
+@WebService(endpointInterface = "compbio.data.msa.Annotation", targetNamespace = "http://msa.data.compbio/01/01/2010/", serviceName = "AAConWS")
+public class AAConWS implements Annotation<AACon> {
+
+    // Ask for resource injection
+    @Resource
+    WebServiceContext wsContext;
+
+    private static final Logger statLog = Logger.getLogger("AAConWS-stats");
+
+    private static final Logger log = Logger.getLogger(AAConWS.class);
+
+    private static final RunnerConfig<AACon> aaconOptions = Util
+            .getSupportedOptions(AACon.class);
+
+    private static final PresetManager<AACon> aaconPresets = Util
+            .getPresets(AACon.class);
+
+    /**
+     * Create an AACon executable reading "fasta.in" and writing "fasta.out"
+     * and configure it for the given sequences.
+     * 
+     * @param sequences
+     *            the input sequences passed to the Configurator
+     * @return the configured executable
+     * @throws JobSubmissionException
+     *             propagated from Configurator.configureExecutable
+     */
+    ConfiguredExecutable<AACon> init(List<FastaSequence> sequences)
+            throws JobSubmissionException {
+        AACon aacon = new AACon();
+        aacon.setInput("fasta.in").setOutput("fasta.out");
+        return Configurator.configureExecutable(aacon, sequences);
+    }
+
+    /**
+     * Return the result of the job.
+     * 
+     * @param jobId
+     *            a unique job identifier, validated with WSUtil.validateJobId
+     * @return the results reported by the executable retrieved for the job
+     * @throws ResultNotAvailableException
+     *             if the result cannot be retrieved
+     */
+    @SuppressWarnings("unchecked")
+    public MultiAnnotatedSequence<Method> getResult(String jobId)
+            throws ResultNotAvailableException {
+        WSUtil.validateJobId(jobId);
+        AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);
+        // The engine's result is cast to the AACon executable type, hence the
+        // unchecked suppression on this method.
+        ConfiguredExecutable<AACon> aacon = (ConfiguredExecutable<AACon>) asyncEngine
+                .getResults(jobId);
+        MultiAnnotatedSequence<Method> mas = aacon.getResults();
+        // TODO log(jobId, "getResults");
+        return mas;
+    }
+
+    /**
+     * Return the result of the job converted to a Jalview annotation.
+     * 
+     * @param jobId
+     *            a unique job identifier
+     * @return the result of {@link #getResult(String)} converted with
+     *         toJalviewAnnotation()
+     * @throws ResultNotAvailableException
+     *             if the result cannot be retrieved
+     */
+    public JalviewAnnotation getJalviewAnnotation(String jobId)
+            throws ResultNotAvailableException {
+        MultiAnnotatedSequence<Method> result = getResult(jobId);
+        return result.toJalviewAnnotation();
+    }
+
+    /**
+     * Get the Limit for a preset, as reported by a new AACon instance.
+     */
+    public Limit<AACon> getLimit(String presetName) {
+        return new AACon().getLimit(presetName);
+    }
+
+    /**
+     * List the Limits, as reported by a new AACon instance.
+     */
+    public LimitsManager<AACon> getLimits() {
+        return new AACon().getLimits();
+    }
+
+    /**
+     * Read a chunk of the AACon statistics file located in the work
+     * directory of the job.
+     * 
+     * @param jobId
+     *            a unique job identifier, validated with WSUtil.validateJobId
+     * @param position
+     *            position passed on to WSUtil.pullFile
+     * @return the chunk returned by WSUtil.pullFile
+     */
+    public ChunkHolder pullExecStatistics(String jobId, long position) {
+        WSUtil.validateJobId(jobId);
+        String file = Configurator.getWorkDirectory(jobId) + File.separator
+                + AACon.getStatFile();
+        return WSUtil.pullFile(file, position);
+    }
+
+    /**
+     * Cancel the job after validating its identifier.
+     * 
+     * @return the value returned by WSUtil.cancelJob
+     */
+    public boolean cancelJob(String jobId) {
+        WSUtil.validateJobId(jobId);
+        return WSUtil.cancelJob(jobId);
+    }
+
+    /**
+     * Return the status of the job after validating its identifier.
+     */
+    public JobStatus getJobStatus(String jobId) {
+        WSUtil.validateJobId(jobId);
+        return WSUtil.getJobStatus(jobId);
+    }
+
+    /**
+     * Get the presets loaded for AACon when this class was initialised.
+     */
+    public PresetManager<AACon> getPresets() {
+        return aaconPresets;
+    }
+
+    /**
+     * Get the options loaded for AACon when this class was initialised.
+     */
+    public RunnerConfig<AACon> getRunnerOptions() {
+        return aaconOptions;
+    }
+
+    /**
+     * Not implemented yet.
+     * 
+     * @throws UnsupportedOperationException
+     *             always; a null job identifier must never reach the caller
+     */
+    @Override
+    public String analize(List<FastaSequence> sequences)
+            throws UnsupportedRuntimeException, LimitExceededException,
+            JobSubmissionException {
+        // TODO implement job submission
+        throw new UnsupportedOperationException(
+                "AAConWS.analize is not implemented yet");
+    }
+
+    /**
+     * Not implemented yet.
+     * 
+     * @throws UnsupportedOperationException
+     *             always; a null job identifier must never reach the caller
+     */
+    @Override
+    public String customAnalize(List<FastaSequence> sequences,
+            List<Option<AACon>> options) throws UnsupportedRuntimeException,
+            LimitExceededException, JobSubmissionException,
+            WrongParameterException {
+        // TODO implement job submission with options
+        throw new UnsupportedOperationException(
+                "AAConWS.customAnalize is not implemented yet");
+    }
+
+    /**
+     * Not implemented yet.
+     * 
+     * @throws UnsupportedOperationException
+     *             always; a null job identifier must never reach the caller
+     */
+    @Override
+    public String presetAnalize(List<FastaSequence> sequences,
+            Preset<AACon> preset) throws UnsupportedRuntimeException,
+            LimitExceededException, JobSubmissionException,
+            WrongParameterException {
+        // TODO implement job submission with a preset
+        throw new UnsupportedOperationException(
+                "AAConWS.presetAnalize is not implemented yet");
+    }
+
+}
\r
public final class WSUtil {\r
\r
- public static final void validateJobId(String jobId)\r
- throws InvalidParameterException {\r
- if (!compbio.engine.client.Util.isValidJobId(jobId)) {\r
- throw new InvalidParameterException(\r
- "JobId is not provided or cannot be recognised! Given value: "\r
- + jobId);\r
+    /**
+     * Reject a job identifier that is not valid.
+     * 
+     * @param jobId
+     *            the job identifier to check
+     * @throws InvalidParameterException
+     *             if compbio.engine.client.Util.isValidJobId returns false
+     *             for the value; the message includes the value given
+     */
+    public static final void validateJobId(String jobId)
+            throws InvalidParameterException {
+        if (!compbio.engine.client.Util.isValidJobId(jobId)) {
+            throw new InvalidParameterException(
+                    "JobId is not provided or cannot be recognised! Given value: "
+                            + jobId);
+        }
}\r
- }\r
\r
- public static final void validateFastaInput(List<FastaSequence> sequences)\r
- throws InvalidParameterException {\r
- if (sequences == null || sequences.isEmpty()) {\r
- throw new InvalidParameterException(\r
- "List of fasta sequences required but not provided! ");\r
+    /**
+     * Reject a missing list of input sequences.
+     * 
+     * @param sequences
+     *            the list of sequences to check
+     * @throws InvalidParameterException
+     *             if the list is null or empty
+     */
+    public static final void validateFastaInput(List<FastaSequence> sequences)
+            throws InvalidParameterException {
+        if (sequences == null || sequences.isEmpty()) {
+            throw new InvalidParameterException(
+                    "List of fasta sequences required but not provided! ");
+        }
}\r
- }\r
\r
- public static JobStatus getJobStatus(String jobId) {\r
- AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);\r
- return asyncEngine.getJobStatus(jobId);\r
- }\r
-\r
- public static ChunkHolder pullFile(String file, long position) {\r
- return ProgressGetter.pull(file, position);\r
- }\r
+    /**
+     * Return the status of the job as reported by the asynchronous engine the
+     * Configurator provides for the job identifier.
+     * 
+     * @param jobId
+     *            the job identifier; it is not validated here
+     * @return the status reported by the engine
+     */
+    public static JobStatus getJobStatus(String jobId) {
+        AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);
+        return asyncEngine.getJobStatus(jobId);
+    }
\r
- public static byte getProgress(String jobId) {\r
- throw new UnsupportedOperationException();\r
- }\r
+    /**
+     * Read a chunk of the file starting at the position; delegates to
+     * ProgressGetter.pull.
+     * 
+     * @param file
+     *            the path of the file to read
+     * @param position
+     *            the position within the file to read from
+     * @return the ChunkHolder returned by ProgressGetter.pull
+     */
+    public static ChunkHolder pullFile(String file, long position) {
+        return ProgressGetter.pull(file, position);
+    }
\r
- public static AsyncExecutor getEngine(ConfiguredExecutable<?> confClustal) {\r
- assert confClustal != null;\r
- return Configurator.getAsyncEngine(confClustal);\r
- }\r
+    /**
+     * Not implemented.
+     * 
+     * @param jobId
+     *            ignored
+     * @return never returns normally
+     * @throws UnsupportedOperationException
+     *             always
+     */
+    public static byte getProgress(String jobId) {
+        throw new UnsupportedOperationException();
+    }
\r
- public static boolean cancelJob(String jobId) {\r
- AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);\r
- return asyncEngine.cancelJob(jobId);\r
- }\r
+    /**
+     * Return the asynchronous engine the Configurator provides for the
+     * configured executable.
+     * 
+     * @param confClustal
+     *            the configured executable (of any type, despite the name);
+     *            must not be null, which is checked with an assert only
+     * @return the engine returned by Configurator.getAsyncEngine
+     */
+    public static AsyncExecutor getEngine(ConfiguredExecutable<?> confClustal) {
+        assert confClustal != null;
+        return Configurator.getAsyncEngine(confClustal);
+    }
\r
- public static <T> String align(List<FastaSequence> sequences,\r
- ConfiguredExecutable<T> confExec, WSLogger logger,\r
- String callingMethod, Limit<T> limit)\r
- throws LimitExceededException, JobSubmissionException {\r
- Timer timer = Timer.getMilliSecondsTimer();\r
- if (limit.isExceeded(sequences)) {\r
- throw LimitExceededException.newLimitExceeded(limit, sequences);\r
+    /**
+     * Ask the asynchronous engine the Configurator provides for the job
+     * identifier to cancel the job.
+     * 
+     * @param jobId
+     *            the job identifier; it is not validated here
+     * @return the value returned by the engine's cancelJob
+     */
+    public static boolean cancelJob(String jobId) {
+        AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);
+        return asyncEngine.cancelJob(jobId);
}\r
- compbio.runner.Util.writeInput(sequences, confExec);\r
- AsyncExecutor engine = Configurator.getAsyncEngine(confExec);\r
- String jobId = engine.submitJob(confExec);\r
- if (logger != null) {\r
- logger.log(timer, callingMethod, jobId);\r
+\r
+    /**
+     * Check the limit, write the input for the executable and submit it to
+     * the asynchronous engine.
+     * 
+     * @param sequences
+     *            the input sequences; checked against the limit and written
+     *            as the input of the executable
+     * @param confExec
+     *            the configured executable to submit
+     * @param logger
+     *            receives the timer, the calling method name and the job
+     *            identifier after submission; may be null, then nothing is
+     *            logged
+     * @param callingMethod
+     *            the name passed to the logger
+     * @param limit
+     *            the limit to enforce; may be null, then no limit is enforced
+     * @return the job identifier returned by the engine
+     * @throws LimitExceededException
+     *             if a limit is given and the sequences exceed it
+     * @throws JobSubmissionException
+     *             declared for failures while preparing or submitting the job
+     */
+    public static <T> String align(List<FastaSequence> sequences,
+            ConfiguredExecutable<T> confExec, WSLogger logger,
+            String callingMethod, Limit<T> limit)
+            throws LimitExceededException, JobSubmissionException {
+        Timer timer = Timer.getMilliSecondsTimer();
+        // A missing limit means "no restriction" rather than a failure.
+        if (limit != null && limit.isExceeded(sequences)) {
+            throw LimitExceededException.newLimitExceeded(limit, sequences);
+        }
+        compbio.runner.Util.writeInput(sequences, confExec);
+        AsyncExecutor engine = Configurator.getAsyncEngine(confExec);
+        String jobId = engine.submitJob(confExec);
+        if (logger != null) {
+            logger.log(timer, callingMethod, jobId);
+        }
+        return jobId;
}\r
- return jobId;\r
- }\r
\r
- /*\r
- * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation\r
- */\r
- public static final <T> List<String> getCommands(List<Option<T>> options,\r
- String keyValueSeparator) {\r
- List<String> oList = new ArrayList<String>();\r
- for (Option<T> o : options) {\r
- oList.add(o.toCommand(keyValueSeparator));\r
+    /*
+     * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation
+     */
+    /**
+     * Convert each option to its command string.
+     * 
+     * @param options
+     *            the options to convert; must not be null
+     * @param keyValueSeparator
+     *            the separator passed to Option.toCommand
+     * @return a new list holding one command string per option, in the order
+     *         of the input list
+     */
+    public static final <T> List<String> getCommands(List<Option<T>> options,
+            String keyValueSeparator) {
+        List<String> oList = new ArrayList<String>();
+        for (Option<T> o : options) {
+            oList.add(o.toCommand(keyValueSeparator));
+        }
+        return oList;
}\r
- return oList;\r
- }\r
\r
}\r