Adding AAConWS
authorpvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Mon, 22 Nov 2010 15:09:13 +0000 (15:09 +0000)
committerpvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Mon, 22 Nov 2010 15:09:13 +0000 (15:09 +0000)
git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@3343 e3abac25-378b-4346-85de-24260fe3988d

29 files changed:
.classpath
TODO.txt
WEB-INF/lib/compbio-util-1.3.jar [moved from WEB-INF/lib/compbio-util-1.2.jar with 80% similarity]
binaries/aaconservation.jar [new file with mode: 0644]
binaries/manual.txt [new file with mode: 0644]
build.xml
conf/Executable.properties
conf/settings/AAConLimits.xml [new file with mode: 0644]
conf/settings/AAConParameters.xml [new file with mode: 0644]
conf/settings/AAConPresets.xml [new file with mode: 0644]
datamodel/compbio/data/sequence/ClustalAlignmentUtil.java
datamodel/compbio/data/sequence/DisemblResultAnnot.java [new file with mode: 0644]
datamodel/compbio/data/sequence/FastaSequence.java
datamodel/compbio/data/sequence/JalviewAnnotation.java [new file with mode: 0644]
datamodel/compbio/data/sequence/MultiAnnotatedSequence.java
datamodel/compbio/data/sequence/SequenceUtil.java
runner/compbio/runner/conservation/AACon.java [new file with mode: 0644]
runner/compbio/runner/disorder/Disembl.java
testsrc/compbio/data/sequence/SequenceUtilTester.java
testsrc/compbio/metadata/AllTestSuit.java
testsrc/compbio/runner/conservation/AAConTester.java [new file with mode: 0644]
testsrc/compbio/runner/disorder/DisemblTester.java [new file with mode: 0644]
testsrc/testdata/TO1381.fasta.aln [new file with mode: 0644]
webservices/compbio/data/msa/Annotation.java [new file with mode: 0644]
webservices/compbio/data/msa/JManagement.java [new file with mode: 0644]
webservices/compbio/data/msa/Metadata.java [new file with mode: 0644]
webservices/compbio/data/msa/MsaWS.java
webservices/compbio/ws/server/AAConWS.java [new file with mode: 0644]
webservices/compbio/ws/server/WSUtil.java

index 9107de9..cff7667 100644 (file)
@@ -11,6 +11,7 @@
        <classpathentry kind="lib" path="testsrc/lib/testng-5.10-jdk15.jar"/>\r
        <classpathentry kind="lib" path="lib/servlet-api.jar"/>\r
        <classpathentry kind="lib" path="WEB-INF/lib/drmaa.jar"/>\r
-       <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.2.jar"/>\r
+       <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.3.jar"/>\r
+       <classpathentry kind="lib" path="binaries/aaconservation.jar"/>\r
        <classpathentry kind="output" path="WEB-INF/classes"/>\r
 </classpath>\r
index 79f6ca1..6e70077 100644 (file)
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,5 +1,6 @@
 TODO: \r
 \r
+Add iupred ws http://iupred.enzim.hu/\r
 Add globprot ws \r
 Add ronn ws\r
 \r
similarity index 80%
rename from WEB-INF/lib/compbio-util-1.2.jar
rename to WEB-INF/lib/compbio-util-1.3.jar
index a8e20be..9042b5a 100644 (file)
Binary files a/WEB-INF/lib/compbio-util-1.2.jar and b/WEB-INF/lib/compbio-util-1.3.jar differ
diff --git a/binaries/aaconservation.jar b/binaries/aaconservation.jar
new file mode 100644 (file)
index 0000000..375b8e9
Binary files /dev/null and b/binaries/aaconservation.jar differ
diff --git a/binaries/manual.txt b/binaries/manual.txt
new file mode 100644 (file)
index 0000000..ff7d66f
--- /dev/null
@@ -0,0 +1,90 @@
+
+AA Conservation version 1.0b (2 September 2010)
+
+This program allows calculation of conservation of amino acids in
+multiple sequence alignments.
+It implements 17 different conservation scores as described by Valdar in
+his paper (Scoring Residue Conservation, PROTEINS: Structure, Function
+and  Bioinformatics 48:227-241 (2002)) and SMERFS scoring algorithm as described
+by Manning, Jefferson and Barton (The contrasting properties of conservation
+and correlated phylogeny in protein functional residue prediction,
+BMC Bioinformatics (2008)).
+
+The conservation algorithms supported are:
+
+KABAT, JORES, SCHNEIDER, SHENKIN, GERSTEIN, TAYLOR_GAPS, TAYLOR_NO_GAPS, 
+ZVELIBIL, KARLIN, ARMON, THOMPSON, NOT_LANCET, MIRNY, WILLIAMSON, 
+LANDGRAF, SANDER, VALDAR, SMERFS
+
+Input format is either a FASTA formatted file containing aligned sequences with 
+gaps or a Clustal alignment. The valid gap characters are *, -, space character,
+X and . (a dot). By default the program prints the results to the command window. 
+If an output file is provided, the results are printed to the file in one of two 
+possible formats: with or without an alignment.
+If format is not specified, the program outputs conservation scores without 
+alignment. The scores are not normalized by default but they can be (see below).
+SMERFS default parameters are window width of 7, column score is set to
+the middle column, gap% cutoff of 0.1. If different values for SMERFS parameters 
+are required, then all three parameters must be provided. Details of the program 
+execution can be recorded to a separate file if an appropriate file path is 
+provided.
+
+List of command line arguments:
+
+-m=  precedes a comma separated list of method names
+     EXAMPLE: -m=KABAT,JORES,GERSTEIN
+     Optional; if no method is specified, all methods are calculated. 
+
+-i=  precedes a full path to the input FASTA file, required
+
+-o=  precedes a full path to the output file, optional, if no output file is 
+     provided the program will output to the standard out.  
+
+-t=  precedes the number of CPUs (CPU cores more precisely) to use. Optional, 
+        defaults to all processors available on the machine.  
+      
+-f=  precedes the format  of the results in the output file
+     two different formats are possible:
+      RESULT_WITH_ALIGNMENT
+      RESULT_NO_ALIGNMENT
+     Optional, if not specified RESULT_NO_ALIGNMENT is assumed 
+
+-s=  precedes a list of three comma separated parameters for SMERFS
+     the order of parameters is as follows:
+      1. window width - an integer and an odd number
+      2. how to allocate window scores to columns, two ways are possible:
+            MID_SCORE - gives the window score to the middle column
+            MAX_SCORE - gives the column the highest score of all the windows it 
+            belongs to
+      3. gap percentage cutoff - a float greater than 0 and less than or equal to 1
+     EXAMPLE: -s=5,MID_SCORE,0.1
+     Optional, default values are 7,MID_SCORE,0.1 
+      
+-d=  precedes a full path to a file where program execution details are to be 
+     listed. Optional; if not provided, no execution statistics are produced.  
+      
+-g=  precedes a comma separated list of gap characters provided by the user. If 
+     you are using an unusual gap character (not one of '-', '.', ' ', '*', 'X') 
+     you have to provide it. If you provide this list, you have to list all the 
+     accepted gap characters, including those that are treated as gaps by default. 
+     Optional.      
+      
+-n   using this key causes the results to be normalized. 
+        Normalized results have values between 0 and 1. Please note however, that 
+        some results cannot be normalized. In such a case, the system returns the 
+        non-normalized value and logs the issue to the standard error stream. 
+        The following formula is used for normalization 
+                       n = (d - dmin)/(dmax - dmin)
+        Negative results are first converted to positive by adding the absolute value of
+        the most negative result. Optional. 
+
+EXAMPLE HOW TO RUN THE PROGRAM:
+java -jar <jar name> -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n
+
+As a result of the execution, KABAT and SMERFS scores will be calculated. 
+Input comes from the prot1 file and the output, without an alignment, is recorded to 
+the prot1_results file. 
+
+Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton.
+Please visit http://www.compbio.dundee.ac.uk for further information.
\ No newline at end of file
index 724776f..05137d6 100644 (file)
--- a/build.xml
+++ b/build.xml
                        <delete file="${basedir}/${datamodel}"></delete>\r
                        <jar jarfile="${basedir}/${datamodel}">\r
                                <zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
-                                       <include name="compbio-util-1.2.jar"/>\r
+                                       <include name="compbio-util-1.3.jar"/>\r
                                </zipgroupfileset>\r
                                <fileset dir="${basedir}/WEB-INF/classes/"> \r
                                        <include name="compbio/data/sequence/*.class"/>\r
                        <jar jarfile="${full-jaba-client}">\r
                                <zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
                                        <include name="log4j-1.2.15.jar"/>\r
-                                       <include name="compbio-util-1.2.jar"/>\r
+                                       <include name="compbio-util-1.3.jar"/>\r
                                        <include name="compbio-annotations-1.0.jar"/>\r
                                        <include name="drmaa.jar"/>\r
                                </zipgroupfileset>\r
                        </jar>\r
                </target>\r
        \r
-       <target name="full-jaba-client" description="Pack binaries with sources and configuration files" depends="full-jaba-client-jar, pack-binaries">\r
+       <target name="full-jaba-client" description="Pack binaries with sources and configuration files" \r
+                       depends="full-jaba-client-jar, pack-binaries">\r
                <echo>Packing binaries, and configuration files</echo>\r
                <zip destfile="${full-jaba-client-pack}" >\r
                        <zipgroupfileset file="${binaries}" />\r
                        <jar jarfile="${full-jaba-client}">\r
                                <zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >\r
                                        <include name="log4j-1.2.15.jar"/>\r
-                                       <include name="compbio-util-1.2.jar"/>\r
+                                       <include name="compbio-util-1.3.jar"/>\r
                                        <include name="compbio-annotations-1.0.jar"/>\r
                                        <include name="drmaa.jar"/>\r
                                </zipgroupfileset>\r
index 0a9b5cd..247c9f5 100644 (file)
@@ -43,6 +43,8 @@ cluster.tcoffee.bin=/homes/pvtroshin/workspace/jaba2/binaries/src/tcoffee/t_coff
 #/sw/bin/t_coffee\r
 # Sub matrix support does not work \r
 #tcoffee.-matrix.path=binaries/matrices\r
+# This variable is required by tcoffee\r
+tcoffee.bin.env=HOME_4_TCOFFEE#jobsout;\r
 tcoffee.presets.file=conf/settings/TcoffeePresets.xml\r
 tcoffee.parameters.file=conf/settings/TcoffeeParameters.xml\r
 tcoffee.limits.file=conf/settings/TcoffeeLimits.xml\r
@@ -75,5 +77,14 @@ jronn.cluster.settings=-q 64bit-pri.q -pe smp 4 -l h_vmem=1700M -l ram=1700M -l
 local.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py\r
 cluster.disembl.bin=/homes/pvtroshin/soft/DisEMBL-1.4raw/DisEMBL.py\r
 #disembl.parameters.file=conf/settings/JronnParameters.xml\r
-disembl.limits.file=conf/settings/JronnLimits.xml\r
+disembl.limits.file=conf/settings/DisemblLimits.xml\r
 disembl.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M\r
+\r
+local.aacon.bin.windows=D:\\Java\\jdk1.6.0_14\\bin\\java.exe \r
+local.aacon.bin=/sw/java/latest/bin/java\r
+cluster.aacon.bin=/sw/java/latest/bin/java\r
+aacon.jar.file=binaries/aaconservation.jar\r
+aacon.parameters.file=conf/settings/AAConParameters.xml\r
+aacon.limits.file=conf/settings/AAConLimits.xml\r
+#TODO jronn.jvm.options=-Xms32M -Xmx512M\r
+aacon.cluster.cpunum=4\r
diff --git a/conf/settings/AAConLimits.xml b/conf/settings/AAConLimits.xml
new file mode 100644 (file)
index 0000000..a3e4935
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<limits>\r
+ <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+       <limit isDefault="true">\r
+               <seqNumber>100000</seqNumber>\r
+               <seqLength>100000</seqLength>\r
+       </limit>\r
+       <limit isDefault="false">\r
+               <preset># LocalEngineExecutionLimit #</preset>\r
+               <seqNumber>1000</seqNumber>\r
+               <seqLength>1000</seqLength>\r
+       </limit>\r
+</limits>\r
diff --git a/conf/settings/AAConParameters.xml b/conf/settings/AAConParameters.xml
new file mode 100644 (file)
index 0000000..9f5da9f
--- /dev/null
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<runnerConfig>\r
+       <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+       <options>\r
+               <name>Normalize</name>\r
+               <description>Normalize the results. The results of the calculation by different methods will all be scaled to the range between 0 and 1, so that they are comparable</description>\r
+               <optionNames>-n</optionNames>\r
+               <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+       </options>\r
+       <prmSeparator>=</prmSeparator>\r
+       <parameters>\r
+               <name>Calculation method</name>\r
+               <description>The method of the calculation to use</description>\r
+               <optionNames>-m</optionNames>\r
+               <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+               <defaultValue>SHENKIN</defaultValue>\r
+        <possibleValues>KABAT</possibleValues>\r
+        <possibleValues>JORES</possibleValues>\r
+               <possibleValues>SCHNEIDER</possibleValues>\r
+               <possibleValues>SHENKIN</possibleValues>\r
+               <possibleValues>GERSTEIN</possibleValues>\r
+               <possibleValues>TAYLOR_GAPS</possibleValues>\r
+               <possibleValues>TAYLOR_NO_GAPS</possibleValues> \r
+        <possibleValues>ZVELIBIL</possibleValues>\r
+        <possibleValues>KARLIN</possibleValues>\r
+               <possibleValues>ARMON</possibleValues>\r
+               <possibleValues>THOMPSON</possibleValues>\r
+               <possibleValues>NOT_LANCET</possibleValues>\r
+               <possibleValues>MIRNY</possibleValues>\r
+               <possibleValues>WILLIAMSON</possibleValues> \r
+        <possibleValues>LANDGRAF</possibleValues>\r
+        <possibleValues>SANDER</possibleValues>\r
+        <possibleValues>VALDAR</possibleValues>\r
+        <possibleValues>SMERFS</possibleValues>\r
+       </parameters>\r
+</runnerConfig>\r
diff --git a/conf/settings/AAConPresets.xml b/conf/settings/AAConPresets.xml
new file mode 100644 (file)
index 0000000..cdf8d37
--- /dev/null
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
+<presets>\r
+ <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
+       <preset>\r
+               <name>Quick conservation</name>\r
+               <description>Collection of fast conservation methods</description>\r
+               <optlist>\r
+                       <option>-m=KABAT,JORES,SCHNEIDER,SHENKIN,GERSTEIN,TAYLOR_GAPS,TAYLOR_NO_GAPS,ZVELIBIL,ARMON,THOMPSON,NOT_LANCET,MIRNY,WILLIAMSON</option>\r
+               </optlist>\r
+       </preset>\r
+       <preset>\r
+               <name>Slow conservation</name>\r
+               <description>Collection of most expensive (slow) conservation methods</description>\r
+               <optlist>\r
+                       <option>-m=LANDGRAF,KARLIN,SANDER,VALDAR,SMERFS</option>\r
+               </optlist>\r
+       </preset>\r
+       <preset>\r
+               <name>Complete conservation</name>\r
+               <description>Calculate conservation with all supported methods</description>\r
+               <optlist>\r
+                       <option>-m=KABAT,JORES,SCHNEIDER,SHENKIN,GERSTEIN,TAYLOR_GAPS,TAYLOR_NO_GAPS,ZVELIBIL,ARMON,THOMPSON,NOT_LANCET,MIRNY,WILLIAMSON,LANDGRAF,KARLIN,SANDER,VALDAR,SMERFS</option>\r
+               </optlist>\r
+       </preset>\r
+</presets>\r
+\r
+\r
+\r
\r
+\r
index 5fce997..b6076a4 100644 (file)
@@ -1,19 +1,15 @@
-/* Copyright (c) 2009 Peter Troshin\r
- *  \r
- *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
- * \r
- *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
- *  Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- *  License for more details.\r
- * \r
- *  A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
  */\r
 \r
 package compbio.data.sequence;\r
@@ -45,246 +41,247 @@ import java.util.logging.Logger;
  */\r
 public final class ClustalAlignmentUtil {\r
 \r
-    private static final Logger log = Logger\r
-           .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
-\r
-    /**\r
-     * Dash char to be used as gap char in the alignments\r
-     */\r
-    public static final char gapchar = '-';\r
-\r
-    /*\r
-     * Number of spaces separating the name and the sequence\r
-     */\r
-    private static final String spacer = "      "; // 6 space characters\r
-    /*\r
-     * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is\r
-     * longer than that it gets trimmed in the end\r
-     */\r
-    private static final int maxNameLength = 30; // Maximum name length\r
-    /*\r
-     * If all sequences names in the alignment is shorter than\r
-     * minNameHolderLength than spaces are added to complete the name up to\r
-     * minNameHolderLength\r
-     */\r
-    private static final int minNameHolderLength = 10; // Minimum number of\r
-\r
-    // TODO check whether clustal still loads data if length is 60!\r
-    private static final int oneLineAlignmentLength = 60; // this could in fact\r
-\r
-    // be 50\r
-\r
-    // for long names ~30 chars\r
-\r
-    /**\r
-     * Read Clustal formatted alignment. Limitations: Does not read consensus\r
-     * \r
-     * Sequence names as well as the sequences are not guaranteed to be unique!\r
-     * \r
-     * @throws {@link IOException}\r
-     * @throws {@link UnknownFileFormatException}\r
-     */\r
-    public static Alignment readClustalFile(InputStream instream)\r
-           throws IOException, UnknownFileFormatException {\r
-\r
-       boolean flag = false;\r
-\r
-       List<String> headers = new ArrayList<String>();\r
-       Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
-       FastaSequence[] seqs = null;\r
-\r
-       String line;\r
-\r
-       BufferedReader breader = new BufferedReader(new InputStreamReader(\r
-               instream));\r
-       while ((line = breader.readLine()) != null) {\r
-           if (line.indexOf(" ") != 0) {\r
-               java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
-               String id = "";\r
-\r
-               if (str.hasMoreTokens()) {\r
-                   id = str.nextToken();\r
-                   // PROBCONS output clustal formatted file with not mention\r
-                   // of CLUSTAL (:-))\r
-                   if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
-                       flag = true;\r
-                   } else {\r
-                       if (flag) {\r
-                           StringBuffer tempseq;\r
-                           if (seqhash.containsKey(id)) {\r
-                               tempseq = seqhash.get(id);\r
-                           } else {\r
-                               tempseq = new StringBuffer();\r
-                               seqhash.put(id, tempseq);\r
-                           }\r
-\r
-                           if (!(headers.contains(id))) {\r
-                               headers.add(id);\r
-                           }\r
-\r
-                           tempseq.append(str.nextToken());\r
+       private static final Logger log = Logger\r
+                       .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
+\r
+       /**\r
+        * Dash char to be used as gap char in the alignments\r
+        */\r
+       public static final char gapchar = '-';\r
+\r
+       /*\r
+        * Number of spaces separating the name and the sequence\r
+        */\r
+       private static final String spacer = "      "; // 6 space characters\r
+       /*\r
+        * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is\r
+        * longer than that it gets trimmed in the end\r
+        */\r
+       private static final int maxNameLength = 30; // Maximum name length\r
+       /*\r
+        * If all sequences names in the alignment is shorter than\r
+        * minNameHolderLength than spaces are added to complete the name up to\r
+        * minNameHolderLength\r
+        */\r
+       private static final int minNameHolderLength = 10; // Minimum number of\r
+\r
+       // TODO check whether clustal still loads data if length is 60!\r
+       private static final int oneLineAlignmentLength = 60; // this could in fact\r
+\r
+       // be 50\r
+\r
+       // for long names ~30 chars\r
+\r
+       /**\r
+        * Read Clustal formatted alignment. Limitations: Does not read consensus\r
+        * \r
+        * Sequence names as well as the sequences are not guaranteed to be unique!\r
+        * \r
+        * @throws {@link IOException}\r
+        * @throws {@link UnknownFileFormatException}\r
+        */\r
+       public static Alignment readClustalFile(InputStream instream)\r
+                       throws IOException, UnknownFileFormatException {\r
+\r
+               boolean flag = false;\r
+\r
+               List<String> headers = new ArrayList<String>();\r
+               Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
+               FastaSequence[] seqs = null;\r
+\r
+               String line;\r
+\r
+               BufferedReader breader = new BufferedReader(new InputStreamReader(\r
+                               instream));\r
+               while ((line = breader.readLine()) != null) {\r
+                       if (line.indexOf(" ") != 0) {\r
+                               java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
+                               String id = "";\r
+\r
+                               if (str.hasMoreTokens()) {\r
+                                       id = str.nextToken();\r
+                                       // PROBCONS output clustal formatted file with not mention\r
+                                       // of CLUSTAL (:-))\r
+                                       if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
+                                               flag = true;\r
+                                       } else {\r
+                                               if (flag) {\r
+                                                       StringBuffer tempseq;\r
+                                                       if (seqhash.containsKey(id)) {\r
+                                                               tempseq = seqhash.get(id);\r
+                                                       } else {\r
+                                                               tempseq = new StringBuffer();\r
+                                                               seqhash.put(id, tempseq);\r
+                                                       }\r
+\r
+                                                       if (!(headers.contains(id))) {\r
+                                                               headers.add(id);\r
+                                                       }\r
+\r
+                                                       tempseq.append(str.nextToken());\r
+                                               }\r
+                                       }\r
+                               }\r
                        }\r
-                   }\r
                }\r
-           }\r
-       }\r
-       breader.close();\r
+               breader.close();\r
 \r
-       // TODO improve this bit\r
-       if (flag) {\r
+               // TODO improve this bit\r
+               if (flag) {\r
 \r
-           // Add sequences to the hash\r
-           seqs = new FastaSequence[headers.size()];\r
-           for (int i = 0; i < headers.size(); i++) {\r
-               if (seqhash.get(headers.get(i)) != null) {\r
+                       // Add sequences to the hash\r
+                       seqs = new FastaSequence[headers.size()];\r
+                       for (int i = 0; i < headers.size(); i++) {\r
+                               if (seqhash.get(headers.get(i)) != null) {\r
 \r
-                   FastaSequence newSeq = new FastaSequence(headers.get(i),\r
-                           seqhash.get(headers.get(i)).toString());\r
+                                       FastaSequence newSeq = new FastaSequence(headers.get(i),\r
+                                                       seqhash.get(headers.get(i)).toString());\r
 \r
-                   seqs[i] = newSeq;\r
+                                       seqs[i] = newSeq;\r
 \r
-               } else {\r
-                   // should not happened\r
-                   throw new AssertionError(\r
-                           "Bizarreness! Can't find sequence for "\r
-                                   + headers.get(i));\r
+                               } else {\r
+                                       // should not happened\r
+                                       throw new AssertionError(\r
+                                                       "Bizarreness! Can't find sequence for "\r
+                                                                       + headers.get(i));\r
+                               }\r
+                       }\r
                }\r
-           }\r
-       }\r
-       if (seqs == null || seqs.length == 0) {\r
-           throw new UnknownFileFormatException(\r
-                   "Input does not appear to be a clustal file! ");\r
-       }\r
-       return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
-               Program.CLUSTAL, gapchar));\r
-    }\r
-\r
-    /**\r
-     * \r
-     * @param input\r
-     * @return true if the file is recognised as Clustal formatted alignment,\r
-     *         false otherwise\r
-     */\r
-    public static boolean isValidClustalFile(InputStream input) {\r
-       if (input == null) {\r
-           throw new NullPointerException("Input is expected!");\r
-       }\r
-       BufferedReader breader = new BufferedReader(\r
-               new InputStreamReader(input));\r
-       try {\r
-           if (input.available() < 10) {\r
-               return false;\r
-           }\r
-           // read first 10 lines to find "Clustal"\r
-           for (int i = 0; i < 10; i++) {\r
-               String line = breader.readLine();\r
-               if (line != null) {\r
-                   line = line.toUpperCase().trim();\r
-                   if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
-                       return true;\r
-                   }\r
+               if (seqs == null || seqs.length == 0) {\r
+                       throw new UnknownFileFormatException(\r
+                                       "Input does not appear to be a clustal file! ");\r
                }\r
-           }\r
-\r
-           breader.close();\r
-       } catch (IOException e) {\r
-           log.severe("Could not read from the stream! "\r
-                   + e.getLocalizedMessage() + e.getCause());\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, breader);\r
-       }\r
-       return false;\r
-    }\r
-\r
-    /**\r
-     * Write Clustal formatted alignment Limitations: does not record the\r
-     * consensus. Potential bug - records 60 chars length alignment where\r
-     * Clustal would have recorded 50 chars.\r
-     * \r
-     * @param outStream\r
-     * \r
-     * @param alignment\r
-     * @throws IOException\r
-     */\r
-    public static void writeClustalAlignment(final OutputStream outStream,\r
-           final Alignment alignment) throws IOException {\r
-       List<FastaSequence> seqs = alignment.getSequences();\r
-\r
-       PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream));\r
-\r
-       out.write("CLUSTAL\n\n\n");\r
-\r
-       int max = 0;\r
-       int maxidLength = 0;\r
-\r
-       int i = 0;\r
-       // Find the longest sequence name\r
-       for (FastaSequence fs : seqs) {\r
-           String tmp = fs.getId();\r
-\r
-           if (fs.getSequence().length() > max) {\r
-               max = fs.getSequence().length();\r
-           }\r
-           if (tmp.length() > maxidLength) {\r
-               maxidLength = tmp.length();\r
-           }\r
-           i++;\r
-       }\r
-       if (maxidLength < minNameHolderLength) {\r
-           maxidLength = minNameHolderLength;\r
-       }\r
-       if (maxidLength > maxNameLength) {\r
-           maxidLength = 30; // the rest will be trimmed\r
+               return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
+                               Program.CLUSTAL, gapchar));\r
        }\r
 \r
-       int oneLineAlignmentLength = 60;\r
-       int nochunks = max / oneLineAlignmentLength + 1;\r
-\r
-       for (i = 0; i < nochunks; i++) {\r
-           int j = 0;\r
-           for (FastaSequence fs : seqs) {\r
-\r
-               String name = fs.getId();\r
-               // display at most 30 characters in the name, keep the names\r
-               // 6 spaces away from the alignment for longest sequence names,\r
-               // and more than this for shorter names\r
-               out.format("%-" + maxidLength + "s" + spacer,\r
-                       (name.length() > maxNameLength ? name.substring(0,\r
-                               maxidLength) : name));\r
-               int start = i * oneLineAlignmentLength;\r
-               int end = start + oneLineAlignmentLength;\r
-\r
-               if (end < fs.getSequence().length()\r
-                       && start < fs.getSequence().length()) {\r
-                   out.write(fs.getSequence().substring(start, end) + "\n");\r
-               } else {\r
-                   if (start < fs.getSequence().length()) {\r
-                       out.write(fs.getSequence().substring(start) + "\n");\r
-                   }\r
+       /**\r
+        * Please note this method closes the input stream provided as a parameter\r
+        * \r
+        * @param input\r
+        * @return true if the file is recognised as Clustal formatted alignment,\r
+        *         false otherwise\r
+        */\r
+       public static boolean isValidClustalFile(InputStream input) {\r
+               if (input == null) {\r
+                       throw new NullPointerException("Input is expected!");\r
                }\r
-               j++;\r
-           }\r
-           out.write("\n");\r
-       }\r
-       try {\r
-           out.close();\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, out);\r
+               BufferedReader breader = new BufferedReader(\r
+                               new InputStreamReader(input));\r
+               try {\r
+                       if (input.available() < 10) {\r
+                               return false;\r
+                       }\r
+                       // read first 10 lines to find "Clustal"\r
+                       for (int i = 0; i < 10; i++) {\r
+                               String line = breader.readLine();\r
+                               if (line != null) {\r
+                                       line = line.toUpperCase().trim();\r
+                                       if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
+                                               return true;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+                       breader.close();\r
+               } catch (IOException e) {\r
+                       log.severe("Could not read from the stream! "\r
+                                       + e.getLocalizedMessage() + e.getCause());\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, breader);\r
+               }\r
+               return false;\r
        }\r
-    }\r
 \r
-    public static Alignment readClustalFile(File file)\r
-           throws UnknownFileFormatException, IOException {\r
-       if (file == null) {\r
-           throw new NullPointerException("File is expected!");\r
+       /**\r
+        * Write Clustal formatted alignment. Limitations: does not record the\r
+        * consensus. Potential bug - records 60 chars length alignment where\r
+        * Clustal would have recorded 50 chars.\r
+        * \r
+        * @param outStream\r
+        * \r
+        * @param alignment\r
+        * @throws IOException\r
+        */\r
+       public static void writeClustalAlignment(final OutputStream outStream,\r
+                       final Alignment alignment) throws IOException {\r
+               List<FastaSequence> seqs = alignment.getSequences();\r
+\r
+               PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream));\r
+\r
+               out.write("CLUSTAL\n\n\n");\r
+\r
+               int max = 0;\r
+               int maxidLength = 0;\r
+\r
+               int i = 0;\r
+               // Find the longest sequence name\r
+               for (FastaSequence fs : seqs) {\r
+                       String tmp = fs.getId();\r
+\r
+                       if (fs.getSequence().length() > max) {\r
+                               max = fs.getSequence().length();\r
+                       }\r
+                       if (tmp.length() > maxidLength) {\r
+                               maxidLength = tmp.length();\r
+                       }\r
+                       i++;\r
+               }\r
+               if (maxidLength < minNameHolderLength) {\r
+                       maxidLength = minNameHolderLength;\r
+               }\r
+               if (maxidLength > maxNameLength) {\r
+                       maxidLength = 30; // the rest will be trimmed\r
+               }\r
+\r
+               int oneLineAlignmentLength = 60;\r
+               int nochunks = max / oneLineAlignmentLength + 1;\r
+\r
+               for (i = 0; i < nochunks; i++) {\r
+                       int j = 0;\r
+                       for (FastaSequence fs : seqs) {\r
+\r
+                               String name = fs.getId();\r
+                               // display at most 30 characters in the name, keep the names\r
+                               // 6 spaces away from the alignment for longest sequence names,\r
+                               // and more than this for shorter names\r
+                               out.format("%-" + maxidLength + "s" + spacer,\r
+                                               (name.length() > maxNameLength ? name.substring(0,\r
+                                                               maxidLength) : name));\r
+                               int start = i * oneLineAlignmentLength;\r
+                               int end = start + oneLineAlignmentLength;\r
+\r
+                               if (end < fs.getSequence().length()\r
+                                               && start < fs.getSequence().length()) {\r
+                                       out.write(fs.getSequence().substring(start, end) + "\n");\r
+                               } else {\r
+                                       if (start < fs.getSequence().length()) {\r
+                                               out.write(fs.getSequence().substring(start) + "\n");\r
+                                       }\r
+                               }\r
+                               j++;\r
+                       }\r
+                       out.write("\n");\r
+               }\r
+               try {\r
+                       out.close();\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, out);\r
+               }\r
        }\r
-       FileInputStream fio = new FileInputStream(file);\r
-       Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
-       try {\r
-           fio.close();\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, fio);\r
+\r
+       public static Alignment readClustalFile(File file)\r
+                       throws UnknownFileFormatException, IOException {\r
+               if (file == null) {\r
+                       throw new NullPointerException("File is expected!");\r
+               }\r
+               FileInputStream fio = new FileInputStream(file);\r
+               Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
+               try {\r
+                       fio.close();\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, fio);\r
+               }\r
+               return seqAl;\r
        }\r
-       return seqAl;\r
-    }\r
 }\r
diff --git a/datamodel/compbio/data/sequence/DisemblResultAnnot.java b/datamodel/compbio/data/sequence/DisemblResultAnnot.java
new file mode 100644 (file)
index 0000000..c5f026c
--- /dev/null
@@ -0,0 +1,5 @@
+package compbio.data.sequence;\r
+\r
+public enum DisemblResultAnnot {\r
+       COILS, REM465, HOTLOOPS\r
+}\r
index 6072d29..2032fec 100644 (file)
@@ -1,19 +1,15 @@
-/* Copyright (c) 2009 Peter Troshin\r
- *  \r
- *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0\r
- * \r
- *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
- *  Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- *  License for more details.\r
- * \r
- *  A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
  */\r
 \r
 package compbio.data.sequence;\r
@@ -42,151 +38,155 @@ import compbio.util.annotation.Immutable;
 @Immutable\r
 public class FastaSequence {\r
 \r
-    /**\r
-     * Sequence id\r
-     */\r
-    private String id;\r
-\r
-    // TODO what about gapped sequence here! should be indicated\r
-    /**\r
-     * Returns the string representation of sequence\r
-     */\r
-    private String sequence;\r
-\r
-    private FastaSequence() {\r
-       // Default constructor for JaxB\r
-    }\r
-\r
-    /**\r
-     * Upon construction the any whitespace characters are removed from the\r
-     * sequence\r
-     * \r
-     * @param id\r
-     * @param sequence\r
-     */\r
-    public FastaSequence(String id, String sequence) {\r
-       this.id = id;\r
-       this.sequence = SequenceUtil.cleanSequence(sequence);\r
-    }\r
-\r
-    /**\r
-     * Gets the value of id\r
-     * \r
-     * @return the value of id\r
-     */\r
-    public String getId() {\r
-       return this.id;\r
-    }\r
-\r
-    /**\r
-     * Gets the value of sequence\r
-     * \r
-     * @return the value of sequence\r
-     */\r
-    public String getSequence() {\r
-       return this.sequence;\r
-    }\r
-\r
-    public static int countMatchesInSequence(final String theString,\r
-           final String theRegExp) {\r
-       final Pattern p = Pattern.compile(theRegExp);\r
-       final Matcher m = p.matcher(theString);\r
-       int cnt = 0;\r
-       while (m.find()) {\r
-           cnt++;\r
+       /**\r
+        * Sequence id\r
+        */\r
+       private String id;\r
+\r
+       // TODO what about gapped sequence here! should be indicated\r
+       /**\r
+        * The string representation of the sequence\r
+        */\r
+       private String sequence;\r
+\r
+       private FastaSequence() {\r
+               // Default constructor for JaxB\r
        }\r
-       return cnt;\r
-    }\r
-\r
-    public String getFormattedFasta() {\r
-       return getFormatedSequence(80);\r
-    }\r
-\r
-    /**\r
-     * \r
-     * @return one line name, next line sequence, no matter what the sequence\r
-     *         length is\r
-     */\r
-    public String getOnelineFasta() {\r
-       String fasta = ">" + getId() + SysPrefs.newlinechar;\r
-       fasta += getSequence() + SysPrefs.newlinechar;\r
-       return fasta;\r
-    }\r
-\r
-    /**\r
-     * Format sequence per width letter in one string. Without spaces.\r
-     * \r
-     * @return multiple line formated sequence, one line width letters length\r
-     * \r
-     */\r
-    public String getFormatedSequence(final int width) {\r
-       if (sequence == null) {\r
-           return "";\r
+\r
+       /**\r
+        * Upon construction any whitespace characters are removed from the\r
+        * sequence\r
+        * \r
+        * @param id\r
+        * @param sequence\r
+        */\r
+       public FastaSequence(String id, String sequence) {\r
+               this.id = id;\r
+               this.sequence = SequenceUtil.cleanSequence(sequence);\r
        }\r
 \r
-       assert width >= 0 : "Wrong width parameter ";\r
-\r
-       final StringBuilder sb = new StringBuilder(sequence);\r
-       int nchunks = sequence.length() / width;\r
-       // add up inserted new line chars\r
-       nchunks = (nchunks + sequence.length()) / width;\r
-       int nlineCharcounter = 0;\r
-       for (int i = 1; i <= nchunks; i++) {\r
-           int insPos = width * i + nlineCharcounter;\r
-           // to prevent inserting new line in the very end of a sequence then\r
-           // it would have failed.\r
-           // Also covers the case when the sequences shorter than width\r
-           if (sb.length() <= insPos) {\r
-               break;\r
-           }\r
-           sb.insert(insPos, "\n");\r
-           nlineCharcounter++;\r
+       /**\r
+        * Gets the value of id\r
+        * \r
+        * @return the value of id\r
+        */\r
+       public String getId() {\r
+               return this.id;\r
        }\r
-       return sb.toString();\r
-    }\r
-\r
-    /**\r
-     * \r
-     * @return sequence length\r
-     */\r
-    public int getLength() {\r
-       return this.sequence.length();\r
-    }\r
-\r
-    /**\r
-     * Same as oneLineFasta\r
-     */\r
-    @Override\r
-    public String toString() {\r
-       return this.getOnelineFasta();\r
-    }\r
-\r
-    @Override\r
-    public int hashCode() {\r
-       final int prime = 17;\r
-       int result = 1;\r
-       result = prime * result + ((id == null) ? 0 : id.hashCode());\r
-       result = prime * result\r
-               + ((sequence == null) ? 0 : sequence.hashCode());\r
-       return result;\r
-    }\r
-\r
-    @Override\r
-    public boolean equals(Object obj) {\r
-       if (obj == null) {\r
-           return false;\r
+\r
+       /**\r
+        * Gets the value of sequence\r
+        * \r
+        * @return the value of sequence\r
+        */\r
+       public String getSequence() {\r
+               return this.sequence;\r
        }\r
-       if (!(obj instanceof FastaSequence)) {\r
-           return false;\r
+\r
+       public static int countMatchesInSequence(final String theString,\r
+                       final String theRegExp) {\r
+               final Pattern p = Pattern.compile(theRegExp);\r
+               final Matcher m = p.matcher(theString);\r
+               int cnt = 0;\r
+               while (m.find()) {\r
+                       cnt++;\r
+               }\r
+               return cnt;\r
        }\r
-       FastaSequence fs = (FastaSequence) obj;\r
-       if (!fs.getId().equals(this.getId())) {\r
-           return false;\r
+\r
+       public String getFormattedFasta() {\r
+               return getFormatedSequence(80);\r
        }\r
-       if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {\r
-           return false;\r
+\r
+       /**\r
+        * \r
+        * @return one line name, next line sequence, no matter what the sequence\r
+        *         length is\r
+        */\r
+       public String getOnelineFasta() {\r
+               String fasta = ">" + getId() + SysPrefs.newlinechar;\r
+               fasta += getSequence() + SysPrefs.newlinechar;\r
+               return fasta;\r
+       }\r
+\r
+       /**\r
+        * Format sequence per width letter in one string. Without spaces.\r
+        * \r
+        * @return multiple line formatted sequence, each line width letters long\r
+        * \r
+        */\r
+       public String getFormatedSequence(final int width) {\r
+               if (sequence == null) {\r
+                       return "";\r
+               }\r
+\r
+               assert width >= 0 : "Wrong width parameter ";\r
+\r
+               final StringBuilder sb = new StringBuilder(sequence);\r
+               // int tail = nrOfWindows % WIN_SIZE;\r
+               // final int turns = (nrOfWindows - tail) / WIN_SIZE;\r
+\r
+               int tailLen = sequence.length() % width;\r
+               // add up inserted new line chars\r
+               int nchunks = (sequence.length() - tailLen) / width;\r
+               int nlineCharcounter = 0;\r
+               int insPos = 0;\r
+               for (int i = 1; i <= nchunks; i++) {\r
+                       insPos = width * i + nlineCharcounter;\r
+                       // to prevent inserting new line in the very end of a sequence then\r
+                       // it would have failed.\r
+                       if (sb.length() <= insPos) {\r
+                               break;\r
+                       }\r
+                       sb.insert(insPos, "\n");\r
+                       nlineCharcounter++;\r
+               }\r
+               // sb.insert(insPos + tailLen, "\n");\r
+               return sb.toString();\r
+       }\r
+\r
+       /**\r
+        * \r
+        * @return sequence length\r
+        */\r
+       public int getLength() {\r
+               return this.sequence.length();\r
+       }\r
+\r
+       /**\r
+        * Same as oneLineFasta\r
+        */\r
+       @Override\r
+       public String toString() {\r
+               return this.getOnelineFasta();\r
+       }\r
+\r
+       @Override\r
+       public int hashCode() {\r
+               final int prime = 17;\r
+               int result = 1;\r
+               result = prime * result + ((id == null) ? 0 : id.hashCode());\r
+               result = prime * result\r
+                               + ((sequence == null) ? 0 : sequence.hashCode());\r
+               return result;\r
+       }\r
+\r
+       @Override\r
+       public boolean equals(Object obj) {\r
+               if (obj == null) {\r
+                       return false;\r
+               }\r
+               if (!(obj instanceof FastaSequence)) {\r
+                       return false;\r
+               }\r
+               FastaSequence fs = (FastaSequence) obj;\r
+               if (!fs.getId().equals(this.getId())) {\r
+                       return false;\r
+               }\r
+               if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {\r
+                       return false;\r
+               }\r
+               return true;\r
        }\r
-       return true;\r
-    }\r
 \r
 }\r
diff --git a/datamodel/compbio/data/sequence/JalviewAnnotation.java b/datamodel/compbio/data/sequence/JalviewAnnotation.java
new file mode 100644 (file)
index 0000000..cf19937
--- /dev/null
@@ -0,0 +1,7 @@
+package compbio.data.sequence;\r
+\r
+public class JalviewAnnotation {\r
+\r
+       String annotation;\r
+\r
+}\r
index 580a22e..1a889e3 100644 (file)
@@ -2,6 +2,9 @@ package compbio.data.sequence;
 \r
 import java.util.EnumMap;\r
 import java.util.List;\r
+import java.util.Map;\r
+\r
+import compbio.util.annotation.NotThreadSafe;\r
 \r
 /**\r
  * TODO complete\r
@@ -11,23 +14,67 @@ import java.util.List;
  * @param <T>\r
  *            enum type\r
  */\r
+@NotThreadSafe\r
 public class MultiAnnotatedSequence<T extends Enum<T>> {\r
 \r
-    private final EnumMap<T, List<Float>> annotation;\r
+       private final Map<T, List<Float>> annotations;\r
+\r
+       public MultiAnnotatedSequence(Class<T> enumeration) {\r
+               this.annotations = new EnumMap<T, List<Float>>(enumeration);\r
+       }\r
+\r
+       public void addAnnotation(T type, List<Float> annotation) {\r
+               assert type != null : "Type is expected";\r
+               assert annotation != null : "Not empty value is expected!";\r
+               if (!annotations.isEmpty()) {\r
+                       assert annotations.values().iterator().next().size() == annotation\r
+                                       .size() : "Annotations must contain the same number of elements!";\r
+               }\r
+               this.annotations.put(type, annotation);\r
+       }\r
+\r
+       public Map<T, List<Float>> getAnnotations() {\r
+               return new EnumMap<T, List<Float>>(this.annotations);\r
+       }\r
 \r
-    private MultiAnnotatedSequence(Class<T> type) {\r
-       this.annotation = new EnumMap<T, List<Float>>(type);\r
-    }\r
+       @Override\r
+       public int hashCode() {\r
+               final int prime = 31;\r
+               int result = 1;\r
+               result = prime * result\r
+                               + ((annotations == null) ? 0 : annotations.hashCode());\r
+               return result;\r
+       }\r
 \r
-    //    public MultiAnnotatedSequence getFloatInstance(FastaSequence fsequence) {\r
-    // return null;\r
-    //}\r
+       @Override\r
+       public boolean equals(Object obj) {\r
+               if (this == obj)\r
+                       return true;\r
+               if (obj == null)\r
+                       return false;\r
+               if (getClass() != obj.getClass())\r
+                       return false;\r
+               MultiAnnotatedSequence other = (MultiAnnotatedSequence) obj;\r
+               if (annotations == null) {\r
+                       if (other.annotations != null)\r
+                               return false;\r
+               } else if (!annotations.equals(other.annotations))\r
+                       return false;\r
+               return true;\r
+       }\r
 \r
-    public EnumMap<T, List<Integer>> getIntegerInstance(Class<T> enumeration) {\r
-       return new EnumMap<T, List<Integer>>(enumeration);\r
-    }\r
+       @Override\r
+       public String toString() {\r
+               String value = "";\r
+               for (Map.Entry<T, List<Float>> annt : annotations.entrySet()) {\r
+                       value += annt.getKey() + " ";\r
+                       value += annt.getValue() + "\n";\r
+               }\r
+               return value;\r
+       }\r
 \r
-    public EnumMap<T, List<Float>> getFloatInstance(Class<T> enumeration) {\r
-       return new EnumMap<T, List<Float>>(enumeration);\r
-    }\r
+       public JalviewAnnotation toJalviewAnnotation() {\r
+               // TODO Auto-generated method stub\r
+               return null;\r
+       }\r
 }\r
index 99a8147..149e0e0 100644 (file)
@@ -1,22 +1,15 @@
-/* \r
- * @(#)SequenceUtil.java 1.0 September 2009\r
- * \r
- * Copyright (c) 2009 Peter Troshin\r
- *  \r
- * Jalview Web Services version: 2.0     \r
- * \r
- *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
- *  Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- *  License for more details.\r
- * \r
- *  A copy of the license is in apache_license.txt. It is also available here:\r
- * see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * @(#)SequenceUtil.java 1.0 September 2009 Copyright (c) 2009 Peter Troshin\r
+ * Jalview Web Services version: 2.0 This library is free software; you can\r
+ * redistribute it and/or modify it under the terms of the Apache License\r
+ * version 2 as published by the Apache Software Foundation This library is\r
+ * distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;\r
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\r
+ * PARTICULAR PURPOSE. See the Apache License for more details. A copy of the\r
+ * license is in apache_license.txt. It is also available here: see:\r
+ * http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or derived\r
+ * work distributed in source code form must include this copyright and license\r
+ * notice.\r
  */\r
 \r
 package compbio.data.sequence;\r
@@ -33,6 +26,7 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;\r
 import java.util.ArrayList;\r
 import java.util.List;\r
+import java.util.Scanner;\r
 import java.util.logging.Level;\r
 import java.util.regex.Matcher;\r
 import java.util.regex.Pattern;\r
@@ -45,362 +39,415 @@ import java.util.regex.Pattern;
  */\r
 public final class SequenceUtil {\r
 \r
-    /**\r
-     * A whitespace character: [\t\n\x0B\f\r]\r
-     */\r
-    public static final Pattern WHITE_SPACE = Pattern.compile("\\s");\r
-\r
-    /**\r
-     * A digit\r
-     */\r
-    public static final Pattern DIGIT = Pattern.compile("\\d");\r
-\r
-    /**\r
-     * Non word\r
-     */\r
-    public static final Pattern NONWORD = Pattern.compile("\\W");\r
-\r
-    /**\r
-     * Valid Amino acids\r
-     */\r
-    public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+",\r
-           Pattern.CASE_INSENSITIVE);\r
-\r
-    /**\r
-     * inversion of AA pattern\r
-     */\r
-    public static final Pattern NON_AA = Pattern.compile(\r
-           "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);\r
-\r
-    /**\r
-     * Same as AA pattern but with two additional letters - XU\r
-     */\r
-    public static final Pattern AMBIGUOUS_AA = Pattern.compile(\r
-           "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);\r
-\r
-    /**\r
-     * Nucleotides a, t, g, c, u\r
-     */\r
-    public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+",\r
-           Pattern.CASE_INSENSITIVE);\r
-\r
-    /**\r
-     * Ambiguous nucleotide\r
-     */\r
-    public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile(\r
-           "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC\r
-    /**\r
-     * Non nucleotide\r
-     */\r
-    public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+",\r
-           Pattern.CASE_INSENSITIVE);\r
-\r
-    private SequenceUtil() {\r
-    } // utility class, no instantiation\r
-\r
-    /*\r
-     * public static void write_PirSeq(OutputStream os, FastaSequence seq)\r
-     * throws IOException { BufferedWriter pir_out = new BufferedWriter(new\r
-     * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() +\r
-     * SysPrefs.newlinechar); pir_out.write(seq.getSequence() +\r
-     * SysPrefs.newlinechar); pir_out.close(); }\r
-     * \r
-     * public static void write_FastaSeq(OutputStream os, FastaSequence seq)\r
-     * throws IOException { BufferedWriter fasta_out = new BufferedWriter( new\r
-     * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() +\r
-     * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() +\r
-     * SysPrefs.newlinechar); fasta_out.close(); }\r
-     */\r
-\r
-    /**\r
-     * @return true is the sequence contains only letters a,c, t, g, u\r
-     */\r
-    public static boolean isNucleotideSequence(final FastaSequence s) {\r
-       return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence());\r
-    }\r
-\r
-    /**\r
-     * Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one\r
-     * (!) - B char\r
-     */\r
-    public static boolean isNonAmbNucleotideSequence(String sequence) {\r
-       sequence = SequenceUtil.cleanSequence(sequence);\r
-       if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
-           return false;\r
+       /**\r
+        * A whitespace character: [\t\n\x0B\f\r]\r
+        */\r
+       public static final Pattern WHITE_SPACE = Pattern.compile("\\s");\r
+\r
+       /**\r
+        * A digit\r
+        */\r
+       public static final Pattern DIGIT = Pattern.compile("\\d");\r
+\r
+       /**\r
+        * Non word\r
+        */\r
+       public static final Pattern NONWORD = Pattern.compile("\\W");\r
+\r
+       /**\r
+        * Valid Amino acids\r
+        */\r
+       public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+",\r
+                       Pattern.CASE_INSENSITIVE);\r
+\r
+       /**\r
+        * inversion of AA pattern\r
+        */\r
+       public static final Pattern NON_AA = Pattern.compile(\r
+                       "[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);\r
+\r
+       /**\r
+        * Same as AA pattern but with two additional letters - XU\r
+        */\r
+       public static final Pattern AMBIGUOUS_AA = Pattern.compile(\r
+                       "[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);\r
+\r
+       /**\r
+        * Nucleotides a, t, g, c, u\r
+        */\r
+       public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+",\r
+                       Pattern.CASE_INSENSITIVE);\r
+\r
+       /**\r
+        * Ambiguous nucleotide\r
+        */\r
+       public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile(\r
+                       "[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE); // see IUPAC\r
+       /**\r
+        * Non nucleotide\r
+        */\r
+       public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+",\r
+                       Pattern.CASE_INSENSITIVE);\r
+\r
+       private SequenceUtil() {\r
+       } // utility class, no instantiation\r
+\r
+       /*\r
+        * public static void write_PirSeq(OutputStream os, FastaSequence seq)\r
+        * throws IOException { BufferedWriter pir_out = new BufferedWriter(new\r
+        * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() +\r
+        * SysPrefs.newlinechar); pir_out.write(seq.getSequence() +\r
+        * SysPrefs.newlinechar); pir_out.close(); } public static void\r
+        * write_FastaSeq(OutputStream os, FastaSequence seq) throws IOException {\r
+        * BufferedWriter fasta_out = new BufferedWriter( new\r
+        * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() +\r
+        * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() +\r
+        * SysPrefs.newlinechar); fasta_out.close(); }\r
+        */\r
+\r
+       /**\r
+        * @return true if the sequence contains only letters a, c, t, g, u\r
+        */\r
+       public static boolean isNucleotideSequence(final FastaSequence s) {\r
+               return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence());\r
        }\r
-       if (SequenceUtil.NON_NUCLEOTIDE.matcher(sequence).find()) {\r
-           return false;\r
-           /*\r
-            * System.out.format("I found the text starting at " +\r
-            * "index %d and ending at index %d.%n", nonDNAmatcher .start(),\r
-            * nonDNAmatcher.end());\r
-            */\r
+\r
+       /**\r
+        * Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one\r
+        * (!) - B char\r
+        */\r
+       public static boolean isNonAmbNucleotideSequence(String sequence) {\r
+               sequence = SequenceUtil.cleanSequence(sequence);\r
+               if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.NON_NUCLEOTIDE.matcher(sequence).find()) {\r
+                       return false;\r
+                       /*\r
+                        * System.out.format("I found the text starting at " +\r
+                        * "index %d and ending at index %d.%n", nonDNAmatcher .start(),\r
+                        * nonDNAmatcher.end());\r
+                        */\r
+               }\r
+               final Matcher DNAmatcher = SequenceUtil.NUCLEOTIDE.matcher(sequence);\r
+               return DNAmatcher.find();\r
        }\r
-       final Matcher DNAmatcher = SequenceUtil.NUCLEOTIDE.matcher(sequence);\r
-       return DNAmatcher.find();\r
-    }\r
-\r
-    /**\r
-     * Removes all whitespace chars in the sequence string\r
-     * \r
-     * @param sequence\r
-     * @return cleaned up sequence\r
-     */\r
-    public static String cleanSequence(String sequence) {\r
-       assert sequence != null;\r
-       final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence);\r
-       sequence = m.replaceAll("").toUpperCase();\r
-       return sequence;\r
-    }\r
-\r
-    /**\r
-     * Removes all special characters and digits as well as whitespace chars\r
-     * from the sequence\r
-     * \r
-     * @param sequence\r
-     * @return cleaned up sequence\r
-     */\r
-    public static String deepCleanSequence(String sequence) {\r
-       sequence = SequenceUtil.cleanSequence(sequence);\r
-       sequence = SequenceUtil.DIGIT.matcher(sequence).replaceAll("");\r
-       sequence = SequenceUtil.NONWORD.matcher(sequence).replaceAll("");\r
-       final Pattern othernonSeqChars = Pattern.compile("[_-]+");\r
-       sequence = othernonSeqChars.matcher(sequence).replaceAll("");\r
-       return sequence;\r
-    }\r
-\r
-    /**\r
-     * \r
-     * @param sequence\r
-     * @return true is the sequence is a protein sequence, false overwise\r
-     */\r
-    public static boolean isProteinSequence(String sequence) {\r
-       sequence = SequenceUtil.cleanSequence(sequence);\r
-       if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
-           return false;\r
+\r
+       /**\r
+        * Removes all whitespace chars in the sequence string\r
+        * \r
+        * @param sequence\r
+        * @return cleaned up sequence\r
+        */\r
+       public static String cleanSequence(String sequence) {\r
+               assert sequence != null;\r
+               final Matcher m = SequenceUtil.WHITE_SPACE.matcher(sequence);\r
+               sequence = m.replaceAll("").toUpperCase();\r
+               return sequence;\r
        }\r
-       if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
-           return false;\r
+\r
+       /**\r
+        * Removes all special characters and digits as well as whitespace chars\r
+        * from the sequence\r
+        * \r
+        * @param sequence\r
+        * @return cleaned up sequence\r
+        */\r
+       public static String deepCleanSequence(String sequence) {\r
+               sequence = SequenceUtil.cleanSequence(sequence);\r
+               sequence = SequenceUtil.DIGIT.matcher(sequence).replaceAll("");\r
+               sequence = SequenceUtil.NONWORD.matcher(sequence).replaceAll("");\r
+               final Pattern othernonSeqChars = Pattern.compile("[_-]+");\r
+               sequence = othernonSeqChars.matcher(sequence).replaceAll("");\r
+               return sequence;\r
        }\r
-       if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
-           return false;\r
+\r
+       /**\r
+        * @param sequence\r
+        * @return true if the sequence is a protein sequence, false otherwise\r
+        */\r
+       public static boolean isProteinSequence(String sequence) {\r
+               sequence = SequenceUtil.cleanSequence(sequence);\r
+               if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               final Matcher protmatcher = SequenceUtil.AA.matcher(sequence);\r
+               return protmatcher.find();\r
        }\r
-       final Matcher protmatcher = SequenceUtil.AA.matcher(sequence);\r
-       return protmatcher.find();\r
-    }\r
-\r
-    /**\r
-     * Check whether the sequence confirms to amboguous protein sequence\r
-     * \r
-     * @param sequence\r
-     * @return return true only if the sequence if ambiguous protein sequence\r
-     *         Return false otherwise. e.g. if the sequence is non-ambiguous\r
-     *         protein or DNA\r
-     */\r
-    public static boolean isAmbiguosProtein(String sequence) {\r
-       sequence = SequenceUtil.cleanSequence(sequence);\r
-       if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
-           return false;\r
+\r
+       /**\r
+        * Checks whether the sequence conforms to an ambiguous protein sequence\r
+        * \r
+        * @param sequence\r
+        * @return true only if the sequence is an ambiguous protein sequence;\r
+        *         false otherwise, e.g. if the sequence is a non-ambiguous\r
+        *         protein or DNA\r
+        */\r
+       public static boolean isAmbiguosProtein(String sequence) {\r
+               sequence = SequenceUtil.cleanSequence(sequence);\r
+               if (SequenceUtil.isNonAmbNucleotideSequence(sequence)) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               if (SequenceUtil.AA.matcher(sequence).find()) {\r
+                       return false;\r
+               }\r
+               final Matcher amb_prot = SequenceUtil.AMBIGUOUS_AA.matcher(sequence);\r
+               return amb_prot.find();\r
        }\r
-       if (SequenceUtil.DIGIT.matcher(sequence).find()) {\r
-           return false;\r
+\r
+       /**\r
+        * Writes a list of FastaSequences into the outstream, formatting each\r
+        * sequence so that it contains at most width chars on each line\r
+        * \r
+        * @param outstream\r
+        * @param sequences\r
+        * @param width\r
+        *            - the maximum number of characters to write in one line\r
+        * @throws IOException\r
+        */\r
+       public static void writeFasta(final OutputStream outstream,\r
+                       final List<FastaSequence> sequences, final int width)\r
+                       throws IOException {\r
+               writeFastaKeepTheStream(outstream, sequences, width);\r
+               outstream.close();\r
        }\r
-       if (SequenceUtil.NON_AA.matcher(sequence).find()) {\r
-           return false;\r
+\r
+       public static void writeFastaKeepTheStream(final OutputStream outstream,\r
+                       final List<FastaSequence> sequences, final int width)\r
+                       throws IOException {\r
+               final OutputStreamWriter writer = new OutputStreamWriter(outstream);\r
+               final BufferedWriter fastawriter = new BufferedWriter(writer);\r
+               for (final FastaSequence fs : sequences) {\r
+                       fastawriter.write(">" + fs.getId() + "\n");\r
+                       fastawriter.write(fs.getFormatedSequence(width));\r
+                       fastawriter.write("\n");\r
+               }\r
+               fastawriter.flush();\r
+               writer.flush();\r
+       }\r
+\r
+       /**\r
+        * Reads fasta sequences from inStream into the list of FastaSequence\r
+        * objects\r
+        * \r
+        * @param inStream\r
+        *            from\r
+        * @return list of FastaSequence objects\r
+        * @throws IOException\r
+        */\r
+       public static List<FastaSequence> readFasta(final InputStream inStream)\r
+                       throws IOException {\r
+               final List<FastaSequence> seqs = new ArrayList<FastaSequence>();\r
+\r
+               final BufferedReader infasta = new BufferedReader(\r
+                               new InputStreamReader(inStream, "UTF8"), 16000);\r
+               final Pattern pattern = Pattern.compile("//s+");\r
+\r
+               String line;\r
+               String sname = "", seqstr = null;\r
+               do {\r
+                       line = infasta.readLine();\r
+                       if ((line == null) || line.startsWith(">")) {\r
+                               if (seqstr != null) {\r
+                                       seqs.add(new FastaSequence(sname.substring(1), seqstr));\r
+                               }\r
+                               sname = line; // remove >\r
+                               seqstr = "";\r
+                       } else {\r
+                               final String subseq = pattern.matcher(line).replaceAll("");\r
+                               seqstr += subseq;\r
+                       }\r
+               } while (line != null);\r
+\r
+               infasta.close();\r
+               return seqs;\r
        }\r
-       if (SequenceUtil.AA.matcher(sequence).find()) {\r
-           return false;\r
+\r
+       /**\r
+        * Writes FastaSequence in the file, each sequence will take one line only\r
+        * \r
+        * @param os\r
+        * @param sequences\r
+        * @throws IOException\r
+        */\r
+       public static void writeFasta(final OutputStream os,\r
+                       final List<FastaSequence> sequences) throws IOException {\r
+               final OutputStreamWriter outWriter = new OutputStreamWriter(os);\r
+               final BufferedWriter fasta_out = new BufferedWriter(outWriter);\r
+               for (final FastaSequence fs : sequences) {\r
+                       fasta_out.write(fs.getOnelineFasta());\r
+               }\r
+               fasta_out.close();\r
+               outWriter.close();\r
+       }\r
+\r
+       public static List<AnnotatedSequence> readJRonn(final File result)\r
+                       throws IOException, UnknownFileFormatException {\r
+               InputStream input = new FileInputStream(result);\r
+               List<AnnotatedSequence> sequences = readJRonn(input);\r
+               input.close();\r
+               return sequences;\r
        }\r
-       final Matcher amb_prot = SequenceUtil.AMBIGUOUS_AA.matcher(sequence);\r
-       return amb_prot.find();\r
-    }\r
-\r
-    /**\r
-     * Writes list of FastaSequeces into the outstream formatting the sequence\r
-     * so that it contains width chars on each line\r
-     * \r
-     * @param outstream\r
-     * @param sequences\r
-     * @param width\r
-     *            - the maximum number of characters to write in one line\r
-     * @throws IOException\r
-     */\r
-    public static void writeFasta(final OutputStream outstream,\r
-           final List<FastaSequence> sequences, final int width)\r
-           throws IOException {\r
-       final OutputStreamWriter writer = new OutputStreamWriter(outstream);\r
-       final BufferedWriter fastawriter = new BufferedWriter(writer);\r
-       for (final FastaSequence fs : sequences) {\r
-           fastawriter.write(fs.getFormatedSequence(width));\r
+\r
+       /**\r
+        * Reader for JRonn horizontal file format. Each record is three lines:\r
+        * >Foobar / M G D T T A G / 0.48 0.42 0.42 0.48 0.52 0.53 0.54. All values are tab delimited\r
+        * \r
+        * @param inStream\r
+        * @return\r
+        * @throws IOException\r
+        * @throws UnknownFileFormatException\r
+        */\r
+       public static List<AnnotatedSequence> readJRonn(final InputStream inStream)\r
+                       throws IOException, UnknownFileFormatException {\r
+               final List<AnnotatedSequence> seqs = new ArrayList<AnnotatedSequence>();\r
+\r
+               final BufferedReader infasta = new BufferedReader(\r
+                               new InputStreamReader(inStream, "UTF8"), 16000);\r
+\r
+               String line;\r
+               String sname = "";\r
+               do {\r
+                       line = infasta.readLine();\r
+                       if (line == null || line.isEmpty()) {\r
+                               // skip empty lines\r
+                               continue;\r
+                       }\r
+                       if (line.startsWith(">")) {\r
+                               // read name\r
+                               sname = line.trim().substring(1);\r
+                               // read sequence line\r
+                               line = infasta.readLine();\r
+                               final String sequence = line.replace("\t", "");\r
+                               // read annotation line\r
+                               line = infasta.readLine();\r
+                               String[] annotValues = line.split("\t");\r
+                               float[] annotation = convertToNumber(annotValues);\r
+                               if (annotation.length != sequence.length()) {\r
+                                       throw new UnknownFileFormatException(\r
+                                                       "File does not look like Jronn horizontally formatted output file!\n"\r
+                                                                       + JRONN_WRONG_FORMAT_MESSAGE);\r
+                               }\r
+                               seqs.add(new AnnotatedSequence(sname, sequence, annotation));\r
+                       }\r
+               } while (line != null);\r
+\r
+               infasta.close();\r
+               return seqs;\r
        }\r
-       outstream.flush();\r
-       fastawriter.close();\r
-       writer.close();\r
-    }\r
-\r
-    /**\r
-     * Reads fasta sequences from inStream into the list of FastaSequence\r
-     * objects\r
-     * \r
-     * @param inStream\r
-     *            from\r
-     * @return list of FastaSequence objects\r
-     * @throws IOException\r
-     */\r
-    public static List<FastaSequence> readFasta(final InputStream inStream)\r
-           throws IOException {\r
-       final List<FastaSequence> seqs = new ArrayList<FastaSequence>();\r
-\r
-       final BufferedReader infasta = new BufferedReader(\r
-               new InputStreamReader(inStream, "UTF8"), 16000);\r
-       final Pattern pattern = Pattern.compile("//s+");\r
-\r
-       String line;\r
-       String sname = "", seqstr = null;\r
-       do {\r
-           line = infasta.readLine();\r
-           if ((line == null) || line.startsWith(">")) {\r
-               if (seqstr != null) {\r
-                   seqs.add(new FastaSequence(sname.substring(1), seqstr));\r
+\r
+       private static float[] convertToNumber(String[] annotValues)\r
+                       throws UnknownFileFormatException {\r
+               float[] annotation = new float[annotValues.length];\r
+               try {\r
+                       for (int i = 0; i < annotation.length; i++) {\r
+                               annotation[i] = Float.parseFloat(annotValues[i]);\r
+                       }\r
+               } catch (NumberFormatException e) {\r
+                       throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE,\r
+                                       e.getCause());\r
                }\r
-               sname = line; // remove >\r
-               seqstr = "";\r
-           } else {\r
-               final String subseq = pattern.matcher(line).replaceAll("");\r
-               seqstr += subseq;\r
-           }\r
-       } while (line != null);\r
-\r
-       infasta.close();\r
-       return seqs;\r
-    }\r
-\r
-    /**\r
-     * Writes FastaSequence in the file, each sequence will take one line only\r
-     * \r
-     * @param os\r
-     * @param sequences\r
-     * @throws IOException\r
-     */\r
-    public static void writeFasta(final OutputStream os,\r
-           final List<FastaSequence> sequences) throws IOException {\r
-       final OutputStreamWriter outWriter = new OutputStreamWriter(os);\r
-       final BufferedWriter fasta_out = new BufferedWriter(outWriter);\r
-       for (final FastaSequence fs : sequences) {\r
-           fasta_out.write(fs.getOnelineFasta());\r
+               return annotation;\r
        }\r
-       fasta_out.close();\r
-       outWriter.close();\r
-    }\r
-\r
-    public static List<AnnotatedSequence> readJRonn(final File result)\r
-           throws IOException, UnknownFileFormatException {\r
-       InputStream input = new FileInputStream(result);\r
-       List<AnnotatedSequence> sequences = readJRonn(input);\r
-       input.close();\r
-       return sequences;\r
-    }\r
-\r
-    /**\r
-     * Reader for JRonn horizontal file format\r
-     * \r
-     * >Foobar\r
-     * \r
-     * M G D T T A G\r
-     * \r
-     * 0.48 0.42 0.42 0.48 0.52 0.53 0.54\r
-     * \r
-     * All values are tab delimited\r
-     * \r
-     * @param inStream\r
-     * @return\r
-     * @throws IOException\r
-     * @throws UnknownFileFormatException\r
-     */\r
-    public static List<AnnotatedSequence> readJRonn(final InputStream inStream)\r
-           throws IOException, UnknownFileFormatException {\r
-       final List<AnnotatedSequence> seqs = new ArrayList<AnnotatedSequence>();\r
-\r
-       final BufferedReader infasta = new BufferedReader(\r
-               new InputStreamReader(inStream, "UTF8"), 16000);\r
-\r
-       String line;\r
-       String sname = "";\r
-       do {\r
-           line = infasta.readLine();\r
-           if (line == null || line.isEmpty()) {\r
-               // skip empty lines\r
-               continue;\r
-           }\r
-           if (line.startsWith(">")) {\r
-               // read name\r
-               sname = line.trim().substring(1);\r
-               // read sequence line\r
-               line = infasta.readLine();\r
-               final String sequence = line.replace("\t", "");\r
-               // read annotation line\r
-               line = infasta.readLine();\r
-               String[] annotValues = line.split("\t");\r
-               float[] annotation = convertToNumber(annotValues);\r
-               if (annotation.length != sequence.length()) {\r
-                   throw new UnknownFileFormatException(\r
-                           "File does not look like Jronn horizontally formatted output file!\n"\r
-                                   + JRONN_WRONG_FORMAT_MESSAGE);\r
+\r
+       private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n"\r
+                       + ">sequence_name\n "\r
+                       + "M    V       S\n"\r
+                       + "0.43 0.22    0.65\n"\r
+                       + "Where first line is the sequence name,\n"\r
+                       + "second line is the tab delimited sequence,\n"\r
+                       + "third line contains tab delimited disorder prediction values.\n"\r
+                       + "No lines are allowed between these three. Additionally, the number of  "\r
+                       + "sequence residues must be equal to the number of the disorder values.";\r
+\r
+       /**\r
+        * Closes the Closeable and logs the exception, if any\r
+        * \r
+        * @param log\r
+        * @param stream\r
+        */\r
+       public final static void closeSilently(java.util.logging.Logger log,\r
+                       Closeable stream) {\r
+               if (stream != null) {\r
+                       try {\r
+                               stream.close();\r
+                       } catch (IOException e) {\r
+                               log.log(Level.WARNING, e.getLocalizedMessage(), e.getCause());\r
+                       }\r
                }\r
-               seqs.add(new AnnotatedSequence(sname, sequence, annotation));\r
-           }\r
-       } while (line != null);\r
-\r
-       infasta.close();\r
-       return seqs;\r
-    }\r
-\r
-    private static float[] convertToNumber(String[] annotValues)\r
-           throws UnknownFileFormatException {\r
-       float[] annotation = new float[annotValues.length];\r
-       try {\r
-           for (int i = 0; i < annotation.length; i++) {\r
-               annotation[i] = Float.parseFloat(annotValues[i]);\r
-           }\r
-       } catch (NumberFormatException e) {\r
-           throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e\r
-                   .getCause());\r
        }\r
-       return annotation;\r
-    }\r
-\r
-    private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n"\r
-           + ">sequence_name\n "\r
-           + "M        V       S\n"\r
-           + "0.43     0.22    0.65\n"\r
-           + "Where first line is the sequence name,\n"\r
-           + "second line is the tab delimited sequence,\n"\r
-           + "third line contains tab delimited disorder prediction values.\n"\r
-           + "No lines are allowed between these three. Additionally, the number of  "\r
-           + "sequence residues must be equal to the number of the disorder values.";\r
-\r
-    /**\r
-     * Closes the Closable and logs the exception if any\r
-     * \r
-     * @param log\r
-     * @param stream\r
-     */\r
-    public final static void closeSilently(java.util.logging.Logger log,\r
-           Closeable stream) {\r
-       if (stream != null) {\r
-           try {\r
-               stream.close();\r
-           } catch (IOException e) {\r
-               log.log(Level.WARNING, e.getLocalizedMessage(), e.getCause());\r
-           }\r
+\r
+       /**\r
+        * \r
+        * TODO complete!\r
+        * \r
+        * # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 0.37094 T 0.79983\r
+        * 0.85864 0.44331 .... # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512\r
+        * 0.37094\r
+        * \r
+        * @param input\r
+        * @return\r
+        * @throws IOException\r
+        * @throws UnknownFileFormatException\r
+        */\r
+       public static List<MultiAnnotatedSequence<DisemblResultAnnot>> readDisembl(\r
+                       final InputStream input) throws IOException,\r
+                       UnknownFileFormatException {\r
+               Scanner scan = new Scanner(input);\r
+               scan.useDelimiter("# RESIDUE COILS REM465 HOTLOOPS\n");\r
+               if (!scan.hasNext()) {\r
+                       throw new UnknownFileFormatException(\r
+                                       "In Disembl score format each seqeunce score is expected to start from the line: "\r
+                                                       + "'# RESIDUE COILS REM465 HOTLOOPS\\n'."\r
+                                                       + " No such line was found!");\r
+               }\r
+\r
+               List<MultiAnnotatedSequence<DisemblResultAnnot>> results = new ArrayList<MultiAnnotatedSequence<DisemblResultAnnot>>();\r
+               int seqCounter = 0;\r
+               while (scan.hasNext()) {\r
+                       seqCounter++;\r
+                       String singleSeq = scan.next();\r
+                       Scanner scansingle = new Scanner(singleSeq);\r
+                       StringBuffer seqbuffer = new StringBuffer();\r
+                       List<Float> coils = new ArrayList<Float>();\r
+                       List<Float> rem = new ArrayList<Float>();\r
+                       List<Float> hotloops = new ArrayList<Float>();\r
+\r
+                       MultiAnnotatedSequence<DisemblResultAnnot> disemblRes = new MultiAnnotatedSequence<DisemblResultAnnot>(\r
+                                       DisemblResultAnnot.class);\r
+\r
+                       while (scansingle.hasNextLine()) {\r
+                               String valueLine = scansingle.nextLine();\r
+                               Scanner values = new Scanner(valueLine);\r
+                               seqbuffer.append(values.next());\r
+                               coils.add(values.nextFloat());\r
+                               rem.add(values.nextFloat());\r
+                               hotloops.add(values.nextFloat());\r
+                               values.close();\r
+                       }\r
+                       disemblRes.addAnnotation(DisemblResultAnnot.COILS, coils);\r
+                       disemblRes.addAnnotation(DisemblResultAnnot.REM465, rem);\r
+                       disemblRes.addAnnotation(DisemblResultAnnot.HOTLOOPS, hotloops);\r
+                       // TODO\r
+                       // disemblRes.sequence = seqbuffer.toString();\r
+                       scansingle.close();\r
+                       results.add(disemblRes);\r
+               }\r
+\r
+               input.close();\r
+               return results;\r
        }\r
-    }\r
-\r
-    public static List<AnnotatedSequence> readDisembl(final File result)\r
-           throws IOException, UnknownFileFormatException {\r
-       InputStream input = new FileInputStream(result);\r
-       List<AnnotatedSequence> sequences = readJRonn(input);\r
-       input.close();\r
-       return sequences;\r
-    }\r
+\r
 }\r
diff --git a/runner/compbio/runner/conservation/AACon.java b/runner/compbio/runner/conservation/AACon.java
new file mode 100644 (file)
index 0000000..5d20d5d
--- /dev/null
@@ -0,0 +1,198 @@
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
+ */\r
+\r
+package compbio.runner.conservation;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.Arrays;\r
+import java.util.List;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.conservation.ResultReader;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.client.CommandBuilder;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.SkeletalExecutable;\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.runner.Util;\r
+\r
+/**\r
+ * Command line\r
+ * \r
+ * java -Xmx512 -jar jronn_v3.jar -i=test_seq.txt -n=1 -o=out.txt -s=stat.out\r
+ * \r
+ * @author pvtroshin\r
+ * \r
+ */\r
+public class AACon extends SkeletalExecutable<AACon> {\r
+\r
+       private static Logger log = Logger.getLogger(AACon.class);\r
+\r
+       /**\r
+        * Number of cores to use, defaults to 1 for local execution or the value of\r
+        * "jronn.cluster.cpunum" property for cluster execution\r
+        */\r
+       private int ncoreNumber = 0;\r
+\r
+       private final String ncorePrm = "-n=";\r
+\r
+       // Cache for Limits information\r
+       private static LimitsManager<AACon> limits;\r
+\r
+       public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
+       public static final String STAT_FILE = "stat.txt";\r
+\r
+       public AACon() {\r
+               addParameters(Arrays.asList("-jar", getLibPath(), "-d=" + STAT_FILE,\r
+                               "-f=RESULT_NO_ALIGNMENT"));\r
+       }\r
+\r
+       @SuppressWarnings("unchecked")\r
+       @Override\r
+       public MultiAnnotatedSequence<Method> getResults(String workDirectory)\r
+                       throws ResultNotAvailableException {\r
+               MultiAnnotatedSequence<Method> annotations = null;\r
+               try {\r
+                       InputStream inStream = new FileInputStream(new File(workDirectory,\r
+                                       getOutput()));\r
+                       annotations = ResultReader.readResults(inStream);\r
+                       inStream.close();\r
+               } catch (FileNotFoundException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               } catch (IOException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               } catch (NullPointerException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               }\r
+               return annotations;\r
+       }\r
+\r
+       private static String getLibPath() {\r
+\r
+               String settings = ph.getProperty("aacon.jar.file");\r
+               if (compbio.util.Util.isEmpty(settings)) {\r
+                       throw new NullPointerException(\r
+                                       "Please define aacon.jar.file property in Executable.properties file"\r
+                                                       + "and initialize it with the location of jronn jar file");\r
+               }\r
+               if (new File(settings).isAbsolute()) {\r
+                       // The configured path is already absolute,\r
+                       // so no further action is necessary\r
+                       return settings;\r
+               }\r
+               return compbio.engine.client.Util.convertToAbsolute(settings);\r
+       }\r
+\r
+       @Override\r
+       public List<String> getCreatedFiles() {\r
+               return Arrays.asList(getOutput(), getError());\r
+       }\r
+\r
+       @Override\r
+       public AACon setInput(String inFile) {\r
+               super.setInput(inFile);\r
+               cbuilder.setParam("-i=" + inFile);\r
+               return this;\r
+       }\r
+\r
+       @Override\r
+       public AACon setOutput(String outFile) {\r
+               super.setOutput(outFile);\r
+               cbuilder.setParam("-o=" + outFile);\r
+               return this;\r
+       }\r
+\r
+       @Override\r
+       public Limit<AACon> getLimit(String presetName) {\r
+               if (limits == null) {\r
+                       limits = getLimits();\r
+               }\r
+               Limit<AACon> limit = null;\r
+               if (limits != null) {\r
+                       // this returns default limit if preset is undefined!\r
+                       limit = limits.getLimitByName(presetName);\r
+               }\r
+               // If limit is not defined for a particular preset, then return default\r
+               // limit\r
+               if (limit == null) {\r
+                       log.debug("Limit for the preset " + presetName\r
+                                       + " is not found. Using default");\r
+                       limit = limits.getDefaultLimit();\r
+               }\r
+               return limit;\r
+       }\r
+\r
+       @Override\r
+       public LimitsManager<AACon> getLimits() {\r
+               // synchronise on static field\r
+               synchronized (log) {\r
+                       if (limits == null) {\r
+                               limits = Util.getLimits(this.getClass());\r
+                       }\r
+               }\r
+               return limits;\r
+       }\r
+\r
+       @Override\r
+       public Class<? extends Executable<?>> getType() {\r
+               return this.getClass();\r
+       }\r
+\r
+       public static String getStatFile() {\r
+               return STAT_FILE;\r
+       }\r
+\r
+       public void setNCore(int ncoreNumber) {\r
+               if (ncoreNumber < 1 || ncoreNumber > 100) {\r
+                       throw new IndexOutOfBoundsException(\r
+                                       "Number of cores must be within 1 and 100 ");\r
+               }\r
+               this.ncoreNumber = ncoreNumber;\r
+               cbuilder.setParam(ncorePrm + Integer.toString(getNCore()));\r
+       }\r
+\r
+       int getNCore() {\r
+               return ncoreNumber;\r
+       }\r
+\r
+       @Override\r
+       public CommandBuilder<AACon> getParameters(ExecProvider provider) {\r
+               // If number of cores is provided, set it for the cluster execution\r
+               // only!\r
+               if (provider == Executable.ExecProvider.Cluster) {\r
+                       int cpunum = SkeletalExecutable.getClusterCpuNum(getType());\r
+                       cpunum = (cpunum == 0) ? 1 : cpunum;\r
+                       setNCore(cpunum);\r
+               } else {\r
+                       // Limit number of cores to 1 for ANY execution which does not set\r
+                       // Ncores explicitly using setNCore method or is run on local VM\r
+                       if (ncoreNumber == 0) {\r
+                               setNCore(1);\r
+                       }\r
+               }\r
+               return super.getParameters(provider);\r
+       }\r
+\r
+}\r
index 4c76854..c6b278f 100644 (file)
@@ -1,19 +1,15 @@
-/* Copyright (c) 2009 Peter Troshin\r
- *  \r
- *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 \r
- * \r
- *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
- *  Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- *  License for more details.\r
- * \r
- *  A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
  */\r
 \r
 package compbio.runner.disorder;\r
@@ -24,7 +20,6 @@ import java.util.Arrays;
 \r
 import org.apache.log4j.Logger;\r
 \r
-import com.sun.xml.internal.bind.api.impl.NameConverter.Standard;\r
 import compbio.data.sequence.Alignment;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
 import compbio.engine.client.Executable;\r
@@ -36,98 +31,101 @@ import compbio.metadata.ResultNotAvailableException;
 import compbio.runner.Util;\r
 \r
 /**\r
- * @see Standard DisEMBL DisEMBL.py smooth_frame peak_frame join_frame\r
- *      fold_coils fold_hotloops fold_rem465 sequence_file print 'A default run\r
- *      would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 fasta_file > out'\r
+ * @see DisEMBL\r
  * \r
- *      new DisEMBL is at /homes/pvtroshin/soft/DisEMBL-1.4raw This is not a\r
- *      standard DisEMBL! The script has been modified! DisEMBL.py smooth_frame\r
- *      peak_frame join_frame fold_coils fold_hotloops fold_rem465 [mode] <\r
- *      fasta_file > out print 'A default run would be: ./DisEMBL.py 8 8 4 1.2\r
- *      1.4 1.2 < fasta_file' print 'Mode: "default"(nothing) or "scores" which\r
- *      will give scores per residue in TAB separated format'\r
+ *      DisEMBL.py smooth_frame peak_frame join_frame fold_coils fold_hotloops\r
+ *      fold_rem465 sequence_file print 'A default run would be: ./DisEMBL.py 8\r
+ *      8 4 1.2 1.4 1.2 fasta_file > out' new DisEMBL is at\r
+ *      /homes/pvtroshin/soft/DisEMBL-1.4raw\r
  * \r
+ *      This is not a standard DisEMBL! The script has been modified! DisEMBL.py\r
+ *      smooth_frame peak_frame join_frame fold_coils fold_hotloops fold_rem465\r
+ *      [mode] < fasta_file > out print\r
+ * \r
+ *      'A default run would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 < fasta_file'\r
+ *      print 'Mode: "default"(nothing) or "scores" which will give scores per\r
+ *      residue in TAB separated format'\r
  */\r
 public class Disembl extends SkeletalExecutable<Disembl> implements\r
-       PipedExecutable<Disembl> {\r
-\r
-    private static Logger log = Logger.getLogger(Disembl.class);\r
-\r
-    // Cache for Limits information\r
-    private static LimitsManager<Disembl> limits;\r
-\r
-    public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
-\r
-    public Disembl() {\r
-       // remove default input to prevent it to appear in the parameters list\r
-       // that could happen if the parameters are set first\r
-       // super.setInput("");\r
-       addParameters(Arrays.asList("8", "8", "4", "1.2", "1.4", "1.2",\r
-               "scores"));\r
-    }\r
-\r
-    @SuppressWarnings("unchecked")\r
-    public Alignment getResults(String workDirectory)\r
-           throws ResultNotAvailableException {\r
-       try {\r
-           return Util.readClustalFile(workDirectory, getOutput());\r
-       } catch (FileNotFoundException e) {\r
-           log.error(e.getMessage(), e.getCause());\r
-           throw new ResultNotAvailableException(e);\r
-       } catch (IOException e) {\r
-           log.error(e.getMessage(), e.getCause());\r
-           throw new ResultNotAvailableException(e);\r
-       } catch (UnknownFileFormatException e) {\r
-           log.error(e.getMessage(), e.getCause());\r
-           throw new ResultNotAvailableException(e);\r
-       } catch (NullPointerException e) {\r
-           log.error(e.getMessage(), e.getCause());\r
-           throw new ResultNotAvailableException(e);\r
+               PipedExecutable<Disembl> {\r
+\r
+       private static Logger log = Logger.getLogger(Disembl.class);\r
+\r
+       // Cache for Limits information\r
+       private static LimitsManager<Disembl> limits;\r
+\r
+       public static final String KEY_VALUE_SEPARATOR = Util.SPACE;\r
+\r
+       public Disembl() {\r
+               // remove default input to prevent it to appear in the parameters list\r
+               // that could happen if the parameters are set first\r
+               // super.setInput("");\r
+               addParameters(Arrays.asList("8", "8", "4", "1.2", "1.4", "1.2",\r
+                               "scores"));\r
        }\r
-    }\r
-\r
-    @Override\r
-    public Disembl setInput(String inFile) {\r
-       super.setInput(inFile);\r
-       cbuilder.setLast(inFile);\r
-       return this;\r
-    }\r
-\r
-    @Override\r
-    public Limit<Disembl> getLimit(String presetName) {\r
-       if (limits == null) {\r
-           limits = getLimits();\r
+\r
+       @SuppressWarnings("unchecked")\r
+       public Alignment getResults(String workDirectory)\r
+                       throws ResultNotAvailableException {\r
+               try {\r
+                       return Util.readClustalFile(workDirectory, getOutput());\r
+               } catch (FileNotFoundException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               } catch (IOException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               } catch (UnknownFileFormatException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               } catch (NullPointerException e) {\r
+                       log.error(e.getMessage(), e.getCause());\r
+                       throw new ResultNotAvailableException(e);\r
+               }\r
        }\r
 \r
-       Limit<Disembl> limit = null;\r
-       if (limits != null) {\r
-           // this returns default limit if preset is undefined!\r
-           limit = limits.getLimitByName(presetName);\r
+       @Override\r
+       public Disembl setInput(String inFile) {\r
+               super.setInput(inFile);\r
+               cbuilder.setLast(inFile);\r
+               return this;\r
        }\r
-       // If limit is not defined for a particular preset, then return default\r
-       // limit\r
-       if (limit == null) {\r
-           log.debug("Limit for the preset " + presetName\r
-                   + " is not found. Using default");\r
-           limit = limits.getDefaultLimit();\r
+\r
+       @Override\r
+       public Limit<Disembl> getLimit(String presetName) {\r
+               if (limits == null) {\r
+                       limits = getLimits();\r
+               }\r
+\r
+               Limit<Disembl> limit = null;\r
+               if (limits != null) {\r
+                       // this returns default limit if preset is undefined!\r
+                       limit = limits.getLimitByName(presetName);\r
+               }\r
+               // If limit is not defined for a particular preset, then return default\r
+               // limit\r
+               if (limit == null) {\r
+                       log.debug("Limit for the preset " + presetName\r
+                                       + " is not found. Using default");\r
+                       limit = limits.getDefaultLimit();\r
+               }\r
+               return limit;\r
        }\r
-       return limit;\r
-    }\r
-\r
-    @Override\r
-    public LimitsManager<Disembl> getLimits() {\r
-       // synchronise on static field\r
-       synchronized (log) {\r
-           if (limits == null) {\r
-               limits = Util.getLimits(this.getClass());\r
-           }\r
+\r
+       @Override\r
+       public LimitsManager<Disembl> getLimits() {\r
+               // synchronise on static field\r
+               synchronized (log) {\r
+                       if (limits == null) {\r
+                               limits = Util.getLimits(this.getClass());\r
+                       }\r
+               }\r
+               return limits;\r
        }\r
-       return limits;\r
-    }\r
 \r
-    @Override\r
-    public Class<? extends Executable<?>> getType() {\r
-       return this.getClass();\r
-    }\r
+       @Override\r
+       public Class<? extends Executable<?>> getType() {\r
+               return this.getClass();\r
+       }\r
 \r
 }\r
index f2af670..720037b 100644 (file)
@@ -1,21 +1,16 @@
-/* Copyright (c) 2009 Peter Troshin\r
- *  \r
- *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0     \r
- * \r
- *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
- *  Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- *  License for more details.\r
- * \r
- *  A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
+/*\r
+ * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
+ * and/or modify it under the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
+ * derived work distributed in source code form must include this copyright and\r
+ * license notice.\r
  */\r
-\r
 package compbio.data.sequence;\r
 \r
 import static org.testng.AssertJUnit.assertEquals;\r
@@ -36,112 +31,152 @@ import compbio.metadata.AllTestSuit;
 \r
 public class SequenceUtilTester {\r
 \r
-    @Test()\r
-    public void testisNonAmbNucleotideSequence() {\r
-       String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
-       assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
-       String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
-       assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
-       String nonDna = "atgfctgatgcatgcatgatgctga";\r
-       assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
-       nonDna = "atgc1tgatgcatgcatgatgctga";\r
-       assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
-       nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
-       assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-       // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
-       assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
-    }\r
-\r
-    @Test()\r
-    public void testCleanSequence() {\r
-       String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
-       assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
-               SequenceUtil.cleanSequence(dirtySeq));\r
-    }\r
-\r
-    @Test()\r
-    public void testDeepCleanSequence() {\r
-       String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
-       assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
-               SequenceUtil.deepCleanSequence(dirtySeq));\r
-    }\r
-\r
-    @Test()\r
-    public void testisProteinSequence() {\r
-       String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
-       assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
-       String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
-       assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
-       String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
-       assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
-       AAseq += "XU";\r
-       assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
-\r
-    }\r
-\r
-    @Test()\r
-    public void testReadWriteFasta() {\r
-\r
-       try {\r
-           FileInputStream fio = new FileInputStream(\r
-                   AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
-           assertNotNull(fio);\r
-           List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
-           assertNotNull(fseqs);\r
-           assertEquals(3, fseqs.size());\r
-           assertEquals(3, fseqs.size());\r
-           fio.close();\r
-           FileOutputStream fou = new FileOutputStream(\r
-                   AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
-           SequenceUtil.writeFasta(fou, fseqs);\r
-           fou.close();\r
-           FileOutputStream fou20 = new FileOutputStream(\r
-                   AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
-           SequenceUtil.writeFasta(fou20, fseqs, 20);\r
-           fou20.close();\r
-\r
-       } catch (FileNotFoundException e) {\r
-           e.printStackTrace();\r
-           fail(e.getLocalizedMessage());\r
-       } catch (IOException e) {\r
-           e.printStackTrace();\r
-           fail(e.getLocalizedMessage());\r
+       @Test()\r
+       public void testisNonAmbNucleotideSequence() {\r
+               String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
+               assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
+               String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
+               assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
+               String nonDna = "atgfctgatgcatgcatgatgctga";\r
+               assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
+\r
+               nonDna = "atgc1tgatgcatgcatgatgctga";\r
+               assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
+\r
+               nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
+               // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
+               assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
+\r
+       }\r
+\r
+       @Test()\r
+       public void testCleanSequence() {\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
+                               SequenceUtil.cleanSequence(dirtySeq));\r
+       }\r
+\r
+       @Test()\r
+       public void testDeepCleanSequence() {\r
+               String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
+               assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
+                               SequenceUtil.deepCleanSequence(dirtySeq));\r
        }\r
-    }\r
-\r
-    /**\r
-     * This test tests the loading of horizontally formatted Jronn output file\r
-     */\r
-    @Test\r
-    public void loadJronnFile() {\r
-\r
-       FileInputStream fio;\r
-       try {\r
-           fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
-           List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
-           assertNotNull(aseqs);\r
-           assertEquals(aseqs.size(), 3);\r
-           AnnotatedSequence aseq = aseqs.get(0);\r
-           assertNotNull(aseq);\r
-           assertNotNull(aseq.getAnnotation());\r
-           //System.out.println(aseq);\r
-           assertEquals(aseq.getAnnotation().length, aseq.getSequence()\r
-                   .length());\r
-           fio.close();\r
-       } catch (FileNotFoundException e) {\r
-           e.printStackTrace();\r
-           fail(e.getLocalizedMessage());\r
-       } catch (IOException e) {\r
-           e.printStackTrace();\r
-           fail(e.getLocalizedMessage());\r
-       } catch (UnknownFileFormatException e) {\r
-           e.printStackTrace();\r
-           fail(e.getLocalizedMessage());\r
+\r
+       @Test()\r
+       public void testisProteinSequence() {\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
+               String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
+               assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
+               String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
+               AAseq += "XU";\r
+               assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
+\r
+       }\r
+\r
+       @Test()\r
+       public void testReadWriteFasta() {\r
+\r
+               try {\r
+                       FileInputStream fio = new FileInputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
+                       assertNotNull(fio);\r
+                       List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
+                       assertNotNull(fseqs);\r
+                       assertEquals(3, fseqs.size());\r
+                       assertEquals(3, fseqs.size());\r
+                       fio.close();\r
+                       FileOutputStream fou = new FileOutputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
+                       SequenceUtil.writeFasta(fou, fseqs);\r
+                       fou.close();\r
+                       FileOutputStream fou20 = new FileOutputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
+                       SequenceUtil.writeFasta(fou20, fseqs, 21);\r
+                       fou20.close();\r
+\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
        }\r
 \r
-    }\r
+       /**\r
+        * This test tests the loading of horizontally formatted Jronn output file\r
+        */\r
+       @Test\r
+       public void loadJronnFile() {\r
+\r
+               FileInputStream fio;\r
+               try {\r
+                       fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
+                       List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
+                       assertNotNull(aseqs);\r
+                       assertEquals(aseqs.size(), 3);\r
+                       AnnotatedSequence aseq = aseqs.get(0);\r
+                       assertNotNull(aseq);\r
+                       assertNotNull(aseq.getAnnotation());\r
+                       // System.out.println(aseq);\r
+                       assertEquals(aseq.getAnnotation().length, aseq.getSequence()\r
+                                       .length());\r
+                       fio.close();\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (UnknownFileFormatException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
 \r
+       }\r
+\r
+       // Sample enum; in the visible part of this class it is referenced only\r
+       // from the commented-out snippet inside testMultiAnnotatedSequence\r
+       enum Trial {\r
+               one, two, three\r
+       };\r
+\r
+       /**\r
+        * Tests that the DisEMBL output file (disembl.out) can be read with\r
+        * SequenceUtil.readDisembl and that the returned list is not null.\r
+        */\r
+       @SuppressWarnings("unchecked")\r
+       @Test\r
+       public void testMultiAnnotatedSequence() {\r
+\r
+               FileInputStream fio = null;\r
+               try {\r
+                       fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
+                                       + "disembl.out");\r
+                       List<MultiAnnotatedSequence<DisemblResultAnnot>> aseqs = SequenceUtil\r
+                                       .readDisembl(fio);\r
+                       assertNotNull(aseqs);\r
+\r
+                       /*\r
+                        * MultiAnnotatedSequence ma = new MultiAnnotatedSequence();\r
+                        * Map<Trial, List<Number>> val = ma.getInstance(Trial.class);\r
+                        * List<Number> list = new ArrayList<Number>(); list.add(new\r
+                        * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one,\r
+                        * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f));\r
+                        * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0);\r
+                        */\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (UnknownFileFormatException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } finally {\r
+                       // Release the file handle even if reading or an assertion failed.\r
+                       // A failure to close is only reported, so that it cannot mask the\r
+                       // original test failure.\r
+                       if (fio != null) {\r
+                               try {\r
+                                       fio.close();\r
+                               } catch (IOException e) {\r
+                                       e.printStackTrace();\r
+                               }\r
+                       }\r
+               }\r
+\r
+       }\r
 }\r
index 2c045cc..633e30b 100644 (file)
@@ -59,8 +59,7 @@ public class AllTestSuit {
         * For this to work execution must start from the project directory!\r
         */\r
        public static final String CURRENT_DIRECTORY = SysPrefs\r
-                       .getCurrentDirectory()\r
-                       + File.separator;\r
+                       .getCurrentDirectory() + File.separator;\r
 \r
        public static final String TEST_DATA_PATH = "testsrc" + File.separator\r
                        + "testdata" + File.separator;\r
@@ -78,6 +77,9 @@ public class AllTestSuit {
        public static final String test_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
                        + "TO1381.fasta";\r
 \r
+       public static final String test_alignment_input = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
+                       + "TO1381.fasta.aln";\r
+\r
        public static final String test_input_real = AllTestSuit.TEST_DATA_PATH_ABSOLUTE\r
                        + "50x500Protein.fasta";\r
 \r
diff --git a/testsrc/compbio/runner/conservation/AAConTester.java b/testsrc/compbio/runner/conservation/AAConTester.java
new file mode 100644 (file)
index 0000000..033555a
--- /dev/null
@@ -0,0 +1,381 @@
+/*\r
+ * Copyright (c) 2010 Peter Troshin JAva Bioinformatics Analysis Web Services\r
+ * (JABAWS) @version: 2.0 \r
+ * \r
+ * This library is free software; you can redistribute it and/or modify it under \r
+ * the terms of the Apache License version 2 as published\r
+ * by the Apache Software Foundation This library is distributed in the hope\r
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * Apache License for more details. A copy of the license is in\r
+ * apache_license.txt. It is also available here:\r
+ * \r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt \r
+ * \r
+ * Any republication or derived work distributed in source code form must include \r
+ * this copyright and license notice.\r
+ */\r
+package compbio.runner.conservation;\r
+\r
+import static org.testng.Assert.assertEquals;\r
+import static org.testng.Assert.assertFalse;\r
+import static org.testng.Assert.assertNotNull;\r
+import static org.testng.Assert.assertNull;\r
+import static org.testng.Assert.assertTrue;\r
+import static org.testng.Assert.fail;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.text.ParseException;\r
+\r
+import javax.xml.bind.ValidationException;\r
+\r
+import org.ggf.drmaa.DrmaaException;\r
+import org.ggf.drmaa.JobInfo;\r
+import org.testng.annotations.BeforeMethod;\r
+import org.testng.annotations.Test;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.FilePuller;\r
+import compbio.engine.SyncExecutor;\r
+import compbio.engine.client.ConfExecutable;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.RunConfiguration;\r
+import compbio.engine.cluster.drmaa.ClusterUtil;\r
+import compbio.engine.cluster.drmaa.JobRunner;\r
+import compbio.engine.cluster.drmaa.StatisticManager;\r
+import compbio.engine.local.LocalRunner;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobExecutionException;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.util.FileWatcher;\r
+import compbio.util.SysPrefs;\r
+\r
+public class AAConTester {\r
+\r
+       // Current directory as reported by SysPrefs, with a file separator appended\r
+       public static final String CURRENT_DIRECTORY = SysPrefs\r
+                       .getCurrentDirectory() + File.separator;\r
+\r
+       // Name of the output file that init() sets on the executable under test\r
+       public static String test_outfile = "TO1381.aacon.out"; // "/homes/pvtroshin/TO1381.clustal.cluster.out\r
+       // Path of the TO1381.fasta.aln test alignment below\r
+       // CURRENT_DIRECTORY/testsrc/testdata; init() uses it as the input\r
+       public static String test_alignment_input = CURRENT_DIRECTORY + "testsrc"\r
+                       + File.separator + "testdata" + File.separator + "TO1381.fasta.aln";\r
+       // Executable under test; replaced with a new instance by init()\r
+       private AACon aacon;\r
+\r
+       /**\r
+        * Creates a new AACon instance before each test method, with\r
+        * test_alignment_input as its input and test_outfile as its output.\r
+        */\r
+       @BeforeMethod(alwaysRun = true)\r
+       void init() {\r
+               aacon = new AACon();\r
+               aacon.setInput(test_alignment_input).setOutput(test_outfile);\r
+       }\r
+\r
+       /**\r
+        * Runs AACon through the cluster JobRunner and checks the job status and\r
+        * the exit statistics reported for the job.\r
+        */\r
+       @Test()\r
+       public void testRunOnCluster() {\r
+               assertFalse(SysPrefs.isWindows,\r
+                               "Cluster execution can only be in unix environment");\r
+               try {\r
+                       ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Cluster);\r
+                       JobRunner runner = JobRunner.getInstance(confAAcon);\r
+\r
+                       assertNotNull(runner, "Runner is NULL");\r
+                       runner.executeJob();\r
+                       // assertNotNull("JobId is null", jobId1);\r
+                       JobStatus status = runner.getJobStatus();\r
+                       assertTrue(status == JobStatus.PENDING\r
+                                       || status == JobStatus.RUNNING,\r
+                                       "Status of the process is wrong!");\r
+                       JobInfo info = runner.getJobInfo();\r
+                       assertNotNull(info, "JobInfo is null");\r
+                       StatisticManager sm = new StatisticManager(info);\r
+                       assertNotNull(sm, "Statictic manager is null");\r
+                       try {\r
+\r
+                               String exits = sm.getExitStatus();\r
+                               // TestNG argument order is (object, message); with the\r
+                               // arguments swapped the check could never fail\r
+                               assertNotNull(exits, "Exit status is null");\r
+                               // cut 4 trailing zeros from the number\r
+                               int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits)\r
+                                               .intValue();\r
+                               // TestNG argument order is (actual, expected)\r
+                               assertEquals(exitsInt, 0);\r
+                               System.out.println(sm.getAllStats());\r
+\r
+                       } catch (ParseException e) {\r
+                               e.printStackTrace();\r
+                               fail("Parse Exception: " + e.getMessage());\r
+                       }\r
+                       // assertFalse(runner.cleanup());\r
+                       assertTrue(sm.hasExited());\r
+                       assertFalse(sm.wasAborted());\r
+                       assertFalse(sm.hasDump());\r
+                       assertFalse(sm.hasSignaled());\r
+\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail("JobSubmissionException caught:" + e.getMessage());\r
+               } catch (JobExecutionException e) {\r
+                       e.printStackTrace();\r
+                       fail("JobExecutionException caught:" + e.getMessage());\r
+               } catch (DrmaaException e) {\r
+                       e.printStackTrace();\r
+                       fail("DrmaaException caught:" + e.getMessage());\r
+               }\r
+       }\r
+\r
+       /**\r
+        * Submits AACon to the cluster through the asynchronous engine and polls\r
+        * the job status until the job is reported as FINISHED or UNDEFINED.\r
+        * <p>\r
+        * This test fails from time to time depending on the cluster load or some\r
+        * other factors. Any client code has to adjust for this issue.\r
+        */\r
+       @Test()\r
+       public void testRunOnClusterAsync() {\r
+               assertFalse(SysPrefs.isWindows,\r
+                               "Cluster execution can only be in unix environment");\r
+               try {\r
+                       ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Cluster);\r
+                       AsyncExecutor aengine = Configurator.getAsyncEngine(confAAcon);\r
+                       String jobId = aengine.submitJob(confAAcon);\r
+                       assertNotNull(jobId, "Job id is NULL");\r
+                       // let drmaa to start\r
+                       Thread.sleep(500);\r
+                       JobStatus status = aengine.getJobStatus(jobId);\r
+                       while (status != JobStatus.FINISHED) {\r
+                               System.out.println("Job Status: " + status);\r
+                               Thread.sleep(1000);\r
+                               status = aengine.getJobStatus(jobId);\r
+                               ConfiguredExecutable<AACon> result = (ConfiguredExecutable<AACon>) aengine\r
+                                               .getResults(jobId);\r
+                               assertNotNull(result);\r
+                               System.out.println("RES:" + result);\r
+                               // Some times the job could be removed from the cluster\r
+                               // accounting\r
+                               // before it has been reported to finish. Make sure\r
+                               // to stop waiting in such case\r
+                               if (status == JobStatus.UNDEFINED) {\r
+                                       break;\r
+                               }\r
+                       }\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail("JobSubmissionException caught:" + e.getMessage());\r
+               } catch (InterruptedException e) {\r
+                       // restore the interrupt flag so that the test runner can see it\r
+                       Thread.currentThread().interrupt();\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (ResultNotAvailableException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               }\r
+       }\r
+\r
+       @Test()\r
+       public void testRunLocally() {\r
+               try {\r
+                       ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+                       // For local execution use relative\r
+                       LocalRunner lr = new LocalRunner(confAAcon);\r
+                       lr.executeJob();\r
+                       ConfiguredExecutable<?> al1 = lr.waitForResult();\r
+                       assertNotNull(al1.getResults());\r
+                       MultiAnnotatedSequence<Method> annotations = confAAcon.getResults();\r
+                       assertNotNull(annotations);\r
+                       assertEquals(annotations.getAnnotations().size(), 18);\r
+                       assertEquals(al1.getResults(), annotations);\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (ResultNotAvailableException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (JobExecutionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+\r
+       @Test()\r
+       public void testRunLocallyOnTwoCpu() {\r
+               try {\r
+                       aacon.setNCore(2);\r
+                       ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+                       // For local execution use relative\r
+                       LocalRunner lr = new LocalRunner(confAAcon);\r
+                       lr.executeJob();\r
+                       ConfiguredExecutable<?> al1 = lr.waitForResult();\r
+                       assertNotNull(al1.getResults());\r
+                       MultiAnnotatedSequence<Method> annotations = confAAcon.getResults();\r
+                       assertNotNull(annotations);\r
+                       assertEquals(annotations.getAnnotations().size(), 18);\r
+                       assertEquals(al1.getResults(), annotations);\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (ResultNotAvailableException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (JobExecutionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+\r
+       @Test()\r
+       public void readStatistics() {\r
+               try {\r
+                       ConfiguredExecutable<AACon> confAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+                       // For local execution use relative\r
+\r
+                       AsyncExecutor sexec = Configurator.getAsyncEngine(confAAcon);\r
+                       String jobId = sexec.submitJob(confAAcon);\r
+                       FilePuller fw = FilePuller.newFilePuller(\r
+                                       confAAcon.getWorkDirectory() + File.separator\r
+                                                       + AACon.getStatFile(),\r
+                                       FileWatcher.MIN_CHUNK_SIZE_BYTES);\r
+                       int count = 0;\r
+                       long position = 0;\r
+                       fw.waitForFile(2);\r
+                       JobStatus status = sexec.getJobStatus(jobId);\r
+                       do {\r
+                               ChunkHolder ch = fw.pull(position);\r
+                               String chunk = ch.getChunk();\r
+                               position = ch.getNextPosition();\r
+                               // System.out.println(chunk);\r
+                               count++;\r
+                               // Make sure the loop is terminated if the job fails\r
+                               if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) {\r
+                                       fail("job failed!");\r
+                                       break;\r
+                               }\r
+                               Thread.sleep(300);\r
+                               status = sexec.getJobStatus(jobId);\r
+                       } while (status != JobStatus.FINISHED || fw.hasMoreData());\r
+\r
+                       assertTrue(count >= 1);\r
+                       ConfiguredExecutable<?> al = sexec.getResults(jobId);\r
+                       assertNotNull(al.getResults());\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (ResultNotAvailableException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (InterruptedException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               }\r
+       }\r
+\r
+       @Test()\r
+       public void testPersistance() {\r
+               try {\r
+                       AACon aacon = new AACon();\r
+                       aacon.setError("errrr.txt").setInput(test_alignment_input)\r
+                                       .setOutput("outtt.txt");\r
+                       assertEquals(aacon.getInput(), test_alignment_input);\r
+                       assertEquals(aacon.getError(), "errrr.txt");\r
+                       assertEquals(aacon.getOutput(), "outtt.txt");\r
+                       ConfiguredExecutable<AACon> cAAcon = Configurator\r
+                                       .configureExecutable(aacon, Executable.ExecProvider.Local);\r
+\r
+                       SyncExecutor sexec = Configurator.getSyncEngine(cAAcon);\r
+                       sexec.executeJob();\r
+                       ConfiguredExecutable<?> al = sexec.waitForResult();\r
+                       assertNotNull(al.getResults());\r
+                       // Save run configuration\r
+                       assertTrue(cAAcon.saveRunConfiguration());\r
+\r
+                       // See if loaded configuration is the same as saved\r
+                       RunConfiguration loadedRun = RunConfiguration\r
+                                       .load(new FileInputStream(new File(cAAcon\r
+                                                       .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+                       assertEquals(\r
+                                       ((ConfExecutable<AACon>) cAAcon).getRunConfiguration(),\r
+                                       loadedRun);\r
+                       // Load run configuration as ConfExecutable\r
+                       ConfiguredExecutable<AACon> resurrectedCAAcon = (ConfiguredExecutable<AACon>) cAAcon\r
+                                       .loadRunConfiguration(new FileInputStream(new File(cAAcon\r
+                                                       .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+                       assertNotNull(resurrectedCAAcon);\r
+                       assertEquals(resurrectedCAAcon.getExecutable().getInput(),\r
+                                       test_alignment_input);\r
+                       assertEquals(resurrectedCAAcon.getExecutable().getError(),\r
+                                       "errrr.txt");\r
+                       assertEquals(resurrectedCAAcon.getExecutable().getOutput(),\r
+                                       "outtt.txt");\r
+                       // See in details whether executables are the same\r
+                       assertEquals(resurrectedCAAcon.getExecutable(), aacon);\r
+\r
+                       ConfiguredExecutable<AACon> resAAcon = Configurator\r
+                                       .configureExecutable(resurrectedCAAcon.getExecutable(),\r
+                                                       Executable.ExecProvider.Local);\r
+\r
+                       sexec = Configurator.getSyncEngine(resAAcon,\r
+                                       Executable.ExecProvider.Local);\r
+                       sexec.executeJob();\r
+                       al = sexec.waitForResult();\r
+                       assertNotNull(al);\r
+\r
+               } catch (JobSubmissionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (JobExecutionException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               } catch (ResultNotAvailableException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               }\r
+       }\r
+\r
+       @Test()\r
+       public void testConfigurationLoading() {\r
+               try {\r
+                       RunnerConfig<AACon> aaconConfig = ConfExecutable\r
+                                       .getRunnerOptions(AACon.class);\r
+                       assertNotNull(aaconConfig);\r
+                       assertTrue(aaconConfig.getArguments().size() > 0);\r
+\r
+                       PresetManager<AACon> aaconPresets = ConfExecutable\r
+                                       .getRunnerPresets(AACon.class);\r
+                       assertNull(aaconPresets); // there is no presets\r
+\r
+                       LimitsManager<AACon> jronnLimits = ConfExecutable\r
+                                       .getRunnerLimits(AACon.class);\r
+                       assertNotNull(jronnLimits);\r
+                       assertTrue(jronnLimits.getLimits().size() > 0);\r
+                       jronnLimits.validate(aaconPresets);\r
+\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (ValidationException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+\r
+}\r
diff --git a/testsrc/compbio/runner/disorder/DisemblTester.java b/testsrc/compbio/runner/disorder/DisemblTester.java
new file mode 100644 (file)
index 0000000..85d5e91
--- /dev/null
@@ -0,0 +1,352 @@
+/* Copyright (c) 2009 Peter Troshin\r
+ *  \r
+ *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0     \r
+ * \r
+ *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
+ *  Apache License version 2 as published by the Apache Software Foundation\r
+ * \r
+ *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
+ *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
+ *  License for more details.\r
+ * \r
+ *  A copy of the license is in apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
+ * \r
+ * Any republication or derived work distributed in source code form\r
+ * must include this copyright and license notice.\r
+ */\r
+\r
+package compbio.runner.disorder;\r
+\r
+import static org.testng.Assert.assertEquals;\r
+import static org.testng.Assert.assertFalse;\r
+import static org.testng.Assert.assertNotNull;\r
+import static org.testng.Assert.assertNull;\r
+import static org.testng.Assert.assertTrue;\r
+import static org.testng.Assert.fail;\r
+\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.text.ParseException;\r
+import java.util.List;\r
+\r
+import javax.xml.bind.ValidationException;\r
+\r
+import org.ggf.drmaa.DrmaaException;\r
+import org.ggf.drmaa.JobInfo;\r
+import org.testng.annotations.BeforeMethod;\r
+import org.testng.annotations.Test;\r
+\r
+import compbio.data.sequence.AnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.FilePuller;\r
+import compbio.engine.SyncExecutor;\r
+import compbio.engine.client.ConfExecutable;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.engine.client.Executable;\r
+import compbio.engine.client.RunConfiguration;\r
+import compbio.engine.cluster.drmaa.ClusterUtil;\r
+import compbio.engine.cluster.drmaa.JobRunner;\r
+import compbio.engine.cluster.drmaa.StatisticManager;\r
+import compbio.engine.local.LocalRunner;\r
+import compbio.metadata.AllTestSuit;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobExecutionException;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.util.FileWatcher;\r
+import compbio.util.SysPrefs;\r
+\r
+public class DisemblTester {\r
+\r
+    public static String test_outfile = "TO1381.disembl.out";\r
+\r
+    private Disembl disembl;\r
+\r
+    /**\r
+     * Builds a new Disembl executable before each test method, wired to the\r
+     * shared test input and to the test output file.\r
+     */\r
+    @BeforeMethod(alwaysRun = true)\r
+    void init() {\r
+       Disembl fresh = new Disembl();\r
+       fresh.setInput(AllTestSuit.test_input).setOutput(test_outfile);\r
+       disembl = fresh;\r
+    }\r
+\r
+    @Test(groups = { AllTestSuit.test_group_cluster,\r
+           AllTestSuit.test_group_runner })\r
+    public void testRunOnCluster() {\r
+       assertFalse(SysPrefs.isWindows,\r
+               "Cluster execution can only be in unix environment");\r
+       try {\r
+           ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+                   .configureExecutable(disembl,\r
+                           Executable.ExecProvider.Cluster);\r
+           JobRunner runner = JobRunner.getInstance(confDisembl);\r
+\r
+           assertNotNull(runner, "Runner is NULL");\r
+           runner.executeJob();\r
+           // assertNotNull("JobId is null", jobId1);\r
+           JobStatus status = runner.getJobStatus();\r
+           assertTrue(status == JobStatus.PENDING\r
+                   || status == JobStatus.RUNNING,\r
+                   "Status of the process is wrong!");\r
+           JobInfo info = runner.getJobInfo();\r
+           assertNotNull(info, "JobInfo is null");\r
+           StatisticManager sm = new StatisticManager(info);\r
+           assertNotNull(sm, "Statictic manager is null");\r
+           try {\r
+\r
+               String exits = sm.getExitStatus();\r
+               assertNotNull("Exit status is null", exits);\r
+               // cut 4 trailing zeros from the number\r
+               int exitsInt = ClusterUtil.CLUSTER_STAT_IN_SEC.parse(exits)\r
+                       .intValue();\r
+               assertEquals(0, exitsInt);\r
+               System.out.println(sm.getAllStats());\r
+\r
+           } catch (ParseException e) {\r
+               e.printStackTrace();\r
+               fail("Parse Exception: " + e.getMessage());\r
+           }\r
+           //assertFalse(runner.cleanup());\r
+           assertTrue(sm.hasExited());\r
+           assertFalse(sm.wasAborted());\r
+           assertFalse(sm.hasDump());\r
+           assertFalse(sm.hasSignaled());\r
+\r
+       } catch (JobSubmissionException e) {\r
+           e.printStackTrace();\r
+           fail("DrmaaException caught:" + e.getMessage());\r
+       } catch (JobExecutionException e) {\r
+           e.printStackTrace();\r
+           fail("DrmaaException caught:" + e.getMessage());\r
+       } catch (DrmaaException e) {\r
+           e.printStackTrace();\r
+           fail("DrmaaException caught:" + e.getMessage());\r
+       }\r
+    }\r
+\r
+    /**\r
+     * This tests fails from time to time depending on the cluster load or some\r
+     * other factors. Any client code has to adjust for this issue\r
+     */\r
+    @Test(groups = { AllTestSuit.test_group_cluster,\r
+           AllTestSuit.test_group_runner })\r
+    public void testRunOnClusterAsync() {\r
+       assertFalse(SysPrefs.isWindows,\r
+               "Cluster execution can only be in unix environment");\r
+       try {\r
+           ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+                   .configureExecutable(disembl,\r
+                           Executable.ExecProvider.Cluster);\r
+           AsyncExecutor aengine = Configurator.getAsyncEngine(confDisembl);\r
+           String jobId = aengine.submitJob(confDisembl);\r
+           assertNotNull(jobId, "Runner is NULL");\r
+           // let drmaa to start\r
+           Thread.sleep(500);\r
+           JobStatus status = aengine.getJobStatus(jobId);\r
+           while (status != JobStatus.FINISHED) {\r
+               System.out.println("Job Status: " + status);\r
+               Thread.sleep(1000);\r
+               status = aengine.getJobStatus(jobId);\r
+               ConfiguredExecutable<Jronn> result = (ConfiguredExecutable<Jronn>) aengine\r
+                       .getResults(jobId);\r
+               assertNotNull(result);\r
+               System.out.println("RES:" + result);\r
+               // Some times the job could be removed from the cluster accounting \r
+               // before it has been reported to finish. Make sure \r
+               // to stop waiting in such case\r
+               if (status == JobStatus.UNDEFINED) {\r
+                   break;\r
+               }\r
+           }\r
+       } catch (JobSubmissionException e) {\r
+           e.printStackTrace();\r
+           fail("DrmaaException caught:" + e.getMessage());\r
+       } catch (InterruptedException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (ResultNotAvailableException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       }\r
+    }\r
+\r
+    @Test(groups = { AllTestSuit.test_group_runner })\r
+    public void testRunLocally() {\r
+       try {\r
+           ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+                   .configureExecutable(disembl, Executable.ExecProvider.Local);\r
+\r
+           // For local execution use relative\r
+           LocalRunner lr = new LocalRunner(confDisembl);\r
+           lr.executeJob();\r
+           ConfiguredExecutable<?> al1 = lr.waitForResult();\r
+           assertNotNull(al1.getResults());\r
+           List<AnnotatedSequence> al2 = confDisembl.getResults();\r
+           assertNotNull(al2);\r
+           assertEquals(al2.size(), 3);\r
+           assertEquals(al1.getResults(), al2);\r
+       } catch (JobSubmissionException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       } catch (ResultNotAvailableException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       } catch (JobExecutionException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       }\r
+    }\r
+\r
+    @Test(groups = { AllTestSuit.test_group_runner })\r
+    public void readStatistics() {\r
+       try {\r
+           ConfiguredExecutable<Disembl> confDisembl = Configurator\r
+                   .configureExecutable(disembl, Executable.ExecProvider.Local);\r
+           // For local execution use relavive\r
+\r
+           AsyncExecutor sexec = Configurator.getAsyncEngine(confDisembl);\r
+           String jobId = sexec.submitJob(confDisembl);\r
+           FilePuller fw = FilePuller.newFilePuller(confDisembl\r
+                   .getWorkDirectory()\r
+                   + File.separator + Jronn.getStatFile(),\r
+                   FileWatcher.MIN_CHUNK_SIZE_BYTES);\r
+           int count = 0;\r
+           long position = 0;\r
+           fw.waitForFile(4);\r
+           JobStatus status = sexec.getJobStatus(jobId);\r
+           while (status != JobStatus.FINISHED) {\r
+               if (fw.hasMoreData()) {\r
+                   ChunkHolder ch = fw.pull(position);\r
+                   String chunk = ch.getChunk();\r
+                   position = ch.getNextPosition();\r
+               }\r
+               count++;\r
+               // Make sure the loop is terminated if the job fails\r
+               if ((status == JobStatus.UNDEFINED || status == JobStatus.FAILED)) {\r
+                   break;\r
+               }\r
+               Thread.sleep(300);\r
+               status = sexec.getJobStatus(jobId);\r
+           }\r
+           assertTrue(count > 1);\r
+           ConfiguredExecutable<?> al = sexec.getResults(jobId);\r
+           assertNotNull(al.getResults());\r
+       } catch (JobSubmissionException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (ResultNotAvailableException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (IOException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (InterruptedException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       }\r
+    }\r
+\r
+    @Test(groups = { AllTestSuit.test_group_runner })\r
+    public void testPersistance() {\r
+       try {\r
+           Disembl disembl = new Disembl();\r
+           disembl.setError("errrr.txt").setInput(AllTestSuit.test_input)\r
+                   .setOutput("outtt.txt");\r
+           assertEquals(disembl.getInput(), AllTestSuit.test_input);\r
+           assertEquals(disembl.getError(), "errrr.txt");\r
+           assertEquals(disembl.getOutput(), "outtt.txt");\r
+           ConfiguredExecutable<Disembl> cDisembl = Configurator\r
+                   .configureExecutable(disembl, Executable.ExecProvider.Local);\r
+\r
+           SyncExecutor sexec = Configurator.getSyncEngine(cDisembl);\r
+           sexec.executeJob();\r
+           ConfiguredExecutable<?> al = sexec.waitForResult();\r
+           assertNotNull(al.getResults());\r
+           // Save run configuration\r
+           assertTrue(cDisembl.saveRunConfiguration());\r
+\r
+           // See if loaded configuration is the same as saved\r
+           RunConfiguration loadedRun = RunConfiguration\r
+                   .load(new FileInputStream(new File(cDisembl\r
+                           .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+           assertEquals(((ConfExecutable<Disembl>) cDisembl)\r
+                   .getRunConfiguration(), loadedRun);\r
+           // Load run configuration as ConfExecutable\r
+           ConfiguredExecutable<Disembl> resurrectedCDisembl = (ConfiguredExecutable<Disembl>) cDisembl\r
+                   .loadRunConfiguration(new FileInputStream(new File(cDisembl\r
+                           .getWorkDirectory(), RunConfiguration.rconfigFile)));\r
+           assertNotNull(resurrectedCDisembl);\r
+           assertEquals(resurrectedCDisembl.getExecutable().getInput(),\r
+                   AllTestSuit.test_input);\r
+           assertEquals(resurrectedCDisembl.getExecutable().getError(),\r
+                   "errrr.txt");\r
+           assertEquals(resurrectedCDisembl.getExecutable().getOutput(),\r
+                   "outtt.txt");\r
+           // See in details whether executables are the same\r
+           assertEquals(resurrectedCDisembl.getExecutable(), disembl);\r
+\r
+           ConfiguredExecutable<Disembl> resJronn = Configurator\r
+                   .configureExecutable(resurrectedCDisembl.getExecutable(),\r
+                           Executable.ExecProvider.Local);\r
+\r
+           sexec = Configurator.getSyncEngine(resJronn,\r
+                   Executable.ExecProvider.Local);\r
+           sexec.executeJob();\r
+           al = sexec.waitForResult();\r
+           assertNotNull(al);\r
+\r
+       } catch (JobSubmissionException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (JobExecutionException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (FileNotFoundException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (IOException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       } catch (ResultNotAvailableException e) {\r
+           e.printStackTrace();\r
+           fail(e.getMessage());\r
+       }\r
+    }\r
+\r
+    @Test(groups = { AllTestSuit.test_group_runner })\r
+    public void testConfigurationLoading() {\r
+       try {\r
+           RunnerConfig<Disembl> disemblConfig = ConfExecutable\r
+                   .getRunnerOptions(Disembl.class);\r
+           assertNotNull(disemblConfig);\r
+           assertTrue(disemblConfig.getArguments().size() > 0);\r
+\r
+           PresetManager<Disembl> disemblPresets = ConfExecutable\r
+                   .getRunnerPresets(Disembl.class);\r
+           assertNull(disemblPresets); // there is no presets\r
+\r
+           LimitsManager<Disembl> disemblLimits = ConfExecutable\r
+                   .getRunnerLimits(Disembl.class);\r
+           assertNotNull(disemblLimits);\r
+           assertTrue(disemblLimits.getLimits().size() > 0);\r
+           disemblLimits.validate(disemblPresets);\r
+\r
+       } catch (FileNotFoundException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       } catch (IOException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       } catch (ValidationException e) {\r
+           e.printStackTrace();\r
+           fail(e.getLocalizedMessage());\r
+       }\r
+    }\r
+\r
+}\r
diff --git a/testsrc/testdata/TO1381.fasta.aln b/testsrc/testdata/TO1381.fasta.aln
new file mode 100644 (file)
index 0000000..f6da7b8
--- /dev/null
@@ -0,0 +1,35 @@
+>Foobar_dundeefriends      \r
+MTADGPRELLQLRAAVRHRPQDFVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r
+ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL\r
+LPEEPYITAQLLNWRRRLCDWRALDVLSAQVRAAVAQGVGAVEPFAFLSEDASAAEQLAC\r
+ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+GVFEMDGFADDFGALLQALARRHGWLGI\r
+\r
+>Foobar                    \r
+-----------------------------------MGDTTAGEMAVQRGLALH-------\r
+---------QQRHAEAAVLLQQASDAAPEHPGIALWL-HALEDAGQAEAAAA-YTRAHQL\r
+LPEEPYITAQLLN--------------------AVAQGVGAVEPFAFLSEDASAAE----\r
+----------SVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+GVFEMDGFADDFGALLQALARRHGWLGI\r
+\r
+>dundeefriends             \r
+-MTADGPRELLQLRAAVRHRPQDVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r
+ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALED--------------HQL\r
+LPEEPYITAQLDVLSAQVR-------------AAVAQGVGAVEPFAFLSEDASAAEQLAC\r
+ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r
+HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r
+FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r
+VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r
+RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r
+TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r
+I---------------------------
\ No newline at end of file
diff --git a/webservices/compbio/data/msa/Annotation.java b/webservices/compbio/data/msa/Annotation.java
new file mode 100644 (file)
index 0000000..c73c0bc
--- /dev/null
@@ -0,0 +1,175 @@
+package compbio.data.msa;\r
+\r
+import java.security.InvalidParameterException;\r
+import java.util.List;\r
+\r
+import javax.jws.WebParam;\r
+import javax.jws.WebService;\r
+\r
+import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.LimitExceededException;\r
+import compbio.metadata.Option;\r
+import compbio.metadata.Preset;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.UnsupportedRuntimeException;\r
+import compbio.metadata.WrongParameterException;\r
+\r
+/**\r
+ * Interface for tools that produce one or more annotations for sequence(s).\r
+ * \r
+ * @author pvtroshin\r
+ * \r
+ *         Date November 2010\r
+ * \r
+ * @param <T>\r
+ *            executable type / web service type\r
+ */\r
+@WebService(targetNamespace = "http://a.data.compbio/01/12/2010/")\r
+public interface Annotation<T> extends JManagement, Metadata<T> {\r
+\r
+       /**\r
+        * Analyse a list of sequences without custom options or a preset.\r
+        * \r
+        * Any dataset containing a greater number of sequences, or whose average\r
+        * sequence length is greater, than defined in the default Limit will not\r
+        * be accepted for analysis and JobSubmissionException will be thrown.\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @return jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             is thrown when the job could not be submitted due to the\r
+        *             following reasons: 1) The number of sequences in the\r
+        *             submission or their average length is greater than defined by\r
+        *             the default Limit. 2) Any problem on the server side, e.g. it\r
+        *             is misconfigured or malfunctioning, is reported via this\r
+        *             exception. In the first case the information on the limit\r
+        *             could be obtained from the exception.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        */\r
+       String analize(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException;\r
+\r
+       /**\r
+        * Analyse a list of sequences with custom options.\r
+        * \r
+        * Default Limit is used to decide whether the calculation will be\r
+        * permitted or denied\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @param options\r
+        *            A list of Options\r
+        * @return jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             This exception is thrown when the job could not be submitted\r
+        *             due to the following reasons: 1) The number of sequences in\r
+        *             the submission or their average length is greater than\r
+        *             defined by the default Limit. 2) Any problem on the server\r
+        *             side, e.g. it is misconfigured or malfunctioning, is reported\r
+        *             via this exception. In the first case the information on the\r
+        *             limit could be obtained from the exception.\r
+        * @throws WrongParameterException\r
+        *             is thrown when 1) One of the Options provided is not\r
+        *             supported, 2) The value of the option is defined outside the\r
+        *             boundaries. In both cases the exception object contains the\r
+        *             information on the violating Option.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        * @see Option\r
+        */\r
+       String customAnalize(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+                       @WebParam(name = "options") List<Option<T>> options)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException, WrongParameterException;\r
+\r
+       /**\r
+        * Analyse a list of sequences with a preset.\r
+        * \r
+        * The Limit for the given preset is used to decide whether the calculation\r
+        * will be permitted or denied. If no Limit was defined for the preset,\r
+        * then the default Limit is used.\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @param preset\r
+        *            the Preset to use for the calculation\r
+        * @return String - jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             This exception is thrown when the job could not be submitted\r
+        *             due to the following reasons: 1) The number of sequences in\r
+        *             the submission or their average length is greater than\r
+        *             defined by the Limit. 2) Any problem on the server side, e.g.\r
+        *             it is misconfigured or malfunctioning, is reported via this\r
+        *             exception. In the first case the information on the limit\r
+        *             could be obtained from the exception.\r
+        * @throws WrongParameterException\r
+        *             is thrown when 1) One of the Options provided is not\r
+        *             supported, 2) The value of the option is defined outside the\r
+        *             boundaries. In both cases the exception object contains the\r
+        *             information on the violating Option.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        * @see Preset\r
+        */\r
+       String presetAnalize(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+                       @WebParam(name = "preset") Preset<T> preset)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException, WrongParameterException;\r
+\r
+       /**\r
+        * Return the result of the job.\r
+        * \r
+        * @param jobId\r
+        *            a unique job identifier\r
+        * @return MultiAnnotatedSequence - the result of the job\r
+        * @throws ResultNotAvailableException\r
+        *             this exception is thrown if the job execution was not\r
+        *             successful or the result of the execution could not be found\r
+        *             (e.g. removed). The exception could also be thrown due to\r
+        *             lower level problems on the server i.e. IOException,\r
+        *             FileNotFoundException problems as well as\r
+        *             UnknownFileFormatException.\r
+        * @throws InvalidParameterException\r
+        *             thrown if jobId is empty or cannot be recognised e.g. in\r
+        *             invalid format\r
+        */\r
+       MultiAnnotatedSequence<?> getResult(@WebParam(name = "jobId") String jobId)\r
+                       throws ResultNotAvailableException;\r
+}\r
diff --git a/webservices/compbio/data/msa/JManagement.java b/webservices/compbio/data/msa/JManagement.java
new file mode 100644 (file)
index 0000000..b9b2f01
--- /dev/null
@@ -0,0 +1,54 @@
+package compbio.data.msa;\r
+\r
+import java.security.InvalidParameterException;\r
+\r
+import javax.jws.WebParam;\r
+\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobStatus;\r
+\r
+public interface JManagement {\r
+\r
+       /**\r
+        * Stop running job but leave its output untouched\r
+        * \r
+        * @param jobId\r
+        *            - unique job identifier\r
+        * @return true if job was cancelled successfully, false otherwise\r
+        * @throws InvalidParameterException\r
+        *             thrown if jobId is empty or cannot be recognised e.g. in\r
+        *             invalid format\r
+        */\r
+       boolean cancelJob(@WebParam(name = "jobId") String jobId);\r
+\r
+       /**\r
+        * Return the status of the job.\r
+        * \r
+        * @param jobId\r
+        *            - unique job identifier\r
+        * @return JobStatus - status of the job\r
+        * @throws InvalidParameterException\r
+        *             thrown if jobId is empty or cannot be recognised e.g. in\r
+        *             invalid format\r
+        * @see JobStatus\r
+        */\r
+       JobStatus getJobStatus(@WebParam(name = "jobId") String jobId);\r
+\r
+       /**\r
+        * Reads 1kb chunk from the statistics file which is specific to a given web\r
+        * service from the position. If at the time of a request less than 1kb of\r
+        * data is available from the position to the end of the file, then it\r
+        * returns all the data available from the position to the end of the file.\r
+        * \r
+        * @param jobId\r
+        *            - unique job identifier\r
+        * @param position\r
+        *            - next position within the file to read\r
+        * @return ChunkHolder which contains a chunk of data and the next position\r
+        *         within the file from which no data has been read\r
+        * @throws InvalidParameterException\r
+        *             thrown if jobId is empty or cannot be recognised e.g. in\r
+        *             invalid format and also if the position value is negative\r
+        * @see ChunkHolder\r
+        */\r
+       ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId,\r
+                       @WebParam(name = "position") long position);\r
+\r
+}\r
diff --git a/webservices/compbio/data/msa/Metadata.java b/webservices/compbio/data/msa/Metadata.java
new file mode 100644 (file)
index 0000000..67abb5d
--- /dev/null
@@ -0,0 +1,50 @@
+package compbio.data.msa;\r
+\r
+import javax.jws.WebParam;\r
+\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.RunnerConfig;\r
+\r
+public interface Metadata<T> {\r
+\r
+       /**\r
+        * Get options supported by a web service\r
+        * \r
+        * @return RunnerConfig the list of options and parameters supported by a\r
+        *         web service.\r
+        */\r
+       RunnerConfig<T> getRunnerOptions();\r
+\r
+       /**\r
+        * Get presets supported by a web service\r
+        * \r
+        * @return PresetManager the object contains information about presets\r
+        *         supported by a web service\r
+        */\r
+       PresetManager<T> getPresets();\r
+\r
+       /**\r
+        * Get a Limit for a preset.\r
+        * \r
+        * @param presetName\r
+        *            the name of the preset. If no name is provided, or no Limit\r
+        *            is defined for the named preset, the default is returned.\r
+        *            NOTE(review): this text previously said the "default preset"\r
+        *            is returned, but the return type is Limit, so presumably the\r
+        *            default Limit is meant - TODO confirm against the\r
+        *            implementation\r
+        * @return Limit\r
+        */\r
+       Limit<T> getLimit(@WebParam(name = "presetName") String presetName);\r
+\r
+       /**\r
+        * List Limits supported by a web service.\r
+        * \r
+        * @return LimitsManager\r
+        */\r
+       LimitsManager<T> getLimits();\r
+\r
+}\r
index 4a22009..b1735fb 100644 (file)
@@ -26,17 +26,11 @@ import javax.jws.WebService;
 \r
 import compbio.data.sequence.Alignment;\r
 import compbio.data.sequence.FastaSequence;\r
-import compbio.metadata.ChunkHolder;\r
-import compbio.metadata.JobStatus;\r
 import compbio.metadata.JobSubmissionException;\r
-import compbio.metadata.Limit;\r
 import compbio.metadata.LimitExceededException;\r
-import compbio.metadata.LimitsManager;\r
 import compbio.metadata.Option;\r
 import compbio.metadata.Preset;\r
-import compbio.metadata.PresetManager;\r
 import compbio.metadata.ResultNotAvailableException;\r
-import compbio.metadata.RunnerConfig;\r
 import compbio.metadata.UnsupportedRuntimeException;\r
 import compbio.metadata.WrongParameterException;\r
 \r
@@ -45,248 +39,158 @@ import compbio.metadata.WrongParameterException;
  * \r
  * @author pvtroshin\r
  * \r
- *         Date September 2009\r
+ *         Date November 2010\r
  * \r
  * @param <T>\r
  *            executable type / web service type\r
  */\r
-@WebService(targetNamespace = "http://msa.data.compbio/01/01/2010/")\r
-public interface MsaWS<T> {\r
-\r
-    /**\r
-     * Align a list of sequences with default settings.\r
-     * \r
-     * Any dataset containing a greater number of sequences or the average\r
-     * length of the sequences are greater then defined in the default Limit\r
-     * will not be accepted for an alignment operation and\r
-     * JobSubmissionException will be thrown.\r
-     * \r
-     * @param sequences\r
-     *            List of FastaSequence objects. The programme does not perform\r
-     *            any sequence validity checks. Nor does it checks whether the\r
-     *            sequences names are unique. It is responsibility of the caller\r
-     *            to validate this information\r
-     * @return jobId - unique identifier for the job\r
-     * @throws JobSubmissionException. This\r
-     *             exception is thrown when the job could not be submitted due\r
-     *             to the following reasons: 1) The number of sequences in the\r
-     *             submission or their average length is greater then defined by\r
-     *             the default Limit. 2) Any problems on the server side e.g. it\r
-     *             is misconfigured or malfunction, is reported via this\r
-     *             exception. In the first case the information on the limit\r
-     *             could be obtained from an exception.\r
-     * @throws InvalidParameterException\r
-     *             thrown if input list of fasta sequence is null or empty\r
-     * @throws UnsupportedRuntimeException\r
-     *             thrown if server OS does not support native executables for a\r
-     *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
-     *             service is called\r
-     * @throws LimitExceededException\r
-     *             is throw if the input sequences number or average length\r
-     *             exceeds what is defined by the limit\r
-     */\r
-    String align(\r
-           @WebParam(name = "fastaSequences") List<FastaSequence> sequences)\r
-           throws UnsupportedRuntimeException, LimitExceededException,\r
-           JobSubmissionException;\r
-\r
-    /**\r
-     * Align a list of sequences with options.\r
-     * \r
-     * @see Option\r
-     * \r
-     *      Default Limit is used to decide whether the calculation will be\r
-     *      permitted or denied\r
-     * \r
-     * @param sequences\r
-     *            List of FastaSequence objects. The programme does not perform\r
-     *            any sequence validity checks. Nor does it checks whether the\r
-     *            sequences names are unique. It is responsibility of the caller\r
-     *            to validate this information\r
-     * @param options\r
-     *            A list of Options\r
-     * @return jobId - unique identifier for the job\r
-     * @throws JobSubmissionException. This\r
-     *             exception is thrown when the job could not be submitted due\r
-     *             to the following reasons: 1) The number of sequences in the\r
-     *             submission or their average length is greater then defined by\r
-     *             the default Limit. 2) Any problems on the server side e.g. it\r
-     *             is misconfigured or malfunction, is reported via this\r
-     *             exception. In the first case the information on the limit\r
-     *             could be obtained from an exception.\r
-     * @throws WrongParameterException\r
-     *             is throws when 1) One of the Options provided is not\r
-     *             supported, 2) The value of the option is defined outside the\r
-     *             boundaries. In both cases exception object contain the\r
-     *             information on the violating Option.\r
-     * @throws InvalidParameterException\r
-     *             thrown if input list of fasta sequence is null or empty\r
-     * @throws UnsupportedRuntimeException\r
-     *             thrown if server OS does not support native executables for a\r
-     *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
-     *             service is called\r
-     * @throws LimitExceededException\r
-     *             is throw if the input sequences number or average length\r
-     *             exceeds what is defined by the limit\r
-     */\r
-    String customAlign(\r
-           @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
-           @WebParam(name = "options") List<Option<T>> options)\r
-           throws UnsupportedRuntimeException, LimitExceededException,\r
-           JobSubmissionException, WrongParameterException;\r
-\r
-    /**\r
-     * Align a list of sequences with preset. @see Preset\r
-     * \r
-     * Limit for a presetName is used whether the calculation will be permitted\r
-     * or denied. If no Limit was defined for a presetName, than default limit\r
-     * is used.\r
-     * \r
-     * @param sequences\r
-     *            List of FastaSequence objects. The programme does not perform\r
-     *            any sequence validity checks. Nor does it checks whether the\r
-     *            sequences names are unique. It is responsibility of the caller\r
-     *            to validate this information\r
-     * @param preset\r
-     *            A list of Options\r
-     * @return String - jobId - unique identifier for the job\r
-     * @throws JobSubmissionException. This\r
-     *             exception is thrown when the job could not be submitted due\r
-     *             to the following reasons: 1) The number of sequences in the\r
-     *             submission or their average length is greater then defined by\r
-     *             the default Limit. 2) Any problems on the server side e.g. it\r
-     *             is misconfigured or malfunction, is reported via this\r
-     *             exception. In the first case the information on the limit\r
-     *             could be obtained from an exception.\r
-     * @throws WrongParameterException\r
-     *             is throws when 1) One of the Options provided is not\r
-     *             supported, 2) The value of the option is defined outside the\r
-     *             boundaries. In both cases exception object contain the\r
-     *             information on the violating Option.\r
-     * @throws InvalidParameterException\r
-     *             thrown if input list of fasta sequence is null or empty\r
-     * @throws UnsupportedRuntimeException\r
-     *             thrown if server OS does not support native executables for a\r
-     *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
-     *             service is called\r
-     * @throws LimitExceededException\r
-     *             is throw if the input sequences number or average length\r
-     *             exceeds what is defined by the limit\r
-     */\r
-    String presetAlign(\r
-           @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
-           @WebParam(name = "preset") Preset<T> preset)\r
-           throws UnsupportedRuntimeException, LimitExceededException,\r
-           JobSubmissionException, WrongParameterException;\r
-\r
-    /**\r
-     * Return the result of the job.\r
-     * \r
-     * @param jobId\r
-     *            a unique job identifier\r
-     * @return Alignment\r
-     * @throws ResultNotAvailableException\r
-     *             this exception is throw if the job execution was not\r
-     *             successful or the result of the execution could not be found.\r
-     *             (e.g. removed). Exception could also be thrown is dues to the\r
-     *             lower level problems on the server i.e. IOException,\r
-     *             FileNotFoundException problems as well as\r
-     *             UnknownFileFormatException.\r
-     * @throws InvalidParameterException\r
-     *             thrown if jobId is empty or cannot be recognised e.g. in\r
-     *             invalid format\r
-     */\r
-    Alignment getResult(@WebParam(name = "jobId") String jobId)\r
-           throws ResultNotAvailableException;\r
-\r
-    /**\r
-     * Stop running job but leave its output untouched\r
-     * \r
-     * @return true if job was cancelled successfully, false otherwise\r
-     * @throws InvalidParameterException\r
-     *             thrown if jobId is empty or cannot be recognised e.g. in\r
-     *             invalid format\r
-     */\r
-    boolean cancelJob(@WebParam(name = "jobId") String jobId);\r
-\r
-    /**\r
-     * Return the status of the job. @see JobStatus\r
-     * \r
-     * @param jobId\r
-     *            - unique job identifier\r
-     * @return JobStatus - status of the job\r
-     * @throws InvalidParameterException\r
-     *             thrown if jobId is empty or cannot be recognised e.g. in\r
-     *             invalid format\r
-     */\r
-    JobStatus getJobStatus(@WebParam(name = "jobId") String jobId);\r
-\r
-    /**\r
-     * Reads 1kb chunk from the statistics file which is specific to a given web\r
-     * service from the position. If in time of a request less then 1kb data is\r
-     * available from the position to the end of the file, then it returns all\r
-     * the data available from the position to the end of the file.\r
-     * \r
-     * @param jobId\r
-     *            - unique job identifier\r
-     * @param position\r
-     *            - next position within the file to read\r
-     * @return ChunkHolder - @see ChunkHolder which contains a chuink of data\r
-     *         and a next position within the file from which no data has been\r
-     *         read\r
-     * @throws InvalidParameterException\r
-     *             thrown if jobId is empty or cannot be recognised e.g. in\r
-     *             invalid format and also if the position value is negative\r
-     */\r
-    ChunkHolder pullExecStatistics(@WebParam(name = "jobId") String jobId,\r
-           @WebParam(name = "position") long position);\r
-\r
-    /*\r
-     * TODO\r
-     * \r
-     * @param jobId\r
-     * \r
-     * @return\r
-     * \r
-     * byte getProgress(@WebParam(name = "jobId") String jobId);\r
-     */\r
-\r
-    /**\r
-     * Get options supported by a web service\r
-     * \r
-     * @return RunnerConfig the list of options and parameters supported by a\r
-     *         web service.\r
-     */\r
-    RunnerConfig<T> getRunnerOptions();\r
-\r
-    /**\r
-     * Get presets supported by a web service\r
-     * \r
-     * @return PresetManager the object contains information about presets\r
-     *         supported by a web service\r
-     */\r
-    PresetManager<T> getPresets();\r
-\r
-    /**\r
-     * Get a Limit for a preset.\r
-     * \r
-     * @param presetName\r
-     *            the name of the preset. if no name is provided, then the\r
-     *            default preset is returned. If no limit for a particular\r
-     *            preset is defined then the default preset is returned\r
-     * @return Limit\r
-     */\r
-    Limit<T> getLimit(@WebParam(name = "presetName") String presetName);\r
-\r
-    /**\r
-     * List Limits supported by a web service.\r
-     * \r
-     * @param presetName\r
-     *            the name of the preset. if no name is provided, then the\r
-     *            default preset is returned. If no limit for a particular\r
-     *            preset is defined then the default preset is returned\r
-     * @return LimitManager\r
-     */\r
-    LimitsManager<T> getLimits();\r
+@WebService(targetNamespace = "http://msa.data.compbio/01/12/2010/")\r
+public interface MsaWS<T> extends JManagement, Metadata<T> {\r
+\r
+       /**\r
+        * Align a list of sequences with default settings.\r
+        * \r
+        * Any dataset containing a greater number of sequences, or whose average\r
+        * sequence length is greater, than defined in the default Limit will not\r
+        * be accepted for an alignment operation and JobSubmissionException will\r
+        * be thrown.\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @return jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             This exception is thrown when the job could not be submitted\r
+        *             due to the following reasons: 1) The number of sequences in\r
+        *             the submission or their average length is greater than\r
+        *             defined by the default Limit. 2) Any problem on the server\r
+        *             side, e.g. it is misconfigured or malfunctioning, is reported\r
+        *             via this exception. In the first case the information on the\r
+        *             limit could be obtained from the exception.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        */\r
+       String align(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException;\r
+\r
+       /**\r
+        * Align a list of sequences with options.\r
+        * \r
+        * Default Limit is used to decide whether the calculation will be\r
+        * permitted or denied\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @param options\r
+        *            A list of Options\r
+        * @return jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             This exception is thrown when the job could not be submitted\r
+        *             due to the following reasons: 1) The number of sequences in\r
+        *             the submission or their average length is greater than\r
+        *             defined by the default Limit. 2) Any problem on the server\r
+        *             side, e.g. it is misconfigured or malfunctioning, is reported\r
+        *             via this exception. In the first case the information on the\r
+        *             limit could be obtained from the exception.\r
+        * @throws WrongParameterException\r
+        *             is thrown when 1) One of the Options provided is not\r
+        *             supported, 2) The value of the option is defined outside the\r
+        *             boundaries. In both cases the exception object contains the\r
+        *             information on the violating Option.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        * @see Option\r
+        */\r
+       String customAlign(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+                       @WebParam(name = "options") List<Option<T>> options)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException, WrongParameterException;\r
+\r
+       /**\r
+        * Align a list of sequences with preset.\r
+        * \r
+        * The Limit for the given preset is used to decide whether the calculation\r
+        * will be permitted or denied. If no Limit was defined for the preset,\r
+        * then the default Limit is used.\r
+        * \r
+        * @param sequences\r
+        *            List of FastaSequence objects. The programme does not perform\r
+        *            any sequence validity checks. Nor does it check whether the\r
+        *            sequence names are unique. It is the responsibility of the\r
+        *            caller to validate this information\r
+        * @param preset\r
+        *            the Preset to use for the alignment\r
+        * @return String - jobId - unique identifier for the job\r
+        * @throws JobSubmissionException\r
+        *             This exception is thrown when the job could not be submitted\r
+        *             due to the following reasons: 1) The number of sequences in\r
+        *             the submission or their average length is greater than\r
+        *             defined by the Limit. 2) Any problem on the server side, e.g.\r
+        *             it is misconfigured or malfunctioning, is reported via this\r
+        *             exception. In the first case the information on the limit\r
+        *             could be obtained from the exception.\r
+        * @throws WrongParameterException\r
+        *             is thrown when 1) One of the Options provided is not\r
+        *             supported, 2) The value of the option is defined outside the\r
+        *             boundaries. In both cases the exception object contains the\r
+        *             information on the violating Option.\r
+        * @throws InvalidParameterException\r
+        *             thrown if input list of fasta sequences is null or empty\r
+        * @throws UnsupportedRuntimeException\r
+        *             thrown if server OS does not support native executables for a\r
+        *             given web service, e.g. JWS2 is deployed on Windows and Mafft\r
+        *             service is called\r
+        * @throws LimitExceededException\r
+        *             is thrown if the number of input sequences or their average\r
+        *             length exceeds what is defined by the limit\r
+        * @see Preset\r
+        */\r
+       String presetAlign(\r
+                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
+                       @WebParam(name = "preset") Preset<T> preset)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException, WrongParameterException;\r
+\r
+       /**\r
+        * Return the result of the job.\r
+        * \r
+        * @param jobId\r
+        *            a unique job identifier\r
+        * @return Alignment\r
+        * @throws ResultNotAvailableException\r
+        *             this exception is thrown if the job execution was not\r
+        *             successful or the result of the execution could not be found\r
+        *             (e.g. removed). The exception could also be thrown due to\r
+        *             lower level problems on the server i.e. IOException,\r
+        *             FileNotFoundException problems as well as\r
+        *             UnknownFileFormatException.\r
+        * @throws InvalidParameterException\r
+        *             thrown if jobId is empty or cannot be recognised e.g. in\r
+        *             invalid format\r
+        */\r
+       Alignment getResult(@WebParam(name = "jobId") String jobId)\r
+                       throws ResultNotAvailableException;\r
 \r
 }\r
diff --git a/webservices/compbio/ws/server/AAConWS.java b/webservices/compbio/ws/server/AAConWS.java
new file mode 100644 (file)
index 0000000..cf382df
--- /dev/null
@@ -0,0 +1,140 @@
+package compbio.ws.server;\r
+\r
+import java.io.File;\r
+import java.util.List;\r
+\r
+import javax.annotation.Resource;\r
+import javax.jws.WebService;\r
+import javax.xml.ws.WebServiceContext;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import compbio.conservation.Method;\r
+import compbio.data.msa.Annotation;\r
+import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.JalviewAnnotation;\r
+import compbio.data.sequence.MultiAnnotatedSequence;\r
+import compbio.engine.AsyncExecutor;\r
+import compbio.engine.Configurator;\r
+import compbio.engine.client.ConfiguredExecutable;\r
+import compbio.metadata.ChunkHolder;\r
+import compbio.metadata.JobStatus;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.Limit;\r
+import compbio.metadata.LimitExceededException;\r
+import compbio.metadata.LimitsManager;\r
+import compbio.metadata.Option;\r
+import compbio.metadata.Preset;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.RunnerConfig;\r
+import compbio.metadata.UnsupportedRuntimeException;\r
+import compbio.metadata.WrongParameterException;\r
+import compbio.runner.Util;\r
+import compbio.runner.conservation.AACon;\r
+\r
+/**\r
+ * Web service endpoint for the AACon runner.\r
+ * <p>\r
+ * Result retrieval, job status/cancellation and the limit, preset and option\r
+ * look-ups are delegated to {@link WSUtil}, {@link Configurator} and\r
+ * {@link AACon}. The job submission operations (analize, customAnalize and\r
+ * presetAnalize) are not implemented yet and throw\r
+ * {@link UnsupportedOperationException}.\r
+ * <p>\r
+ * The endpoint interface is {@link Annotation}, the interface this class\r
+ * implements. NOTE(review): the targetNamespace is kept unchanged - confirm\r
+ * that it matches the namespace declared on Annotation.\r
+ */\r
+@WebService(endpointInterface = "compbio.data.msa.Annotation", targetNamespace = "http://msa.data.compbio/01/01/2010/", serviceName = "AAConWS")\r
+public class AAConWS implements Annotation<AACon> {\r
+\r
+       // Ask for resource injection\r
+       @Resource\r
+       WebServiceContext wsContext;\r
+\r
+       private static final Logger statLog = Logger.getLogger("AAConWS-stats");\r
+\r
+       private static final Logger log = Logger.getLogger(AAConWS.class);\r
+\r
+       private static final RunnerConfig<AACon> aaconOptions = Util\r
+                       .getSupportedOptions(AACon.class);\r
+\r
+       private static final PresetManager<AACon> aaconPresets = Util\r
+                       .getPresets(AACon.class);\r
+\r
+       /**\r
+        * Creates an AACon executable that reads "fasta.in" and writes\r
+        * "fasta.out", and lets the {@link Configurator} configure it.\r
+        * \r
+        * @param sequences\r
+        *            the sequences passed on to the Configurator\r
+        * @return the configured executable\r
+        * @throws JobSubmissionException\r
+        *             propagated from the Configurator\r
+        */\r
+       ConfiguredExecutable<AACon> init(List<FastaSequence> sequences)\r
+                       throws JobSubmissionException {\r
+               AACon aacon = new AACon();\r
+               aacon.setInput("fasta.in").setOutput("fasta.out");\r
+               return Configurator.configureExecutable(aacon, sequences);\r
+       }\r
+\r
+       /**\r
+        * Returns the result of the job.\r
+        * \r
+        * @param jobId\r
+        *            a unique job identifier, validated with\r
+        *            {@link WSUtil#validateJobId(String)} first\r
+        * @return the annotated sequences read from the finished executable\r
+        * @throws ResultNotAvailableException\r
+        *             propagated from the engine or the executable\r
+        */\r
+       @SuppressWarnings("unchecked")\r
+       public MultiAnnotatedSequence<Method> getResult(String jobId)\r
+                       throws ResultNotAvailableException {\r
+               WSUtil.validateJobId(jobId);\r
+               AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);\r
+               // Unchecked cast: the value returned by the engine has to be narrowed\r
+               // to the AACon executable type\r
+               ConfiguredExecutable<AACon> aacon = (ConfiguredExecutable<AACon>) asyncEngine\r
+                               .getResults(jobId);\r
+               MultiAnnotatedSequence<Method> mas = aacon.getResults();\r
+               return mas;\r
+       }\r
+\r
+       /**\r
+        * Returns the result of the job converted to a Jalview annotation.\r
+        * \r
+        * @param jobId\r
+        *            a unique job identifier\r
+        * @return the result of {@link #getResult(String)} converted with\r
+        *         toJalviewAnnotation()\r
+        * @throws ResultNotAvailableException\r
+        *             propagated from {@link #getResult(String)}\r
+        */\r
+       public JalviewAnnotation getJalviewAnnotation(String jobId)\r
+                       throws ResultNotAvailableException {\r
+               MultiAnnotatedSequence<Method> result = getResult(jobId);\r
+               return result.toJalviewAnnotation();\r
+       }\r
+\r
+       /**\r
+        * @param presetName\r
+        *            the preset name passed on to AACon\r
+        * @return the limit a new AACon instance reports for the preset name\r
+        */\r
+       public Limit<AACon> getLimit(String presetName) {\r
+               return new AACon().getLimit(presetName);\r
+       }\r
+\r
+       /**\r
+        * @return the limits reported by a new AACon instance\r
+        */\r
+       public LimitsManager<AACon> getLimits() {\r
+               return new AACon().getLimits();\r
+       }\r
+\r
+       /**\r
+        * Pulls a chunk of the statistics file from the job's work directory.\r
+        * \r
+        * @param jobId\r
+        *            a unique job identifier, validated first\r
+        * @param position\r
+        *            passed through to {@link WSUtil#pullFile(String, long)}\r
+        * @return the chunk returned by WSUtil.pullFile\r
+        */\r
+       public ChunkHolder pullExecStatistics(String jobId, long position) {\r
+               WSUtil.validateJobId(jobId);\r
+               String file = Configurator.getWorkDirectory(jobId) + File.separator\r
+                               + AACon.getStatFile();\r
+               return WSUtil.pullFile(file, position);\r
+       }\r
+\r
+       /**\r
+        * @param jobId\r
+        *            a unique job identifier, validated first\r
+        * @return the value returned by {@link WSUtil#cancelJob(String)}\r
+        */\r
+       public boolean cancelJob(String jobId) {\r
+               WSUtil.validateJobId(jobId);\r
+               return WSUtil.cancelJob(jobId);\r
+       }\r
+\r
+       /**\r
+        * @param jobId\r
+        *            a unique job identifier, validated first\r
+        * @return the status returned by {@link WSUtil#getJobStatus(String)}\r
+        */\r
+       public JobStatus getJobStatus(String jobId) {\r
+               WSUtil.validateJobId(jobId);\r
+               return WSUtil.getJobStatus(jobId);\r
+       }\r
+\r
+       /**\r
+        * @return the presets loaded for AACon when this class was initialised\r
+        */\r
+       public PresetManager<AACon> getPresets() {\r
+               return aaconPresets;\r
+       }\r
+\r
+       /**\r
+        * @return the supported options loaded for AACon when this class was\r
+        *         initialised\r
+        */\r
+       public RunnerConfig<AACon> getRunnerOptions() {\r
+               return aaconOptions;\r
+       }\r
+\r
+       /**\r
+        * Not implemented yet.\r
+        * \r
+        * @throws UnsupportedOperationException\r
+        *             always, instead of handing a null job id back to the caller\r
+        */\r
+       @Override\r
+       public String analize(List<FastaSequence> sequences)\r
+                       throws UnsupportedRuntimeException, LimitExceededException,\r
+                       JobSubmissionException {\r
+               // TODO implement job submission\r
+               throw new UnsupportedOperationException(\r
+                               "AAConWS.analize is not implemented yet");\r
+       }\r
+\r
+       /**\r
+        * Not implemented yet.\r
+        * \r
+        * @throws UnsupportedOperationException\r
+        *             always, instead of handing a null job id back to the caller\r
+        */\r
+       @Override\r
+       public String customAnalize(List<FastaSequence> sequences,\r
+                       List<Option<AACon>> options) throws UnsupportedRuntimeException,\r
+                       LimitExceededException, JobSubmissionException,\r
+                       WrongParameterException {\r
+               // TODO implement job submission with custom options\r
+               throw new UnsupportedOperationException(\r
+                               "AAConWS.customAnalize is not implemented yet");\r
+       }\r
+\r
+       /**\r
+        * Not implemented yet.\r
+        * \r
+        * @throws UnsupportedOperationException\r
+        *             always, instead of handing a null job id back to the caller\r
+        */\r
+       @Override\r
+       public String presetAnalize(List<FastaSequence> sequences,\r
+                       Preset<AACon> preset) throws UnsupportedRuntimeException,\r
+                       LimitExceededException, JobSubmissionException,\r
+                       WrongParameterException {\r
+               // TODO implement job submission with a preset\r
+               throw new UnsupportedOperationException(\r
+                               "AAConWS.presetAnalize is not implemented yet");\r
+       }\r
+\r
+}\r
index dd1051f..62996e5 100644 (file)
@@ -36,73 +36,73 @@ import compbio.util.Timer;
 \r
 public final class WSUtil {\r
 \r
-    public static final void validateJobId(String jobId)\r
-           throws InvalidParameterException {\r
-       if (!compbio.engine.client.Util.isValidJobId(jobId)) {\r
-           throw new InvalidParameterException(\r
-                   "JobId is not provided or cannot be recognised! Given value: "\r
-                           + jobId);\r
+       /**\r
+        * Validates a job identifier.\r
+        * \r
+        * @param jobId\r
+        *            the identifier to check\r
+        * @throws InvalidParameterException\r
+        *             if compbio.engine.client.Util.isValidJobId rejects the value\r
+        */\r
+       public static final void validateJobId(String jobId)\r
+                       throws InvalidParameterException {\r
+               boolean recognised = compbio.engine.client.Util.isValidJobId(jobId);\r
+               if (recognised) {\r
+                       return;\r
+               }\r
+               throw new InvalidParameterException(\r
+                               "JobId is not provided or cannot be recognised! Given value: "\r
+                                               + jobId);\r
        }\r
-    }\r
 \r
-    public static final void validateFastaInput(List<FastaSequence> sequences)\r
-           throws InvalidParameterException {\r
-       if (sequences == null || sequences.isEmpty()) {\r
-           throw new InvalidParameterException(\r
-                   "List of fasta sequences required but not provided! ");\r
+       /**\r
+        * Validates that a list of fasta sequences was supplied.\r
+        * \r
+        * @param sequences\r
+        *            the list to check\r
+        * @throws InvalidParameterException\r
+        *             if the list is null or empty\r
+        */\r
+       public static final void validateFastaInput(List<FastaSequence> sequences)\r
+                       throws InvalidParameterException {\r
+               boolean provided = sequences != null && !sequences.isEmpty();\r
+               if (provided) {\r
+                       return;\r
+               }\r
+               throw new InvalidParameterException(\r
+                               "List of fasta sequences required but not provided! ");\r
        }\r
-    }\r
 \r
-    public static JobStatus getJobStatus(String jobId) {\r
-       AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);\r
-       return asyncEngine.getJobStatus(jobId);\r
-    }\r
-\r
-    public static ChunkHolder pullFile(String file, long position) {\r
-       return ProgressGetter.pull(file, position);\r
-    }\r
+       /**\r
+        * Looks up the asynchronous engine for the job and asks it for the job\r
+        * status.\r
+        * \r
+        * @param jobId\r
+        *            the job identifier\r
+        * @return the status reported by the engine\r
+        */\r
+       public static JobStatus getJobStatus(String jobId) {\r
+               return Configurator.getAsyncEngine(jobId).getJobStatus(jobId);\r
+       }\r
 \r
-    public static byte getProgress(String jobId) {\r
-       throw new UnsupportedOperationException();\r
-    }\r
+       /**\r
+        * Delegates to ProgressGetter.pull.\r
+        * \r
+        * @param file\r
+        *            the file argument passed through unchanged\r
+        * @param position\r
+        *            the position argument passed through unchanged\r
+        * @return the chunk returned by ProgressGetter.pull\r
+        */\r
+       public static ChunkHolder pullFile(String file, long position) {\r
+               ChunkHolder chunk = ProgressGetter.pull(file, position);\r
+               return chunk;\r
+       }\r
 \r
-    public static AsyncExecutor getEngine(ConfiguredExecutable<?> confClustal) {\r
-       assert confClustal != null;\r
-       return Configurator.getAsyncEngine(confClustal);\r
-    }\r
+       /**\r
+        * Progress reporting is not supported.\r
+        * \r
+        * @param jobId\r
+        *            not used\r
+        * @return never returns normally\r
+        * @throws UnsupportedOperationException\r
+        *             always\r
+        */\r
+       public static byte getProgress(String jobId) {\r
+               throw new UnsupportedOperationException();\r
+       }\r
 \r
-    public static boolean cancelJob(String jobId) {\r
-       AsyncExecutor asyncEngine = Configurator.getAsyncEngine(jobId);\r
-       return asyncEngine.cancelJob(jobId);\r
-    }\r
+       /**\r
+        * Obtains the asynchronous engine for a configured executable.\r
+        * \r
+        * @param confClustal\r
+        *            the configured executable; checked with an assertion only, so\r
+        *            a null value is rejected here only when assertions are enabled\r
+        * @return the engine returned by the Configurator\r
+        */\r
+       public static AsyncExecutor getEngine(ConfiguredExecutable<?> confClustal) {\r
+               assert confClustal != null;\r
+               AsyncExecutor engine = Configurator.getAsyncEngine(confClustal);\r
+               return engine;\r
+       }\r
 \r
-    public static <T> String align(List<FastaSequence> sequences,\r
-           ConfiguredExecutable<T> confExec, WSLogger logger,\r
-           String callingMethod, Limit<T> limit)\r
-           throws LimitExceededException, JobSubmissionException {\r
-       Timer timer = Timer.getMilliSecondsTimer();\r
-       if (limit.isExceeded(sequences)) {\r
-           throw LimitExceededException.newLimitExceeded(limit, sequences);\r
+       /**\r
+        * Looks up the asynchronous engine for the job and asks it to cancel the\r
+        * job.\r
+        * \r
+        * @param jobId\r
+        *            the job identifier\r
+        * @return the value returned by the engine's cancelJob\r
+        */\r
+       public static boolean cancelJob(String jobId) {\r
+               return Configurator.getAsyncEngine(jobId).cancelJob(jobId);\r
        }\r
-       compbio.runner.Util.writeInput(sequences, confExec);\r
-       AsyncExecutor engine = Configurator.getAsyncEngine(confExec);\r
-       String jobId = engine.submitJob(confExec);\r
-       if (logger != null) {\r
-           logger.log(timer, callingMethod, jobId);\r
+\r
+       /**\r
+        * Checks the limit, writes the input for the executable and submits it to\r
+        * the asynchronous engine.\r
+        * \r
+        * @param sequences\r
+        *            the input sequences\r
+        * @param confExec\r
+        *            the configured executable to submit\r
+        * @param logger\r
+        *            receives the timer, calling method and job id after\r
+        *            submission; may be null, in which case nothing is logged\r
+        * @param callingMethod\r
+        *            the name passed to the logger\r
+        * @param limit\r
+        *            the limit to check the sequences against; may be null, in\r
+        *            which case no check is made\r
+        * @return the job id returned by the engine\r
+        * @throws LimitExceededException\r
+        *             if a limit is given and reports the sequences as exceeding it\r
+        * @throws JobSubmissionException\r
+        *             propagated from the calls made here\r
+        */\r
+       public static <T> String align(List<FastaSequence> sequences,\r
+                       ConfiguredExecutable<T> confExec, WSLogger logger,\r
+                       String callingMethod, Limit<T> limit)\r
+                       throws LimitExceededException, JobSubmissionException {\r
+               // Started first so the logged time covers the whole submission\r
+               Timer timer = Timer.getMilliSecondsTimer();\r
+               boolean overLimit = limit != null && limit.isExceeded(sequences);\r
+               if (overLimit) {\r
+                       throw LimitExceededException.newLimitExceeded(limit, sequences);\r
+               }\r
+               compbio.runner.Util.writeInput(sequences, confExec);\r
+               String jobId = Configurator.getAsyncEngine(confExec).submitJob(confExec);\r
+               if (logger == null) {\r
+                       return jobId;\r
+               }\r
+               logger.log(timer, callingMethod, jobId);\r
+               return jobId;\r
        }\r
-       return jobId;\r
-    }\r
 \r
-    /*\r
-     * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation\r
-     */\r
-    public static final <T> List<String> getCommands(List<Option<T>> options,\r
-           String keyValueSeparator) {\r
-       List<String> oList = new ArrayList<String>();\r
-       for (Option<T> o : options) {\r
-           oList.add(o.toCommand(keyValueSeparator));\r
+       /**\r
+        * Converts each option to its command string, in list order.\r
+        * <p>\r
+        * TODO Rewrite using purely CommandBuilder. This is breaking encapsulation\r
+        * \r
+        * @param options\r
+        *            the options to convert\r
+        * @param keyValueSeparator\r
+        *            the separator passed to each option's toCommand\r
+        * @return a new list with one command string per option\r
+        */\r
+       public static final <T> List<String> getCommands(List<Option<T>> options,\r
+                       String keyValueSeparator) {\r
+               final List<String> commands = new ArrayList<String>();\r
+               for (final Option<T> option : options) {\r
+                       String command = option.toCommand(keyValueSeparator);\r
+                       commands.add(command);\r
+               }\r
+               return commands;\r
        }\r
-       return oList;\r
-    }\r
 \r
 }\r