AAConWS further work
authorpvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Tue, 21 Dec 2010 15:02:12 +0000 (15:02 +0000)
committerpvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Tue, 21 Dec 2010 15:02:12 +0000 (15:02 +0000)
git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@3550 e3abac25-378b-4346-85de-24260fe3988d

binaries/AACon_manual.txt
binaries/aaconservation.jar
conf/settings/AAConParameters.xml
datamodel/compbio/data/sequence/Score.java
runner/compbio/runner/conservation/AACon.java
testsrc/compbio/ws/client/AAConWSClientExample.java [new file with mode: 0644]
testsrc/compbio/ws/client/TestAAConWS.java
webservices/compbio/data/msa/Annotation.java
webservices/compbio/ws/server/AAConWS.java
website/AAConWSClientExample.pdf [new file with mode: 0644]

index ff7d66f..e9b4b53 100644 (file)
@@ -24,10 +24,9 @@ possible formats with or without an alignment.
 If format is not specified, the program outputs conservation scores without 
 alignment. The scores are not normalized by default but they can be (see below).
 SMERFS default parameters are window width of 7, column score is set to
-the middle column, gap% cutoff of 0.1. If different values for SMERFS parameters 
-are required than all three parameters must be provided. Details of the program 
-execution can be recorded to a separate file if an appropriate file path is 
-provided.
+the middle column (MID_SCORE), gap% cutoff of 0.1. Different parameters for SMERFS 
+can be provided (see below). Details of the program execution can be recorded to
+a separate file if an appropriate file path is provided.
 
 List of command line arguments:
 
@@ -49,17 +48,6 @@ List of command line arguments:
       RESULT_NO_ALIGNMENT
      Optional, if not specified RESULT_NO_ALIGNMENT is assumed 
 
--s=  precedes a list of three comma separated parameters for SMERFS
-     the order of parameters is as following:
-      1. window width - an integer and an odd number
-      2. how to allocate window scores to columns, two ways are possible:
-            MID_SCORE - gives the window score to the middle column
-            MAX_SCORE - gives the column the highest score of all the windows it 
-            belongs to
-      3. gap percentage cutoff - a float greater than 0 and smaller or equal 1
-     EXAMPLE: -s=5,MID_SCORE,0.1
-     Optional, default values are 7,MID_SCORE,0.1 
-      
 -d=  precedes a full path to a file where program execution details are to be 
      listed. Optional, if not provided, no execution statistics is produced.  
       
@@ -76,7 +64,23 @@ List of command line arguments:
         The following formula is used for normalization 
                        n = (d - dmin)/(dmax - dmin)
         Negative results first converted to positive by adding an absolute value of
-        the most negative result. Optional. 
+        the most negative result. Optional.
+
+SMERFS Only Parameters: 
+
+-smerfsGT=  precedes SMERFS Gap Threshold - a gap percentage cutoff - 
+                       a float greater than 0 and smaller than or equal to 1. Optional, 
+                       defaults to 0.1
+
+-smerfsCS=  precedes SMERFS Column Score algorithm, which defines how the window 
+                       scores are allocated to columns; two methods are possible:
+               MID_SCORE - gives the window score to the middle column
+               MAX_SCORE - gives the column the highest score of all the windows it 
+               belongs to. Optional, defaults to MID_SCORE.  
+
+-smerfsWW=  precedes Window Width parameter - an integer and an odd number.
+            Optional, defaults to 7 
+         
 
 EXAMPLE HOW TO RUN THE PROGRAM:
 java -jar <jar name> -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n
@@ -86,5 +90,5 @@ Input comes form prot1 file and an output without an alignment is recorded to
 prot1_results file. 
 
 Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton.
-Please visit http://www.compbio.dundee.ac.uk for further information.
+Please visit http://www.compbio.dundee.ac.uk/aacon for further information.
  
\ No newline at end of file
index 616bb33..e16b9a8 100644 (file)
Binary files a/binaries/aaconservation.jar and b/binaries/aaconservation.jar differ
index 9f5da9f..2274cd3 100644 (file)
@@ -3,7 +3,13 @@
        <runnerClassName>compbio.runner.conservation.AACon</runnerClassName>\r
        <options>\r
                <name>Normalize</name>\r
-               <description>Normalize the results. The results of the calculation by different methods will all be scaled to the range between 0 and 1, so that they are comparable</description>\r
+               <description>Normalize the results.  \r
+               Normalized results have values between 0 and 1. Please note however, that \r
+        some results cannot be normalized. In such a case, the system returns the \r
+        non-normalized value. The following formula is used for normalization \r
+                               n = (d - dmin)/(dmax - dmin)\r
+               Negative results are first converted to positive by adding the absolute \r
+               value of the most negative result.</description>\r
                <optionNames>-n</optionNames>\r
                <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
        </options>\r
         <possibleValues>VALDAR</possibleValues>\r
         <possibleValues>SMERFS</possibleValues>\r
        </parameters>\r
+       <parameters>\r
+               <name>SMERFS Window Width</name>\r
+               <description>The width of the window for SMERFS. Optional, defaults to 7</description>\r
+               <optionNames>-smerfsWW</optionNames>\r
+               <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+               <defaultValue>7</defaultValue>\r
+               <validValue>\r
+               <type>Integer</type>\r
+            <min>3</min>\r
+            <max>100</max>\r
+        </validValue>\r
+       </parameters>\r
+       <parameters>\r
+               <name>SMERFS Column Scoring Method</name>\r
+               <description>SMERFS Column Score algorithm defines the window scores to \r
+                       columns allocation, two methods are possible:\r
+               MID_SCORE - gives the window score to the middle column\r
+               MAX_SCORE - gives the column the highest score of all the windows it \r
+               belongs to. Optional defaults to MID_SCORE. </description>\r
+               <optionNames>-smerfsCS</optionNames>\r
+               <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+               <defaultValue>MID_SCORE</defaultValue>\r
+               <possibleValues>MAX_SCORE</possibleValues>\r
+        <possibleValues>MID_SCORE</possibleValues>\r
+       </parameters>\r
+       <parameters>\r
+               <name>SMERFS Gap Threshhold</name>\r
+               <description>a gap percentage cutoff - a float greater than 0 and smaller or equal 1. Optional defaults to 0.1</description>\r
+               <optionNames>-smerfsGT</optionNames>\r
+               <furtherDetails>http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt</furtherDetails>\r
+               <defaultValue>0.1</defaultValue>\r
+               <validValue>\r
+               <type>Float</type>\r
+            <min>0.001</min>\r
+            <max>1</max>\r
+        </validValue>\r
+       </parameters>\r
 </runnerConfig>\r
index 0e49a08..127df64 100644 (file)
@@ -1,7 +1,14 @@
 package compbio.data.sequence;\r
 \r
+import java.io.BufferedWriter;\r
+import java.io.IOException;\r
+import java.io.OutputStream;\r
+import java.io.OutputStreamWriter;\r
+import java.text.NumberFormat;\r
 import java.util.Arrays;\r
 import java.util.List;\r
+import java.util.Locale;\r
+import java.util.Set;\r
 \r
 import javax.xml.bind.annotation.XmlAccessType;\r
 import javax.xml.bind.annotation.XmlAccessorType;\r
@@ -12,6 +19,13 @@ import compbio.util.annotation.Immutable;
 @Immutable\r
 public class Score {\r
 \r
+       public static final NumberFormat NUMBER_FORMAT = NumberFormat\r
+                       .getNumberInstance(Locale.UK);\r
+       static {\r
+               NUMBER_FORMAT.setGroupingUsed(false);\r
+               NUMBER_FORMAT.setMaximumFractionDigits(3);\r
+       }\r
+\r
        private ConservationMethod method;\r
 \r
        private List<Float> scores;\r
@@ -73,4 +87,18 @@ public class Score {
                        return false;\r
                return true;\r
        }\r
+\r
+       public static void write(Set<Score> scores, OutputStream output)\r
+                       throws IOException {\r
+               BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(\r
+                               output));\r
+               for (Score score : scores) {\r
+                       writer.write("#" + score.method + " ");\r
+                       for (Float scoreVal : score.getScores()) {\r
+                               writer.write(NUMBER_FORMAT.format(scoreVal) + " ");\r
+                       }\r
+                       writer.write("\n");\r
+               }\r
+               writer.flush();\r
+       }\r
 }\r
index 49c87d4..f43db89 100644 (file)
@@ -25,6 +25,7 @@ import java.util.List;
 \r
 import org.apache.log4j.Logger;\r
 \r
+import compbio.data.sequence.SMERFSConstraints;\r
 import compbio.data.sequence.Score;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.engine.client.CommandBuilder;\r
@@ -61,6 +62,10 @@ public class AACon extends SkeletalExecutable<AACon> {
        public static final String KEY_VALUE_SEPARATOR = "=";\r
        public static final String STAT_FILE = "stat.txt";\r
 \r
+       private final int windowWidth = SMERFSConstraints.DEFAULT_WINDOW_SIZE;\r
+       private final SMERFSConstraints colScoreMethod = SMERFSConstraints.MID_SCORE;\r
+       private final double gapTreshold = SMERFSConstraints.DEFAULT_GAP_THRESHOLD;\r
+\r
        public AACon() {\r
                addParameters(Arrays.asList("-jar", getLibPath(), "-d=" + STAT_FILE,\r
                                "-f=RESULT_NO_ALIGNMENT"));\r
@@ -97,7 +102,7 @@ public class AACon extends SkeletalExecutable<AACon> {
                                                        + "and initialize it with the location of jronn jar file");\r
                }\r
                if (new File(settings).isAbsolute()) {\r
-                       // Jronn jar can be found so no actions necessary\r
+                       // the jar can be found so no actions necessary\r
                        // no further actions is necessary\r
                        return settings;\r
                }\r
diff --git a/testsrc/compbio/ws/client/AAConWSClientExample.java b/testsrc/compbio/ws/client/AAConWSClientExample.java
new file mode 100644 (file)
index 0000000..98c2dfc
--- /dev/null
@@ -0,0 +1,118 @@
+package compbio.ws.client;\r
+\r
+import java.io.ByteArrayInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.util.List;\r
+import java.util.Set;\r
+\r
+import compbio.data.msa.Annotation;\r
+import compbio.data.sequence.FastaSequence;\r
+import compbio.data.sequence.Score;\r
+import compbio.data.sequence.SequenceUtil;\r
+import compbio.metadata.JobSubmissionException;\r
+import compbio.metadata.Preset;\r
+import compbio.metadata.PresetManager;\r
+import compbio.metadata.ResultNotAvailableException;\r
+import compbio.metadata.UnsupportedRuntimeException;\r
+import compbio.metadata.WrongParameterException;\r
+import compbio.runner.conservation.AACon;\r
+\r
+/**\r
+ * AAConWS client example\r
+ */\r
+public class AAConWSClientExample {\r
+\r
+       /*\r
+        * Input sequences. For the simplicity keep them in the class\r
+        */\r
+       static final String input = ">Foo      \r\n"\r
+                       + "MTADGPRELLQLRAAVRHRPQDFVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r\n"\r
+                       + "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL\r\n"\r
+                       + "LPEEPYITAQLLNWRRRLCDWRALDVLSAQVRAAVAQGVGAVEPFAFLSEDASAAEQLAC\r\n"\r
+                       + "ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n"\r
+                       + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n"\r
+                       + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n"\r
+                       + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n"\r
+                       + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n"\r
+                       + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n"\r
+                       + "GVFEMDGFADDFGALLQALARRHGWLGI\r\n"\r
+                       + "\r\n"\r
+                       + ">Bar                    \r\n"\r
+                       + "-----------------------------------MGDTTAGEMAVQRGLALH-------\r\n"\r
+                       + "---------QQRHAEAAVLLQQASDAAPEHPGIALWL-HALEDAGQAEAAAA-YTRAHQL\r\n"\r
+                       + "LPEEPYITAQLLN--------------------AVAQGVGAVEPFAFLSEDASAAE----\r\n"\r
+                       + "----------SVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n"\r
+                       + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n"\r
+                       + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n"\r
+                       + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n"\r
+                       + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n"\r
+                       + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n"\r
+                       + "GVFEMDGFADDFGALLQALARRHGWLGI\r\n"\r
+                       + "\r\n"\r
+                       + ">Noname             \r\n"\r
+                       + "-MTADGPRELLQLRAAVRHRPQDVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r\n"\r
+                       + "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALED--------------HQL\r\n"\r
+                       + "LPEEPYITAQLDVLSAQVR-------------AAVAQGVGAVEPFAFLSEDASAAEQLAC\r\n"\r
+                       + "ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n"\r
+                       + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n"\r
+                       + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n"\r
+                       + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n"\r
+                       + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n"\r
+                       + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n"\r
+                       + "I---------------------------";\r
+\r
+       public static void main(String[] args) throws UnsupportedRuntimeException,\r
+                       JobSubmissionException, WrongParameterException,\r
+                       FileNotFoundException, IOException, ResultNotAvailableException,\r
+                       InterruptedException {\r
+\r
+               /*\r
+                * Annotation interface for AAConWS web service instance\r
+                */\r
+               Annotation<AACon> client = (Annotation<AACon>) Jws2Client.connect(\r
+                               "http://www.compbio.dundee.ac.uk/aacon", Services.AAConWS);\r
+\r
+               /* Get the list of available presets */\r
+               PresetManager presetman = client.getPresets();\r
+\r
+               /* Get the Preset object by preset name */\r
+               Preset preset = presetman.getPresetByName("Complete conservation");\r
+\r
+               /*\r
+                * Load sequences in FASTA format from the input string. You can use\r
+                * something like new FileInputStream() to load sequences from a file\r
+                */\r
+               List<FastaSequence> fastalist = SequenceUtil\r
+                               .readFasta(new ByteArrayInputStream(input.getBytes()));\r
+\r
+               /*\r
+                * Submit loaded sequences for analysis using the preset. The job\r
+                * identifier is returned by this method, you can retrieve the results\r
+                * with it sometime later.\r
+                */\r
+               String jobId = client.presetAnalize(fastalist, preset);\r
+\r
+               /* This method will block for the duration of the calculation */\r
+               Set<Score> result = client.getAnnotation(jobId);\r
+\r
+               /*\r
+                * This is a better way of obtaining results, it does not involve\r
+                * holding the connection open for the duration of the calculation,\r
+                * Besides, as the University of Dundee public server will reset the\r
+                * connection after 10 minutes of idling, this is the only way to obtain\r
+                * the results of long running task from our public server.\r
+                */\r
+               // while (client.getJobStatus(jobId) != JobStatus.FINISHED) {\r
+               // Thread.sleep(1000); // wait a second, then recheck the status\r
+               // }\r
+\r
+               /* Output the scores to standard out */\r
+               Score.write(result, System.out);\r
+\r
+               /* Alternatively, you can record the retrieved scores into a file */\r
+               // FileOutputStream out = new FileOutputStream("result.txt");\r
+               // Score.write(result, out);\r
+               // out.close();\r
+       }\r
+}\r
index e0b002c..acaf57f 100644 (file)
@@ -17,8 +17,8 @@ import org.testng.annotations.Test;
 \r
 import compbio.data.msa.Annotation;\r
 import compbio.data.msa.JABAService;\r
-import compbio.data.sequence.FastaSequence;\r
 import compbio.data.sequence.ConservationMethod;\r
+import compbio.data.sequence.FastaSequence;\r
 import compbio.data.sequence.Score;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.metadata.JobSubmissionException;\r
@@ -80,7 +80,8 @@ public class TestAAConWS {
                        HashSet<Score> result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
                        assertEquals(result.size(), 1);\r
-                       assertEquals(result.iterator().next().getMethod(), ConservationMethod.SHENKIN);\r
+                       assertEquals(result.iterator().next().getMethod(),\r
+                                       ConservationMethod.SHENKIN);\r
                        List<Float> scores = result.iterator().next().getScores();\r
                        assertNotNull(scores);\r
                        assertEquals(scores.size(), 568);\r
@@ -192,6 +193,9 @@ public class TestAAConWS {
 \r
                try {\r
                        options.getArgument("Calculation method").setDefaultValue("SMERFS");\r
+                       // options.getArgument("SMERFS Column Scoring Method")\r
+                       // .setDefaultValue("MAX_SCORE");\r
+                       // options.getArgument("SMERFS Gap Threshhold").setDefaultValue("1");\r
                        String jobId = msaws.customAnalize(fsl, options.getArguments());\r
                        HashSet<Score> result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
index 1de9f30..27a1d55 100644 (file)
@@ -9,7 +9,6 @@ import javax.jws.WebParam;
 import javax.jws.WebService;\r
 \r
 import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.SMERFSConstraints;\r
 import compbio.data.sequence.Score;\r
 import compbio.metadata.JobSubmissionException;\r
 import compbio.metadata.LimitExceededException;\r
@@ -164,48 +163,6 @@ public interface Annotation<T> extends JABAService, JManagement, Metadata<T> {
                        JobSubmissionException, WrongParameterException;\r
 \r
        /**\r
-        * \r
-        * Analyse the sequences. The actual analysis algorithm is defined by the\r
-        * type T.\r
-        * \r
-        * Any dataset containing a greater number of sequences or the average\r
-        * length of the sequences are greater then defined in the default Limit\r
-        * will not be accepted for an alignment operation and\r
-        * JobSubmissionException will be thrown.\r
-        * \r
-        * @param sequences\r
-        *            List of FastaSequence objects. The programme does not perform\r
-        *            any sequence validity checks. Nor does it checks whether the\r
-        *            sequences names are unique. It is responsibility of the caller\r
-        *            to validate this information\r
-        * @return jobId - unique identifier for the job\r
-        * @throws JobSubmissionException\r
-        *             is thrown when the job could not be submitted due to the\r
-        *             following reasons: 1) The number of sequences in the\r
-        *             submission or their average length is greater then defined by\r
-        *             the default Limit. 2) Any problems on the server side e.g. it\r
-        *             is misconfigured or malfunction, is reported via this\r
-        *             exception. In the first case the information on the limit\r
-        *             could be obtained from an exception.\r
-        * @throws InvalidParameterException\r
-        *             thrown if input list of fasta sequence is null or empty\r
-        * @throws UnsupportedRuntimeException\r
-        *             thrown if server OS does not support native executables for a\r
-        *             given web service, e.g. JABAWS is deployed on Windows and\r
-        *             Mafft service is called\r
-        * @throws LimitExceededException\r
-        *             is throw if the input sequences number or average length\r
-        *             exceeds what is defined by the limit\r
-        */\r
-       @WebMethod\r
-       String customSMERFS(\r
-                       @WebParam(name = "fastaSequences") List<FastaSequence> sequences,\r
-                       int windowWidth, SMERFSConstraints scoringMethod,\r
-                       float gapTreshold, boolean normalize)\r
-                       throws UnsupportedRuntimeException, LimitExceededException,\r
-                       JobSubmissionException;\r
-\r
-       /**\r
         * Return the result of the job.\r
         * \r
         * @param jobId\r
index f9d8ce6..a32533f 100644 (file)
@@ -13,7 +13,6 @@ import org.apache.log4j.Logger;
 \r
 import compbio.data.msa.Annotation;\r
 import compbio.data.sequence.FastaSequence;\r
-import compbio.data.sequence.SMERFSConstraints;\r
 import compbio.data.sequence.Score;\r
 import compbio.engine.AsyncExecutor;\r
 import compbio.engine.Configurator;\r
@@ -175,13 +174,4 @@ public class AAConWS implements Annotation<AACon> {
                return WSUtil.align(sequences, confAAcon, null, "presetAnalize", limit);\r
        }\r
 \r
-       @Override\r
-       public String customSMERFS(List<FastaSequence> sequences, int windowWidth,\r
-                       SMERFSConstraints scoringMethod, float gapTreshold,\r
-                       boolean normalize) throws UnsupportedRuntimeException,\r
-                       LimitExceededException, JobSubmissionException {\r
-               // TODO Auto-generated method stub\r
-               return null;\r
-       }\r
-\r
 }\r
diff --git a/website/AAConWSClientExample.pdf b/website/AAConWSClientExample.pdf
new file mode 100644 (file)
index 0000000..82a315e
Binary files /dev/null and b/website/AAConWSClientExample.pdf differ