From: pvtroshin Date: Tue, 21 Dec 2010 15:02:12 +0000 (+0000) Subject: AAConWS further work X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=eee7bbead2d812ef1aa67389ae2d1c4884c47458;p=jabaws.git AAConWS further work git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@3550 e3abac25-378b-4346-85de-24260fe3988d --- diff --git a/binaries/AACon_manual.txt b/binaries/AACon_manual.txt index ff7d66f..e9b4b53 100644 --- a/binaries/AACon_manual.txt +++ b/binaries/AACon_manual.txt @@ -24,10 +24,9 @@ possible formats with or without an alignment. If format is not specified, the program outputs conservation scores without alignment. The scores are not normalized by default but they can be (see below). SMERFS default parameters are window width of 7, column score is set to -the middle column, gap% cutoff of 0.1. If different values for SMERFS parameters -are required than all three parameters must be provided. Details of the program -execution can be recorded to a separate file if an appropriate file path is -provided. +the middle column (MID_SCORE), gap% cutoff of 0.1. Different parameters for SMERFS +can be provided (see below). Details of the program execution can be recorded to +a separate file if an appropriate file path is provided. List of command line arguments: @@ -49,17 +48,6 @@ List of command line arguments: RESULT_NO_ALIGNMENT Optional, if not specified RESULT_NO_ALIGNMENT is assumed --s= precedes a list of three comma separated parameters for SMERFS - the order of parameters is as following: - 1. window width - an integer and an odd number - 2. how to allocate window scores to columns, two ways are possible: - MID_SCORE - gives the window score to the middle column - MAX_SCORE - gives the column the highest score of all the windows it - belongs to - 3. 
gap percentage cutoff - a float greater than 0 and smaller or equal 1 - EXAMPLE: -s=5,MID_SCORE,0.1 - Optional, default values are 7,MID_SCORE,0.1 - -d= precedes a full path to a file where program execution details are to be listed. Optional, if not provided, no execution statistics is produced. @@ -76,7 +64,23 @@ List of command line arguments: The following formula is used for normalization n = (d - dmin)/(dmax - dmin) Negative results first converted to positive by adding an absolute value of - the most negative result. Optional. + the most negative result. Optional. + +SMERFS Only Parameters: + +-smerfsGT= precedes SMERFS Gap Threshold - a gap percentage cutoff - + a float greater than 0 and smaller than or equal to 1. Optional, defaults + to 0.1 + +-smerfsCS= precedes the SMERFS Column Score algorithm, which defines how window scores + are allocated to columns; two methods are possible: + MID_SCORE - gives the window score to the middle column + MAX_SCORE - gives the column the highest score of all the windows it + belongs to. Optional, defaults to MID_SCORE. + +-smerfsWW= precedes the Window Width parameter - an odd integer. + Optional, defaults to 7 + EXAMPLE HOW TO RUN THE PROGRAM: java -jar -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n @@ -86,5 +90,5 @@ Input comes form prot1 file and an output without an alignment is recorded to prot1_results file. Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton. -Please visit http://www.compbio.dundee.ac.uk for further information. +Please visit http://www.compbio.dundee.ac.uk/aacon for further information. 
\ No newline at end of file diff --git a/binaries/aaconservation.jar b/binaries/aaconservation.jar index 616bb33..e16b9a8 100644 Binary files a/binaries/aaconservation.jar and b/binaries/aaconservation.jar differ diff --git a/conf/settings/AAConParameters.xml b/conf/settings/AAConParameters.xml index 9f5da9f..2274cd3 100644 --- a/conf/settings/AAConParameters.xml +++ b/conf/settings/AAConParameters.xml @@ -3,7 +3,13 @@ compbio.runner.conservation.AACon Normalize - Normalize the results. The results of the calculation by different methods will all be scaled to the range between 0 and 1, so that they are comparable + Normalize the results. + Normalized results have values between 0 and 1. Please note however, that + some results cannot be normalized. In such a case, the system returns not + normalized value. The following formula is used for normalization + n = (d - dmin)/(dmax - dmin) + Negative results first converted to positive by adding a greatest absolute + result value. -n http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt @@ -33,4 +39,41 @@ VALDAR SMERFS + + SMERFS Window Width + The width of the window for SMERFS. Optional, defaults to 7 + -smerfsWW + http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt + 7 + + Integer + 3 + 100 + + + + SMERFS Column Scoring Method + SMERFS Column Score algorithm defines the window scores to + columns allocation, two methods are possible: + MID_SCORE - gives the window score to the middle column + MAX_SCORE - gives the column the highest score of all the windows it + belongs to. Optional defaults to MID_SCORE. + -smerfsCS + http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt + MID_SCORE + MAX_SCORE + MID_SCORE + + + SMERFS Gap Threshhold + a gap percentage cutoff - a float greater than 0 and smaller or equal 1. 
Optional defaults to 0.1 + -smerfsGT + http://www.compbio.dundee.ac.uk/jabaws/prog_docs/aacon.txt + 0.1 + + Float + 0.001 + 1 + + diff --git a/datamodel/compbio/data/sequence/Score.java b/datamodel/compbio/data/sequence/Score.java index 0e49a08..127df64 100644 --- a/datamodel/compbio/data/sequence/Score.java +++ b/datamodel/compbio/data/sequence/Score.java @@ -1,7 +1,14 @@ package compbio.data.sequence; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.text.NumberFormat; import java.util.Arrays; import java.util.List; +import java.util.Locale; +import java.util.Set; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; @@ -12,6 +19,13 @@ import compbio.util.annotation.Immutable; @Immutable public class Score { + public static final NumberFormat NUMBER_FORMAT = NumberFormat + .getNumberInstance(Locale.UK); + static { + NUMBER_FORMAT.setGroupingUsed(false); + NUMBER_FORMAT.setMaximumFractionDigits(3); + } + private ConservationMethod method; private List scores; @@ -73,4 +87,18 @@ public class Score { return false; return true; } + + public static void write(Set scores, OutputStream output) + throws IOException { + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( + output)); + for (Score score : scores) { + writer.write("#" + score.method + " "); + for (Float scoreVal : score.getScores()) { + writer.write(NUMBER_FORMAT.format(scoreVal) + " "); + } + writer.write("\n"); + } + writer.flush(); + } } diff --git a/runner/compbio/runner/conservation/AACon.java b/runner/compbio/runner/conservation/AACon.java index 49c87d4..f43db89 100644 --- a/runner/compbio/runner/conservation/AACon.java +++ b/runner/compbio/runner/conservation/AACon.java @@ -25,6 +25,7 @@ import java.util.List; import org.apache.log4j.Logger; +import compbio.data.sequence.SMERFSConstraints; import compbio.data.sequence.Score; import 
compbio.data.sequence.SequenceUtil; import compbio.engine.client.CommandBuilder; @@ -61,6 +62,10 @@ public class AACon extends SkeletalExecutable { public static final String KEY_VALUE_SEPARATOR = "="; public static final String STAT_FILE = "stat.txt"; + private final int windowWidth = SMERFSConstraints.DEFAULT_WINDOW_SIZE; + private final SMERFSConstraints colScoreMethod = SMERFSConstraints.MID_SCORE; + private final double gapTreshold = SMERFSConstraints.DEFAULT_GAP_THRESHOLD; + public AACon() { addParameters(Arrays.asList("-jar", getLibPath(), "-d=" + STAT_FILE, "-f=RESULT_NO_ALIGNMENT")); @@ -97,7 +102,7 @@ public class AACon extends SkeletalExecutable { + "and initialize it with the location of jronn jar file"); } if (new File(settings).isAbsolute()) { - // Jronn jar can be found so no actions necessary + // the jar can be found so no actions necessary // no further actions is necessary return settings; } diff --git a/testsrc/compbio/ws/client/AAConWSClientExample.java b/testsrc/compbio/ws/client/AAConWSClientExample.java new file mode 100644 index 0000000..98c2dfc --- /dev/null +++ b/testsrc/compbio/ws/client/AAConWSClientExample.java @@ -0,0 +1,118 @@ +package compbio.ws.client; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.List; +import java.util.Set; + +import compbio.data.msa.Annotation; +import compbio.data.sequence.FastaSequence; +import compbio.data.sequence.Score; +import compbio.data.sequence.SequenceUtil; +import compbio.metadata.JobSubmissionException; +import compbio.metadata.Preset; +import compbio.metadata.PresetManager; +import compbio.metadata.ResultNotAvailableException; +import compbio.metadata.UnsupportedRuntimeException; +import compbio.metadata.WrongParameterException; +import compbio.runner.conservation.AACon; + +/** + * AAConWS client example + */ +public class AAConWSClientExample { + + /* + * Input sequences. 
For the simplicity keep them in the class + */ + static final String input = ">Foo \r\n" + + "MTADGPRELLQLRAAVRHRPQDFVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r\n" + + "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL\r\n" + + "LPEEPYITAQLLNWRRRLCDWRALDVLSAQVRAAVAQGVGAVEPFAFLSEDASAAEQLAC\r\n" + + "ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n" + + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n" + + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n" + + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n" + + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n" + + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n" + + "GVFEMDGFADDFGALLQALARRHGWLGI\r\n" + + "\r\n" + + ">Bar \r\n" + + "-----------------------------------MGDTTAGEMAVQRGLALH-------\r\n" + + "---------QQRHAEAAVLLQQASDAAPEHPGIALWL-HALEDAGQAEAAAA-YTRAHQL\r\n" + + "LPEEPYITAQLLN--------------------AVAQGVGAVEPFAFLSEDASAAE----\r\n" + + "----------SVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n" + + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n" + + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n" + + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n" + + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n" + + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n" + + "GVFEMDGFADDFGALLQALARRHGWLGI\r\n" + + "\r\n" + + ">Noname \r\n" + + "-MTADGPRELLQLRAAVRHRPQDVAWLMLADAELGMGDTTAGEMAVQRGLALHPGHPEAV\r\n" + + "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALED--------------HQL\r\n" + + "LPEEPYITAQLDVLSAQVR-------------AAVAQGVGAVEPFAFLSEDASAAEQLAC\r\n" + + "ARTRAQAIAASVRPLAPTRVRSKGPLRVGFVSNGFGAHPTGLLTVALFEALQRRQPDLQM\r\n" + + "HLFATSGDDGSTLRTRLAQASTLHDVTALGHLATAKHIRHHGIDLLFDLRGWGGGGRPEV\r\n" + + "FALRPAPVQVNWLAYPGTSGAPWMDYVLGDAFALPPALEPFYSEHVLRLQGAFQPSDTSR\r\n" + + "VVAEPPSRTQCGLPEQGVVLCCFNNSYKLNPQSMARMLAVLREVPDSVLWLLSGPGEADA\r\n" 
+ + "RLRAFAHAQGVDAQRLVFMPKLPHPQYLARYRHADLFLDTHPYNAHTTASDALWTGCPVL\r\n" + + "TTPGETFAARVAGSLNHHLGLDEMNVADDAAFVAKAVALASDPAALTALHARVDVLRRES\r\n" + + "I---------------------------"; + + public static void main(String[] args) throws UnsupportedRuntimeException, + JobSubmissionException, WrongParameterException, + FileNotFoundException, IOException, ResultNotAvailableException, + InterruptedException { + + /* + * Annotation interface for AAConWS web service instance + */ + Annotation client = (Annotation) Jws2Client.connect( + "http://www.compbio.dundee.ac.uk/aacon", Services.AAConWS); + + /* Get the list of available presets */ + PresetManager presetman = client.getPresets(); + + /* Get the Preset object by preset name */ + Preset preset = presetman.getPresetByName("Complete conservation"); + + /* + * Load sequences in FASTA format from the in-memory input string. You + * can use something like new FileInputStream() to load them from a file + */ + List fastalist = SequenceUtil + .readFasta(new ByteArrayInputStream(input.getBytes())); + + /* + * Submit loaded sequences for analysis using the preset. The job + * identifier is returned by this method; you can retrieve the results + * with it sometime later. + */ + String jobId = client.presetAnalize(fastalist, preset); + + /* This method will block for the duration of the calculation */ + Set result = client.getAnnotation(jobId); + + /* + * This is a better way of obtaining results, it does not involve + * holding the connection open for the duration of the calculation, + * Besides, as the University of Dundee public server will reset the + * connection after 10 minutes of idling, this is the only way to obtain + * the results of long running task from our public server. 
+ */ + // while (client.getJobStatus(jobId) != JobStatus.FINISHED) { + // Thread.sleep(1000); // wait a second, then recheck the status + // } + + /* Output the scores to standard out */ + Score.write(result, System.out); + + /* Alternatively, you can record the retrieved scores into a file */ + // FileOutputStream out = new FileOutputStream("result.txt"); + // Score.write(result, out); + // out.close(); + } +} diff --git a/testsrc/compbio/ws/client/TestAAConWS.java b/testsrc/compbio/ws/client/TestAAConWS.java index e0b002c..acaf57f 100644 --- a/testsrc/compbio/ws/client/TestAAConWS.java +++ b/testsrc/compbio/ws/client/TestAAConWS.java @@ -17,8 +17,8 @@ import org.testng.annotations.Test; import compbio.data.msa.Annotation; import compbio.data.msa.JABAService; -import compbio.data.sequence.FastaSequence; import compbio.data.sequence.ConservationMethod; +import compbio.data.sequence.FastaSequence; import compbio.data.sequence.Score; import compbio.data.sequence.SequenceUtil; import compbio.metadata.JobSubmissionException; @@ -80,7 +80,8 @@ public class TestAAConWS { HashSet result = msaws.getAnnotation(jobId); assertNotNull(result); assertEquals(result.size(), 1); - assertEquals(result.iterator().next().getMethod(), ConservationMethod.SHENKIN); + assertEquals(result.iterator().next().getMethod(), + ConservationMethod.SHENKIN); List scores = result.iterator().next().getScores(); assertNotNull(scores); assertEquals(scores.size(), 568); @@ -192,6 +193,9 @@ public class TestAAConWS { try { options.getArgument("Calculation method").setDefaultValue("SMERFS"); + // options.getArgument("SMERFS Column Scoring Method") + // .setDefaultValue("MAX_SCORE"); + // options.getArgument("SMERFS Gap Threshhold").setDefaultValue("1"); String jobId = msaws.customAnalize(fsl, options.getArguments()); HashSet result = msaws.getAnnotation(jobId); assertNotNull(result); diff --git a/webservices/compbio/data/msa/Annotation.java b/webservices/compbio/data/msa/Annotation.java index 
1de9f30..27a1d55 100644 --- a/webservices/compbio/data/msa/Annotation.java +++ b/webservices/compbio/data/msa/Annotation.java @@ -9,7 +9,6 @@ import javax.jws.WebParam; import javax.jws.WebService; import compbio.data.sequence.FastaSequence; -import compbio.data.sequence.SMERFSConstraints; import compbio.data.sequence.Score; import compbio.metadata.JobSubmissionException; import compbio.metadata.LimitExceededException; @@ -164,48 +163,6 @@ public interface Annotation extends JABAService, JManagement, Metadata { JobSubmissionException, WrongParameterException; /** - * - * Analyse the sequences. The actual analysis algorithm is defined by the - * type T. - * - * Any dataset containing a greater number of sequences or the average - * length of the sequences are greater then defined in the default Limit - * will not be accepted for an alignment operation and - * JobSubmissionException will be thrown. - * - * @param sequences - * List of FastaSequence objects. The programme does not perform - * any sequence validity checks. Nor does it checks whether the - * sequences names are unique. It is responsibility of the caller - * to validate this information - * @return jobId - unique identifier for the job - * @throws JobSubmissionException - * is thrown when the job could not be submitted due to the - * following reasons: 1) The number of sequences in the - * submission or their average length is greater then defined by - * the default Limit. 2) Any problems on the server side e.g. it - * is misconfigured or malfunction, is reported via this - * exception. In the first case the information on the limit - * could be obtained from an exception. - * @throws InvalidParameterException - * thrown if input list of fasta sequence is null or empty - * @throws UnsupportedRuntimeException - * thrown if server OS does not support native executables for a - * given web service, e.g. 
JABAWS is deployed on Windows and - * Mafft service is called - * @throws LimitExceededException - * is throw if the input sequences number or average length - * exceeds what is defined by the limit - */ - @WebMethod - String customSMERFS( - @WebParam(name = "fastaSequences") List sequences, - int windowWidth, SMERFSConstraints scoringMethod, - float gapTreshold, boolean normalize) - throws UnsupportedRuntimeException, LimitExceededException, - JobSubmissionException; - - /** * Return the result of the job. * * @param jobId diff --git a/webservices/compbio/ws/server/AAConWS.java b/webservices/compbio/ws/server/AAConWS.java index f9d8ce6..a32533f 100644 --- a/webservices/compbio/ws/server/AAConWS.java +++ b/webservices/compbio/ws/server/AAConWS.java @@ -13,7 +13,6 @@ import org.apache.log4j.Logger; import compbio.data.msa.Annotation; import compbio.data.sequence.FastaSequence; -import compbio.data.sequence.SMERFSConstraints; import compbio.data.sequence.Score; import compbio.engine.AsyncExecutor; import compbio.engine.Configurator; @@ -175,13 +174,4 @@ public class AAConWS implements Annotation { return WSUtil.align(sequences, confAAcon, null, "presetAnalize", limit); } - @Override - public String customSMERFS(List sequences, int windowWidth, - SMERFSConstraints scoringMethod, float gapTreshold, - boolean normalize) throws UnsupportedRuntimeException, - LimitExceededException, JobSubmissionException { - // TODO Auto-generated method stub - return null; - } - } diff --git a/website/AAConWSClientExample.pdf b/website/AAConWSClientExample.pdf new file mode 100644 index 0000000..82a315e Binary files /dev/null and b/website/AAConWSClientExample.pdf differ