From: pvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Date: Thu, 16 Jun 2011 16:46:41 +0000 (+0000)
Subject: Binaries relocation
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;ds=sidebyside;h=2c1c5633a7d1c31008e04136cc4dae6fca1b369e;p=jabaws.git

Binaries relocation

git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@4278 e3abac25-378b-4346-85de-24260fe3988d
---

diff --git a/TODO.txt b/TODO.txt
index dbd8400..911b957 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,3 +1,4 @@
+GET rid of binaries/help directory!
 TODO: 
 Registry 1 week
 webservices - 1 week
diff --git a/binaries/AACon_manual.txt b/binaries/help/AACon_manual.txt
similarity index 100%
rename from binaries/AACon_manual.txt
rename to binaries/help/AACon_manual.txt
diff --git a/binaries/clustalw-help.txt b/binaries/help/clustalw-help.txt
similarity index 100%
rename from binaries/clustalw-help.txt
rename to binaries/help/clustalw-help.txt
diff --git a/binaries/help/iupred.txt b/binaries/help/iupred.txt
new file mode 100644
index 0000000..e023ac9
--- /dev/null
+++ b/binaries/help/iupred.txt
@@ -0,0 +1,45 @@
+INTERPRETATION OF THE OUTPUT:
+
+In the case of long and short types of disorder the output  gives the
+likelihood of disorder for each residue, i.e. it is a value between 0 and 1,
+and higher values indicate higher probability of disorder. Residues with values
+above 0.5 can be regarded as disordered, and at this cutoff 5% of globular
+proteins are expected to be predicted as disordered (false positives).
+ 
+For the prediction type of globular domains it gives the number of globular
+domains and lists their start and end positions in the sequence. This is followed
+by the submitted sequence with residues of globular domains indicated by
+uppercase letters. 
+
+
+SHORT SUMMARY OF THE METHOD
+
+Intrinsically unstructured/disordered proteins have no single well-defined
+tertiary structure in their native, functional state. Our server recognizes
+such regions from the amino acid sequence based on the estimated pairwise
+energy content. The underlying assumption is that globular proteins make a
+large number of interresidue interactions, providing the stabilizing energy to
+overcome the entropy loss during folding. In contrast, IUPs have special
+sequences that do not have the capacity to form sufficient interresidue
+interactions. Taking a set of globular proteins with known structure, we have
+developed a simple formalism that allows the estimation of the pairwise
+interaction energies of these proteins. It uses a quadratic expression in the
+amino acid composition, which takes into account that the contribution of an
+amino acid to order/disorder depends not only on its own chemical type, but also
+on its sequential environment, including its potential interaction partners.
+Applying this calculation for IUP sequences, their estimated energies are
+clearly shifted towards less favorable energies compared to globular proteins,
+enabling the prediction of protein disorder on this ground. 
+
+	
+References
+
+"The Pairwise Energy Content Estimated from Amino Acid Composition 
+Discriminates between Folded and Intrinsically Unstructured Proteins"
+Zsuzsanna Dosztanyi, Veronika Csizmok, Peter Tompa and Istvan Simon
+J. Mol. Biol. (2005) 347, 827-839.
+
+"IUPred: web server for the prediction of intrinsically unstructured 
+regions of proteins based on estimated energy content"
+Zsuzsanna Dosztanyi, Veronika Csizmok, Peter Tompa and Istvan Simon
+Bioinformatics (2005) 21, 3433-3434.
diff --git a/binaries/mafft_manual.htm b/binaries/help/mafft_manual.htm
similarity index 100%
rename from binaries/mafft_manual.htm
rename to binaries/help/mafft_manual.htm
diff --git a/binaries/muscle3.6.html b/binaries/help/muscle3.6.html
similarity index 100%
rename from binaries/muscle3.6.html
rename to binaries/help/muscle3.6.html
diff --git a/binaries/muscle3.7.txt b/binaries/help/muscle3.7.txt
similarity index 100%
rename from binaries/muscle3.7.txt
rename to binaries/help/muscle3.7.txt
diff --git a/binaries/probcons.pdf b/binaries/help/probcons.pdf
similarity index 100%
rename from binaries/probcons.pdf
rename to binaries/help/probcons.pdf
diff --git a/binaries/t_coffee.htm b/binaries/help/t_coffee.htm
similarity index 100%
rename from binaries/t_coffee.htm
rename to binaries/help/t_coffee.htm
diff --git a/binaries/aaconservation.jar b/binaries/windows/aaconservation.jar
similarity index 100%
rename from binaries/aaconservation.jar
rename to binaries/windows/aaconservation.jar
diff --git a/binaries/clustalw2.exe b/binaries/windows/clustalw2.exe
similarity index 100%
rename from binaries/clustalw2.exe
rename to binaries/windows/clustalw2.exe
diff --git a/binaries/jronn3.1.jar b/binaries/windows/jronn3.1.jar
similarity index 100%
rename from binaries/jronn3.1.jar
rename to binaries/windows/jronn3.1.jar
diff --git a/binaries/muscle.exe b/binaries/windows/muscle.exe
similarity index 100%
rename from binaries/muscle.exe
rename to binaries/windows/muscle.exe
diff --git a/conf/Executable.properties b/conf/Executable.properties
index 4b75730..b9bdac3 100644
--- a/conf/Executable.properties
+++ b/conf/Executable.properties
@@ -1,6 +1,6 @@
 
 ### Clustal configuration ###
-local.clustalw.bin.windows=binaries/clustalw2.exe
+local.clustalw.bin.windows=binaries/windows/clustalw2.exe
 local.clustalw.bin=binaries/src/clustalw/src/clustalw2
 cluster.clustalw.bin=/homes/pvtroshin/workspace/jaba2/binaries/src/clustalw/src/clustalw2
 # Parameters names which come from RunnerConfig -> Parameters.xml file ultimately are all lowercased in comparison!
@@ -13,7 +13,7 @@ clustalw.limits.file=conf/settings/ClustalLimits.xml
 clustalw.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M
 
 ### Muscle configuration ###
-local.muscle.bin.windows=binaries/muscle.exe
+local.muscle.bin.windows=binaries/windows/muscle.exe
 local.muscle.bin=binaries/src/muscle/muscle
 # Beware version of muscle on the cluster older and does not support some 
 # of the newer version attributed thus, will not work with Muscle.java wrapper!
@@ -64,7 +64,7 @@ probcons.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M
 local.jronn.bin.windows=D:\\Java\\jdk1.6.0_24\\bin\\java.exe 
 local.jronn.bin=/sw/java/latest/bin/java
 cluster.jronn.bin=/sw/java/latest/bin/java
-jronn.jar.file=binaries/jronn3.1.jar
+jronn.jar.file=binaries/windows/jronn3.1.jar
 # jronn.parameters.file=conf/settings/JronnParameters.xml
 jronn.limits.file=conf/settings/JronnLimits.xml
 #TODO jronn.jvm.options=-Xms32M -Xmx512M
@@ -89,8 +89,9 @@ globplot.limits.file=conf/settings/GlobPlotLimits.xml
 globplot.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M
 
 ### IUPred configuration ### 
-#local.iupred.bin.windows= 
+local.iupred.bin.windows=binaries/windows/iupred/iupred.exe 
 local.iupred.bin=binaries/src/iupred/iupred
+# This must point to the directory that contains the iupred binary and the other files it depends on
 iupred.bin.env=IUPred_PATH#/homes/pvtroshin/workspace/jaba2/binaries/src/iupred
 cluster.iupred.bin=/homes/pvtroshin/workspace/jaba2/binaries/src/iupred/iupred
 iupred.parameters.file=conf/settings/IUPredParameters.xml
@@ -98,10 +99,12 @@ iupred.limits.file=conf/settings/IUPredLimits.xml
 iupred.cluster.settings=-l h_cpu=24:00:00 -l h_vmem=6000M -l ram=6000M
 
 ### AACon configuration ###
+# This is just a path to the standard java executable 
 local.aacon.bin.windows=D:\\Java\\jdk1.6.0_24\\bin\\java.exe 
 local.aacon.bin=/sw/java/latest/bin/java
 cluster.aacon.bin=/sw/java/latest/bin/java
-aacon.jar.file=binaries/aaconservation.jar
+# Path to the AACon library
+aacon.jar.file=binaries/windows/aaconservation.jar
 aacon.parameters.file=conf/settings/AAConParameters.xml
 aacon.presets.file=conf/settings/AAConPresets.xml
 aacon.limits.file=conf/settings/AAConLimits.xml
diff --git a/website/prog_docs/AACon_manual.txt b/website/prog_docs/AACon_manual.txt
new file mode 100644
index 0000000..e9b4b53
--- /dev/null
+++ b/website/prog_docs/AACon_manual.txt
@@ -0,0 +1,94 @@
+
+AA Conservation version 1.0b (2 September 2010)
+
+This program allows calculation of conservation of amino acids in
+multiple sequence alignments.
+It implements 17 different conservation scores as described by Valdar in
+his paper (Scoring Residue Conservation, PROTEINS: Structure, Function
+and  Bioinformatics 48:227-241 (2002)) and SMERFS scoring algorithm as described
+by Manning, Jefferson and Barton (The contrasting properties of conservation
+and correlated phylogeny in protein functional residue prediction,
+BMC Bioinformatics (2008)).
+
+The conservation algorithms supported are:
+
+KABAT, JORES, SCHNEIDER, SHENKIN, GERSTEIN, TAYLOR_GAPS, TAYLOR_NO_GAPS, 
+ZVELIBIL, KARLIN, ARMON, THOMPSON, NOT_LANCET, MIRNY, WILLIAMSON, 
+LANDGRAF, SANDER, VALDAR, SMERFS
+
+Input format is either a FASTA formatted file containing aligned sequences with 
+gaps or a Clustal alignment. The valid gap characters are *, -, space character,
+X and . (a dot). By default program prints the results to the command window. 
+If the output file is provided the results are printed to the file in two 
+possible formats with or without an alignment.
+If format is not specified, the program outputs conservation scores without 
+alignment. The scores are not normalized by default but they can be (see below).
+SMERFS default parameters are window width of 7, column score is set to
+the middle column (MID_SCORE), gap% cutoff of 0.1. Different parameters for SMERFS 
+can be provided (see below). Details of the program execution can be recorded to
+a separate file if an appropriate file path is provided.
+
+List of command line arguments:
+
+-m=  precedes a comma separated list of method names
+     EXAMPLE: -m=KABAT,JORES,GERSTEIN
+     Optional, if no method is specified request for all is assumed. 
+
+-i=  precedes a full path to the input FASTA file, required
+
+-o=  precedes a full path to the output file, optional, if no output file is 
+     provided the program will output to the standard out.  
+
+-t=  precedes the number of CPUs (CPU cores more precisely) to use. Optional, 
+	 defaults to all processors available on the machine.  
+      
+-f=  precedes the format  of the results in the output file
+     two different formats are possible:
+      RESULT_WITH_ALIGNMENT
+      RESULT_NO_ALIGNMENT
+     Optional, if not specified RESULT_NO_ALIGNMENT is assumed 
+
+-d=  precedes a full path to a file where program execution details are to be 
+     listed. Optional, if not provided, no execution statistics is produced.  
+      
+-g=  precedes comma separated list of gap characters provided by the user, if 
+     you're using an unusual gap character (not a -,., ,*,X) you have to 
+     provide it. If you provide this list you have to list all the gaps 
+     accepted, including those that were previously treated as defaults. 
+     Optional.      
+      
+-n   using this key causes the results to be normalized. 
+	 Normalized results have values between 0 and 1. Please note however, that 
+	 some results cannot be normalized. In such a case, the system returns the 
+	 non-normalized value and logs the issue to the standard error stream. 
+	 The following formula is used for normalization 
+			n = (d - dmin)/(dmax - dmin)
+	 Negative results are first converted to positive by adding the absolute value of
+	 the most negative result. Optional.
+
+SMERFS Only Parameters: 
+
+-smerfsGT=  precedes SMERFS Gap Threshold - a gap percentage cutoff - 
+			a float greater than 0 and smaller than or equal to 1. Optional, 
+			defaults to 0.1
+
+-smerfsCS=  precedes the SMERFS Column Score algorithm, which defines how window 
+			scores are allocated to columns; two methods are possible:
+	        MID_SCORE - gives the window score to the middle column
+	        MAX_SCORE - gives the column the highest score of all the windows it 
+	        belongs to. Optional defaults to MID_SCORE.  
+
+-smerfsWW=  precedes Window Width parameter - an integer and an odd number.
+            Optional, defaults to 7 
+	  
+
+EXAMPLE HOW TO RUN THE PROGRAM:
+java -jar <jar name> -m=KABAT,SMERFS -i=prot1 -o=prot1_results -n
+
+As a result of the execution KABAT and SMERFS scores will be calculated. 
+Input comes from prot1 file and an output without an alignment is recorded to 
+prot1_results file. 
+
+Authors: Peter Troshin, Agnieszka Golicz, David Martin and Geoff Barton.
+Please visit http://www.compbio.dundee.ac.uk/aacon for further information.
+ 
\ No newline at end of file
diff --git a/website/prog_docs/iupred.txt b/website/prog_docs/iupred.txt
new file mode 100644
index 0000000..e023ac9
--- /dev/null
+++ b/website/prog_docs/iupred.txt
@@ -0,0 +1,45 @@
+INTERPRETATION OF THE OUTPUT:
+
+In the case of long and short types of disorder the output  gives the
+likelihood of disorder for each residue, i.e. it is a value between 0 and 1,
+and higher values indicate higher probability of disorder. Residues with values
+above 0.5 can be regarded as disordered, and at this cutoff 5% of globular
+proteins are expected to be predicted as disordered (false positives).
+ 
+For the prediction type of globular domains it gives the number of globular
+domains and lists their start and end positions in the sequence. This is followed
+by the submitted sequence with residues of globular domains indicated by
+uppercase letters. 
+
+
+SHORT SUMMARY OF THE METHOD
+
+Intrinsically unstructured/disordered proteins have no single well-defined
+tertiary structure in their native, functional state. Our server recognizes
+such regions from the amino acid sequence based on the estimated pairwise
+energy content. The underlying assumption is that globular proteins make a
+large number of interresidue interactions, providing the stabilizing energy to
+overcome the entropy loss during folding. In contrast, IUPs have special
+sequences that do not have the capacity to form sufficient interresidue
+interactions. Taking a set of globular proteins with known structure, we have
+developed a simple formalism that allows the estimation of the pairwise
+interaction energies of these proteins. It uses a quadratic expression in the
+amino acid composition, which takes into account that the contribution of an
+amino acid to order/disorder depends not only on its own chemical type, but also
+on its sequential environment, including its potential interaction partners.
+Applying this calculation for IUP sequences, their estimated energies are
+clearly shifted towards less favorable energies compared to globular proteins,
+enabling the prediction of protein disorder on this ground. 
+
+	
+References
+
+"The Pairwise Energy Content Estimated from Amino Acid Composition 
+Discriminates between Folded and Intrinsically Unstructured Proteins"
+Zsuzsanna Dosztanyi, Veronika Csizmok, Peter Tompa and Istvan Simon
+J. Mol. Biol. (2005) 347, 827-839.
+
+"IUPred: web server for the prediction of intrinsically unstructured 
+regions of proteins based on estimated energy content"
+Zsuzsanna Dosztanyi, Veronika Csizmok, Peter Tompa and Istvan Simon
+Bioinformatics (2005) 21, 3433-3434.