From: pvtroshin Date: Thu, 19 Aug 2010 14:43:32 +0000 (+0000) Subject: updates to jaba2 from jaba release branch X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=50bb3e70f53817b1174b79003f4826c95b25ff6e;p=jabaws.git updates to jaba2 from jaba release branch git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@2921 e3abac25-378b-4346-85de-24260fe3988d --- diff --git a/dundee-conf/settings/ClustalLimits.xml b/dundee-conf/settings/ClustalLimits.xml new file mode 100644 index 0000000..ebbe206 --- /dev/null +++ b/dundee-conf/settings/ClustalLimits.xml @@ -0,0 +1,18 @@ + + + compbio.runner.clustal.ClustalW + + Disable gap weighting (Speed-oriented) + 2000 + 1000 + + + 1000 + 1000 + + + # LocalEngineExecutionLimit # + 40 + 500 + + diff --git a/dundee-conf/settings/ClustalParameters.xml b/dundee-conf/settings/ClustalParameters.xml new file mode 100644 index 0000000..7e8de1d --- /dev/null +++ b/dundee-conf/settings/ClustalParameters.xml @@ -0,0 +1,195 @@ + + + compbio.runner.clustal.ClustalW + + NOPGAP + Residue-specific gaps off + -NOPGAP + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + + No transition weighting + Disable sequence weighting + -NOWEIGHTS + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + + NOHGAP + Hydrophilic gaps off + -NOHGAP + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + = + + Transition weighting + Type of the sequence (PROTEIN or DNA) + -TRANSWEIGHT + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + 0.5 + + Float + 0 + 10 + + + + Type + Type of the sequence (PROTEIN or DNA) + -TYPE + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + PROTEIN + PROTEIN + DNA + + + OUTORDER + As per INPUT or ALIGNED + -OUTORDER + INPUT + INPUT + ALIGNED + + + MATRIX + Protein weight matrix + -MATRIX + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + BLOSUM62 + + BLOSUM100 + BLOSUM30 + BLOSUM35 + BLOSUM40 + BLOSUM45 + BLOSUM50 
+ BLOSUM55 + BLOSUM60 + BLOSUM62 + BLOSUM65 + BLOSUM70 + BLOSUM75 + BLOSUM80 + BLOSUM85 + BLOSUM90 + BLOSUMN + DAYHOFF + GONNET + IDENTITY + MATCH + NUC.4.2 + NUC.4.4 + PAM10 + PAM100 + PAM110 + PAM120 + PAM130 + PAM140 + PAM150 + PAM160 + PAM170 + PAM180 + PAM190 + PAM20 + PAM200 + PAM210 + PAM220 + PAM230 + PAM240 + PAM250 + PAM260 + PAM270 + PAM280 + PAM290 + PAM30 + PAM300 + PAM310 + PAM320 + PAM330 + PAM340 + PAM350 + PAM360 + PAM370 + PAM380 + PAM390 + PAM40 + PAM400 + PAM410 + PAM420 + PAM430 + PAM440 + PAM450 + PAM460 + PAM470 + PAM480 + PAM490 + PAM50 + PAM500 + PAM60 + PAM70 + PAM80 + PAM90 + + + GAPOPEN + Gap opening penalty + -GAPOPEN + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + 10 + + Float + 0 + 1000 + + + + -GAPEXT + Gap extension penalty + -GAPEXT + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + 0.1 + + Float + 0 + 10 + + + + ENDGAPS + End gap separation pen + -ENDGAPS + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + 0.5 + + Float + 0 + 10 + + + + GAPDIST + Gap separation pen. 
range + -GAPDIST + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + 1 + + Integer + 0 + 50 + + + diff --git a/dundee-conf/settings/ClustalPresets.xml b/dundee-conf/settings/ClustalPresets.xml new file mode 100644 index 0000000..3115006 --- /dev/null +++ b/dundee-conf/settings/ClustalPresets.xml @@ -0,0 +1,13 @@ + + + compbio.runner.clustal.ClustalW + + Disable gap weighting (Speed-oriented) + + + + + + + + diff --git a/dundee-conf/settings/MafftLimits.xml b/dundee-conf/settings/MafftLimits.xml new file mode 100644 index 0000000..7db541e --- /dev/null +++ b/dundee-conf/settings/MafftLimits.xml @@ -0,0 +1,13 @@ + + + compbio.runner.mafft.Mafft + + 1000 + 1000 + + + # LocalEngineExecutionLimit # + 40 + 500 + + diff --git a/dundee-conf/settings/MafftParameters.xml b/dundee-conf/settings/MafftParameters.xml new file mode 100644 index 0000000..74fc854 --- /dev/null +++ b/dundee-conf/settings/MafftParameters.xml @@ -0,0 +1,237 @@ + + + compbio.runner.mafft.Mafft + + Shared 6mers distance calculation + Distance is calculated based on the number of shared 6mers. Default: on + --6merpair + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --6merpair + + + Output sequences order + --inputorder - Output order: same as input. + --reorder - Output order: aligned. Default: same as input + --inputorder + --reorder + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --inputorder + + + Sequence type + + --nuc - Assume the sequences are nucleotide. + --amino - Assume the sequences are amino acid. + --amino + --nuc + --auto + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --auto + + + Pairwise alignment computation method + + --globalpair + All pairwise alignments are computed with the Needleman-Wunsch algorithm. More accurate but slower than --6merpair. Suitable for a set of globally alignable sequences. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (G-INS-i). 
Default: off (6mer distance is used) + --genafpair + All pairwise alignments are computed with a local algorithm with the generalized affine gap cost (Altschul 1998). More accurate but slower than --6merpair. Suitable when large internal gaps are expected. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (E-INS-i). Default: off (6mer distance is used) + --fastapair + All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). FASTA is required. Default: off (6mer distance is used) + --localpair + All pairwise alignments are computed with the Smith-Waterman algorithm. More accurate but slower than --6merpair. Suitable for a set of locally alignable sequences. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (L-INS-i). Default: off (6mer distance is used) + + --fastapair + --genafpair + --localpair + --globalpair + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --localpair + + + FFT approximation + Use / Do not use FFT approximation in group-to-group alignment. Default: off + --nofft + --fft + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --nofft + + + No score + Alignment score is not checked in the iterative refinement stage. Default: off (score is checked) + --noscore + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + Part tree + + --parttree - Use a fast tree-building method (PartTree, Katoh and Toh 2007) with the 6mer distance. + --dpparttree - the PartTree algorithm is used with distances based on DP. + Slightly more accurate and slower than --parttree. + --fastaparttree - The PartTree algorithm is used with distances based on FASTA. + Slightly more accurate and slower than --parttree. + All methods recommended for a large number (> ~10,000) of sequences are input. 
+ + --dpparttree + --parttree + --fastaparttree + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + --fastaparttree + + + + Max iteration number + number cycles of iterative refinement are performed. Default: 0 + --maxiterate + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + 0 + 1000 + + + + Partsize + The number of partitions in the PartTree algorithm. Default: 50 + --partsize + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 50 + + Integer + 1 + + + + Group size + Do not make alignment larger than number sequences. Valid only with the --*parttree options. Default: the number of input sequences + --groupsize + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 20 + + Integer + 0 + + + + Guide tree rebuild + Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2 + --retree + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 2 + + Integer + 1 + 100 + + + + Gap opening penalty + Gap opening penalty at group-to-group alignment. Default: 1.53 + --op + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 1.53 + + Float + 0 + + + + Group-to-group gap extension penalty + Offset value, which works like gap extension penalty, for group-to-group alignment. Default: 0.123 + --ep + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0.123 + + Float + 0 + + + + Gap opening penalty at local pairwise alignment + Gap opening penalty at local pairwise alignment. Valid when the --localpair or --genafpair option is selected. 
Default: -2.00 + --lop + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -2.00 + + Float + 0 + + + + Matrix + Substitution Matrix to use + --aamatrix + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + BLOSUM62 + BLOSUM100 + BLOSUM30 + BLOSUM35 + BLOSUM40 + BLOSUM45 + BLOSUM50 + BLOSUM55 + BLOSUM60 + BLOSUM62 + BLOSUM65 + BLOSUM70 + BLOSUM75 + BLOSUM80 + BLOSUM85 + BLOSUM90 + BLOSUMN + DAYHOFF + GONNET + IDENTITY + MATCH + PAM10 + PAM100 + PAM110 + PAM120 + PAM130 + PAM140 + PAM150 + PAM160 + PAM170 + PAM180 + PAM190 + PAM20 + PAM200 + PAM210 + PAM220 + PAM230 + PAM240 + PAM250 + PAM260 + PAM270 + PAM280 + PAM290 + PAM30 + PAM300 + PAM310 + PAM320 + PAM330 + PAM340 + PAM350 + PAM360 + PAM370 + PAM380 + PAM390 + PAM40 + PAM400 + PAM410 + PAM420 + PAM430 + PAM440 + PAM450 + PAM460 + PAM470 + PAM480 + PAM490 + PAM50 + PAM500 + PAM60 + PAM70 + PAM80 + PAM90 + + diff --git a/dundee-conf/settings/MafftPresets.xml b/dundee-conf/settings/MafftPresets.xml new file mode 100644 index 0000000..794d20c --- /dev/null +++ b/dundee-conf/settings/MafftPresets.xml @@ -0,0 +1,62 @@ + + + compbio.runner.mafft.Mafft + + L-INS-i (Accuracy-oriented) + L-INS-i (probably most accurate; recommended for <200 + sequences; iterative refinement method incorporating local pairwise + alignment information) + + + + + + + G-INS-i (Accuracy-oriented) + G-INS-i (suitable for sequences of similar lengths; + recommended for <200 sequences; iterative refinement method + incorporating global pairwise alignment information) + + + + + + + E-INS-i (Accuracy-oriented) + E-INS-i (suitable for sequences containing large + unalignable regions; recommended for <200 sequences) + + + + + + + + FFT-NS-i (Speed oriented) + FFT-NS-i (iterative refinement method; two cycles only) + + + + + + + FFT-NS-1 (Speed oriented) + FFT-NS-1 (very fast; recommended for >2000 sequences; + progressive method with a rough guide tree) + + + + + + + NW-NS-PartTree-1 (Speed oriented) + NW-NS-PartTree-1 
(recommended for ~10,000 to ~50,000 + sequences; progressive method with the PartTree algorithm) + + + + + + + + diff --git a/dundee-conf/settings/MuscleLimits.xml b/dundee-conf/settings/MuscleLimits.xml new file mode 100644 index 0000000..88cfc61 --- /dev/null +++ b/dundee-conf/settings/MuscleLimits.xml @@ -0,0 +1,13 @@ + + + compbio.runner.muscle.Muscle + + 1000 + 1000 + + + # LocalEngineExecutionLimit # + 40 + 500 + + diff --git a/dundee-conf/settings/MuscleParameters.xml b/dundee-conf/settings/MuscleParameters.xml new file mode 100644 index 0000000..11c36d8 --- /dev/null +++ b/dundee-conf/settings/MuscleParameters.xml @@ -0,0 +1,289 @@ + + + compbio.runner.muscle.Muscle + + Group sequences + Group sequences by similarity (this is the default) or preserve the input order + -group + -stable + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -stable + + + Anchor optimisation + Enable/disable anchor optimization in tree dependent refinement iterations + -anchors + -noanchors + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -anchors + + + Root alignment computation method + Use Steven Brenner's method for computing the root alignment. + -brenner + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + + dimer + Use dimer approximation for the SP score (faster, slightly less accurate) + -dimer + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + Diagonal + Use diagonal optimizations. Faster, especially for closely related sequences, but may be less accurate. + -diags + + + Diagonal 1 + Use diagonal optimizations in first iteration (faster for similar sequences) + -diags1 + + + Profile scoring method + le - use log-expectation profile score VTML240 (default for amino acid sequences.) + sp - use sum-of-pairs protein profile score (PAM200). 
+ sv - use sum-of-pairs profile score (VTML240) + -le + -sp + -sv + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -le + + + + Sequence type + Sequence type - Amino acid/Nucleotide + -seqtype + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + auto + auto + protein + nucleo + + + Maxiters + Maximum number of iterations (integer, default 16) + -maxiters + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 16 + + Integer + 1 + 100 + + + + Matrix + Substitution Matrix to use + -matrix + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + BLOSUM62 + BLOSUM100 + BLOSUM30 + BLOSUM35 + BLOSUM40 + BLOSUM45 + BLOSUM50 + BLOSUM55 + BLOSUM60 + BLOSUM62 + BLOSUM65 + BLOSUM70 + BLOSUM75 + BLOSUM80 + BLOSUM85 + BLOSUM90 + BLOSUMN + DAYHOFF + GONNET + IDENTITY + MATCH + NUC.4.2 + NUC.4.4 + PAM10 + PAM100 + PAM110 + PAM120 + PAM130 + PAM140 + PAM150 + PAM160 + PAM170 + PAM180 + PAM190 + PAM20 + PAM200 + PAM210 + PAM220 + PAM230 + PAM240 + PAM250 + PAM260 + PAM270 + PAM280 + PAM290 + PAM30 + PAM300 + PAM310 + PAM320 + PAM330 + PAM340 + PAM350 + PAM360 + PAM370 + PAM380 + PAM390 + PAM40 + PAM400 + PAM410 + PAM420 + PAM430 + PAM440 + PAM450 + PAM460 + PAM470 + PAM480 + PAM490 + PAM50 + PAM500 + PAM60 + PAM70 + PAM80 + PAM90 + + + Gap open penalty + Gap opening penalty. Must be negative + -gapopen + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -12.0 + + Float + -100 + 0 + + + + Gap extension penalty + Gap extension penalty. Must be negative + -gapextend + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -1.0 + + Float + -100 + 0 + + + + Center + Center parameter. Should be negative. + -center + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0.0 + + Float + -100 + 0 + + + + Hydro + Window size for determining whether a region is hydrophobic. 
+ -hydro + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 5 + + Integer + 0 + 100 + + + + Hydrofactor + Multiplier for gap open/close penalties in hydrophobic regions. + -hydrofactor + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 1.2 + + Float + 0 + 10 + + + + cluster1 + Clustering method to use on the iteration 1 + -cluster1 + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + upgma + upgma + + + cluster2 + Clustering method to use on the iteration 2 and all subsequent iterations + -cluster2 + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + upgmb + upgmb + neighborjoining + + + Sequence weighting scheme 1 + Sequence weighting scheme to use on the iteration 1 and 2 + none=all sequences have equal weight. + henikoff=Henikoff & Henikoff weighting scheme. + henikoffpb=Modified Henikoff scheme as used in PSI-BLAST. + clustalw=CLUSTALW method. + threeway=Gotoh three-way method + -weight1 + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + clustalw + none + henikoff + henikoffpb + gsc + clustalw + threeway + + + Sequence weighting scheme 2 + Sequence weighting scheme to use on the iteration 3 and all subsequent + iterations for tree-dependent refinement. + none=all sequences have equal weight. + henikoff=Henikoff & Henikoff weighting scheme. + henikoffpb=Modified Henikoff scheme as used in PSI-BLAST. + clustalw=CLUSTALW method. + threeway=Gotoh three-way method + -weight2 + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + clustalw + none + henikoff + henikoffpb + gsc + clustalw + threeway + + + Distance1 + Distance measure for iteration 1. 
Defaults Kmer6_6 (for amino ) or Kmer4_6 (for nucleo) + -distance1 + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + kmer6_6 + kmer6_6 + kmer20_3 + kbit20_3 + kmer20_4 + kmer4_6 + + diff --git a/dundee-conf/settings/MusclePresets.xml b/dundee-conf/settings/MusclePresets.xml new file mode 100644 index 0000000..190adad --- /dev/null +++ b/dundee-conf/settings/MusclePresets.xml @@ -0,0 +1,31 @@ + + + compbio.runner.muscle.Muscle + + Protein alignment(Fastest speed) + Fastest possible speed for protein sequences. Gives acceptable quality alignments for closely related sequences + + + + + + + + + Nucleotide alignment(Fastest speed) + Fastest possible speed for nucleotide sequences. Gives acceptable quality alignments for closely related sequences + + + + + + + Huge alignments (speed-oriented) + Very large number of sequences (several thousand), or they are very long, + + + + + + + diff --git a/dundee-conf/settings/ProbconsLimits.xml b/dundee-conf/settings/ProbconsLimits.xml new file mode 100644 index 0000000..96c8b31 --- /dev/null +++ b/dundee-conf/settings/ProbconsLimits.xml @@ -0,0 +1,13 @@ + + + compbio.runner.probcons.Probcons + + 1000 + 1000 + + + # LocalEngineExecutionLimit # + 30 + 500 + + diff --git a/dundee-conf/settings/ProbconsParameters.xml b/dundee-conf/settings/ProbconsParameters.xml new file mode 100644 index 0000000..aa8ef07 --- /dev/null +++ b/dundee-conf/settings/ProbconsParameters.xml @@ -0,0 +1,147 @@ + + + compbio.runner.probcons.Probcons + + + Output aligned + Output sequences in alignment order rather than input order + -a + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + + + + Rounds of pre-training before aligning the sequences + This specifies the number of rounds of EM to be applied on the set of sequences being +aligned. 
This option is used in case the default parameters are not appropriate for the +particular sequences being aligned; in general, this option is not recommended as it may +lead to unstable alignment parameters. + -pre + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + 0 + 20 + + + + Passes of iterative refinement + This specifies the number of iterations of iterative refinement to be performed. In each +stage of iterative refinement, the set of sequences in the alignment is randomly +partitioned into two groups. After projecting the alignments to these groups, the two +groups are realigned, resulting in an alignment whose objective score is guaranteed to be +at least that of the original alignment + -ir + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 100 + + Integer + 0 + 1000 + + + + Passes of consistency transformation + Each pass applies one round of the consistency transformation on the set of sequences. + The consistency transformation is described in detail in the mentioned papers. In each + round, the aligner computes the consistency transformation for each pair of sequences + using all other sequences. The aligner then updates the posterior probability matrices of + the pairwise alignments. 
+ -c + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 2 + + Integer + 0 + 5 + + + diff --git a/dundee-conf/settings/TcoffeeLimits.xml b/dundee-conf/settings/TcoffeeLimits.xml new file mode 100644 index 0000000..f21fb03 --- /dev/null +++ b/dundee-conf/settings/TcoffeeLimits.xml @@ -0,0 +1,13 @@ + + + compbio.runner.tcoffee.Tcoffee + + 1000 + 1000 + + + # LocalEngineExecutionLimit # + 40 + 500 + + diff --git a/dundee-conf/settings/TcoffeeParameters.xml b/dundee-conf/settings/TcoffeeParameters.xml new file mode 100644 index 0000000..a18052f --- /dev/null +++ b/dundee-conf/settings/TcoffeeParameters.xml @@ -0,0 +1,367 @@ + + + compbio.runner.tcoffee.Tcoffee + + Search sequences in PDB + + Forces t_coffee to run extract_from_pdb to check the pdb status of each sequence. + This can considerably slow down the program. + + -check_pdb_status + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + + = + + + Preset Mode + It indicates that t_coffee will use some hard coded parameters. These include: + quickaln: Very fast, sequence type - all, accuracy - medium low + + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -mode + quickaln + quickaln + + + + + Distance matrix computation method + + This flag indicates the method used for computing the distance matrix (distance between every pair of sequences) required for the computation of the dendrogram. + Slow The chosen dp_mode using the extended library, + fast: The fasta dp_mode using the extended library. + very_fast The fasta dp_mode using blosum62mt. 
+ ktup Ktup matching (Muscle kind) + aln Read the distances on a precomputed MSA + -distance_matrix_mode + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + very_fast + slow + fast + very_fast + ktup + aln + + + Tree Computation method + + gotoh_pair_wise: implementation of the gotoh algorithm (quadratic in memory and time) + myers_miller_pair_wise: implementation of the Myers and Miller dynamic programming algorithm ( quadratic in time and linear in space). This algorithm is recommended for very long sequences. It is about 2 times slower than gotoh and only accepts tg_mode=1or 2 (i.e. gaps penalized for opening). + fasta_pair_wise: implementation of the fasta algorithm. The sequence is hashed, looking for ktuples words. Dynamic programming is only carried out on the ndiag best scoring diagonals. This is much faster but less accurate than the two previous. This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag + cfasta_pair_wise: c stands for checked. It is the same algorithm. The dynamic programming is made on the ndiag best diagonals, and then on the 2*ndiags, and so on until the scores converge. Complexity will depend on the level of divergence of the sequences, but will usually be L*log(L), with an accuracy comparable to the two first mode ( this was checked on BaliBase). This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag + + -dp_mode + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + cfasta_pair_wise + gotoh_pair_wise + myers_miller_pair_wise + fasta_pair_wise + cfasta_pair_wise + + + + Number of diagonals used by the fasta_pair_wise algorithm + + Indicates the number of diagonals used by the fasta_pair_wise algorithm (cf -dp_mode). When -ndiag=0, n_diag=Log (length of the smallest sequence)+1. + When -ndiag and -diag_threshold are set, diagonals are selected if and only if they fulfill both conditions. 
+ + -ndiag + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + 0 + 1000 + + + + Number of diagonals used by the fasta_pair_wise algorithm + + Indicates the manner in which diagonals are scored during the fasta hashing. +0: indicates that the score of a diagonal is equal to the sum of the scores of the exact matches it contains. +1 indicates that this score is set equal to the score of the best uninterrupted segment (useful when dealing with fragments of sequences). + + -diag_mode + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + 0 + 1 + + + Diagonal threshold + + Sets the value of the threshold when selecting diagonals. + 0: indicates that -ndiag should be used to select the diagonals (cf -ndiag section). + + -diag_threshold + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + 0 + 1000 + + + + Alphabet degeneration method + + Indicates the manner in which the amino acid alphabet is degenerated when hashing in the + fasta_pairwise dynamic programming. Standard ClustalW matrices are all valid. + They are used to define groups of amino acids having positive substitution values. + In T-Coffee, the default is a 13 letter grouping named Vasiliky, with residues grouped as follows: + rk, de, qh, vilm, fy (other residues kept alone). + This alphabet is set with the flag -sim_matrix=vasiliky. + In order to keep the alphabet non degenerated, -sim_matrix=idmat can be used to retain + the standard alphabet. + + -sim_matrix + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + vasiliky + vasiliky + idmat + + + Substitution Matrix + +This flag sets the matrix that will be used by alignment methods within t_coffee (slow_pair, lalign_id_pair). It does not affect external methods (like clustal_pair, clustal_aln). +Users can also provide their own matrices, using the matrix format described in the appendix. 
+ + -matrix + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + blosum62mt + + + + + Match penalty + + Indicates the penalty to associate with a match. When using a library, + all matches are positive or equal to 0. Matches equal to 0 are unsupported by the + library but non-penalized. Setting nomatch to a non-negative value makes it possible + to penalize these null matches and prevent unrelated sequences from being aligned + (this can be useful when the alignments are meant to be used for structural modeling) + -nomatch + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + 0 + 1000 + + + + Gap opening penalty + + Indicates the penalty applied for opening a gap. The penalty must be negative. + If no value is provided when using a substitution matrix, a value will be automatically computed. + Here are some guidelines regarding the tuning of gapopen and gapext. + In T-Coffee matches get a score between 0 (match) and 1000 (match perfectly consistent with the library). + The default cosmetic penalty is set to -50 (5% of a perfect match). + If you want to tune -gapopen and see a strong effect, you should therefore consider values between 0 + and -1000. + + -gapopen + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + -1000 + 0 + + + + Gap extension penalty + + Indicates the penalty applied for extending a gap. The penalty must be negative. + + -gapext + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + -1000 + 0 + + + + Cosmetic penalty + + Indicates the penalty applied for opening a gap. The penalty must be negative and is set to a very low value by default. + It will only have an influence on the portions of the alignment that are unalignable. + It will not make them more correct, but only more pleasing to the eye ( i.e. Avoid stretches + of lonely residues). The cosmetic penalty is automatically turned off if a substitution matrix is + used rather than a library. 
+ + -cosmetic_penalty + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + -50 + + Integer + -1000 + 0 + + + + Terminal gaps penalty + + 0: terminal gaps penalized with -gapopen + -gapext*len + 1: terminal gaps penalized with a -gapext*len + 2: terminal gaps unpenalized. + + -tg_mode + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 1 + 0 + 1 + 2 + + + Number of iterations + + Sequences are extracted in turn and realigned to the MSA. + If iterate is set to -1, each sequence is realigned, otherwise the number of iterations is + set by -iterate. + + -iterate + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + 0 + + Integer + -1 + 100 + + + + Output order + + Sets the order of the sequences in the output alignment: -outorder=input means the sequences + are kept in the original order. -outorder=aligned means the sequences come in the order + indicated by the tree. This order can be seen as a one-dimensional projection of the tree distances. + + -outorder + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + input + input + aligned + + + Input order + + Multiple alignments based on dynamic programming depend slightly on the order in which + the incoming sequences are provided. To prevent this effect sequences are arbitrarily + sorted at the beginning of the program (-inorder=aligned). + However, this affects the sequence order within the library. + You can switch this off by setting -inorder=input + + -inorder + http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html + aligned + input + aligned + + diff --git a/dundee-conf/settings/TcoffeePresets.xml b/dundee-conf/settings/TcoffeePresets.xml new file mode 100644 index 0000000..585bb21 --- /dev/null +++ b/dundee-conf/settings/TcoffeePresets.xml @@ -0,0 +1,54 @@ + + + compbio.runner.tcoffee.Tcoffee + + Quick align. Very fast approximate (Speed-oriented) + quickaln: Very fast, sequence type - all, accuracy - medium low + + + + + + + +