From 3587067d71bb38ad95bf7c1470b9f827980b5c90 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Fri, 2 Aug 2013 11:38:20 +0100 Subject: [PATCH] Re-format the config files in order to get more readable files --- conf/settings/ClustalParameters.xml | 364 +++++++++++++-------------- conf/settings/ClustaloParameters.xml | 119 ++++----- conf/settings/IUPredParameters.xml | 4 +- conf/settings/MafftParameters.xml | 275 +++++++++++--------- conf/settings/TcoffeeParameters.xml | 458 ++++++++++++++++++---------------- conf/settings/TcoffeePresets.xml | 102 ++++---- 6 files changed, 699 insertions(+), 623 deletions(-) diff --git a/conf/settings/ClustalParameters.xml b/conf/settings/ClustalParameters.xml index fc32d26..dde49d0 100644 --- a/conf/settings/ClustalParameters.xml +++ b/conf/settings/ClustalParameters.xml @@ -1,185 +1,185 @@ - compbio.runner.clustal.ClustalW - - NOPGAP - Residue-specific gaps off - -NOPGAP - prog_docs/clustalw.txt - - - No transition weighting - Disable sequence weighting - -NOWEIGHTS - prog_docs/clustalw.txt - - - NOHGAP - Hydrophilic gaps off - -NOHGAP - prog_docs/clustalw.txt - - = - - Transition weighting - Type of the sequence (PROTEIN or DNA) - -TRANSWEIGHT - prog_docs/clustalw.txt - 0.5 - - Float - 0 - 10 - - - - Type - Type of the sequence (PROTEIN or DNA) - -TYPE - prog_docs/clustalw.txt - PROTEIN - PROTEIN - DNA - - - OUTORDER - As per INPUT or ALIGNED - -OUTORDER - INPUT - INPUT - ALIGNED - - - MATRIX - Protein weight matrix - -MATRIX - prog_docs/clustalw.txt - BLOSUM62 - - BLOSUM100 - BLOSUM30 - BLOSUM35 - BLOSUM40 - BLOSUM45 - BLOSUM50 - BLOSUM55 - BLOSUM60 - BLOSUM62 - BLOSUM65 - BLOSUM70 - BLOSUM75 - BLOSUM80 - BLOSUM85 - BLOSUM90 - BLOSUMN - DAYHOFF - GONNET - IDENTITY - MATCH - NUC.4.2 - NUC.4.4 - PAM10 - PAM100 - PAM110 - PAM120 - PAM130 - PAM140 - PAM150 - PAM160 - PAM170 - PAM180 - PAM190 - PAM20 - PAM200 - PAM210 - PAM220 - PAM230 - PAM240 - PAM250 - PAM260 - PAM270 - PAM280 - PAM290 - PAM30 - PAM300 - PAM310 - PAM320 - PAM330 - PAM340 - PAM350 - PAM360 - PAM370 - PAM380 - PAM390 - PAM40 - PAM400 - PAM410 - PAM420 - PAM430 - PAM440 - PAM450 - PAM460 - PAM470 - PAM480 - PAM490 - PAM50 - PAM500 - PAM60 - PAM70 - PAM80 - PAM90 - - - GAPOPEN - Gap opening penalty - -GAPOPEN - prog_docs/clustalw.txt - 10 - - Float - 0 - 1000 - - - - -GAPEXT - Gap extension penalty - -GAPEXT - prog_docs/clustalw.txt - 0.1 - - Float - 0 - 10 - - - - ENDGAPS - End gap separation pen - -ENDGAPS - prog_docs/clustalw.txt - 0.5 - - Float - 0 - 10 - - - - GAPDIST - Gap separation pen. range - -GAPDIST - prog_docs/clustalw.txt - 1 - - Integer - 0 - 50 - - + compbio.runner.clustal.ClustalW + + NOPGAP + Residue-specific gaps off + -NOPGAP + prog_docs/clustalw.txt + + + No transition weighting + Disable sequence weighting + -NOWEIGHTS + prog_docs/clustalw.txt + + + NOHGAP + Hydrophilic gaps off + -NOHGAP + prog_docs/clustalw.txt + + = + + Transition weighting + Type of the sequence (PROTEIN or DNA) + -TRANSWEIGHT + prog_docs/clustalw.txt + 0.5 + + Float + 0 + 10 + + + + Type + Type of the sequence (PROTEIN or DNA) + -TYPE + prog_docs/clustalw.txt + PROTEIN + PROTEIN + DNA + + + OUTORDER + As per INPUT or ALIGNED + -OUTORDER + INPUT + INPUT + ALIGNED + + + MATRIX + Protein weight matrix + -MATRIX + prog_docs/clustalw.txt + BLOSUM62 + + BLOSUM100 + BLOSUM30 + BLOSUM35 + BLOSUM40 + BLOSUM45 + BLOSUM50 + BLOSUM55 + BLOSUM60 + BLOSUM62 + BLOSUM65 + BLOSUM70 + BLOSUM75 + BLOSUM80 + BLOSUM85 + BLOSUM90 + BLOSUMN + DAYHOFF + GONNET + IDENTITY + MATCH + NUC.4.2 + NUC.4.4 + PAM10 + PAM100 + PAM110 + PAM120 + PAM130 + PAM140 + PAM150 + PAM160 + PAM170 + PAM180 + PAM190 + PAM20 + PAM200 + PAM210 + PAM220 + PAM230 + PAM240 + PAM250 + PAM260 + PAM270 + PAM280 + PAM290 + PAM30 + PAM300 + PAM310 + PAM320 + PAM330 + PAM340 + PAM350 + PAM360 + PAM370 + PAM380 + PAM390 + PAM40 + PAM400 + PAM410 + PAM420 + PAM430 + PAM440 + PAM450 + PAM460 + PAM470 + PAM480 + PAM490 + PAM50 + PAM500 + PAM60 + PAM70 + PAM80 + PAM90 + + + GAPOPEN + Gap opening penalty + -GAPOPEN + prog_docs/clustalw.txt + 10 + + Float + 0 + 1000 + + + + -GAPEXT + Gap extension penalty + -GAPEXT + prog_docs/clustalw.txt + 0.1 + + Float + 0 + 10 + + + + ENDGAPS + End gap separation pen + -ENDGAPS + prog_docs/clustalw.txt + 0.5 + + Float + 0 + 10 + + + + GAPDIST + Gap separation pen. range + -GAPDIST + prog_docs/clustalw.txt + 1 + + Integer + 0 + 50 + + diff --git a/conf/settings/ClustaloParameters.xml b/conf/settings/ClustaloParameters.xml index 711011e..0a1be98 100644 --- a/conf/settings/ClustaloParameters.xml +++ b/conf/settings/ClustaloParameters.xml @@ -1,59 +1,66 @@ - compbio.runner.msa.ClustalO - - Dealign - Dealign input sequences - --dealign - prog_docs/clustalo.txt - - - Full distance matrix - Use full distance matrix for guide-tree calculation (slow; mBed is default) - --full - prog_docs/clustalo.txt - - - Full distance matrix for each iteration - Use full distance matrix for guide-tree calculation during iteration (mBed is default) - --full-iter - prog_docs/clustalo.txt - - = - - Max Guide tree iterations - Maximum number of HMM iterations - --max-hmm-iterations - prog_docs/clustalo.txt - 1 - - Integer - 1 - 100 - - - - Number of iterations (combined) - Number of (combined guide tree/HMM) iterations - --iter - prog_docs/clustalo.txt - 1 - - Integer - 1 - 100 - - - - Max Guide tree iterations - Maximum guide tree iterations - --max-guidetree-iterations - prog_docs/clustalo.txt - 1 - - Integer - 1 - 100 - - + compbio.runner.msa.ClustalO + + + Dealign + Dealign input sequences + --dealign + prog_docs/clustalo.txt + + + + Full distance matrix + Use full distance matrix for guide-tree calculation (slow; mBed is default) + --full + prog_docs/clustalo.txt + + + + Full distance matrix for each iteration + Use full distance matrix for guide-tree calculation during iteration (mBed is default) + --full-iter + prog_docs/clustalo.txt + + + = + + + Max Guide tree iterations + Maximum number of HMM iterations + --max-hmm-iterations + prog_docs/clustalo.txt + 1 + + Integer + 1 + 100 + + + + + Number of iterations (combined) + Number of (combined guide tree/HMM) iterations + --iter + prog_docs/clustalo.txt + 1 + + Integer + 1 + 100 + + + + + Max Guide tree iterations + Maximum guide tree iterations + --max-guidetree-iterations + prog_docs/clustalo.txt + 1 + + Integer + 1 + 100 + + diff --git a/conf/settings/IUPredParameters.xml b/conf/settings/IUPredParameters.xml index eb485c8..5bc99c6 100644 --- a/conf/settings/IUPredParameters.xml +++ b/conf/settings/IUPredParameters.xml @@ -1,7 +1,7 @@ - compbio.runner.disorder.IUPred - + compbio.runner.disorder.IUPred + Disorder type "Long" - for prediction of long disordered regions, "short" - for prediction of short disordered regions ( e.g. missing residues in diff --git a/conf/settings/MafftParameters.xml b/conf/settings/MafftParameters.xml index f1b55c4..25091c8 100644 --- a/conf/settings/MafftParameters.xml +++ b/conf/settings/MafftParameters.xml @@ -1,6 +1,9 @@ compbio.runner.mafft.Mafft + + + Shared 6mers distance calculation Distance is calculated based on the number of shared 6mers. Default: on @@ -8,37 +11,49 @@ prog_docs/mafft.html --6merpair + Output sequences order - --inputorder - Output order: same as input. - --reorder - Output order: aligned. Default: same as input + + --inputorder - Output order: same as input. + --reorder - Output order: aligned. Default: same as input + --inputorder --reorder prog_docs/mafft.html --inputorder + - + Sequence type - --nuc - Assume the sequences are nucleotide. - --amino - Assume the sequences are amino acid. + --nuc - Assume the sequences are nucleotide. + --amino - Assume the sequences are amino acid. + --amino --nuc --auto prog_docs/mafft.html --auto + Pairwise alignment computation method --globalpair - All pairwise alignments are computed with the Needleman-Wunsch algorithm. More accurate but slower than --6merpair. Suitable for a set of globally alignable sequences. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (G-INS-i). Default: off (6mer distance is used) + All pairwise alignments are computed with the Needleman-Wunsch algorithm. + More accurate but slower than --6merpair. Suitable for a set of globally alignable sequences. + Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (G-INS-i). + Default: off (6mer distance is used) --genafpair - All pairwise alignments are computed with a local algorithm with the generalized affine gap cost (Altschul 1998). More accurate but slower than --6merpair. Suitable when large internal gaps are expected. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (E-INS-i). Default: off (6mer distance is used) + All pairwise alignments are computed with a local algorithm with the generalized affine gap cost (Altschul 1998). + More accurate but slower than --6merpair. Suitable when large internal gaps are expected. + Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (E-INS-i). + Default: off (6mer distance is used) --fastapair - All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). FASTA is required. Default: off (6mer distance is used) + All pairwise alignments are computed with FASTA (Pearson and Lipman 1988). FASTA is required. Default: off (6mer distance is used) --localpair - All pairwise alignments are computed with the Smith-Waterman algorithm. More accurate but slower than --6merpair. Suitable for a set of locally alignable sequences. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (L-INS-i). Default: off (6mer distance is used) + All pairwise alignments are computed with the Smith-Waterman algorithm. More accurate but slower than --6merpair. Suitable for a set of locally alignable sequences. Applicable to up to ~200 sequences. A combination with --maxiterate 1000 is recommended (L-INS-i). Default: off (6mer distance is used) --fastapair --genafpair @@ -47,6 +62,7 @@ prog_docs/mafft.html --localpair + FFT approximation Use / Do not use FFT approximation in group-to-group alignment. Default: off @@ -55,21 +71,24 @@ prog_docs/mafft.html --nofft + No score Alignment score is not checked in the iterative refinement stage. Default: off (score is checked) --noscore prog_docs/mafft.html - + + Part tree - - --parttree - Use a fast tree-building method (PartTree, Katoh and Toh 2007) with the 6mer distance. - --dpparttree - the PartTree algorithm is used with distances based on DP. - Slightly more accurate and slower than --parttree. - --fastaparttree - The PartTree algorithm is used with distances based on FASTA. - Slightly more accurate and slower than --parttree. - All methods recommended for a large number (> ~10,000) of sequences are input. + + --parttree + Use a fast tree-building method (PartTree, Katoh and Toh 2007) with the 6mer distance. + --dpparttree + the PartTree algorithm is used with distances based on DP. Slightly more accurate and slower than --parttree. + --fastaparttree + The PartTree algorithm is used with distances based on FASTA. Slightly more accurate and slower than --parttree. + All methods recommended for a large number (> ~10,000) of sequences are input. --dpparttree --parttree @@ -77,19 +96,24 @@ prog_docs/mafft.html --fastaparttree + + + - + + Max iteration number number cycles of iterative refinement are performed. Default: 0 --maxiterate prog_docs/mafft.html 0 - Integer + Integer 0 1000 + Partsize The number of partitions in the PartTree algorithm. Default: 50 @@ -97,141 +121,156 @@ prog_docs/mafft.html 50 - Integer - 1 + Integer + 1 - + + Group size - Do not make alignment larger than number sequences. Valid only with the --*parttree options. Default: the number of input sequences + + Do not make alignment larger than number sequences. + Valid only with the --*parttree options. Default: the number of input sequences + --groupsize prog_docs/mafft.html 20 - Integer - 0 + Integer + 0 - + + Guide tree rebuild - Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2 + + Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2 + --retree prog_docs/mafft.html - 2 + 2 - Integer - 1 - 100 + Integer + 1 + 100 - + + Gap opening penalty - Gap opening penalty at group-to-group alignment. Default: 1.53 + + Gap opening penalty at group-to-group alignment. Default: 1.53 + --op prog_docs/mafft.html 1.53 - - Float - 0 + + Float + 0 - + + Group-to-group gap extension penalty Offset value, which works like gap extension penalty, for group-to-group alignment. Deafult: 0.123 --ep prog_docs/mafft.html 0.123 - - Float - 0 + + Float + 0 - - + + + Gap opening penalty at local pairwise alignment - Gap opening penalty at local pairwise alignment. Valid when the --localpair or --genafpair option is selected. Default: -2.00 + + Gap opening penalty at local pairwise alignment. Valid when the --localpair or --genafpair option is selected. Default: -2.00 + --lop prog_docs/mafft.html - -2.00 + -2.00 - Float - 0 + Float + 0 - - + + + Matrix Substitution Matrix to use --aamatrix prog_docs/mafft.html BLOSUM62 - BLOSUM100 - BLOSUM30 - BLOSUM35 - BLOSUM40 - BLOSUM45 - BLOSUM50 - BLOSUM55 - BLOSUM60 - BLOSUM62 - BLOSUM65 - BLOSUM70 - BLOSUM75 - BLOSUM80 - BLOSUM85 - BLOSUM90 - BLOSUMN - DAYHOFF - GONNET - IDENTITY - MATCH - PAM10 - PAM100 - PAM110 - PAM120 - PAM130 - PAM140 - PAM150 - PAM160 - PAM170 - PAM180 - PAM190 - PAM20 - PAM200 - PAM210 - PAM220 - PAM230 - PAM240 - PAM250 - PAM260 - PAM270 - PAM280 - PAM290 - PAM30 - PAM300 - PAM310 - PAM320 - PAM330 - PAM340 - PAM350 - PAM360 - PAM370 - PAM380 - PAM390 - PAM40 - PAM400 - PAM410 - PAM420 - PAM430 - PAM440 - PAM450 - PAM460 - PAM470 - PAM480 - PAM490 - PAM50 - PAM500 - PAM60 - PAM70 - PAM80 - PAM90 + BLOSUM100 + BLOSUM30 + BLOSUM35 + BLOSUM40 + BLOSUM45 + BLOSUM50 + BLOSUM55 + BLOSUM60 + BLOSUM62 + BLOSUM65 + BLOSUM70 + BLOSUM75 + BLOSUM80 + BLOSUM85 + BLOSUM90 + BLOSUMN + DAYHOFF + GONNET + IDENTITY + MATCH + PAM10 + PAM100 + PAM110 + PAM120 + PAM130 + PAM140 + PAM150 + PAM160 + PAM170 + PAM180 + PAM190 + PAM20 + PAM200 + PAM210 + PAM220 + PAM230 + PAM240 + PAM250 + PAM260 + PAM270 + PAM280 + PAM290 + PAM30 + PAM300 + PAM310 + PAM320 + PAM330 + PAM340 + PAM350 + PAM360 + PAM370 + PAM380 + PAM390 + PAM40 + PAM400 + PAM410 + PAM420 + PAM430 + PAM440 + PAM450 + PAM460 + PAM470 + PAM480 + PAM490 + PAM50 + PAM500 + PAM60 + PAM70 + PAM80 + PAM90 diff --git a/conf/settings/TcoffeeParameters.xml b/conf/settings/TcoffeeParameters.xml index bf3f1fe..5989d85 100644 --- a/conf/settings/TcoffeeParameters.xml +++ b/conf/settings/TcoffeeParameters.xml @@ -1,24 +1,28 @@ compbio.runner.tcoffee.Tcoffee - + + Search sequences in PDB - Forces t_coffee to run extract_from_pdb to check the pdb status of each sequence. - This can considerably slow down the program. - + Forces t_coffee to run extract_from_pdb to check the pdb status of each sequence. + This can considerably slow down the program. + -check_pdb_status prog_docs/tcoffee.html + = - Preset Mode - It indicates that t_coffee will use some hard coded parameters. These include: - quickaln: Very fast, sequence type - all, accuracy - medium low - + + It indicates that t_coffee will use some hard coded parameters. + These include: quickaln: Very fast, sequence type - all, accuracy - medium low + prog_docs/tcoffee.html -mode quickaln @@ -28,14 +32,14 @@ All the options below need proper installation! rcoffee does not work as it fails to find the "templates" - This require blast to be setup properly see also Presets - dali: a mode used to combine dali pairwise alignments - 3dcoffee: runs t_coffee with the 3dcoffee parameterization - accurate: slow, sequence type - protein, accuracy - high + This require blast to be setup properly see also Presets + dali: a mode used to combine dali pairwise alignments + 3dcoffee: runs t_coffee with the 3dcoffee parameterization + accurate: slow, sequence type - protein, accuracy - high expresso: slow, sequence type - all, accuracy - high - rcoffee: slow, sequence type - RNA, accuracy - high - - expresso + rcoffee: slow, sequence type - RNA, accuracy - high + + expresso dali 3dcoffee accurate @@ -52,316 +56,342 @@ Distance matrix computation method - This flag indicates the method used for computing the distance matrix (distance between every pair of sequences) required for the computation of the dendrogram. - Slow The chosen dp_mode using the extended library, - fast: The fasta dp_mode using the extended library. - very_fast The fasta dp_mode using blosum62mt. - ktup Ktup matching (Muscle kind) - aln Read the distances on a precomputed MSA + This flag indicates the method used for computing the distance matrix (distance between + every pair of sequences) required for the computation of the dendrogram. + Slow - The chosen dp_mode using the extended library, + fast - The fasta dp_mode using the extended library. + very_fast - The fasta dp_mode using blosum62mt. + ktup - Ktup matching (Muscle kind) + aln - Read the distances on a precomputed MSA + -distance_matrix_mode prog_docs/tcoffee.html - very_fast - slow + very_fast + slow fast very_fast ktup aln + Tree Computation method - gotoh_pair_wise: implementation of the gotoh algorithm (quadratic in memory and time) - myers_miller_pair_wise: implementation of the Myers and Miller dynamic programming algorithm ( quadratic in time and linear in space). This algorithm is recommended for very long sequences. It is about 2 times slower than gotoh and only accepts tg_mode=1or 2 (i.e. gaps penalized for opening). - fasta_pair_wise: implementation of the fasta algorithm. The sequence is hashed, looking for ktuples words. Dynamic programming is only carried out on the ndiag best scoring diagonals. This is much faster but less accurate than the two previous. This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag - cfasta_pair_wise: c stands for checked. It is the same algorithm. The dynamic programming is made on the ndiag best diagonals, and then on the 2*ndiags, and so on until the scores converge. Complexity will depend on the level of divergence of the sequences, but will usually be L*log(L), with an accuracy comparable to the two first mode ( this was checked on BaliBase). This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag - + gotoh_pair_wise: implementation of the gotoh algorithm (quadratic in memory and time) + myers_miller_pair_wise: implementation of the Myers and Miller dynamic programming + algorithm ( quadratic in time and linear in space). This algorithm is recommended for + very long sequences. It is about 2 times slower than gotoh and only accepts tg_mode=1 + or 2 (i.e. gaps penalized for opening). + fasta_pair_wise: implementation of the fasta algorithm. The sequence is hashed, looking + for ktuples words. Dynamic programming is only carried out on the ndiag best scoring + diagonals. This is much faster but less accurate than the two previous. This mode is + controlled by the parameters -ktuple, -diag_mode and -ndiag + cfasta_pair_wise: c stands for checked. It is the same algorithm. The dynamic programming + is made on the ndiag best diagonals, and then on the 2*ndiags, and so on until the scores + converge. Complexity will depend on the level of divergence of the sequences, but will + usually be L*log(L), with an accuracy comparable to the two first mode ( this was checked + on BaliBase). This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag + -dp_mode prog_docs/tcoffee.html - cfasta_pair_wise - gotoh_pair_wise + cfasta_pair_wise + gotoh_pair_wise myers_miller_pair_wise fasta_pair_wise cfasta_pair_wise + Number of diagonals used by the fasta_pair_wise algorithm - Indicates the number of diagonals used by the fasta_pair_wise algorithm (cf -dp_mode). When -ndiag=0, n_diag=Log (length of the smallest sequence)+1. - When -ndiag and -diag_threshold are set, diagonals are selected if and only if they fulfill both conditions. - + Indicates the number of diagonals used by the fasta_pair_wise algorithm (cf -dp_mode). When -ndiag=0, n_diag=Log (length of the smallest sequence)+1. + When -ndiag and -diag_threshold are set, diagonals are selected if and only if they fulfill both conditions. + -ndiag prog_docs/tcoffee.html - 0 + 0 - Integer + Integer 0 1000 + Number of diagonals used by the fasta_pair_wise algorithm - Indicates the manner in which diagonals are scored during the fasta hashing. -0: indicates that the score of a diagonal is equal to the sum of the scores of the exact matches it contains. -1 indicates that this score is set equal to the score of the best uninterrupted segment (useful when dealing with fragments of sequences). - + Indicates the manner in which diagonals are scored during the fasta hashing. + 0 indicates that the score of a diagonal is equal to the sum of the scores of the exact matches it contains. + 1 indicates that this score is set equal to the score of the best uninterrupted segment + (useful when dealing with fragments of sequences). + -diag_mode prog_docs/tcoffee.html - 0 - 0 - 1 + 0 + 0 + 1 - + + Diagonal threshold - Sets the value of the threshold when selecting diagonals. - 0: indicates that -ndiag should be used to select the diagonals (cf -ndiag section). - + Sets the value of the threshold when selecting diagonals. + 0: indicates that -ndiag should be used to select the diagonals (cf -ndiag section). + -diag_threshold prog_docs/tcoffee.html - 0 + 0 - Integer + Integer 0 1000 - + + Alphabet degeneration method - Indicates the manner in which the amino acid alphabet is degenerated when hashing in the - fasta_pairwise dynamic programming. Standard ClustalW matrices are all valid. - They are used to define groups of amino acids having positive substitution values. - In T-Coffee, the default is a 13 letter grouping named Vasiliky, with residues grouped as follows: - rk, de, qh, vilm, fy (other residues kept alone). - This alphabet is set with the flag -sim_matrix=vasiliky. - In order to keep the alphabet non degenerated, -sim_matrix=idmat can be used to retain - the standard alphabet. - + Indicates the manner in which the amino acid alphabet is degenerated when hashing in the + fasta_pairwise dynamic programming. Standard ClustalW matrices are all valid. + They are used to define groups of amino acids having positive substitution values. + In T-Coffee, the default is a 13 letter grouping named Vasiliky, with residues grouped as follows: + rk, de, qh, vilm, fy (other residues kept alone). + This alphabet is set with the flag -sim_matrix=vasiliky. + In order to keep the alphabet non degenerated, -sim_matrix=idmat can be used to retain + the standard alphabet. + -sim_matrix prog_docs/tcoffee.html - vasiliky - vasiliky - idmat - - + vasiliky + vasiliky + idmat + + + Substitution Matrix -This flag sets the matrix that will be used by alignment methods within t_coffee (slow_pair, lalign_id_pair). It does not affect external methods (like clustal_pair, clustal_aln). -Users can also provide their own matrices, using the matrix format described in the appendix. - + This flag sets the matrix that will be used by alignment methods within t_coffee (slow_pair, lalign_id_pair). + It does not affect external methods (like clustal_pair, clustal_aln). + Users can also provide their own matrices, using the matrix format described in the appendix. + -matrix prog_docs/tcoffee.html - blosum62mt - - - - + + + Match penalty - Indicates the penalty to associate with a match. When using a library, - all matches are positive or equal to 0. Matches equal to 0 are unsupported by the - library but non-penalized. Setting nomatch to a non-negative value makes it possible - to penalize these null matches and prevent unrelated sequences from being aligned - (this can be useful when the alignments are meant to be used for structural modeling) + Indicates the penalty to associate with a match. When using a library, + all matches are positive or equal to 0. Matches equal to 0 are unsupported by the + library but non-penalized. Setting nomatch to a non-negative value makes it possible + to penalize these null matches and prevent unrelated sequences from being aligned + (this can be useful when the alignments are meant to be used for structural modeling) -nomatch prog_docs/tcoffee.html - 0 + 0 - Integer + Integer 0 1000 - - + + + Gap opening penalty - Indicates the penalty applied for opening a gap. The penalty must be negative. - If no value is provided when using a substitution matrix, a value will be automatically computed. - Here are some guidelines regarding the tuning of gapopen and gapext. - In T-Coffee matches get a score between 0 (match) and 1000 (match perfectly consistent with the library). - The default cosmetic penalty is set to -50 (5% of a perfect match). - If you want to tune -gapoen and see a strong effect, you should therefore consider values between 0 - and -1000. - + Indicates the penalty applied for opening a gap. The penalty must be negative. + If no value is provided when using a substitution matrix, a value will be automatically computed. + Here are some guidelines regarding the tuning of gapopen and gapext. + In T-Coffee matches get a score between 0 (match) and 1000 (match perfectly consistent with the library). + The default cosmetic penalty is set to -50 (5% of a perfect match). + If you want to tune -gapoen and see a strong effect, you should therefore consider values between 0 and -1000. + -gapopen prog_docs/tcoffee.html - 0 + 0 - Integer + Integer -1000 0 - - + + + Gap extension penalty - Indicates the penalty applied for extending a gap. The penalty must be negative. - + Indicates the penalty applied for extending a gap. The penalty must be negative. + -gapext prog_docs/tcoffee.html - 0 + 0 - Integer + Integer -1000 0 - + + Cosmetic penalty - Indicates the penalty applied for opening a gap. The penalty must be negative and is set to a very low value by default. - It will only have an influence on the portions of the alignment that are unalignable. - It will not make them more correct, but only more pleasing to the eye ( i.e. Avoid stretches - of lonely residues). The cosmetic penalty is automatically turned off if a substitution matrix is - used rather than a library. - + Indicates the penalty applied for opening a gap. The penalty must be negative and is set to a very low value by default. + It will only have an influence on the portions of the alignment that are unalignable. + It will not make them more correct, but only more pleasing to the eye ( i.e. Avoid stretches + of lonely residues). The cosmetic penalty is automatically turned off if a substitution matrix is + used rather than a library. + -cosmetic_penalty prog_docs/tcoffee.html - -50 + -50 - Integer + Integer -1000 0 - - + + + Terminal gaps penalty - 0: terminal gaps penalized with -gapopen + -gapext*len - 1: terminal gaps penalized with a -gapext*len - 2: terminal gaps unpenalized. - + 0: terminal gaps penalized with -gapopen + -gapext*len + 1: terminal gaps penalized with a -gapext*len + 2: terminal gaps unpenalized. + -tg_mode prog_docs/tcoffee.html - 1 - 0 - 1 - 2 - - + 1 + 0 + 1 + 2 + + + Number of iterations - Sequences are extracted in turn and realigned to the MSA. - If iterate is set to -1, each sequence is realigned, otherwise the number of iterations is - set by -iterate. - + Sequences are extracted in turn and realigned to the MSA. + If iterate is set to -1, each sequence is realigned, otherwise the number of iterations is + set by -iterate. + -iterate prog_docs/tcoffee.html - 0 + 0 - Integer + Integer -1 100 + Output order - Sets the order of the sequences in the output alignment: -outorder=input means the sequences - are kept in the original order. -outorder=aligned means the sequences come in the order - indicated by the tree. This order can be seen as a one-dimensional projection of the tree distances. - + Sets the order of the sequences in the output alignment: -outorder=input means the sequences + are kept in the original order. -outorder=aligned means the sequences come in the order + indicated by the tree. This order can be seen as a one-dimensional projection of the tree distances. + -outorder prog_docs/tcoffee.html - input - input - aligned - - + input + input + aligned + + + Input order - Multiple alignments based on dynamic programming depend slightly on the order in which - the incoming sequences are provided. To prevent this effect sequences are arbitrarily - sorted at the beginning of the program (-inorder=aligned). - However, this affects the sequence order within the library. - You can switch this off by setting -inorder=input - + Multiple alignments based on dynamic programming depend slightly on the order in which + the incoming sequences are provided. To prevent this effect sequences are arbitrarily + sorted at the beginning of the program (-inorder=aligned). + However, this affects the sequence order within the library. + You can switch this off by setting -inorder=input + -inorder prog_docs/tcoffee.html - aligned - input - aligned + aligned + input + aligned diff --git a/conf/settings/TcoffeePresets.xml b/conf/settings/TcoffeePresets.xml index 585bb21..66e06d9 100644 --- a/conf/settings/TcoffeePresets.xml +++ b/conf/settings/TcoffeePresets.xml @@ -1,54 +1,54 @@ - compbio.runner.tcoffee.Tcoffee - - Quick align. Very fast approximate (Speed-oriented) - quickaln: Very fast, sequence type - all, accuracy - medium low - - - - - - - + compbio.runner.tcoffee.Tcoffee + + Quick align. Very fast approximate (Speed-oriented) + quickaln: Very fast, sequence type - all, accuracy - medium low + + + + + + + -- 1.7.10.2