conf/settings/TcoffeeParameters.xml

   1 <?xml version="1.0" encoding="US-ASCII" standalone="yes"?>\r
   2 <runnerConfig>\r
   3  <runnerClassName>compbio.runner.tcoffee.Tcoffee</runnerClassName>\r
   4     <options isRequired="false"> <!-- optional t_coffee switch -check_pdb_status -->\r
   5         <name>Search sequences in PDB</name>\r
   6         <description>\r
   7                 Forces t_coffee to run extract_from_pdb to check the pdb status of each sequence. \r
   8                 This can considerably slow down the program.    \r
   9                 </description>\r
  10         <optionNames>-check_pdb_status</optionNames>\r
  11         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
  12     </options>\r
  13    <prmSeparator>=</prmSeparator> <!-- presumably the separator written between an option name and its value (e.g. -mode=quickaln); confirm against the runner -->\r
  14    <!-- ERROR: When using -evaluate, Provide a multiple sequence alignment via the -infile flag [FATAL:T-COFFEE] \r
  15       mcoffee: compares alignments obtained by different programs\r
  16    -->\r
  17     <parameters isRequired="false"> <!-- t_coffee flag -mode: only the quickaln preset is offered, and it is also the default -->\r
  18         <name>Preset Mode</name>\r
  19         <description>It indicates that t_coffee will use some hard coded parameters. These include:\r
  20    quickaln: Very fast, sequence type - all, accuracy - medium low \r
  21    </description>\r
  22         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
  23         <optionNames>-mode</optionNames>\r
  24         <defaultValue>quickaln</defaultValue>\r
  25         <possibleValues>quickaln</possibleValues>\r
  26     </parameters>\r
  27     <!--\r
  28     All the options below need proper installation!\r
  29     rcoffee does not work as it fails to find the "templates" \r
  30     <description>\r
  31                 This requires blast to be set up properly; see also Presets\r
  32            dali: a mode used to combine dali pairwise alignments\r
  33            3dcoffee: runs t_coffee with the 3dcoffee parameterization\r
  34            accurate: slow, sequence type - protein, accuracy - high\r
  35        expresso: slow, sequence type - all, accuracy - high\r
  36            rcoffee: slow, sequence type - RNA, accuracy - high\r
  37         </description>\r
  38         <possibleValues>expresso</possibleValues>\r
  39     <possibleValues>dali</possibleValues>\r
  40     <possibleValues>3dcoffee</possibleValues>\r
  41     <possibleValues>accurate</possibleValues>\r
  42     <possibleValues>rcoffee</possibleValues>\r
  43      -->\r
  44 <!-- Parameter with textual value \r
  45     <parameters isRequired="false">\r
  46         <name>Structures</name>\r
  47         <description>Reads or fetch a pdb file. Please enter up to 200 pdb identifiers </description>\r
  48         <optionNames>-pdb</optionNames>\r
  49         <furtherDetails>http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html</furtherDetails>\r
  50     </parameters>\r
  51     -->\r
  52     <parameters isRequired="false"> <!-- t_coffee flag -distance_matrix_mode: slow | fast | very_fast | ktup | aln, default very_fast -->\r
  53         <name>Distance matrix computation method</name>\r
  54         <description>\r
  55         This flag indicates the method used for computing the distance matrix (distance between every pair of sequences) required for the computation of the dendrogram.\r
  56         slow: the chosen dp_mode using the extended library.\r
  57         fast: the fasta dp_mode using the extended library.\r
  58         very_fast: the fasta dp_mode using blosum62mt.\r
  59         ktup: ktup matching (Muscle kind).\r
  60         aln: read the distances on a precomputed MSA.</description>\r
  61         <optionNames>-distance_matrix_mode</optionNames>\r
  62         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
  63         <defaultValue>very_fast</defaultValue>\r
  64         <possibleValues>slow</possibleValues>\r
  65         <possibleValues>fast</possibleValues>\r
  66         <possibleValues>very_fast</possibleValues>\r
  67         <possibleValues>ktup</possibleValues>\r
  68         <possibleValues>aln</possibleValues>\r
  69     </parameters>\r
  70     <parameters isRequired="false"> <!-- t_coffee flag -dp_mode, default cfasta_pair_wise. NOTE(review): the display name says "Tree Computation" but the description covers pairwise dynamic programming modes; confirm before renaming -->\r
  71         <name>Tree Computation method</name>\r
  72         <description>\r
  73         gotoh_pair_wise: implementation of the gotoh algorithm (quadratic in memory and time)\r
  74         myers_miller_pair_wise: implementation of the Myers and Miller dynamic programming algorithm (quadratic in time and linear in space). This algorithm is recommended for very long sequences. It is about 2 times slower than gotoh and only accepts tg_mode=1 or 2 (i.e. gaps penalized for opening).\r
  75         fasta_pair_wise: implementation of the fasta algorithm. The sequence is hashed, looking for ktuples words. Dynamic programming is only carried out on the ndiag best scoring diagonals. This is much faster but less accurate than the two previous. This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag\r
  76         cfasta_pair_wise: c stands for checked. It is the same algorithm. The dynamic programming is made on the ndiag best diagonals, and then on the 2*ndiags, and so on until the scores converge. Complexity will depend on the level of divergence of the sequences, but will usually be L*log(L), with an accuracy comparable to the first two modes (this was checked on BaliBase). This mode is controlled by the parameters -ktuple, -diag_mode and -ndiag\r
  77                 </description>\r
  78         <optionNames>-dp_mode</optionNames>\r
  79         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
  80         <defaultValue>cfasta_pair_wise</defaultValue>\r
  81         <possibleValues>gotoh_pair_wise</possibleValues>\r
  82         <possibleValues>myers_miller_pair_wise</possibleValues>\r
  83         <possibleValues>fasta_pair_wise</possibleValues>\r
  84         <possibleValues>cfasta_pair_wise</possibleValues>\r
  85     </parameters>\r
  86 <!-- This should be grouped with -dp_mode -->\r
  87     <parameters isRequired="false"> <!-- t_coffee flag -ndiag: integer in [0, 1000], default 0 -->\r
  88         <name>Number of diagonals used by the fasta_pair_wise algorithm</name>\r
  89         <description>\r
  90         Indicates the number of diagonals used by the fasta_pair_wise algorithm (cf -dp_mode). When  -ndiag=0, n_diag=Log (length of the smallest sequence)+1. \r
  91         When -ndiag and -diag_threshold are set, diagonals are selected if and only if they fulfill both conditions.\r
  92                 </description>\r
  93         <optionNames>-ndiag</optionNames>\r
  94         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
  95         <defaultValue>0</defaultValue>\r
  96         <validValue>\r
  97             <type>Integer</type>\r
  98             <min>0</min>\r
  99             <max>1000</max>\r
 100         </validValue>\r
 101     </parameters>\r
 102     <parameters isRequired="false"> <!-- t_coffee flag -diag_mode: 0 | 1, default 0 -->\r
 103         <name>Diagonal scoring mode</name>\r
 104         <description>\r
 105         Indicates the manner in which diagonals are scored during the fasta hashing. \r
 106 0: indicates that the score of a diagonal is equal to the sum of the scores of the exact matches it contains. \r
 107 1: indicates that this score is set equal to the score of the best uninterrupted segment (useful when dealing with fragments of sequences).\r
 108                 </description>\r
 109         <optionNames>-diag_mode</optionNames>\r
 110         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 111         <defaultValue>0</defaultValue>\r
 112         <possibleValues>0</possibleValues>\r
 113         <possibleValues>1</possibleValues>\r
 114     </parameters>\r
 115     <parameters isRequired="false"> <!-- t_coffee flag -diag_threshold: integer in [0, 1000], default 0 -->\r
 116         <name>Diagonal threshold</name>\r
 117         <description>\r
 118         Sets the value of the threshold when selecting diagonals. \r
 119         0: indicates that -ndiag should be used to select the diagonals (cf -ndiag section).\r
 120                 </description>\r
 121         <optionNames>-diag_threshold</optionNames>\r
 122         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 123         <defaultValue>0</defaultValue>\r
 124         <validValue>\r
 125             <type>Integer</type>\r
 126             <min>0</min>\r
 127             <max>1000</max>\r
 128         </validValue>\r
 129     </parameters>\r
 130     <parameters isRequired="false"> <!-- t_coffee flag -sim_matrix: vasiliky | idmat, default vasiliky -->\r
 131         <name>Alphabet degeneration method</name>\r
 132         <description>\r
 133         Indicates the manner in which the amino acid alphabet is degenerated when hashing in the \r
 134         fasta_pairwise dynamic programming. Standard ClustalW matrices are all valid. \r
 135         They are used to define groups of amino acids having positive substitution values. \r
 136         In T-Coffee, the default is a 13 letter grouping named Vasiliky, with residues grouped as follows:\r
 137         rk, de, qh, vilm, fy (other residues kept alone).\r
 138         This alphabet is set with the flag -sim_matrix=vasiliky. \r
 139         In order to keep the alphabet non degenerated, -sim_matrix=idmat can be used to retain \r
 140         the standard alphabet.\r
 141                 </description>\r
 142         <optionNames>-sim_matrix</optionNames>\r
 143         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 144         <defaultValue>vasiliky</defaultValue>\r
 145         <possibleValues>vasiliky</possibleValues>\r
 146         <possibleValues>idmat</possibleValues>\r
 147     </parameters>\r
 148  <parameters isRequired="false"> <!-- t_coffee flag -matrix: only blosum62mt is offered and no default value is declared -->\r
 149         <name>Substitution Matrix</name>\r
 150         <description>\r
 151 This flag sets the matrix that will be used by alignment methods within t_coffee (slow_pair, lalign_id_pair). It does not affect external methods (like clustal_pair, clustal_aln). \r
 152 Users can also provide their own matrices, using the matrix format described in the appendix.\r
 153                 </description>\r
 154         <optionNames>-matrix</optionNames>\r
 155         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 156                 <possibleValues>blosum62mt</possibleValues>\r
 157         <!-- Disabled: the extended -matrix definition below causes tcoffee to fail, so only blosum62mt is offered above \r
 158         <parameters isRequired="false">\r
 159         <name>Matrix</name>\r
 160         <description>Substitution Matrix to use</description>\r
 161         <optionNames>-matrix</optionNames>\r
 162         <furtherDetails>http://www.compbio.dundee.ac.uk/users/pvtroshin/ws/Index.html</furtherDetails>\r
 163         <defaultValue>BLOSUM62</defaultValue>\r
 164                 <possibleValues>BLOSUM100</possibleValues>\r
 165                 <possibleValues>BLOSUM30</possibleValues>\r
 166                 <possibleValues>BLOSUM35</possibleValues>\r
 167                 <possibleValues>BLOSUM40</possibleValues>\r
 168                 <possibleValues>BLOSUM45</possibleValues>\r
 169                 <possibleValues>BLOSUM50</possibleValues>\r
 170                 <possibleValues>BLOSUM55</possibleValues>\r
 171                 <possibleValues>BLOSUM60</possibleValues>\r
 172                 <possibleValues>BLOSUM62</possibleValues>\r
 173                 <possibleValues>BLOSUM65</possibleValues>\r
 174                 <possibleValues>BLOSUM70</possibleValues>\r
 175                 <possibleValues>BLOSUM75</possibleValues>\r
 176                 <possibleValues>BLOSUM80</possibleValues>\r
 177                 <possibleValues>BLOSUM85</possibleValues>\r
 178                 <possibleValues>BLOSUM90</possibleValues>\r
 179                 <possibleValues>BLOSUMN</possibleValues>\r
 180                 <possibleValues>DAYHOFF</possibleValues>\r
 181                 <possibleValues>GONNET</possibleValues>\r
 182                 <possibleValues>IDENTITY</possibleValues>\r
 183                 <possibleValues>MATCH</possibleValues>\r
 184                 <possibleValues>NUC.4.2</possibleValues>\r
 185                 <possibleValues>NUC.4.4</possibleValues>\r
 186                 <possibleValues>PAM10</possibleValues>\r
 187                 <possibleValues>PAM100</possibleValues>\r
 188                 <possibleValues>PAM110</possibleValues>\r
 189                 <possibleValues>PAM120</possibleValues>\r
 190                 <possibleValues>PAM130</possibleValues>\r
 191                 <possibleValues>PAM140</possibleValues>\r
 192                 <possibleValues>PAM150</possibleValues>\r
 193                 <possibleValues>PAM160</possibleValues>\r
 194                 <possibleValues>PAM170</possibleValues>\r
 195                 <possibleValues>PAM180</possibleValues>\r
 196                 <possibleValues>PAM190</possibleValues>\r
 197                 <possibleValues>PAM20</possibleValues>\r
 198                 <possibleValues>PAM200</possibleValues>\r
 199                 <possibleValues>PAM210</possibleValues>\r
 200                 <possibleValues>PAM220</possibleValues>\r
 201                 <possibleValues>PAM230</possibleValues>\r
 202                 <possibleValues>PAM240</possibleValues>\r
 203                 <possibleValues>PAM250</possibleValues>\r
 204                 <possibleValues>PAM260</possibleValues>\r
 205                 <possibleValues>PAM270</possibleValues>\r
 206                 <possibleValues>PAM280</possibleValues>\r
 207                 <possibleValues>PAM290</possibleValues>\r
 208                 <possibleValues>PAM30</possibleValues>\r
 209                 <possibleValues>PAM300</possibleValues>\r
 210                 <possibleValues>PAM310</possibleValues>\r
 211                 <possibleValues>PAM320</possibleValues>\r
 212                 <possibleValues>PAM330</possibleValues>\r
 213                 <possibleValues>PAM340</possibleValues>\r
 214                 <possibleValues>PAM350</possibleValues>\r
 215                 <possibleValues>PAM360</possibleValues>\r
 216                 <possibleValues>PAM370</possibleValues>\r
 217                 <possibleValues>PAM380</possibleValues>\r
 218                 <possibleValues>PAM390</possibleValues>\r
 219                 <possibleValues>PAM40</possibleValues>\r
 220                 <possibleValues>PAM400</possibleValues>\r
 221                 <possibleValues>PAM410</possibleValues>\r
 222                 <possibleValues>PAM420</possibleValues>\r
 223                 <possibleValues>PAM430</possibleValues>\r
 224                 <possibleValues>PAM440</possibleValues>\r
 225                 <possibleValues>PAM450</possibleValues>\r
 226                 <possibleValues>PAM460</possibleValues>\r
 227                 <possibleValues>PAM470</possibleValues>\r
 228                 <possibleValues>PAM480</possibleValues>\r
 229                 <possibleValues>PAM490</possibleValues>\r
 230                 <possibleValues>PAM50</possibleValues>\r
 231                 <possibleValues>PAM500</possibleValues>\r
 232                 <possibleValues>PAM60</possibleValues>\r
 233                 <possibleValues>PAM70</possibleValues>\r
 234                 <possibleValues>PAM80</possibleValues>\r
 235                 <possibleValues>PAM90</possibleValues>\r
 236    </parameters>\r
 237     -->\r
 238     \r
 239    </parameters>\r
 240     <parameters isRequired="false"> <!-- t_coffee flag -nomatch: integer in [0, 1000], default 0 -->\r
 241         <name>Match penalty</name>\r
 242         <description>\r
 243                         Indicates the penalty to associate with a match. When using a library, \r
 244                         all matches are positive or equal to 0. Matches equal to 0 are unsupported by the \r
 245                         library but non-penalized. Setting nomatch to a non-negative value makes it possible \r
 246                         to penalize these null matches and prevent unrelated sequences from being aligned \r
 247                         (this can be useful when the alignments are meant to be used for structural modeling)</description>\r
 248         <optionNames>-nomatch</optionNames>\r
 249         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 250         <defaultValue>0</defaultValue>\r
 251         <validValue>\r
 252             <type>Integer</type>\r
 253             <min>0</min>\r
 254             <max>1000</max>\r
 255         </validValue>\r
 256     </parameters>\r
 257     <parameters isRequired="false"> <!-- t_coffee flag -gapopen: integer in [-1000, 0], default 0 -->\r
 258         <name>Gap opening penalty</name>\r
 259         <description>\r
 260                 Indicates the penalty applied for opening a gap. The penalty must be negative. \r
 261                 If no value is provided when using a substitution matrix, a value will be automatically computed.\r
 262                 Here are some guidelines regarding the tuning of gapopen and gapext. \r
 263                 In T-Coffee matches get a score between 0 (match) and 1000 (match perfectly consistent with the library).\r
 264                 The default cosmetic penalty is set to -50 (5% of a perfect match). \r
 265                 If you want to tune -gapopen and see a strong effect, you should therefore consider values between 0 \r
 266                 and -1000. \r
 267                 </description>\r
 268         <optionNames>-gapopen</optionNames>\r
 269         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 270         <defaultValue>0</defaultValue>\r
 271         <validValue>\r
 272             <type>Integer</type>\r
 273             <min>-1000</min>\r
 274             <max>0</max>\r
 275         </validValue>\r
 276     </parameters>\r
 277     <parameters isRequired="false"> <!-- t_coffee flag -gapext: integer in [-1000, 0], default 0 -->\r
 278         <name>Gap extension penalty</name>\r
 279         <description>\r
 280                 Indicates the penalty applied for extending a gap. The penalty must be negative. \r
 281                 </description>\r
 282         <optionNames>-gapext</optionNames>\r
 283         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 284         <defaultValue>0</defaultValue>\r
 285         <validValue>\r
 286             <type>Integer</type>\r
 287             <min>-1000</min>\r
 288             <max>0</max>\r
 289         </validValue>\r
 290     </parameters>\r
 291     <parameters isRequired="false"> <!-- t_coffee flag -cosmetic_penalty: integer in [-1000, 0], default -50 -->\r
 292         <name>Cosmetic penalty</name>\r
 293         <description>\r
 294         Indicates the penalty applied for opening a gap. The penalty must be negative and is set to a very low value by default.\r
 295         It will only have an influence on the portions of the alignment that are unalignable. \r
 296         It will not make them more correct, but only more pleasing to the eye ( i.e. Avoid stretches \r
 297         of lonely residues). The cosmetic penalty is automatically turned off if a substitution matrix is \r
 298         used rather than a library.\r
 299                 </description>\r
 300         <optionNames>-cosmetic_penalty</optionNames>\r
 301         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 302         <defaultValue>-50</defaultValue>\r
 303         <validValue>\r
 304             <type>Integer</type>\r
 305             <min>-1000</min>\r
 306             <max>0</max>\r
 307         </validValue>\r
 308     </parameters>\r
 309     <parameters isRequired="false"> <!-- t_coffee flag -tg_mode: 0 | 1 | 2, default 1 -->\r
 310         <name>Terminal gaps penalty</name>\r
 311         <description>\r
 312                 0: terminal gaps penalized with -gapopen + -gapext*len\r
 313                 1: terminal gaps penalized with a -gapext*len\r
 314                 2: terminal gaps unpenalized.\r
 315                 </description>\r
 316         <optionNames>-tg_mode</optionNames>\r
 317         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 318         <defaultValue>1</defaultValue>\r
 319         <possibleValues>0</possibleValues>\r
 320         <possibleValues>1</possibleValues>\r
 321         <possibleValues>2</possibleValues>\r
 322     </parameters>\r
 323     <parameters isRequired="false"> <!-- t_coffee flag -iterate: integer in [-1, 100], default 0 -->\r
 324         <name>Number of iterations</name>\r
 325         <description>\r
 326                         Sequences are extracted in turn and realigned to the MSA. \r
 327                         If iterate is set to -1, each sequence is realigned, otherwise the number of iterations is \r
 328                         set by -iterate.\r
 329                 </description>\r
 330         <optionNames>-iterate</optionNames>\r
 331         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 332         <defaultValue>0</defaultValue>\r
 333         <validValue>\r
 334             <type>Integer</type>\r
 335             <min>-1</min>\r
 336             <max>100</max>\r
 337         </validValue>\r
 338     </parameters>\r
 339     <parameters isRequired="false"> <!-- t_coffee flag -outorder: input | aligned, default input -->\r
 340         <name>Output order</name>\r
 341         <description>\r
 342                 Sets the order of the sequences in the output alignment: -outorder=input means the sequences \r
 343                 are kept in the original order. -outorder=aligned means the sequences come in the order \r
 344                 indicated by the tree. This order can be seen as a one-dimensional projection of the tree distances. \r
 345                 </description>\r
 346         <optionNames>-outorder</optionNames>\r
 347         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 348         <defaultValue>input</defaultValue>\r
 349         <possibleValues>input</possibleValues>\r
 350         <possibleValues>aligned</possibleValues>\r
 351     </parameters>\r
 352     <parameters isRequired="false"> <!-- t_coffee flag -inorder: input | aligned, default aligned -->\r
 353         <name>Input order</name>\r
 354         <description>\r
 355                 Multiple alignments based on dynamic programming depend slightly on the order in which \r
 356                 the incoming sequences are provided. To prevent this effect sequences are arbitrarily \r
 357                 sorted at the beginning of the program (-inorder=aligned). \r
 358                 However, this affects the sequence order within the library. \r
 359                 You can switch this off by setting -inorder=input\r
 360                 </description>\r
 361         <optionNames>-inorder</optionNames>\r
 362         <furtherDetails>prog_docs/tcoffee.html</furtherDetails>\r
 363         <defaultValue>aligned</defaultValue>\r
 364         <possibleValues>input</possibleValues>\r
 365         <possibleValues>aligned</possibleValues>\r
 366     </parameters>\r
 367 </runnerConfig>\r