binaries/src/ViennaRNA/Progs/RNAfold.ggo

   1 # Name of your program
   2 package "RNAfold" # don't use package if you're using automake
   3
   4 purpose "Calculate minimum free energy secondary structures and partition function of RNAs"
   5
   6 #usage "RNAfold [options]\n"
   7 #version "2.0"   # don't use version if you're using automake
   8
   9
  10 # command line options passed to gengetopt
  11 args "--file-name=RNAfold_cmdl --include-getopt --default-optional --func-name=RNAfold_cmdline_parser --arg-struct-name=RNAfold_args_info"
  12
  13
  14 description "The program reads RNA sequences from stdin, calculates their minimum free energy (mfe) structure\
  15  and prints to stdout the mfe structure in bracket notation and its free energy. If the -p option\
  16  was given it also computes the partition function (pf) and base pairing probability matrix, and\
  17  prints the free energy of the thermodynamic ensemble, the frequency of the mfe structure in the ensemble,\
  18  and the ensemble diversity to stdout.\n\nIt also produces PostScript files with plots of the resulting\
  19  secondary structure graph and a \"dot plot\" of the base pairing matrix.\nThe dot plot shows a matrix of\
  20  squares with area proportional to the pairing probability in the upper right half, and one square for each\
  21  pair in the minimum free energy structure in the lower left half. For each pair i-j with probability\
  22  p>10E-6 there is a line of the form\n\ni  j  sqrt(p)  ubox\n\nin the PostScript file, so that the pair\
  23  probabilities can be easily extracted.\n\nSequences may be provided in a simple text format where each sequence\
  24  occupies a single line. Output files are named \"rna.ps\" and \"dot.ps\". Existing files of the same name\
  25  will be overwritten.\nIt is also possible to provide sequence data in FASTA format. In this case, the first\
  26  word (max. 42 char) of the FASTA header will be used for output file names. PostScript files \"name_ss.ps\"\
  27  and \"name_dp.ps\" are produced for the structure and dot plot, respectively.\nOnce FASTA input was provided\
  28  all following sequences must be in FASTA format too.\nThe program will continue to read new sequences until a\
  29  line consisting of the single character @ or an end of file condition is encountered.\n\n"
  30
  31 # Options
  32 section "General Options"
  33 sectiondesc="Below are command line options which alter the general behavior of this program\n\n"
  34
  35 option  "constraint"  C
  36 "Calculate structures subject to constraints.\n"
  37 details="The program reads first the\
  38  sequence, then a string containing constraints on the structure encoded with the symbols:\n\n. (no constraint\
  39  for this base)\n\n| (the corresponding base has to be paired)\n\nx (the base is unpaired)\n\n< (base i is paired with\
  40  a base j>i)\n\n> (base i is paired with a base j<i)\n\nand matching brackets ( ) (base i pairs base j)\n\nWith the\
  41  exception of \"|\", constraints will disallow all pairs conflicting with the constraint. This is usually\
  42  sufficient to enforce the constraint, but occasionally a base may stay unpaired in spite of constraints. PF\
  43  folding ignores constraints of type \"|\".\n\n"
  44 flag
  45 off
  46
  47 option  "noconv"  -
  48 "Do not automatically substitute nucleotide \"T\" with \"U\"\n\n"
  49 flag
  50 off
  51
  52 option  "noPS"  -
  53 "Do not produce postscript drawing of the mfe structure.\n\n"
  54 flag
  55 off
  56
  57 section "Algorithms"
  58 sectiondesc="Select additional algorithms which should be included in the calculations.\nThe Minimum free energy\
  59  (MFE) and a structure representative are calculated in any case.\n\n"
  60
  61 option  "partfunc"  p
  62 "Calculate the partition function and base pairing probability matrix.\n"
  63 details="In addition to the MFE structure\
  64  we print a coarse representation of the pair probabilities in form of a pseudo bracket notation, followed by\
  65  the ensemble free energy, as well as the centroid structure derived from the pair probabilities together with\
  66  its free energy and distance to the ensemble. Finally it prints the frequency of the mfe structure, and the\
  67  structural diversity (mean distance between the structures in the ensemble).\nSee the description of pf_fold()\
  68  and mean_bp_dist() and centroid() in the RNAlib documentation for details.\nNote that unless you also specify\
  69  -d2 or -d0, the partition function and mfe calculations will use a slightly different energy model. See the\
  70  discussion of dangling end options below.\n\nAn additionally passed value to this option changes the behavior\
  71  of partition function calculation:\n-p0 Calculate the partition function but not the pair probabilities,\
  72  saving about 50% in runtime. This prints the ensemble free energy -kT ln(Z).\n-p2 Compute stack probabilities,\
  73  i.e. the probability that a pair (i,j) and the immediately interior pair (i+1,j-1) are formed simultaneously in\
  74  addition to pair probabilities. A second postscript dot plot called \"name_dp2.ps\", or \"dot2.ps\" (if the\
  75  sequence does not have a name), is produced that contains pair probabilities in the upper right half and stack\
  76  probabilities in the lower left.\n\n"
  77 int
  78 default="1"
  79 argoptional
  80 optional
  81
  82 option  "MEA"   -
  83 "Calculate an MEA (maximum expected accuracy) structure, where the expected accuracy is computed from the\
  84  pair probabilities: each base pair (i,j) gets a score 2*gamma*p_ij and the score of an unpaired base is\
  85  given by the probability of not forming a pair.\n"
  86 details="The parameter gamma tunes the importance of correctly\
  87  predicted pairs versus unpaired bases. Thus, for small values of gamma the MEA structure will contain\
  88  only pairs with very high probability.\nUsing --MEA implies -p for\
  89  computing the pair probabilities.\n\n"
  90 float
  91 typestr="gamma"
  92 default="1."
  93 argoptional
  94 optional
  95
  96 option  "pfScale" S
  97 "In the calculation of the pf use scale*mfe as an estimate for the ensemble free energy (used to avoid\
  98  overflows).\n"
  99 details="The default is 1.07, useful values are 1.0 to 1.2. Occasionally needed\
 100  for long sequences.\nYou can also recompile the program to use double precision (see the README file).\n\n"
 101 double
 102 typestr="scaling factor"
 103 optional
 104 hidden
 105
 106 option  "circ"    c
 107 "Assume a circular (instead of linear) RNA molecule.\n"
 108 flag
 109 off
 110
 111 option  "ImFeelingLucky"  -
 112 "Return exactly one stochastically backtracked structure\n"
 113 details="This function computes the partition function and returns exactly one secondary structure\
 114  stochastically sampled from the Boltzmann equilibrium according to its probability in the ensemble.\n\n"
 115 flag
 116 off
 117 hidden
 118
 119
 120 option  "bppmThreshold" -
 121 "Set the threshold for base pair probabilities included in the postscript output\n"
 122 details="By setting the threshold the base pair probabilities that are included in the\
 123  output can be varied. By default only those exceeding 1e-5 in probability will be shown as squares\
 124  in the dot plot. Changing the threshold to any other value allows for increase or decrease of data.\n\n"
 125 double
 126 typestr="<value>"
 127 optional
 128 default="1e-5"
 129 hidden
 130
 131 option  "gquad" g
 132 "Incorporate G-Quadruplex formation into the structure prediction algorithm\n"
 133 flag
 134 off
 135
 136
 137 section "Model Details"
 138
 139 option  "temp"  T
 140 "Rescale energy parameters to a temperature of temp C. Default is 37C.\n\n"
 141 double
 142 optional
 143
 144 option  "noTetra" 4
 145 "Do not include special tabulated stabilizing energies for tri-, tetra- and hexaloop hairpins. Mostly for testing.\n\n"
 146 flag
 147 off
 148
 149 option  "dangles" d
 150 "How to treat \"dangling end\" energies for bases adjacent to helices in free ends and multi-loops\n"
 151 details="\nWith -d1 only unpaired bases can participate in at most one dangling end; this setting is\
 152  unsupported for the partition function folding.\n\nWith -d2 this check is ignored,\
 153  dangling energies will be added for the bases adjacent to a helix on both sides in any case; this is the\
 154  default for mfe and partition function folding (-p).\nThe option -d0 ignores dangling ends altogether (mostly for\
 155  debugging).\nWith -d3 mfe folding will allow coaxial stacking of adjacent helices in multi-loops. At the\
 156  moment the implementation will not allow coaxial stacking of the two interior pairs in a loop of degree 3\
 157  and works only for mfe folding.\n\nNote that with -d1 and -d3 pf and mfe folding\
 158  treat dangling ends differently. Use -d2 (the default) in addition to -p to ensure that both algorithms use the same\
 159  energy model.\n\n"
 160 int
 161 default="2"
 162 optional
 163
 164 option  "noLP"  -
 165 "Produce structures without lonely pairs (helices of length 1).\n"
 166 details="For partition function folding this only disallows pairs that can only occur isolated. Other\
 167  pairs may still occasionally occur as helices of length 1.\n\n"
 168 flag
 169 off
 170
 171 option  "noGU"  -
 172 "Do not allow GU pairs\n\n"
 173 flag
 174 off
 175
 176 option  "noClosingGU" -
 177 "Do not allow GU pairs at the end of helices\n\n"
 178 flag
 179 off
 180
 181 option  "paramFile" P
 182 "Read energy parameters from paramfile, instead of using the default parameter set.\n"
 183 details="A sample parameter file should accompany your distribution.\nSee the RNAlib\
 184  documentation for details on the file format.\n\n"
 185 string
 186 typestr="paramfile"
 187 optional
 188
 189 option  "nsp" -
 190 "Allow other pairs in addition to the usual AU,GC,and GU pairs.\n"
 191 details="Its argument is a comma separated list of additionally allowed pairs. If the\
 192  first character is a \"-\" then AB will imply that AB and BA are allowed pairs.\ne.g.\
 193  RNAfold -nsp -GA  will allow GA and AG pairs. Nonstandard pairs are given 0 stacking\
 194  energy.\n\n"
 195 string
 196 optional
 197 hidden
 198
 199 option  "energyModel" e
 200 "Rarely used option to fold sequences from the artificial ABCD... alphabet, where\
 201  A pairs B, C-D etc.  Use the energy parameters for GC (-e 1) or AU (-e 2) pairs.\n\n"
 202 int
 203 optional
 204 hidden
 205
 206 option  "betaScale" -
 207 "Set the scaling of the Boltzmann factors\n"
 208 details="The argument provided with this option is used to scale the thermodynamic temperature\
 209  used in the Boltzmann factors independently from the temperature used to scale the individual\
 210  energy contributions of the loop types. The Boltzmann factors then become exp(-dG/(kT*betaScale))\
 211  where k is the Boltzmann constant, dG the free energy contribution of the state and T the\
 212  absolute temperature.\n\n"
 213 double
 214 default="1."
 215 optional
 216 dependon="partfunc"
 217 hidden
 218
 219 text    "\nIf in doubt our program is right, nature is at fault.\nComments should be sent to\
 220  rna@tbi.univie.ac.at.\n\n"