3 # Last changed Time-stamp: <2011-05-27 17:10:13 stef>
4 # tool for the design of bistable RNA molecules
7 # additional paths were perl looks for RNA.pm and RNA/barriers.pm
8 # use lib '/home/RNA/barriers';
13 use vars qw/$opt_debug $opt_v $ParamFile/;
15 my %pair = ("AU", 5, "GC", 1, "CG", 2, "GU", 3, "UG", 4, "UA", 6);
16 my %mut = ('A' => 'G', 'G' => 'A', 'U' => 'C', 'C' => 'U');
17 my %wc = ('A' => 'U', 'G' => 'C', 'U' => 'A', 'C' => 'G');
19 my ($fist, $sest, $startseq, $cons);
25 my $Temperature1 = 37;
27 my ($optseq, $optcost, $e);
33 Getopt::Long::config('no_ignore_case');
34 &usage() unless GetOptions("T=f" => \$Temperature1,
35 "T2=f" => \$Temperature2,
36 "4" => sub {$RNA::tetra_loop = 0},
37 "d|d0" => sub {$RNA::dangles=0},
38 "d2" => sub {$RNA::dangles=2},,
40 "noGU" => \$RNA::noGU,
41 "noCloseGU" => \$RNA::no_closingGU,
42 "noLP" => \$RNA::noLonelyPairs,
51 $RNA::temperature = $Temperature1;
52 $Temperature2 = $Temperature1 unless defined $Temperature2;
54 $noo = ($noo >= 1) ? $noo : 1;
55 $nom = ($nom >= 1) ? $nom : 1000;
56 $border = ($border >=1) ? $border : 100;
57 my $interactiv = init_ia(0);
58 my $ia = 1 if -t STDIN && -t STDOUT && $#ARGV < 0;
59 RNA::read_parameter_file($ParamFile) if ($ParamFile);
61 # don't call update at this stage since no sequence is known yet
62 #if ($cpnt != -1) { RNA::update_cofold_params() } else { RNA::update_fold_params() }
66 $interactiv->() if $ia;
67 last if !process_input();
69 my @fist = make_pair_table ($fist);
70 my @sest = make_pair_table ($sest);
72 fibo(length($fist)+2);
74 my @clist = make_cycle_table (\@fist, \@sest);
75 $border = estimate_border(\@clist);
76 print_lol->(\@clist) if $opt_debug;
77 my $nocs = calc_number_of_seq(\@clist);
78 print "$nocs possible sequences\n" if ($opt_v);
81 $startseq = make_compatible(@clist, ' ' x length($fist)); # unless $startseq;
82 $optseq = do_opti($nom, $startseq, \@clist);
83 $optcost = cost_function($optseq);
84 substr($optseq, $cpnt-1, 0, '&') if ($cpnt != -1);
86 $cos{$optseq}[0] = $optcost;
87 printf "%s %8.4f\n", $optseq, $optcost;
90 print "the resulting sequences are:\n";
91 foreach $e (sort {$cos{$b}[0]<=>$cos{$a}[0]} keys %cos) {
92 printf "%s %8.4f", $e, $cos{$e}[0];
93 printf " %2d", $cos{$e}[1] if ($cos{$e}[1]>1);
100 my @clist = @{$_[0]};
104 $max_cyc_len = $#{$_}+1 if $#{$_}+1 > $max_cyc_len;
105 $border += calc_number_of_seq([$_])-1;
107 printf "max cyclen is %4d border is %5d\n", $max_cyc_len, $border
123 while ($#fibo < $length) {
124 push @fibo, $fibo[-1] + $fibo[-2];
126 return $fibo[$length];
132 return 0 if !defined $_;
135 return 0 if $fist eq '@';
138 $cpnt = index($fist, "&")+1;
139 die "ERROR: Different Cut-Points set!\n"
140 if ($cpnt != index($sest, "&")+1);
144 $RNA::cut_point = $cpnt;
145 } else { $cpnt = -1 }
146 die "structures have unequal length"
147 if (length($fist) != length($sest));
149 $_ .= 'N' x (length($fist)-length); # pad with N
152 # all init functions are deprecated since ViennaRNA 2.0
153 #if ($cpnt != -1) { &RNA::init_co_pf_fold(length($fist)); } else { &RNA::init_pf_fold(length($fist)); }
160 if (!defined($times)) {
161 print "Input structure1 structure2 & start string\n";
162 print " @ to quit, enter for random start string\n";
164 print "T1= $Temperature1, T2= $Temperature2";
165 $bar ? print " Barrier=$bar\n": print "\n";
166 for (1..8) { print '....,....', $_;}
173 sub make_pair_table {
175 # let table[i]=j if (i,j) is pair, -1 if i unpaired
176 # indices start at 0 in this version!
177 my($str) = shift(@_);
178 my($i,$j,$hx,$c,@olist,@table);
180 foreach $c (split(//,$str)) {
183 } elsif ($c eq '(') {
185 } elsif ($c eq ')') {
187 die ("unbalanced brackets in make_pair_table") if ($hx<0);
193 die ("too few closed brackets in make_pair_table") if ($hx!=0);
197 sub make_cycle_table {
202 for my $i (0..$#fist) {
208 while ($j>=0 && !$seen[$j]) {
211 $j = ($#cycle1 % 2) ? $sest[$j] : $fist[$j];
215 while ($j>=0 && !$seen[$j]) {
218 $j = ($#cycle2 % 2) ? $sest[$j] : $fist[$j];
220 # duplicate first element if closed cycle
221 unshift @cycle2, $j if ($j!=-1);
222 push @clist, [@cycle2, @cycle1];
227 sub make_compatible {
228 # make random sequence or randomly mutate sequence
229 # by replacing all @components by random paths
232 foreach (@components) {
235 if ($comp[$[] eq $comp[-1] && (@comp>1)) { # it's a cycle
238 my @seq = make_pathseq ($l);
240 substr($startseq, $comp[$_], 1, $seq[$_]);
247 # with one argument: return random cycle of length $l
248 # two arguments: return $rand-th possible cycle sequence
249 # if $l<0 assume closed cycle else path
252 if ($l<0) { # its a closed cycle
255 die "cycles must have even length" if $l%2;
257 my $n = 2*($fibo[$l+1]+$fibo[$ll]); # total number of seqs
258 $rand = rand($n) unless (defined($rand));
261 # set first base in sequence
262 if ($rand < $fibo[$ll]) {
264 } elsif ($rand < 2*$fibo[$ll]) {
265 push @seq, 'C', 'G'; $rand -= $fibo[$ll];
267 $rand -= 2*$fibo[$ll]; $ll=$l;
268 push @seq, ($rand>=$fibo[$l+1])?'U':'G';
269 $rand -= $fibo[$l+1] if $rand >= $fibo[$l+1];
272 # grow sequence to length $l
273 # if we have a cycle starting with A or C $ll=$l-1, else $ll=$l
275 if ($rand < $fibo[$ll-@seq]) {
276 push @seq, 'C','G' if ($seq[-1] eq 'G');
277 push @seq, 'A','U' if ($seq[-1] eq 'U');
279 $rand -= $fibo[$ll-@seq];
280 push @seq, ($seq[-1] eq 'G') ? 'U' : 'G';
283 pop @seq if (@seq > $l); # in case we've added one base too many
287 sub invert_cyclelist {
288 # foreach position store its cycle in @pos_list
289 my @clist = @{$_[0]};
291 foreach my $lp (@clist) {
294 $pos_list[$l[$p]] = [$lp, $p];
305 my $refcost = cost_function($refseq);
306 my ($mutseq,$newcost);
309 my @poslist = invert_cyclelist($clistp);
310 for my $d (1..$nom) {
311 $mutseq = mutate_seq($refseq, $clistp, \@poslist);
312 if (!exists $seen{$mutseq}) {
314 $newcost = cost_function($mutseq,$refcost);
316 printf '%4d %s %6.3f', $d, $mutseq, $newcost;
317 (defined $sE) ? printf("%6.2f\n", $sE) : print "\n";
319 if ($newcost < $refcost) {
323 print "New refseq!\n" if ($opt_v);
329 if ($reject >= $border) {
339 my ($mutseq, $closed);
340 my @clist = @{$_[0]};
341 my @poslist = @{$_[1]};
342 # my @cyc = @{$clist[int rand(@clist)]};
343 my $pos = int rand(length $refseq);
345 my ($cp, $cpos) = @{$poslist[$pos]};
347 if ($#cyc>0 && $cyc[0] == $cyc[-1]) { #close cycle
352 if (rand() < 1/@cyc) {
353 # my $cp = $clist[int rand(@clist)];
354 do { # prevent mutations to the same cyclesequence
355 $mutseq = make_compatible($cp, $refseq);
356 } while (substr($mutseq,$pos,1) eq substr($refseq,$pos,1));
359 my $c = $mut{substr($mutseq, $pos, 1)};
360 substr($mutseq, $pos, 1, $c);
361 if ($cpos>0 || ($closed)) { # pos>0 or closed cyc
362 my $cc = substr($mutseq, $cyc[$cpos-1], 1);
363 substr($mutseq, $cyc[$cpos-1], 1, $wc{$c})
364 if (!exists $pair{"$c$cc"});
366 if ($cpos+1<=$#cyc || $closed) {
367 my $cp = ($cpos+1) % @cyc;
368 my $cc = substr($mutseq, $cyc[$cp], 1);
369 substr($mutseq, $cyc[$cp], 1, $wc{$c})
370 if (!exists $pair{"$c$cc"});
379 $RNA::temperature = $Temperature1;
382 $f1 = RNA::co_pf_fold($seq, undef);
384 $f1 = RNA::pf_fold($seq, undef);
386 my $e1 = RNA::energy_of_struct($seq, $fist);
387 my $e1s = RNA::energy_of_struct($seq, $sest);
390 if ($Temperature1 != $Temperature2) {
391 $RNA::temperature = $Temperature2;
394 $f2 = RNA::co_pf_fold($seq, undef);
396 $f2 = RNA::pf_fold($seq, undef);
398 my $e2 = RNA::energy_of_struct($seq, $sest);
399 my $e2s = RNA::energy_of_struct($seq, $fist);
400 $cost = ($e1-$f1)+($e2-$f2) +
401 $small*(($e1-$e1s+$dg) + ($e2 - $e2s+$dg));
403 $cost = $e1+$e1s-2*$f1+$small*($e1-$e1s+$dg)*($e1-$e1s+$dg);
404 if ($bar && ((!defined $refcost) || $cost<$refcost)) {
406 # require RNA::barrier;
408 # $sE = (RNA::barrier::find_saddle($seq, $fist, $sest, 20))[0];
409 $sE = RNA::find_saddle($seq, $fist, $sest, 20)/100.;
411 printf "sE = %6.2f %6.2f\n", $sE,(0.1*$small*($sE - $bar)*($sE - $bar)) ;
412 $cost += (0.1*$small*($sE - $bar)*($sE - $bar));
416 my $d = class_dist($seq, $cons);
417 $cost += $d*50*$small;
422 sub calc_number_of_seq {
423 my @allcyc = @{$_[0]};
429 if ($cyc[0] eq $cyc[-1] && $#cyc>0) { # closed cycle
431 $nos *= 2*(fibo($l+1)+fibo($l-1));
434 $nos *= 2*(fibo($l+1)+fibo($l));
441 my %classes = ('A' => 'A',
446 'R' => 'AG', # purine
447 'Y' => 'CT', # pyrimidine
452 'V' => 'ACG', # not T
453 'H' => 'ACU', # not G
454 'D' => 'AGU', # not C
455 'B' => 'CGU', # not A
458 my @seq = split(//, uc $_[0]);
459 my @template = split(//, uc $_[1]);
463 my $c = $classes{shift @template} || 'N';
471 usage: $0 [options] [infile]
472 program specific options:
473 -n <int> run $0 <int> times (default: $noo)
474 -trial <int> max number of sequences tested per run (default: $nom)
475 -g <float> small positivie parameter for costfunction (default: $small)
476 -bar <float> barrier hight, seperating the two states (default: $bar)
477 -T2 <float> for temperature sensitive switches, temperature at which
478 2nd structure is ground-state (default: undef)
479 standard Vienna RNA options:
480 -T <float> sets temperature to <float> (default 37.0C)
481 -4 no extrastable tetraloops
483 -d2 use double dangles
484 -noGU no GU or UG basepairs allowed
485 -noClosingGU no closing GU or UG basepairs allowed
486 -noLP no lonely basepairs allowed
487 -P <string> read fold-parameters from file <string>
489 1st-line structure one
490 2nd-line structure two
491 3rd-line sequence constraints or empty line
499 switch.pl - design bistable RNA sequences
503 switch.pl [options] [infile]
507 switch.pl designs RNA sequences that exhibit two secondary
508 structures of almost equal stability. For any two given structures
509 there always exist many sequences compatible with both
510 structures. If both structures are reasonable stable, we can find
511 sequences where both target structures have almost equal energy, and
512 all other structures have much higher energies.
514 For details of the algorithm see: Flamm et al.,
515 "Design of Multi-Stable RNA Molecules", RNA 7:254-265 (2001)
517 Input consists of three lines, the first two containing the target
518 structures in dot bracket notations. The third line may be used to
519 define sequence constraints: It contains a sequence string using
520 IUPAC codes for nucleotide classes (i.e. C<Y> for pyrimidine, C<R> for
521 purine, C<N> for anything...). If the line is empty or shorter than the
522 structures it is padded with C<N>s.
524 Sequence constraints are not strictly enforced, instead a constraint
525 violation term is added to the cost function. Thus it is legal to
526 specify sequence constraints that cannot be fulfilled.
528 switch.pl uses the Vienna RNA package for energy evaluation, the
529 Vienna RNA package and corresponding Perl module therefore have to be
538 number of independent optimization runs
540 =item B<-trial> <int>
542 maximum number of sequences to test per run, up to a million or so.
546 Parameter of the cost function that weights the importance of
547 equal energies, and desired energy barriers.
549 The cost function primarily optimizes product of Boltzman
550 probabilities of the two structures C<p(S1)*P(S2)>, in addition it
551 contains a penalty proportional to C<[E(S1)-E(S2)]^2> that
552 enforces equal energies for both structures. With the --bar it
553 also tries design for a given energy barrier. The -g parameter
554 defines the weight of these additional cost function terms.
558 Temperature in C for all energy calculations. Default 37C.
562 For temperature sensitive switches, use -T and -T2 to define the
563 temperatures at which structures S1 and S2 should be prefered.
565 =item B<-bar> <float>
567 Size of the desired energy barrier between the two structures in
568 kcal/mol. A fast heuristic that looks at shortest refolding paths is
569 used to estimate the barrier. Requires a recent version of the Vienna
570 RNA package that includes the find_saddle() function for estimating
573 =item ViennaRNA standard options
575 the -noLP, -P <paramfile>, -d, -d2, -4, -noGU, -noClosingGU,
576 should work as usual. See the RNAfold man page for details.
582 Ivo L. Hofacker, Christoph Flamm, Peter Stadler, Sebastian
583 Maurer-Stroh, Martin Zehl.
584 Send comments to <ivo@tbi.univie.ac.at>.