3 # Last changed Time-stamp: <2008-08-26 16:04:00 ivo>
4 # produce Pauline Hogeweg's mountain representation *_dp.ps files
5 # writes 3 sets of x y data separated by a "&"
6 # the first two sets are mountain representations from base pair probabilities
7 # and mfe structure, respectively.
8 # definition: mm[i],mp[i] = (mean) number of base pairs enclosing base i
9 # third set a measure of well-definedness: the entropy of the pair probs of
10 # base i, sp[i] = -Sum p_i * ln(p_i). Well-defined regions have low entropy.
12 # use e.g. as mountain.pl dot.ps | xmgrace -pipe
15 our (@mm, @mp, @pp, @sp, @p0, $i, $max, $length, $do_png); # perl5 only
17 my $sep = "&"; # xmgr uses & to separate data sets
19 if (@ARGV && ($ARGV[0] eq '-png')) {
20 eval "use Chart::Lines";
22 "\nCould not load the Chart::Lines module required with -png option\n")
31 if (/\/sequence \{ \((\S*)[\\\)]/) {
32 my $seq = $1; # empty for new version
33 while (!/\) \} def/) { # read until end of definition
35 /(\S*)[\\\)]/; # ends either with `)' or `\'
38 $length = length($seq);
42 next unless /(\d+) (\d+) (\d+\.\d+) (.box)$/;
43 my ($i, $j, $p, $id) = ($1,$2,$3,$4);
45 $p *= $p; # square it to probability
48 my $ss = $p>0 ? $p*log($p) : 0;
59 $mp[0] = $mm[0] = $max = 0;
60 for ($i=1; $i<=$length; $i++) {
63 $max = $mp[$i] if ($mp[$i]>$max);
65 $max = $mp[$i] if ($mp[$i]>$max);
66 $sp[$i] += (1-$pp[$i])*log(1-$pp[$i]);
73 # FIXME: legend_lables when doing mfe only
74 my $skip = 10**(int (log($length)/log(10.) - 0.5));
75 my $obj = Chart::Lines->new( $width, $height );
76 $obj->set ('title' => $ARGV,
77 'x_label' => 'Position',
78 'y_label' => 'Height',
81 'legend_labels' => ['mfe', 'pf'],
82 'skip_x_ticks' => $skip);
84 $obj->add_dataset ((0..$length));
86 $obj->add_dataset (@mp);
87 $obj->add_dataset (@mm);
88 $obj->png("mountain.png");
91 # print the results for plotting
92 for ($i=1; $i<=$length; $i++) {
93 printf("%4d %7.5g\n", $i, $mp[$i]);
97 for ($i=1; $i<=$length; $i++) {
98 printf("%4d %4d\n", $i, $mm[$i]);
102 for ($i=1; $i<=$length; $i++) {
103 printf("%4d %7.5g\n", $i, -$sp[$i]/$log2);
109 mountain - produce coordinates for a mountain plot from a dot plot
113 mountain.pl myseq_dp.ps > mountain.dat
117 reads pair proabilities and MFE structure from a probability dot
118 plot as produced by C<RNAfold -p>, and produces x-y data suitable
119 for producing a mountain plot using standard xy-plotting programs.
121 Output consists of 3 data sets separated by a line containing only
122 the C<&> character. The first two sets are mountain representations
123 computed from base pair probabilities and mfe structure, respectively.
124 For the mfe case the moutain plot graphs the number base pairs
125 enclosing a position k, in case of pair probabilities we use the average
126 number of base pairs computed as m_k = \Sum_i<k<j p_ij.
127 The third set contains the positional entropy, which provides a measure
128 of local structural welldefinedness, s_i = -\Sum_j p_ij * ln(p_ij).
130 The output is suitable for graphing with xmgrace, e.g.:
131 C< RNAfold -p < foo.seq; mountain.pl foo_dp.ps | xmgrace -pipe>
135 Ivo L. Hofacker <ivo@tbi.univie.ac.at>