9 my $ssFile='alirna.ps';
16 GetOptions ('-s:s' => \$ssFile,
17 '-c:i' => \$columnWidth,
23 "h"=> sub {pod2usage(-verbose => 1)},
24 ) || pod2usage(-verbose => 0);
26 pod2usage(-verbose => 2) if $man;
29 pod2usage(-msg => "RNAalifold secondary structure file $ssFile not found (use option -s)",
34 my $aln=readClustal();
37 for my $i (1..length($aln->[0]->{seq})) {
43 open(ALIRNA,"<$ssFile");
47 $pairsFlag=1 if (/\/pairs/);
48 if ($pairsFlag and /\[(\d+) (\d+)\]/) {
52 $pairsFlag=0 if ($pairsFlag and /def/);
55 $consStruc=join('',@ss);
58 $row->{seq}=uc($row->{seq});
67 my $consSeq=consensusSeq($aln);
69 #print "$consSeq\n$consStruc\n";
71 plotAln($aln,$consSeq,$consStruc);
75 ######################################################################
77 # consensusSeq(\@aln alnref)
79 # Returns consensus sequence of alignment
81 ######################################################################
88 for my $i (0..length($aln[0]->{seq})-1) {
90 my %countHash=('A'=>0,'C'=>0,'G'=>0,'T'=>0,'U'=>0,'-'=>0);
91 #print %countHash,"\n";
92 for my $j (0..$#aln) {
93 my $c=substr($aln[$j]->{seq},$i,1);
96 #print %countHash,"\n";
100 for my $c ('A','C','G','T','U','-') {
101 if ($countHash{$c}>=$maxCount) {
103 $maxCount=$countHash{$c};
112 ######################################################################
114 # readClustal(filehandle)
116 # Reads Clustal W formatted alignment file and returns it in list of
117 # hash references with keys "name" and "seq" for the name and the sequence,
120 ######################################################################
125 my (%order, $order, %alignments);
128 my ($seqname, $aln_line) = ('', '');
129 if ( /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) {
131 ($seqname,$aln_line) = ("$1/$2-$3",$4);
132 } elsif ( /^(\S+)\s+([A-Z\-]+)\s*\d*$/ ) {
133 ($seqname,$aln_line) = ($1,$2);
137 if ( !exists $order{$seqname} ) {
138 $order{$seqname} = $order++;
140 $alignments{$seqname} .= $aln_line;
143 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
144 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
145 (my $sname,my $start, my $end) = ($1,$2,$3);
147 (my $sname, my $start) = ($name,1);
148 my $str = $alignments{$name};
149 $str =~ s/[^A-Za-z]//g;
150 my $end = length($str);
152 my $seq=$alignments{$name};
153 push @out, {name=>$name,seq=>$seq};
158 ######################################################################
160 # getPairs(\@aln alnref, $ss string)
162 # Evalutates the pairing of an alignment according to a given
163 # consensus secondary structure
165 # Returns list of all base pairs which is a hash with the following
168 # open ... column in the alignment of the first base in the pair "("
169 # close ... column in the alignment of the second base in the pair ")"
170 # all ... list of all basepairs in the different sequences in the alignment
171 # pairing ... list of all different pairing basepairs
172 # nonpairing ... list of all incompatible basepairs
174 ######################################################################
178 my @inputAln=@{$_[0]};
181 # return nothing if there are no pairs
182 if (!($ss=~tr/(/(/)) {
187 foreach my $row (@inputAln) {
191 my @tmp=split(//,$seq);
194 my @ss=split(//,$ss);
199 foreach my $column (0..$#ss) {
201 my $currChar=$ss[$column];
203 if ($currChar eq '(') {
207 if ($currChar eq ')') {
208 my $openedCol=pop @stack;
209 push @pairs,{open=>$openedCol,close=>$column};
213 @pairs=sort {$a->{open} <=> $b->{open}} @pairs;
215 foreach my $i (0..$#pairs) {
216 #print "$i: $pairs[$i]->{open} - $pairs[$i]->{close}\n";
222 for my $j (0..$#aln) {
223 my $currPair=$aln[$j][$pairs[$i]->{open}].$aln[$j][$pairs[$i]->{close}];
227 for my $pair (@all) {
228 if (($pair eq 'AU') or
235 push @pairing, $pair;
236 } elsif ($pair eq "--") {
239 push @nonpairing,$pair;
244 my @uniquePairing = grep(!$saw{$_}++, @pairing);
246 $pairs[$i]->{all}=[@all];
247 $pairs[$i]->{pairing}=[@uniquePairing];
248 $pairs[$i]->{nonpairing}=[@nonpairing];
256 ######################################################################
258 # plotAln(\@aln ref-to-alignment, $consensus string, $ss string)
260 # Creates a image of the alignment with various annotations
262 # \@aln ... alignment in list of hash format
263 # $consensus ... consensus sequence
264 # $ss ... consensus secondary structure in dot bracket notation
266 # Returns png as string.
268 ######################################################################
278 # Get important measures
279 (my $fontWidth, my $fontHeight) = (6,6.5);
281 my $length=length($aln[0]->{seq});
283 foreach my $row (@aln) {
284 $maxName=length($row->{name}) if (length($row->{name})>$maxName);
286 $maxName = 5 if $maxName<5 && $ruler;
289 my $lineStep=$fontHeight+2; # distance between lines
290 my $blockStep=3.5*$fontHeight; # distance between blocks
291 my $consStep=$fontHeight*0.5; # distance between alignment and conservation curve
293 my $nameStep=3*$fontWidth; # distance between names and sequences
294 my $maxConsBar=2.5*$fontHeight; # Height of conservation curve
295 my $startY=2; # "y origin"
296 my $namesX=$fontWidth; # "x origin"
297 my $seqsX=$namesX+$maxName*$fontWidth+$nameStep; # x-coord. where sequences start
299 # Calculate width and height
301 my $tmpColumns=$columnWidth;
303 $tmpColumns=$length if ($length<$columnWidth);
305 my $imageWidth = int(0.9+$namesX+($maxName+$tmpColumns)*$fontWidth+$nameStep+$fontWidth);
306 $imageWidth += int($fontWidth + length("$length")*$fontWidth) if $resnum;
307 my $numBlock = int(1+($length-1)/$columnWidth);
308 my $imageHeight = int($startY+$numBlock*((@aln+1+$ruler)*$lineStep+$blockStep+$consStep+$ssStep));
324 my $ocre1="0.16 0.6";
325 my $green1="0.32 0.6";
326 my $turq1="0.48 0.6";
327 my $blue1="0.65 0.6";
328 my $violet1="0.81 0.6";
331 my $ocre2="0.16 0.2";
332 my $green2="0.32 0.2";
333 my $turq2="0.48 0.2";
334 my $blue2="0.65 0.2";
335 my $violet2="0.81 0.2";
338 my @colorMatrix=([$red,$red1,$red2],
339 [$ocre,$ocre1,$ocre2],
340 [$green,$green1,$green2],
341 [$turq,$turq1,$turq2],
342 [$blue,$blue1,$blue2],
343 [$violet,$violet1,$violet2]);
345 my @pairs=getPairs(\@aln,$ss);
347 foreach my $pair (@pairs) {
349 foreach my $column ($pair->{open},$pair->{close}) {
350 my $block = int(0.999+($column+1)/$columnWidth);
351 my $effCol=$column-($block-1)*$columnWidth;
352 my $x=$seqsX+$effCol*$fontWidth;
354 foreach my $row (0..$#aln) {
356 my $pairing=@{$pair->{pairing}};
357 my $nonpairing=@{$pair->{nonpairing}};
361 if ($nonpairing <=2) {
362 $color=$colorMatrix[$pairing-1][$nonpairing];
364 if (($pair->{all}->[$row] eq 'AU') or
365 ($pair->{all}->[$row] eq 'UA') or
366 ($pair->{all}->[$row] eq 'GC') or
367 ($pair->{all}->[$row] eq 'CG') or
368 ($pair->{all}->[$row] eq 'GU') or
369 ($pair->{all}->[$row] eq 'UG')) {
371 my $y=$startY+($block-1)*($lineStep*(@aln+1+$ruler)+$blockStep+$consStep)+$ssStep*($block)+($row+1)*$lineStep;
373 my $xtmp=$x+$fontWidth;
375 my $ytmp2=$y+$fontHeight+1;
376 $ps.="$x $ytmp1 $xtmp $ytmp2 $color box\n";
382 # Calculate conservation scores as (M+1)/(N+1), where M is the
383 # number of matches to the consensus and N is the number of
384 # sequences in the alignment
386 my @conservation=(); # each column one score
388 for my $column (0..$length-1) {
390 my $consChar=substr($consensus,$column,1);
392 # if consensus is gap, score=0
393 if ($consChar eq "-" or $consChar eq "_") {
394 push @conservation, 0;
399 for my $row (0..$#aln) {
400 my $currChar=substr($aln[$row]->{seq},$column,1);
401 $match++ if ($currChar eq $consChar);
404 my $score=($match-1)/(@aln-1);
405 push @conservation,$score;
408 # Draw the alignments in chunks
412 while ($currPos<$length) {
414 # secondary structure in first line
415 #$out->string($font,$seqsX,$currY,substr($ss,$currPos,$columnWidth),$black);
416 my $tmpSeq=substr($ss,$currPos,$columnWidth);
417 $tmpSeq=~s/\(/\\\(/g;
418 $tmpSeq=~s/\)/\\\)/g;
419 $ps .= "0 setgray\n";
420 $ps.="($tmpSeq) $seqsX $currY string\n";
421 $currY+=$lineStep+$ssStep;
423 # sequences labeled only with the organism-specifier
424 foreach my $row (@aln) {
425 #$out->string($font,$namesX,$currY,$row->{name},$black);
426 $ps.="($row->{name}) $namesX $currY string\n";
427 #$out->string($font,$seqsX,$currY,substr($row->{seq},$currPos,$columnWidth),$black);
428 my $tmpSeq=substr($row->{seq},$currPos,$columnWidth);
429 $ps.="($tmpSeq) $seqsX $currY string\n";
431 my $numX=$seqsX+(length($tmpSeq)+1)*$fontWidth; # x-coord. where residue nums start
432 $tmpSeq = substr($row->{seq},0, $currPos+$columnWidth);
433 my $p = length($tmpSeq) - $tmpSeq =~ tr/_-//; # remove gaps
434 $ps .= sprintf("(%" . length($length) . "d) $numX $currY string\n", $p);
439 $ps .= "(ruler) $namesX $currY string\n";
440 my $p = 10 * (int(1.5+$currPos/10));
441 my $r = "." x ($p-$currPos-length("$p"));
443 my $maxp = $currPos+$columnWidth;
444 $maxp = $length if $maxp>$length;
445 for (;$p<=$maxp; $p+=10) {
446 $r .= sprintf("%10d", $p);
449 $r .= "." x ($maxp-$p+10) if $p>$maxp;
450 $ps .= "($r) $seqsX $currY string\n";
456 $ps .= "0.6 setgray\n";
457 for my $col ($currPos..$currPos+$columnWidth-1) {
458 my $score=shift @conservation;
459 last if (!defined $score);
460 my $barHeight=$maxConsBar*$score;
461 $barHeight=1 if ($barHeight==0);
462 my $x=$seqsX+($col-($columnWidth*int($currPos/$columnWidth)))*$fontWidth;
464 my $ytmp1=$currY+$maxConsBar-$barHeight;
465 my $xtmp2=$x+$fontWidth;
466 my $ytmp2=$currY+$maxConsBar;
467 $ps .= "$x $ytmp1 $xtmp2 $ytmp2 box2\n";
470 $currPos+=$columnWidth;
473 my $BB="2 2 $imageWidth $imageHeight";
477 s/!HEIGHT!/$imageHeight/;
486 coloraln.pl - colorize an alignment with consensus structure
490 coloraln.pl [-r] [-n] [-c columns] [-s structure.ps] file.aln
494 colorrna.pl reads an alignment in CLUSTAL format, and a consensus
495 secondary structure (which it extracts from a postscript secondary
496 structure plot). It produces a postscript figure of the alignment, in
497 which compensatory mutation supporting the consensus structure are
498 marked by color. The color scheme is the same employed by RNAalifold
499 and alidot: Red marks pairs with no sequence variation; ochre, green,
500 turquoise, blue, and violet mark pairs with 2,3,4,5,6 different tpyes
501 of pairs, respectively.
509 read I<file> to extract the consensus structure (default: C<alirna.ps>)
513 break alignments into blocks of at most I<width> columns, (default: 120)
517 suppress conversion of C<T> to C<U>, i.e. do not convert DNA to
518 RNA, (default: convert to C<U>)
522 add a "ruler" below the alignment showing sequence positions
526 write sequence position at the end of each sequence line
534 %!PS-Adobe-3.0 EPSF-3.0
538 % draws Vienna RNA like colored boxes
539 /box { % x1 y1 x2 y2 hue saturation
541 dup 0.3 mul 1 exch sub sethsbcolor
542 exch 3 index sub exch 2 index sub rectfill
546 % draws a box in current color
547 /box2 { % x1 y1 x2 y2
548 exch 3 index sub exch 2 index sub rectfill
551 /string { % (Text) x y
560 [10 0 0 -10 0 0] makefont setfont
562 %100 100 106 110 0.5 0.5 1 box
564 %(This is a string) 200 200 string