#!/usr/bin/perl =head1 NAME prettify.pl =head1 SYNOPSYS prettify.pl [--blc I] [--alscript I] --postscript I =head1 DESCRIPTION This program uses a BLC file to create an Alscript command file. =head1 TODO Add more options, make prettier Alscript command files. =head1 BUGS Alscript can't count properly and so we have to add one to some of the indicies to make it work. =head1 AUTHORS Started by Patrick Audley Extended by Jonathan Barber =cut use strict; use warnings; use Getopt::Long; use BLC; my $prefix = "seq.protein"; my $bin = "/software/submit_all"; my $blc = '-'; my $alscript = '-'; my $psout; GetOptions( "blc:s" => \$blc, "alscript:s" => \$alscript, "postscript=s" => \$psout, ) or die "$!\n"; die "--postscript option required\n" unless $psout; my $data = new BLC; $data->read_file($blc); # Find which entries for secondary structure are in the sequence data my @jpred = map { /^(?:jnet|jhmm|jalign|jpssm|lupas_14|lupas_21|lupas_28)$/ or 0 } $data->get_seq_ids; # Replace space characters with '-' for the FSM my @sequences = map { my $c = $_; $c =~ s/ /-/g; $c } $data->get_sequences; # Find the positions of the secondary structure in the BLC file my ($top, $bottom, $alignEnd); foreach (0 .. $#jpred) { $top = $_ and last if $jpred[$_] } foreach (reverse(0 .. $#jpred)) { $bottom = $_ and last if $jpred[$_] } my $end = length $sequences[0]; # ... and then add 1 to them as Alscript can't count properly $alignEnd = $top; $top++; $bottom++; # alscript bits open ALSOUT, ">$alscript" or die "$alscript: Can't open the Alscript command file\n"; # Commands for Alscript # Add one as Alscript can't count, and make sure it's over a minimum otherwise # it cores... my $foo = @sequences > 50 ? @sequences + 1 : 50; my $MAX_NSEQ = "MAX_NSEQ ".$foo; # Buffer size for reading the BLC file needs to be at least the size of alignment +2 # This also needs to be larger than the longest line in the Alscript command file # If any lines are longer than MAX_INPUT_LEN then Alscript is likely to fail with 'success' my $MAX_INPUT_LEN = "MAX_INPUT_LEN ".($foo > 50 ? $foo + 2 : 50); my $BLOCK_FILE = '#'; unless ($blc eq '-') { $BLOCK_FILE = "BLOCK_FILE $blc"; } my $OUTPUT_FILE = "OUTPUT_FILE $psout"; # Print the header print ALSOUT <{'E'}}) { my ($start, $end) = @{$_}; my $colour = 51; print ALSOUT "STRAND $start $line $end\n"; print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n"; } foreach (@{$fsm->{'H'}}) { my ($start, $end) = @{$_}; my $colour = 50; print ALSOUT "HELIX $start $line $end\n"; print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n"; } } } # Munge out the jpred predictions using a basic FSM # Pass the sequence and the states that are present. # Returns HoA's, keys are the states, arrays are start and end # points of the states. # Idea taken from Patrick Audley sub fsm { my ($sequence, @states) = @_; my $state; # The state we're in my $start = 0; # The start position of the current state my %res; # Hash to store results in my $pos = 0; # The position in the FSM $state = substr $sequence, 0, 1; foreach my $char (split //, $sequence) { $pos++; next if $char eq $state; # Same state # Otherwise we've changed states my $flag = 0; foreach (@states) { if ($_ eq $char) { push @{$res{$state}}, [ $start, $pos - 1 ]; $state = $char; $start = $pos; $flag = 1; last; } } unless ($flag) { warn "'$char' not a defined state at position $pos.\n"; } } return \%res; }