X-Git-Url: http://source.jalview.org/gitweb/?p=jpred.git;a=blobdiff_plain;f=websoft%2Fbin%2Fprettify.pl;fp=websoft%2Fbin%2Fprettify.pl;h=dfc53197a59f2a724a172faac385959cbd381907;hp=0000000000000000000000000000000000000000;hb=443c228bf0712d71e7fa34b5a2dc4b2b2e79f13f;hpb=9aa768067094f24f46f273077f867348e6143711 diff --git a/websoft/bin/prettify.pl b/websoft/bin/prettify.pl new file mode 100755 index 0000000..dfc5319 --- /dev/null +++ b/websoft/bin/prettify.pl @@ -0,0 +1,188 @@ +#!/usr/bin/perl + +=head1 NAME + +prettify.pl + +=head1 SYNOPSYS + +prettify.pl [--blc I] [--alscript I] --postscript I + +=head1 DESCRIPTION + +This program uses a BLC file to create an Alscript command file. + +=head1 TODO + +Add more options, make prettier Alscript command files. + +=head1 BUGS + +Alscript can't count properly and so we have to add one to some of the indicies to make it work. + +=head1 AUTHORS + +Started by Patrick Audley +Extended by Jonathan Barber + +=cut + +use strict; +use warnings; +use Getopt::Long; +use BLC; + +my $prefix = "seq.protein"; +my $bin = "/software/submit_all"; + +my $blc = '-'; +my $alscript = '-'; +my $psout; + +GetOptions( + "blc:s" => \$blc, + "alscript:s" => \$alscript, + "postscript=s" => \$psout, +) or die "$!\n"; + +die "--postscript option required\n" unless $psout; + +my $data = new BLC; +$data->read_file($blc); + +# Find which entries for secondary structure are in the sequence data +my @jpred = map { /^(?:jnet|jhmm|jalign|jpssm|lupas_14|lupas_21|lupas_28)$/ or 0 } $data->get_seq_ids; + +# Replace space characters with '-' for the FSM +my @sequences = map { my $c = $_; $c =~ s/ /-/g; $c } $data->get_sequences; + +# Find the positions of the secondary structure in the BLC file +my ($top, $bottom, $alignEnd); +foreach (0 .. $#jpred) { $top = $_ and last if $jpred[$_] } +foreach (reverse(0 .. $#jpred)) { $bottom = $_ and last if $jpred[$_] } +my $end = length $sequences[0]; + +# ... and then add 1 to them as Alscript can't count properly +$alignEnd = $top; +$top++; +$bottom++; + +# alscript bits +open ALSOUT, ">$alscript" or die "$alscript: Can't open the Alscript command file\n"; + +# Commands for Alscript +# Add one as Alscript can't count, and make sure it's over a minimum otherwise +# it cores... +my $foo = @sequences > 50 ? @sequences + 1 : 50; +my $MAX_NSEQ = "MAX_NSEQ ".$foo; +# Buffer size for reading the BLC file needs to be at least the size of alignment +2 +# This also needs to be larger than the longest line in the Alscript command file +# If any lines are longer than MAX_INPUT_LEN then Alscript is likely to fail with 'success' +my $MAX_INPUT_LEN = "MAX_INPUT_LEN ".($foo > 50 ? $foo + 2 : 50); +my $BLOCK_FILE = '#'; +unless ($blc eq '-') { $BLOCK_FILE = "BLOCK_FILE $blc"; } +my $OUTPUT_FILE = "OUTPUT_FILE $psout"; + +# Print the header +print ALSOUT <{'E'}}) { + my ($start, $end) = @{$_}; + my $colour = 51; + print ALSOUT "STRAND $start $line $end\n"; + print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n"; + } + + foreach (@{$fsm->{'H'}}) { + my ($start, $end) = @{$_}; + my $colour = 50; + print ALSOUT "HELIX $start $line $end\n"; + print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n"; + } + } +} + +# Munge out the jpred predictions using a basic FSM +# Pass the sequence and the states that are present. +# Returns HoA's, keys are the states, arrays are start and end +# points of the states. +# Idea taken from Patrick Audley +sub fsm { + my ($sequence, @states) = @_; + my $state; # The state we're in + my $start = 0; # The start position of the current state + my %res; # Hash to store results in + my $pos = 0; # The position in the FSM + + $state = substr $sequence, 0, 1; + foreach my $char (split //, $sequence) { + $pos++; + next if $char eq $state; # Same state + + # Otherwise we've changed states + my $flag = 0; + foreach (@states) { + if ($_ eq $char) { + push @{$res{$state}}, [ $start, $pos - 1 ]; + $state = $char; + $start = $pos; + $flag = 1; + last; + } + } + unless ($flag) { + warn "'$char' not a defined state at position $pos.\n"; + } + } + + return \%res; +}