--- /dev/null
+#!/usr/bin/perl
+
+=head1 NAME
+
+prettify.pl
+
+=head1 SYNOPSYS
+
+prettify.pl [--blc I<path>] [--alscript I<path>] --postscript I<path>
+
+=head1 DESCRIPTION
+
+This program uses a BLC file to create an Alscript command file.
+
+=head1 TODO
+
+Add more options, make prettier Alscript command files.
+
+=head1 BUGS
+
+Alscript can't count properly and so we have to add one to some of the indicies to make it work.
+
+=head1 AUTHORS
+
+Started by Patrick Audley <paudley@compbio.dundee.ac.uk>
+Extended by Jonathan Barber <jon@compbio.dundee.ac.uk>
+
+=cut
+
+use strict;
+use warnings;
+use Getopt::Long;
+use BLC;
+
+my $prefix = "seq.protein";
+my $bin = "/software/submit_all";
+
+my $blc = '-';
+my $alscript = '-';
+my $psout;
+
+GetOptions(
+ "blc:s" => \$blc,
+ "alscript:s" => \$alscript,
+ "postscript=s" => \$psout,
+) or die "$!\n";
+
+die "--postscript option required\n" unless $psout;
+
+my $data = new BLC;
+$data->read_file($blc);
+
+# Find which entries for secondary structure are in the sequence data
+my @jpred = map { /^(?:jnet|jhmm|jalign|jpssm|lupas_14|lupas_21|lupas_28)$/ or 0 } $data->get_seq_ids;
+
+# Replace space characters with '-' for the FSM
+my @sequences = map { my $c = $_; $c =~ s/ /-/g; $c } $data->get_sequences;
+
+# Find the positions of the secondary structure in the BLC file
+my ($top, $bottom, $alignEnd);
+foreach (0 .. $#jpred) { $top = $_ and last if $jpred[$_] }
+foreach (reverse(0 .. $#jpred)) { $bottom = $_ and last if $jpred[$_] }
+my $end = length $sequences[0];
+
+# ... and then add 1 to them as Alscript can't count properly
+$alignEnd = $top;
+$top++;
+$bottom++;
+
+# alscript bits
+open ALSOUT, ">$alscript" or die "$alscript: Can't open the Alscript command file\n";
+
+# Commands for Alscript
+# Add one as Alscript can't count, and make sure it's over a minimum otherwise
+# it cores...
+my $foo = @sequences > 50 ? @sequences + 1 : 50;
+my $MAX_NSEQ = "MAX_NSEQ ".$foo;
+# Buffer size for reading the BLC file needs to be at least the size of alignment +2
+# This also needs to be larger than the longest line in the Alscript command file
+# If any lines are longer than MAX_INPUT_LEN then Alscript is likely to fail with 'success'
+my $MAX_INPUT_LEN = "MAX_INPUT_LEN ".($foo > 50 ? $foo + 2 : 50);
+my $BLOCK_FILE = '#';
+unless ($blc eq '-') { $BLOCK_FILE = "BLOCK_FILE $blc"; }
+my $OUTPUT_FILE = "OUTPUT_FILE $psout";
+
+# Print the header
+print ALSOUT <<END;
+SILENT_MODE
+$BLOCK_FILE
+$MAX_NSEQ
+$MAX_INPUT_LEN
+$OUTPUT_FILE
+PORTRAIT
+POINTSIZE 8
+IDENT_WIDTH 12
+X_OFFSET 2
+Y_OFFSET 2
+DEFINE_FONT 0 Helvetica DEFAULT
+DEFINE_FONT 1 Helvetica REL 0.75
+DEFINE_FONT 7 Helvetica REL 0.6
+DEFINE_FONT 3 Helvetica-Bold DEFAULT
+DEFINE_FONT 4 Times-Bold DEFAULT
+DEFINE_FONT 5 Helvetica-BoldOblique DEFAULT
+#
+DEFINE_COLOUR 3 1 0.62 0.67 # Turquiose
+DEFINE_COLOUR 4 1 1 0 # Yellow
+DEFINE_COLOUR 5 1 0 0 # Red
+DEFINE_COLOUR 7 1 0 1 # Purple
+DEFINE_COLOUR 8 0 0 1 # Blue
+DEFINE_COLOUR 9 0 1 0 # Green
+DEFINE_COLOUR 10 0.41 0.64 1.00 # Pale blue
+DEFINE_COLOUR 11 0.41 0.82 0.67 # Pale green
+DEFINE_COLOUR 50 0.69 0.18 0.37 # Pink (helix)
+DEFINE_COLOUR 51 1.00 0.89 0.00 # Gold (strand)
+NUMBER_INT 10
+SETUP
+#
+# Highlight specific residues.
+# Avoid highlighting Lupas 'C' predictions by
+# limiting the highlighting to the alignments
+Scol_CHARS C 1 1 $end $alignEnd 4
+Ccol_CHARS H ALL 5
+Ccol_CHARS P ALL 8
+SURROUND_CHARS LIV ALL
+#
+# Replace known structure types with whitespace
+SUB_CHARS 1 $top $end $bottom H SPACE
+SUB_CHARS 1 $top $end $bottom E SPACE
+SUB_CHARS 1 $top $end $bottom - SPACE
+END
+
+# Find the range of each of the secondary structure elements
+foreach (0..$#jpred) {
+ if ($jpred[$_]) { # Then it's a secondary structure definition
+ my $fsm = fsm($sequences[$_], qw(- E H c C));
+ my $line = $_ + 1;
+
+ foreach (@{$fsm->{'E'}}) {
+ my ($start, $end) = @{$_};
+ my $colour = 51;
+ print ALSOUT "STRAND $start $line $end\n";
+ print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n";
+ }
+
+ foreach (@{$fsm->{'H'}}) {
+ my ($start, $end) = @{$_};
+ my $colour = 50;
+ print ALSOUT "HELIX $start $line $end\n";
+ print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n";
+ }
+ }
+}
+
+# Munge out the jpred predictions using a basic FSM
+# Pass the sequence and the states that are present.
+# Returns HoA's, keys are the states, arrays are start and end
+# points of the states.
+# Idea taken from Patrick Audley
+sub fsm {
+ my ($sequence, @states) = @_;
+ my $state; # The state we're in
+ my $start = 0; # The start position of the current state
+ my %res; # Hash to store results in
+ my $pos = 0; # The position in the FSM
+
+ $state = substr $sequence, 0, 1;
+ foreach my $char (split //, $sequence) {
+ $pos++;
+ next if $char eq $state; # Same state
+
+ # Otherwise we've changed states
+ my $flag = 0;
+ foreach (@states) {
+ if ($_ eq $char) {
+ push @{$res{$state}}, [ $start, $pos - 1 ];
+ $state = $char;
+ $start = $pos;
+ $flag = 1;
+ last;
+ }
+ }
+ unless ($flag) {
+ warn "'$char' not a defined state at position $pos.\n";
+ }
+ }
+
+ return \%res;
+}