9 prettify.pl [--blc I<path>] [--alscript I<path>] --postscript I<path>
13 This program uses a BLC file to create an Alscript command file.
17 Add more options, make prettier Alscript command files.
21 Alscript can't count properly and so we have to add one to some of the indicies to make it work.
25 Started by Patrick Audley <paudley@compbio.dundee.ac.uk>
26 Extended by Jonathan Barber <jon@compbio.dundee.ac.uk>
35 my $prefix = "seq.protein";
36 my $bin = "/software/submit_all";
44 "alscript:s" => \$alscript,
45 "postscript=s" => \$psout,
48 die "--postscript option required\n" unless $psout;
51 $data->read_file($blc);
53 # Find which entries for secondary structure are in the sequence data
54 my @jpred = map { /^(?:jnet|jhmm|jalign|jpssm|lupas_14|lupas_21|lupas_28)$/ or 0 } $data->get_seq_ids;
56 # Replace space characters with '-' for the FSM
57 my @sequences = map { my $c = $_; $c =~ s/ /-/g; $c } $data->get_sequences;
59 # Find the positions of the secondary structure in the BLC file
60 my ($top, $bottom, $alignEnd);
61 foreach (0 .. $#jpred) { $top = $_ and last if $jpred[$_] }
62 foreach (reverse(0 .. $#jpred)) { $bottom = $_ and last if $jpred[$_] }
63 my $end = length $sequences[0];
65 # ... and then add 1 to them as Alscript can't count properly
71 open ALSOUT, ">$alscript" or die "$alscript: Can't open the Alscript command file\n";
73 # Commands for Alscript
74 # Add one as Alscript can't count, and make sure it's over a minimum otherwise
76 my $foo = @sequences > 50 ? @sequences + 1 : 50;
77 my $MAX_NSEQ = "MAX_NSEQ ".$foo;
78 # Buffer size for reading the BLC file needs to be at least the size of alignment +2
79 # This also needs to be larger than the longest line in the Alscript command file
80 # If any lines are longer than MAX_INPUT_LEN then Alscript is likely to fail with 'success'
81 my $MAX_INPUT_LEN = "MAX_INPUT_LEN ".($foo > 50 ? $foo + 2 : 50);
83 unless ($blc eq '-') { $BLOCK_FILE = "BLOCK_FILE $blc"; }
84 my $OUTPUT_FILE = "OUTPUT_FILE $psout";
98 DEFINE_FONT 0 Helvetica DEFAULT
99 DEFINE_FONT 1 Helvetica REL 0.75
100 DEFINE_FONT 7 Helvetica REL 0.6
101 DEFINE_FONT 3 Helvetica-Bold DEFAULT
102 DEFINE_FONT 4 Times-Bold DEFAULT
103 DEFINE_FONT 5 Helvetica-BoldOblique DEFAULT
105 DEFINE_COLOUR 3 1 0.62 0.67 # Turquiose
106 DEFINE_COLOUR 4 1 1 0 # Yellow
107 DEFINE_COLOUR 5 1 0 0 # Red
108 DEFINE_COLOUR 7 1 0 1 # Purple
109 DEFINE_COLOUR 8 0 0 1 # Blue
110 DEFINE_COLOUR 9 0 1 0 # Green
111 DEFINE_COLOUR 10 0.41 0.64 1.00 # Pale blue
112 DEFINE_COLOUR 11 0.41 0.82 0.67 # Pale green
113 DEFINE_COLOUR 50 0.69 0.18 0.37 # Pink (helix)
114 DEFINE_COLOUR 51 1.00 0.89 0.00 # Gold (strand)
118 # Highlight specific residues.
119 # Avoid highlighting Lupas 'C' predictions by
120 # limiting the highlighting to the alignments
121 Scol_CHARS C 1 1 $end $alignEnd 4
124 SURROUND_CHARS LIV ALL
126 # Replace known structure types with whitespace
127 SUB_CHARS 1 $top $end $bottom H SPACE
128 SUB_CHARS 1 $top $end $bottom E SPACE
129 SUB_CHARS 1 $top $end $bottom - SPACE
132 # Find the range of each of the secondary structure elements
133 foreach (0..$#jpred) {
134 if ($jpred[$_]) { # Then it's a secondary structure definition
135 my $fsm = fsm($sequences[$_], qw(- E H c C));
138 foreach (@{$fsm->{'E'}}) {
139 my ($start, $end) = @{$_};
141 print ALSOUT "STRAND $start $line $end\n";
142 print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n";
145 foreach (@{$fsm->{'H'}}) {
146 my ($start, $end) = @{$_};
148 print ALSOUT "HELIX $start $line $end\n";
149 print ALSOUT "COLOUR_TEXT_REGION $start $line $end $line $colour\n";
154 # Munge out the jpred predictions using a basic FSM
155 # Pass the sequence and the states that are present.
156 # Returns HoA's, keys are the states, arrays are start and end
157 # points of the states.
158 # Idea taken from Patrick Audley
160 my ($sequence, @states) = @_;
161 my $state; # The state we're in
162 my $start = 0; # The start position of the current state
163 my %res; # Hash to store results in
164 my $pos = 0; # The position in the FSM
166 $state = substr $sequence, 0, 1;
167 foreach my $char (split //, $sequence) {
169 next if $char eq $state; # Same state
171 # Otherwise we've changed states
175 push @{$res{$state}}, [ $start, $pos - 1 ];
183 warn "'$char' not a defined state at position $pos.\n";