13 "format=s" => \$format
16 if (!$in) { $in = "-"; }
17 if (!$out) { $out = "-"; }
19 # Read in the concise CSV file. Data is stored in hash's with the key
20 # being the name of the type of data and the value being the sequence
21 # data, the order of the alignments is also stored for display in the
22 # order with which they are aligned.
24 my (@align, @seq, %predict, %prob);
26 my @predictions = qw(Lupas_14 Lupas_21 Lupas_28 jnetpred JNETHMM JNETALIGN JNETPSSM JNETJURY JNETSOL25 JNETSOL5 JNETSOL0 JNETCONF);
28 my @prob = qw(JNETPROPH JNETPROPB JNETPROPC);
30 # Arrays that holds the order in which things are printed
31 my @print_pred = qw(jnetpred JNETALIGN JNETHMM JNETPSSM);
32 my @coils = qw(Lupas_14 Lupas_21 Lupas_28);
33 my @solvent = qw(JNETSOL25 JNETSOL5 JNETSOL0);
35 # Hash that relates data names to printed names
37 JNETALIGN => "jalign",
42 Lupas_14 => "Lupas 14",
43 Lupas_21 => "Lupas 21",
44 Lupas_28 => "Lupas 28",
46 JNETSOL25 => "Jnet_25",
49 JNETCONF => "Jnet Rel"
53 open(IN, "<$in") or die($!);
56 my ($id, $seq) = split(":", $_);
58 if ($id =~ /;/) { # Find sequence alignments
59 my ($align, $seqid) = split(";", $id);
66 foreach (@predictions) { # Find the predictions
68 if ($predict{$id}) { die("Same ID ($id) found twice in concise file!"); }
77 if ($prob{$id}) { die("Same ID ($id) found twice in concise file!"); }
79 @_ = split(",", $seq);
81 if ($_ >= 0.85) { $_ = 9; }
82 else { $_ = int(($_ * 10) + 0.5); }
84 my $temp = join("", @_);
92 ############################################
94 # Sets up the references for prediction
95 # methods and deffn methods
98 my @predname = qw(jnetpred JNETALIGN JNETPSSM JNETHMM);
100 if (!$predict{$_}) { warn("\"$_\" data not present\n"); next; }
101 $predict{$_} =~ s/[TCYWXZ]/-/g;
104 ######################################################################
105 # Print out the sequences to HTML
106 ######################################################################
108 open(HTML, ">$out") or die($!);
109 print HTML "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 3.2//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><title>Jpred secondary structure prediction results</title></head><body bgcolor=\"#ffffff\"><pre><code>";
112 # Prints the sequence passed to it as a reference to an array, second
113 # array contains the "conservation", which varies between 0 and 1 for
118 my $seqref = shift @_;
119 my $scoreref = shift @_;
122 foreach (0..$#{$seqref}) {
123 print HTML ${$seqref}[$_];
128 # foreach (0..$#{$seqref} - 1) {
129 foreach (0..$#{$seqref}) {
130 my $colour = "#"; # Work out the colour
131 if (${$scoreref}[$_] == 1) {
135 $colour .= (sprintf "%-2.2lx", ${$scoreref}[$_] * 255).(sprintf "%-2.2lx", 0).(sprintf "%-2.2lx", 0);
138 print HTML "<font color=\"$colour\">${$seqref}[$_]</font>";
142 # Prints the gap at the begining of the line between the sequence ID
143 # and the ":". First argument is the length of the ID and the second is
144 # the maximimum length.
147 my ($strlen, $max_len) = @_;
148 foreach ($strlen .. $max_len) {
154 # Works out a consensus score according to Zvelebil et al. (1987)
155 # J. Mol. Biol. 195, pp957, and does the smoothing
159 I => [1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
160 L => [1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
161 V => [1, 0, 0, 0, 0, 1, 0, 1, 0, 0],
162 C => [1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
163 A => [1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
164 G => [1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
165 M => [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
166 F => [1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
167 Y => [1, 0, 0, 1, 0, 0, 0, 0, 1, 0],
168 W => [1, 0, 0, 1, 0, 0, 0, 0, 1, 0],
169 H => [1, 1, 0, 1, 1, 0, 0, 0, 1, 0],
170 K => [1, 1, 0, 1, 1, 0, 0, 0, 0, 0],
171 R => [0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
172 E => [0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
173 Q => [0, 0, 0, 1, 1, 0, 0, 0, 0, 0],
174 D => [0, 0, 1, 1, 0, 1, 0, 0, 0, 0],
175 N => [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
176 S => [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
177 T => [1, 0, 0, 1, 0, 1, 0, 0, 0, 0],
178 P => [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
179 Z => [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
180 '-' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
181 '_' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
182 '.' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
183 X => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
184 B => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], # Not mentioned, so use X
185 U => [1, 0, 0, 0, 0, 1, 0, 0, 0, 0], # Not mentioned, so use C
196 my $seq_len = length($align[0]);
199 foreach my $pos (0..$seq_len - 1) {
202 # Foreach sequence, get the residue for this position
203 foreach (@align) { push @temp, substr($_, $pos, 1); }
206 for ($i = 0; $i < $#temp; $i++) {
207 my $one = uc($temp[$i]);
208 for ($j = $i + 1; $j < $#temp; $j++) {
209 my $two = uc($temp[$j]);
211 # This debugging is in here
212 # incase a residue occurs that we
213 # haven't got in the hash above
214 unless (defined $conservation{$one}[$_]) {
215 # warn "first $one, $_\n";
216 # die "'$conservation{$one}[$_]'\n";
219 unless (defined $conservation{$two}[$_]) {
220 # warn "second $two, $_\n";
221 # die "'$conservation{$two}[$_]'\n";
224 if ($conservation{$one}[$_] != $conservation{$two}[$_]) { $p++; }
229 # This loop looks to be v. slow
231 if (($p = 0.9 - 0.1 * $p) < 0) { $p = 0; }
234 for ($i = 1; $i < $#temp; $i++) {
235 if ($temp[$i] ne $temp[0]) {
240 if ($p) { $p = 0.9; }
247 # Average the result over 3 residues
249 # push @final, $cons[0];
250 # for ($i = 1; $i < ($#cons); $i++) {
251 # push @final, ($cons[$i - 1] + $cons[$i] + $cons[$i + 1]) / 3;
253 # push @final, $cons[$#cons + 1];
258 ############################################################
259 # Uncomment this to get the conservation scoring worked out
260 ############################################################
261 #my @score = &cons(\@seq);
264 # Print the sequences
267 # Find the longest identifier and set the space at the start of the line
271 if (length($_) > $max_len) { $max_len = length($_); }
273 if ($max_len <= 6) { $max_len = 11; }
275 foreach (0..$#align) {
278 print HTML "$align[$_]";
281 if ($format eq 'seq') {
282 ## differentiate between Uniprot and Uniparc (UniRef90_UNI\w+) IDs as they require
283 ## different SRS query strings
284 if ($align[$_] =~ /UniRef90_(UPI\w+)/) {
285 print HTML "<a href=$SRSSERVER?db=allebi&query=$1>$align[$_]</a>";
287 print HTML "<a href=$SRSSERVER?db=allebi&query=$align[$_]>$align[$_]</a>";
290 print HTML "$align[$_]";
293 &printgap(length($align[$_]) ,$max_len);
296 @_ = split("", $seq[$_] );
297 # &printseq([@_], [@score]);
302 print HTML " : $align[$_] \n";
305 if ($format eq 'seq') {
306 ## differentiate between Uniprot and Uniparc IDs as they require
307 ## different SRS query strings
308 if ($align[$_] =~ /UniRef90_(UPI\w+)/) {
309 print HTML " : <a href=$SRSSERVER?db=allebi&query=$1>$align[$_]</a>\n";
311 print HTML " : <a href=$SRSSERVER?db=allebi&query=$align[$_]>$align[$_]</a>\n";
314 print HTML " : $align[$_]\n";
320 ######################################################################
323 # Create a guide sequence of the original query
324 # (of the form 1----------11--- etc.)
328 my $seq_len = length($seq[0]);
330 for ($i = 1; $i < $seq_len; $i += 10) {
332 my $no_dash = 9 - length($i);
333 foreach (0..$no_dash) { $dash .= "-"; }
337 while (length($guide) > $seq_len) { # Trim spare guide away
341 printgap(0, $max_len);
342 print HTML $guide." :\n";
344 print HTML "OrigSeq";
345 printgap(7, $max_len);
347 print HTML " : OrigSeq\n\n";
349 # Print out secondary structure predictions, goes through in order of
350 # @print_pred so comes out right
352 foreach (@print_pred) {
353 if (!$predict{$_}) { next; }
354 if ($_ eq "JNETALIGN") { # Add a line before JNETALIGN which will put jnetpred on its own
358 print HTML $convert{$_};
359 printgap(length($convert{$_}), $max_len);
360 @_ = split("", $predict{$_});
363 print HTML "<font color=ffa800>$_</font>";
366 print HTML "<font color=e90055>$_</font>";
372 print HTML " : $convert{$_}\n";
377 if (!$predict{$_}) { next; }
378 print HTML $convert{$_};
379 printgap(length($convert{$_}), $max_len);
380 @_ = split("", $predict{$_});
382 if ($_ eq "C") { print HTML "<font color=00aa00>$_</font>"; }
383 else { print HTML $_; }
385 print HTML " : $convert{$_}\n";
390 if (!$predict{$_}) { next; }
391 print HTML $convert{$_};
392 printgap(length($convert{$_}), $max_len);
393 @_ = split("", $predict{$_});
396 print HTML "<font color=aa0000>$_</font>";
398 else { print HTML $_; }
400 print HTML " : $convert{$_}\n";
403 printf HTML $convert{"JNETCONF"};
404 printgap(8, $max_len);
406 @_ = split("", $predict{"JNETCONF"});
408 if ($_ >= 7) { print HTML "<font color=00aa00>$_</font>"; }
409 else { print HTML "$_"; }
411 print HTML " : ".$convert{"JNETCONF"}."\n";
413 print HTML "</code></pre>";
420 print HTML "<pre><code>";
421 print HTML "<b>\nNotes
423 Colour code for alignment:
424 Blue - Complete identity at a position
425 Shades of red - The more red a position is, the higher the level of
426 conservation of chemical properties of the amino acids
427 Jnet - Final secondary structure prediction for query
428 jalign - Jnet alignment prediction
429 jhmm - Jnet hmm profile prediction
430 jpssm - Jnet PSIBLAST pssm profile prediction
432 Lupas - Lupas Coil prediction (window size of 14, 21 and 28)
434 Note on coiled coil predictions - = less than 50% probability
435 c = between 50% and 90% probability
436 C = greater than 90% probability
438 Jnet_25 - Jnet prediction of burial, less than 25% solvent accesibility
439 Jnet_5 - Jnet prediction of burial, less than 5% exposure
440 Jnet_0 - Jnet prediction of burial, 0% exposure
441 Jnet Rel - Jnet reliability of prediction accuracy, ranges from 0 to 9, bigger is better.
444 print HTML "</code></pre>$JPREDFOOT</body></html>";