--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Jpred;
+use Getopt::Long;
+
+my ($in, $out);
+my $format = 'seq';
+GetOptions(
+ "in=s" => \$in,
+ "out=s" => \$out,
+ "format=s" => \$format
+);
+
+if (!$in) { $in = "-"; }
+if (!$out) { $out = "-"; }
+
+# Read in the concise CSV file. Data is stored in hash's with the key
+# being the name of the type of data and the value being the sequence
+# data, the order of the alignments is also stored for display in the
+# order with which they are aligned.
+
+my (@align, @seq, %predict, %prob);
+
+my @predictions = qw(Lupas_14 Lupas_21 Lupas_28 jnetpred JNETHMM JNETALIGN JNETPSSM JNETJURY JNETSOL25 JNETSOL5 JNETSOL0 JNETCONF);
+
+my @prob = qw(JNETPROPH JNETPROPB JNETPROPC);
+
+# Arrays that holds the order in which things are printed
+my @print_pred = qw(jnetpred JNETALIGN JNETHMM JNETPSSM);
+my @coils = qw(Lupas_14 Lupas_21 Lupas_28);
+my @solvent = qw(JNETSOL25 JNETSOL5 JNETSOL0);
+
+# Hash that relates data names to printed names
+my %convert = (
+ JNETALIGN => "jalign",
+ JNETHMM => "jhmm",
+ jnetpred => "Jnet",
+ JNETPSSM => "jpssm",
+
+ Lupas_14 => "Lupas 14",
+ Lupas_21 => "Lupas 21",
+ Lupas_28 => "Lupas 28",
+
+ JNETSOL25 => "Jnet_25",
+ JNETSOL5 => "Jnet_5",
+ JNETSOL0 => "Jnet_0",
+ JNETCONF => "Jnet Rel"
+);
+
+# Read in the data
+open(IN, "<$in") or die($!);
+while (<IN>) {
+ if (/:/) {
+ my ($id, $seq) = split(":", $_);
+
+ if ($id =~ /;/) { # Find sequence alignments
+ my ($align, $seqid) = split(";", $id);
+ push @align, $seqid;
+ $seq =~ s/,//g;
+ chomp($seq);
+ push @seq, $seq;
+ }
+
+ foreach (@predictions) { # Find the predictions
+ if ($id eq $_) {
+ if ($predict{$id}) { die("Same ID ($id) found twice in concise file!"); }
+ $seq =~ s/,//g;
+ chomp($seq);
+ $predict{$id} = $seq;
+ }
+ }
+
+ foreach (@prob) {
+ if ($id eq $_) {
+ if ($prob{$id}) { die("Same ID ($id) found twice in concise file!"); }
+ chomp($seq);
+ @_ = split(",", $seq);
+ foreach (@_) {
+ if ($_ >= 0.85) { $_ = 9; }
+ else { $_ = int(($_ * 10) + 0.5); }
+ }
+ my $temp = join("", @_);
+ $prob{$id} = $temp;
+ }
+ }
+ }
+ }
+close(IN);
+
+############################################
+#
+# Sets up the references for prediction
+# methods and deffn methods
+#
+
+my @predname = qw(jnetpred JNETALIGN JNETPSSM JNETHMM);
+foreach (@predname) {
+ if (!$predict{$_}) { warn("\"$_\" data not present\n"); next; }
+ $predict{$_} =~ s/[TCYWXZ]/-/g;
+ }
+
+######################################################################
+# Print out the sequences to HTML
+######################################################################
+
+open(HTML, ">$out") or die($!);
+print HTML "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 3.2//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><title>Jpred secondary structure prediction results</title></head><body bgcolor=\"#ffffff\"><pre><code>";
+
+#
+# Prints the sequence passed to it as a reference to an array, second
+# array contains the "conservation", which varies between 0 and 1 for
+# each position.
+#
+
+sub printseq {
+ my $seqref = shift @_;
+ my $scoreref = shift @_;
+
+ unless ($scoreref) {
+ foreach (0..$#{$seqref}) {
+ print HTML ${$seqref}[$_];
+ }
+ return;
+ }
+
+# foreach (0..$#{$seqref} - 1) {
+ foreach (0..$#{$seqref}) {
+ my $colour = "#"; # Work out the colour
+ if (${$scoreref}[$_] == 1) {
+ $colour .= "0000ff";
+ }
+ else {
+ $colour .= (sprintf "%-2.2lx", ${$scoreref}[$_] * 255).(sprintf "%-2.2lx", 0).(sprintf "%-2.2lx", 0);
+ }
+
+ print HTML "<font color=\"$colour\">${$seqref}[$_]</font>";
+ }
+}
+
+# Prints the gap at the begining of the line between the sequence ID
+# and the ":". First argument is the length of the ID and the second is
+# the maximimum length.
+
+sub printgap {
+ my ($strlen, $max_len) = @_;
+ foreach ($strlen .. $max_len) {
+ print HTML " ";
+ }
+ print HTML ": ";
+ }
+
+# Works out a consensus score according to Zvelebil et al. (1987)
+# J. Mol. Biol. 195, pp957, and does the smoothing
+
+sub cons {
+ my %conservation = (
+ I => [1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+ L => [1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+ V => [1, 0, 0, 0, 0, 1, 0, 1, 0, 0],
+ C => [1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
+ A => [1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
+ G => [1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
+ M => [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ F => [1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+ Y => [1, 0, 0, 1, 0, 0, 0, 0, 1, 0],
+ W => [1, 0, 0, 1, 0, 0, 0, 0, 1, 0],
+ H => [1, 1, 0, 1, 1, 0, 0, 0, 1, 0],
+ K => [1, 1, 0, 1, 1, 0, 0, 0, 0, 0],
+ R => [0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
+ E => [0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
+ Q => [0, 0, 0, 1, 1, 0, 0, 0, 0, 0],
+ D => [0, 0, 1, 1, 0, 1, 0, 0, 0, 0],
+ N => [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
+ S => [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
+ T => [1, 0, 0, 1, 0, 1, 0, 0, 0, 0],
+ P => [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
+ Z => [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
+ '-' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ '_' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ '.' => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ X => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ B => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], # Not mentioned, so use X
+ U => [1, 0, 0, 0, 0, 1, 0, 0, 0, 0], # Not mentioned, so use C
+ );
+
+ my $aref = shift @_;
+ my (@cons, @align);
+
+ # Get each sequence
+ foreach (@{$aref}) {
+ push @align, $_;
+ }
+
+ my $seq_len = length($align[0]);
+
+ # Foreach position
+ foreach my $pos (0..$seq_len - 1) {
+ my @temp;
+
+ # Foreach sequence, get the residue for this position
+ foreach (@align) { push @temp, substr($_, $pos, 1); }
+
+ my ($p, $i, $j);
+ for ($i = 0; $i < $#temp; $i++) {
+ my $one = uc($temp[$i]);
+ for ($j = $i + 1; $j < $#temp; $j++) {
+ my $two = uc($temp[$j]);
+ foreach (0..9) {
+ # This debugging is in here
+ # incase a residue occurs that we
+ # haven't got in the hash above
+ unless (defined $conservation{$one}[$_]) {
+# warn "first $one, $_\n";
+# die "'$conservation{$one}[$_]'\n";
+ next;
+ }
+ unless (defined $conservation{$two}[$_]) {
+# warn "second $two, $_\n";
+# die "'$conservation{$two}[$_]'\n";
+ next;
+ }
+ if ($conservation{$one}[$_] != $conservation{$two}[$_]) { $p++; }
+ }
+ }
+ }
+
+ # This loop looks to be v. slow
+ if ($p) {
+ if (($p = 0.9 - 0.1 * $p) < 0) { $p = 0; }
+ }
+ else {
+ for ($i = 1; $i < $#temp; $i++) {
+ if ($temp[$i] ne $temp[0]) {
+ $p++;
+ last;
+ }
+ }
+ if ($p) { $p = 0.9; }
+ else { $p = 1; }
+ }
+
+ push @cons, $p;
+ }
+
+ # Average the result over 3 residues
+# my (@final, $i);
+# push @final, $cons[0];
+# for ($i = 1; $i < ($#cons); $i++) {
+# push @final, ($cons[$i - 1] + $cons[$i] + $cons[$i + 1]) / 3;
+# }
+# push @final, $cons[$#cons + 1];
+
+ return @cons;
+}
+
+############################################################
+# Uncomment this to get the conservation scoring worked out
+############################################################
+#my @score = &cons(\@seq);
+
+#
+# Print the sequences
+#
+
+# Find the longest identifier and set the space at the start of the line
+
+my $max_len = 0;
+foreach (@align) {
+ if (length($_) > $max_len) { $max_len = length($_); }
+ }
+if ($max_len <= 6) { $max_len = 11; }
+
+foreach (0..$#align) {
+ # Begining of line
+ if ($_ == 0) {
+ print HTML "$align[$_]";
+ }
+ else {
+ if ($format eq 'seq') {
+ ## differentiate between Uniprot and Uniparc (UniRef90_UNI\w+) IDs as they require
+ ## different SRS query strings
+ if ($align[$_] =~ /UniRef90_(UPI\w+)/) {
+ print HTML "<a href=$SRSSERVER/wgetz?-e+[uniparc-AccNumber:$1]>$align[$_]</a>";
+ } else {
+ print HTML "<a href=$SRSSERVER/wgetz?-e+[UNIREF90-acc:$align[$_]]>$align[$_]</a>";
+ }
+ } else {
+ print HTML "$align[$_]";
+ }
+ }
+ &printgap(length($align[$_]) ,$max_len);
+
+ # Print the sequence
+ @_ = split("", $seq[$_] );
+# &printseq([@_], [@score]);
+ &printseq([@_]);
+
+ # End of the line
+ if ($_ == 0) {
+ print HTML " : $align[$_] \n";
+ }
+ else {
+ if ($format eq 'seq') {
+ ## differentiate between Uniprot and Uniparc IDs as they require
+ ## different SRS query strings
+ if ($align[$_] =~ /UniRef90_(UPI\w+)/) {
+ print HTML " : <a href=$SRSSERVER/wgetz?-e+[uniparc-AccNumber:$1]>$align[$_]</a>\n";
+ } else {
+ print HTML " : <a href=$SRSSERVER/wgetz?-e+[UNIREF90-acc:$align[$_]]>$align[$_]</a>\n";
+ }
+ } else {
+ print HTML " : $align[$_]\n";
+ }
+ }
+ }
+print HTML "\n";
+
+######################################################################
+
+#
+# Create a guide sequence of the original query
+# (of the form 1----------11--- etc.)
+#
+
+my ($guide, $i);
+my $seq_len = length($seq[0]);
+
+for ($i = 1; $i < $seq_len; $i += 10) {
+ my $dash;
+ my $no_dash = 9 - length($i);
+ foreach (0..$no_dash) { $dash .= "-"; }
+ $guide .= $i.$dash;
+ }
+
+while (length($guide) > $seq_len) { # Trim spare guide away
+ chop($guide);
+ }
+
+printgap(0, $max_len);
+print HTML $guide." :\n";
+
+print HTML "OrigSeq";
+printgap(7, $max_len);
+print HTML $seq[0];
+print HTML " : OrigSeq\n\n";
+
+# Print out secondary structure predictions, goes through in order of
+# @print_pred so comes out right
+
+foreach (@print_pred) {
+ if (!$predict{$_}) { next; }
+ if ($_ eq "JNETALIGN") { # Add a line before JNETALIGN which will put jnetpred on its own
+ print HTML "\n";
+ }
+
+ print HTML $convert{$_};
+ printgap(length($convert{$_}), $max_len);
+ @_ = split("", $predict{$_});
+ foreach (@_) {
+ if ($_ eq "E") {
+ print HTML "<font color=ffa800>$_</font>";
+ }
+ elsif ($_ eq "H") {
+ print HTML "<font color=e90055>$_</font>";
+ }
+ elsif ($_ eq "-") {
+ print HTML $_;
+ }
+ }
+ print HTML " : $convert{$_}\n";
+ }
+print HTML "\n";
+
+foreach (@coils) {
+ if (!$predict{$_}) { next; }
+ print HTML $convert{$_};
+ printgap(length($convert{$_}), $max_len);
+ @_ = split("", $predict{$_});
+ foreach (@_) {
+ if ($_ eq "C") { print HTML "<font color=00aa00>$_</font>"; }
+ else { print HTML $_; }
+ }
+ print HTML " : $convert{$_}\n";
+ }
+print HTML "\n";
+
+foreach (@solvent) {
+ if (!$predict{$_}) { next; }
+ print HTML $convert{$_};
+ printgap(length($convert{$_}), $max_len);
+ @_ = split("", $predict{$_});
+ foreach (@_) {
+ if ($_ eq "B") {
+ print HTML "<font color=aa0000>$_</font>";
+ }
+ else { print HTML $_; }
+ }
+ print HTML " : $convert{$_}\n";
+ }
+
+printf HTML $convert{"JNETCONF"};
+printgap(8, $max_len);
+
+@_ = split("", $predict{"JNETCONF"});
+foreach (@_) {
+ if ($_ >= 7) { print HTML "<font color=00aa00>$_</font>"; }
+ else { print HTML "$_"; }
+ }
+print HTML " : ".$convert{"JNETCONF"}."\n";
+
+print HTML "</code></pre>";
+print HTML "<hr>";
+
+#
+# Key to data
+#
+
+print HTML "<pre><code>";
+print HTML "<b>\nNotes
+Key:</b>
+Colour code for alignment:
+Blue - Complete identity at a position
+Shades of red - The more red a position is, the higher the level of
+ conservation of chemical properties of the amino acids
+Jnet - Final secondary structure prediction for query
+jalign - Jnet alignment prediction
+jhmm - Jnet hmm profile prediction
+jpssm - Jnet PSIBLAST pssm profile prediction
+
+Lupas - Lupas Coil prediction (window size of 14, 21 and 28)
+
+Note on coiled coil predictions - = less than 50% probability
+ c = between 50% and 90% probability
+ C = greater than 90% probability
+
+Jnet_25 - Jnet prediction of burial, less than 25% solvent accesibility
+Jnet_5 - Jnet prediction of burial, less than 5% exposure
+Jnet_0 - Jnet prediction of burial, 0% exposure
+Jnet Rel - Jnet reliability of prediction accuracy, ranges from 0 to 9, bigger is better.
+";
+
+print HTML "</code></pre>$JPREDFOOT</body></html>";
+close(HTML);