#!/usr/bin/perl
#
# Generates a blc file from the concise file: reads in the sequence
# alignments and those records named in @data, then prints them as a
# column-wise block.
#
# Usage: script [concise_file]    (reads STDIN when no file is given)
#
# CC 22/06/07 - bug fix in 'conversion' of coil-coiled predictions
# CC 22/05/06 - bug fix in final foreach loop
#

use strict;
use warnings;

# The names of the records from the concise file that we want
my @data = (
    "JNETALIGN", "JNETHMM",   "jnetpred", "JNETPSSM",
    "JNETCONF",  "JNETSOL25", "JNETSOL5", "JNETSOL0",
    "Lupas_21",  "Lupas_14",  "Lupas_28",
);

# Concise-file record name => label printed in the blc header
my %convert = (
    "JNETALIGN" => "jalign",
    "JNETHMM"   => "jhmm",
    "jnetpred"  => "jnet",
    "JNETPSSM"  => "jpssm",
    "JNETCONF"  => "conf",
    "JNETSOL25" => "sol25",
    "JNETSOL5"  => "sol5",
    "JNETSOL0"  => "sol0",
    "Lupas_21"  => "lupas_21",
    "Lupas_14"  => "lupas_14",
    "Lupas_28"  => "lupas_28",
);

# Read "id:comma,separated,values" lines from the filehandle $in.
#
# Returns a four element list:
#   \@seq_ids  - alignment names (the text after ';' in "alignN;name"),
#                in file order
#   \%seq_of   - alignment name => sequence with the commas removed
#   \@pred_ids - names of the records listed in @data, in file order
#   \%pred_of  - record name => value string with the commas removed
sub parse_concise {
    my ($in) = @_;

    my %wanted = map { $_ => 1 } @data;
    my (@seq_ids, %seq_of, @pred_ids, %pred_of);

    while (my $line = <$in>) {
        next if $line =~ /^\n/;

        my ($id, $value) = split(":", $line);
        next if !$id || !$value;    # Check we have proper values

        chomp($value);
        $value =~ s/,//g;

        if ($id =~ /align\d*;/) {
            # Then it's an alignment: "alignN;name"
            my (undef, $name) = split(";", $id);
            $name = '' unless defined $name;    # "alignN;" carries no name
            push @seq_ids, $name;
            $seq_of{$name} = $value;
        }

        if ($wanted{$id}) {
            push @pred_ids, $id;
            $pred_of{$id} = $value;
        }
    }

    return (\@seq_ids, \%seq_of, \@pred_ids, \%pred_of);
}

# Build the complete blc text from the structures returned by
# parse_concise() and return it as a single string.
#
# Layout: one ">name" header line per alignment, three empty ">" lines, one
# ">label" line per prediction, the iteration marker, then one output line
# per alignment column holding that column's character from every row.
sub blc_text {
    my ($seq_ids, $seq_of, $pred_ids, $pred_of) = @_;

    # The first alignment fixes the number of columns; no alignments means
    # no columns at all.
    my $width = @$seq_ids ? length($seq_of->{ $seq_ids->[0] }) : 0;
    my $gap   = " " x $width;

    my @rows = ($gap);
    for my $name (@$seq_ids) {
        (my $row = $seq_of->{$name}) =~ s/\.|-/ /g;
        push @rows, $row;
    }
    push @rows, ($gap) x 3;

    for my $id (@$pred_ids) {
        my $row = $pred_of->{$id};
        ## CC 22/06/07 - Convert 8-state secondary structure types to 3-state
        ## types unless it's a coil-coil prediction where 'C' is a valid
        ## prediction.
        ## I think this is a throw-back from when Jpred was a consensus server?
        if ($id =~ /Lupas/i) {
            $row =~ s/-/ /g;
        }
        else {
            $row =~ s/[TCYWXZ_SI\?-]/ /g;
            $row =~ s/G/H/g;
        }
        push @rows, $row;
    }

    my $out = '';
    $out .= ">$_\n" for @$seq_ids;
    $out .= ">\n>\n>\n";
    $out .= ">$convert{$_}\n" for @$pred_ids;
    $out .= " * iteration 1\n";

    for my $col (0 .. $width - 1) {
        for my $row (@rows) {
            ### CC 22/05/06 - a plain truth test on the character dropped
            # '0', which is a valid value in the JNETCONF string of integers.
            # Rows shorter than the first alignment contribute nothing to
            # the columns they do not reach.
            $out .= substr($row, $col, 1) if $col < length $row;
        }
        $out .= "\n";
    }
    $out .= " *";

    return $out;
}

# Script entry point: convert the file named in $ARGV[0], or STDIN when no
# file is given, and print the blc text to STDOUT.
sub main {
    my $in;
    if ($ARGV[0]) {
        open($in, '<', $ARGV[0]) or die "$ARGV[0]: $!";
    }
    else {
        $in = \*STDIN;
    }

    my @parsed = parse_concise($in);
    close($in) if $ARGV[0];

    print blc_text(@parsed);
    return;
}

main() unless caller;

1;