From eb3001dc41bf6cd46e20fd13fe3efbe9dedf6013 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Mon, 18 Nov 2013 14:44:00 +0000 Subject: [PATCH] JPRED-2 Current state of the SVN trank --- jpred/jpred | 1107 ++++++++++++++++++++++---------------------- jpred/lib/DSSP/SCOP.pm | 177 +++---- jpred/lib/FASTA.pm | 21 +- jpred/lib/FASTA/File.pm | 91 ++-- jpred/lib/IO/String.pm | 606 ++++++++++++------------ jpred/lib/Jpred.pm | 125 +++-- jpred/lib/PSIBLAST/Run.pm | 117 ++--- jpred/lib/Pairwise.pm | 32 +- jpred/lib/Paths.pm | 5 +- jpred/lib/Root.pm | 39 +- jpred/lib/Sequence.pm | 100 ++-- jpred/lib/Sequence/File.pm | 122 ++--- 12 files changed, 1279 insertions(+), 1263 deletions(-) diff --git a/jpred/jpred b/jpred/jpred index 30e9f40..261e527 100755 --- a/jpred/jpred +++ b/jpred/jpred @@ -10,9 +10,11 @@ jpred - Secondary structure prediction program =head1 DESCRIPTION -This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence. It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet. +This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence. +It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet. -The program is primarily design for use by the Jpred server, but it can be used directly by any user. Some of the output may not be meaningful if used directly - it can be safely ignored. +The program is primarily design for use by the Jpred server, but it can be used directly by any user. +Some of the output may not be meaningful if used directly - it can be safely ignored. =head1 OPTIONS @@ -79,7 +81,7 @@ Chris Cole (current maintainer) use strict; use warnings; -use Getopt::Long qw(:config auto_version); +use Getopt::Long; use Pod::Usage; use Carp; use UNIVERSAL qw(isa); @@ -88,8 +90,10 @@ use File::Temp; use FindBin qw($Bin); use lib "$Bin/../lib"; +#use lib "lib"; +#use lib "/home/asherstnev/Projects/Jpred/jpred/trunk/lib"; -use Jpred; # needed to define BLAST environment variables +use Jpred; # needed to define BLAST environment variables # my modules use HMMER::Profile; @@ -108,292 +112,289 @@ use Utils qw(profile); use Run qw(check); use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin); -our $VERSION = '3.0.2'; -my $MAX_ALIGN = 1000; # maximum number of alignment sequences allowed -my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy +our $VERSION = '3.0.2'; +my $MAX_ALIGN = 1000; # maximum number of alignment sequences allowed +my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy # Light object to hold sequence information, light to prevent memory overload # for big search results. Even so this is quite memory intensive, I can't see # how to reduce the size further. { - package PSISEQ; - #use Compress::LZF qw(compress decompress); - - sub new { - my ($class, %args) = @_; - my $self = []; - bless $self, $class; - for (keys %args) { $self->$_($args{$_}) } - return $self; - } - - sub id { - return defined $_[1] ? - $_[0]->[0] = $_[1] : - $_[0]->[0]; - } - - sub start { - return defined $_[1] ? - $_[0]->[1] = $_[1] : - $_[0]->[1]; - } - - sub align { - if (defined $_[1]) { - ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref"; - #$_[0]->[2] = compress(join "", @{ $_[1] }); - $_[0]->[2] = join("", @{ $_[1] }); - } - else { - #return [ split //, decompress($_[0]->[2]) ]; - return [ split(//, $_[0]->[2]) ]; - } - } + + package PSISEQ; + + #use Compress::LZF qw(compress decompress); + + sub new { + my ( $class, %args ) = @_; + my $self = []; + bless $self, $class; + for ( keys %args ) { $self->$_( $args{$_} ) } + return $self; + } + + sub id { + return defined $_[1] + ? $_[0]->[0] = $_[1] + : $_[0]->[0]; + } + + sub start { + return defined $_[1] + ? $_[0]->[1] = $_[1] + : $_[0]->[1]; + } + + sub align { + if ( defined $_[1] ) { + ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref"; + + #$_[0]->[2] = compress(join "", @{ $_[1] }); + $_[0]->[2] = join( "", @{ $_[1] } ); + } else { + + #return [ split //, decompress($_[0]->[2]) ]; + return [ split( //, $_[0]->[2] ) ]; + } + } } # Key to database information and information for accessing them -my $dbPATH = '/homes/chris/projects/Jnet/db'; +my $dbPATH = '/homes/chris/projects/Jnet/db'; my $db_entry = "uniref90"; my $database = { - - ## default database used by Jpred - uniref90 => { - database => 'uniref90.filt', - unfiltered => 'uniref90', - }, - - ## database used during Jnet training - training => { - database => 'training/uniref90.filt', - unfiltered => 'training/uniref90', - }, - - ## cluster-specific path for Jpred - cluster => { - database => '/local/gjb_lab/www-jpred/uniref90.filt', - unfiltered => '/local/gjb_lab/www-jpred/uniref90', - }, - - ## these other DBs are experimental ones used during development. - ## check they exist before using them. - swall => { - # generic entry for use with validate_jnet.pl - # real db location defined by validate_jnet.pl - database => "$dbPATH/swall/swall.filt", # CC new - unfiltered => "$dbPATH/swall/swall", - }, - uniprot => { - # Path to PSIBLAST db - database => "$dbPATH/3/swall.filt", # CC new - unfiltered => "$dbPATH/3/swall", - }, - uniref50 => { - database => "$dbPATH/6/swall.filt", - unfiltered => "$dbPATH/6/swall", - }, + + ## default database used by Jpred + uniref90 => { + database => 'uniref90.filt', + unfiltered => 'uniref90', + }, + + ## database used during Jnet training + training => { + database => 'training/uniref90.filt', + unfiltered => 'training/uniref90', + }, + + ## cluster-specific path for Jpred + cluster => { + database => '/local/gjb_lab/www-jpred/uniref90.filt', + unfiltered => '/local/gjb_lab/www-jpred/uniref90', + }, + + ## these other DBs are experimental ones used during development. + ## check they exist before using them. + swall => { + + # generic entry for use with validate_jnet.pl + # real db location defined by validate_jnet.pl + database => "$dbPATH/swall/swall.filt", # CC new + unfiltered => "$dbPATH/swall/swall", + }, + uniprot => { + + # Path to PSIBLAST db + database => "$dbPATH/3/swall.filt", # CC new + unfiltered => "$dbPATH/3/swall", + }, + uniref50 => { + database => "$dbPATH/6/swall.filt", + unfiltered => "$dbPATH/6/swall", + }, }; # Gap characters in sequences our $GAP = '-'; -my ($help, $man, $path, $output, $psiblast, $DEBUG, $VERBOSE); -my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found +my ( $help, $man, $path, $output, $psiblast, $DEBUG, $VERBOSE ); +my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found GetOptions( - "help" => \$help, - "man" => \$man, - "verbose" => \$VERBOSE, - "sequence=s" => \$path, - "output=s" => \$output, - "psi=s" => \$psiblast, - "db=s" => \$db_entry, - "pred-nohits" => \$predNoHits, - "debug" => \$DEBUG + "help" => \$help, + "man" => \$man, + "verbose" => \$VERBOSE, + "sequence=s" => \$path, + "output=s" => \$output, + "psi=s" => \$psiblast, + "db=s" => \$db_entry, + "pred-nohits" => \$predNoHits, + "debug" => \$DEBUG ) or pod2usage(2); pod2usage(1) if $help; -pod2usage(verbose => 2) if $man; +pod2usage( verbose => 2 ) if $man; pod2usage(' --sequence argument not provided') unless $path; die "--db $db_entry not recognised" unless exists $database->{$db_entry}; $output = $path unless $output; -for ( - [ "path", $path ], - [ "output", $output ], - [ "psi", $psiblast ], - [ "db", $db_entry] - ) { - my ($key, $value) = @{$_}; - defined $value or next; - errlog(join(": ", $key, $value), "\n"); +for ( [ "path", $path ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) { + my ( $key, $value ) = @{$_}; + defined $value or next; + errlog( join( ": ", $key, $value ), "\n" ); } -my $query = FASTA::File->new(read_file => $path); +my $query = FASTA::File->new( read_file => $path ); my @seqs; if (1) { - my $psi = PSIBLAST->new; # CC PSIBLAST.pm doesn't have a new() class?? - unless (defined $psiblast && -e $psiblast) { - my $psi_run = PSIBLAST::Run->new( - debug => $DEBUG, -# path => "/software/jpred_bin/blastpgp", - path => $psiblastbin, # CC new path - input => $path, - output => "$output.blast", - matrix => "$output.profile", - database => $database->{$db_entry}{database}, - ## potentially a better set of parameters (esp. -b & -v) - ## which avoid PSI-BLAST dying with large number of hits - # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence - # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria - # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3' + my $psi = PSIBLAST->new; # CC PSIBLAST.pm doesn't have a new() class?? + unless ( defined $psiblast && -e $psiblast ) { + my $psi_run = PSIBLAST::Run->new( + debug => $DEBUG, + + # path => "/software/jpred_bin/blastpgp", + path => $psiblastbin, # CC new path + input => $path, + output => "$output.blast", + matrix => "$output.profile", + database => $database->{$db_entry}{database}, + ## potentially a better set of parameters (esp. -b & -v) + ## which avoid PSI-BLAST dying with large number of hits + # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence + # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria + # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3' + ); + + # For reduced databases, get the size of the orginal and use this as + # the equivilent DB size + #print $db_entry, "\n"; + if ( $db_entry =~ /^sp_red_/ ) { + ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/; + $psi_run->args( + $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below ); - - # For reduced databases, get the size of the orginal and use this as - # the equivilent DB size - #print $db_entry, "\n"; - if ($db_entry =~ /^sp_red_/) { - (my $entry = $db_entry) =~ s/^sp_red_/sp_/; - $psi_run->args( $psi_run->args . " -z " . - fasta_seq_length($database->{$entry}{database}) # CC sub is below - ); - } - print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG; - print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG; - - print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE; - $psi_run->run or die; # CC sub is from PSIBLAST::Run - - $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does? - system("gzip -9f $output.blast"); - } - else { - if ($psiblast =~ /.gz$/) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does? - else { $psi->read_file($psiblast) } # CC ditto above - } - - # Convert the last itteration into a collection of PSISEQ objects - for ($psi->get_all) { # CC sub is from PSIBLAST.pm - my ($id, $start, $align) = @{$_}; - push @seqs, PSISEQ->new( - id => $id, - start => $start, - align => [ split(//, $align) ] + } + print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG; + print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG; + + print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE; + $psi_run->run or die; # CC sub is from PSIBLAST::Run + + $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does? + system("gzip -9f $output.blast"); + } else { + if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does? + else { $psi->read_file($psiblast) } # CC ditto above + } + + # Convert the last itteration into a collection of PSISEQ objects + for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm + my ( $id, $start, $align ) = @{$_}; + push @seqs, + PSISEQ->new( + id => $id, + start => $start, + align => [ split( //, $align ) ] ); - } + } } -## When there are no PSI-BLAST hits generate an HMM from the query +## When there are no PSI-BLAST hits generate an HMM from the query ## and run Jnet against that only. ## Accuracy won't be as good, but at least it's a prediction -if (@seqs == 0) { - if ($predNoHits == 0) { - warn "\nJPRED: Warning - no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n"; - print ">>100% complete\n"; - exit; - } - print ">>50% complete\n"; - warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n"; - print "Running HMMer on query...\n" if $VERBOSE; - - # copy input query to alignment - system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n"; - - # Temp files required for HMMer - my ($hmmbuild_out, $hmmconvert_out) = map { - File::Temp->new->filename - } 1..2; - - system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $path"); - system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out"); - - # Read in the HMMER file - my $hmmer = HMMER::Profile->new( - read_file => $hmmconvert_out - ); - - # Convert from HMMER::Profile to HMMER::Profile::Jnet - my $hmmer_jnet = HMMER::Profile::Jnet->new; - $hmmer_jnet->add_line(@{$_}) for $hmmer->get_line; - $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module - print ">>70% complete\n"; - - # Run Jnet for the prediction - print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE; - jnet(map { "$output.$_" } qw(hmm jnet)); # CC sub is below - - print "Jpred Finished\n"; - exit; +if ( @seqs == 0 ) { + if ( $predNoHits == 0 ) { + warn "\nJPRED: Warning - no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n"; + print ">>100% complete\n"; + exit; + } + print ">>50% complete\n"; + warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n"; + print "Running HMMer on query...\n" if $VERBOSE; + + # copy input query to alignment + system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n"; + + # Temp files required for HMMer + my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2; + + system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $path"); + system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out"); + + # Read in the HMMER file + my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out ); + + # Convert from HMMER::Profile to HMMER::Profile::Jnet + my $hmmer_jnet = HMMER::Profile::Jnet->new; + $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line; + $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module + print ">>70% complete\n"; + + # Run Jnet for the prediction + print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE; + jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below + + print "Jpred Finished\n"; + exit; } -psiseq2fasta("0.fasta.gz", @seqs) if $DEBUG; # CC sub is below +psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below print ">>40% complete\n"; # Make PSIBLAST truncated alignments the right length print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE; -@seqs = extend($query, @seqs); # CC sub si below -psiseq2fasta("1.fasta.gz", @seqs) if $DEBUG; +@seqs = extend( $query, @seqs ); # CC sub si below +psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG; # Remove masking from PSIBLAST alignment print "Unmasking the alignments...\n" if $VERBOSE; -dex($_) for @seqs[ 1..$#seqs ]; ## <-- CC dies here in dex() - found below -psiseq2fasta("2.fasta.gz", @seqs) if $DEBUG; +dex($_) for @seqs[ 1 .. $#seqs ]; ## <-- CC dies here in dex() - found below +psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG; # Convert the sequences to upper case print "Converting sequences to the same case...\n" if $VERBOSE; -toupper($_) for @seqs; # CC sub is below -psiseq2fasta("${output}_backup.fasta.gz", @seqs); +toupper($_) for @seqs; # CC sub is below +psiseq2fasta( "${output}_backup.fasta.gz", @seqs ); # Remove excessive sequences print "Remove excessive sequences...\n" if $VERBOSE; -@seqs = reduce($MAX_ALIGN, @seqs); # CC sub is below -psiseq2fasta("3.fasta.gz", @seqs) if $DEBUG; +@seqs = reduce( $MAX_ALIGN, @seqs ); # CC sub is below +psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG; # Remove sequences that are too long or too short print "Remove sequences which too long or short...\n" if $VERBOSE; -@seqs = del_long_seqs(50, @seqs); # CC sub is below -psiseq2fasta("4.fasta.gz", @seqs) if $DEBUG; +@seqs = del_long_seqs( 50, @seqs ); # CC sub is below +psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG; # Remove redundant sequences based upon pairwise identity and OC clustering print "Remove redundant sequences...\n" if $VERBOSE; -@seqs = nonred($NR_CUT, @seqs); # CC sub is below -psiseq2fasta("5.fasta.gz", @seqs) if $DEBUG; +@seqs = nonred( $NR_CUT, @seqs ); # CC sub is below +psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG; # Check that we haven't got rid of all of the sequences -if (@seqs < 2) { - warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n"; - @seqs = fasta2psiseq("${output}_backup.fasta.gz"); +if ( @seqs < 2 ) { + warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n"; + @seqs = fasta2psiseq("${output}_backup.fasta.gz"); } unlink("${output}_backup.fasta.gz") unless $DEBUG; # Remove gaps in the query sequence and same positions in the alignment print "Removing gaps in the query sequence...\n" if $VERBOSE; -degap(@seqs); # CC sub is below -psiseq2fasta("6.fasta.gz", @seqs) if $DEBUG; +degap(@seqs); # CC sub is below +psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG; # Output the alignment for the prediction print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE; -psiseq2fasta("$output.align", @seqs); +psiseq2fasta( "$output.align", @seqs ); { - # Equivilent to getpssm script - print "Output the PSSM matrix from the PSI-BLAST profile...\n"; - my $pssm = PSIBLAST::PSSM->new(read_file => "$output.profile"); - $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM + # Equivilent to getpssm script + print "Output the PSSM matrix from the PSI-BLAST profile...\n"; + my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" ); + $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM } print ">>50% complete\n"; # Run HMMER on the sequences { - print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE; - my $hmmer = hmmer(@seqs); # CC sub is below - $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module + print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE; + my $hmmer = hmmer(@seqs); # CC sub is below + $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module } print ">>70% complete\n"; # Run Jnet for the prediction print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE; -jnet(map { "$output.$_" } qw(hmm jnet pssm)); # CC sub is below +jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below print "Jpred Finished\n"; @@ -411,57 +412,55 @@ Returns a PSISEQ object which has any masked residues replaced by their entry fr =cut - BEGIN { - my $idx = Index->new(type => "Index::FastaCMD") or die "Index type cannot be found."; + my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found."; - sub dex { - my ($seq) = @_; + sub dex { + my ($seq) = @_; - croak "JPRED: dex: can only take a PSISEQ object as an argument" unless isa $seq, 'PSISEQ'; + croak "JPRED: dex: can only take a PSISEQ object as an argument" unless isa $seq, 'PSISEQ'; - # Only bother if we have a sequence which has been masked - return unless grep { uc $_ eq 'X' } @{ $seq->align }; + # Only bother if we have a sequence which has been masked + return unless grep { uc $_ eq 'X' } @{ $seq->align }; - my ($id, $start, @align) = ($seq->id, $seq->start, @{$seq->align}); + my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } ); - #$id = "$id"; # Add the EMBOSS USA - #$id = "sprot_37:$id"; # Add the EMBOSS USA - #$id = "sprot_43:$id"; # Add the EMBOSS USA -# $id = $database->{$db_entry}{emboss} . ":$id"; # commented out to try FastaCMD module - my $searchdb = $database->{$db_entry}{unfiltered}; - $start--; # We need this to be zero based + #$id = "$id"; # Add the EMBOSS USA + #$id = "sprot_37:$id"; # Add the EMBOSS USA + #$id = "sprot_43:$id"; # Add the EMBOSS USA + # $id = $database->{$db_entry}{emboss} . ":$id"; # commented out to try FastaCMD module + my $searchdb = $database->{$db_entry}{unfiltered}; + $start--; # We need this to be zero based - my $f = $idx->get_sequence($id, $searchdb) or croak "JPRED: No such entry for $id in $searchdb\n" and return; # CC sub is from Index::EMBOSS - my @seqs = $f->get_entries; ## Sub is from Sequence::File - my $db = shift @seqs; + my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return; # CC sub is from Index::EMBOSS + my @seqs = $f->get_entries; ## Sub is from Sequence::File + my $db = shift @seqs; - unless ($db) { - confess "JPRED: dex: Accession number $id returned no sequences"; - } - elsif (@seqs) { - confess "JPRED: dex: Accession number $id returned more than one sequence"; - } + unless ($db) { + confess "JPRED: dex: Accession number $id returned no sequences"; + } elsif (@seqs) { + confess "JPRED: dex: Accession number $id returned more than one sequence"; + } + + my @db_seq = @{ $db->seq }; - my @db_seq = @{ $db->seq }; - - # $i is a pointer into the original, ungapped, unmasked sequence. $_ is - # for the aligned sequence - my $i = 0; - for (0..$#align) { - next if $align[$_] =~ /$GAP/; - - if ($align[$_] eq 'X') { - unless (defined $db_seq[$i + $start]) { - croak "JPRED: dex: the database sequence is not as long as the query sequence!" - } - $align[$_] = $db_seq[$i + $start]; - } - $i++; + # $i is a pointer into the original, ungapped, unmasked sequence. $_ is + # for the aligned sequence + my $i = 0; + for ( 0 .. $#align ) { + next if $align[$_] =~ /$GAP/; + + if ( $align[$_] eq 'X' ) { + unless ( defined $db_seq[ $i + $start ] ) { + croak "JPRED: dex: the database sequence is not as long as the query sequence!"; + } + $align[$_] = $db_seq[ $i + $start ]; } + $i++; + } - $seq->align(\@align); - } + $seq->align( \@align ); + } } =head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN); @@ -473,19 +472,19 @@ Equivilent to reduce script. =cut sub reduce { - my ($max, @seqs) = @_; + my ( $max, @seqs ) = @_; - my $num = int(@seqs / $max); + my $num = int( @seqs / $max ); - my @res = shift @seqs; + my @res = shift @seqs; - # Don't bother if we have less than the required sequences - if (@seqs < $max) { return @res, @seqs } - if ($num == 0) { return @res, @seqs } + # Don't bother if we have less than the required sequences + if ( @seqs < $max ) { return @res, @seqs } + if ( $num == 0 ) { return @res, @seqs } - for (0..$#seqs) { push @res, $seqs[$_] if $_ % $num == 0 } + for ( 0 .. $#seqs ) { push @res, $seqs[$_] if $_ % $num == 0 } - return @res; + return @res; } =head2 nonred($cutoff, @PSISEQS); @@ -497,107 +496,107 @@ Equivilent to nonred script. =cut sub nonred { - my ($cutoff, @seqs) = @_; - # Run pairwise and then OC - # Remove similar sequences - - unless (defined $cutoff) { croak "JPRED: nonred() not passed a cutoff" } - unless (@seqs) { croak "JPRED: No sequences passed to nonred()" } - if (@seqs == 1) { - warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n"; - return @seqs; - } - - my $fasta = FASTA::File->new; - - # Convert the sequences into a FASTA object, which is what - # pairwise expects. - $fasta->add_entries( - map { - my $f = FASTA->new(id => $_->id); - $f->seq(@{$_->align}); - $f; + my ( $cutoff, @seqs ) = @_; + + # Run pairwise and then OC + # Remove similar sequences + + unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" } + unless (@seqs) { croak "JPRED: No sequences passed to nonred()" } + if ( @seqs == 1 ) { + warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n"; + return @seqs; + } + + my $fasta = FASTA::File->new; + + # Convert the sequences into a FASTA object, which is what + # pairwise expects. + $fasta->add_entries( + map { + my $f = FASTA->new( id => $_->id ); + $f->seq( @{ $_->align } ); + $f; } @seqs - ); - - # Run pairwise via wrappers - use Pairwise; - my $pair = Pairwise->new( - path => $pairwise - ); - my $foo = join "\n", $pair->run($fasta) or die $!; - - # Run OC - my $ocres = OC->new( - path => "$oc sim complete cut $cutoff" - ); - $ocres->read_string(join "", $ocres->run($foo) ); - - # Now found those entries that aren't related - my @keep; - { - for ($ocres->get_groups) { - my ($group, $score, $size, @labels) = @{ $_ }; - - # Keep the QUERY from the cluster, otherwise just get one - if (grep { /QUERY/ } @labels) { - # Make sure the query stays at the start - unshift @keep, "QUERY"; - next; - } - else { push @keep, shift @labels } + ); + + # Run pairwise via wrappers + use Pairwise; + my $pair = Pairwise->new( path => $pairwise ); + my $foo = join "\n", $pair->run($fasta) or die $!; + + # Run OC + my $ocres = OC->new( path => "$oc sim complete cut $cutoff" ); + $ocres->read_string( join "", $ocres->run($foo) ); + + # Now found those entries that aren't related + my @keep; + { + for ( $ocres->get_groups ) { + my ( $group, $score, $size, @labels ) = @{$_}; + + # Keep the QUERY from the cluster, otherwise just get one + if ( grep { /QUERY/ } @labels ) { + + # Make sure the query stays at the start + unshift @keep, "QUERY"; + next; + } else { + push @keep, shift @labels; } + } - push @keep, $ocres->get_unclustered; - } + push @keep, $ocres->get_unclustered; + } - # Return the matching entries. - my @return; + # Return the matching entries. + my @return; - # We use the temporay to mimic the selection of sequences in the nonred - # script, which took the sequences in the order they occured in the file. - for (@seqs) { - my $label = $_->id; - if (grep { $_ =~ /^$label$/ } @keep) { - push @return, $_; - } - } + # We use the temporay to mimic the selection of sequences in the nonred + # script, which took the sequences in the order they occured in the file. + for (@seqs) { + my $label = $_->id; + if ( grep { $_ =~ /^$label$/ } @keep ) { + push @return, $_; + } + } - return @return; + return @return; } =head2 @seqs = del_long_seqs($percentage, @PSISEQS) -Removes those sequences that are more than $percentage longer or shorter than the first sequence in @seqs. @seqs is an array of PSISEQ objects. +Removes those sequences that are more than $percentage longer or shorter than the first +sequence in @seqs. @seqs is an array of PSISEQ objects. Equivilent to trim_seqs script. =cut sub del_long_seqs { - my ($factor, @seqs) = @_; + my ( $factor, @seqs ) = @_; - # Keep the query sequence - my $query = shift @seqs; - my @res = $query; + # Keep the query sequence + my $query = shift @seqs; + my @res = $query; - # Find the length of the query sequence without any gaps - my $length = (() = (join '', @{$query->align}) =~ /[^$GAP]/g); + # Find the length of the query sequence without any gaps + my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g ); - # Calculate the upper and lower allowed bounds - my ($upper, $lower); - { - my $bounds = ($length / 100) * $factor; - ($upper, $lower) = ($length + $bounds, $length - $bounds); - } + # Calculate the upper and lower allowed bounds + my ( $upper, $lower ); + { + my $bounds = ( $length / 100 ) * $factor; + ( $upper, $lower ) = ( $length + $bounds, $length - $bounds ); + } - # Now try each sequence and see how long they are - for (@seqs) { - my $l = (() = (join '', @{$_->align}) =~ /[^$GAP]/g); - if ($l >= $lower && $l <= $upper) { push @res, $_ } - } + # Now try each sequence and see how long they are + for (@seqs) { + my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g ); + if ( $l >= $lower && $l <= $upper ) { push @res, $_ } + } - return @res; + return @res; } =head2 toupper ($PSISEQ) @@ -607,10 +606,8 @@ Converts sequence held in PSISEQ object to uppercase. =cut sub toupper { - croak "JPRED: toupper: argument not a PSISEQ object" unless isa $_[0], 'PSISEQ'; - $_[0]->align( - [ split //, uc join '', @{ $_[0]->align } ] - ); + croak "JPRED: toupper: argument not a PSISEQ object" unless isa $_[0], 'PSISEQ'; + $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] ); } =head2 degap($PSISEQ_QUERY, @PSISEQS) @@ -622,57 +619,57 @@ Equivilent to fasta2jnet script. =cut sub degap { - my (@seqs) = @_; - - # Find where the gaps in the query sequence are - my @gaps; - { - my $i = 0; - for (@{$seqs[0]->align}) { - push @gaps, $i if $_ !~ /$GAP/; - $i++; - } - } - - return unless @gaps; - - # Remove the gaps in all the sequences. - # Derefences the array reference and takes a slice inside the method call - # arguments, then coerce back to an array ref. - for (@seqs) { - $_->align([ @{$_->align}[@gaps] ]); - } + my (@seqs) = @_; + + # Find where the gaps in the query sequence are + my @gaps; + { + my $i = 0; + for ( @{ $seqs[0]->align } ) { + push @gaps, $i if $_ !~ /$GAP/; + $i++; + } + } + + return unless @gaps; + + # Remove the gaps in all the sequences. + # Derefences the array reference and takes a slice inside the method call + # arguments, then coerce back to an array ref. + for (@seqs) { + $_->align( [ @{ $_->align }[@gaps] ] ); + } } =head2 getfreq($filename, @PSISEQS) Creates a PSIBLAST like profile and writes it to the $filename. -Equivilant to profile or getfreq (older) script. This doesn't create the same answer as the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output, but it's *not* the same. +Equivilant to profile or getfreq (older) script. This doesn't create the same answer as +the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output, +but it's *not* the same. =cut sub getfreq { - my ($fn, @seqs) = @_; + my ( $fn, @seqs ) = @_; - croak "JPRED: Passed non PSISEQ object" if grep { ! isa($_, 'PSISEQ') } @seqs; + croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs; - # Create a PSIBLAST like profile from the alignment - # This doesn't greate the same result as old Jpred, I think this is because - # differences in the input between old and new + # Create a PSIBLAST like profile from the alignment + # This doesn't greate the same result as old Jpred, I think this is because + # differences in the input between old and new - # For testing -# my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa"); -# my @profile = profile( -# map { join "", @{ $_->seq } } $f->get_entries -# ); + # For testing + # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa"); + # my @profile = profile( + # map { join "", @{ $_->seq } } $f->get_entries + # ); - my @profile = profile( - map { join "", @{ $_->align } } @seqs - ); + my @profile = profile( map { join "", @{ $_->align } } @seqs ); - open my $fh, ">$fn" or die "JPRED: $fn: $!"; - print $fh join(" ", @{$_}), "\n" for @profile; + open my $fh, ">$fn" or die "JPRED: $fn: $!"; + print $fh join( " ", @{$_} ), "\n" for @profile; } =head2 HMMER::Profile::Jnet = hmmer(@PSISEQS) @@ -684,29 +681,25 @@ Equivilent to gethmm script. =cut sub hmmer { - my (@seqs) = @_; + my (@seqs) = @_; - # Temp files required - my ($hmmbuild_out, $hmmconvert_out, $tmp_fasta) = map { - File::Temp->new->filename - } 1..3; + # Temp files required + my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3; - # Create the FASTA file - psiseq2fasta($tmp_fasta, @seqs); + # Create the FASTA file + psiseq2fasta( $tmp_fasta, @seqs ); - system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta"); - system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out"); + system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta"); + system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out"); - # Read in the HMMER file - my $hmmer = HMMER::Profile->new( - read_file => $hmmconvert_out - ); + # Read in the HMMER file + my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out ); - # Convert from HMMER::Profile to HMMER::Profile::Jnet - my $hmmer_jnet = HMMER::Profile::Jnet->new; - $hmmer_jnet->add_line(@{$_}) for $hmmer->get_line; + # Convert from HMMER::Profile to HMMER::Profile::Jnet + my $hmmer_jnet = HMMER::Profile::Jnet->new; + $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line; - return $hmmer_jnet; + return $hmmer_jnet; } =head2 jnet($hmmer, $out, $pssm ) @@ -716,15 +709,17 @@ Runs Jnet. Pass the paths of the filenames to do the prediction on =cut sub jnet { - my ($hmmer, $outfile, $pssm) = @_; - if ($pssm) { - # run Jnet prediction with all the profile data - system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile"); - } else { - # run Jnet prediction with only HMMer and alignment data - system("$jnet --concise --hmmer $hmmer > $outfile"); - } - check("jnet", $?) or exit 1; + my ( $hmmer, $outfile, $pssm ) = @_; + if ($pssm) { + + # run Jnet prediction with all the profile data + system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile"); + } else { + + # run Jnet prediction with only HMMer and alignment data + system("$jnet --concise --hmmer $hmmer > $outfile"); + } + check( "jnet", $? ) or exit 1; } =head1 psiseq2fasta($path, @PSISEQ) @@ -734,38 +729,38 @@ Writes a FASTA file given an array of PSISEQ objects. =cut sub psiseq2fasta { - my ($fn, @seqs) = @_; - - confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ'; - @seqs or confess "JPRED: not passed PSISEQs"; - - confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; - - # Changed from using FASTA::File object due to seg faulting on some large, - # long alignments, presumably due to running out of memory. -# my $fasta = FASTA::File->new; -# $fasta->add_entries( -# map { -# my $f = FASTA->new(id => $_->id); -# $f->seq(@{$_->align}); -# $f; -# } @seqs -# ); -# $fasta->write_file($fn); - - my $fh; - $fn =~ /\.gz$/ ? - ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" ): - ( open $fh, ">$fn" or die "JPRED: $!" ); - - for (@seqs) { - print $fh ">", $_->id, "\n"; - my $seq = join("", @{ $_->align }); - $seq =~ s/(.{72})/$1\n/g; - chomp $seq; - print $fh $seq."\n"; - } - close $fh; + my ( $fn, @seqs ) = @_; + + confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ'; + @seqs or confess "JPRED: not passed PSISEQs"; + + confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; + + # Changed from using FASTA::File object due to seg faulting on some large, + # long alignments, presumably due to running out of memory. + # my $fasta = FASTA::File->new; + # $fasta->add_entries( + # map { + # my $f = FASTA->new(id => $_->id); + # $f->seq(@{$_->align}); + # $f; + # } @seqs + # ); + # $fasta->write_file($fn); + + my $fh; + $fn =~ /\.gz$/ + ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" ) + : ( open $fh, ">$fn" or die "JPRED: $!" ); + + for (@seqs) { + print $fh ">", $_->id, "\n"; + my $seq = join( "", @{ $_->align } ); + $seq =~ s/(.{72})/$1\n/g; + chomp $seq; + print $fh $seq . "\n"; + } + close $fh; } =head1 @PSISEQ = fasta2psiseq($path) @@ -775,60 +770,62 @@ Reads in a FASTA file and returns an array of PSISEQ objects. =cut sub fasta2psiseq { - my ($fn) = @_; - - my $fasta = $fn =~ /\.gz$/ ? - FASTA::File->new(read_gzip_file => $fn) : - FASTA::File->new(read_file => $fn); - $fasta or die "JPRED: $!"; - - my @seqs; - for ($fasta->get_entries) { - my $seq = PSISEQ->new; - $seq->id($_->id); - $seq->align( [ @{ $_->seq } ] ); - push @seqs, $seq; - } - - return @seqs; + my ($fn) = @_; + + my $fasta = + $fn =~ /\.gz$/ + ? FASTA::File->new( read_gzip_file => $fn ) + : FASTA::File->new( read_file => $fn ); + $fasta or die "JPRED: $!"; + + my @seqs; + for ( $fasta->get_entries ) { + my $seq = PSISEQ->new; + $seq->id( $_->id ); + $seq->align( [ @{ $_->seq } ] ); + push @seqs, $seq; + } + + return @seqs; } sub fasta2psiseq_lowmem { - my ($fn) = @_; - - local $/ = "\n"; - my $fh; - $fn =~ /\.gz$/ ? - (open $fh, "gzip -dc $fn|" or die $!) : - (open $fh, $fn or die $!); - - my @seqs; - - my ($id, $seq); - while (<$fh>) { - chomp; - if (s/^>//) { - $seq =~ s/ //g if defined $seq; - - if ($id and $seq) { - my $psi = PSISEQ->new(id => $id); - $psi->align([ split //, $seq ]); - push @seqs, $psi; - undef $id; undef $seq; - } - else { $id = $_ } + my ($fn) = @_; + + local $/ = "\n"; + my $fh; + $fn =~ /\.gz$/ + ? ( open $fh, "gzip -dc $fn|" or die $! ) + : ( open $fh, $fn or die $! ); + + my @seqs; + + my ( $id, $seq ); + while (<$fh>) { + chomp; + if (s/^>//) { + $seq =~ s/ //g if defined $seq; + + if ( $id and $seq ) { + my $psi = PSISEQ->new( id => $id ); + $psi->align( [ split //, $seq ] ); + push @seqs, $psi; + undef $id; + undef $seq; + } else { + $id = $_; } - else { - $seq .= $_; - } - } - if ($id and $seq) { - my $psi = PSISEQ->new(id => $id); - $psi->seq($seq); - push @seqs, $psi; - } - - return @seqs; + } else { + $seq .= $_; + } + } + if ( $id and $seq ) { + my $psi = PSISEQ->new( id => $id ); + $psi->seq($seq); + push @seqs, $psi; + } + + return @seqs; } =head1 die_if_no_seqs($size, $message, @seqs) @@ -838,70 +835,70 @@ Exits with error message if @seqs is not at least $size. =cut sub die_if_no_seqs { - my ($size, $message, @seqs) = @_; + my ( $size, $message, @seqs ) = @_; - confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; + confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; - chomp $message; - die "JPRED: $message\n" unless @seqs > $size; + chomp $message; + die "JPRED: $message\n" unless @seqs > $size; } =head1 extend($FASTA::File, @PSISEQ) -Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues against the query, this function extends the alignment so that all of the query is present in the alignment. The other sequences are extended with gap characters. +Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues +against the query, this function extends the alignment so that all of the query is +present in the alignment. The other sequences are extended with gap characters. =cut sub extend { - my ($query, @seqs) = @_; - - croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query; - croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; - - # Get the query sequence - my $q_query = $query->get_entry(0); - - # Get the query sequence from the PSIBLAST alignment - my $a_query = shift @seqs; - - # The gap size required to expand the alignment - my ($start_gap_length, $end_gap_length); - { - # Make handling the sequence easier - my $q_seq = join "", @{[ $q_query->seq ]}; - my $a_seq = join "", @{ $a_query->align }; - - $q_seq =~ s/-//g; - $a_seq =~ s/-//g; - - ($q_seq, $a_seq) = map { uc $_ } $q_seq, $a_seq; - - $start_gap_length = index($q_seq, $a_seq); - croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1; - - $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length; - } - - # Add the gaps to the end of the alignments - for (@seqs) { - $_->align([ - ($GAP) x $start_gap_length, - @{ $_->align }, - ($GAP) x $end_gap_length - ]); - } - - # Put the query sequence back - unshift @seqs, PSISEQ->new( - id => $a_query->id, - align => [ - ($start_gap_length ? @{ $q_query->seq }[ 0..($start_gap_length-1) ] : ()), - @{ $a_query->align }, - ($end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : ()), - ] - ); - - return @seqs; + my ( $query, @seqs ) = @_; + + croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query; + croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs; + + # Get the query sequence + my $q_query = $query->get_entry(0); + + # Get the query sequence from the PSIBLAST alignment + my $a_query = shift @seqs; + + # The gap size required to expand the alignment + my ( $start_gap_length, $end_gap_length ); + { + + # Make handling the sequence easier + my $q_seq = join "", @{ [ $q_query->seq ] }; + my $a_seq = join "", @{ $a_query->align }; + + $q_seq =~ s/-//g; + $a_seq =~ s/-//g; + + ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq; + + $start_gap_length = index( $q_seq, $a_seq ); + croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1; + + $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length; + } + + # Add the gaps to the end of the alignments + for (@seqs) { + $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] ); + } + + # Put the query sequence back + unshift @seqs, + PSISEQ->new( + id => $a_query->id, + align => [ + ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ), + @{ $a_query->align }, + ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ), + ] + ); + + return @seqs; } =head1 $int = fasta_seq_length($path) @@ -911,18 +908,18 @@ Given the path to a FASTA database, find the number of residues/bases in it. Cou =cut sub fasta_seq_length { - my ($fn) = @_; - open my $fh, $fn or croak "Can't open file $fn: $!\n"; - - my $length = 0; - local $_, $/ = "\n"; - while (<$fh>) { - next if /^>/; - chomp; - $length += tr///c; - } - - return $length; + my ($fn) = @_; + open my $fh, $fn or croak "Can't open file $fn: $!\n"; + + my $length = 0; + local $_, $/ = "\n"; + while (<$fh>) { + next if /^>/; + chomp; + $length += tr///c; + } + + return $length; } =head1 log(@messages) diff --git a/jpred/lib/DSSP/SCOP.pm b/jpred/lib/DSSP/SCOP.pm index 5975c33..a05baca 100644 --- a/jpred/lib/DSSP/SCOP.pm +++ b/jpred/lib/DSSP/SCOP.pm @@ -32,56 +32,61 @@ Returns the DSSP::Position objects from within the range given. These are in the =cut sub get_range { - my ($self, $defn) = @_; - - $defn =~ / / and croak "get_range('$defn') has a space in it, which isn't allowed"; - my @ranges = split /,/, $defn; - - my @data; - for (@ranges) { - my @range; - # A:1-200 or A: - if (/:/) { - my ($chain, $range) = split /:/, $_; - - # A:1-200 - if ($range =~ /([[:alnum:]]+)-([[:alnum:]]+)/) { - push @data, [ $self->select_positions($chain, $1, $2) ]; - } - # Hmmmmm. - elsif ($range) { - confess "You shouldn't be here"; - } - # A: - else { - push @data, [ $self->get_chain_defs($chain) ]; - } - } - else { - # 297-368 - if (/([[:alnum:]]+)-([[:alnum:]]+)/) { - my @chains = $self->chains; - if (@chains > 1) { - die "Don't know which chain to select" - } - my ($chain) = @chains; - - push @data, [ $self->select_positions($chain, $1, $2) ]; - } - # - - elsif (/^-$/) { - for my $chain ($self->chains) { - push @data, [ $self->get_chain_defs($chain) ]; - } - } - # Otherwise we don't know what it is - else { - confess "Missed particular '$_'"; - } - } - } - - return @data; + my ( $self, $defn ) = @_; + + $defn =~ / / and croak "get_range('$defn') has a space in it, which isn't allowed"; + my @ranges = split /,/, $defn; + + my @data; + for (@ranges) { + my @range; + + # A:1-200 or A: + if (/:/) { + my ( $chain, $range ) = split /:/, $_; + + # A:1-200 + if ( $range =~ /([[:alnum:]]+)-([[:alnum:]]+)/ ) { + push @data, [ $self->select_positions( $chain, $1, $2 ) ]; + } + + # Hmmmmm. + elsif ($range) { + confess "You shouldn't be here"; + } + + # A: + else { + push @data, [ $self->get_chain_defs($chain) ]; + } + } else { + + # 297-368 + if (/([[:alnum:]]+)-([[:alnum:]]+)/) { + my @chains = $self->chains; + if ( @chains > 1 ) { + die "Don't know which chain to select"; + } + my ($chain) = @chains; + + push @data, [ $self->select_positions( $chain, $1, $2 ) ]; + } + + # - + elsif (/^-$/) { + for my $chain ( $self->chains ) { + push @data, [ $self->get_chain_defs($chain) ]; + } + } + + # Otherwise we don't know what it is + else { + confess "Missed particular '$_'"; + } + } + } + + return @data; } =head2 $self->select_positions($chain, $start, $end); @@ -93,42 +98,42 @@ Passed a chain =cut sub select_positions { - my ($self, $chain, $start, $end) = @_; - - confess "Not passed all arguments" if grep { not defined } $chain, $start, $end; - confess "Not passed a valid chain" unless grep { $_ eq $chain } $self->chains; - - my ($flag, @positions) = 0; - my $test = sub { - my ($off, $start, $end) = @_; - if (3 == grep { looks_like_number $_ } $off, $start, $end) { - return 1 if $off >= $start and $off <= $end; - } - else { - return undef; - } - }; - - for my $pos ($self->get_chain_defs($chain)) { - if ($pos->break) { - push @positions, $pos if $flag; - next; - } - - my $off = $pos->off; - if ( - ($off eq $start or $test->($off, $start, $end)) - .. - ($off eq $end or $test->($off, $start, $end)) - # $test($off, $start, $end) $off >= $start and $off <= $end) - #($off eq $start or ) - ) { - push @positions, $pos; - $flag = 1; - } - else { $flag = 0 } - } - return @positions; + my ( $self, $chain, $start, $end ) = @_; + + confess "Not passed all arguments" if grep { not defined } $chain, $start, $end; + confess "Not passed a valid chain" unless grep { $_ eq $chain } $self->chains; + + my ( $flag, @positions ) = 0; + my $test = sub { + my ( $off, $start, $end ) = @_; + if ( 3 == grep { looks_like_number $_ } $off, $start, $end ) { + return 1 if $off >= $start and $off <= $end; + } else { + return undef; + } + }; + + for my $pos ( $self->get_chain_defs($chain) ) { + if ( $pos->break ) { + push @positions, $pos if $flag; + next; + } + + my $off = $pos->off; + if ( + ( $off eq $start or $test->( $off, $start, $end ) ) .. ( $off eq $end or $test->( $off, $start, $end ) ) + + # $test($off, $start, $end) $off >= $start and $off <= $end) + #($off eq $start or ) + ) + { + push @positions, $pos; + $flag = 1; + } else { + $flag = 0; + } + } + return @positions; } 1; diff --git a/jpred/lib/FASTA.pm b/jpred/lib/FASTA.pm index e0a81c7..ab90d20 100644 --- a/jpred/lib/FASTA.pm +++ b/jpred/lib/FASTA.pm @@ -7,23 +7,24 @@ use Carp; use base qw(Root Sequence); sub id { - my ($self, $id) = @_; + my ( $self, $id ) = @_; - if (defined $id) { - #warn "'$1' detected in ID, changing to ';'\n" if $id =~ s/([:#])/;/ - } + if ( defined $id ) { - $self->SUPER::id($id); + #warn "'$1' detected in ID, changing to ';'\n" if $id =~ s/([:#])/;/ + } + + $self->SUPER::id($id); } sub seq { - my ($self, @entries) = @_; + my ( $self, @entries ) = @_; - for (@entries) { - warn "'$1' detected in entry, changing to ';'\n" if s/([:#,])/;/ - } + for (@entries) { + warn "'$1' detected in entry, changing to ';'\n" if s/([:#,])/;/; + } - $self->SUPER::seq(@entries); + $self->SUPER::seq(@entries); } 1; diff --git a/jpred/lib/FASTA/File.pm b/jpred/lib/FASTA/File.pm index a4356d0..ddf639d 100644 --- a/jpred/lib/FASTA/File.pm +++ b/jpred/lib/FASTA/File.pm @@ -8,63 +8,64 @@ use base qw(Sequence::File); use FASTA; sub old_read { - my ($self, $fh) = @_; - - my ($id, $seq, @seqs); - while (<$fh>) { - chomp; - next if /^\s+$/; - - if (s/^>//) { - push @seqs, [ $id, $seq ] if $id and $seq; - $seq = undef; - $id = $_; - } - else { $seq .= $_ } - } - push @seqs, [ $id, $seq ] if $id and $seq; - - for (@seqs) { - my $new = FASTA->new(id => ${$_}[0]); - $new->seq(split //, ${$_}[1]); - $self->add_entries($new); - } - - 1; + my ( $self, $fh ) = @_; + + my ( $id, $seq, @seqs ); + while (<$fh>) { + chomp; + next if /^\s+$/; + + if (s/^>//) { + push @seqs, [ $id, $seq ] if $id and $seq; + $seq = undef; + $id = $_; + } else { + $seq .= $_; + } + } + push @seqs, [ $id, $seq ] if $id and $seq; + + for (@seqs) { + my $new = FASTA->new( id => ${$_}[0] ); + $new->seq( split //, ${$_}[1] ); + $self->add_entries($new); + } + + 1; } sub read { - my ($self, $fh) = @_; - local $/ = "\n>"; - while (<$fh>) { - s/^>//g; - s/>$//g; + my ( $self, $fh ) = @_; + local $/ = "\n>"; + while (<$fh>) { + s/^>//g; + s/>$//g; - my ($id, @data) = split /\n/, $_; - my $entry = FASTA->new(id => $id); - $entry->seq( split //, join("", @data) ); + my ( $id, @data ) = split /\n/, $_; + my $entry = FASTA->new( id => $id ); + $entry->seq( split //, join( "", @data ) ); - $self->add_entries($entry); - } + $self->add_entries($entry); + } - 1; + 1; } sub write { - my ($self, $fh) = @_; + my ( $self, $fh ) = @_; - local $| = 1; - - for ($self->get_entries) { - my $id = $_->id; - my @seq = $_->seq; + local $| = 1; - my $seq = join '', @seq; - $seq =~ s/\s*//g; - $seq =~ s/(.{72})/$1\n/g; + for ( $self->get_entries ) { + my $id = $_->id; + my @seq = $_->seq; - print $fh ">$id\n$seq\n"; - } + my $seq = join '', @seq; + $seq =~ s/\s*//g; + $seq =~ s/(.{72})/$1\n/g; + + print $fh ">$id\n$seq\n"; + } } 1; diff --git a/jpred/lib/IO/String.pm b/jpred/lib/IO/String.pm index c44bce4..ac51127 100644 --- a/jpred/lib/IO/String.pm +++ b/jpred/lib/IO/String.pm @@ -8,200 +8,177 @@ package IO::String; require 5.005_03; use strict; use vars qw($VERSION $DEBUG $IO_CONSTANTS); -$VERSION = "1.08"; # $Date: 2007/02/05 17:08:37 $ +$VERSION = "1.08"; # $Date: 2007/02/05 17:08:37 $ use Symbol (); -sub new -{ - my $class = shift; - my $self = bless Symbol::gensym(), ref($class) || $class; - tie *$self, $self; - $self->open(@_); - return $self; +sub new { + my $class = shift; + my $self = bless Symbol::gensym(), ref($class) || $class; + tie *$self, $self; + $self->open(@_); + return $self; } -sub open -{ - my $self = shift; - return $self->new(@_) unless ref($self); - - if (@_) { - my $bufref = ref($_[0]) ? $_[0] : \$_[0]; - $$bufref = "" unless defined $$bufref; - *$self->{buf} = $bufref; - } - else { - my $buf = ""; - *$self->{buf} = \$buf; - } - *$self->{pos} = 0; - *$self->{lno} = 0; - return $self; +sub open { + my $self = shift; + return $self->new(@_) unless ref($self); + + if (@_) { + my $bufref = ref( $_[0] ) ? $_[0] : \$_[0]; + $$bufref = "" unless defined $$bufref; + *$self->{buf} = $bufref; + } else { + my $buf = ""; + *$self->{buf} = \$buf; + } + *$self->{pos} = 0; + *$self->{lno} = 0; + return $self; } -sub pad -{ - my $self = shift; - my $old = *$self->{pad}; - *$self->{pad} = substr($_[0], 0, 1) if @_; - return "\0" unless defined($old) && length($old); - return $old; +sub pad { + my $self = shift; + my $old = *$self->{pad}; + *$self->{pad} = substr( $_[0], 0, 1 ) if @_; + return "\0" unless defined($old) && length($old); + return $old; } -sub dump -{ - require Data::Dumper; - my $self = shift; - print Data::Dumper->Dump([$self], ['*self']); - print Data::Dumper->Dump([*$self{HASH}], ['$self{HASH}']); - return; +sub dump { + require Data::Dumper; + my $self = shift; + print Data::Dumper->Dump( [$self], ['*self'] ); + print Data::Dumper->Dump( [ *$self{HASH} ], ['$self{HASH}'] ); + return; } -sub TIEHANDLE -{ - print "TIEHANDLE @_\n" if $DEBUG; - return $_[0] if ref($_[0]); - my $class = shift; - my $self = bless Symbol::gensym(), $class; - $self->open(@_); - return $self; +sub TIEHANDLE { + print "TIEHANDLE @_\n" if $DEBUG; + return $_[0] if ref( $_[0] ); + my $class = shift; + my $self = bless Symbol::gensym(), $class; + $self->open(@_); + return $self; } -sub DESTROY -{ - print "DESTROY @_\n" if $DEBUG; +sub DESTROY { + print "DESTROY @_\n" if $DEBUG; } -sub close -{ - my $self = shift; - delete *$self->{buf}; - delete *$self->{pos}; - delete *$self->{lno}; - undef *$self if $] eq "5.008"; # workaround for some bug - return 1; +sub close { + my $self = shift; + delete *$self->{buf}; + delete *$self->{pos}; + delete *$self->{lno}; + undef *$self if $] eq "5.008"; # workaround for some bug + return 1; } -sub opened -{ - my $self = shift; - return defined *$self->{buf}; +sub opened { + my $self = shift; + return defined *$self->{buf}; } -sub binmode -{ - my $self = shift; - return 1 unless @_; - # XXX don't know much about layers yet :-( - return 0; +sub binmode { + my $self = shift; + return 1 unless @_; + + # XXX don't know much about layers yet :-( + return 0; } -sub getc -{ - my $self = shift; - my $buf; - return $buf if $self->read($buf, 1); - return undef; +sub getc { + my $self = shift; + my $buf; + return $buf if $self->read( $buf, 1 ); + return undef; } -sub ungetc -{ - my $self = shift; - $self->setpos($self->getpos() - 1); - return 1; +sub ungetc { + my $self = shift; + $self->setpos( $self->getpos() - 1 ); + return 1; } -sub eof -{ - my $self = shift; - return length(${*$self->{buf}}) <= *$self->{pos}; +sub eof { + my $self = shift; + return length( ${ *$self->{buf} } ) <= *$self->{pos}; } -sub print -{ - my $self = shift; - if (defined $\) { - if (defined $,) { - $self->write(join($,, @_).$\); - } - else { - $self->write(join("",@_).$\); - } +sub print { + my $self = shift; + if ( defined $\ ) { + if ( defined $, ) { + $self->write( join( $,, @_ ) . $\ ); + } else { + $self->write( join( "", @_ ) . $\ ); } - else { - if (defined $,) { - $self->write(join($,, @_)); - } - else { - $self->write(join("",@_)); - } + } else { + if ( defined $, ) { + $self->write( join( $,, @_ ) ); + } else { + $self->write( join( "", @_ ) ); } - return 1; + } + return 1; } *printflush = \*print; -sub printf -{ - my $self = shift; - print "PRINTF(@_)\n" if $DEBUG; - my $fmt = shift; - $self->write(sprintf($fmt, @_)); - return 1; +sub printf { + my $self = shift; + print "PRINTF(@_)\n" if $DEBUG; + my $fmt = shift; + $self->write( sprintf( $fmt, @_ ) ); + return 1; } - -my($SEEK_SET, $SEEK_CUR, $SEEK_END); - -sub _init_seek_constants -{ - if ($IO_CONSTANTS) { - require IO::Handle; - $SEEK_SET = &IO::Handle::SEEK_SET; - $SEEK_CUR = &IO::Handle::SEEK_CUR; - $SEEK_END = &IO::Handle::SEEK_END; - } - else { - $SEEK_SET = 0; - $SEEK_CUR = 1; - $SEEK_END = 2; - } +my ( $SEEK_SET, $SEEK_CUR, $SEEK_END ); + +sub _init_seek_constants { + if ($IO_CONSTANTS) { + require IO::Handle; + $SEEK_SET = &IO::Handle::SEEK_SET; + $SEEK_CUR = &IO::Handle::SEEK_CUR; + $SEEK_END = &IO::Handle::SEEK_END; + } else { + $SEEK_SET = 0; + $SEEK_CUR = 1; + $SEEK_END = 2; + } } +sub seek { + my ( $self, $off, $whence ) = @_; + my $buf = *$self->{buf} || return 0; + my $len = length($$buf); + my $pos = *$self->{pos}; -sub seek -{ - my($self,$off,$whence) = @_; - my $buf = *$self->{buf} || return 0; - my $len = length($$buf); - my $pos = *$self->{pos}; - - _init_seek_constants() unless defined $SEEK_SET; + _init_seek_constants() unless defined $SEEK_SET; - if ($whence == $SEEK_SET) { $pos = $off } - elsif ($whence == $SEEK_CUR) { $pos += $off } - elsif ($whence == $SEEK_END) { $pos = $len + $off } - else { die "Bad whence ($whence)" } - print "SEEK(POS=$pos,OFF=$off,LEN=$len)\n" if $DEBUG; + if ( $whence == $SEEK_SET ) { $pos = $off } + elsif ( $whence == $SEEK_CUR ) { $pos += $off } + elsif ( $whence == $SEEK_END ) { $pos = $len + $off } + else { die "Bad whence ($whence)" } + print "SEEK(POS=$pos,OFF=$off,LEN=$len)\n" if $DEBUG; - $pos = 0 if $pos < 0; - $self->truncate($pos) if $pos > $len; # extend file - *$self->{pos} = $pos; - return 1; + $pos = 0 if $pos < 0; + $self->truncate($pos) if $pos > $len; # extend file + *$self->{pos} = $pos; + return 1; } -sub pos -{ - my $self = shift; - my $old = *$self->{pos}; - if (@_) { - my $pos = shift || 0; - my $buf = *$self->{buf}; - my $len = $buf ? length($$buf) : 0; - $pos = $len if $pos > $len; - *$self->{pos} = $pos; - } - return $old; +sub pos { + my $self = shift; + my $old = *$self->{pos}; + if (@_) { + my $pos = shift || 0; + my $buf = *$self->{buf}; + my $len = $buf ? length($$buf) : 0; + $pos = $len if $pos > $len; + *$self->{pos} = $pos; + } + return $old; } sub getpos { shift->pos; } @@ -210,211 +187,196 @@ sub getpos { shift->pos; } *setpos = \&pos; *tell = \&getpos; - - -sub getline -{ - my $self = shift; - my $buf = *$self->{buf} || return; - my $len = length($$buf); - my $pos = *$self->{pos}; - return if $pos >= $len; - - unless (defined $/) { # slurp - *$self->{pos} = $len; - return substr($$buf, $pos); - } - - unless (length $/) { # paragraph mode - # XXX slow&lazy implementation using getc() - my $para = ""; - my $eol = 0; - my $c; - while (defined($c = $self->getc)) { - if ($c eq "\n") { - $eol++; - next if $eol > 2; - } - elsif ($eol > 1) { - $self->ungetc($c); - last; - } - else { - $eol = 0; - } - $para .= $c; - } - return $para; # XXX wantarray +sub getline { + my $self = shift; + my $buf = *$self->{buf} || return; + my $len = length($$buf); + my $pos = *$self->{pos}; + return if $pos >= $len; + + unless ( defined $/ ) { # slurp + *$self->{pos} = $len; + return substr( $$buf, $pos ); + } + + unless ( length $/ ) { # paragraph mode + # XXX slow&lazy implementation using getc() + my $para = ""; + my $eol = 0; + my $c; + while ( defined( $c = $self->getc ) ) { + if ( $c eq "\n" ) { + $eol++; + next if $eol > 2; + } elsif ( $eol > 1 ) { + $self->ungetc($c); + last; + } else { + $eol = 0; + } + $para .= $c; } - - my $idx = index($$buf,$/,$pos); - if ($idx < 0) { - # return rest of it - *$self->{pos} = $len; - $. = ++ *$self->{lno}; - return substr($$buf, $pos); - } - $len = $idx - $pos + length($/); - *$self->{pos} += $len; - $. = ++ *$self->{lno}; - return substr($$buf, $pos, $len); + return $para; # XXX wantarray + } + + my $idx = index( $$buf, $/, $pos ); + if ( $idx < 0 ) { + + # return rest of it + *$self->{pos} = $len; + $. = ++*$self->{lno}; + return substr( $$buf, $pos ); + } + $len = $idx - $pos + length($/); + *$self->{pos} += $len; + $. = ++*$self->{lno}; + return substr( $$buf, $pos, $len ); } -sub getlines -{ - die "getlines() called in scalar context\n" unless wantarray; - my $self = shift; - my($line, @lines); - push(@lines, $line) while defined($line = $self->getline); - return @lines; +sub getlines { + die "getlines() called in scalar context\n" unless wantarray; + my $self = shift; + my ( $line, @lines ); + push( @lines, $line ) while defined( $line = $self->getline ); + return @lines; } -sub READLINE -{ - goto &getlines if wantarray; - goto &getline; +sub READLINE { + goto &getlines if wantarray; + goto &getline; } -sub input_line_number -{ - my $self = shift; - my $old = *$self->{lno}; - *$self->{lno} = shift if @_; - return $old; +sub input_line_number { + my $self = shift; + my $old = *$self->{lno}; + *$self->{lno} = shift if @_; + return $old; } -sub truncate -{ - my $self = shift; - my $len = shift || 0; - my $buf = *$self->{buf}; - if (length($$buf) >= $len) { - substr($$buf, $len) = ''; - *$self->{pos} = $len if $len < *$self->{pos}; - } - else { - $$buf .= ($self->pad x ($len - length($$buf))); - } - return 1; +sub truncate { + my $self = shift; + my $len = shift || 0; + my $buf = *$self->{buf}; + if ( length($$buf) >= $len ) { + substr( $$buf, $len ) = ''; + *$self->{pos} = $len if $len < *$self->{pos}; + } else { + $$buf .= ( $self->pad x ( $len - length($$buf) ) ); + } + return 1; } -sub read -{ - my $self = shift; - my $buf = *$self->{buf}; - return undef unless $buf; - - my $pos = *$self->{pos}; - my $rem = length($$buf) - $pos; - my $len = $_[1]; - $len = $rem if $len > $rem; - return undef if $len < 0; - if (@_ > 2) { # read offset - substr($_[0],$_[2]) = substr($$buf, $pos, $len); - } - else { - $_[0] = substr($$buf, $pos, $len); - } - *$self->{pos} += $len; - return $len; +sub read { + my $self = shift; + my $buf = *$self->{buf}; + return undef unless $buf; + + my $pos = *$self->{pos}; + my $rem = length($$buf) - $pos; + my $len = $_[1]; + $len = $rem if $len > $rem; + return undef if $len < 0; + if ( @_ > 2 ) { # read offset + substr( $_[0], $_[2] ) = substr( $$buf, $pos, $len ); + } else { + $_[0] = substr( $$buf, $pos, $len ); + } + *$self->{pos} += $len; + return $len; } -sub write -{ - my $self = shift; - my $buf = *$self->{buf}; - return unless $buf; - - my $pos = *$self->{pos}; - my $slen = length($_[0]); - my $len = $slen; - my $off = 0; - if (@_ > 1) { - $len = $_[1] if $_[1] < $len; - if (@_ > 2) { - $off = $_[2] || 0; - die "Offset outside string" if $off > $slen; - if ($off < 0) { - $off += $slen; - die "Offset outside string" if $off < 0; - } - my $rem = $slen - $off; - $len = $rem if $rem < $len; - } +sub write { + my $self = shift; + my $buf = *$self->{buf}; + return unless $buf; + + my $pos = *$self->{pos}; + my $slen = length( $_[0] ); + my $len = $slen; + my $off = 0; + if ( @_ > 1 ) { + $len = $_[1] if $_[1] < $len; + if ( @_ > 2 ) { + $off = $_[2] || 0; + die "Offset outside string" if $off > $slen; + if ( $off < 0 ) { + $off += $slen; + die "Offset outside string" if $off < 0; + } + my $rem = $slen - $off; + $len = $rem if $rem < $len; } - substr($$buf, $pos, $len) = substr($_[0], $off, $len); - *$self->{pos} += $len; - return $len; + } + substr( $$buf, $pos, $len ) = substr( $_[0], $off, $len ); + *$self->{pos} += $len; + return $len; } -*sysread = \&read; +*sysread = \&read; *syswrite = \&write; -sub stat -{ - my $self = shift; - return unless $self->opened; - return 1 unless wantarray; - my $len = length ${*$self->{buf}}; - - return ( - undef, undef, # dev, ino - 0666, # filemode - 1, # links - $>, # user id - $), # group id - undef, # device id - $len, # size - undef, # atime - undef, # mtime - undef, # ctime - 512, # blksize - int(($len+511)/512) # blocks - ); +sub stat { + my $self = shift; + return unless $self->opened; + return 1 unless wantarray; + my $len = length ${ *$self->{buf} }; + + return ( + undef, undef, # dev, ino + 0666, # filemode + 1, # links + $>, # user id + $), # group id + undef, # device id + $len, # size + undef, # atime + undef, # mtime + undef, # ctime + 512, # blksize + int( ( $len + 511 ) / 512 ) # blocks + ); } sub FILENO { - return undef; # XXX perlfunc says this means the file is closed + return undef; # XXX perlfunc says this means the file is closed } sub blocking { - my $self = shift; - my $old = *$self->{blocking} || 0; - *$self->{blocking} = shift if @_; - return $old; + my $self = shift; + my $old = *$self->{blocking} || 0; + *$self->{blocking} = shift if @_; + return $old; } my $notmuch = sub { return }; -*fileno = $notmuch; -*error = $notmuch; -*clearerr = $notmuch; -*sync = $notmuch; -*flush = $notmuch; -*setbuf = $notmuch; -*setvbuf = $notmuch; +*fileno = $notmuch; +*error = $notmuch; +*clearerr = $notmuch; +*sync = $notmuch; +*flush = $notmuch; +*setbuf = $notmuch; +*setvbuf = $notmuch; *untaint = $notmuch; *autoflush = $notmuch; *fcntl = $notmuch; *ioctl = $notmuch; -*GETC = \&getc; -*PRINT = \&print; -*PRINTF = \&printf; -*READ = \&read; -*WRITE = \&write; -*SEEK = \&seek; -*TELL = \&getpos; -*EOF = \&eof; -*CLOSE = \&close; +*GETC = \&getc; +*PRINT = \&print; +*PRINTF = \&printf; +*READ = \&read; +*WRITE = \&write; +*SEEK = \&seek; +*TELL = \&getpos; +*EOF = \&eof; +*CLOSE = \&close; *BINMODE = \&binmode; - -sub string_ref -{ - my $self = shift; - return *$self->{buf}; +sub string_ref { + my $self = shift; + return *$self->{buf}; } *sref = \&string_ref; diff --git a/jpred/lib/Jpred.pm b/jpred/lib/Jpred.pm index a367d38..7119d28 100755 --- a/jpred/lib/Jpred.pm +++ b/jpred/lib/Jpred.pm @@ -10,40 +10,78 @@ use warnings; use strict; BEGIN { - use Exporter; - our @ISA = ('Exporter'); - our @EXPORT = qw($WEBSERVER $WEBSERVERCGI $SERVERROOT $SRSSERVER $CHKLOG $RESULTS $PDBLNK $CSS $IMAGES $JNET $TIMEOUT $BATCHLIM $DUNDEE $JPREDUSER $JPREDEMAIL $MAILHOST $JPREDROOT $BINDIR $LIBDIR $JOBDIR $PREFIX $RESOURCE $BLASTDB $SWALL $SWALLFILT $PDB $PDB_DAT $JPREDHEAD $JPREDFOOT &xsystem); - our @EXPORT_OK = @EXPORT; + use Exporter; + our @ISA = ('Exporter'); + our @EXPORT = qw( + $WEBSERVER + $WEBSERVERCGI + $SERVERROOT + $SRSSERVER + $CHKLOG + $RESULTS + $PDBLNK + $CSS + $IMAGES + $JNET + $TIMEOUT + $BATCHLIM + $DUNDEE + $JPREDUSER + $JPREDEMAIL + $MAILHOST + $JPREDROOT + $BINDIR + $LIBDIR + $JOBDIR + $PREFIX + $RESOURCE + $BLASTDB + $SWALL + $SWALLFILT + $PDB + $PDB_DAT + $JPREDHEAD + $JPREDFOOT + &xsystem + ); + our @EXPORT_OK = @EXPORT; } +# library path for Mail::CheckUser dependency for jpred_form. +# for some reason doesn't work if in PERL5LIB +use lib '/sw/lib/perl5.10.1/lib/perl5'; + # URIs -#our $WEBSERVER = 'http://www.compbio.dundee.ac.uk/www-jpred'; -our $WEBSERVER = 'http://webserv1.cluster.lifesci.dundee.ac.uk:3209'; +our $WEBSERVER = 'http://www.compbio.dundee.ac.uk/www-jpred'; + +#our $WEBSERVER = 'http://webserv1.cluster.lifesci.dundee.ac.uk:3209'; our $WEBSERVERCGI = "$WEBSERVER/cgi-bin"; + #$SERVERROOT = "$WEBSERVER/~www-jpred"; our $SERVERROOT = "$WEBSERVER"; -our $SRSSERVER = 'http://srs.ebi.ac.uk/srs6bin/cgi-bin'; -our $CHKLOG = "$WEBSERVERCGI/chklog?"; -our $RESULTS = "$SERVERROOT/results"; -our $PDBLNK = "http://www.ebi.ac.uk/pdbsum/"; -our $CSS = "$SERVERROOT/jpred.css"; -our $IMAGES = "$SERVERROOT/images"; -our $JNET = "$SERVERROOT/about.html#jnet"; +our $SRSSERVER = 'http://srs.ebi.ac.uk/srs6bin/cgi-bin'; +our $CHKLOG = "$WEBSERVERCGI/chklog?"; +our $RESULTS = "$SERVERROOT/results"; +our $PDBLNK = "http://www.ebi.ac.uk/pdbsum/"; +our $CSS = "$SERVERROOT/jpred.css"; +our $IMAGES = "$SERVERROOT/images"; +our $JNET = "$SERVERROOT/about.html#jnet"; # This is the time (in seconds) the job is allowed to take -our $TIMEOUT = 60 * 60; # now is 60mins +our $TIMEOUT = 60 * 60; # now is 60mins our $BATCHLIM = 200; our $DUNDEE = "http://www.dundee.ac.uk"; # e-mail details -our $JPREDUSER = 'www-jpred'; +our $JPREDUSER = 'www-jpred'; our $JPREDEMAIL = 'www-jpred@compbio.dundee.ac.uk'; -our $MAILHOST = 'smtp.lifesci.dundee.ac.uk'; # CC 19/05/06 - updated to current smtp host from weevil - +our $MAILHOST = 'smtp.lifesci.dundee.ac.uk'; # CC 19/05/06 - updated to current smtp host from weevil # Server paths -our $JPREDROOT = '/homes/www-jpred/devel'; +our $JPREDROOT = '/homes/www-jpred/live'; + +#our $JPREDROOT = '/homes/www-jpred/devel'; # Directory for binaries either on the cluster or on the www server our $BINDIR = "$JPREDROOT/bin"; @@ -52,47 +90,44 @@ our $BINDIR = "$JPREDROOT/bin"; our $LIBDIR = "$JPREDROOT/lib"; # Cluster paths -our $JOBDIR = "$JPREDROOT/public_html/results"; # directory for output +our $JOBDIR = "$JPREDROOT/public_html/results"; # directory for output # Cluster names -our $PREFIX = "jp_"; # Prefix for job submissions (qstat will only display 10 chars of job name) -our $RESOURCE = "www_service2"; # Resource for the submission to use +our $PREFIX = "jp_"; # Prefix for job submissions (qstat will only display 10 chars of job name) +our $RESOURCE = "www_service2"; # Resource for the submission to use # Variables for external programs # psiblast $ENV{BLASTMAT} = "$JPREDROOT/data/blast"; -our $BLASTDB = $JPREDROOT."/databases"; +our $BLASTDB = $JPREDROOT . "/databases"; $ENV{BLASTDB} = $BLASTDB; -our $SWALL = "$BLASTDB/uniref90"; +our $SWALL = "$BLASTDB/uniref90"; our $SWALLFILT = "$SWALL.filt"; -our $PDB = '/db/blastdb/pdb'; -our $PDB_DAT = '/db/blastdb/DB.dat'; +our $PDB = '/db/blastdb/pdb'; +our $PDB_DAT = '/db/blastdb/DB.dat'; # ncoils matrix location $ENV{COILSDIR} = "$JPREDROOT/data/coils"; # Error checking system call sub xsystem { - my ($command) = @_; - my $rc = 0xffff & system $command; - if ($rc == 0) { - return; - } - elsif ($rc == 0xff00) { - print "'$command' failed!\n"; - die "Jpred failed\n"; - } - elsif (($rc & 0xff) == 0) { - $rc >>= 8; - die "'$command' did something else! (exited $rc)\n"; - } - else { - if ($rc & 0x80) { - $rc &= ~0x80; - print "'$command' dumped core!\n"; - die "Jpred failed\n"; - } - } + my ($command) = @_; + my $rc = 0xffff & system $command; + if ( $rc == 0 ) { + return; + } elsif ( $rc == 0xff00 ) { + print "'$command' failed!\n"; + die "Jpred failed\n"; + } elsif ( ( $rc & 0xff ) == 0 ) { + $rc >>= 8; + die "'$command' did something else! (exited $rc)\n"; + } else { + if ( $rc & 0x80 ) { + $rc &= ~0x80; + print "'$command' dumped core!\n"; + die "Jpred failed\n"; + } + } } # The header and footer for any HTML produced dynamically diff --git a/jpred/lib/PSIBLAST/Run.pm b/jpred/lib/PSIBLAST/Run.pm index 99188e1..bf74c71 100644 --- a/jpred/lib/PSIBLAST/Run.pm +++ b/jpred/lib/PSIBLAST/Run.pm @@ -5,68 +5,71 @@ use base qw(Root); use Run qw(check); sub new { - my ($class, %args) = @_; - my $self = {}; - bless $self, ref($class) || $class; - - # -a number of CPUs used (default 1) - # -e e-value threshold for reporting sequences (default 10) - # -h e-value threshold for including sequences in PSSM (default 0.002) - # -m type of output - # -b max number of alignments reported (default 250) - # -v max number of sequences described (default 500) - # -j max number of itterations (default 1) - $self->args("-e0.05 -h0.01 -m6 -b10000 -v10000 -j3"); - #$self->args("-a2 -e0.05 -h0.01 -m6 -b20000 -v20000 -j15"); -# $self->BLASTMAT("/software/jpred_bin/blast"); -# $self->BLASTDB("/software/jpred_uniprot_all_filt"); - - # Automatically run any arguments passed to the constructor - for (keys %args) { - croak "No such method '$_' of object $class" unless $self->can($_); - $self->$_($args{$_}); - } - - return $self; + my ( $class, %args ) = @_; + my $self = {}; + bless $self, ref($class) || $class; + + # -a number of CPUs used (default 1) + # -e e-value threshold for reporting sequences (default 10) + # -h e-value threshold for including sequences in PSSM (default 0.002) + # -m type of output + # -b max number of alignments reported (default 250) + # -v max number of sequences described (default 500) + # -j max number of itterations (default 1) + $self->args("-e0.05 -h0.01 -m6 -b10000 -v10000 -j3"); + + #$self->args("-a2 -e0.05 -h0.01 -m6 -b20000 -v20000 -j15"); + # $self->BLASTMAT("/software/jpred_bin/blast"); + # $self->BLASTDB("/software/jpred_uniprot_all_filt"); + + # Automatically run any arguments passed to the constructor + for ( keys %args ) { + croak "No such method '$_' of object $class" unless $self->can($_); + $self->$_( $args{$_} ); + } + + return $self; } -sub path { defined $_[1] ? $_[0]->{path} = $_[1] : $_[0]->{path} } -sub args { defined $_[1] ? $_[0]->{args} = $_[1] : $_[0]->{args} } -sub database { defined $_[1] ? $_[0]->{data} = "-d $_[1]" : $_[0]->{data} } -sub input { defined $_[1] ? $_[0]->{input} = "-i $_[1]" : $_[0]->{input} } -sub output { defined $_[1] ? $_[0]->{output} = "-o $_[1]" : $_[0]->{output} } -sub matrix { defined $_[1] ? $_[0]->{matrix} = "-Q $_[1]" : $_[0]->{matrix} } -sub debug { defined $_[1] ? $_[0]->{debug} = $_[1] : $_[0]->{debug} } # CC corrected - now works as expected. Before, debug was always on. +sub path { defined $_[1] ? $_[0]->{path} = $_[1] : $_[0]->{path} } +sub args { defined $_[1] ? $_[0]->{args} = $_[1] : $_[0]->{args} } +sub database { defined $_[1] ? $_[0]->{data} = "-d $_[1]" : $_[0]->{data} } +sub input { defined $_[1] ? $_[0]->{input} = "-i $_[1]" : $_[0]->{input} } +sub output { defined $_[1] ? $_[0]->{output} = "-o $_[1]" : $_[0]->{output} } +sub matrix { defined $_[1] ? $_[0]->{matrix} = "-Q $_[1]" : $_[0]->{matrix} } +sub debug { defined $_[1] ? $_[0]->{debug} = $_[1] : $_[0]->{debug} } # CC corrected - now works as expected. Before, debug was always on. sub BLASTMAT { $ENV{BLASTMAT} = $_[1] } -sub BLASTDB { $ENV{BLASTDB} = $_[1] } +sub BLASTDB { $ENV{BLASTDB} = $_[1] } sub run { - my ($self) = @_; - - # Required arguments - for (qw(path output database input)) { - croak "Method '$_' needs to be set" unless defined $self->$_; - } - - # Construct the command line - my @cmd; - for (qw(path args database input matrix output)) { - next unless $self->$_; - push @cmd, $self->$_; - } - - my $cmd = join " ", @cmd; - # Execute PSIBLAST and check it ran okay - if ($self->debug) { - #for (keys %ENV) { warn "$_=$ENV{$_}\n" } - warn "$cmd\n" - } - - system($cmd) == 0 or check("blastpgp", $?) and die "blastpgp was naughty"; - - # Clean up the error.log file it produces - if (-z "error.log") { unlink "error.log" } - else { warn "blastpgp error.log file was not empty\n" } + my ($self) = @_; + + # Required arguments + for (qw(path output database input)) { + croak "Method '$_' needs to be set" unless defined $self->$_; + } + + # Construct the command line + my @cmd; + for (qw(path args database input matrix output)) { + next unless $self->$_; + push @cmd, $self->$_; + } + + my $cmd = join " ", @cmd; + + # Execute PSIBLAST and check it ran okay + if ( $self->debug ) { + + #for (keys %ENV) { warn "$_=$ENV{$_}\n" } + warn "$cmd\n"; + } + + system($cmd) == 0 or check( "blastpgp", $? ) and die "blastpgp was naughty"; + + # Clean up the error.log file it produces + if ( -z "error.log" ) { unlink "error.log" } + else { warn "blastpgp error.log file was not empty\n" } } 1; diff --git a/jpred/lib/Pairwise.pm b/jpred/lib/Pairwise.pm index d6d32bd..370d226 100644 --- a/jpred/lib/Pairwise.pm +++ b/jpred/lib/Pairwise.pm @@ -11,32 +11,32 @@ use base qw(Root Common); use Run qw(check); sub path { - my ($self, $path) = @_; - if (defined $path) { $self->{path}= $path } - else { - if (defined $self->{path}) { return $self->{path} } - else { croak "Path not defined" } - } + my ( $self, $path ) = @_; + if ( defined $path ) { $self->{path} = $path } + else { + if ( defined $self->{path} ) { return $self->{path} } + else { croak "Path not defined" } + } } sub run { - my ($self, $fasta) = @_; + my ( $self, $fasta ) = @_; - croak "Non FASTA::File object passed to Pairwise::run" unless isa $fasta, 'FASTA::File'; + croak "Non FASTA::File object passed to Pairwise::run" unless isa $fasta, 'FASTA::File'; - local ($/, $?) = (undef, 0); + local ( $/, $? ) = ( undef, 0 ); - my $f = File::Temp->new->filename; - $fasta->write_file($f); + my $f = File::Temp->new->filename; + $fasta->write_file($f); - my $pid = open my $fh, $self->path." $f |" or die $!; + my $pid = open my $fh, $self->path . " $f |" or die $!; - my @output = join "\n", split "\n", <$fh>; + my @output = join "\n", split "\n", <$fh>; - waitpid $pid, 0; - check($self->path, $?) or die "Pairwise was naughty\n"; + waitpid $pid, 0; + check( $self->path, $? ) or die "Pairwise was naughty\n"; - return @output; + return @output; } 1; diff --git a/jpred/lib/Paths.pm b/jpred/lib/Paths.pm index 04fab10..7ca2847 100644 --- a/jpred/lib/Paths.pm +++ b/jpred/lib/Paths.pm @@ -20,7 +20,10 @@ Putting it all here should mean that this is the only file that needs updating w our @EXPORT_OK = qw($ff_bignet $analyze $batchman $sov $pairwise $oc $jnet $fastacmd $hmmbuild $hmmconvert $psiblastbin); my $HOME = $ENV{HOME}; -my $BINDIR = '/homes/www-jpred/live/jpred/bin'; +# main production path +#my $BINDIR = '/homes/www-jpred/live/jpred/bin'; +# development path +my $BINDIR = '/homes/www-jpred/devel/jpred/bin'; our $pairwise = "$BINDIR/pairwise"; # CC modified for correct path our $oc = "$BINDIR/oc"; diff --git a/jpred/lib/Root.pm b/jpred/lib/Root.pm index e25c776..7e79a07 100644 --- a/jpred/lib/Root.pm +++ b/jpred/lib/Root.pm @@ -19,38 +19,41 @@ This modules provides a new method for other classes to use if they so wish. =head2 new(foo => "bar") -This constructs an object of the right class and returns it. Any arugments passed to the constructer will be parsed as a hash with the first argument from the pair acting as the method that should be called and the second being the argument for that method. +This constructs an object of the right class and returns it. Any arugments passed to the constructer +will be parsed as a hash with the first argument from the pair acting as the method that should +be called and the second being the argument for that method. =cut sub new { - my ($class, %args) = @_; - my $self = bless {}, ref($class) || $class; + my ( $class, %args ) = @_; + my $self = bless {}, ref($class) || $class; - for (keys %args) { - croak "No such method '$_'" unless $self->can($_); - $self->$_($args{$_}); - } + for ( keys %args ) { + croak "No such method '$_'" unless $self->can($_); + $self->$_( $args{$_} ); + } - return $self; + return $self; } sub trace { - my ($self) = @_; + my ($self) = @_; - my $i = 1; + my $i = 1; - while (my @caller = caller $i) { - #print join("#", map { defined $_ ? $_ : ""} @caller), "\n"; - print $caller[0], ":", $caller[2], " ", $caller[3], "\n"; - $i++; - } + while ( my @caller = caller $i ) { + + #print join("#", map { defined $_ ? $_ : ""} @caller), "\n"; + print $caller[0], ":", $caller[2], " ", $caller[3], "\n"; + $i++; + } } sub warn { - my ($self, @message) = @_; - print STDERR join("\n", @message), "\n"; - $self->trace; + my ( $self, @message ) = @_; + print STDERR join( "\n", @message ), "\n"; + $self->trace; } 1; diff --git a/jpred/lib/Sequence.pm b/jpred/lib/Sequence.pm index 6a53af1..ea342d3 100644 --- a/jpred/lib/Sequence.pm +++ b/jpred/lib/Sequence.pm @@ -6,33 +6,34 @@ use Carp; use base qw(Root); sub id { - my ($self, $id) = @_; - defined $id ? - $self->{__PACKAGE__."id"} = $id : - return $self->{__PACKAGE__."id"}; + my ( $self, $id ) = @_; + defined $id + ? $self->{ __PACKAGE__ . "id" } = $id + : return $self->{ __PACKAGE__ . "id" }; } sub seq { - my ($self, @entries) = @_; - if (@entries) { $self->{__PACKAGE__."seq"} = [ @entries ] } - else { - if (wantarray) { - return defined $self->{__PACKAGE__."seq"} ? - @{ $self->{__PACKAGE__."seq"} } : - (); - } - else { return $self->{__PACKAGE__."seq"} } - } + my ( $self, @entries ) = @_; + if (@entries) { $self->{ __PACKAGE__ . "seq" } = [@entries] } + else { + if (wantarray) { + return defined $self->{ __PACKAGE__ . "seq" } + ? @{ $self->{ __PACKAGE__ . "seq" } } + : (); + } else { + return $self->{ __PACKAGE__ . "seq" }; + } + } } sub length { - my ($self) = @_; - return scalar @{ [ $self->seq ] }; + my ($self) = @_; + return scalar @{ [ $self->seq ] }; } #sub numeric { # my ($self, $set) = @_; -# return $set ? +# return $set ? # $self->{__PACKAGE__."numeric"} = $set : # $self->{__PACKAGE__."numeric"}; #} @@ -58,38 +59,39 @@ sub length { #} sub sub_seq { - my ($self, @segments) = @_; - - my $package = ref $self; - - return $self->seq unless @segments; - confess "Passed an uneven number of segments" unless @segments % 2 == 0; - - { - my %segments = @segments; - # Find out if there are overlapping segments - for my $pri (sort keys %segments) { - for my $test (sort keys %segments) { - next if $pri == $test; - croak "Overlapping segments" if $test < $segments{$pri}; - } - } - } - - my @sections; - my ($i, @seq) = (0, $self->seq); - do { - my ($start, $end) = @segments[$i, $i + 1]; - - # Create a new sequence object in the correct package, and put the - # segment into it - my $new_seq = $package->new(id => $self->id); - $new_seq->seq( @seq[ $start .. $end ] ); - push @sections, $new_seq; - $i += 2; - } while ($i < @segments); - - return @sections; + my ( $self, @segments ) = @_; + + my $package = ref $self; + + return $self->seq unless @segments; + confess "Passed an uneven number of segments" unless @segments % 2 == 0; + + { + my %segments = @segments; + + # Find out if there are overlapping segments + for my $pri ( sort keys %segments ) { + for my $test ( sort keys %segments ) { + next if $pri == $test; + croak "Overlapping segments" if $test < $segments{$pri}; + } + } + } + + my @sections; + my ( $i, @seq ) = ( 0, $self->seq ); + do { + my ( $start, $end ) = @segments[ $i, $i + 1 ]; + + # Create a new sequence object in the correct package, and put the + # segment into it + my $new_seq = $package->new( id => $self->id ); + $new_seq->seq( @seq[ $start .. $end ] ); + push @sections, $new_seq; + $i += 2; + } while ( $i < @segments ); + + return @sections; } 1; diff --git a/jpred/lib/Sequence/File.pm b/jpred/lib/Sequence/File.pm index 77b64da..188966c 100644 --- a/jpred/lib/Sequence/File.pm +++ b/jpred/lib/Sequence/File.pm @@ -14,12 +14,13 @@ Returns all records so far found in the file. =cut sub get_entries { - my ($self) = @_; + my ($self) = @_; - if (exists $self->{__PACKAGE__."entries"}) { - return @{ $self->{__PACKAGE__."entries"} }; - } - else { return undef; } + if ( exists $self->{ __PACKAGE__ . "entries" } ) { + return @{ $self->{ __PACKAGE__ . "entries" } }; + } else { + return undef; + } } =head2 $file->get_entry(@positions); @@ -29,12 +30,13 @@ Retrieves the @position's'th record found in the file or undef if there is no su =cut sub get_entry { - my ($self, @offsets) = @_; + my ( $self, @offsets ) = @_; - if (exists $self->{__PACKAGE__."entries"} and @offsets) { - return @{ $self->{__PACKAGE__."entries"} }[@offsets]; - } - else { return undef } + if ( exists $self->{ __PACKAGE__ . "entries" } and @offsets ) { + return @{ $self->{ __PACKAGE__ . "entries" } }[@offsets]; + } else { + return undef; + } } =head2 $self->add_entries(@entries) @@ -44,29 +46,29 @@ Adds more entries to the object. Returns the number of entries added to the obje =cut sub add_entries { - my ($self, @entries) = @_; - return unless @entries; - - for (@entries) { - croak "Adding non Sequence object" unless isa $_, "Sequence"; - $self->_ids($_->id); - push @{ $self->{__PACKAGE__."entries"} }, $_; - } - -# my $max = $self->get_max_entries; -# if (defined $max) { -# my $exist_size = @{ $self->{__PACKAGE__."entries"} }; -# if ($exist_size > $max) { return 0 } -# elsif ($exist_size + @entries > $max) { -# return push @{ $self->{__PACKAGE__."entries"} }, @entries[0..$max - $exist_size]; -# } -# else { -# return push @{ $self->{__PACKAGE__."entries"} }, @entries; -# } -# } -# else { -# return push @{ $self->{__PACKAGE__."entries"} }, @entries; -# } + my ( $self, @entries ) = @_; + return unless @entries; + + for (@entries) { + croak "Adding non Sequence object" unless isa $_, "Sequence"; + $self->_ids( $_->id ); + push @{ $self->{ __PACKAGE__ . "entries" } }, $_; + } + + # my $max = $self->get_max_entries; + # if (defined $max) { + # my $exist_size = @{ $self->{__PACKAGE__."entries"} }; + # if ($exist_size > $max) { return 0 } + # elsif ($exist_size + @entries > $max) { + # return push @{ $self->{__PACKAGE__."entries"} }, @entries[0..$max - $exist_size]; + # } + # else { + # return push @{ $self->{__PACKAGE__."entries"} }, @entries; + # } + # } + # else { + # return push @{ $self->{__PACKAGE__."entries"} }, @entries; + # } } =head2 $file->get_entry_by_id(/regex/); @@ -77,35 +79,37 @@ Returns all of those entries which have id's that match the regex. # Cache of sequence IDs for fast grepping sub _ids { - my ($self, $id) = @_; - if ($id) { push @{ $self->{__PACKAGE__."ids"} }, $id } - else { return @{ $self->{__PACKAGE__."ids"} } } + my ( $self, $id ) = @_; + if ($id) { push @{ $self->{ __PACKAGE__ . "ids" } }, $id } + else { return @{ $self->{ __PACKAGE__ . "ids" } } } } sub get_entry_by_id { - my ($self, $id) = @_; - croak "No id passed" unless defined $id; + my ( $self, $id ) = @_; + croak "No id passed" unless defined $id; - #return grep { $_->id =~ /$id/ } $self->get_entries; + #return grep { $_->id =~ /$id/ } $self->get_entries; - { - my @ids = $self->_ids; - my @indices = grep { $ids[$_] =~ /$id/ } 0..$#ids; - return $self->get_entry(@indices); - } + { + my @ids = $self->_ids; + my @indices = grep { $ids[$_] =~ /$id/ } 0 .. $#ids; + return $self->get_entry(@indices); + } } =head2 $file->set_max_entries($size); -Limits the storing of $size records. Will prevent the addition of more records, but won't delete existing records in the object if there are already more than $size entries. +Limits the storing of $size records. Will prevent the addition of more records, +but won't delete existing records in the object if there are already more than +$size entries. =cut sub set_max_entries { - my ($self, $size) = @_; + my ( $self, $size ) = @_; - $self->{__PACKAGE__."max_size"} = $size; - return $size; + $self->{ __PACKAGE__ . "max_size" } = $size; + return $size; } =head2 $file->get_max_entries @@ -115,8 +119,8 @@ Accessor for set_max_entries(). =cut sub get_max_entries { - my ($self) = @_; - return $self->{__PACKAGE__."max_size"}; + my ($self) = @_; + return $self->{ __PACKAGE__ . "max_size" }; } =head2 @@ -124,19 +128,19 @@ sub get_max_entries { =cut sub sub_seq { - my ($self, $start, $end) = @_; + my ( $self, $start, $end ) = @_; - croak "Not passed start and end arguments" unless 2 == grep { defined } $start, $end; + croak "Not passed start and end arguments" unless 2 == grep { defined } $start, $end; - # Produce a new version of myself, in the right namespace - my ($new_self) = (ref $self)->new; + # Produce a new version of myself, in the right namespace + my ($new_self) = ( ref $self )->new; - for ($self->get_entries) { - my ($seq) = $_->sub_seq($start, $end); - $new_self->add_entries($seq); - } + for ( $self->get_entries ) { + my ($seq) = $_->sub_seq( $start, $end ); + $new_self->add_entries($seq); + } - return $new_self; + return $new_self; } 1; -- 1.7.10.2