=head1 DESCRIPTION
-This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence. It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
+This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence.
+It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
-The program is primarily design for use by the Jpred server, but it can be used directly by any user. Some of the output may not be meaningful if used directly - it can be safely ignored.
+The program is primarily design for use by the Jpred server, but it can be used directly by any user.
+Some of the output may not be meaningful if used directly - it can be safely ignored.
=head1 OPTIONS
use strict;
use warnings;
-use Getopt::Long qw(:config auto_version);
+use Getopt::Long;
use Pod::Usage;
use Carp;
use UNIVERSAL qw(isa);
use FindBin qw($Bin);
use lib "$Bin/../lib";
+#use lib "lib";
+#use lib "/home/asherstnev/Projects/Jpred/jpred/trunk/lib";
-use Jpred; # needed to define BLAST environment variables
+use Jpred; # needed to define BLAST environment variables
# my modules
use HMMER::Profile;
use Run qw(check);
use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin);
-our $VERSION = '3.0.2';
-my $MAX_ALIGN = 1000; # maximum number of alignment sequences allowed
-my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
+our $VERSION = '3.0.2';
+my $MAX_ALIGN = 1000; # maximum number of alignment sequences allowed
+my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
# Light object to hold sequence information, light to prevent memory overload
# for big search results. Even so this is quite memory intensive, I can't see
# how to reduce the size further.
{
- package PSISEQ;
- #use Compress::LZF qw(compress decompress);
-
- sub new {
- my ($class, %args) = @_;
- my $self = [];
- bless $self, $class;
- for (keys %args) { $self->$_($args{$_}) }
- return $self;
- }
-
- sub id {
- return defined $_[1] ?
- $_[0]->[0] = $_[1] :
- $_[0]->[0];
- }
-
- sub start {
- return defined $_[1] ?
- $_[0]->[1] = $_[1] :
- $_[0]->[1];
- }
-
- sub align {
- if (defined $_[1]) {
- ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
- #$_[0]->[2] = compress(join "", @{ $_[1] });
- $_[0]->[2] = join("", @{ $_[1] });
- }
- else {
- #return [ split //, decompress($_[0]->[2]) ];
- return [ split(//, $_[0]->[2]) ];
- }
- }
+
+ package PSISEQ;
+
+ #use Compress::LZF qw(compress decompress);
+
+ sub new {
+ my ( $class, %args ) = @_;
+ my $self = [];
+ bless $self, $class;
+ for ( keys %args ) { $self->$_( $args{$_} ) }
+ return $self;
+ }
+
+ sub id {
+ return defined $_[1]
+ ? $_[0]->[0] = $_[1]
+ : $_[0]->[0];
+ }
+
+ sub start {
+ return defined $_[1]
+ ? $_[0]->[1] = $_[1]
+ : $_[0]->[1];
+ }
+
+ sub align {
+ if ( defined $_[1] ) {
+ ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
+
+ #$_[0]->[2] = compress(join "", @{ $_[1] });
+ $_[0]->[2] = join( "", @{ $_[1] } );
+ } else {
+
+ #return [ split //, decompress($_[0]->[2]) ];
+ return [ split( //, $_[0]->[2] ) ];
+ }
+ }
}
# Key to database information and information for accessing them
-my $dbPATH = '/homes/chris/projects/Jnet/db';
+my $dbPATH = '/homes/chris/projects/Jnet/db';
my $db_entry = "uniref90";
my $database = {
-
- ## default database used by Jpred
- uniref90 => {
- database => 'uniref90.filt',
- unfiltered => 'uniref90',
- },
-
- ## database used during Jnet training
- training => {
- database => 'training/uniref90.filt',
- unfiltered => 'training/uniref90',
- },
-
- ## cluster-specific path for Jpred
- cluster => {
- database => '/local/gjb_lab/www-jpred/uniref90.filt',
- unfiltered => '/local/gjb_lab/www-jpred/uniref90',
- },
-
- ## these other DBs are experimental ones used during development.
- ## check they exist before using them.
- swall => {
- # generic entry for use with validate_jnet.pl
- # real db location defined by validate_jnet.pl
- database => "$dbPATH/swall/swall.filt", # CC new
- unfiltered => "$dbPATH/swall/swall",
- },
- uniprot => {
- # Path to PSIBLAST db
- database => "$dbPATH/3/swall.filt", # CC new
- unfiltered => "$dbPATH/3/swall",
- },
- uniref50 => {
- database => "$dbPATH/6/swall.filt",
- unfiltered => "$dbPATH/6/swall",
- },
+
+ ## default database used by Jpred
+ uniref90 => {
+ database => 'uniref90.filt',
+ unfiltered => 'uniref90',
+ },
+
+ ## database used during Jnet training
+ training => {
+ database => 'training/uniref90.filt',
+ unfiltered => 'training/uniref90',
+ },
+
+ ## cluster-specific path for Jpred
+ cluster => {
+ database => '/local/gjb_lab/www-jpred/uniref90.filt',
+ unfiltered => '/local/gjb_lab/www-jpred/uniref90',
+ },
+
+ ## these other DBs are experimental ones used during development.
+ ## check they exist before using them.
+ swall => {
+
+ # generic entry for use with validate_jnet.pl
+ # real db location defined by validate_jnet.pl
+ database => "$dbPATH/swall/swall.filt", # CC new
+ unfiltered => "$dbPATH/swall/swall",
+ },
+ uniprot => {
+
+ # Path to PSIBLAST db
+ database => "$dbPATH/3/swall.filt", # CC new
+ unfiltered => "$dbPATH/3/swall",
+ },
+ uniref50 => {
+ database => "$dbPATH/6/swall.filt",
+ unfiltered => "$dbPATH/6/swall",
+ },
};
# Gap characters in sequences
our $GAP = '-';
-my ($help, $man, $path, $output, $psiblast, $DEBUG, $VERBOSE);
-my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
+my ( $help, $man, $path, $output, $psiblast, $DEBUG, $VERBOSE );
+my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
GetOptions(
- "help" => \$help,
- "man" => \$man,
- "verbose" => \$VERBOSE,
- "sequence=s" => \$path,
- "output=s" => \$output,
- "psi=s" => \$psiblast,
- "db=s" => \$db_entry,
- "pred-nohits" => \$predNoHits,
- "debug" => \$DEBUG
+ "help" => \$help,
+ "man" => \$man,
+ "verbose" => \$VERBOSE,
+ "sequence=s" => \$path,
+ "output=s" => \$output,
+ "psi=s" => \$psiblast,
+ "db=s" => \$db_entry,
+ "pred-nohits" => \$predNoHits,
+ "debug" => \$DEBUG
) or pod2usage(2);
pod2usage(1) if $help;
-pod2usage(verbose => 2) if $man;
+pod2usage( verbose => 2 ) if $man;
pod2usage(' --sequence argument not provided') unless $path;
die "--db $db_entry not recognised" unless exists $database->{$db_entry};
$output = $path unless $output;
-for (
- [ "path", $path ],
- [ "output", $output ],
- [ "psi", $psiblast ],
- [ "db", $db_entry]
- ) {
- my ($key, $value) = @{$_};
- defined $value or next;
- errlog(join(": ", $key, $value), "\n");
+for ( [ "path", $path ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) {
+ my ( $key, $value ) = @{$_};
+ defined $value or next;
+ errlog( join( ": ", $key, $value ), "\n" );
}
-my $query = FASTA::File->new(read_file => $path);
+my $query = FASTA::File->new( read_file => $path );
my @seqs;
if (1) {
- my $psi = PSIBLAST->new; # CC PSIBLAST.pm doesn't have a new() class??
- unless (defined $psiblast && -e $psiblast) {
- my $psi_run = PSIBLAST::Run->new(
- debug => $DEBUG,
-# path => "/software/jpred_bin/blastpgp",
- path => $psiblastbin, # CC new path
- input => $path,
- output => "$output.blast",
- matrix => "$output.profile",
- database => $database->{$db_entry}{database},
- ## potentially a better set of parameters (esp. -b & -v)
- ## which avoid PSI-BLAST dying with large number of hits
- # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
- # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
- # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
+ my $psi = PSIBLAST->new; # CC PSIBLAST.pm doesn't have a new() class??
+ unless ( defined $psiblast && -e $psiblast ) {
+ my $psi_run = PSIBLAST::Run->new(
+ debug => $DEBUG,
+
+ # path => "/software/jpred_bin/blastpgp",
+ path => $psiblastbin, # CC new path
+ input => $path,
+ output => "$output.blast",
+ matrix => "$output.profile",
+ database => $database->{$db_entry}{database},
+ ## potentially a better set of parameters (esp. -b & -v)
+ ## which avoid PSI-BLAST dying with large number of hits
+ # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
+ # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
+ # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
+ );
+
+ # For reduced databases, get the size of the orginal and use this as
+ # the equivilent DB size
+ #print $db_entry, "\n";
+ if ( $db_entry =~ /^sp_red_/ ) {
+ ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/;
+ $psi_run->args(
+ $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below
);
-
- # For reduced databases, get the size of the orginal and use this as
- # the equivilent DB size
- #print $db_entry, "\n";
- if ($db_entry =~ /^sp_red_/) {
- (my $entry = $db_entry) =~ s/^sp_red_/sp_/;
- $psi_run->args( $psi_run->args . " -z " .
- fasta_seq_length($database->{$entry}{database}) # CC sub is below
- );
- }
- print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
- print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
-
- print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
- $psi_run->run or die; # CC sub is from PSIBLAST::Run
-
- $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
- system("gzip -9f $output.blast");
- }
- else {
- if ($psiblast =~ /.gz$/) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
- else { $psi->read_file($psiblast) } # CC ditto above
- }
-
- # Convert the last itteration into a collection of PSISEQ objects
- for ($psi->get_all) { # CC sub is from PSIBLAST.pm
- my ($id, $start, $align) = @{$_};
- push @seqs, PSISEQ->new(
- id => $id,
- start => $start,
- align => [ split(//, $align) ]
+ }
+ print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
+ print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
+
+ print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
+ $psi_run->run or die; # CC sub is from PSIBLAST::Run
+
+ $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
+ system("gzip -9f $output.blast");
+ } else {
+ if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
+ else { $psi->read_file($psiblast) } # CC ditto above
+ }
+
+ # Convert the last itteration into a collection of PSISEQ objects
+ for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm
+ my ( $id, $start, $align ) = @{$_};
+ push @seqs,
+ PSISEQ->new(
+ id => $id,
+ start => $start,
+ align => [ split( //, $align ) ]
);
- }
+ }
}
-## When there are no PSI-BLAST hits generate an HMM from the query
+## When there are no PSI-BLAST hits generate an HMM from the query
## and run Jnet against that only.
## Accuracy won't be as good, but at least it's a prediction
-if (@seqs == 0) {
- if ($predNoHits == 0) {
- warn "\nJPRED: Warning - no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n";
- print ">>100% complete\n";
- exit;
- }
- print ">>50% complete\n";
- warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
- print "Running HMMer on query...\n" if $VERBOSE;
-
- # copy input query to alignment
- system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
-
- # Temp files required for HMMer
- my ($hmmbuild_out, $hmmconvert_out) = map {
- File::Temp->new->filename
- } 1..2;
-
- system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $path");
- system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
-
- # Read in the HMMER file
- my $hmmer = HMMER::Profile->new(
- read_file => $hmmconvert_out
- );
-
- # Convert from HMMER::Profile to HMMER::Profile::Jnet
- my $hmmer_jnet = HMMER::Profile::Jnet->new;
- $hmmer_jnet->add_line(@{$_}) for $hmmer->get_line;
- $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
- print ">>70% complete\n";
-
- # Run Jnet for the prediction
- print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
- jnet(map { "$output.$_" } qw(hmm jnet)); # CC sub is below
-
- print "Jpred Finished\n";
- exit;
+if ( @seqs == 0 ) {
+ if ( $predNoHits == 0 ) {
+ warn "\nJPRED: Warning - no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n";
+ print ">>100% complete\n";
+ exit;
+ }
+ print ">>50% complete\n";
+ warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
+ print "Running HMMer on query...\n" if $VERBOSE;
+
+ # copy input query to alignment
+ system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
+
+ # Temp files required for HMMer
+ my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2;
+
+ system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $path");
+ system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
+
+ # Read in the HMMER file
+ my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
+
+ # Convert from HMMER::Profile to HMMER::Profile::Jnet
+ my $hmmer_jnet = HMMER::Profile::Jnet->new;
+ $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
+ $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
+ print ">>70% complete\n";
+
+ # Run Jnet for the prediction
+ print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
+ jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below
+
+ print "Jpred Finished\n";
+ exit;
}
-psiseq2fasta("0.fasta.gz", @seqs) if $DEBUG; # CC sub is below
+psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below
print ">>40% complete\n";
# Make PSIBLAST truncated alignments the right length
print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE;
-@seqs = extend($query, @seqs); # CC sub si below
-psiseq2fasta("1.fasta.gz", @seqs) if $DEBUG;
+@seqs = extend( $query, @seqs ); # CC sub si below
+psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG;
# Remove masking from PSIBLAST alignment
print "Unmasking the alignments...\n" if $VERBOSE;
-dex($_) for @seqs[ 1..$#seqs ]; ## <-- CC dies here in dex() - found below
-psiseq2fasta("2.fasta.gz", @seqs) if $DEBUG;
+dex($_) for @seqs[ 1 .. $#seqs ]; ## <-- CC dies here in dex() - found below
+psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG;
# Convert the sequences to upper case
print "Converting sequences to the same case...\n" if $VERBOSE;
-toupper($_) for @seqs; # CC sub is below
-psiseq2fasta("${output}_backup.fasta.gz", @seqs);
+toupper($_) for @seqs; # CC sub is below
+psiseq2fasta( "${output}_backup.fasta.gz", @seqs );
# Remove excessive sequences
print "Remove excessive sequences...\n" if $VERBOSE;
-@seqs = reduce($MAX_ALIGN, @seqs); # CC sub is below
-psiseq2fasta("3.fasta.gz", @seqs) if $DEBUG;
+@seqs = reduce( $MAX_ALIGN, @seqs ); # CC sub is below
+psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG;
# Remove sequences that are too long or too short
print "Remove sequences which too long or short...\n" if $VERBOSE;
-@seqs = del_long_seqs(50, @seqs); # CC sub is below
-psiseq2fasta("4.fasta.gz", @seqs) if $DEBUG;
+@seqs = del_long_seqs( 50, @seqs ); # CC sub is below
+psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG;
# Remove redundant sequences based upon pairwise identity and OC clustering
print "Remove redundant sequences...\n" if $VERBOSE;
-@seqs = nonred($NR_CUT, @seqs); # CC sub is below
-psiseq2fasta("5.fasta.gz", @seqs) if $DEBUG;
+@seqs = nonred( $NR_CUT, @seqs ); # CC sub is below
+psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG;
# Check that we haven't got rid of all of the sequences
-if (@seqs < 2) {
- warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
- @seqs = fasta2psiseq("${output}_backup.fasta.gz");
+if ( @seqs < 2 ) {
+ warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
+ @seqs = fasta2psiseq("${output}_backup.fasta.gz");
}
unlink("${output}_backup.fasta.gz") unless $DEBUG;
# Remove gaps in the query sequence and same positions in the alignment
print "Removing gaps in the query sequence...\n" if $VERBOSE;
-degap(@seqs); # CC sub is below
-psiseq2fasta("6.fasta.gz", @seqs) if $DEBUG;
+degap(@seqs); # CC sub is below
+psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG;
# Output the alignment for the prediction
print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE;
-psiseq2fasta("$output.align", @seqs);
+psiseq2fasta( "$output.align", @seqs );
{
- # Equivilent to getpssm script
- print "Output the PSSM matrix from the PSI-BLAST profile...\n";
- my $pssm = PSIBLAST::PSSM->new(read_file => "$output.profile");
- $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
+ # Equivilent to getpssm script
+ print "Output the PSSM matrix from the PSI-BLAST profile...\n";
+ my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" );
+ $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
}
print ">>50% complete\n";
# Run HMMER on the sequences
{
- print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
- my $hmmer = hmmer(@seqs); # CC sub is below
- $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
+ print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
+ my $hmmer = hmmer(@seqs); # CC sub is below
+ $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
}
print ">>70% complete\n";
# Run Jnet for the prediction
print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE;
-jnet(map { "$output.$_" } qw(hmm jnet pssm)); # CC sub is below
+jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below
print "Jpred Finished\n";
=cut
-
BEGIN {
- my $idx = Index->new(type => "Index::FastaCMD") or die "Index type cannot be found.";
+ my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found.";
- sub dex {
- my ($seq) = @_;
+ sub dex {
+ my ($seq) = @_;
- croak "JPRED: dex: can only take a PSISEQ object as an argument" unless isa $seq, 'PSISEQ';
+ croak "JPRED: dex: can only take a PSISEQ object as an argument" unless isa $seq, 'PSISEQ';
- # Only bother if we have a sequence which has been masked
- return unless grep { uc $_ eq 'X' } @{ $seq->align };
+ # Only bother if we have a sequence which has been masked
+ return unless grep { uc $_ eq 'X' } @{ $seq->align };
- my ($id, $start, @align) = ($seq->id, $seq->start, @{$seq->align});
+ my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } );
- #$id = "$id"; # Add the EMBOSS USA
- #$id = "sprot_37:$id"; # Add the EMBOSS USA
- #$id = "sprot_43:$id"; # Add the EMBOSS USA
-# $id = $database->{$db_entry}{emboss} . ":$id"; # commented out to try FastaCMD module
- my $searchdb = $database->{$db_entry}{unfiltered};
- $start--; # We need this to be zero based
+ #$id = "$id"; # Add the EMBOSS USA
+ #$id = "sprot_37:$id"; # Add the EMBOSS USA
+ #$id = "sprot_43:$id"; # Add the EMBOSS USA
+ # $id = $database->{$db_entry}{emboss} . ":$id"; # commented out to try FastaCMD module
+ my $searchdb = $database->{$db_entry}{unfiltered};
+ $start--; # We need this to be zero based
- my $f = $idx->get_sequence($id, $searchdb) or croak "JPRED: No such entry for $id in $searchdb\n" and return; # CC sub is from Index::EMBOSS
- my @seqs = $f->get_entries; ## Sub is from Sequence::File
- my $db = shift @seqs;
+ my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return; # CC sub is from Index::EMBOSS
+ my @seqs = $f->get_entries; ## Sub is from Sequence::File
+ my $db = shift @seqs;
- unless ($db) {
- confess "JPRED: dex: Accession number $id returned no sequences";
- }
- elsif (@seqs) {
- confess "JPRED: dex: Accession number $id returned more than one sequence";
- }
+ unless ($db) {
+ confess "JPRED: dex: Accession number $id returned no sequences";
+ } elsif (@seqs) {
+ confess "JPRED: dex: Accession number $id returned more than one sequence";
+ }
+
+ my @db_seq = @{ $db->seq };
- my @db_seq = @{ $db->seq };
-
- # $i is a pointer into the original, ungapped, unmasked sequence. $_ is
- # for the aligned sequence
- my $i = 0;
- for (0..$#align) {
- next if $align[$_] =~ /$GAP/;
-
- if ($align[$_] eq 'X') {
- unless (defined $db_seq[$i + $start]) {
- croak "JPRED: dex: the database sequence is not as long as the query sequence!"
- }
- $align[$_] = $db_seq[$i + $start];
- }
- $i++;
+ # $i is a pointer into the original, ungapped, unmasked sequence. $_ is
+ # for the aligned sequence
+ my $i = 0;
+ for ( 0 .. $#align ) {
+ next if $align[$_] =~ /$GAP/;
+
+ if ( $align[$_] eq 'X' ) {
+ unless ( defined $db_seq[ $i + $start ] ) {
+ croak "JPRED: dex: the database sequence is not as long as the query sequence!";
+ }
+ $align[$_] = $db_seq[ $i + $start ];
}
+ $i++;
+ }
- $seq->align(\@align);
- }
+ $seq->align( \@align );
+ }
}
=head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN);
=cut
sub reduce {
- my ($max, @seqs) = @_;
+ my ( $max, @seqs ) = @_;
- my $num = int(@seqs / $max);
+ my $num = int( @seqs / $max );
- my @res = shift @seqs;
+ my @res = shift @seqs;
- # Don't bother if we have less than the required sequences
- if (@seqs < $max) { return @res, @seqs }
- if ($num == 0) { return @res, @seqs }
+ # Don't bother if we have less than the required sequences
+ if ( @seqs < $max ) { return @res, @seqs }
+ if ( $num == 0 ) { return @res, @seqs }
- for (0..$#seqs) { push @res, $seqs[$_] if $_ % $num == 0 }
+ for ( 0 .. $#seqs ) { push @res, $seqs[$_] if $_ % $num == 0 }
- return @res;
+ return @res;
}
=head2 nonred($cutoff, @PSISEQS);
=cut
sub nonred {
- my ($cutoff, @seqs) = @_;
- # Run pairwise and then OC
- # Remove similar sequences
-
- unless (defined $cutoff) { croak "JPRED: nonred() not passed a cutoff" }
- unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
- if (@seqs == 1) {
- warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
- return @seqs;
- }
-
- my $fasta = FASTA::File->new;
-
- # Convert the sequences into a FASTA object, which is what
- # pairwise expects.
- $fasta->add_entries(
- map {
- my $f = FASTA->new(id => $_->id);
- $f->seq(@{$_->align});
- $f;
+ my ( $cutoff, @seqs ) = @_;
+
+ # Run pairwise and then OC
+ # Remove similar sequences
+
+ unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" }
+ unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
+ if ( @seqs == 1 ) {
+ warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
+ return @seqs;
+ }
+
+ my $fasta = FASTA::File->new;
+
+ # Convert the sequences into a FASTA object, which is what
+ # pairwise expects.
+ $fasta->add_entries(
+ map {
+ my $f = FASTA->new( id => $_->id );
+ $f->seq( @{ $_->align } );
+ $f;
} @seqs
- );
-
- # Run pairwise via wrappers
- use Pairwise;
- my $pair = Pairwise->new(
- path => $pairwise
- );
- my $foo = join "\n", $pair->run($fasta) or die $!;
-
- # Run OC
- my $ocres = OC->new(
- path => "$oc sim complete cut $cutoff"
- );
- $ocres->read_string(join "", $ocres->run($foo) );
-
- # Now found those entries that aren't related
- my @keep;
- {
- for ($ocres->get_groups) {
- my ($group, $score, $size, @labels) = @{ $_ };
-
- # Keep the QUERY from the cluster, otherwise just get one
- if (grep { /QUERY/ } @labels) {
- # Make sure the query stays at the start
- unshift @keep, "QUERY";
- next;
- }
- else { push @keep, shift @labels }
+ );
+
+ # Run pairwise via wrappers
+ use Pairwise;
+ my $pair = Pairwise->new( path => $pairwise );
+ my $foo = join "\n", $pair->run($fasta) or die $!;
+
+ # Run OC
+ my $ocres = OC->new( path => "$oc sim complete cut $cutoff" );
+ $ocres->read_string( join "", $ocres->run($foo) );
+
+ # Now found those entries that aren't related
+ my @keep;
+ {
+ for ( $ocres->get_groups ) {
+ my ( $group, $score, $size, @labels ) = @{$_};
+
+ # Keep the QUERY from the cluster, otherwise just get one
+ if ( grep { /QUERY/ } @labels ) {
+
+ # Make sure the query stays at the start
+ unshift @keep, "QUERY";
+ next;
+ } else {
+ push @keep, shift @labels;
}
+ }
- push @keep, $ocres->get_unclustered;
- }
+ push @keep, $ocres->get_unclustered;
+ }
- # Return the matching entries.
- my @return;
+ # Return the matching entries.
+ my @return;
- # We use the temporay to mimic the selection of sequences in the nonred
- # script, which took the sequences in the order they occured in the file.
- for (@seqs) {
- my $label = $_->id;
- if (grep { $_ =~ /^$label$/ } @keep) {
- push @return, $_;
- }
- }
+ # We use the temporay to mimic the selection of sequences in the nonred
+ # script, which took the sequences in the order they occured in the file.
+ for (@seqs) {
+ my $label = $_->id;
+ if ( grep { $_ =~ /^$label$/ } @keep ) {
+ push @return, $_;
+ }
+ }
- return @return;
+ return @return;
}
=head2 @seqs = del_long_seqs($percentage, @PSISEQS)
-Removes those sequences that are more than $percentage longer or shorter than the first sequence in @seqs. @seqs is an array of PSISEQ objects.
+Removes those sequences that are more than $percentage longer or shorter than the first
+sequence in @seqs. @seqs is an array of PSISEQ objects.
Equivilent to trim_seqs script.
=cut
sub del_long_seqs {
- my ($factor, @seqs) = @_;
+ my ( $factor, @seqs ) = @_;
- # Keep the query sequence
- my $query = shift @seqs;
- my @res = $query;
+ # Keep the query sequence
+ my $query = shift @seqs;
+ my @res = $query;
- # Find the length of the query sequence without any gaps
- my $length = (() = (join '', @{$query->align}) =~ /[^$GAP]/g);
+ # Find the length of the query sequence without any gaps
+ my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g );
- # Calculate the upper and lower allowed bounds
- my ($upper, $lower);
- {
- my $bounds = ($length / 100) * $factor;
- ($upper, $lower) = ($length + $bounds, $length - $bounds);
- }
+ # Calculate the upper and lower allowed bounds
+ my ( $upper, $lower );
+ {
+ my $bounds = ( $length / 100 ) * $factor;
+ ( $upper, $lower ) = ( $length + $bounds, $length - $bounds );
+ }
- # Now try each sequence and see how long they are
- for (@seqs) {
- my $l = (() = (join '', @{$_->align}) =~ /[^$GAP]/g);
- if ($l >= $lower && $l <= $upper) { push @res, $_ }
- }
+ # Now try each sequence and see how long they are
+ for (@seqs) {
+ my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g );
+ if ( $l >= $lower && $l <= $upper ) { push @res, $_ }
+ }
- return @res;
+ return @res;
}
=head2 toupper ($PSISEQ)
=cut
sub toupper {
- croak "JPRED: toupper: argument not a PSISEQ object" unless isa $_[0], 'PSISEQ';
- $_[0]->align(
- [ split //, uc join '', @{ $_[0]->align } ]
- );
+ croak "JPRED: toupper: argument not a PSISEQ object" unless isa $_[0], 'PSISEQ';
+ $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] );
}
=head2 degap($PSISEQ_QUERY, @PSISEQS)
=cut
sub degap {
- my (@seqs) = @_;
-
- # Find where the gaps in the query sequence are
- my @gaps;
- {
- my $i = 0;
- for (@{$seqs[0]->align}) {
- push @gaps, $i if $_ !~ /$GAP/;
- $i++;
- }
- }
-
- return unless @gaps;
-
- # Remove the gaps in all the sequences.
- # Derefences the array reference and takes a slice inside the method call
- # arguments, then coerce back to an array ref.
- for (@seqs) {
- $_->align([ @{$_->align}[@gaps] ]);
- }
+ my (@seqs) = @_;
+
+ # Find where the gaps in the query sequence are
+ my @gaps;
+ {
+ my $i = 0;
+ for ( @{ $seqs[0]->align } ) {
+ push @gaps, $i if $_ !~ /$GAP/;
+ $i++;
+ }
+ }
+
+ return unless @gaps;
+
+ # Remove the gaps in all the sequences.
+ # Derefences the array reference and takes a slice inside the method call
+ # arguments, then coerce back to an array ref.
+ for (@seqs) {
+ $_->align( [ @{ $_->align }[@gaps] ] );
+ }
}
=head2 getfreq($filename, @PSISEQS)
Creates a PSIBLAST like profile and writes it to the $filename.
-Equivilant to profile or getfreq (older) script. This doesn't create the same answer as the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output, but it's *not* the same.
+Equivilant to profile or getfreq (older) script. This doesn't create the same answer as
+the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output,
+but it's *not* the same.
=cut
sub getfreq {
- my ($fn, @seqs) = @_;
+ my ( $fn, @seqs ) = @_;
- croak "JPRED: Passed non PSISEQ object" if grep { ! isa($_, 'PSISEQ') } @seqs;
+ croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs;
- # Create a PSIBLAST like profile from the alignment
- # This doesn't greate the same result as old Jpred, I think this is because
- # differences in the input between old and new
+ # Create a PSIBLAST like profile from the alignment
+ # This doesn't greate the same result as old Jpred, I think this is because
+ # differences in the input between old and new
- # For testing
-# my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
-# my @profile = profile(
-# map { join "", @{ $_->seq } } $f->get_entries
-# );
+ # For testing
+ # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
+ # my @profile = profile(
+ # map { join "", @{ $_->seq } } $f->get_entries
+ # );
- my @profile = profile(
- map { join "", @{ $_->align } } @seqs
- );
+ my @profile = profile( map { join "", @{ $_->align } } @seqs );
- open my $fh, ">$fn" or die "JPRED: $fn: $!";
- print $fh join(" ", @{$_}), "\n" for @profile;
+ open my $fh, ">$fn" or die "JPRED: $fn: $!";
+ print $fh join( " ", @{$_} ), "\n" for @profile;
}
=head2 HMMER::Profile::Jnet = hmmer(@PSISEQS)
=cut
sub hmmer {
- my (@seqs) = @_;
+ my (@seqs) = @_;
- # Temp files required
- my ($hmmbuild_out, $hmmconvert_out, $tmp_fasta) = map {
- File::Temp->new->filename
- } 1..3;
+ # Temp files required
+ my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3;
- # Create the FASTA file
- psiseq2fasta($tmp_fasta, @seqs);
+ # Create the FASTA file
+ psiseq2fasta( $tmp_fasta, @seqs );
- system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
- system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
+ system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
+ system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
- # Read in the HMMER file
- my $hmmer = HMMER::Profile->new(
- read_file => $hmmconvert_out
- );
+ # Read in the HMMER file
+ my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
- # Convert from HMMER::Profile to HMMER::Profile::Jnet
- my $hmmer_jnet = HMMER::Profile::Jnet->new;
- $hmmer_jnet->add_line(@{$_}) for $hmmer->get_line;
+ # Convert from HMMER::Profile to HMMER::Profile::Jnet
+ my $hmmer_jnet = HMMER::Profile::Jnet->new;
+ $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
- return $hmmer_jnet;
+ return $hmmer_jnet;
}
=head2 jnet($hmmer, $out, $pssm )
=cut
sub jnet {
- my ($hmmer, $outfile, $pssm) = @_;
- if ($pssm) {
- # run Jnet prediction with all the profile data
- system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
- } else {
- # run Jnet prediction with only HMMer and alignment data
- system("$jnet --concise --hmmer $hmmer > $outfile");
- }
- check("jnet", $?) or exit 1;
+ my ( $hmmer, $outfile, $pssm ) = @_;
+ if ($pssm) {
+
+ # run Jnet prediction with all the profile data
+ system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
+ } else {
+
+ # run Jnet prediction with only HMMer and alignment data
+ system("$jnet --concise --hmmer $hmmer > $outfile");
+ }
+ check( "jnet", $? ) or exit 1;
}
=head1 psiseq2fasta($path, @PSISEQ)
=cut
sub psiseq2fasta {
- my ($fn, @seqs) = @_;
-
- confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
- @seqs or confess "JPRED: not passed PSISEQs";
-
- confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
-
- # Changed from using FASTA::File object due to seg faulting on some large,
- # long alignments, presumably due to running out of memory.
-# my $fasta = FASTA::File->new;
-# $fasta->add_entries(
-# map {
-# my $f = FASTA->new(id => $_->id);
-# $f->seq(@{$_->align});
-# $f;
-# } @seqs
-# );
-# $fasta->write_file($fn);
-
- my $fh;
- $fn =~ /\.gz$/ ?
- ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" ):
- ( open $fh, ">$fn" or die "JPRED: $!" );
-
- for (@seqs) {
- print $fh ">", $_->id, "\n";
- my $seq = join("", @{ $_->align });
- $seq =~ s/(.{72})/$1\n/g;
- chomp $seq;
- print $fh $seq."\n";
- }
- close $fh;
+ my ( $fn, @seqs ) = @_;
+
+ confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
+ @seqs or confess "JPRED: not passed PSISEQs";
+
+ confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
+
+ # Changed from using FASTA::File object due to seg faulting on some large,
+ # long alignments, presumably due to running out of memory.
+ # my $fasta = FASTA::File->new;
+ # $fasta->add_entries(
+ # map {
+ # my $f = FASTA->new(id => $_->id);
+ # $f->seq(@{$_->align});
+ # $f;
+ # } @seqs
+ # );
+ # $fasta->write_file($fn);
+
+ my $fh;
+ $fn =~ /\.gz$/
+ ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" )
+ : ( open $fh, ">$fn" or die "JPRED: $!" );
+
+ for (@seqs) {
+ print $fh ">", $_->id, "\n";
+ my $seq = join( "", @{ $_->align } );
+ $seq =~ s/(.{72})/$1\n/g;
+ chomp $seq;
+ print $fh $seq . "\n";
+ }
+ close $fh;
}
=head1 @PSISEQ = fasta2psiseq($path)
=cut
sub fasta2psiseq {
- my ($fn) = @_;
-
- my $fasta = $fn =~ /\.gz$/ ?
- FASTA::File->new(read_gzip_file => $fn) :
- FASTA::File->new(read_file => $fn);
- $fasta or die "JPRED: $!";
-
- my @seqs;
- for ($fasta->get_entries) {
- my $seq = PSISEQ->new;
- $seq->id($_->id);
- $seq->align( [ @{ $_->seq } ] );
- push @seqs, $seq;
- }
-
- return @seqs;
+ my ($fn) = @_;
+
+ my $fasta =
+ $fn =~ /\.gz$/
+ ? FASTA::File->new( read_gzip_file => $fn )
+ : FASTA::File->new( read_file => $fn );
+ $fasta or die "JPRED: $!";
+
+ my @seqs;
+ for ( $fasta->get_entries ) {
+ my $seq = PSISEQ->new;
+ $seq->id( $_->id );
+ $seq->align( [ @{ $_->seq } ] );
+ push @seqs, $seq;
+ }
+
+ return @seqs;
}
sub fasta2psiseq_lowmem {
- my ($fn) = @_;
-
- local $/ = "\n";
- my $fh;
- $fn =~ /\.gz$/ ?
- (open $fh, "gzip -dc $fn|" or die $!) :
- (open $fh, $fn or die $!);
-
- my @seqs;
-
- my ($id, $seq);
- while (<$fh>) {
- chomp;
- if (s/^>//) {
- $seq =~ s/ //g if defined $seq;
-
- if ($id and $seq) {
- my $psi = PSISEQ->new(id => $id);
- $psi->align([ split //, $seq ]);
- push @seqs, $psi;
- undef $id; undef $seq;
- }
- else { $id = $_ }
+ my ($fn) = @_;
+
+ local $/ = "\n";
+ my $fh;
+ $fn =~ /\.gz$/
+ ? ( open $fh, "gzip -dc $fn|" or die $! )
+ : ( open $fh, $fn or die $! );
+
+ my @seqs;
+
+ my ( $id, $seq );
+ while (<$fh>) {
+ chomp;
+ if (s/^>//) {
+ $seq =~ s/ //g if defined $seq;
+
+ if ( $id and $seq ) {
+ my $psi = PSISEQ->new( id => $id );
+ $psi->align( [ split //, $seq ] );
+ push @seqs, $psi;
+ undef $id;
+ undef $seq;
+ } else {
+ $id = $_;
}
- else {
- $seq .= $_;
- }
- }
- if ($id and $seq) {
- my $psi = PSISEQ->new(id => $id);
- $psi->seq($seq);
- push @seqs, $psi;
- }
-
- return @seqs;
+ } else {
+ $seq .= $_;
+ }
+ }
+ if ( $id and $seq ) {
+ my $psi = PSISEQ->new( id => $id );
+ $psi->seq($seq);
+ push @seqs, $psi;
+ }
+
+ return @seqs;
}
=head1 die_if_no_seqs($size, $message, @seqs)
=cut
sub die_if_no_seqs {
- my ($size, $message, @seqs) = @_;
+ my ( $size, $message, @seqs ) = @_;
- confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
+ confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
- chomp $message;
- die "JPRED: $message\n" unless @seqs > $size;
+ chomp $message;
+ die "JPRED: $message\n" unless @seqs > $size;
}
=head1 extend($FASTA::File, @PSISEQ)
-Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues against the query, this function extends the alignment so that all of the query is present in the alignment. The other sequences are extended with gap characters.
+Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues
+against the query, this function extends the alignment so that all of the query is
+present in the alignment. The other sequences are extended with gap characters.
=cut
sub extend {
- my ($query, @seqs) = @_;
-
- croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
- croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
-
- # Get the query sequence
- my $q_query = $query->get_entry(0);
-
- # Get the query sequence from the PSIBLAST alignment
- my $a_query = shift @seqs;
-
- # The gap size required to expand the alignment
- my ($start_gap_length, $end_gap_length);
- {
- # Make handling the sequence easier
- my $q_seq = join "", @{[ $q_query->seq ]};
- my $a_seq = join "", @{ $a_query->align };
-
- $q_seq =~ s/-//g;
- $a_seq =~ s/-//g;
-
- ($q_seq, $a_seq) = map { uc $_ } $q_seq, $a_seq;
-
- $start_gap_length = index($q_seq, $a_seq);
- croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
-
- $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
- }
-
- # Add the gaps to the end of the alignments
- for (@seqs) {
- $_->align([
- ($GAP) x $start_gap_length,
- @{ $_->align },
- ($GAP) x $end_gap_length
- ]);
- }
-
- # Put the query sequence back
- unshift @seqs, PSISEQ->new(
- id => $a_query->id,
- align => [
- ($start_gap_length ? @{ $q_query->seq }[ 0..($start_gap_length-1) ] : ()),
- @{ $a_query->align },
- ($end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : ()),
- ]
- );
-
- return @seqs;
+ my ( $query, @seqs ) = @_;
+
+ croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
+ croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
+
+ # Get the query sequence
+ my $q_query = $query->get_entry(0);
+
+ # Get the query sequence from the PSIBLAST alignment
+ my $a_query = shift @seqs;
+
+ # The gap size required to expand the alignment
+ my ( $start_gap_length, $end_gap_length );
+ {
+
+ # Make handling the sequence easier
+ my $q_seq = join "", @{ [ $q_query->seq ] };
+ my $a_seq = join "", @{ $a_query->align };
+
+ $q_seq =~ s/-//g;
+ $a_seq =~ s/-//g;
+
+ ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq;
+
+ $start_gap_length = index( $q_seq, $a_seq );
+ croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
+
+ $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
+ }
+
+ # Add the gaps to the end of the alignments
+ for (@seqs) {
+ $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] );
+ }
+
+ # Put the query sequence back
+ unshift @seqs,
+ PSISEQ->new(
+ id => $a_query->id,
+ align => [
+ ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ),
+ @{ $a_query->align },
+ ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ),
+ ]
+ );
+
+ return @seqs;
}
=head1 $int = fasta_seq_length($path)
=cut
sub fasta_seq_length {
- my ($fn) = @_;
- open my $fh, $fn or croak "Can't open file $fn: $!\n";
-
- my $length = 0;
- local $_, $/ = "\n";
- while (<$fh>) {
- next if /^>/;
- chomp;
- $length += tr///c;
- }
-
- return $length;
+ my ($fn) = @_;
+ open my $fh, $fn or croak "Can't open file $fn: $!\n";
+
+ my $length = 0;
+ local $_, $/ = "\n";
+ while (<$fh>) {
+ next if /^>/;
+ chomp;
+ $length += tr///c;
+ }
+
+ return $length;
}
=head1 log(@messages)
require 5.005_03;
use strict;
use vars qw($VERSION $DEBUG $IO_CONSTANTS);
-$VERSION = "1.08"; # $Date: 2007/02/05 17:08:37 $
+$VERSION = "1.08"; # $Date: 2007/02/05 17:08:37 $
use Symbol ();
-sub new
-{
- my $class = shift;
- my $self = bless Symbol::gensym(), ref($class) || $class;
- tie *$self, $self;
- $self->open(@_);
- return $self;
+sub new {
+ my $class = shift;
+ my $self = bless Symbol::gensym(), ref($class) || $class;
+ tie *$self, $self;
+ $self->open(@_);
+ return $self;
}
-sub open
-{
- my $self = shift;
- return $self->new(@_) unless ref($self);
-
- if (@_) {
- my $bufref = ref($_[0]) ? $_[0] : \$_[0];
- $$bufref = "" unless defined $$bufref;
- *$self->{buf} = $bufref;
- }
- else {
- my $buf = "";
- *$self->{buf} = \$buf;
- }
- *$self->{pos} = 0;
- *$self->{lno} = 0;
- return $self;
+sub open {
+ my $self = shift;
+ return $self->new(@_) unless ref($self);
+
+ if (@_) {
+ my $bufref = ref( $_[0] ) ? $_[0] : \$_[0];
+ $$bufref = "" unless defined $$bufref;
+ *$self->{buf} = $bufref;
+ } else {
+ my $buf = "";
+ *$self->{buf} = \$buf;
+ }
+ *$self->{pos} = 0;
+ *$self->{lno} = 0;
+ return $self;
}
-sub pad
-{
- my $self = shift;
- my $old = *$self->{pad};
- *$self->{pad} = substr($_[0], 0, 1) if @_;
- return "\0" unless defined($old) && length($old);
- return $old;
+sub pad {
+ my $self = shift;
+ my $old = *$self->{pad};
+ *$self->{pad} = substr( $_[0], 0, 1 ) if @_;
+ return "\0" unless defined($old) && length($old);
+ return $old;
}
-sub dump
-{
- require Data::Dumper;
- my $self = shift;
- print Data::Dumper->Dump([$self], ['*self']);
- print Data::Dumper->Dump([*$self{HASH}], ['$self{HASH}']);
- return;
+sub dump {
+ require Data::Dumper;
+ my $self = shift;
+ print Data::Dumper->Dump( [$self], ['*self'] );
+ print Data::Dumper->Dump( [ *$self{HASH} ], ['$self{HASH}'] );
+ return;
}
-sub TIEHANDLE
-{
- print "TIEHANDLE @_\n" if $DEBUG;
- return $_[0] if ref($_[0]);
- my $class = shift;
- my $self = bless Symbol::gensym(), $class;
- $self->open(@_);
- return $self;
+sub TIEHANDLE {
+ print "TIEHANDLE @_\n" if $DEBUG;
+ return $_[0] if ref( $_[0] );
+ my $class = shift;
+ my $self = bless Symbol::gensym(), $class;
+ $self->open(@_);
+ return $self;
}
-sub DESTROY
-{
- print "DESTROY @_\n" if $DEBUG;
+sub DESTROY {
+ print "DESTROY @_\n" if $DEBUG;
}
-sub close
-{
- my $self = shift;
- delete *$self->{buf};
- delete *$self->{pos};
- delete *$self->{lno};
- undef *$self if $] eq "5.008"; # workaround for some bug
- return 1;
+sub close {
+ my $self = shift;
+ delete *$self->{buf};
+ delete *$self->{pos};
+ delete *$self->{lno};
+ undef *$self if $] eq "5.008"; # workaround for some bug
+ return 1;
}
-sub opened
-{
- my $self = shift;
- return defined *$self->{buf};
+sub opened {
+ my $self = shift;
+ return defined *$self->{buf};
}
-sub binmode
-{
- my $self = shift;
- return 1 unless @_;
- # XXX don't know much about layers yet :-(
- return 0;
+sub binmode {
+ my $self = shift;
+ return 1 unless @_;
+
+ # XXX don't know much about layers yet :-(
+ return 0;
}
-sub getc
-{
- my $self = shift;
- my $buf;
- return $buf if $self->read($buf, 1);
- return undef;
+sub getc {
+ my $self = shift;
+ my $buf;
+ return $buf if $self->read( $buf, 1 );
+ return undef;
}
-sub ungetc
-{
- my $self = shift;
- $self->setpos($self->getpos() - 1);
- return 1;
+sub ungetc {
+ my $self = shift;
+ $self->setpos( $self->getpos() - 1 );
+ return 1;
}
-sub eof
-{
- my $self = shift;
- return length(${*$self->{buf}}) <= *$self->{pos};
+sub eof {
+ my $self = shift;
+ return length( ${ *$self->{buf} } ) <= *$self->{pos};
}
-sub print
-{
- my $self = shift;
- if (defined $\) {
- if (defined $,) {
- $self->write(join($,, @_).$\);
- }
- else {
- $self->write(join("",@_).$\);
- }
+sub print {
+ my $self = shift;
+ if ( defined $\ ) {
+ if ( defined $, ) {
+ $self->write( join( $,, @_ ) . $\ );
+ } else {
+ $self->write( join( "", @_ ) . $\ );
}
- else {
- if (defined $,) {
- $self->write(join($,, @_));
- }
- else {
- $self->write(join("",@_));
- }
+ } else {
+ if ( defined $, ) {
+ $self->write( join( $,, @_ ) );
+ } else {
+ $self->write( join( "", @_ ) );
}
- return 1;
+ }
+ return 1;
}
*printflush = \*print;
-sub printf
-{
- my $self = shift;
- print "PRINTF(@_)\n" if $DEBUG;
- my $fmt = shift;
- $self->write(sprintf($fmt, @_));
- return 1;
+sub printf {
+ my $self = shift;
+ print "PRINTF(@_)\n" if $DEBUG;
+ my $fmt = shift;
+ $self->write( sprintf( $fmt, @_ ) );
+ return 1;
}
-
-my($SEEK_SET, $SEEK_CUR, $SEEK_END);
-
-sub _init_seek_constants
-{
- if ($IO_CONSTANTS) {
- require IO::Handle;
- $SEEK_SET = &IO::Handle::SEEK_SET;
- $SEEK_CUR = &IO::Handle::SEEK_CUR;
- $SEEK_END = &IO::Handle::SEEK_END;
- }
- else {
- $SEEK_SET = 0;
- $SEEK_CUR = 1;
- $SEEK_END = 2;
- }
+my ( $SEEK_SET, $SEEK_CUR, $SEEK_END );
+
+sub _init_seek_constants {
+ if ($IO_CONSTANTS) {
+ require IO::Handle;
+ $SEEK_SET = &IO::Handle::SEEK_SET;
+ $SEEK_CUR = &IO::Handle::SEEK_CUR;
+ $SEEK_END = &IO::Handle::SEEK_END;
+ } else {
+ $SEEK_SET = 0;
+ $SEEK_CUR = 1;
+ $SEEK_END = 2;
+ }
}
+sub seek {
+ my ( $self, $off, $whence ) = @_;
+ my $buf = *$self->{buf} || return 0;
+ my $len = length($$buf);
+ my $pos = *$self->{pos};
-sub seek
-{
- my($self,$off,$whence) = @_;
- my $buf = *$self->{buf} || return 0;
- my $len = length($$buf);
- my $pos = *$self->{pos};
-
- _init_seek_constants() unless defined $SEEK_SET;
+ _init_seek_constants() unless defined $SEEK_SET;
- if ($whence == $SEEK_SET) { $pos = $off }
- elsif ($whence == $SEEK_CUR) { $pos += $off }
- elsif ($whence == $SEEK_END) { $pos = $len + $off }
- else { die "Bad whence ($whence)" }
- print "SEEK(POS=$pos,OFF=$off,LEN=$len)\n" if $DEBUG;
+ if ( $whence == $SEEK_SET ) { $pos = $off }
+ elsif ( $whence == $SEEK_CUR ) { $pos += $off }
+ elsif ( $whence == $SEEK_END ) { $pos = $len + $off }
+ else { die "Bad whence ($whence)" }
+ print "SEEK(POS=$pos,OFF=$off,LEN=$len)\n" if $DEBUG;
- $pos = 0 if $pos < 0;
- $self->truncate($pos) if $pos > $len; # extend file
- *$self->{pos} = $pos;
- return 1;
+ $pos = 0 if $pos < 0;
+ $self->truncate($pos) if $pos > $len; # extend file
+ *$self->{pos} = $pos;
+ return 1;
}
-sub pos
-{
- my $self = shift;
- my $old = *$self->{pos};
- if (@_) {
- my $pos = shift || 0;
- my $buf = *$self->{buf};
- my $len = $buf ? length($$buf) : 0;
- $pos = $len if $pos > $len;
- *$self->{pos} = $pos;
- }
- return $old;
+sub pos {
+ my $self = shift;
+ my $old = *$self->{pos};
+ if (@_) {
+ my $pos = shift || 0;
+ my $buf = *$self->{buf};
+ my $len = $buf ? length($$buf) : 0;
+ $pos = $len if $pos > $len;
+ *$self->{pos} = $pos;
+ }
+ return $old;
}
sub getpos { shift->pos; }
*setpos = \&pos;
*tell = \&getpos;
-
-
-sub getline
-{
- my $self = shift;
- my $buf = *$self->{buf} || return;
- my $len = length($$buf);
- my $pos = *$self->{pos};
- return if $pos >= $len;
-
- unless (defined $/) { # slurp
- *$self->{pos} = $len;
- return substr($$buf, $pos);
- }
-
- unless (length $/) { # paragraph mode
- # XXX slow&lazy implementation using getc()
- my $para = "";
- my $eol = 0;
- my $c;
- while (defined($c = $self->getc)) {
- if ($c eq "\n") {
- $eol++;
- next if $eol > 2;
- }
- elsif ($eol > 1) {
- $self->ungetc($c);
- last;
- }
- else {
- $eol = 0;
- }
- $para .= $c;
- }
- return $para; # XXX wantarray
+sub getline {
+ my $self = shift;
+ my $buf = *$self->{buf} || return;
+ my $len = length($$buf);
+ my $pos = *$self->{pos};
+ return if $pos >= $len;
+
+ unless ( defined $/ ) { # slurp
+ *$self->{pos} = $len;
+ return substr( $$buf, $pos );
+ }
+
+ unless ( length $/ ) { # paragraph mode
+ # XXX slow&lazy implementation using getc()
+ my $para = "";
+ my $eol = 0;
+ my $c;
+ while ( defined( $c = $self->getc ) ) {
+ if ( $c eq "\n" ) {
+ $eol++;
+ next if $eol > 2;
+ } elsif ( $eol > 1 ) {
+ $self->ungetc($c);
+ last;
+ } else {
+ $eol = 0;
+ }
+ $para .= $c;
}
-
- my $idx = index($$buf,$/,$pos);
- if ($idx < 0) {
- # return rest of it
- *$self->{pos} = $len;
- $. = ++ *$self->{lno};
- return substr($$buf, $pos);
- }
- $len = $idx - $pos + length($/);
- *$self->{pos} += $len;
- $. = ++ *$self->{lno};
- return substr($$buf, $pos, $len);
+ return $para; # XXX wantarray
+ }
+
+ my $idx = index( $$buf, $/, $pos );
+ if ( $idx < 0 ) {
+
+ # return rest of it
+ *$self->{pos} = $len;
+ $. = ++*$self->{lno};
+ return substr( $$buf, $pos );
+ }
+ $len = $idx - $pos + length($/);
+ *$self->{pos} += $len;
+ $. = ++*$self->{lno};
+ return substr( $$buf, $pos, $len );
}
-sub getlines
-{
- die "getlines() called in scalar context\n" unless wantarray;
- my $self = shift;
- my($line, @lines);
- push(@lines, $line) while defined($line = $self->getline);
- return @lines;
+sub getlines {
+ die "getlines() called in scalar context\n" unless wantarray;
+ my $self = shift;
+ my ( $line, @lines );
+ push( @lines, $line ) while defined( $line = $self->getline );
+ return @lines;
}
-sub READLINE
-{
- goto &getlines if wantarray;
- goto &getline;
+sub READLINE {
+ goto &getlines if wantarray;
+ goto &getline;
}
-sub input_line_number
-{
- my $self = shift;
- my $old = *$self->{lno};
- *$self->{lno} = shift if @_;
- return $old;
+sub input_line_number {
+ my $self = shift;
+ my $old = *$self->{lno};
+ *$self->{lno} = shift if @_;
+ return $old;
}
-sub truncate
-{
- my $self = shift;
- my $len = shift || 0;
- my $buf = *$self->{buf};
- if (length($$buf) >= $len) {
- substr($$buf, $len) = '';
- *$self->{pos} = $len if $len < *$self->{pos};
- }
- else {
- $$buf .= ($self->pad x ($len - length($$buf)));
- }
- return 1;
+sub truncate {
+ my $self = shift;
+ my $len = shift || 0;
+ my $buf = *$self->{buf};
+ if ( length($$buf) >= $len ) {
+ substr( $$buf, $len ) = '';
+ *$self->{pos} = $len if $len < *$self->{pos};
+ } else {
+ $$buf .= ( $self->pad x ( $len - length($$buf) ) );
+ }
+ return 1;
}
-sub read
-{
- my $self = shift;
- my $buf = *$self->{buf};
- return undef unless $buf;
-
- my $pos = *$self->{pos};
- my $rem = length($$buf) - $pos;
- my $len = $_[1];
- $len = $rem if $len > $rem;
- return undef if $len < 0;
- if (@_ > 2) { # read offset
- substr($_[0],$_[2]) = substr($$buf, $pos, $len);
- }
- else {
- $_[0] = substr($$buf, $pos, $len);
- }
- *$self->{pos} += $len;
- return $len;
+sub read {
+ my $self = shift;
+ my $buf = *$self->{buf};
+ return undef unless $buf;
+
+ my $pos = *$self->{pos};
+ my $rem = length($$buf) - $pos;
+ my $len = $_[1];
+ $len = $rem if $len > $rem;
+ return undef if $len < 0;
+ if ( @_ > 2 ) { # read offset
+ substr( $_[0], $_[2] ) = substr( $$buf, $pos, $len );
+ } else {
+ $_[0] = substr( $$buf, $pos, $len );
+ }
+ *$self->{pos} += $len;
+ return $len;
}
-sub write
-{
- my $self = shift;
- my $buf = *$self->{buf};
- return unless $buf;
-
- my $pos = *$self->{pos};
- my $slen = length($_[0]);
- my $len = $slen;
- my $off = 0;
- if (@_ > 1) {
- $len = $_[1] if $_[1] < $len;
- if (@_ > 2) {
- $off = $_[2] || 0;
- die "Offset outside string" if $off > $slen;
- if ($off < 0) {
- $off += $slen;
- die "Offset outside string" if $off < 0;
- }
- my $rem = $slen - $off;
- $len = $rem if $rem < $len;
- }
+sub write {
+ my $self = shift;
+ my $buf = *$self->{buf};
+ return unless $buf;
+
+ my $pos = *$self->{pos};
+ my $slen = length( $_[0] );
+ my $len = $slen;
+ my $off = 0;
+ if ( @_ > 1 ) {
+ $len = $_[1] if $_[1] < $len;
+ if ( @_ > 2 ) {
+ $off = $_[2] || 0;
+ die "Offset outside string" if $off > $slen;
+ if ( $off < 0 ) {
+ $off += $slen;
+ die "Offset outside string" if $off < 0;
+ }
+ my $rem = $slen - $off;
+ $len = $rem if $rem < $len;
}
- substr($$buf, $pos, $len) = substr($_[0], $off, $len);
- *$self->{pos} += $len;
- return $len;
+ }
+ substr( $$buf, $pos, $len ) = substr( $_[0], $off, $len );
+ *$self->{pos} += $len;
+ return $len;
}
-*sysread = \&read;
+*sysread = \&read;
*syswrite = \&write;
-sub stat
-{
- my $self = shift;
- return unless $self->opened;
- return 1 unless wantarray;
- my $len = length ${*$self->{buf}};
-
- return (
- undef, undef, # dev, ino
- 0666, # filemode
- 1, # links
- $>, # user id
- $), # group id
- undef, # device id
- $len, # size
- undef, # atime
- undef, # mtime
- undef, # ctime
- 512, # blksize
- int(($len+511)/512) # blocks
- );
+sub stat {
+ my $self = shift;
+ return unless $self->opened;
+ return 1 unless wantarray;
+ my $len = length ${ *$self->{buf} };
+
+ return (
+ undef, undef, # dev, ino
+ 0666, # filemode
+ 1, # links
+ $>, # user id
+ $), # group id
+ undef, # device id
+ $len, # size
+ undef, # atime
+ undef, # mtime
+ undef, # ctime
+ 512, # blksize
+ int( ( $len + 511 ) / 512 ) # blocks
+ );
}
sub FILENO {
- return undef; # XXX perlfunc says this means the file is closed
+ return undef; # XXX perlfunc says this means the file is closed
}
sub blocking {
- my $self = shift;
- my $old = *$self->{blocking} || 0;
- *$self->{blocking} = shift if @_;
- return $old;
+ my $self = shift;
+ my $old = *$self->{blocking} || 0;
+ *$self->{blocking} = shift if @_;
+ return $old;
}
my $notmuch = sub { return };
-*fileno = $notmuch;
-*error = $notmuch;
-*clearerr = $notmuch;
-*sync = $notmuch;
-*flush = $notmuch;
-*setbuf = $notmuch;
-*setvbuf = $notmuch;
+*fileno = $notmuch;
+*error = $notmuch;
+*clearerr = $notmuch;
+*sync = $notmuch;
+*flush = $notmuch;
+*setbuf = $notmuch;
+*setvbuf = $notmuch;
*untaint = $notmuch;
*autoflush = $notmuch;
*fcntl = $notmuch;
*ioctl = $notmuch;
-*GETC = \&getc;
-*PRINT = \&print;
-*PRINTF = \&printf;
-*READ = \&read;
-*WRITE = \&write;
-*SEEK = \&seek;
-*TELL = \&getpos;
-*EOF = \&eof;
-*CLOSE = \&close;
+*GETC = \&getc;
+*PRINT = \&print;
+*PRINTF = \&printf;
+*READ = \&read;
+*WRITE = \&write;
+*SEEK = \&seek;
+*TELL = \&getpos;
+*EOF = \&eof;
+*CLOSE = \&close;
*BINMODE = \&binmode;
-
-sub string_ref
-{
- my $self = shift;
- return *$self->{buf};
+sub string_ref {
+ my $self = shift;
+ return *$self->{buf};
}
*sref = \&string_ref;