5 jpred - Secondary structure prediction program
9 jpred --sequence <fasta file> [--output <output prefix>] [--db <database>] [--psi <psiblast output>] [--pred-nohits] [--verbose] [--debug] [--help] [--man]
13 This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence.
14 It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
16 The program is primarily design for use by the Jpred server, but it can be used directly by any user.
17 Some of the output may not be meaningful if used directly - it can be safely ignored.
25 The path to the sequence (in FASTA format) you want to predict.
29 A prefix to the filenames created by Jpred, defaults to the value set by --sequence.
33 Database to use for PSI-BLAST querying. This only accepts databases from a list which is pre-defined in the code. Crap I know, but that's the way it is at the moment. It's probably best to stick the default for the time being.
39 Path to a PSIBLAST output file.
43 Toggle allowing Jpred to make predictions even when there are no PSI-BLAST hits.
47 Verbose mode. Print more information while running jpred.
51 Debug mode. Print debugging information while running jpred.
55 Gives help on the programs usage.
59 Displays this man page.
65 Can't cope with gaps in the query sequence.
67 Doesn't accept alignments.
69 If you find any others please contact me.
73 Jonathan Barber <jon@compbio.dundee.ac.uk>
75 Chris Cole <christian@cole.name> (current maintainer)
79 ## TODO check for gaps in query sequence
80 ## check for disallowed chars in sequence
87 use UNIVERSAL qw(isa);
92 use lib "$Bin/../lib";
94 #use lib "/home/asherstnev/Projects/Jpred/jpred/trunk/lib";
96 use Jpred; # needed to define BLAST environment variables
100 use HMMER::Profile::Jnet;
111 use Utils qw(profile);
113 use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin);
115 our $VERSION = '3.0.2';
116 my $MAX_ALIGN = 1000; # maximum number of alignment sequences allowed
117 my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
119 # Light object to hold sequence information, light to prevent memory overload
120 # for big search results. Even so this is quite memory intensive, I can't see
121 # how to reduce the size further.
126 #use Compress::LZF qw(compress decompress);
129 my ( $class, %args ) = @_;
132 for ( keys %args ) { $self->$_( $args{$_} ) }
149 if ( defined $_[1] ) {
150 ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
152 #$_[0]->[2] = compress(join "", @{ $_[1] });
153 $_[0]->[2] = join( "", @{ $_[1] } );
156 #return [ split //, decompress($_[0]->[2]) ];
157 return [ split( //, $_[0]->[2] ) ];
162 # Key to database information and information for accessing them
163 my $dbPATH = '/homes/chris/projects/Jnet/db';
164 my $db_entry = "uniref90";
167 ## default database used by Jpred
169 database => 'uniref90.filt',
170 unfiltered => 'uniref90',
173 ## database used during Jnet training
175 database => 'training/uniref90.filt',
176 unfiltered => 'training/uniref90',
179 ## cluster-specific path for Jpred
181 database => '/local/gjb_lab/www-jpred/uniref90.filt',
182 unfiltered => '/local/gjb_lab/www-jpred/uniref90',
185 ## these other DBs are experimental ones used during development.
186 ## check they exist before using them.
189 # generic entry for use with validate_jnet.pl
190 # real db location defined by validate_jnet.pl
191 database => "$dbPATH/swall/swall.filt", # CC new
192 unfiltered => "$dbPATH/swall/swall",
196 # Path to PSIBLAST db
197 database => "$dbPATH/3/swall.filt", # CC new
198 unfiltered => "$dbPATH/3/swall",
201 database => "$dbPATH/6/swall.filt",
202 unfiltered => "$dbPATH/6/swall",
206 # Gap characters in sequences
209 my ( $help, $man, $path, $output, $psiblast, $DEBUG, $VERBOSE );
210 my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
214 "verbose" => \$VERBOSE,
215 "sequence=s" => \$path,
216 "output=s" => \$output,
217 "psi=s" => \$psiblast,
218 "db=s" => \$db_entry,
219 "pred-nohits" => \$predNoHits,
222 pod2usage(1) if $help;
223 pod2usage( verbose => 2 ) if $man;
225 pod2usage(' --sequence argument not provided') unless $path;
226 die "--db $db_entry not recognised" unless exists $database->{$db_entry};
227 $output = $path unless $output;
229 for ( [ "path", $path ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) {
230 my ( $key, $value ) = @{$_};
231 defined $value or next;
232 errlog( join( ": ", $key, $value ), "\n" );
235 my $query = FASTA::File->new( read_file => $path );
239 my $psi = PSIBLAST->new; # CC PSIBLAST.pm doesn't have a new() class??
240 unless ( defined $psiblast && -e $psiblast ) {
241 my $psi_run = PSIBLAST::Run->new(
244 # path => "/software/jpred_bin/blastpgp",
245 path => $psiblastbin, # CC new path
247 output => "$output.blast",
248 matrix => "$output.profile",
249 database => $database->{$db_entry}{database},
250 ## potentially a better set of parameters (esp. -b & -v)
251 ## which avoid PSI-BLAST dying with large number of hits
252 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
253 # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
254 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
257 # For reduced databases, get the size of the orginal and use this as
258 # the equivilent DB size
259 #print $db_entry, "\n";
260 if ( $db_entry =~ /^sp_red_/ ) {
261 ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/;
263 $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below
266 print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
267 print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
269 print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
270 $psi_run->run or die; # CC sub is from PSIBLAST::Run
272 $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
273 system("gzip -9f $output.blast");
275 if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
276 else { $psi->read_file($psiblast) } # CC ditto above
279 # Convert the last itteration into a collection of PSISEQ objects
280 for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm
281 my ( $id, $start, $align ) = @{$_};
286 align => [ split( //, $align ) ]
291 ## When there are no PSI-BLAST hits generate an HMM from the query
292 ## and run Jnet against that only.
293 ## Accuracy won't be as good, but at least it's a prediction
295 if ( $predNoHits == 0 ) {
296 warn "\nJPRED: Warning - no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n";
297 print ">>100% complete\n";
300 print ">>50% complete\n";
301 warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
302 print "Running HMMer on query...\n" if $VERBOSE;
304 # copy input query to alignment
305 system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
307 # Temp files required for HMMer
308 my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2;
310 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $path");
311 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
313 # Read in the HMMER file
314 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
316 # Convert from HMMER::Profile to HMMER::Profile::Jnet
317 my $hmmer_jnet = HMMER::Profile::Jnet->new;
318 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
319 $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
320 print ">>70% complete\n";
322 # Run Jnet for the prediction
323 print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
324 jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below
326 print "Jpred Finished\n";
330 psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below
331 print ">>40% complete\n";
333 # Make PSIBLAST truncated alignments the right length
334 print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE;
335 @seqs = extend( $query, @seqs ); # CC sub si below
336 psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG;
338 # Remove masking from PSIBLAST alignment
339 print "Unmasking the alignments...\n" if $VERBOSE;
340 dex($_) for @seqs[ 1 .. $#seqs ]; ## <-- CC dies here in dex() - found below
341 psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG;
343 # Convert the sequences to upper case
344 print "Converting sequences to the same case...\n" if $VERBOSE;
345 toupper($_) for @seqs; # CC sub is below
346 psiseq2fasta( "${output}_backup.fasta.gz", @seqs );
348 # Remove excessive sequences
349 print "Remove excessive sequences...\n" if $VERBOSE;
350 @seqs = reduce( $MAX_ALIGN, @seqs ); # CC sub is below
351 psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG;
353 # Remove sequences that are too long or too short
354 print "Remove sequences which too long or short...\n" if $VERBOSE;
355 @seqs = del_long_seqs( 50, @seqs ); # CC sub is below
356 psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG;
358 # Remove redundant sequences based upon pairwise identity and OC clustering
359 print "Remove redundant sequences...\n" if $VERBOSE;
360 @seqs = nonred( $NR_CUT, @seqs ); # CC sub is below
361 psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG;
363 # Check that we haven't got rid of all of the sequences
365 warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
366 @seqs = fasta2psiseq("${output}_backup.fasta.gz");
368 unlink("${output}_backup.fasta.gz") unless $DEBUG;
370 # Remove gaps in the query sequence and same positions in the alignment
371 print "Removing gaps in the query sequence...\n" if $VERBOSE;
372 degap(@seqs); # CC sub is below
373 psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG;
375 # Output the alignment for the prediction
376 print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE;
377 psiseq2fasta( "$output.align", @seqs );
380 # Equivilent to getpssm script
381 print "Output the PSSM matrix from the PSI-BLAST profile...\n";
382 my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" );
383 $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
385 print ">>50% complete\n";
387 # Run HMMER on the sequences
389 print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
390 my $hmmer = hmmer(@seqs); # CC sub is below
391 $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
393 print ">>70% complete\n";
395 # Run Jnet for the prediction
396 print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE;
397 jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below
399 print "Jpred Finished\n";
401 # Then do an accuracy prediction
403 ############################################################
405 ############################################################
409 =head2 $PSISEQ = dex($PSISEQ)
411 Returns a PSISEQ object which has any masked residues replaced by their entry from the unfiltered database. Does it in place.
416 my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found.";
421 croak "JPRED: dex: can only take a PSISEQ object as an argument" unless isa $seq, 'PSISEQ';
423 # Only bother if we have a sequence which has been masked
424 return unless grep { uc $_ eq 'X' } @{ $seq->align };
426 my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } );
428 #$id = "$id"; # Add the EMBOSS USA
429 #$id = "sprot_37:$id"; # Add the EMBOSS USA
430 #$id = "sprot_43:$id"; # Add the EMBOSS USA
431 # $id = $database->{$db_entry}{emboss} . ":$id"; # commented out to try FastaCMD module
432 my $searchdb = $database->{$db_entry}{unfiltered};
433 $start--; # We need this to be zero based
435 my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return; # CC sub is from Index::EMBOSS
436 my @seqs = $f->get_entries; ## Sub is from Sequence::File
437 my $db = shift @seqs;
440 confess "JPRED: dex: Accession number $id returned no sequences";
442 confess "JPRED: dex: Accession number $id returned more than one sequence";
445 my @db_seq = @{ $db->seq };
447 # $i is a pointer into the original, ungapped, unmasked sequence. $_ is
448 # for the aligned sequence
450 for ( 0 .. $#align ) {
451 next if $align[$_] =~ /$GAP/;
453 if ( $align[$_] eq 'X' ) {
454 unless ( defined $db_seq[ $i + $start ] ) {
455 croak "JPRED: dex: the database sequence is not as long as the query sequence!";
457 $align[$_] = $db_seq[ $i + $start ];
462 $seq->align( \@align );
466 =head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN);
468 Reduces @PSISEQS_IN to at most $max sequences by taking every n sequences in @PSISEQ_IN.
470 Equivilent to reduce script.
475 my ( $max, @seqs ) = @_;
477 my $num = int( @seqs / $max );
479 my @res = shift @seqs;
481 # Don't bother if we have less than the required sequences
482 if ( @seqs < $max ) { return @res, @seqs }
483 if ( $num == 0 ) { return @res, @seqs }
485 for ( 0 .. $#seqs ) { push @res, $seqs[$_] if $_ % $num == 0 }
490 =head2 nonred($cutoff, @PSISEQS);
492 Removes redundant sequences at $cutoff level of sequence identity.
494 Equivilent to nonred script.
499 my ( $cutoff, @seqs ) = @_;
501 # Run pairwise and then OC
502 # Remove similar sequences
504 unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" }
505 unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
507 warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
511 my $fasta = FASTA::File->new;
513 # Convert the sequences into a FASTA object, which is what
517 my $f = FASTA->new( id => $_->id );
518 $f->seq( @{ $_->align } );
523 # Run pairwise via wrappers
525 my $pair = Pairwise->new( path => $pairwise );
526 my $foo = join "\n", $pair->run($fasta) or die $!;
529 my $ocres = OC->new( path => "$oc sim complete cut $cutoff" );
530 $ocres->read_string( join "", $ocres->run($foo) );
532 # Now found those entries that aren't related
535 for ( $ocres->get_groups ) {
536 my ( $group, $score, $size, @labels ) = @{$_};
538 # Keep the QUERY from the cluster, otherwise just get one
539 if ( grep { /QUERY/ } @labels ) {
541 # Make sure the query stays at the start
542 unshift @keep, "QUERY";
545 push @keep, shift @labels;
549 push @keep, $ocres->get_unclustered;
552 # Return the matching entries.
555 # We use the temporay to mimic the selection of sequences in the nonred
556 # script, which took the sequences in the order they occured in the file.
559 if ( grep { $_ =~ /^$label$/ } @keep ) {
567 =head2 @seqs = del_long_seqs($percentage, @PSISEQS)
569 Removes those sequences that are more than $percentage longer or shorter than the first
570 sequence in @seqs. @seqs is an array of PSISEQ objects.
572 Equivilent to trim_seqs script.
577 my ( $factor, @seqs ) = @_;
579 # Keep the query sequence
580 my $query = shift @seqs;
583 # Find the length of the query sequence without any gaps
584 my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g );
586 # Calculate the upper and lower allowed bounds
587 my ( $upper, $lower );
589 my $bounds = ( $length / 100 ) * $factor;
590 ( $upper, $lower ) = ( $length + $bounds, $length - $bounds );
593 # Now try each sequence and see how long they are
595 my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g );
596 if ( $l >= $lower && $l <= $upper ) { push @res, $_ }
602 =head2 toupper ($PSISEQ)
604 Converts sequence held in PSISEQ object to uppercase.
609 croak "JPRED: toupper: argument not a PSISEQ object" unless isa $_[0], 'PSISEQ';
610 $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] );
613 =head2 degap($PSISEQ_QUERY, @PSISEQS)
615 Removes gaps in the query sequence ($PSISEQ_QUERY) and corresponding positions in @PSISEQS. Change made in place.
617 Equivilent to fasta2jnet script.
624 # Find where the gaps in the query sequence are
628 for ( @{ $seqs[0]->align } ) {
629 push @gaps, $i if $_ !~ /$GAP/;
636 # Remove the gaps in all the sequences.
637 # Derefences the array reference and takes a slice inside the method call
638 # arguments, then coerce back to an array ref.
640 $_->align( [ @{ $_->align }[@gaps] ] );
644 =head2 getfreq($filename, @PSISEQS)
646 Creates a PSIBLAST like profile and writes it to the $filename.
648 Equivilant to profile or getfreq (older) script. This doesn't create the same answer as
649 the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output,
650 but it's *not* the same.
655 my ( $fn, @seqs ) = @_;
657 croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs;
659 # Create a PSIBLAST like profile from the alignment
660 # This doesn't greate the same result as old Jpred, I think this is because
661 # differences in the input between old and new
664 # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
665 # my @profile = profile(
666 # map { join "", @{ $_->seq } } $f->get_entries
669 my @profile = profile( map { join "", @{ $_->align } } @seqs );
671 open my $fh, ">$fn" or die "JPRED: $fn: $!";
672 print $fh join( " ", @{$_} ), "\n" for @profile;
675 =head2 HMMER::Profile::Jnet = hmmer(@PSISEQS)
677 Creates a HMMER profile from the aligned sequences in @PSISEQS. Returns a HMMER::Profile::Jnet object.
679 Equivilent to gethmm script.
686 # Temp files required
687 my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3;
689 # Create the FASTA file
690 psiseq2fasta( $tmp_fasta, @seqs );
692 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
693 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
695 # Read in the HMMER file
696 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
698 # Convert from HMMER::Profile to HMMER::Profile::Jnet
699 my $hmmer_jnet = HMMER::Profile::Jnet->new;
700 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
705 =head2 jnet($hmmer, $out, $pssm )
707 Runs Jnet. Pass the paths of the filenames to do the prediction on
712 my ( $hmmer, $outfile, $pssm ) = @_;
715 # run Jnet prediction with all the profile data
716 system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
719 # run Jnet prediction with only HMMer and alignment data
720 system("$jnet --concise --hmmer $hmmer > $outfile");
722 check( "jnet", $? ) or exit 1;
725 =head1 psiseq2fasta($path, @PSISEQ)
727 Writes a FASTA file given an array of PSISEQ objects.
732 my ( $fn, @seqs ) = @_;
734 confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
735 @seqs or confess "JPRED: not passed PSISEQs";
737 confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
739 # Changed from using FASTA::File object due to seg faulting on some large,
740 # long alignments, presumably due to running out of memory.
741 # my $fasta = FASTA::File->new;
742 # $fasta->add_entries(
744 # my $f = FASTA->new(id => $_->id);
745 # $f->seq(@{$_->align});
749 # $fasta->write_file($fn);
753 ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" )
754 : ( open $fh, ">$fn" or die "JPRED: $!" );
757 print $fh ">", $_->id, "\n";
758 my $seq = join( "", @{ $_->align } );
759 $seq =~ s/(.{72})/$1\n/g;
761 print $fh $seq . "\n";
766 =head1 @PSISEQ = fasta2psiseq($path)
768 Reads in a FASTA file and returns an array of PSISEQ objects.
777 ? FASTA::File->new( read_gzip_file => $fn )
778 : FASTA::File->new( read_file => $fn );
779 $fasta or die "JPRED: $!";
782 for ( $fasta->get_entries ) {
783 my $seq = PSISEQ->new;
785 $seq->align( [ @{ $_->seq } ] );
792 sub fasta2psiseq_lowmem {
798 ? ( open $fh, "gzip -dc $fn|" or die $! )
799 : ( open $fh, $fn or die $! );
807 $seq =~ s/ //g if defined $seq;
809 if ( $id and $seq ) {
810 my $psi = PSISEQ->new( id => $id );
811 $psi->align( [ split //, $seq ] );
822 if ( $id and $seq ) {
823 my $psi = PSISEQ->new( id => $id );
831 =head1 die_if_no_seqs($size, $message, @seqs)
833 Exits with error message if @seqs is not at least $size.
838 my ( $size, $message, @seqs ) = @_;
840 confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
843 die "JPRED: $message\n" unless @seqs > $size;
846 =head1 extend($FASTA::File, @PSISEQ)
848 Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues
849 against the query, this function extends the alignment so that all of the query is
850 present in the alignment. The other sequences are extended with gap characters.
855 my ( $query, @seqs ) = @_;
857 croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
858 croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
860 # Get the query sequence
861 my $q_query = $query->get_entry(0);
863 # Get the query sequence from the PSIBLAST alignment
864 my $a_query = shift @seqs;
866 # The gap size required to expand the alignment
867 my ( $start_gap_length, $end_gap_length );
870 # Make handling the sequence easier
871 my $q_seq = join "", @{ [ $q_query->seq ] };
872 my $a_seq = join "", @{ $a_query->align };
877 ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq;
879 $start_gap_length = index( $q_seq, $a_seq );
880 croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
882 $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
885 # Add the gaps to the end of the alignments
887 $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] );
890 # Put the query sequence back
895 ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ),
896 @{ $a_query->align },
897 ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ),
904 =head1 $int = fasta_seq_length($path)
906 Given the path to a FASTA database, find the number of residues/bases in it. Counts whitespace as a residue.
910 sub fasta_seq_length {
912 open my $fh, $fn or croak "Can't open file $fn: $!\n";
925 =head1 log(@messages)
927 Report messages to STDERR
931 sub errlog { print STDERR @_ }