5 jpred - Secondary structure prediction program
9 ./jpred.pl -in/-sequence <FILE1> [-out/-output <FILEPREFIX>] [-dbname <DBNAME>] [-dbpath <PATH>] [-ncpu NNN] [-psi <psiblast output>] [-pred-nohits] [-verbose] [-debug] [-help] [-man]
13 This is a program which predicts the secondary structure of a protein sequence given a path to a
14 FASTA sequence. It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
16 The program is primarily design for use by the Jpred server, but it can be used directly by any user.
17 Some of the output may not be meaningful if used directly - it can be safely ignored.
23 =item -in/-sequence FILE
25 The path to the sequence file (in FASTA format) you want to predict.
27 =item -out/-output FILEPREFIX
29 A prefix to the filenames created by Jpred, defaults to the value set by -sequence/-in.
33 Path to the uniref database used for PSI-BLAST querying.
36 =item -dbname database
38 Database to use for PSI-BLAST querying. This only accepts databases from a list which is pre-defined in the code.
43 Path to a PSIBLAST output file.
47 Number of CPU used by jpred.pl. Maximum value is 8
52 Toggle allowing Jpred to make predictions even when there are no PSI-BLAST hits.
56 Verbose mode. Print more information while running jpred.
60 Debug mode. Print debugging information while running jpred.
64 Gives help on the programs usage.
68 Displays this man page.
74 Can't cope with gaps in the query sequence.
76 Doesn't accept alignments.
78 If you find any others please contact authors.
82 Jonathan Barber <jon@compbio.dundee.ac.uk>
83 Chris Cole <christian@cole.name>
84 Alexander Sherstnev <a.sherstnev@dundee.ac.uk> (current maintainer)
88 ## TODO check for gaps in query sequence
89 ## check for disallowed chars in sequence
101 # path to Jpred modules
102 use FindBin qw($Bin);
105 # internal jpred modules
107 use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin check_OS);
109 use HMMER::Profile::Jnet;
118 use Utils qw(profile);
121 our $VERSION = '3.0.2';
122 my $MAX_ALIGN = 10000; # maximum number of alignment sequences allowed
123 my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
125 # Gap characters in sequences
128 #####################################################################################################
129 # Light object to hold sequence information, light to prevent memory overload
130 # for big search results. Even so this is quite memory intensive, I can't see
131 # how to reduce the size further.
137 my ( $class, %args ) = @_;
140 for ( keys %args ) { $self->$_( $args{$_} ) }
157 if ( defined $_[1] ) {
158 ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
160 $_[0]->[2] = join( "", @{ $_[1] } );
162 return [ split( //, $_[0]->[2] ) ];
167 #####################################################################################################
171 my $ncpu = 1; # number of CPUs for psiblast calculations
172 my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
174 my $db_entry = "unknown";
176 my ( $help, $man, $DEBUG, $VERBOSE );
179 "in|sequence=s" => \$infile,
180 "out|output=s" => \$output,
181 "psi=s" => \$psiblast,
182 "dbname=s" => \$db_entry,
183 "dbpath=s" => \$db_path,
185 "pred-nohits" => \$predNoHits,
189 "verbose" => \$VERBOSE
191 pod2usage(1) if $help;
192 pod2usage( verbose => 2 ) if $man;
194 #####################################################################################################
195 # Key to database information and information for accessing them
198 ## default database used by Jpred
200 database => $db_path . "/uniref90.filt",
201 unfiltered => $db_path . "/uniref90",
204 ## database used during Jnet training
206 database => $db_path . "/training/uniref90.filt",
207 unfiltered => $db_path . "/training/uniref90",
210 ## database used for ported Jpred
212 database => $db_path . "/databases/uniref90.filt",
213 unfiltered => $db_path . "/databases/uniref90",
215 ## cluster-specific path for Jpred
217 database => "/homes/www-jpred/databases/uniref90.filt",
218 unfiltered => "/homes/www-jpred/databases/uniref90",
221 ## these other DBs are experimental ones used during development.
222 ## check they exist before using them.
224 # generic entry for use with validate_jnet.pl
225 # real db location defined by validate_jnet.pl
226 database => $db_path . "/swall/swall.filt",
227 unfiltered => $db_path . "/swall/swall",
231 # Path to PSIBLAST db
232 database => $db_path . "/3/swall.filt",
233 unfiltered => $db_path . "/3/swall",
237 database => $db_path . "/6/swall.filt",
238 unfiltered => $db_path . "/6/swall",
242 pod2usage(' -sequence argument not provided') unless $infile;
243 die "-db $db_entry not recognised" unless exists $database->{$db_entry};
244 $output = $infile . ".res" unless $output;
245 $ncpu = 1 if ( 1 > $ncpu or 8 < $ncpu );
247 for ( [ "input", $infile ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) {
248 my ( $key, $value ) = @{$_};
249 defined $value or next;
250 errlog( join( ": ", $key, $value ), "\n" );
253 #####################################################################################################
254 my $platform = check_OS();
255 print "JPRED: checking platiform... $platform\n" if $DEBUG;
257 #####################################################################################################
258 my $query = FASTA::File->new( read_file => $infile );
261 my $psi = PSIBLAST->new;
262 unless ( defined $psiblast && -e $psiblast ) {
263 ## potentially a better set of parameters (esp. -b & -v) which avoid PSI-BLAST dying with large number of hits
264 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
265 # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
266 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
267 my $psi_run = PSIBLAST::Run->new(
269 args => "-a" . $ncpu . " -e0.05 -h0.01 -m6 -b10000 -v10000 -j3",
270 path => $psiblastbin,
272 output => "$output.blast",
273 matrix => "$output.profile",
274 database => $database->{$db_entry}{database},
277 # For reduced databases, get the size of the orginal and use this as
278 # the equivilent DB size
279 if ( $db_entry =~ /^sp_red_/ ) {
280 ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/;
282 $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below
285 print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
286 print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
288 print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
289 $psi_run->run or die; # CC sub is from PSIBLAST::Run
291 $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
292 system("gzip -9f $output.blast");
294 if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
295 else { $psi->read_file($psiblast) } # CC ditto above
298 #####################################################################################################
299 # Convert the last itteration into a collection of PSISEQ objects
300 for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm
301 my ( $id, $start, $align ) = @{$_};
306 align => [ split( //, $align ) ]
310 #####################################################################################################
311 ## When there are no PSI-BLAST hits generate an HMM from the query
312 ## and run Jnet against that only.
313 ## Accuracy won't be as good, but at least it's a prediction
315 if ( $predNoHits == 0 ) {
316 warn "\nJPRED: Warning! no PSI-BLAST hits found and '-pred-nohits' option not set. Exiting...\n";
317 print ">>100% complete\n";
320 print ">>50% complete\n";
321 warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
322 print "Running HMMer on query...\n" if $VERBOSE;
324 # copy input query to alignment
325 #system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
326 open( my $ifh, "<", $infile ) or die "JPRED: cannot open file $infile: $!";
327 open( my $ofh, ">", $output . ".align" ) or die "JPRED: cannot open file $output\.align: $!";
328 while (<$ifh>) { print $ofh, $_ }
332 # Temp files required for HMMer
333 my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2;
335 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $infile");
336 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
338 # Read in the HMMER file
339 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
341 # Convert from HMMER::Profile to HMMER::Profile::Jnet
342 my $hmmer_jnet = HMMER::Profile::Jnet->new;
343 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
344 $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
345 print ">>70% complete\n";
347 # Run Jnet for the prediction
348 print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
349 jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below
352 psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below
353 print ">>40% complete\n";
355 #####################################################################################################
356 # Make PSIBLAST truncated alignments the right length
357 print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE;
358 @seqs = extend( $query, @seqs ); # CC sub si below
359 psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG;
361 #####################################################################################################
362 # Remove masking from PSIBLAST alignment
363 print "Unmasking the alignments...\n" if $VERBOSE;
364 my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found.";
365 remove_seq_masks( $idx, $_ ) for @seqs[ 1 .. $#seqs ];
366 psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG;
368 #####################################################################################################
369 # Convert the sequences to upper case
370 print "Converting sequences to the same case...\n" if $VERBOSE;
371 toupper($_) for @seqs; # CC sub is below
372 psiseq2fasta( "${output}_backup.fasta.gz", @seqs );
374 #####################################################################################################
375 # Remove excessive sequences
376 print "Remove excessive sequences...\n" if $VERBOSE;
377 @seqs = reduce( $MAX_ALIGN, @seqs );
378 psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG;
380 #####################################################################################################
381 # Remove sequences that are too long or too short
382 print "Remove sequences which too long or short...\n" if $VERBOSE;
383 @seqs = del_long_seqs( 50, @seqs );
384 psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG;
386 #####################################################################################################
387 # Remove redundant sequences based upon pairwise identity and OC clustering
388 print "Remove redundant sequences...\n" if $VERBOSE;
389 @seqs = nonred( $NR_CUT, @seqs );
390 psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG;
392 #####################################################################################################
393 # Check that we haven't got rid of all of the sequences
395 warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
396 @seqs = fasta2psiseq("${output}_backup.fasta.gz");
398 unlink("${output}_backup.fasta.gz") unless $DEBUG;
400 # Remove gaps in the query sequence and same positions in the alignment
401 print "Removing gaps in the query sequence...\n" if $VERBOSE;
403 psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG;
405 # Output the alignment for the prediction
406 print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE;
407 psiseq2fasta( "$output.align", @seqs );
409 #####################################################################################################
410 # Equivilent to getpssm script
411 print "Output the PSSM matrix from the PSI-BLAST profile...\n";
412 my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" );
413 $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
414 print ">>50% complete\n";
416 #####################################################################################################
417 # Run HMMER on the sequences
418 print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
419 my $hmmer = hmmer(@seqs); # CC sub is below
420 $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
421 print ">>70% complete\n";
423 #####################################################################################################
424 # Run Jnet for the prediction
425 print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE;
426 jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below
429 print ">>100% complete\n";
430 print "Jpred Finished\n";
433 #####################################################################################################
435 #####################################################################################################
439 =head2 $PSISEQ = remove_seq_masks($PSISEQ)
441 Returns a PSISEQ object which has any masked residues replaced by their entry from the unfiltered database.
446 #####################################################################################################
447 sub remove_seq_masks {
451 # Only bother if we have a sequence which has been masked
452 return unless grep { uc $_ eq 'X' } @{ $seq->align };
454 my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } );
455 my $searchdb = $database->{$db_entry}{unfiltered};
456 $start--; # We need this to be zero based
458 my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return;
459 my @seqs = $f->get_entries;
460 my $db = shift @seqs;
463 confess "JPRED: remove_seq_masks: Accession number $id returned no sequences";
465 confess "JPRED: remove_seq_masks: Accession number $id returned more than one sequence";
468 my @db_seq = @{ $db->seq };
470 # $i is a pointer into the original, ungapped, unmasked sequence. $_ is for the aligned sequence
472 for ( 0 .. $#align ) {
473 next if $align[$_] =~ /$GAP/;
475 if ( $align[$_] eq 'X' ) {
476 unless ( defined $db_seq[ $i + $start ] ) {
477 croak "JPRED: remove_seq_masks: the database sequence is not as long as the query sequence!";
479 $align[$_] = $db_seq[ $i + $start ];
483 $seq->align( \@align );
486 =head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN);
488 Reduces @PSISEQS_IN to at most $max sequences by taking every n sequences in @PSISEQ_IN.
490 Equivilent to reduce script.
494 #####################################################################################################
496 my ( $max, @seqs ) = @_;
498 my $num = int( @seqs / $max );
499 my @res = shift @seqs;
501 # Don't bother if we have less than the required sequences
502 if ( @seqs < $max or 0 == $num ) {
506 for ( 0 .. $#seqs ) {
507 push @res, $seqs[$_] if ( 0 == $_ % $num );
513 =head2 nonred($cutoff, @PSISEQS);
515 Removes redundant sequences at $cutoff level of sequence identity.
517 Equivilent to nonred script.
521 #####################################################################################################
523 my ( $cutoff, @seqs ) = @_;
525 # Run pairwise and then OC and remove similar sequences
526 unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" }
527 unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
529 warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
533 # Convert the sequences into a FASTA object, which is what pairwise expects (simpler version).
534 my $fasta = FASTA::File->new;
536 my $f = FASTA->new( id => $_->id );
537 $f->seq( @{ $_->align } );
538 $fasta->add_entries($f);
542 my $pair = Pairwise->new( path => $pairwise );
543 my $foo = join "\n", $pair->run($fasta) or die $!;
546 my $ocres = OC->new( path => "$oc sim complete cut $cutoff" );
547 $ocres->read_string( join "", $ocres->run($foo) );
549 # Now found those entries that are unrelated
551 for ( $ocres->get_groups ) {
552 my ( $group, $score, $size, @labels ) = @{$_};
554 # Keep the QUERY from the cluster, otherwise just get onelt
555 if ( grep { /QUERY/ } @labels ) {
557 # Make sure the query stays at the start
558 unshift @keep, "QUERY";
561 push @keep, shift @labels;
564 push @keep, $ocres->get_unclustered;
566 # Return the matching entries.
567 # We use the temporay to mimic the selection of sequences in the nonred
568 # script, which took the sequences in the order they occured in the file.
572 if ( grep { $_ =~ /^$label$/ } @keep ) {
573 push @filtered_res, $_;
577 return @filtered_res;
580 =head2 @seqs = del_long_seqs($percentage, @PSISEQS)
582 Removes those sequences that are more than $percentage longer or shorter than the first
583 sequence in @seqs. @seqs is an array of PSISEQ objects.
585 Equivilent to trim_seqs script.
589 #####################################################################################################
591 my ( $factor, @seqs ) = @_;
593 # Keep the query sequence (we assume query is the 1st FASTA record)
594 my $query = shift @seqs;
597 # Find the length of the query sequence without any gaps
598 my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g );
600 # Calculate the upper and lower allowed bounds
601 my ( $upper, $lower );
603 my $bounds = ( $length / 100 ) * $factor;
604 ( $upper, $lower ) = ( $length + $bounds, $length - $bounds );
607 # Now try each sequence and see how long they are
609 my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g );
610 if ( $l >= $lower && $l <= $upper ) { push @res, $_ }
616 =head2 toupper ($PSISEQ)
618 Converts sequence held in PSISEQ object to uppercase.
622 #####################################################################################################
624 $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] );
627 =head2 degap($PSISEQ_QUERY, @PSISEQS)
629 Removes gaps in the query sequence ($PSISEQ_QUERY) and corresponding positions in @PSISEQS. Change made in place.
631 Equivilent to fasta2jnet script.
635 #####################################################################################################
639 # Find where the gaps in the query sequence are
642 for ( @{ $seqs[0]->align } ) {
643 push @gaps, $i if $_ !~ /$GAP/;
649 # Remove the gaps in all the sequences.
650 # Derefences the array reference and takes a slice inside the method call
651 # arguments, then coerce back to an array ref.
653 $_->align( [ @{ $_->align }[@gaps] ] );
657 =head2 getfreq($filename, @PSISEQS)
659 Creates a PSIBLAST like profile and writes it to the $filename.
661 Equivilant to profile or getfreq (older) script. This doesn't create the same answer as
662 the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output,
663 but it's *not* the same.
667 #####################################################################################################
669 my ( $fn, @seqs ) = @_;
671 #croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs;
673 # Create a PSIBLAST like profile from the alignment
674 # This doesn't greate the same result as old Jpred, I think this is because
675 # differences in the input between old and new
678 # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
679 # my @profile = profile(
680 # map { join "", @{ $_->seq } } $f->get_entries
683 my @profile = profile( map { join "", @{ $_->align } } @seqs );
685 open my $fh, ">$fn" or die "JPRED: $fn: $!";
686 print $fh join( " ", @{$_} ), "\n" for @profile;
689 =head2 HMMER::Profile::Jnet = hmmer(@PSISEQS)
691 Creates a HMMER profile from the aligned sequences in @PSISEQS. Returns a HMMER::Profile::Jnet object.
693 Equivilent to gethmm script.
697 #####################################################################################################
701 # Temp files required
702 my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3;
704 # Create the FASTA file
705 psiseq2fasta( $tmp_fasta, @seqs );
708 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
709 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
711 # Read in the HMMER file
712 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
714 # Convert from HMMER::Profile to HMMER::Profile::Jnet
715 my $hmmer_jnet = HMMER::Profile::Jnet->new;
716 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
721 =head2 jnet($hmmer, $out, $pssm )
723 Runs Jnet. Pass the paths of the filenames to do the prediction on
727 #####################################################################################################
729 my ( $hmmer, $outfile, $pssm ) = @_;
732 # run Jnet prediction with all the profile data
733 system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
736 # run Jnet prediction with only HMMer and alignment data
737 system("$jnet --concise --hmmer $hmmer > $outfile");
739 check( "jnet", $? ) or exit 1;
742 =head1 psiseq2fasta($path, @PSISEQ)
744 Writes a FASTA file given an array of PSISEQ objects.
748 #####################################################################################################
750 my ( $fn, @seqs ) = @_;
752 #confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
753 confess "JPRED: not passed PSISEQs" unless @seqs;
755 #confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
757 # Changed from using FASTA::File object due to seg faulting on some large,
758 # long alignments, presumably due to running out of memory.
759 # my $fasta = FASTA::File->new;
760 # $fasta->add_entries(
762 # my $f = FASTA->new(id => $_->id);
763 # $f->seq(@{$_->align});
767 # $fasta->write_file($fn);
771 ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" )
772 : ( open $fh, ">$fn" or die "JPRED: $!" );
775 print $fh ">", $_->id, "\n";
776 my $seq = join( "", @{ $_->align } );
777 $seq =~ s/(.{72})/$1\n/g;
779 print $fh $seq . "\n";
784 =head1 @PSISEQ = fasta2psiseq($path)
786 Reads in a FASTA file and returns an array of PSISEQ objects.
790 #####################################################################################################
796 ? FASTA::File->new( read_gzip_file => $fn )
797 : FASTA::File->new( read_file => $fn );
798 $fasta or die "JPRED: $!";
801 for ( $fasta->get_entries ) {
802 my $seq = PSISEQ->new;
804 $seq->align( [ @{ $_->seq } ] );
811 #####################################################################################################
812 sub fasta2psiseq_lowmemory {
813 my $filename = shift;
818 ? ( open $fh, "gzip -dc $filename|" or die $! )
819 : ( open $fh, $filename or die $! );
826 $seq =~ s/ //g if defined $seq;
828 if ( $id and $seq ) {
829 my $psi = PSISEQ->new( id => $id );
830 $psi->align( [ split //, $seq ] );
841 if ( $id and $seq ) {
842 my $psi = PSISEQ->new( id => $id );
850 =head1 extend($FASTA::File, @PSISEQ)
852 Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues
853 against the query, this function extends the alignment so that all of the query is
854 present in the alignment. The other sequences are extended with gap characters.
858 #####################################################################################################
860 my ( $query, @seqs ) = @_;
862 #croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
863 #croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
865 # Get the query sequence
866 my $q_query = $query->get_entry(0);
868 # Get the query sequence from the PSIBLAST alignment
869 my $a_query = shift @seqs;
871 # The gap size required to expand the alignment
872 my ( $start_gap_length, $end_gap_length );
875 # Make handling the sequence easier
876 my $q_seq = join "", @{ [ $q_query->seq ] };
877 my $a_seq = join "", @{ $a_query->align };
882 ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq;
884 $start_gap_length = index( $q_seq, $a_seq );
885 croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
887 $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
890 # Add the gaps to the end of the alignments
892 $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] );
895 # Put the query sequence back
900 ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ),
901 @{ $a_query->align },
902 ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ),
909 =head1 $int = fasta_seq_length($path)
911 Given the path to a FASTA database, find the number of residues/bases in it. Counts whitespace as a residue.
915 #####################################################################################################
916 sub fasta_seq_length {
918 open my $fh, $fn or croak "Can't open file $fn: $!\n";
931 =head1 log(@messages)
933 Report messages to STDERR
937 #####################################################################################################
938 sub errlog { print STDERR @_ }