5 jpred - Secondary structure prediction program
9 jpred --sequence <fasta file> [--output <output prefix>] [--db <database>] [--psi <psiblast output>] [--pred-nohits] [--verbose] [--debug] [--help] [--man]
13 This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence.
14 It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
16 The program is primarily design for use by the Jpred server, but it can be used directly by any user.
17 Some of the output may not be meaningful if used directly - it can be safely ignored.
23 =item -sequence path to input FASTA file
25 The path to the sequence (in FASTA format) you want to predict.
29 A prefix to the filenames created by Jpred, defaults to the value set by --sequence.
33 Database to use for PSI-BLAST querying. This only accepts databases from a list which is pre-defined in the code. Crap I know, but that's the way it is at the moment. It's probably best to stick the default for the time being.
39 Path to a PSIBLAST output file.
43 Toggle allowing Jpred to make predictions even when there are no PSI-BLAST hits.
47 Verbose mode. Print more information while running jpred.
51 Debug mode. Print debugging information while running jpred.
55 Gives help on the programs usage.
59 Displays this man page.
65 Can't cope with gaps in the query sequence.
67 Doesn't accept alignments.
69 If you find any others please contact me.
73 Jonathan Barber <jon@compbio.dundee.ac.uk>
74 Chris Cole <christian@cole.name>
75 Alexander Sherstnev <a.sherstnev@dundee.ac.uk> (current maintainer)
79 ## TODO check for gaps in query sequence
80 ## check for disallowed chars in sequence
92 # path to Jpred modules
96 # internal jpred modules
98 use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin check_OS);
100 use HMMER::Profile::Jnet;
109 use Utils qw(profile);
112 our $VERSION = '3.0.2';
113 my $MAX_ALIGN = 10000; # maximum number of alignment sequences allowed
114 my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
116 # Gap characters in sequences
119 #####################################################################################################
120 # Light object to hold sequence information, light to prevent memory overload
121 # for big search results. Even so this is quite memory intensive, I can't see
122 # how to reduce the size further.
128 my ( $class, %args ) = @_;
131 for ( keys %args ) { $self->$_( $args{$_} ) }
148 if ( defined $_[1] ) {
149 ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
151 $_[0]->[2] = join( "", @{ $_[1] } );
153 return [ split( //, $_[0]->[2] ) ];
158 #####################################################################################################
162 my $ncpu = 1; # number of CPUs for psiblast calculations
163 my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
165 my $db_entry = "unknown";
167 my ( $help, $man, $DEBUG, $VERBOSE );
170 "in|sequence=s" => \$infile,
171 "out|output=s" => \$output,
172 "psi=s" => \$psiblast,
173 "dbname=s" => \$db_entry,
174 "dbpath=s" => \$db_path,
176 "pred-nohits" => \$predNoHits,
180 "verbose" => \$VERBOSE
182 pod2usage(1) if $help;
183 pod2usage( verbose => 2 ) if $man;
185 #####################################################################################################
186 # Key to database information and information for accessing them
189 ## default database used by Jpred
191 database => $db_path . "/uniref90.filt",
192 unfiltered => $db_path . "/uniref90",
195 ## database used during Jnet training
197 database => $db_path . "/training/uniref90.filt",
198 unfiltered => $db_path . "/training/uniref90",
201 ## database used for ported Jpred
203 database => $db_path . "/databases/uniref90.filt",
204 unfiltered => $db_path . "/databases/uniref90",
206 ## cluster-specific path for Jpred
208 database => "/homes/www-jpred/databases/uniref90.filt",
209 unfiltered => "/homes/www-jpred/databases/uniref90",
212 ## these other DBs are experimental ones used during development.
213 ## check they exist before using them.
216 # generic entry for use with validate_jnet.pl
217 # real db location defined by validate_jnet.pl
218 database => $db_path . "/swall/swall.filt",
219 unfiltered => $db_path . "/swall/swall",
224 # Path to PSIBLAST db
225 database => $db_path . "/3/swall.filt",
226 unfiltered => $db_path . "/3/swall",
229 database => $db_path . "/6/swall.filt",
230 unfiltered => $db_path . "/6/swall",
234 pod2usage(' --sequence argument not provided') unless $infile;
235 die "--db $db_entry not recognised" unless exists $database->{$db_entry};
236 $output = $infile . ".res" unless $output;
237 $ncpu = 1 if ( 1 > $ncpu or 8 < $ncpu );
239 for ( [ "input", $infile ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) {
240 my ( $key, $value ) = @{$_};
241 defined $value or next;
242 errlog( join( ": ", $key, $value ), "\n" );
245 #####################################################################################################
246 my $platform = check_OS();
247 print "JPRED: checking platiform... $platform\n" if $DEBUG;
249 #####################################################################################################
250 my $query = FASTA::File->new( read_file => $infile );
253 my $psi = PSIBLAST->new;
254 unless ( defined $psiblast && -e $psiblast ) {
255 ## potentially a better set of parameters (esp. -b & -v) which avoid PSI-BLAST dying with large number of hits
256 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
257 # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
258 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
259 my $psi_run = PSIBLAST::Run->new(
261 args => "-a" . $ncpu . " -e0.05 -h0.01 -m6 -b10000 -v10000 -j3",
262 path => $psiblastbin,
264 output => "$output.blast",
265 matrix => "$output.profile",
266 database => $database->{$db_entry}{database},
269 # For reduced databases, get the size of the orginal and use this as
270 # the equivilent DB size
271 if ( $db_entry =~ /^sp_red_/ ) {
272 ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/;
274 $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below
277 print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
278 print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
280 print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
281 $psi_run->run or die; # CC sub is from PSIBLAST::Run
283 $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
284 system("gzip -9f $output.blast");
286 if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
287 else { $psi->read_file($psiblast) } # CC ditto above
290 #####################################################################################################
291 # Convert the last itteration into a collection of PSISEQ objects
292 for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm
293 my ( $id, $start, $align ) = @{$_};
298 align => [ split( //, $align ) ]
302 #####################################################################################################
303 ## When there are no PSI-BLAST hits generate an HMM from the query
304 ## and run Jnet against that only.
305 ## Accuracy won't be as good, but at least it's a prediction
307 if ( $predNoHits == 0 ) {
308 warn "\nJPRED: Warning! no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n";
309 print ">>100% complete\n";
312 print ">>50% complete\n";
313 warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
314 print "Running HMMer on query...\n" if $VERBOSE;
316 # copy input query to alignment
317 #system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
318 open( my $ifh, "<", $infile ) or die "JPRED: cannot open file $infile: $!";
319 open( my $ofh, ">", $output . ".align" ) or die "JPRED: cannot open file $output\.align: $!";
320 while (<$ifh>) { print $ofh, $_ }
324 # Temp files required for HMMer
325 my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2;
327 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $infile");
328 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
330 # Read in the HMMER file
331 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
333 # Convert from HMMER::Profile to HMMER::Profile::Jnet
334 my $hmmer_jnet = HMMER::Profile::Jnet->new;
335 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
336 $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
337 print ">>70% complete\n";
339 # Run Jnet for the prediction
340 print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
341 jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below
344 psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below
345 print ">>40% complete\n";
347 #####################################################################################################
348 # Make PSIBLAST truncated alignments the right length
349 print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE;
350 @seqs = extend( $query, @seqs ); # CC sub si below
351 psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG;
353 #####################################################################################################
354 # Remove masking from PSIBLAST alignment
355 print "Unmasking the alignments...\n" if $VERBOSE;
356 my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found.";
357 remove_seq_masks( $idx, $_ ) for @seqs[ 1 .. $#seqs ];
358 psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG;
360 #####################################################################################################
361 # Convert the sequences to upper case
362 print "Converting sequences to the same case...\n" if $VERBOSE;
363 toupper($_) for @seqs; # CC sub is below
364 psiseq2fasta( "${output}_backup.fasta.gz", @seqs );
366 #####################################################################################################
367 # Remove excessive sequences
368 print "Remove excessive sequences...\n" if $VERBOSE;
369 @seqs = reduce( $MAX_ALIGN, @seqs );
370 psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG;
372 #####################################################################################################
373 # Remove sequences that are too long or too short
374 print "Remove sequences which too long or short...\n" if $VERBOSE;
375 @seqs = del_long_seqs( 50, @seqs );
376 psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG;
378 #####################################################################################################
379 # Remove redundant sequences based upon pairwise identity and OC clustering
380 print "Remove redundant sequences...\n" if $VERBOSE;
381 @seqs = nonred( $NR_CUT, @seqs );
382 psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG;
384 #####################################################################################################
385 # Check that we haven't got rid of all of the sequences
387 warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
388 @seqs = fasta2psiseq("${output}_backup.fasta.gz");
390 unlink("${output}_backup.fasta.gz") unless $DEBUG;
392 # Remove gaps in the query sequence and same positions in the alignment
393 print "Removing gaps in the query sequence...\n" if $VERBOSE;
395 psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG;
397 # Output the alignment for the prediction
398 print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE;
399 psiseq2fasta( "$output.align", @seqs );
401 #####################################################################################################
402 # Equivilent to getpssm script
403 print "Output the PSSM matrix from the PSI-BLAST profile...\n";
404 my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" );
405 $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
406 print ">>50% complete\n";
408 #####################################################################################################
409 # Run HMMER on the sequences
410 print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
411 my $hmmer = hmmer(@seqs); # CC sub is below
412 $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
413 print ">>70% complete\n";
415 #####################################################################################################
416 # Run Jnet for the prediction
417 print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE;
418 jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below
421 print ">>100% complete\n";
422 print "Jpred Finished\n";
425 #####################################################################################################
427 #####################################################################################################
431 =head2 $PSISEQ = remove_seq_masks($PSISEQ)
433 Returns a PSISEQ object which has any masked residues replaced by their entry from the unfiltered database.
438 #####################################################################################################
439 sub remove_seq_masks {
443 # Only bother if we have a sequence which has been masked
444 return unless grep { uc $_ eq 'X' } @{ $seq->align };
446 my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } );
447 my $searchdb = $database->{$db_entry}{unfiltered};
448 $start--; # We need this to be zero based
450 my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return;
451 my @seqs = $f->get_entries;
452 my $db = shift @seqs;
455 confess "JPRED: remove_seq_masks: Accession number $id returned no sequences";
457 confess "JPRED: remove_seq_masks: Accession number $id returned more than one sequence";
460 my @db_seq = @{ $db->seq };
462 # $i is a pointer into the original, ungapped, unmasked sequence. $_ is for the aligned sequence
464 for ( 0 .. $#align ) {
465 next if $align[$_] =~ /$GAP/;
467 if ( $align[$_] eq 'X' ) {
468 unless ( defined $db_seq[ $i + $start ] ) {
469 croak "JPRED: remove_seq_masks: the database sequence is not as long as the query sequence!";
471 $align[$_] = $db_seq[ $i + $start ];
475 $seq->align( \@align );
478 =head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN);
480 Reduces @PSISEQS_IN to at most $max sequences by taking every n sequences in @PSISEQ_IN.
482 Equivilent to reduce script.
486 #####################################################################################################
488 my ( $max, @seqs ) = @_;
490 my $num = int( @seqs / $max );
491 my @res = shift @seqs;
493 # Don't bother if we have less than the required sequences
494 if ( @seqs < $max or 0 == $num ) {
498 for ( 0 .. $#seqs ) {
499 push @res, $seqs[$_] if ( 0 == $_ % $num );
505 =head2 nonred($cutoff, @PSISEQS);
507 Removes redundant sequences at $cutoff level of sequence identity.
509 Equivilent to nonred script.
513 #####################################################################################################
515 my ( $cutoff, @seqs ) = @_;
517 # Run pairwise and then OC and remove similar sequences
518 unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" }
519 unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
521 warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
525 # Convert the sequences into a FASTA object, which is what pairwise expects (simpler version).
526 my $fasta = FASTA::File->new;
528 my $f = FASTA->new( id => $_->id );
529 $f->seq( @{ $_->align } );
530 $fasta->add_entries($f);
534 my $pair = Pairwise->new( path => $pairwise );
535 my $foo = join "\n", $pair->run($fasta) or die $!;
538 my $ocres = OC->new( path => "$oc sim complete cut $cutoff" );
539 $ocres->read_string( join "", $ocres->run($foo) );
541 # Now found those entries that are unrelated
543 for ( $ocres->get_groups ) {
544 my ( $group, $score, $size, @labels ) = @{$_};
546 # Keep the QUERY from the cluster, otherwise just get onelt
547 if ( grep { /QUERY/ } @labels ) {
549 # Make sure the query stays at the start
550 unshift @keep, "QUERY";
553 push @keep, shift @labels;
556 push @keep, $ocres->get_unclustered;
558 # Return the matching entries.
559 # We use the temporay to mimic the selection of sequences in the nonred
560 # script, which took the sequences in the order they occured in the file.
564 if ( grep { $_ =~ /^$label$/ } @keep ) {
565 push @filtered_res, $_;
569 return @filtered_res;
572 =head2 @seqs = del_long_seqs($percentage, @PSISEQS)
574 Removes those sequences that are more than $percentage longer or shorter than the first
575 sequence in @seqs. @seqs is an array of PSISEQ objects.
577 Equivilent to trim_seqs script.
581 #####################################################################################################
583 my ( $factor, @seqs ) = @_;
585 # Keep the query sequence (we assume query is the 1st FASTA record)
586 my $query = shift @seqs;
589 # Find the length of the query sequence without any gaps
590 my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g );
592 # Calculate the upper and lower allowed bounds
593 my ( $upper, $lower );
595 my $bounds = ( $length / 100 ) * $factor;
596 ( $upper, $lower ) = ( $length + $bounds, $length - $bounds );
599 # Now try each sequence and see how long they are
601 my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g );
602 if ( $l >= $lower && $l <= $upper ) { push @res, $_ }
608 =head2 toupper ($PSISEQ)
610 Converts sequence held in PSISEQ object to uppercase.
614 #####################################################################################################
616 $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] );
619 =head2 degap($PSISEQ_QUERY, @PSISEQS)
621 Removes gaps in the query sequence ($PSISEQ_QUERY) and corresponding positions in @PSISEQS. Change made in place.
623 Equivilent to fasta2jnet script.
627 #####################################################################################################
631 # Find where the gaps in the query sequence are
634 for ( @{ $seqs[0]->align } ) {
635 push @gaps, $i if $_ !~ /$GAP/;
641 # Remove the gaps in all the sequences.
642 # Derefences the array reference and takes a slice inside the method call
643 # arguments, then coerce back to an array ref.
645 $_->align( [ @{ $_->align }[@gaps] ] );
649 =head2 getfreq($filename, @PSISEQS)
651 Creates a PSIBLAST like profile and writes it to the $filename.
653 Equivilant to profile or getfreq (older) script. This doesn't create the same answer as
654 the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output,
655 but it's *not* the same.
659 #####################################################################################################
661 my ( $fn, @seqs ) = @_;
663 #croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs;
665 # Create a PSIBLAST like profile from the alignment
666 # This doesn't greate the same result as old Jpred, I think this is because
667 # differences in the input between old and new
670 # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
671 # my @profile = profile(
672 # map { join "", @{ $_->seq } } $f->get_entries
675 my @profile = profile( map { join "", @{ $_->align } } @seqs );
677 open my $fh, ">$fn" or die "JPRED: $fn: $!";
678 print $fh join( " ", @{$_} ), "\n" for @profile;
681 =head2 HMMER::Profile::Jnet = hmmer(@PSISEQS)
683 Creates a HMMER profile from the aligned sequences in @PSISEQS. Returns a HMMER::Profile::Jnet object.
685 Equivilent to gethmm script.
689 #####################################################################################################
693 # Temp files required
694 my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3;
696 # Create the FASTA file
697 psiseq2fasta( $tmp_fasta, @seqs );
700 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
701 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
703 # Read in the HMMER file
704 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
706 # Convert from HMMER::Profile to HMMER::Profile::Jnet
707 my $hmmer_jnet = HMMER::Profile::Jnet->new;
708 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
713 =head2 jnet($hmmer, $out, $pssm )
715 Runs Jnet. Pass the paths of the filenames to do the prediction on
719 #####################################################################################################
721 my ( $hmmer, $outfile, $pssm ) = @_;
724 # run Jnet prediction with all the profile data
725 system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
728 # run Jnet prediction with only HMMer and alignment data
729 system("$jnet --concise --hmmer $hmmer > $outfile");
731 check( "jnet", $? ) or exit 1;
734 =head1 psiseq2fasta($path, @PSISEQ)
736 Writes a FASTA file given an array of PSISEQ objects.
740 #####################################################################################################
742 my ( $fn, @seqs ) = @_;
744 #confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
745 confess "JPRED: not passed PSISEQs" unless @seqs;
747 #confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
749 # Changed from using FASTA::File object due to seg faulting on some large,
750 # long alignments, presumably due to running out of memory.
751 # my $fasta = FASTA::File->new;
752 # $fasta->add_entries(
754 # my $f = FASTA->new(id => $_->id);
755 # $f->seq(@{$_->align});
759 # $fasta->write_file($fn);
763 ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" )
764 : ( open $fh, ">$fn" or die "JPRED: $!" );
767 print $fh ">", $_->id, "\n";
768 my $seq = join( "", @{ $_->align } );
769 $seq =~ s/(.{72})/$1\n/g;
771 print $fh $seq . "\n";
776 =head1 @PSISEQ = fasta2psiseq($path)
778 Reads in a FASTA file and returns an array of PSISEQ objects.
782 #####################################################################################################
788 ? FASTA::File->new( read_gzip_file => $fn )
789 : FASTA::File->new( read_file => $fn );
790 $fasta or die "JPRED: $!";
793 for ( $fasta->get_entries ) {
794 my $seq = PSISEQ->new;
796 $seq->align( [ @{ $_->seq } ] );
803 #####################################################################################################
804 sub fasta2psiseq_lowmemory {
805 my $filename = shift;
810 ? ( open $fh, "gzip -dc $filename|" or die $! )
811 : ( open $fh, $filename or die $! );
818 $seq =~ s/ //g if defined $seq;
820 if ( $id and $seq ) {
821 my $psi = PSISEQ->new( id => $id );
822 $psi->align( [ split //, $seq ] );
833 if ( $id and $seq ) {
834 my $psi = PSISEQ->new( id => $id );
842 =head1 extend($FASTA::File, @PSISEQ)
844 Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues
845 against the query, this function extends the alignment so that all of the query is
846 present in the alignment. The other sequences are extended with gap characters.
850 #####################################################################################################
852 my ( $query, @seqs ) = @_;
854 #croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
855 #croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
857 # Get the query sequence
858 my $q_query = $query->get_entry(0);
860 # Get the query sequence from the PSIBLAST alignment
861 my $a_query = shift @seqs;
863 # The gap size required to expand the alignment
864 my ( $start_gap_length, $end_gap_length );
867 # Make handling the sequence easier
868 my $q_seq = join "", @{ [ $q_query->seq ] };
869 my $a_seq = join "", @{ $a_query->align };
874 ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq;
876 $start_gap_length = index( $q_seq, $a_seq );
877 croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
879 $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
882 # Add the gaps to the end of the alignments
884 $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] );
887 # Put the query sequence back
892 ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ),
893 @{ $a_query->align },
894 ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ),
901 =head1 $int = fasta_seq_length($path)
903 Given the path to a FASTA database, find the number of residues/bases in it. Counts whitespace as a residue.
907 #####################################################################################################
908 sub fasta_seq_length {
910 open my $fh, $fn or croak "Can't open file $fn: $!\n";
923 =head1 log(@messages)
925 Report messages to STDERR
929 #####################################################################################################
930 sub errlog { print STDERR @_ }