5 jpred - Secondary structure prediction program
9 jpred --sequence <fasta file> [--output <output prefix>] [--db <database>] [--psi <psiblast output>] [--pred-nohits] [--verbose] [--debug] [--help] [--man]
13 This is a program which predicts the secondary structure of a sequence given a path to a FASTA sequence.
14 It does all the PSI-BLAST searching and alignment 'meddling' as required by Jnet.
16 The program is primarily design for use by the Jpred server, but it can be used directly by any user.
17 Some of the output may not be meaningful if used directly - it can be safely ignored.
23 =item -sequence path to input FASTA file
25 The path to the sequence (in FASTA format) you want to predict.
29 A prefix to the filenames created by Jpred, defaults to the value set by --sequence.
33 Database to use for PSI-BLAST querying. This only accepts databases from a list which is pre-defined in the code. Crap I know, but that's the way it is at the moment. It's probably best to stick the default for the time being.
39 Path to a PSIBLAST output file.
43 Toggle allowing Jpred to make predictions even when there are no PSI-BLAST hits.
47 Verbose mode. Print more information while running jpred.
51 Debug mode. Print debugging information while running jpred.
55 Gives help on the programs usage.
59 Displays this man page.
65 Can't cope with gaps in the query sequence.
67 Doesn't accept alignments.
69 If you find any others please contact me.
73 Jonathan Barber <jon@compbio.dundee.ac.uk>
74 Chris Cole <christian@cole.name>
75 Alexander Sherstnev <a.sherstnev@dundee.ac.uk> (current maintainer)
79 ## TODO check for gaps in query sequence
80 ## check for disallowed chars in sequence
92 # path to Jpred modules
93 #use FindBin qw($Bin);
94 #use lib "$Bin/../lib";
96 use lib "/home/asherstnev/Projects/Jpred.project/jpred/branches/portable/lib";
98 # internal jpred modules
100 use Paths qw($pairwise $oc $jnet $hmmbuild $hmmconvert $psiblastbin check_OS);
102 use HMMER::Profile::Jnet;
111 use Utils qw(profile);
114 our $VERSION = '3.0.2';
115 my $MAX_ALIGN = 10000; # maximum number of alignment sequences allowed
116 my $NR_CUT = 75; # cut-off seqeunce ID value for defining non-redundancy
118 # Gap characters in sequences
121 #####################################################################################################
122 # Light object to hold sequence information, light to prevent memory overload
123 # for big search results. Even so this is quite memory intensive, I can't see
124 # how to reduce the size further.
130 my ( $class, %args ) = @_;
133 for ( keys %args ) { $self->$_( $args{$_} ) }
150 if ( defined $_[1] ) {
151 ref $_[1] eq 'ARRAY' or die "Not passed ARRAY ref";
153 $_[0]->[2] = join( "", @{ $_[1] } );
155 return [ split( //, $_[0]->[2] ) ];
160 #####################################################################################################
164 my $ncpu = 1; # number of CPUs for psiblast calculations
165 my $predNoHits = 0; # define whether to make predictions when no PSI-BLAST hits are found
167 my $db_entry = "unknown";
169 my ( $help, $man, $DEBUG, $VERBOSE );
172 "in|sequence=s" => \$infile,
173 "out|output=s" => \$output,
174 "psi=s" => \$psiblast,
175 "dbname=s" => \$db_entry,
176 "dbpath=s" => \$db_path,
178 "pred-nohits" => \$predNoHits,
182 "verbose" => \$VERBOSE
184 pod2usage(1) if $help;
185 pod2usage( verbose => 2 ) if $man;
187 #####################################################################################################
188 # Key to database information and information for accessing them
191 ## default database used by Jpred
193 database => $db_path . "/uniref90.filt",
194 unfiltered => $db_path . "/uniref90",
197 ## database used during Jnet training
199 database => $db_path . "/training/uniref90.filt",
200 unfiltered => $db_path . "/training/uniref90",
203 ## database used for ported Jpred
205 database => $db_path . "/databases/uniref90.filt",
206 unfiltered => $db_path . "/databases/uniref90",
208 ## cluster-specific path for Jpred
210 database => "/homes/www-jpred/databases/uniref90.filt",
211 unfiltered => "/homes/www-jpred/databases/uniref90",
214 ## these other DBs are experimental ones used during development.
215 ## check they exist before using them.
218 # generic entry for use with validate_jnet.pl
219 # real db location defined by validate_jnet.pl
220 database => $db_path . "/swall/swall.filt",
221 unfiltered => $db_path . "/swall/swall",
226 # Path to PSIBLAST db
227 database => $db_path . "/3/swall.filt",
228 unfiltered => $db_path . "/3/swall",
231 database => $db_path . "/6/swall.filt",
232 unfiltered => $db_path . "/6/swall",
236 pod2usage(' --sequence argument not provided') unless $infile;
237 die "--db $db_entry not recognised" unless exists $database->{$db_entry};
238 $output = $infile . ".res" unless $output;
239 $ncpu = 1 if ( 1 > $ncpu or 8 < $ncpu );
241 for ( [ "input", $infile ], [ "output", $output ], [ "psi", $psiblast ], [ "db", $db_entry ] ) {
242 my ( $key, $value ) = @{$_};
243 defined $value or next;
244 errlog( join( ": ", $key, $value ), "\n" );
247 #####################################################################################################
248 my $platform = check_OS();
249 print "JPRED: checking platiform... $platform\n" if $DEBUG;
251 #####################################################################################################
252 my $query = FASTA::File->new( read_file => $infile );
255 my $psi = PSIBLAST->new;
256 unless ( defined $psiblast && -e $psiblast ) {
257 ## potentially a better set of parameters (esp. -b & -v) which avoid PSI-BLAST dying with large number of hits
258 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3 -F "m S"' # 'soft' filtering of the query sequence
259 # args => '-e0.001 -h0.0001 -m6 -b10000 -v10000 -j3' # stricter searching criteria
260 # args => '-e0.05 -h0.01 -m6 -b10000 -v10000 -j3'
261 my $psi_run = PSIBLAST::Run->new(
263 args => "-a" . $ncpu . " -e0.05 -h0.01 -m6 -b10000 -v10000 -j3",
264 path => $psiblastbin,
266 output => "$output.blast",
267 matrix => "$output.profile",
268 database => $database->{$db_entry}{database},
271 # For reduced databases, get the size of the orginal and use this as
272 # the equivilent DB size
273 if ( $db_entry =~ /^sp_red_/ ) {
274 ( my $entry = $db_entry ) =~ s/^sp_red_/sp_/;
276 $psi_run->args . " -z " . fasta_seq_length( $database->{$entry}{database} ) # CC sub is below
279 print "BLAST matrix: $ENV{BLASTMAT}\n" if $DEBUG;
280 print "BLASTDB path: $ENV{BLASTDB}\n" if $DEBUG;
282 print "Running PSI-BLAST on query against \'$database->{$db_entry}{database}\'...\n" if $VERBOSE;
283 $psi_run->run or die; # CC sub is from PSIBLAST::Run
285 $psi->read_file("$output.blast"); # CC PSIBLAST.pm doesn't have a read_file(), but Read.pm does?
286 system("gzip -9f $output.blast");
288 if ( $psiblast =~ /.gz$/ ) { $psi->read_gzip_file($psiblast) } # CC PSIBALST.pm doesn't have a read_gzip_file() object, but Read.pm does?
289 else { $psi->read_file($psiblast) } # CC ditto above
292 #####################################################################################################
293 # Convert the last itteration into a collection of PSISEQ objects
294 for ( $psi->get_all ) { # CC sub is from PSIBLAST.pm
295 my ( $id, $start, $align ) = @{$_};
300 align => [ split( //, $align ) ]
304 #####################################################################################################
305 ## When there are no PSI-BLAST hits generate an HMM from the query
306 ## and run Jnet against that only.
307 ## Accuracy won't be as good, but at least it's a prediction
309 if ( $predNoHits == 0 ) {
310 warn "\nJPRED: Warning! no PSI-BLAST hits found and '--pred-nohits' option not set. Exiting...\n";
311 print ">>100% complete\n";
314 print ">>50% complete\n";
315 warn "\nJPRED: no PSI-BLAST hits found. Continuing with single query sequence only.\n\n";
316 print "Running HMMer on query...\n" if $VERBOSE;
318 # copy input query to alignment
319 #system("cp $path $output.align") == 0 or croak "Error: unable to copy '$path'\n";
320 open( my $ifh, "<", $infile ) or die "JPRED: cannot open file $infile: $!";
321 open( my $ofh, ">", $output . ".align" ) or die "JPRED: cannot open file $output\.align: $!";
322 while (<$ifh>) { print $ofh, $_ }
326 # Temp files required for HMMer
327 my ( $hmmbuild_out, $hmmconvert_out ) = map { File::Temp->new->filename } 1 .. 2;
329 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $infile");
330 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
332 # Read in the HMMER file
333 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
335 # Convert from HMMER::Profile to HMMER::Profile::Jnet
336 my $hmmer_jnet = HMMER::Profile::Jnet->new;
337 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
338 $hmmer_jnet->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
339 print ">>70% complete\n";
341 # Run Jnet for the prediction
342 print "Running JNet using the generated inputs from HMM only...\n" if $VERBOSE;
343 jnet( map { "$output.$_" } qw(hmm jnet) ); # CC sub is below
346 psiseq2fasta( "0.fasta.gz", @seqs ) if $DEBUG; # CC sub is below
347 print ">>40% complete\n";
349 #####################################################################################################
350 # Make PSIBLAST truncated alignments the right length
351 print "Untruncating the PSIBLAST alignments...\n" if $VERBOSE;
352 @seqs = extend( $query, @seqs ); # CC sub si below
353 psiseq2fasta( "1.fasta.gz", @seqs ) if $DEBUG;
355 #####################################################################################################
356 # Remove masking from PSIBLAST alignment
357 print "Unmasking the alignments...\n" if $VERBOSE;
358 my $idx = Index->new( type => "Index::FastaCMD" ) or die "Index type cannot be found.";
359 remove_seq_masks( $idx, $_ ) for @seqs[ 1 .. $#seqs ];
360 psiseq2fasta( "2.fasta.gz", @seqs ) if $DEBUG;
362 #####################################################################################################
363 # Convert the sequences to upper case
364 print "Converting sequences to the same case...\n" if $VERBOSE;
365 toupper($_) for @seqs; # CC sub is below
366 psiseq2fasta( "${output}_backup.fasta.gz", @seqs );
368 #####################################################################################################
369 # Remove excessive sequences
370 print "Remove excessive sequences...\n" if $VERBOSE;
371 @seqs = reduce( $MAX_ALIGN, @seqs );
372 psiseq2fasta( "3.fasta.gz", @seqs ) if $DEBUG;
374 #####################################################################################################
375 # Remove sequences that are too long or too short
376 print "Remove sequences which too long or short...\n" if $VERBOSE;
377 @seqs = del_long_seqs( 50, @seqs );
378 psiseq2fasta( "4.fasta.gz", @seqs ) if $DEBUG;
380 #####################################################################################################
381 # Remove redundant sequences based upon pairwise identity and OC clustering
382 print "Remove redundant sequences...\n" if $VERBOSE;
383 @seqs = nonred( $NR_CUT, @seqs );
384 psiseq2fasta( "5.fasta.gz", @seqs ) if $DEBUG;
386 #####################################################################################################
387 # Check that we haven't got rid of all of the sequences
389 warn "JPRED: All the sequences found by PSIBLAST were removed during filtering, reverting to prefiltered sequences\n";
390 @seqs = fasta2psiseq("${output}_backup.fasta.gz");
392 unlink("${output}_backup.fasta.gz") unless $DEBUG;
394 # Remove gaps in the query sequence and same positions in the alignment
395 print "Removing gaps in the query sequence...\n" if $VERBOSE;
397 psiseq2fasta( "6.fasta.gz", @seqs ) if $DEBUG;
399 # Output the alignment for the prediction
400 print "Outputting cleaned-up PSI_BLAST alignment...\n" if $VERBOSE;
401 psiseq2fasta( "$output.align", @seqs );
403 #####################################################################################################
404 # Equivilent to getpssm script
405 print "Output the PSSM matrix from the PSI-BLAST profile...\n";
406 my $pssm = PSIBLAST::PSSM->new( read_file => "$output.profile" );
407 $pssm->write_file("$output.pssm"); # CC write_file() sub is in Write.pm loaded via PSIBLAST::PSSM
408 print ">>50% complete\n";
410 #####################################################################################################
411 # Run HMMER on the sequences
412 print "Running HMMer on sequences found by PSI-BLAST...\n" if $VERBOSE;
413 my $hmmer = hmmer(@seqs); # CC sub is below
414 $hmmer->write_file("$output.hmm"); # write_file is in the Read.pm called from the HMMER::Profile module
415 print ">>70% complete\n";
417 #####################################################################################################
418 # Run Jnet for the prediction
419 print "Running JNet using the generated inputs from HMM and PSI-BLAST...\n" if $VERBOSE;
420 jnet( map { "$output.$_" } qw(hmm jnet pssm) ); # CC sub is below
423 print ">>100% complete\n";
424 print "Jpred Finished\n";
427 #####################################################################################################
429 #####################################################################################################
433 =head2 $PSISEQ = remove_seq_masks($PSISEQ)
435 Returns a PSISEQ object which has any masked residues replaced by their entry from the unfiltered database.
440 #####################################################################################################
441 sub remove_seq_masks {
445 # Only bother if we have a sequence which has been masked
446 return unless grep { uc $_ eq 'X' } @{ $seq->align };
448 my ( $id, $start, @align ) = ( $seq->id, $seq->start, @{ $seq->align } );
449 my $searchdb = $database->{$db_entry}{unfiltered};
450 $start--; # We need this to be zero based
452 my $f = $idx->get_sequence( $id, $searchdb ) or croak "JPRED: No such entry for $id in $searchdb\n" and return;
453 my @seqs = $f->get_entries;
454 my $db = shift @seqs;
457 confess "JPRED: remove_seq_masks: Accession number $id returned no sequences";
459 confess "JPRED: remove_seq_masks: Accession number $id returned more than one sequence";
462 my @db_seq = @{ $db->seq };
464 # $i is a pointer into the original, ungapped, unmasked sequence. $_ is for the aligned sequence
466 for ( 0 .. $#align ) {
467 next if $align[$_] =~ /$GAP/;
469 if ( $align[$_] eq 'X' ) {
470 unless ( defined $db_seq[ $i + $start ] ) {
471 croak "JPRED: remove_seq_masks: the database sequence is not as long as the query sequence!";
473 $align[$_] = $db_seq[ $i + $start ];
477 $seq->align( \@align );
480 =head2 @PSISEQS_OUT = reduce ($max, @PSISEQS_IN);
482 Reduces @PSISEQS_IN to at most $max sequences by taking every n sequences in @PSISEQ_IN.
484 Equivilent to reduce script.
488 #####################################################################################################
490 my ( $max, @seqs ) = @_;
492 my $num = int( @seqs / $max );
493 my @res = shift @seqs;
495 # Don't bother if we have less than the required sequences
496 if ( @seqs < $max or 0 == $num ) {
500 for ( 0 .. $#seqs ) {
501 push @res, $seqs[$_] if ( 0 == $_ % $num );
507 =head2 nonred($cutoff, @PSISEQS);
509 Removes redundant sequences at $cutoff level of sequence identity.
511 Equivilent to nonred script.
515 #####################################################################################################
517 my ( $cutoff, @seqs ) = @_;
519 # Run pairwise and then OC and remove similar sequences
520 unless ( defined $cutoff ) { croak "JPRED: nonred() not passed a cutoff" }
521 unless (@seqs) { croak "JPRED: No sequences passed to nonred()" }
523 warn "JPRED: Only one sequence passed to nonred(). Not reducing sequence redundancy\n";
527 # Convert the sequences into a FASTA object, which is what pairwise expects (simpler version).
528 my $fasta = FASTA::File->new;
530 my $f = FASTA->new( id => $_->id );
531 $f->seq( @{ $_->align } );
532 $fasta->add_entries($f);
536 my $pair = Pairwise->new( path => $pairwise );
537 my $foo = join "\n", $pair->run($fasta) or die $!;
540 my $ocres = OC->new( path => "$oc sim complete cut $cutoff" );
541 $ocres->read_string( join "", $ocres->run($foo) );
543 # Now found those entries that are unrelated
545 for ( $ocres->get_groups ) {
546 my ( $group, $score, $size, @labels ) = @{$_};
548 # Keep the QUERY from the cluster, otherwise just get onelt
549 if ( grep { /QUERY/ } @labels ) {
551 # Make sure the query stays at the start
552 unshift @keep, "QUERY";
555 push @keep, shift @labels;
558 push @keep, $ocres->get_unclustered;
560 # Return the matching entries.
561 # We use the temporay to mimic the selection of sequences in the nonred
562 # script, which took the sequences in the order they occured in the file.
566 if ( grep { $_ =~ /^$label$/ } @keep ) {
567 push @filtered_res, $_;
571 return @filtered_res;
574 =head2 @seqs = del_long_seqs($percentage, @PSISEQS)
576 Removes those sequences that are more than $percentage longer or shorter than the first
577 sequence in @seqs. @seqs is an array of PSISEQ objects.
579 Equivilent to trim_seqs script.
583 #####################################################################################################
585 my ( $factor, @seqs ) = @_;
587 # Keep the query sequence (we assume query is the 1st FASTA record)
588 my $query = shift @seqs;
591 # Find the length of the query sequence without any gaps
592 my $length = ( () = ( join '', @{ $query->align } ) =~ /[^$GAP]/g );
594 # Calculate the upper and lower allowed bounds
595 my ( $upper, $lower );
597 my $bounds = ( $length / 100 ) * $factor;
598 ( $upper, $lower ) = ( $length + $bounds, $length - $bounds );
601 # Now try each sequence and see how long they are
603 my $l = ( () = ( join '', @{ $_->align } ) =~ /[^$GAP]/g );
604 if ( $l >= $lower && $l <= $upper ) { push @res, $_ }
610 =head2 toupper ($PSISEQ)
612 Converts sequence held in PSISEQ object to uppercase.
616 #####################################################################################################
618 $_[0]->align( [ split //, uc join '', @{ $_[0]->align } ] );
621 =head2 degap($PSISEQ_QUERY, @PSISEQS)
623 Removes gaps in the query sequence ($PSISEQ_QUERY) and corresponding positions in @PSISEQS. Change made in place.
625 Equivilent to fasta2jnet script.
629 #####################################################################################################
633 # Find where the gaps in the query sequence are
636 for ( @{ $seqs[0]->align } ) {
637 push @gaps, $i if $_ !~ /$GAP/;
643 # Remove the gaps in all the sequences.
644 # Derefences the array reference and takes a slice inside the method call
645 # arguments, then coerce back to an array ref.
647 $_->align( [ @{ $_->align }[@gaps] ] );
651 =head2 getfreq($filename, @PSISEQS)
653 Creates a PSIBLAST like profile and writes it to the $filename.
655 Equivilant to profile or getfreq (older) script. This doesn't create the same answer as
656 the old jpred, as it got the numbers from PSIBLAST (v2.0.3), whereas this a *similar* output,
657 but it's *not* the same.
661 #####################################################################################################
663 my ( $fn, @seqs ) = @_;
665 #croak "JPRED: Passed non PSISEQ object" if grep { !isa( $_, 'PSISEQ' ) } @seqs;
667 # Create a PSIBLAST like profile from the alignment
668 # This doesn't greate the same result as old Jpred, I think this is because
669 # differences in the input between old and new
672 # my $f = FASTA::File->new(read_file => "/homes/jon/jnet/test/1bk4/1bk4.align.fa");
673 # my @profile = profile(
674 # map { join "", @{ $_->seq } } $f->get_entries
677 my @profile = profile( map { join "", @{ $_->align } } @seqs );
679 open my $fh, ">$fn" or die "JPRED: $fn: $!";
680 print $fh join( " ", @{$_} ), "\n" for @profile;
683 =head2 HMMER::Profile::Jnet = hmmer(@PSISEQS)
685 Creates a HMMER profile from the aligned sequences in @PSISEQS. Returns a HMMER::Profile::Jnet object.
687 Equivilent to gethmm script.
691 #####################################################################################################
695 # Temp files required
696 my ( $hmmbuild_out, $hmmconvert_out, $tmp_fasta ) = map { File::Temp->new->filename } 1 .. 3;
698 # Create the FASTA file
699 psiseq2fasta( $tmp_fasta, @seqs );
702 system("$hmmbuild -F --fast --amino --gapmax 1 --wblosum $hmmbuild_out $tmp_fasta");
703 system("$hmmconvert -F -p $hmmbuild_out $hmmconvert_out");
705 # Read in the HMMER file
706 my $hmmer = HMMER::Profile->new( read_file => $hmmconvert_out );
708 # Convert from HMMER::Profile to HMMER::Profile::Jnet
709 my $hmmer_jnet = HMMER::Profile::Jnet->new;
710 $hmmer_jnet->add_line( @{$_} ) for $hmmer->get_line;
715 =head2 jnet($hmmer, $out, $pssm )
717 Runs Jnet. Pass the paths of the filenames to do the prediction on
721 #####################################################################################################
723 my ( $hmmer, $outfile, $pssm ) = @_;
726 # run Jnet prediction with all the profile data
727 system("$jnet --concise --hmmer $hmmer --pssm $pssm > $outfile");
730 # run Jnet prediction with only HMMer and alignment data
731 system("$jnet --concise --hmmer $hmmer > $outfile");
733 check( "jnet", $? ) or exit 1;
736 =head1 psiseq2fasta($path, @PSISEQ)
738 Writes a FASTA file given an array of PSISEQ objects.
742 #####################################################################################################
744 my ( $fn, @seqs ) = @_;
746 #confess "JPRED: Not passed filename" if isa $fn, 'PSISEQ';
747 confess "JPRED: not passed PSISEQs" unless @seqs;
749 #confess "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
751 # Changed from using FASTA::File object due to seg faulting on some large,
752 # long alignments, presumably due to running out of memory.
753 # my $fasta = FASTA::File->new;
754 # $fasta->add_entries(
756 # my $f = FASTA->new(id => $_->id);
757 # $f->seq(@{$_->align});
761 # $fasta->write_file($fn);
765 ? ( open $fh, "| gzip -9 > $fn" or die "JPRED: $!" )
766 : ( open $fh, ">$fn" or die "JPRED: $!" );
769 print $fh ">", $_->id, "\n";
770 my $seq = join( "", @{ $_->align } );
771 $seq =~ s/(.{72})/$1\n/g;
773 print $fh $seq . "\n";
778 =head1 @PSISEQ = fasta2psiseq($path)
780 Reads in a FASTA file and returns an array of PSISEQ objects.
784 #####################################################################################################
790 ? FASTA::File->new( read_gzip_file => $fn )
791 : FASTA::File->new( read_file => $fn );
792 $fasta or die "JPRED: $!";
795 for ( $fasta->get_entries ) {
796 my $seq = PSISEQ->new;
798 $seq->align( [ @{ $_->seq } ] );
805 #####################################################################################################
806 sub fasta2psiseq_lowmemory {
807 my $filename = shift;
812 ? ( open $fh, "gzip -dc $filename|" or die $! )
813 : ( open $fh, $filename or die $! );
820 $seq =~ s/ //g if defined $seq;
822 if ( $id and $seq ) {
823 my $psi = PSISEQ->new( id => $id );
824 $psi->align( [ split //, $seq ] );
835 if ( $id and $seq ) {
836 my $psi = PSISEQ->new( id => $id );
844 =head1 extend($FASTA::File, @PSISEQ)
846 Sometimes PSIBLAST truncates the outputed alignment where it can't matched residues
847 against the query, this function extends the alignment so that all of the query is
848 present in the alignment. The other sequences are extended with gap characters.
852 #####################################################################################################
854 my ( $query, @seqs ) = @_;
856 #croak "JPRED: Not a Sequence::File" if grep { not isa $_, 'Sequence::File' } $query;
857 #croak "JPRED: Not PSISEQ" if grep { not isa $_, 'PSISEQ' } @seqs;
859 # Get the query sequence
860 my $q_query = $query->get_entry(0);
862 # Get the query sequence from the PSIBLAST alignment
863 my $a_query = shift @seqs;
865 # The gap size required to expand the alignment
866 my ( $start_gap_length, $end_gap_length );
869 # Make handling the sequence easier
870 my $q_seq = join "", @{ [ $q_query->seq ] };
871 my $a_seq = join "", @{ $a_query->align };
876 ( $q_seq, $a_seq ) = map { uc $_ } $q_seq, $a_seq;
878 $start_gap_length = index( $q_seq, $a_seq );
879 croak "JPRED: query sequence from alignment not found in original query sequence" if $start_gap_length == -1;
881 $end_gap_length = length($q_seq) - length($a_seq) - $start_gap_length;
884 # Add the gaps to the end of the alignments
886 $_->align( [ ($GAP) x $start_gap_length, @{ $_->align }, ($GAP) x $end_gap_length ] );
889 # Put the query sequence back
894 ( $start_gap_length ? @{ $q_query->seq }[ 0 .. ( $start_gap_length - 1 ) ] : () ),
895 @{ $a_query->align },
896 ( $end_gap_length ? @{ $q_query->seq }[ -$end_gap_length .. -1 ] : () ),
903 =head1 $int = fasta_seq_length($path)
905 Given the path to a FASTA database, find the number of residues/bases in it. Counts whitespace as a residue.
909 #####################################################################################################
910 sub fasta_seq_length {
912 open my $fh, $fn or croak "Can't open file $fn: $!\n";
925 =head1 log(@messages)
927 Report messages to STDERR
931 #####################################################################################################
932 sub errlog { print STDERR @_ }