X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Fjpred%2Fjpred.pl;h=bd238149c70364b6349b6c3369ee699fb4bc0aa1;hb=2a4439190b9b6cf0f881d5d07531c25b057117d6;hp=99d7fdcceffee42f5dd4086f37280e4a699755eb;hpb=d32ee68d6295b36d77bd115b1fb0af2068f29837;p=jabaws.git diff --git a/binaries/src/jpred/jpred.pl b/binaries/src/jpred/jpred.pl index 99d7fdc..bd23814 100755 --- a/binaries/src/jpred/jpred.pl +++ b/binaries/src/jpred/jpred.pl @@ -6,14 +6,15 @@ jpred - Secondary structure prediction program =head1 SYNOPSIS -./jpred.pl -in [-outfile ] [-logfile ] [-output ] [-dbname ] [-dbpath ] [-ncpu NNN] [-psi ] [-seq] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man] +./jpred.pl -in [-outfile ] [-logfile ] [-output ] [-dbname ] [-dbpath ] [-ncpu NNN] [-psi ] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man] =head1 DESCRIPTION -This is a program for predicting the secondary structure of a multiple sequence alignment (by default) or a protein sequence -(with the -seq option). The input file can be stored in 3 formats: FASTA, MSF, or BLC. -For the single sequence the program does all the PSI-BLAST searching, preparing PSSM and HMM profiles and predicting the -secondary structure with Jnet. For the multiple sequence alignment only the HMM profile, created from the alignment, is used in Jnet. +This is a program for predicting the secondary structure of a multiple sequence alignment or a protein sequence. +The input file can be stored in 3 formats: FASTA, MSF, or BLC. +For the single sequence the program does all the PSI-BLAST searching, preparing PSSM and HMM profiles and +predicting the secondary structure with Jnet. For the multiple sequence alignment only the HMM profile, +created from the alignment, is used in Jnet. =head1 OPTIONS @@ -23,10 +24,6 @@ secondary structure with Jnet. 
For the multiple sequence alignment only the HMM The path to the sequence file (in FASTA, MSF, or BLC format) -=item -seq - -The input file is a FASTA file with one sequence only. - =item -output A prefix to the filenames created by Jpred, defaults to the value set by -sequence/-in. @@ -211,7 +208,6 @@ GetOptions( "ncpu=s" => \$ncpu, "pred-nohits" => \$predNoHits, "no-final" => \$nofinal, - "seq" => \$seqgoal, "jabaws" => \$jabaws, "help" => \$help, @@ -222,8 +218,6 @@ GetOptions( pod2usage(1) if $help; pod2usage( verbose => 2 ) if $man; -$goal = "seq" if ( defined $seqgoal ); - ##################################################################################################### # Key to database information and information for accessing them my $database = { @@ -327,25 +321,23 @@ print $LOG "JPRED: checking platiform... $platform\n" if $LOG; ##################################################################################################### # check input file format -if ( 'seq' eq $goal ) { - $format = "seq"; - if ( 1 != check_FASTA_format($infile) ) { - die "\nERROR! jpred requires 1 sequence in the FASTA file if the option -seq used. exit\n"; - } -} else { - my $nseq = check_FASTA_format($infile); - if ( 0 < $nseq ) { - $format = "fasta"; - if ( 1 == $nseq ) { - die "\nERROR! jpred requires alignment with more than 1 sequence\n if you provide only one sequence use the -seq option.\n"; - } - } elsif ( 0 < check_MSF_format($infile) ) { - $format = "msf"; - } elsif ( 0 < check_BLC_format($infile) ) { - $format = "blc"; +my $nseq = check_FASTA_format($infile); +if ( 0 < $nseq ) { + $format = "fasta"; + if ( 1 == $nseq ) { + # one FASTA record + $goal = 'seq'; } else { - die "ERROR! unknown input file format for multiple sequence alignment (can be FASTA, MSF, or BLC). exit...\n"; + unless ( 0 < check_FASTA_alignment($infile)) { + die "\nERROR! 
#####################################################################################################
# check_FASTA_alignment($infile)
#
# Decide whether a FASTA file holds an alignment, i.e. every record has the same sequence length.
#
# Parameter:
#   $infile - path to the file to inspect
#
# Returns:
#   the number of records when the file starts with '>' and every record has a header, some
#   sequence data and the same sequence length as the first record; 0 otherwise (including an
#   empty file).
#
# Dies:
#   if $infile cannot be opened for reading.
sub check_FASTA_alignment {
    my $infile = shift;

    open( my $IN, "<", $infile ) or die "ERROR! unable to open '$infile': ${!}\n";

    # Read one whole FASTA record per iteration: a record ends where the next line starts with '>'.
    local $/ = "\n>";

    my $nseq         = 0;
    my $seqlen;              # sequence length of the first record; undef until it has been read
    my $is_alignment = 1;

  RECORD:
    while ( my $record = <$IN> ) {

        # drop the "\n>" separator that belongs to the following record
        chomp $record;

        # only the very first record still carries its own leading '>'; without it this is not FASTA
        if ( 0 == $nseq ) {
            unless ( $record =~ s/^>// ) {
                $is_alignment = 0;
                last RECORD;
            }
        }

        my ( $id, @seqs ) = split /\n/, $record;
        my $seq = join( "", @seqs );

        # Whitespace is not part of the sequence. Without this, carriage returns from CRLF files and
        # trailing blanks are counted, so the measured length depends on how the lines are wrapped.
        $seq =~ s/\s+//g;

        # a record needs a header line and at least one residue/gap character
        unless ( defined $id and $seq =~ /[a-zA-Z\.-]/ ) {
            $is_alignment = 0;
            last RECORD;
        }

        my $len = length($seq);
        $seqlen = $len unless defined $seqlen;
        if ( $len != $seqlen ) {
            $is_alignment = 0;
            last RECORD;
        }

        ++$nseq;
    }

    # single exit point, so the handle is closed explicitly on every path
    close($IN);

    return $is_alignment ? $nseq : 0;
}