=head1 SYNOPSIS
-./jpred.pl -in <FILE1> [-outfile <FILE2>] [-logfile <FILE3>] [-output <FILEPREFIX>] [-dbname <DBNAME>] [-dbpath <PATH>] [-ncpu NNN] [-psi <psiblast output>] [-seq] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man]
+./jpred.pl -in <FILE1> [-outfile <FILE2>] [-logfile <FILE3>] [-output <FILEPREFIX>] [-dbname <DBNAME>] [-dbpath <PATH>] [-ncpu NNN] [-psi <psiblast output>] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man]
=head1 DESCRIPTION
-This is a program for predicting the secondary structure of a multiple sequence alignment (by default) or a protein sequence
-(with the -seq option). The input file can be stored in 3 formats: FASTA, MSF, or BLC.
-For the single sequence the program does all the PSI-BLAST searching, preparing PSSM and HMM profiles and predicting the
-secondary structure with Jnet. For the multiple sequence alignment only the HMM profile, created from the alignment, is used in Jnet.
+This is a program for predicting the secondary structure of a multiple sequence alignment or a protein sequence.
+The input file can be stored in 3 formats: FASTA, MSF, or BLC.
+For a single sequence the program runs the PSI-BLAST search, prepares the PSSM and HMM profiles, and
+predicts the secondary structure with Jnet. For a multiple sequence alignment only the HMM profile,
+created from the alignment, is used in Jnet.
=head1 OPTIONS
The path to the sequence file (in FASTA, MSF, or BLC format)
-=item -seq
-
-The input file is a FASTA file with one sequence only.
-
=item -output <FILEPREFIX>
A prefix to the filenames created by Jpred, defaults to the value set by -sequence/-in.
"ncpu=s" => \$ncpu,
"pred-nohits" => \$predNoHits,
"no-final" => \$nofinal,
- "seq" => \$seqgoal,
"jabaws" => \$jabaws,
"help" => \$help,
pod2usage(1) if $help;
pod2usage( verbose => 2 ) if $man;
-$goal = "seq" if ( defined $seqgoal );
-
#####################################################################################################
# Key to database information and information for accessing them
my $database = {
#####################################################################################################
# check input file format
-if ( 'seq' eq $goal ) {
- $format = "seq";
- if ( 1 != check_FASTA_format($infile) ) {
- die "\nERROR! jpred requires 1 sequence in the FASTA file if the option -seq used. exit\n";
- }
-} else {
- my $nseq = check_FASTA_format($infile);
- if ( 0 < $nseq ) {
- $format = "fasta";
- if ( 1 == $nseq ) {
- die "\nERROR! jpred requires alignment with more than 1 sequence\n if you provide only one sequence use the -seq option.\n";
- }
- } elsif ( 0 < check_MSF_format($infile) ) {
- $format = "msf";
- } elsif ( 0 < check_BLC_format($infile) ) {
- $format = "blc";
+my $nseq = check_FASTA_format($infile);
+if ( 0 < $nseq ) {
+ $format = "fasta";
+ if ( 1 == $nseq ) {
+ # one FASTA record
+ $goal = 'seq';
} else {
- die "ERROR! unknown input file format for multiple sequence alignment (can be FASTA, MSF, or BLC). exit...\n";
+ unless ( 0 < check_FASTA_alignment($infile)) {
+ die "\nERROR! jpred requires either FASTA alignment or 1 sequence in the FASTA, MSF, or BLC formats\n";
+ }
}
+} elsif ( 0 < check_MSF_format($infile) ) {
+ $format = "msf";
+} elsif ( 0 < check_BLC_format($infile) ) {
+ $format = "blc";
+} else {
+ die "ERROR! unknown input file format for multiple sequence alignment (can be FASTA, MSF, or BLC). exit...\n";
}
$infastafile = $infile . ".fasta" if ( 'msf' eq $format or 'blc' eq $format );
return $nseq;
}
#####################################################################################################
+# Verify that a FASTA file holds an alignment, i.e. that every record
+# carries a sequence of the same length.
+#
+# Parameter: $infile - path of the file to check.
+# Returns:   the number of records when all sequences have equal length;
+#            0 when the file does not start with '>', when a record has no
+#            sequence characters, or when the sequence lengths differ.
+# Dies when the file cannot be opened.
+sub check_FASTA_alignment {
+  my $infile = shift;
+
+  open( my $IN, "<", $infile ) or die "ERROR! unable to open '$infile': ${!}\n";
+  my $check_first_line = 1;
+  my $nseq             = 0;
+  my $seqlen           = -1;
+
+  # Read one FASTA record per iteration: records are delimited by "\n>".
+  local $/ = "\n>";
+  while (<$IN>) {
+    if ($check_first_line) {
+      return 0 unless (/^>/);
+      $check_first_line = 0;
+    }
+
+    # Drop the '>' that opens the first record and the '>' left at the end
+    # of a record by the record separator.
+    s/^>//g;
+    s/>$//g;
+
+    my ( $id, @seqs ) = split /\n/, $_;
+    return 0 unless ( defined $id or @seqs );
+    my $seq = join( "", @seqs );
+
+    # Whitespace is not part of the alignment: a "\r" left by CRLF line
+    # endings or stray blanks must not count towards the aligned length,
+    # otherwise sequences wrapped over a different number of lines would be
+    # reported as having different lengths.
+    $seq =~ s/\s+//g;
+
+    return 0 unless ( $seq =~ /[a-zA-Z\.-]/ );
+    if ( -1 == $seqlen ) {
+
+      # The first record defines the reference length.
+      $seqlen = length($seq);
+    } else {
+      return 0 if ( $seqlen != length($seq) );
+    }
+    ++$nseq;
+  }
+  close($IN);
+
+  return $nseq;
+}
+#####################################################################################################
+#####################################################################################################
sub check_MSF_format {
my $infile = shift;
$? = 0;
<options>\r
<name>JABAWS configuration</name>\r
<description>\r
- Configure Jpred to worik within JABAWS\r
+ Configure Jpred to work within JABAWS\r
</description>\r
<optionNames>-jabaws</optionNames>\r
<furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
</options>\r
\r
- <options>\r
- <name>Single sequence prediction</name>\r
- <description>\r
- Configure Jpred to worik within JABAWS\r
- </description>\r
- <optionNames>-seq</optionNames>\r
- <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
- </options>\r
-\r
<prmSeparator> </prmSeparator>\r
\r
<parameters isRequired="false">\r
</description>\r
<optionNames>-dbpath</optionNames>\r
<furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
- <defaultValue>.</defaultValue>\r
+ <defaultValue>/data/UNIREFdb</defaultValue>\r
</parameters>\r
\r
<parameters isRequired="true">\r
<possibleValues>uniref90</possibleValues>\r
<possibleValues>ported_db</possibleValues>\r
<possibleValues>cluster</possibleValues>\r
-<!--\r
-experimental development databases:\r
- <possibleValues>training</possibleValues>\r
- <possibleValues>swall</possibleValues>\r
- <possibleValues>uniprot</possibleValues>\r
- <possibleValues>uniref50</possibleValues>\r
--->\r
</parameters>\r
\r
<parameters isRequired="false">\r
<name>Number of CPUs</name>\r
<description>\r
- Number of CPU used by jpred.pl. Maximum value is 8\r
+ Number of CPUs used by Jpred. Maximum value is 8\r
</description>\r
<optionNames>-ncpu</optionNames>\r
<furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
</validValue>\r
</parameters>\r
\r
- <parameters isRequired="false">\r
- <name>PSI-BLAST output file</name>\r
- <description>\r
- Path to a PSI-BLAST output file\r
- </description>\r
- <optionNames>-psi</optionNames>\r
- <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
- <defaultValue></defaultValue>\r
- </parameters>\r
</runnerConfig>\r
<runnerClassName>compbio.runner.predictors.Jpred</runnerClassName>\r
\r
<preset>\r
- <name>cluser-single</name>\r
+ <name>cluser-configuration</name>\r
<description>\r
Dundee cluster configuration for single sequence prediction\r
</description>\r
<optlist>\r
<option>-dbpath /homes/www-jpred/databases </option>\r
- <option>-dbname uniref90</option>\r
- <option>-jabaws</option>\r
- <option>-seq</option>\r
- </optlist>\r
- </preset>\r
-\r
- <preset>\r
- <name>cluster-alignment</name>\r
- <description>\r
- Dundee cluser configuration for multiple sequence alignment prediction\r
- </description>\r
- <optlist>\r
- <option>-dbpath /homes/www-jpred/databases </option>\r
- <option>-dbname uniref90</option>\r
<option>-jabaws</option>\r
</optlist>\r
</preset>\r
<preset>\r
<name>single</name>\r
<description>\r
- 4-core laptop configuration for single sequence prediction\r
- </description>\r
- <optlist>\r
- <option>-dbpath /data/UNIREFdb/</option>\r
- <option>-dbname ported_db</option>\r
- <option>-ncpu 3</option>\r
- <option>-jabaws</option>\r
- <option>-seq</option>\r
- </optlist>\r
- </preset>\r
-\r
- <preset>\r
- <name>alignemnt</name>\r
- <description>\r
- 4-core laptop configuration for multiple sequence alignment prediction\r
+ 4-core computer configuration for single sequence prediction\r
</description>\r
<optlist>\r
<option>-dbpath /data/UNIREFdb/</option>\r
- <option>-dbname ported_db</option>\r
- <option>-ncpu 3</option>\r
+ <option>-ncpu 4</option>\r
<option>-jabaws</option>\r
</optlist>\r
</preset>\r