Re-design jpred.pl input options: no -seq for a file with 1 FASTA record only.

author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>

Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)

committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>

Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)
diff --git a/binaries/src/jpred/jpred.pl b/binaries/src/jpred/jpred.pl

index 99d7fdc..bd23814 100755 (executable)
--- a/binaries/src/jpred/jpred.pl
+++ b/binaries/src/jpred/jpred.pl
@@ -6,14 +6,15 @@ jpred - Secondary structure prediction program
  
  =head1 SYNOPSIS
  
-./jpred.pl -in <FILE1> [-outfile <FILE2>] [-logfile <FILE3>] [-output <FILEPREFIX>] [-dbname <DBNAME>] [-dbpath <PATH>] [-ncpu NNN] [-psi <psiblast output>] [-seq] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man]
+./jpred.pl -in <FILE1> [-outfile <FILE2>] [-logfile <FILE3>] [-output <FILEPREFIX>] [-dbname <DBNAME>] [-dbpath <PATH>] [-ncpu NNN] [-psi <psiblast output>] [-pred-nohits] [-no-final] [-jabaws] [-verbose] [-debug] [-help] [-man]
  
  =head1 DESCRIPTION
  
-This is a program for predicting the secondary structure of a multiple sequence alignment (by default) or a protein sequence 
-(with the -seq option). The input file can be stored in 3 formats: FASTA, MSF, or BLC. 
-For the single sequence the program does all the PSI-BLAST searching, preparing PSSM and HMM profiles and predicting the 
-secondary structure with Jnet. For the multiple sequence alignment only the HMM profile, created from the alignment, is used in Jnet.
+This is a program for predicting the secondary structure of a multiple sequence alignment or a protein sequence. 
+The input file can be stored in 3 formats: FASTA, MSF, or BLC. 
+A FASTA file with exactly one record is treated as a single sequence: the program runs the PSI-BLAST
+search, prepares the PSSM and HMM profiles and predicts the secondary structure with Jnet. For a multiple
+sequence alignment only the HMM profile, created from the alignment, is used in Jnet.
  
  =head1 OPTIONS
  
@@ -23,10 +24,6 @@ secondary structure with Jnet. For the multiple sequence alignment only the HMM
  
  The path to the sequence file (in FASTA, MSF, or BLC format)
  
-=item -seq
-
-The input file is a FASTA file with one sequence only.
-
  =item -output <FILEPREFIX>
  
  A prefix to the filenames created by Jpred, defaults to the value set by -sequence/-in.
@@ -211,7 +208,6 @@ GetOptions(
    "ncpu=s"      => \$ncpu,
    "pred-nohits" => \$predNoHits,
    "no-final"    => \$nofinal,
-  "seq"         => \$seqgoal,
    "jabaws"      => \$jabaws,
  
    "help"    => \$help,
@@ -222,8 +218,6 @@ GetOptions(
  pod2usage(1) if $help;
  pod2usage( verbose => 2 ) if $man;
  
-$goal = "seq" if ( defined $seqgoal );
-
  #####################################################################################################
  # Key to database information and information for accessing them
  my $database = {
@@ -327,25 +321,23 @@ print $LOG "JPRED: checking platiform... $platform\n" if $LOG;
  
  #####################################################################################################
  # check input file format
-if ( 'seq' eq $goal ) {
-  $format = "seq";
-  if ( 1 != check_FASTA_format($infile) ) {
-    die "\nERROR! jpred requires 1 sequence in the FASTA file if the option -seq used. exit\n";
-  }
-} else {
-  my $nseq = check_FASTA_format($infile);
-  if ( 0 < $nseq ) {
-    $format = "fasta";
-    if ( 1 == $nseq ) {
-      die "\nERROR! jpred requires alignment with more than 1 sequence\n       if you provide only one sequence use the -seq option.\n";
-    }
-  } elsif ( 0 < check_MSF_format($infile) ) {
-    $format = "msf";
-  } elsif ( 0 < check_BLC_format($infile) ) {
-    $format = "blc";
+my $nseq = check_FASTA_format($infile);
+if ( 0 < $nseq ) {
+  $format = "fasta";
+  if ( 1 == $nseq ) {
+    # one FASTA record
+    $goal = 'seq';
    } else {
-    die "ERROR! unknown input file format for multiple sequence alignment (can be FASTA, MSF, or BLC). exit...\n";
+    unless ( 0 < check_FASTA_alignment($infile)) {
+      die "\nERROR! jpred requires either FASTA alignment or 1 sequence in the FASTA, MSF, or BLC formats\n";
+    }
    }
+} elsif ( 0 < check_MSF_format($infile) ) {
+  $format = "msf";
+} elsif ( 0 < check_BLC_format($infile) ) {
+  $format = "blc";
+} else {
+  die "ERROR! unknown input file format for multiple sequence alignment (can be FASTA, MSF, or BLC). exit...\n";
  }
  $infastafile = $infile . ".fasta" if ( 'msf' eq $format or 'blc' eq $format );
  
@@ -645,6 +637,38 @@ sub check_FASTA_format {
    return $nseq;
  }
  #####################################################################################################
+# Returns the number of records when $infile is a FASTA alignment, i.e. every
+# record holds only residue letters or gap symbols ('.', '-') and all records
+# have the same length; returns 0 otherwise. Dies if $infile cannot be opened.
+sub check_FASTA_alignment {
+  my $infile = shift;
+
+  open( my $IN, "<", $infile ) or die "ERROR! unable to open '$infile': ${!}\n";
+  my $check_first_line = 1;
+  my $nseq             = 0;
+  my $seqlen           = -1;
+  local $/ = "\n>";    # read one whole FASTA record per iteration
+  while (<$IN>) {
+    if ($check_first_line) {
+      return 0 unless (/^>/);
+      $check_first_line = 0;
+    }
+    s/^>//;
+    s/>$//;
+    my ( $id, @seqs ) = split /\n/, $_;
+    return 0 unless ( defined $id and @seqs );
+    my $seq = join( "", @seqs );
+    $seq =~ s/\s+//g;    # drop CR and blanks so they do not skew the length
+    # the whole sequence, not just one character, must be residues or gaps
+    return 0 unless ( $seq =~ /\A[a-zA-Z.-]+\z/ );
+    $seqlen = length($seq) if ( -1 == $seqlen );
+    return 0 if ( $seqlen != length($seq) );
+    ++$nseq;
+  }
+  close($IN);
+  return $nseq;
+}
+#####################################################################################################
  sub check_MSF_format {
    my $infile = shift;
    $? = 0;
diff --git a/conf/settings/JpredParameters.xml b/conf/settings/JpredParameters.xml

index 3697e70..365ffcd 100644 (file)
--- a/conf/settings/JpredParameters.xml
+++ b/conf/settings/JpredParameters.xml
@@ -15,21 +15,12 @@
      <options>\r
          <name>JABAWS configuration</name>\r
          <description>\r
-            Configure Jpred to worik within JABAWS\r
+            Configure Jpred to work within JABAWS\r
          </description>\r
          <optionNames>-jabaws</optionNames>\r
          <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
      </options>\r
  \r
-    <options>\r
-        <name>Single sequence prediction</name>\r
-        <description>\r
-            Configure Jpred to worik within JABAWS\r
-        </description>\r
-        <optionNames>-seq</optionNames>\r
-        <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
-    </options>\r
-\r
      <prmSeparator> </prmSeparator>\r
  \r
      <parameters isRequired="false">\r
@@ -39,7 +30,7 @@
          </description>\r
          <optionNames>-dbpath</optionNames>\r
          <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
-        <defaultValue>.</defaultValue>\r
+        <defaultValue>/data/UNIREFdb</defaultValue>\r
       </parameters>\r
  \r
      <parameters isRequired="true">\r
@@ -53,19 +44,12 @@
          <possibleValues>uniref90</possibleValues>\r
          <possibleValues>ported_db</possibleValues>\r
          <possibleValues>cluster</possibleValues>\r
-<!--\r
-experimental development databases:\r
-        <possibleValues>training</possibleValues>\r
-        <possibleValues>swall</possibleValues>\r
-        <possibleValues>uniprot</possibleValues>\r
-        <possibleValues>uniref50</possibleValues>\r
--->\r
      </parameters>\r
  \r
       <parameters isRequired="false">\r
          <name>Number of CPUs</name>\r
          <description>\r
-            Number of CPU used by jpred.pl. Maximum value is 8\r
+            Number of CPU used by Jpred. Maximum value is 8\r
          </description>\r
          <optionNames>-ncpu</optionNames>\r
          <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
@@ -77,13 +61,4 @@ experimental development databases:
          </validValue>\r
      </parameters>\r
  \r
-    <parameters isRequired="false">\r
-        <name>PSI-BLAST output file</name>\r
-        <description>\r
-            Path to a PSI-BLAST output file\r
-        </description>\r
-        <optionNames>-psi</optionNames>\r
-        <furtherDetails>prog_docs/jpred.txt</furtherDetails>\r
-        <defaultValue></defaultValue>\r
-    </parameters>\r
  </runnerConfig>\r
diff --git a/conf/settings/JpredPresets.xml b/conf/settings/JpredPresets.xml

index 5407708..c70e709 100644 (file)
--- a/conf/settings/JpredPresets.xml
+++ b/conf/settings/JpredPresets.xml
@@ -3,26 +3,12 @@
      <runnerClassName>compbio.runner.predictors.Jpred</runnerClassName>\r
  \r
      <preset>\r
-        <name>cluser-single</name>\r
+        <name>cluser-configuration</name>\r
          <description>\r
              Dundee cluser configuration for single sequence prediction\r
          </description>\r
          <optlist>\r
                         <option>-dbpath /homes/www-jpred/databases </option>\r
-                       <option>-dbname uniref90</option>\r
-                       <option>-jabaws</option>\r
-                       <option>-seq</option>\r
-        </optlist>\r
-    </preset>\r
-\r
-    <preset>\r
-        <name>cluster-alignment</name>\r
-        <description>\r
-            Dundee cluser configuration for multiple sequence alignment prediction\r
-        </description>\r
-        <optlist>\r
-                       <option>-dbpath /homes/www-jpred/databases </option>\r
-                       <option>-dbname uniref90</option>\r
                         <option>-jabaws</option>\r
          </optlist>\r
      </preset>\r
@@ -30,26 +16,11 @@
      <preset>\r
          <name>single</name>\r
          <description>\r
-            4-core laptop configuration for single sequence prediction\r
-        </description>\r
-        <optlist>\r
-            <option>-dbpath /data/UNIREFdb/</option>\r
-            <option>-dbname ported_db</option>\r
-            <option>-ncpu 3</option>\r
-            <option>-jabaws</option>\r
-            <option>-seq</option>\r
-        </optlist>\r
-    </preset>\r
-\r
-    <preset>\r
-        <name>alignemnt</name>\r
-        <description>\r
-            4-core laptop configuration for multiple sequence alignment prediction\r
+            4-core computer configuration for single sequence prediction\r
          </description>\r
          <optlist>\r
              <option>-dbpath /data/UNIREFdb/</option>\r
-            <option>-dbname ported_db</option>\r
-            <option>-ncpu 3</option>\r
+            <option>-ncpu 4</option>\r
              <option>-jabaws</option>\r
          </optlist>\r
      </preset>\r
diff --git a/runner/compbio/runner/predictors/Jpred.java b/runner/compbio/runner/predictors/Jpred.java

index 0e08e0a..4d94620 100644 (file)
--- a/runner/compbio/runner/predictors/Jpred.java
+++ b/runner/compbio/runner/predictors/Jpred.java
@@ -65,6 +65,7 @@ public class Jpred extends SkeletalExecutable<Jpred> {
                 addParameters(Arrays.asList("-logfile " + STAT_FILE));\r
                 addParameters(Arrays.asList("-dbpath " + dbpath));\r
                 addParameters(Arrays.asList("-dbname " + dbname));\r
+               addParameters(Arrays.asList("-jabaws"));\r
         }\r
  \r
         // HashMap<Method, float[]>\r
author	Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
	Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)
committer	Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
	Mon, 14 Oct 2013 14:20:41 +0000 (15:20 +0100)
binaries/src/jpred/jpred.pl		patch \| blob \| history
conf/settings/JpredParameters.xml		patch \| blob \| history
conf/settings/JpredPresets.xml		patch \| blob \| history
runner/compbio/runner/predictors/Jpred.java		patch \| blob \| history