JAL-2719 parse bit score annotation from search results
[jalview.git] / src / jalview / hmmer / HMMSearch.java
index b4e7427..4c5bfb3 100644 (file)
@@ -1,12 +1,13 @@
 package jalview.hmmer;
 
+import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
-import jalview.datamodel.Annotation;
 import jalview.datamodel.HiddenMarkovModel;
 import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
+import jalview.gui.Desktop;
 import jalview.gui.JvOptionPane;
 import jalview.io.DataSourceType;
 import jalview.io.FileParse;
@@ -15,12 +16,14 @@ import jalview.util.FileUtils;
 import jalview.util.MessageManager;
 import jalview.ws.params.ArgumentI;
 import jalview.ws.params.simple.BooleanOption;
+import jalview.ws.params.simple.Option;
 
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Scanner;
 
@@ -30,6 +33,37 @@ public class HMMSearch extends HmmerCommand
 {
   static final String HMMSEARCH = "hmmsearch";
 
+  /*
+   * i18n lookup keys for parameter names (*_KEY), and CUTOFF_* option values
+   */
+  static final String DATABASE_KEY = "label.database";
+
+  static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
+
+  static final String USE_ACCESSIONS_KEY = "label.use_accessions";
+
+  static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
+
+  static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
+
+  static final String TRIM_TERMINI_KEY = "label.trim_termini";
+
+  static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
+
+  static final String CUTOFF_NONE = "None";
+
+  static final String CUTOFF_SCORE = "Score";
+
+  static final String CUTOFF_EVALUE = "E-Value";
+
+  static final String SEQ_EVALUE_KEY = "label.seq_evalue";
+
+  static final String DOM_EVALUE_KEY = "label.dom_evalue";
+
+  static final String SEQ_SCORE_KEY = "label.seq_score";
+
+  static final String DOM_SCORE_KEY = "label.dom_score";
+
   boolean realign = false;
 
   boolean trim = false;
@@ -38,6 +72,8 @@ public class HMMSearch extends HmmerCommand
 
   SequenceI[] seqs;
 
+  private String databaseName;
+
   /**
    * Constructor for the HMMSearchThread
    * 
@@ -57,15 +93,15 @@ public class HMMSearch extends HmmerCommand
   @Override
   public void run()
   {
-    HiddenMarkovModel hmm = af.getSelectedHMM();
+    HiddenMarkovModel hmm = getHmmProfile();
     if (hmm == null)
     {
-      JOptionPane.showMessageDialog(af,
-              MessageManager.getString("warn.no_selected_hmm"));
+      // shouldn't happen if we got this far
+      Cache.log.error("Error: no hmm for hmmsearch");
       return;
     }
 
-    SequenceI hmmSeq = af.getSelectedHMMSequence();
+    SequenceI hmmSeq = hmm.getConsensusSequence();
     long msgId = System.currentTimeMillis();
     af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
             msgId);
@@ -83,8 +119,8 @@ public class HMMSearch extends HmmerCommand
       boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
       if (!ran)
       {
-        JvOptionPane.showInternalMessageDialog(af,
-                MessageManager.getString("warn.hmmsearch_failed"));
+        JvOptionPane.showInternalMessageDialog(af, MessageManager
+                .formatMessage("warn.command_failed", "hmmsearch"));
         return;
       }
 
@@ -133,87 +169,115 @@ public class HMMSearch extends HmmerCommand
     String dbPath = "";
     File databaseFile = null;
 
+    boolean useEvalueCutoff = false;
+    boolean useScoreCutoff = false;
+    String seqEvalueCutoff = null;
+    String domEvalueCutoff = null;
+    String seqScoreCutoff = null;
+    String domScoreCutoff = null;
+    databaseName = "Alignment";
+
     if (params != null)
     {
       for (ArgumentI arg : params)
       {
         String name = arg.getName();
-        if (MessageManager.getString("label.number_of_results")
+        if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
                 .equals(name))
         {
           seqsToReturn = Integer.parseInt(arg.getValue());
         }
-        else if (MessageManager.getString("label.auto_align_seqs")
+        else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
                 .equals(name))
         {
-          realign = true; // TODO: not used
+          realign = true;
         }
-        else if (MessageManager.getString("label.use_accessions")
+        else if (MessageManager.getString(USE_ACCESSIONS_KEY)
                 .equals(name))
         {
           args.add("--acc");
         }
-        else if (MessageManager.getString("label.seq_e_value").equals(name))
+        else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
+                .equals(name))
+        {
+          if (CUTOFF_EVALUE.equals(arg.getValue()))
+          {
+            useEvalueCutoff = true;
+          }
+          else if (CUTOFF_SCORE.equals(arg.getValue()))
+          {
+            useScoreCutoff = true;
+          }
+        }
+        else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
         {
-          args.add("--incE");
-          args.add(arg.getValue());
+          seqEvalueCutoff = arg.getValue();
         }
-        else if (MessageManager.getString("label.seq_score").equals(name))
+        else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
         {
-          args.add("-incT");
-          args.add(arg.getValue());
+          seqScoreCutoff = arg.getValue();
         }
-        else if (MessageManager.getString("label.dom_e_value_desc")
+        else if (MessageManager.getString(DOM_EVALUE_KEY)
                 .equals(name))
         {
-          args.add("--incdomE");
-          args.add(arg.getValue());
+          domEvalueCutoff = arg.getValue();
         }
-        else if (MessageManager.getString("label.dom_score").equals(name))
+        else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
         {
-          args.add("--incdomT");
-          args.add(arg.getValue());
+          domScoreCutoff = arg.getValue();
         }
-        else if (MessageManager.getString("label.trim_termini")
+        else if (MessageManager.getString(TRIM_TERMINI_KEY)
                 .equals(name))
         {
           trim = true;
         }
-        else if (MessageManager.getString("label.database").equals(name))
+        else if (MessageManager.getString(DATABASE_KEY).equals(name))
         {
           dbFound = true;
           dbPath = arg.getValue();
-          if (!MessageManager.getString("label.this_alignment")
+          if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
                   .equals(dbPath))
           {
+            int pos = dbPath.lastIndexOf(File.separator);
+            databaseName = dbPath.substring(pos + 1);
             databaseFile = new File(dbPath);
           }
         }
       }
     }
 
-    if (!dbFound || MessageManager.getString("label.this_alignment")
+    if (useEvalueCutoff)
+    {
+      args.add("-E");
+      args.add(seqEvalueCutoff);
+      args.add("--domE");
+      args.add(domEvalueCutoff);
+    }
+    else if (useScoreCutoff)
+    {
+      args.add("-T");
+      args.add(seqScoreCutoff);
+      args.add("--domT");
+      args.add(domScoreCutoff);
+    }
+
+    if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
             .equals(dbPath))
     {
       /*
        * no external database specified for search, so
-       * export current alignment as 'database' to search
+       * export current alignment as 'database' to search,
+       * excluding any HMM consensus sequences it contains
        */
       databaseFile = FileUtils.createTempFile("database", ".sto");
       AlignmentI al = af.getViewport().getAlignment();
       AlignmentI copy = new Alignment(al);
-      SequenceI hmms = copy.getHmmConsensus();
-      if (hmms != null)
+      List<SequenceI> hmms = copy.getHmmSequences();
+      for (SequenceI hmmSeq : hmms)
       {
-        copy.deleteSequence(hmms);
+        copy.deleteSequence(hmmSeq);
       }
       exportStockholm(copy.getSequencesArray(), databaseFile, null);
-      // StockholmFile stoFile = new StockholmFile(copy);
-      // stoFile.setSeqs(copy.getSequencesArray());
-      // String alignmentString = stoFile.print();
-      // PrintWriter writer = new PrintWriter(databaseFile);
-      // writer.print(alignmentString);
-      // writer.close();
     }
 
     args.add(getFilePath(hmmFile));
@@ -224,7 +288,7 @@ public class HMMSearch extends HmmerCommand
 
   /**
    * Imports the data from the temporary file to which the output of hmmsearch
-   * is directed.
+   * was directed. The results are optionally realigned using hmmalign.
    * 
    * @param hmmSeq
    */
@@ -253,20 +317,21 @@ public class HMMSearch extends HmmerCommand
       hmmAndSeqs[0] = hmmSeq;
       System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
 
-      AlignmentI alignment = new Alignment(hmmAndSeqs);
-      AlignFrame frame = new AlignFrame(alignment, 1, 1);
-      frame.setSelectedHMMSequence(hmmSeq);
-      List<ArgumentI> alignArgs = new ArrayList<>();
-      if (trim)
+      if (realign)
       {
-        alignArgs.add(new BooleanOption(
-                MessageManager.getString("label.trim_termini"),
-                MessageManager.getString("label.trim_termini_desc"), true,
-                true, true, null));
+        realignResults(hmmAndSeqs);
       }
-      HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
-      hmmalign.run();
-      frame = null;
+      else
+      {
+        AlignmentI al = new Alignment(hmmAndSeqs);
+        AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+        String ttl = "hmmSearch of " + databaseName + " using "
+                + hmmSeq.getName();
+        Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+      }
+
       hmmTemp.delete();
       inputAlignmentTemp.delete();
       searchOutputFile.delete();
@@ -279,6 +344,43 @@ public class HMMSearch extends HmmerCommand
     }
   }
 
+  /**
+   * Runs hmmalign over the supplied sequences, aligning them to the HMM
+   * profile sequence held in the first array position. Nothing is displayed
+   * by this method itself; presumably HMMAlign presents its own results.
+   * 
+   * @param hmmAndSeqs HMM profile sequence at index 0, then the search hits
+   */
+  protected void realignResults(SequenceI[] hmmAndSeqs)
+  {
+    // HMMAlign is given a frame; this 1x1 one is not added to the desktop here
+    AlignFrame alignerFrame = new AlignFrame(new Alignment(hmmAndSeqs), 1, 1);
+
+    // select the profile to align to by the name of the first sequence
+    String profileName = hmmAndSeqs[0].getName();
+    List<String> choices = Collections.singletonList(profileName);
+    List<ArgumentI> hmmalignArgs = new ArrayList<>();
+    hmmalignArgs.add(new Option(MessageManager.getString("label.use_hmm"), "",
+            true, profileName, profileName, choices, null));
+    if (trim)
+    {
+      String trimLabel = MessageManager.getString(TRIM_TERMINI_KEY);
+      String trimDesc = MessageManager.getString("label.trim_termini_desc");
+      hmmalignArgs.add(
+              new BooleanOption(trimLabel, trimDesc, true, true, true, null));
+    }
+
+    // run() is invoked directly, so hmmalign executes on the calling thread
+    new HMMAlign(alignerFrame, hmmalignArgs).run();
+  }
+
+  /**
+   * Reads in the scores table output by hmmsearch and adds annotation to
+   * sequences for E-value and bit score
+   * 
+   * @param inputTableTemp the file holding the hmmsearch output to be read
+   * @throws IOException if the file cannot be opened or read
+   */
   void readTable(File inputTableTemp) throws IOException
   {
     BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
@@ -296,22 +398,14 @@ public class HMMSearch extends HmmerCommand
     while (!"  ------ inclusion threshold ------".equals(line)
             && !"".equals(line))
     {
+      SequenceI seq = seqs[index];
       Scanner scanner = new Scanner(line);
-
-      String str = scanner.next(); // full sequence eValue score
-      float eValue = Float.parseFloat(str);
-      int seqLength = seqs[index].getLength();
-      Annotation[] annots = new Annotation[seqLength];
-      for (int j = 0; j < seqLength; j++)
-      {
-        annots[j] = new Annotation(eValue);
-      }
-      AlignmentAnnotation annot = new AlignmentAnnotation("E-value",
-              "Score", annots);
-      annot.setScore(Double.parseDouble(str));
-      annot.setSequenceRef(seqs[index]);
-      seqs[index].addAlignmentAnnotation(annot);
-
+      String str = scanner.next();
+      addScoreAnnotation(str, seq, "hmmsearch E-value",
+              "Full sequence E-value");
+      str = scanner.next();
+      addScoreAnnotation(str, seq, "hmmsearch Score",
+              "Full sequence bit score");
       scanner.close();
       line = br.readLine();
       index++;
@@ -320,4 +414,31 @@ public class HMMSearch extends HmmerCommand
     br.close();
   }
 
+  /**
+   * Helper that attaches one score-only (non-positional) annotation to the
+   * given sequence. A value that cannot be parsed as a number is reported on
+   * stderr and no annotation is added.
+   * 
+   * @param value the score as text, parsed with Double.parseDouble
+   * @param seq the sequence the annotation refers to and is added to
+   * @param label the annotation label, also quoted in the parse error message
+   * @param description the annotation description
+   */
+  protected void addScoreAnnotation(String value, SequenceI seq,
+          String label, String description)
+  {
+    try
+    {
+      AlignmentAnnotation scoreAnnot = new AlignmentAnnotation(label,
+              description, null);
+      scoreAnnot.setCalcId(HMMSEARCH);
+      scoreAnnot.setScore(Double.parseDouble(value));
+      scoreAnnot.setSequenceRef(seq);
+      seq.addAlignmentAnnotation(scoreAnnot);
+    } catch (NumberFormatException nfe)
+    {
+      System.err.println("Error parsing " + label + " from " + value);
+    }
+  }
+
 }