Merge branch 'features/mchmmer' into merge/wsinterfaces_mchmmer_JAL-3070_JAL-1950
[jalview.git] / src / jalview / hmmer / HMMSearch.java
diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java
new file mode 100644 (file)
index 0000000..6cda421
--- /dev/null
@@ -0,0 +1,475 @@
+package jalview.hmmer;
+
+import jalview.bin.Cache;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.Desktop;
+import jalview.gui.JvOptionPane;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.StockholmFile;
+import jalview.util.FileUtils;
+import jalview.util.MessageManager;
+import jalview.ws.params.ArgumentI;
+import jalview.ws.params.simple.BooleanOption;
+import jalview.ws.params.simple.Option;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Scanner;
+
+import javax.swing.JOptionPane;
+
+public class HMMSearch extends HmmerCommand
+{
+  static final String HMMSEARCH = "hmmsearch";
+
+  /*
+   * constants for i18n lookup of passed parameter names
+   */
+  static final String DATABASE_KEY = "label.database";
+
+  static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
+
+  static final String USE_ACCESSIONS_KEY = "label.use_accessions";
+
+  static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
+
+  static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
+
+  static final String TRIM_TERMINI_KEY = "label.trim_termini";
+
+  static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
+
+  static final String CUTOFF_NONE = "None";
+
+  static final String CUTOFF_SCORE = "Score";
+
+  static final String CUTOFF_EVALUE = "E-Value";
+
+  static final String SEQ_EVALUE_KEY = "label.seq_evalue";
+
+  static final String DOM_EVALUE_KEY = "label.dom_evalue";
+
+  static final String SEQ_SCORE_KEY = "label.seq_score";
+
+  static final String DOM_SCORE_KEY = "label.dom_score";
+
+  boolean realign = false;
+
+  boolean trim = false;
+
+  int seqsToReturn = Integer.MAX_VALUE;
+
+  SequenceI[] seqs;
+
+  private String databaseName;
+
+  /**
+   * Constructor for the HMMSearchThread
+   * 
+   * @param af
+   */
+  public HMMSearch(AlignFrame af, List<ArgumentI> args)
+  {
+    super(af, args);
+  }
+
+  /**
+   * Runs the HMMSearchThread: the data on the alignment or group is exported,
+   * then the command is executed in the command line and then the data is
+   * imported and displayed in a new frame. Call this method directly to execute
+   * synchronously, or via start() in a new Thread for asynchronously.
+   */
+  @Override
+  public void run()
+  {
+    HiddenMarkovModel hmm = getHmmProfile();
+    if (hmm == null)
+    {
+      // shouldn't happen if we got this far
+      Cache.log.error("Error: no hmm for hmmsearch");
+      return;
+    }
+
+    SequenceI hmmSeq = hmm.getConsensusSequence();
+    long msgId = System.currentTimeMillis();
+    af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
+            msgId);
+
+    try
+    {
+      File hmmFile = FileUtils.createTempFile("hmm", ".hmm");
+      File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
+              ".sto");
+      File searchOutputFile = FileUtils.createTempFile("searchOutput",
+              ".sto");
+
+      exportHmm(hmm, hmmFile.getAbsoluteFile());
+
+      boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
+      if (!ran)
+      {
+        JvOptionPane.showInternalMessageDialog(af, MessageManager
+                .formatMessage("warn.command_failed", "hmmsearch"));
+        return;
+      }
+
+      importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile);
+      // TODO make realignment of search results a step at this level
+      // and make it conditional on this.realign
+    } catch (IOException | InterruptedException e)
+    {
+      e.printStackTrace();
+    }
+    finally
+    {
+      af.setProgressBar("", msgId);
+    }
+  }
+
+  /**
+   * Executes an hmmsearch with the given hmm as input. The database to be
+   * searched is a local file as specified by the 'Database' parameter, or the
+   * current alignment (written to file) if none is specified.
+   * 
+   * @param searchOutputFile
+   * @param hitsAlignmentFile
+   * @param hmmFile
+   * 
+   * @return
+   * @throws IOException
+   */
+  private boolean runCommand(File searchOutputFile, File hitsAlignmentFile,
+          File hmmFile) throws IOException
+  {
+    String command = getCommandPath(HMMSEARCH);
+    if (command == null)
+    {
+      return false;
+    }
+
+    List<String> args = new ArrayList<>();
+    args.add(command);
+    buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
+
+    return runCommand(args);
+  }
+
+  /**
+   * Appends command line arguments to the given list, to specify input and
+   * output files for the search, and any additional options that may have been
+   * passed from the parameters dialog
+   * 
+   * @param args
+   * @param searchOutputFile
+   * @param hitsAlignmentFile
+   * @param hmmFile
+   * @throws IOException
+   */
+  protected void buildArguments(List<String> args, File searchOutputFile,
+          File hitsAlignmentFile, File hmmFile) throws IOException
+  {
+    args.add("-o");
+    args.add(getFilePath(searchOutputFile));
+    args.add("-A");
+    args.add(getFilePath(hitsAlignmentFile));
+
+    boolean dbFound = false;
+    String dbPath = "";
+    File databaseFile = null;
+
+    boolean useEvalueCutoff = false;
+    boolean useScoreCutoff = false;
+    String seqEvalueCutoff = null;
+    String domEvalueCutoff = null;
+    String seqScoreCutoff = null;
+    String domScoreCutoff = null;
+    databaseName = "Alignment";
+    boolean searchAlignment = false;
+
+    if (params != null)
+    {
+      for (ArgumentI arg : params)
+      {
+        String name = arg.getName();
+        if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
+                .equals(name))
+        {
+          seqsToReturn = Integer.parseInt(arg.getValue());
+        }
+        else if (MessageManager.getString("action.search").equals(name))
+        {
+          searchAlignment = arg.getValue().equals(
+                  MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY));
+        }
+        else if (MessageManager.getString(DATABASE_KEY).equals(name))
+        {
+          dbPath = arg.getValue();
+          int pos = dbPath.lastIndexOf(File.separator);
+          databaseName = dbPath.substring(pos + 1);
+          databaseFile = new File(dbPath);
+        }
+        else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
+                .equals(name))
+        {
+          realign = true;
+        }
+        else if (MessageManager.getString(USE_ACCESSIONS_KEY)
+                .equals(name))
+        {
+          args.add("--acc");
+        }
+        else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
+                .equals(name))
+        {
+          if (CUTOFF_EVALUE.equals(arg.getValue()))
+          {
+            useEvalueCutoff = true;
+          }
+          else if (CUTOFF_SCORE.equals(arg.getValue()))
+          {
+            useScoreCutoff = true;
+          }
+        }
+        else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
+        {
+          seqEvalueCutoff = arg.getValue();
+        }
+        else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
+        {
+          seqScoreCutoff = arg.getValue();
+        }
+        else if (MessageManager.getString(DOM_EVALUE_KEY)
+                .equals(name))
+        {
+          domEvalueCutoff = arg.getValue();
+        }
+        else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
+        {
+          domScoreCutoff = arg.getValue();
+        }
+        else if (MessageManager.getString(TRIM_TERMINI_KEY)
+                .equals(name))
+        {
+          trim = true;
+        }
+        else if (MessageManager.getString(DATABASE_KEY).equals(name))
+        {
+          dbFound = true;
+          dbPath = arg.getValue();
+          if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
+                  .equals(dbPath))
+          {
+            int pos = dbPath.lastIndexOf(File.separator);
+            databaseName = dbPath.substring(pos + 1);
+            databaseFile = new File(dbPath);
+          }
+        }
+      }
+    }
+
+    if (useEvalueCutoff)
+    {
+      args.add("-E");
+      args.add(seqEvalueCutoff);
+      args.add("--domE");
+      args.add(domEvalueCutoff);
+    }
+    else if (useScoreCutoff)
+    {
+      args.add("-T");
+      args.add(seqScoreCutoff);
+      args.add("--domT");
+      args.add(domScoreCutoff);
+    }
+
+//    if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
+//            .equals(dbPath))
+      if (searchAlignment)
+    {
+      /*
+       * no external database specified for search, so
+       * export current alignment as 'database' to search,
+       * excluding any HMM consensus sequences it contains
+       */
+      databaseFile = FileUtils.createTempFile("database", ".sto");
+      AlignmentI al = af.getViewport().getAlignment();
+      AlignmentI copy = new Alignment(al);
+      List<SequenceI> hmms = copy.getHmmSequences();
+      for (SequenceI hmmSeq : hmms)
+      {
+        copy.deleteSequence(hmmSeq);
+      }
+      exportStockholm(copy.getSequencesArray(), databaseFile, null);
+    }
+
+    args.add(getFilePath(hmmFile));
+    args.add(getFilePath(databaseFile));
+  }
+
+  /**
+   * Imports the data from the temporary file to which the output of hmmsearch
+   * was directed. The results are optionally realigned using hmmalign.
+   * 
+   * @param hmmSeq
+   */
+  private void importData(SequenceI hmmSeq, File inputAlignmentTemp,
+          File hmmTemp, File searchOutputFile)
+          throws IOException, InterruptedException
+  {
+    BufferedReader br = new BufferedReader(
+            new FileReader(inputAlignmentTemp));
+    try
+    {
+      if (br.readLine() == null)
+      {
+        JOptionPane.showMessageDialog(af,
+                MessageManager.getString("label.no_sequences_found"));
+        return;
+      }
+      StockholmFile file = new StockholmFile(new FileParse(
+              inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
+      seqs = file.getSeqsAsArray();
+
+      readTable(searchOutputFile);
+
+      int seqCount = Math.min(seqs.length, seqsToReturn);
+      SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
+      hmmAndSeqs[0] = hmmSeq;
+      System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
+
+      if (realign)
+      {
+        realignResults(hmmAndSeqs);
+      }
+      else
+      {
+        AlignmentI al = new Alignment(hmmAndSeqs);
+        AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+        String ttl = "hmmSearch of " + databaseName + " using "
+                + hmmSeq.getName();
+        Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+      }
+
+      hmmTemp.delete();
+      inputAlignmentTemp.delete();
+      searchOutputFile.delete();
+    } finally
+    {
+      if (br != null)
+      {
+        br.close();
+      }
+    }
+  }
+
+  /**
+   * Realigns the given sequences using hmmalign, to the HMM profile sequence
+   * which is the first in the array, and opens the results in a new frame
+   * 
+   * @param hmmAndSeqs
+   */
+  protected void realignResults(SequenceI[] hmmAndSeqs)
+  {
+    /*
+     * and align the search results to the HMM profile
+     */
+    AlignmentI al = new Alignment(hmmAndSeqs);
+    AlignFrame frame = new AlignFrame(al, 1, 1);
+    List<ArgumentI> alignArgs = new ArrayList<>();
+    String alignTo = hmmAndSeqs[0].getName();
+    List<String> options = Collections.singletonList(alignTo);
+    Option option = new Option(MessageManager.getString("label.use_hmm"),
+            "", true, alignTo, alignTo, options, null);
+    alignArgs.add(option);
+    if (trim)
+    {
+      alignArgs.add(new BooleanOption(
+              MessageManager.getString(TRIM_TERMINI_KEY),
+              MessageManager.getString("label.trim_termini_desc"), true,
+              true, true, null));
+    }
+    HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
+    hmmalign.run();
+  }
+
+  /**
+   * Reads in the scores table output by hmmsearch and adds annotation to
+   * sequences for E-value and bit score
+   * 
+   * @param inputTableTemp
+   * @throws IOException
+   */
+  void readTable(File inputTableTemp) throws IOException
+  {
+    BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
+    String line = "";
+    while (!line.startsWith("Query:"))
+    {
+      line = br.readLine();
+    }
+    for (int i = 0; i < 5; i++)
+    {
+      line = br.readLine();
+    }
+
+    int index = 0;
+    while (!"  ------ inclusion threshold ------".equals(line)
+            && !"".equals(line))
+    {
+      SequenceI seq = seqs[index];
+      Scanner scanner = new Scanner(line);
+      String str = scanner.next();
+      addScoreAnnotation(str, seq, "hmmsearch E-value",
+              "Full sequence E-value");
+      str = scanner.next();
+      addScoreAnnotation(str, seq, "hmmsearch Score",
+              "Full sequence bit score");
+      scanner.close();
+      line = br.readLine();
+      index++;
+    }
+
+    br.close();
+  }
+
+  /**
+   * A helper method that adds one score-only (non-positional) annotation to a
+   * sequence
+   * 
+   * @param value
+   * @param seq
+   * @param label
+   * @param description
+   */
+  protected void addScoreAnnotation(String value, SequenceI seq,
+          String label, String description)
+  {
+    try
+    {
+      AlignmentAnnotation annot = new AlignmentAnnotation(label,
+              description, null);
+      annot.setCalcId(HMMSEARCH);
+      double eValue = Double.parseDouble(value);
+      annot.setScore(eValue);
+      annot.setSequenceRef(seq);
+      seq.addAlignmentAnnotation(annot);
+    } catch (NumberFormatException e)
+    {
+      System.err.println("Error parsing " + label + " from " + value);
+    }
+  }
+
+}