Merge branch 'alpha/JAL-3362_Jalview_212_alpha' into alpha/merge_212_JalviewJS_2112
[jalview.git] / src / jalview / hmmer / HMMSearch.java
diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java
new file mode 100644 (file)
index 0000000..f05823e
--- /dev/null
@@ -0,0 +1,318 @@
+package jalview.hmmer;
+
+import jalview.bin.Cache;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.Desktop;
+import jalview.gui.JvOptionPane;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.StockholmFile;
+import jalview.util.FileUtils;
+import jalview.util.MessageManager;
+import jalview.ws.params.ArgumentI;
+import jalview.ws.params.simple.BooleanOption;
+import jalview.ws.params.simple.Option;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import javax.swing.JOptionPane;
+
+public class HMMSearch extends Search
+{
+
+  boolean realign = false;
+
+  boolean trim = false;
+
+  boolean returnNoOfNewSeqs = false;
+
+  int seqsToReturn = Integer.MAX_VALUE;
+
+
+  /**
+   * Constructor for the HMMSearchThread
+   * 
+   * @param af
+   */
+  public HMMSearch(AlignFrame af, List<ArgumentI> args)
+  {
+    super(af, args);
+  }
+
+  /**
+   * Runs the HMMSearchThread: the data on the alignment or group is exported,
+   * then the command is executed in the command line and then the data is
+   * imported and displayed in a new frame. Call this method directly to execute
+   * synchronously, or via start() in a new Thread for asynchronously.
+   */
+  @Override
+  public void run()
+  {
+    HiddenMarkovModel hmm = getHmmProfile();
+    if (hmm == null)
+    {
+      // shouldn't happen if we got this far
+      Cache.log.error("Error: no hmm for hmmsearch");
+      return;
+    }
+
+    SequenceI hmmSeq = hmm.getConsensusSequence();
+    long msgId = System.currentTimeMillis();
+    af.setProgressBar(MessageManager.getString("status.running_search"),
+            msgId);
+
+    try
+    {
+      File hmmFile = FileUtils.createTempFile("hmm", ".hmm");
+      File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
+              ".sto");
+      File searchOutputFile = FileUtils.createTempFile("searchOutput",
+              ".sto");
+
+      exportHmm(hmm, hmmFile.getAbsoluteFile());
+
+      boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
+      if (!ran)
+      {
+        JvOptionPane.showInternalMessageDialog(af, MessageManager
+                .formatMessage("warn.command_failed", "hmmsearch"));
+        return;
+      }
+
+      importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile);
+      // TODO make realignment of search results a step at this level
+      // and make it conditional on this.realign
+    } catch (IOException | InterruptedException e)
+    {
+      e.printStackTrace();
+    }
+    finally
+    {
+      af.setProgressBar("", msgId);
+    }
+  }
+
+  /**
+   * Executes an hmmsearch with the given hmm as input. The database to be
+   * searched is a local file as specified by the 'Database' parameter, or the
+   * current alignment (written to file) if none is specified.
+   * 
+   * @param searchOutputFile
+   * @param hitsAlignmentFile
+   * @param hmmFile
+   * 
+   * @return
+   * @throws IOException
+   */
+  private boolean runCommand(File searchOutputFile, File hitsAlignmentFile,
+          File hmmFile) throws IOException
+  {
+    String command = getCommandPath(HMMSEARCH);
+    if (command == null)
+    {
+      return false;
+    }
+
+    List<String> args = new ArrayList<>();
+    args.add(command);
+    buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
+
+    return runCommand(args);
+  }
+
+
+  /**
+   * Imports the data from the temporary file to which the output of hmmsearch
+   * was directed. The results are optionally realigned using hmmalign.
+   * 
+   * @param hmmSeq
+   */
+  private void importData(SequenceI hmmSeq, File inputAlignmentTemp,
+          File hmmTemp, File searchOutputFile)
+          throws IOException, InterruptedException
+  {
+    BufferedReader br = new BufferedReader(
+            new FileReader(inputAlignmentTemp));
+    try
+    {
+      if (br.readLine() == null)
+      {
+        JOptionPane.showMessageDialog(af,
+                MessageManager.getString("label.no_sequences_found"));
+        return;
+      }
+      StockholmFile file = new StockholmFile(new FileParse(
+              inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
+      seqs = file.getSeqsAsArray();
+
+      readDomainTable(searchOutputFile, false);
+
+      if (searchAlignment)
+      {
+        recoverSequences(sequencesHash, seqs);
+      }
+
+      // look for PP cons and ref seq in alignment only annotation
+      AlignmentAnnotation modelpos = null, ppcons = null;
+      for (AlignmentAnnotation aa : file.getAnnotations())
+      {
+        if (aa.sequenceRef == null)
+        {
+          if (aa.label.equals("Reference Positions")) // RF feature type in
+                                                      // stockholm parser
+          {
+            modelpos = aa;
+          }
+          if (aa.label.equals("Posterior Probability"))
+          {
+            ppcons = aa;
+          }
+        }
+      }
+
+
+      int seqCount = Math.min(seqs.length, seqsToReturn);
+      SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
+      hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
+      hmmAndSeqs[0] = hmmSeq;
+      System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
+      if (modelpos != null)
+      {
+        // TODO need - get ungapped sequence method
+        hmmSeq.setSequence(
+                hmmSeq.getDatasetSequence().getSequenceAsString());
+        Annotation[] refpos = modelpos.annotations;
+        // insert gaps to match with refseq positions
+        int gc = 0, lcol = 0;
+        for (int c = 0; c < refpos.length; c++)
+        {
+          if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
+          {
+            if (gc > 0)
+            {
+              hmmSeq.insertCharAt(lcol + 1, gc, '-');
+            }
+            gc = 0;
+            lcol = c;
+          }
+          else
+          {
+            gc++;
+          }
+        }
+      }
+
+      if (realign)
+      {
+        realignResults(hmmAndSeqs);
+      }
+      else
+      {
+        AlignmentI al = new Alignment(hmmAndSeqs);
+        if (ppcons != null)
+        {
+          al.addAnnotation(ppcons);
+        }
+        if (modelpos != null)
+        {
+          al.addAnnotation(modelpos);
+        }
+        AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+        String ttl = "hmmSearch of " + databaseName + " using "
+                + hmmSeq.getName();
+        Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
+                AlignFrame.DEFAULT_HEIGHT);
+
+        if (returnNoOfNewSeqs)
+        {
+          int nNew = checkForNewSequences();
+          JvOptionPane.showMessageDialog(af.alignPanel, nNew + " "
+                  + MessageManager.getString("label.new_returned"));
+        }
+
+      }
+
+
+      hmmTemp.delete();
+      inputAlignmentTemp.delete();
+      searchOutputFile.delete();
+    } finally
+    {
+      if (br != null)
+      {
+        br.close();
+      }
+    }
+  }
+
+  private int checkForNewSequences()
+  {
+    int nNew = seqs.length;
+
+    for (SequenceI resultSeq : seqs)
+    {
+      for (SequenceI aliSeq : alignment.getSequencesArray())
+      {
+        if (resultSeq.getName().equals(aliSeq.getName()))
+        {
+          nNew--;
+          break;
+        }
+      }
+    }
+
+    return nNew;
+
+  }
+
+  /**
+   * Realigns the given sequences using hmmalign, to the HMM profile sequence
+   * which is the first in the array, and opens the results in a new frame
+   * 
+   * @param hmmAndSeqs
+   */
+  protected void realignResults(SequenceI[] hmmAndSeqs)
+  {
+    /*
+     * and align the search results to the HMM profile
+     */
+    AlignmentI al = new Alignment(hmmAndSeqs);
+    AlignFrame frame = new AlignFrame(al, 1, 1);
+    List<ArgumentI> alignArgs = new ArrayList<>();
+    String alignTo = hmmAndSeqs[0].getName();
+    List<String> options = Collections.singletonList(alignTo);
+    Option option = new Option(MessageManager.getString("label.use_hmm"),
+            "", true, alignTo, alignTo, options, null);
+    alignArgs.add(option);
+    if (trim)
+    {
+      alignArgs.add(new BooleanOption(
+              MessageManager.getString(TRIM_TERMINI_KEY),
+              MessageManager.getString("label.trim_termini_desc"), true,
+              true, true, null));
+    }
+    HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
+    hmmalign.run();
+
+    if (returnNoOfNewSeqs)
+    {
+      int nNew = checkForNewSequences();
+      JvOptionPane.showMessageDialog(frame.alignPanel,
+              nNew + " " + MessageManager.getString("label.new_returned"));
+    }
+  }
+
+}