JAL-2629 hmmer searches now read domain rather than full scores
[jalview.git] / src / jalview / hmmer / HMMSearch.java
index 21a3b20..f05823e 100644 (file)
@@ -4,6 +4,7 @@ import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
 import jalview.datamodel.HiddenMarkovModel;
 import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
@@ -25,54 +26,20 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Scanner;
 
 import javax.swing.JOptionPane;
 
-public class HMMSearch extends HmmerCommand
+public class HMMSearch extends Search
 {
-  static final String HMMSEARCH = "hmmsearch";
-
-  /*
-   * constants for i18n lookup of passed parameter names
-   */
-  static final String DATABASE_KEY = "label.database";
-
-  static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
-
-  static final String USE_ACCESSIONS_KEY = "label.use_accessions";
-
-  static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
-
-  static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
-
-  static final String TRIM_TERMINI_KEY = "label.trim_termini";
-
-  static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
-
-  static final String CUTOFF_NONE = "None";
-
-  static final String CUTOFF_SCORE = "Score";
-
-  static final String CUTOFF_EVALUE = "E-Value";
-
-  static final String SEQ_EVALUE_KEY = "label.seq_evalue";
-
-  static final String DOM_EVALUE_KEY = "label.dom_evalue";
-
-  static final String SEQ_SCORE_KEY = "label.seq_score";
-
-  static final String DOM_SCORE_KEY = "label.dom_score";
 
   boolean realign = false;
 
   boolean trim = false;
 
-  int seqsToReturn = Integer.MAX_VALUE;
+  boolean returnNoOfNewSeqs = false;
 
-  SequenceI[] seqs;
+  int seqsToReturn = Integer.MAX_VALUE;
 
-  private String databaseName;
 
   /**
    * Constructor for the HMMSearchThread
@@ -103,7 +70,7 @@ public class HMMSearch extends HmmerCommand
 
     SequenceI hmmSeq = hmm.getConsensusSequence();
     long msgId = System.currentTimeMillis();
-    af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
+    af.setProgressBar(MessageManager.getString("status.running_search"),
             msgId);
 
     try
@@ -160,132 +127,12 @@ public class HMMSearch extends HmmerCommand
 
     List<String> args = new ArrayList<>();
     args.add(command);
-    args.add("-o");
-    args.add(getFilePath(searchOutputFile));
-    args.add("-A");
-    args.add(getFilePath(hitsAlignmentFile));
-
-    boolean dbFound = false;
-    String dbPath = "";
-    File databaseFile = null;
-
-    boolean useEvalueCutoff = false;
-    boolean useScoreCutoff = false;
-    String seqEvalueCutoff = null;
-    String domEvalueCutoff = null;
-    String seqScoreCutoff = null;
-    String domScoreCutoff = null;
-    databaseName = "Alignment";
-
-    if (params != null)
-    {
-      for (ArgumentI arg : params)
-      {
-        String name = arg.getName();
-        if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
-                .equals(name))
-        {
-          seqsToReturn = Integer.parseInt(arg.getValue());
-        }
-        else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
-                .equals(name))
-        {
-          realign = true;
-        }
-        else if (MessageManager.getString(USE_ACCESSIONS_KEY)
-                .equals(name))
-        {
-          args.add("--acc");
-        }
-        else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
-                .equals(name))
-        {
-          if (CUTOFF_EVALUE.equals(arg.getValue()))
-          {
-            useEvalueCutoff = true;
-          }
-          else if (CUTOFF_SCORE.equals(arg.getValue()))
-          {
-            useScoreCutoff = true;
-          }
-        }
-        else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
-        {
-          seqEvalueCutoff = arg.getValue();
-        }
-        else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
-        {
-          seqScoreCutoff = arg.getValue();
-        }
-        else if (MessageManager.getString(DOM_EVALUE_KEY)
-                .equals(name))
-        {
-          domEvalueCutoff = arg.getValue();
-        }
-        else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
-        {
-          domScoreCutoff = arg.getValue();
-        }
-        else if (MessageManager.getString(TRIM_TERMINI_KEY)
-                .equals(name))
-        {
-          trim = true;
-        }
-        else if (MessageManager.getString(DATABASE_KEY).equals(name))
-        {
-          dbFound = true;
-          dbPath = arg.getValue();
-          if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
-                  .equals(dbPath))
-          {
-            int pos = dbPath.lastIndexOf(File.separator);
-            databaseName = dbPath.substring(pos + 1);
-            databaseFile = new File(dbPath);
-          }
-        }
-      }
-    }
-
-    if (useEvalueCutoff)
-    {
-      args.add("-E");
-      args.add(seqEvalueCutoff);
-      args.add("--domE");
-      args.add(domEvalueCutoff);
-    }
-    else if (useScoreCutoff)
-    {
-      args.add("-T");
-      args.add(seqScoreCutoff);
-      args.add("--domT");
-      args.add(domScoreCutoff);
-    }
-
-    if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
-            .equals(dbPath))
-    {
-      /*
-       * no external database specified for search, so
-       * export current alignment as 'database' to search,
-       * excluding any HMM consensus sequences it contains
-       */
-      databaseFile = FileUtils.createTempFile("database", ".sto");
-      AlignmentI al = af.getViewport().getAlignment();
-      AlignmentI copy = new Alignment(al);
-      List<SequenceI> hmms = copy.getHmmSequences();
-      for (SequenceI hmmSeq : hmms)
-      {
-        copy.deleteSequence(hmmSeq);
-      }
-      exportStockholm(copy.getSequencesArray(), databaseFile, null);
-    }
-
-    args.add(getFilePath(hmmFile));
-    args.add(getFilePath(databaseFile));
+    buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
 
     return runCommand(args);
   }
 
+
   /**
    * Imports the data from the temporary file to which the output of hmmsearch
    * was directed. The results are optionally realigned using hmmalign.
@@ -310,12 +157,62 @@ public class HMMSearch extends HmmerCommand
               inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
       seqs = file.getSeqsAsArray();
 
-      readTable(searchOutputFile);
+      readDomainTable(searchOutputFile, false);
+
+      if (searchAlignment)
+      {
+        recoverSequences(sequencesHash, seqs);
+      }
+
+      // look for PP cons and ref seq in alignment only annotation
+      AlignmentAnnotation modelpos = null, ppcons = null;
+      for (AlignmentAnnotation aa : file.getAnnotations())
+      {
+        if (aa.sequenceRef == null)
+        {
+          if (aa.label.equals("Reference Positions")) // RF feature type in
+                                                      // stockholm parser
+          {
+            modelpos = aa;
+          }
+          if (aa.label.equals("Posterior Probability"))
+          {
+            ppcons = aa;
+          }
+        }
+      }
+
 
       int seqCount = Math.min(seqs.length, seqsToReturn);
       SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
+      hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
       hmmAndSeqs[0] = hmmSeq;
       System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
+      if (modelpos != null)
+      {
+        // TODO need - get ungapped sequence method
+        hmmSeq.setSequence(
+                hmmSeq.getDatasetSequence().getSequenceAsString());
+        Annotation[] refpos = modelpos.annotations;
+        // insert gaps to match with refseq positions
+        int gc = 0, lcol = 0;
+        for (int c = 0; c < refpos.length; c++)
+        {
+          if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
+          {
+            if (gc > 0)
+            {
+              hmmSeq.insertCharAt(lcol + 1, gc, '-');
+            }
+            gc = 0;
+            lcol = c;
+          }
+          else
+          {
+            gc++;
+          }
+        }
+      }
 
       if (realign)
       {
@@ -324,14 +221,31 @@ public class HMMSearch extends HmmerCommand
       else
       {
         AlignmentI al = new Alignment(hmmAndSeqs);
+        if (ppcons != null)
+        {
+          al.addAnnotation(ppcons);
+        }
+        if (modelpos != null)
+        {
+          al.addAnnotation(modelpos);
+        }
         AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
                 AlignFrame.DEFAULT_HEIGHT);
         String ttl = "hmmSearch of " + databaseName + " using "
                 + hmmSeq.getName();
         Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
                 AlignFrame.DEFAULT_HEIGHT);
+
+        if (returnNoOfNewSeqs)
+        {
+          int nNew = checkForNewSequences();
+          JvOptionPane.showMessageDialog(af.alignPanel, nNew + " "
+                  + MessageManager.getString("label.new_returned"));
+        }
+
       }
 
+
       hmmTemp.delete();
       inputAlignmentTemp.delete();
       searchOutputFile.delete();
@@ -344,6 +258,26 @@ public class HMMSearch extends HmmerCommand
     }
   }
 
+  private int checkForNewSequences() // counts entries of seqs whose name matches no sequence in alignment
+  { // NOTE(review): seqs and alignment are not declared in the visible hunks - presumably inherited fields; confirm
+    int nNew = seqs.length; // start by treating every search result as new
+
+    for (SequenceI resultSeq : seqs)
+    {
+      for (SequenceI aliSeq : alignment.getSequencesArray())
+      {
+        if (resultSeq.getName().equals(aliSeq.getName())) // match is by exact name only
+        {
+          nNew--; // this result has a same-named sequence in alignment, so it is not new
+          break; // count each result at most once, even if several names match
+        }
+      }
+    }
+
+    return nNew; // number of results with no same-named sequence in alignment
+
+  }
+
   /**
    * Realigns the given sequences using hmmalign, to the HMM profile sequence
    * which is the first in the array, and opens the results in a new frame
@@ -372,56 +306,13 @@ public class HMMSearch extends HmmerCommand
     }
     HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
     hmmalign.run();
-  }
-
-  /**
-   * Reads in the scores table output by hmmsearch and adds annotation to
-   * sequences for E-value and bit score
-   * 
-   * @param inputTableTemp
-   * @throws IOException
-   */
-  void readTable(File inputTableTemp) throws IOException
-  {
-    BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
-    String line = "";
-    while (!line.startsWith("Query:"))
-    {
-      line = br.readLine();
-    }
-    for (int i = 0; i < 5; i++)
-    {
-      line = br.readLine();
-    }
 
-    int index = 0;
-    while (!"  ------ inclusion threshold ------".equals(line)
-            && !"".equals(line))
+    if (returnNoOfNewSeqs)
     {
-      Scanner scanner = new Scanner(line);
-
-      String str = scanner.next(); // full sequence eValue score
-      // float eValue = Float.parseFloat(str);
-      // int seqLength = seqs[index].getLength();
-      // Annotation[] annots = new Annotation[seqLength];
-      // for (int j = 0; j < seqLength; j++)
-      // {
-      // annots[j] = new Annotation(eValue);
-      // }
-      AlignmentAnnotation annot = new AlignmentAnnotation("E-value",
-              "Score", null);
-      annot.setCalcId(HMMSEARCH);
-      double eValue = Double.parseDouble(str);
-      annot.setScore(eValue);
-      annot.setSequenceRef(seqs[index]);
-      seqs[index].addAlignmentAnnotation(annot);
-
-      scanner.close();
-      line = br.readLine();
-      index++;
+      int nNew = checkForNewSequences();
+      JvOptionPane.showMessageDialog(frame.alignPanel,
+              nNew + " " + MessageManager.getString("label.new_returned"));
     }
-
-    br.close();
   }
 
 }