JAL-2629 fix hmmsearch/jackhmmer assigning incorrect evalues and scores
[jalview.git] / src / jalview / hmmer / HMMSearch.java
index 57786e5..a06f186 100644 (file)
@@ -25,6 +25,7 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Hashtable;
 import java.util.List;
 import java.util.Scanner;
 
@@ -34,47 +35,22 @@ public class HMMSearch extends HmmerCommand
 {
   static final String HMMSEARCH = "hmmsearch";
 
-  /*
-   * constants for i18n lookup of passed parameter names
-   */
-  static final String DATABASE_KEY = "label.database";
-
-  static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
-
-  static final String USE_ACCESSIONS_KEY = "label.use_accessions";
-
-  static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
-
-  static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
-
-  static final String TRIM_TERMINI_KEY = "label.trim_termini";
-
-  static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
-
-  static final String CUTOFF_NONE = "None";
-
-  static final String CUTOFF_SCORE = "Score";
-
-  static final String CUTOFF_EVALUE = "E-Value";
-
-  static final String SEQ_EVALUE_KEY = "label.seq_evalue";
-
-  static final String DOM_EVALUE_KEY = "label.dom_evalue";
-
-  static final String SEQ_SCORE_KEY = "label.seq_score";
-
-  static final String DOM_SCORE_KEY = "label.dom_score";
-
   boolean realign = false;
 
   boolean trim = false;
 
+  boolean returnNoOfNewSeqs = false;
+
   int seqsToReturn = Integer.MAX_VALUE;
 
+  boolean searchAlignment = true;
+
   SequenceI[] seqs;
 
   private String databaseName;
 
+  Hashtable sequencesHash;
+
   /**
    * Constructor for the HMMSearchThread
    * 
@@ -104,7 +80,7 @@ public class HMMSearch extends HmmerCommand
 
     SequenceI hmmSeq = hmm.getConsensusSequence();
     long msgId = System.currentTimeMillis();
-    af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
+    af.setProgressBar(MessageManager.getString("status.running_search"),
             msgId);
 
     try
@@ -196,7 +172,6 @@ public class HMMSearch extends HmmerCommand
     String seqScoreCutoff = null;
     String domScoreCutoff = null;
     databaseName = "Alignment";
-    boolean searchAlignment = false;
 
     if (params != null)
     {
@@ -208,18 +183,6 @@ public class HMMSearch extends HmmerCommand
         {
           seqsToReturn = Integer.parseInt(arg.getValue());
         }
-        else if (MessageManager.getString("action.search").equals(name))
-        {
-          searchAlignment = arg.getValue().equals(
-                  MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY));
-        }
-        else if (MessageManager.getString(DATABASE_KEY).equals(name))
-        {
-          dbPath = arg.getValue();
-          int pos = dbPath.lastIndexOf(File.separator);
-          databaseName = dbPath.substring(pos + 1);
-          databaseFile = new File(dbPath);
-        }
         else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
                 .equals(name))
         {
@@ -233,11 +196,13 @@ public class HMMSearch extends HmmerCommand
         else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
                 .equals(name))
         {
-          if (CUTOFF_EVALUE.equals(arg.getValue()))
+          if (MessageManager.getString(CUTOFF_EVALUE)
+                  .equals(arg.getValue()))
           {
             useEvalueCutoff = true;
           }
-          else if (CUTOFF_SCORE.equals(arg.getValue()))
+          else if (MessageManager.getString(CUTOFF_SCORE)
+                  .equals(arg.getValue()))
           {
             useScoreCutoff = true;
           }
@@ -266,16 +231,16 @@ public class HMMSearch extends HmmerCommand
         }
         else if (MessageManager.getString(DATABASE_KEY).equals(name))
         {
-          dbFound = true;
-          dbPath = arg.getValue();
-          if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
-                  .equals(dbPath))
+          databaseFile = new File(arg.getValue());
+          if (!arg.getValue().isEmpty())
           {
-            int pos = dbPath.lastIndexOf(File.separator);
-            databaseName = dbPath.substring(pos + 1);
-            databaseFile = new File(dbPath);
+            searchAlignment = false;
           }
         }
+        else if (MessageManager.getString(RETURN_N_NEW_SEQ).equals(name))
+        {
+          returnNoOfNewSeqs = true;
+        }
       }
     }
 
@@ -306,12 +271,15 @@ public class HMMSearch extends HmmerCommand
       databaseFile = FileUtils.createTempFile("database", ".sto");
       AlignmentI al = af.getViewport().getAlignment();
       AlignmentI copy = new Alignment(al);
-      List<SequenceI> hmms = copy.getHmmSequences();
-      for (SequenceI hmmSeq : hmms)
+      deleteHmmSequences(copy);
+
+      if (searchAlignment)
       {
-        copy.deleteSequence(hmmSeq);
+        sequencesHash = stashSequences(copy.getSequencesArray());
       }
+
       exportStockholm(copy.getSequencesArray(), databaseFile, null);
+
     }
 
     args.add(getFilePath(hmmFile, true));
@@ -341,6 +309,12 @@ public class HMMSearch extends HmmerCommand
       StockholmFile file = new StockholmFile(new FileParse(
               inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
       seqs = file.getSeqsAsArray();
+
+      if (searchAlignment)
+      {
+        recoverSequences(sequencesHash, seqs);
+      }
+
       // look for PP cons and ref seq in alignment only annotation
       AlignmentAnnotation modelpos = null, ppcons = null;
       for (AlignmentAnnotation aa : file.getAnnotations())
@@ -390,6 +364,7 @@ public class HMMSearch extends HmmerCommand
           }
         }
       }
+
       if (realign)
       {
         realignResults(hmmAndSeqs);
@@ -411,8 +386,17 @@ public class HMMSearch extends HmmerCommand
                 + hmmSeq.getName();
         Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
                 AlignFrame.DEFAULT_HEIGHT);
+
+        if (returnNoOfNewSeqs)
+        {
+          int nNew = checkForNewSequences();
+          JvOptionPane.showMessageDialog(af.alignPanel, nNew + " "
+                  + MessageManager.getString("label.new_returned"));
+        }
+
       }
 
+
       hmmTemp.delete();
       inputAlignmentTemp.delete();
       searchOutputFile.delete();
@@ -425,6 +409,26 @@ public class HMMSearch extends HmmerCommand
     }
   }
 
+  /**
+   * Counts the entries of seqs whose name equals no sequence name in alignment.
+   */
+  private int checkForNewSequences()
+  {
+    int unmatched = 0;
+    nextHit: for (SequenceI hit : seqs)
+    {
+      for (SequenceI existing : alignment.getSequencesArray())
+      {
+        if (hit.getName().equals(existing.getName()))
+        {
+          continue nextHit;
+        }
+      }
+      unmatched++;
+    }
+    return unmatched;
+  }
+
   /**
    * Realigns the given sequences using hmmalign, to the HMM profile sequence
    * which is the first in the array, and opens the results in a new frame
@@ -453,6 +457,13 @@ public class HMMSearch extends HmmerCommand
     }
     HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
     hmmalign.run();
+
+    if (returnNoOfNewSeqs)
+    {
+      int nNew = checkForNewSequences();
+      JvOptionPane.showMessageDialog(frame.alignPanel,
+              nNew + " " + MessageManager.getString("label.new_returned"));
+    }
   }
 
   /**
@@ -480,18 +491,12 @@ public class HMMSearch extends HmmerCommand
     while (!"  ------ inclusion threshold ------".equals(line)
             && !"".equals(line))
     {
-      SequenceI seq = seqs[index];
-      AlignmentAnnotation pp = seq
-              .getAlignmentAnnotations("", "Posterior Probability")
-              .get(0);
       Scanner scanner = new Scanner(line);
-      String str = scanner.next();
-      addScoreAnnotation(str, seq, "hmmsearch E-value",
-              "Full sequence E-value", pp);
-      str = scanner.next();
-      addScoreAnnotation(str, seq, "hmmsearch Score",
-              "Full sequence bit score", pp);
-      seq.removeAlignmentAnnotation(pp);
+      String evalue = scanner.next();
+      String score = scanner.next();
+      checkSequenceOrder(index, scanner);
+      SequenceI seq = seqs[index];
+      addScoreAnnotations(evalue, score, seq);
       scanner.close();
       line = br.readLine();
       index++;
@@ -500,59 +505,61 @@ public class HMMSearch extends HmmerCommand
     br.close();
   }
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description)
-  {
-    addScoreAnnotation(value, seq, label, description, null);
-  }
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   * @param pp
-   *          existing posterior probability annotation - values copied to new
-   *          annotation row
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description, AlignmentAnnotation pp)
+  protected void addScoreAnnotations(String eValue, String bitScore,
+          SequenceI seq)
   {
+    String label = "Search Scores";
+    String description = "Full sequence bit score and E-Value";
+    // one annotation row carries both values; it is attached to seq below
     try
     {
-      AlignmentAnnotation annot = null;
-      if (pp == null)
-      {
-        new AlignmentAnnotation(label,
+      AlignmentAnnotation annot = new AlignmentAnnotation(label,
               description, null);
-      }
-      else
-      {
-        annot = new AlignmentAnnotation(pp);
-        annot.label = label;
-        annot.description = description;
-      }
+      // NOTE(review): presumably already set by the constructor above - confirm
+      annot.label = label;
+      annot.description = description;
+
       annot.setCalcId(HMMSEARCH);
-      double eValue = Double.parseDouble(value);
-      annot.setScore(eValue);
+      // a value that fails to parse skips to the catch: seq gets no annotation
+      double dEValue = Double.parseDouble(eValue);
+      annot.setEValue(dEValue);
+
+      double dBitScore = Double.parseDouble(bitScore);
+      annot.setBitScore(dBitScore);
+
       annot.setSequenceRef(seq);
       seq.addAlignmentAnnotation(annot);
     } catch (NumberFormatException e)
     {
-      System.err.println("Error parsing " + label + " from " + value);
+      System.err.println("Error parsing " + label + " from " + eValue
+              + " & " + bitScore);
     }
   }
 
+  /**
+   * Swaps the sequence named on the current hit table row into seqs[index], if
+   * it is elsewhere in seqs. Expects scanner to be just past E-value and score.
+   */
+  private void checkSequenceOrder(int index, Scanner scanner)
+  {
+    String seqName = null;
+    for (int i = 0; i < 7; i++) // bias, E-value, score, bias, exp, N, Sequence
+    {
+      seqName = scanner.next();
+    }
+    if (!seqs[index].getName().equals(seqName))
+    {
+      SequenceI temp = seqs[index];
+      for (int j = 0; j < seqs.length; j++)
+      {
+        if (seqs[j].getName().equals(seqName))
+        {
+          seqs[index] = seqs[j];
+          seqs[j] = temp;
+          break;
+        }
+      }
+    }
+  }
 }