JAL-2629 can now filter by sequence e-value or bit score
authortva <tva@10.205.251.175>
Wed, 3 Jul 2019 10:07:33 +0000 (11:07 +0100)
committertva <tva@10.205.251.175>
Wed, 3 Jul 2019 10:07:33 +0000 (11:07 +0100)
resources/lang/Messages.properties
src/jalview/datamodel/AlignmentAnnotation.java
src/jalview/gui/AlignFrame.java
src/jalview/hmmer/HMMBuild.java
src/jalview/hmmer/HMMSearch.java
src/jalview/hmmer/HmmerCommand.java
src/jalview/hmmer/JackHMMER.java
src/jalview/io/SequenceAnnotationReport.java
src/jalview/jbgui/GAlignFrame.java
src/jalview/viewmodel/AlignmentViewport.java

index 9fe5615..3b22f2e 100644 (file)
@@ -133,6 +133,8 @@ action.select_highlighted_columns = Select Highlighted Columns
 tooltip.select_highlighted_columns = Press B to mark highlighted columns, Ctrl-(or Cmd)-B to toggle, and Alt-B to mark all but highlighted columns 
 action.deselect_all = Deselect all
 action.invert_selection = Invert selection
+action.filter_by_evalue = Filter by E-Value
+action.filter_by_score = Filter by Score
 action.using_jmol = Using Jmol
 action.link = Link
 action.group_link = Group Link
@@ -1055,6 +1057,7 @@ exception.ranml_couldnt_process_data = Couldn't process data as RNAML file ({0})
 exception.ranml_invalid_file = Invalid RNAML file ({0})
 exception.ranml_problem_parsing_data = Problem parsing data as RNAML ({0})
 exception.pfam_no_sequences_found = No sequences found (PFAM input)
+exception.hmmer_no_valid_sequences_found = No valid sequences found
 exception.stockholm_invalid_format = This file is not in valid STOCKHOLM format: First line does not contain '# STOCKHOLM'
 exception.couldnt_parse_sequence_line = Could not parse sequence line: {0}
 exception.unknown_annotation_detected = Unknown annotation detected: {0} {1}
index 6ac5da8..9354d55 100755 (executable)
@@ -94,6 +94,10 @@ public class AlignmentAnnotation
    */
   private long invalidrnastruc = -2;
 
+  private double bitScore;
+
+  private double eValue;
+
   /**
    * Updates the _rnasecstr field Determines the positions that base pair and
    * the positions of helices based on secondary structure from a Stockholm file
@@ -1714,4 +1718,24 @@ public class AlignmentAnnotation
     return aa;
   }
 
+  /**
+   * Returns the bit score held by this annotation, as last stored with
+   * {@link #setBitScore(double)}.
+   * 
+   * @return the stored bit score
+   */
+  public double getBitScore()
+  {
+    return bitScore;
+  }
+
+  /**
+   * Stores a bit score on this annotation.
+   * 
+   * @param bitScore
+   *          the bit score to store
+   */
+  public void setBitScore(double bitScore)
+  {
+    this.bitScore = bitScore;
+  }
+
+  /**
+   * Returns the E-value held by this annotation, as last stored with
+   * {@link #setEValue(double)}.
+   * 
+   * @return the stored E-value
+   */
+  public double getEValue()
+  {
+    return eValue;
+  }
+
+  /**
+   * Stores an E-value on this annotation.
+   * 
+   * @param eValue
+   *          the E-value to store
+   */
+  public void setEValue(double eValue)
+  {
+    this.eValue = eValue;
+  }
+
 }
index d1ebdb7..18000bd 100644 (file)
@@ -1159,6 +1159,36 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
     }
     return true;
   }
+  
+  /**
+   * Prompts for an E-value cutoff and passes it to the viewport's E-value
+   * filter. Does nothing if the prompt is cancelled.
+   */
+  @Override
+  protected void filterByEValue_actionPerformed()
+  {
+    double cutoff = inputDouble("Enter E-Value Cutoff");
+    if (!Double.isNaN(cutoff))
+    {
+      viewport.filterByEvalue(cutoff);
+    }
+  }
+
+  /**
+   * Prompts for a bit score threshold and passes it to the viewport's score
+   * filter. Does nothing if the prompt is cancelled.
+   */
+  @Override
+  protected void filterByScore_actionPerformed()
+  {
+    double threshold = inputDouble("Enter Bit Score Threshold");
+    if (!Double.isNaN(threshold))
+    {
+      viewport.filterByScore(threshold);
+    }
+  }
+
+  /**
+   * Asks the user for a number, repeating the prompt until a value greater
+   * than zero is entered or the dialog is cancelled.
+   * 
+   * @param message
+   *          the prompt shown in the input dialog
+   * @return the value entered (always greater than zero), or
+   *         {@link Double#NaN} if the user cancelled or closed the dialog
+   */
+  private double inputDouble(String message)
+  {
+    while (true)
+    {
+      String str = JOptionPane.showInputDialog(this.alignPanel, message);
+      if (str == null)
+      {
+        // Dialog cancelled or closed. Parsing null would throw a
+        // NullPointerException, and prompting again would leave the user
+        // with no way out, so report "no value" to the caller instead.
+        return Double.NaN;
+      }
+      try
+      {
+        double value = Double.parseDouble(str);
+        // written as a positive test so that a typed "NaN" is rejected too
+        if (value > 0)
+        {
+          return value;
+        }
+      }
+      catch (NumberFormatException e)
+      {
+        // not a number: fall through and ask again
+      }
+    }
+  }
 
   /**
    * Checks if the alignment contains the required number of sequences.
index 88f28dd..974d2b2 100644 (file)
@@ -231,33 +231,6 @@ public class HMMBuild extends HmmerCommand
   }
 
   /**
-   * A helper method that deletes any HMM consensus sequence from the given
-   * collection, and from the parent alignment if <code>ac</code> is a subgroup
-   * 
-   * @param ac
-   */
-  void deleteHmmSequences(AnnotatedCollectionI ac)
-  {
-    List<SequenceI> hmmSeqs = ac.getHmmSequences();
-    for (SequenceI hmmSeq : hmmSeqs)
-    {
-      if (ac instanceof SequenceGroup)
-      {
-        ((SequenceGroup) ac).deleteSequence(hmmSeq, false);
-        AnnotatedCollectionI context = ac.getContext();
-        if (context != null && context instanceof AlignmentI)
-        {
-          ((AlignmentI) context).deleteSequence(hmmSeq);
-        }
-      }
-      else
-      {
-        ((AlignmentI) ac).deleteSequence(hmmSeq);
-      }
-    }
-  }
-
-  /**
    * Constructs and executes the hmmbuild command as a separate process
    * 
    * @param sequencesFile
index 6cbe0c8..3faa4f2 100644 (file)
@@ -450,17 +450,10 @@ public class HMMSearch extends HmmerCommand
             && !"".equals(line))
     {
       SequenceI seq = seqs[index];
-      AlignmentAnnotation pp = seq
-              .getAlignmentAnnotations("", "Posterior Probability")
-              .get(0);
       Scanner scanner = new Scanner(line);
-      String str = scanner.next();
-      addScoreAnnotation(str, seq, "hmmsearch E-value",
-              "Full sequence E-value", pp);
-      str = scanner.next();
-      addScoreAnnotation(str, seq, "hmmsearch Score",
-              "Full sequence bit score", pp);
-      seq.removeAlignmentAnnotation(pp);
+      String evalue = scanner.next();
+      String score = scanner.next();
+      addScoreAnnotations(evalue, score, seq);
       scanner.close();
       line = br.readLine();
       index++;
@@ -469,58 +462,35 @@ public class HMMSearch extends HmmerCommand
     br.close();
   }
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description)
-  {
-    addScoreAnnotation(value, seq, label, description, null);
-  }
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   * @param pp
-   *          existing posterior probability annotation - values copied to new
-   *          annotation row
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description, AlignmentAnnotation pp)
+  /**
+   * A helper method that adds a single "Search Scores" annotation, holding
+   * both the E-value and the bit score, to a sequence. If either value cannot
+   * be parsed as a number, an error is written to stderr and no annotation is
+   * added.
+   * 
+   * @param eValue
+   *          the E-value, as text
+   * @param bitScore
+   *          the bit score, as text
+   * @param seq
+   *          the sequence to annotate
+   */
+  protected void addScoreAnnotations(String eValue, String bitScore,
+          SequenceI seq)
   {
+    String label = "Search Scores";
+    String description = "Full sequence bit score and E-Value";
+
     try
     {
-      AlignmentAnnotation annot = null;
-      if (pp == null)
-      {
-        new AlignmentAnnotation(label,
+      AlignmentAnnotation annot = new AlignmentAnnotation(label,
               description, null);
-      }
-      else
-      {
-        annot = new AlignmentAnnotation(pp);
-        annot.label = label;
-        annot.description = description;
-      }
+
+      annot.label = label;
+      annot.description = description;
+
       annot.setCalcId(HMMSEARCH);
-      double eValue = Double.parseDouble(value);
-      annot.setScore(eValue);
+
+      double dEValue = Double.parseDouble(eValue);
+      annot.setEValue(dEValue);
+
+      double dBitScore = Double.parseDouble(bitScore);
+      annot.setBitScore(dBitScore);
+
       annot.setSequenceRef(seq);
       seq.addAlignmentAnnotation(annot);
     } catch (NumberFormatException e)
     {
-      System.err.println("Error parsing " + label + " from " + value);
+      System.err.println("Error parsing " + label + " from " + eValue
+              + " & " + bitScore);
     }
   }
 
index 85f64bf..79fcb4c 100644 (file)
@@ -8,6 +8,7 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.AnnotatedCollectionI;
 import jalview.datamodel.Annotation;
 import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
 import jalview.gui.JvOptionPane;
@@ -228,7 +229,7 @@ public abstract class HmmerCommand implements Runnable
 
   /**
    * Exports an alignment, and reference (RF) annotation if present, to the
-   * specified file, in Stockholm format
+   * specified file, in Stockholm format, removing all HMM sequences
    * 
    * @param seqs
    * @param toFile
@@ -283,10 +284,17 @@ public abstract class HmmerCommand implements Runnable
     writer.close();
   }
 
-  public void exportFasta(SequenceI[] seqs, File toFile)
+  /**
+   * Exports the given alignment without any annotations to a FASTA file
+   * 
+   * @param al
+   * @param toFile
+   */
+  public void exportFasta(AlignmentI al, File toFile)
   {
     FastaFile file = new FastaFile();
-    String output = file.print(seqs, false);
+
+    String output = file.print(al.getSequencesArray(), false);
     PrintWriter writer;
     try
     {
@@ -440,4 +448,31 @@ public abstract class HmmerCommand implements Runnable
 
     return path;
   }
+
+  /**
+   * Removes every HMM consensus sequence held by the given collection. When
+   * <code>ac</code> is a sequence group, each HMM sequence is removed from the
+   * group and also from the group's context, provided that context is an
+   * alignment; otherwise <code>ac</code> is treated as the alignment itself.
+   * 
+   * @param ac
+   *          the group or alignment to strip of HMM sequences
+   */
+  void deleteHmmSequences(AnnotatedCollectionI ac)
+  {
+    boolean isGroup = ac instanceof SequenceGroup;
+    for (SequenceI consensus : ac.getHmmSequences())
+    {
+      if (!isGroup)
+      {
+        ((AlignmentI) ac).deleteSequence(consensus);
+        continue;
+      }
+      ((SequenceGroup) ac).deleteSequence(consensus, false);
+      AnnotatedCollectionI parent = ac.getContext();
+      // instanceof is false for null, so no separate null test is needed
+      if (parent instanceof AlignmentI)
+      {
+        ((AlignmentI) parent).deleteSequence(consensus);
+      }
+    }
+  }
 }
index 85b97f9..1815ac4 100644 (file)
@@ -246,7 +246,8 @@ public class JackHMMER extends HmmerCommand
        */
       databaseFile = FileUtils.createTempFile("database", ".fa");
       AlignmentI al = af.getViewport().getAlignment();
-      exportFasta(al.getSequencesArray(), databaseFile);
+      deleteHmmSequences(al);
+      exportFasta(al, databaseFile);
     }
 
     args.add(getFilePath(seqFile, true));
@@ -325,22 +326,12 @@ public class JackHMMER extends HmmerCommand
             && !"".equals(line))
     {
       SequenceI seq = seqs[index];
-      AlignmentAnnotation pp = null;
-      if (seq.getAlignmentAnnotations("", "Posterior Probability")
-              .size() != 0)
-      {
-        pp = seq.getAlignmentAnnotations("", "Posterior Probability")
-                .get(0);
-      }
+
       Scanner scanner = new Scanner(line);
-      String str = scanner.next();
-      str = scanner.next();
-      addScoreAnnotation(str, seq, "jackhmmer E-value",
-              "Full sequence E-value", pp);
-      str = scanner.next();
-      addScoreAnnotation(str, seq, "jackhmmer Score",
-              "Full sequence bit score", pp);
-      seq.removeAlignmentAnnotation(pp);
+      String evalue = scanner.next();
+      evalue = scanner.next();
+      String score = scanner.next();
+      addScoreAnnotations(evalue, score, seq);
       scanner.close();
       line = br.readLine();
       index++;
@@ -349,58 +340,37 @@ public class JackHMMER extends HmmerCommand
     br.close();
   }
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description)
+  /**
+   * A helper method that adds a single "Search Scores" annotation, holding
+   * both the E-value and the bit score, to a sequence. If either value cannot
+   * be parsed as a number, an error is written to stderr and no annotation is
+   * added.
+   * 
+   * @param eValue
+   *          the E-value, as text
+   * @param bitScore
+   *          the bit score, as text
+   * @param seq
+   *          the sequence to annotate
+   */
+  protected void addScoreAnnotations(String eValue, String bitScore,
+          SequenceI seq)
   {
-    addScoreAnnotation(value, seq, label, description, null);
-  }
+    String label = "Search Scores";
+    String description = "Full sequence bit score and E-Value";
 
-  /**
-   * A helper method that adds one score-only (non-positional) annotation to a
-   * sequence
-   * 
-   * @param value
-   * @param seq
-   * @param label
-   * @param description
-   * @param pp
-   *                      existing posterior probability annotation - values
-   *                      copied to new annotation row
-   */
-  protected void addScoreAnnotation(String value, SequenceI seq,
-          String label, String description, AlignmentAnnotation pp)
-  {
     try
     {
-      AlignmentAnnotation annot = null;
-      if (pp == null)
-      {
-        annot = new AlignmentAnnotation(label, description, null);
-      }
-      else
-      {
-        annot = new AlignmentAnnotation(pp);
-        annot.label = label;
-        annot.description = description;
-      }
+      AlignmentAnnotation annot = new AlignmentAnnotation(label,
+              description, null);
+
+      annot.label = label;
+      annot.description = description;
+
       annot.setCalcId(JACKHMMER);
-      double eValue = Double.parseDouble(value);
-      annot.setScore(eValue);
+
+      double dEValue = Double.parseDouble(eValue);
+      annot.setEValue(dEValue);
+
+      // store the bit score in its own field; writing it through setEValue
+      // would overwrite the E-value and leave the bit score unset
+      double dBitScore = Double.parseDouble(bitScore);
+      annot.setBitScore(dBitScore);
+
       annot.setSequenceRef(seq);
       seq.addAlignmentAnnotation(annot);
     } catch (NumberFormatException e)
     {
-      System.err.println("Error parsing " + label + " from " + value);
+      System.err.println("Error parsing " + label + " from " + eValue
+              + " & " + bitScore);
     }
   }
 
+
+
 }
index 5ada355..d3191f9 100644 (file)
@@ -63,7 +63,7 @@ public class SequenceAnnotationReport
    * Comparator to order DBRefEntry by Source + accession id (case-insensitive),
    * with 'Primary' sources placed before others, and 'chromosome' first of all
    */
-  private static Comparator<DBRefEntry> comparator = new Comparator<DBRefEntry>()
+  private static Comparator<DBRefEntry> comparator = new Comparator<>()
   {
 
     @Override
@@ -394,7 +394,23 @@ public class SequenceAnnotationReport
         maxWidth = Math.max(maxWidth, sz);
       }
     }
+
+    sb.append("<br>");
+    if (sequence.getAnnotation("Search Scores") != null)
+    {
+      String eValue = " E-Value: "
+              + sequence.getAnnotation("Search Scores")[0].getEValue();
+      String bitScore = " Bit Score: "
+              + sequence.getAnnotation("Search Scores")[0].getBitScore();
+      sb.append(eValue);
+      sb.append("<br>");
+      sb.append(bitScore);
+      maxWidth = Math.max(maxWidth, eValue.length());
+      maxWidth = Math.max(maxWidth, bitScore.length());
+    }
+    sb.append("<br>");
     sb.append("</i>");
+
     return maxWidth;
   }
 
index f650707..62165cd 100755 (executable)
@@ -532,6 +532,30 @@ public class GAlignFrame extends JInternalFrame
       }
     };
     addMenuActionAndAccelerator(keyStroke, removeRedundancyMenuItem, al);
+
+    JMenuItem filterByEValue = new JMenuItem(
+            MessageManager.getString("action.filter_by_evalue"));
+    filterByEValue.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        filterByEValue_actionPerformed();
+      }
+
+    });
+
+    JMenuItem filterByScore = new JMenuItem(
+            MessageManager.getString("action.filter_by_score"));
+    filterByScore.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        filterByScore_actionPerformed();
+      }
+
+    });
   
     JMenuItem pairwiseAlignmentMenuItem = new JMenuItem(
             MessageManager.getString("action.pairwise_alignment"));
@@ -1744,6 +1768,18 @@ public class GAlignFrame extends JInternalFrame
         selectHighlightedColumns_actionPerformed(actionEvent);
       }
     };
+    JMenuItem Filter = new JMenuItem(
+            MessageManager.getString("action.select_highlighted_columns"));
+    selectHighlighted.setToolTipText(
+            MessageManager.getString("tooltip.select_highlighted_columns"));
+    al = new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent actionEvent)
+      {
+        selectHighlightedColumns_actionPerformed(actionEvent);
+      }
+    };
     selectHighlighted.addActionListener(al);
     JMenu tooltipSettingsMenu = new JMenu(
             MessageManager.getString("label.sequence_id_tooltip"));
@@ -1805,6 +1841,9 @@ public class GAlignFrame extends JInternalFrame
     // editMenu.add(justifyRightMenuItem);
     // editMenu.addSeparator();
     editMenu.add(padGapsMenuitem);
+    editMenu.addSeparator();
+    editMenu.add(filterByEValue);
+    editMenu.add(filterByScore);
   
     showMenu.add(showAllColumns);
     showMenu.add(showAllSeqs);
@@ -2656,6 +2695,14 @@ public class GAlignFrame extends JInternalFrame
   {
   }
 
+  /**
+   * Called when the "Filter by E-Value" menu item is chosen. Does nothing
+   * here; subclasses override it to supply the behaviour.
+   */
+  protected void filterByEValue_actionPerformed()
+  {
+  }
+
+  /**
+   * Called when the "Filter by Score" menu item is chosen. Does nothing here;
+   * subclasses override it to supply the behaviour.
+   */
+  protected void filterByScore_actionPerformed()
+  {
+  }
+
   protected void scaleRight_actionPerformed(ActionEvent e)
   {
   }
index 23c1d00..f12e55e 100644 (file)
@@ -3182,4 +3182,46 @@ public abstract class AlignmentViewport
       codingComplement.setUpdateStructures(needToUpdateStructureViews);
     }
   }
+
+  /**
+   * Hides every sequence that fails an E-value cutoff. A sequence is hidden
+   * when it has no HMM and either carries no "Search Scores" annotation or its
+   * first "Search Scores" annotation has an E-value greater than the cutoff.
+   * Sequences are only hidden, never deleted.
+   * 
+   * @param eValue
+   *          the largest E-value a sequence may have and stay visible
+   */
+  public void filterByEvalue(double eValue)
+  {
+    for (SequenceI seq : alignment.getSequencesArray())
+    {
+      // look the annotation up once rather than once per test
+      jalview.datamodel.AlignmentAnnotation[] scores = seq
+              .getAnnotation("Search Scores");
+      boolean failsCutoff = scores == null || scores.length == 0
+              || scores[0].getEValue() > eValue;
+      if (failsCutoff && seq.getHMM() == null)
+      {
+        hideSequence(new SequenceI[] { seq });
+      }
+    }
+  }
+
+  /**
+   * Hides every sequence that fails a bit score threshold. A sequence is
+   * hidden when it has no HMM and either carries no "Search Scores" annotation
+   * or its first "Search Scores" annotation has a bit score lower than the
+   * threshold. Sequences are only hidden, never deleted.
+   * 
+   * @param score
+   *          the smallest bit score a sequence may have and stay visible
+   */
+  public void filterByScore(double score)
+  {
+    for (SequenceI seq : alignment.getSequencesArray())
+    {
+      // look the annotation up once rather than once per test
+      jalview.datamodel.AlignmentAnnotation[] scores = seq
+              .getAnnotation("Search Scores");
+      boolean failsThreshold = scores == null || scores.length == 0
+              || scores[0].getBitScore() < score;
+      if (failsThreshold && seq.getHMM() == null)
+      {
+        hideSequence(new SequenceI[] { seq });
+      }
+    }
+  }
 }