From 5f924dd90e57718421edc0442055d4f2ea5b68e6 Mon Sep 17 00:00:00 2001 From: Thomas <170013569@dundee.ac.uk> Date: Thu, 11 Jul 2019 14:47:42 +0100 Subject: [PATCH 1/1] JAL-2629 hmm search UI and structural improvements, inclusion thresholds --- resources/lang/Messages.properties | 25 +- src/jalview/hmmer/HMMERParamStore.java | 96 +++++-- src/jalview/hmmer/HMMSearch.java | 251 +---------------- src/jalview/hmmer/HmmerCommand.java | 18 +- src/jalview/hmmer/JackHMMER.java | 236 +--------------- src/jalview/hmmer/Search.java | 294 ++++++++++++++++++++ src/jalview/schemes/HMMMatchScoreColourScheme.java | 5 +- 7 files changed, 409 insertions(+), 516 deletions(-) create mode 100644 src/jalview/hmmer/Search.java diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 4ff30d4..a2d5ab8 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -1368,6 +1368,7 @@ label.trim_termini = Trim Non-Matching Termini label.trim_termini_desc = If true, non-matching regions on either end of the resulting alignment are removed. 
label.no_of_sequences = Number of sequences returned label.reporting_cutoff = Reporting Cut-off +label.inclusion_threshold = Inclusion Threshold label.freq_alignment = Use alignment background frequencies label.freq_uniprot = Use Uniprot background frequencies label.hmmalign_options = hmmalign options @@ -1381,19 +1382,27 @@ label.auto_align_seqs = Automatically Align Fetched Sequences label.new_returned = new sequences returned label.use_accessions = Return Accessions label.check_for_new_sequences = Return Number of New Sequences -label.seq_evalue = Sequence E-value Cut-off label.evalue = E-Value -label.seq_score = Sequence Score Threshold -label.dom_evalue = Domain E-value Cut-off -label.dom_score = Domain Score Threshold +label.reporting_seq_evalue = Reporting Sequence E-value Cut-off +label.reporting_seq_score = Reporting Sequence Score Threshold +label.reporting_dom_evalue = Reporting Domain E-value Cut-off +label.reporting_dom_score = Reporting Domain Score Threshold +label.inclusion_seq_evalue = Inclusion Sequence E-value Cut-off +label.inclusion_seq_score = Inclusion Sequence Score Threshold +label.inclusion_dom_evalue = Inclusion Domain E-value Cut-off +label.inclusion_dom_score = Inclusion Domain Score Threshold label.number_of_results_desc = The maximum number of hmmsearch results to display label.auto_align_seqs_desc = If true, all fetched sequences will be aligned to the hidden Markov model with which the search was performed label.check_for_new_sequences_desc = Display number of new sequences returned from hmmsearch compared to the previous alignment label.use_accessions_desc = If true, the accession number of each sequence is returned, rather than that sequence's name -label.seq_e_value_desc = The E-value cutoff for returned sequences (hmmsearch -E) -label.seq_score_desc = The score threshold for returned sequences (hmmsearch -T) -label.dom_e_value_desc = The E-value cutoff for returned domains (hmmsearch --domE) -label.dom_score_desc = The score 
threshold for returned domains (hmmsearch --domT) +label.reporting_seq_e_value_desc = The E-value cutoff for returned sequences +label.reporting_seq_score_desc = The score threshold for returned sequences +label.reporting_dom_e_value_desc = The E-value cutoff for returned domains +label.reporting_dom_score_desc = The score threshold for returned domains +label.inclusion_seq_e_value_desc = Sequences with an E-value less than this cut-off are classed as significant +label.inclusion_seq_score_desc = Sequences with a bit score greater than this threshold are classed as significant +label.inclusion_dom_e_value_desc = Domains with an E-value less than this cut-off are classed as significant +label.inclusion_dom_score_desc = Domains with a bit score greater than this threshold are classed as significant label.add_database = Add Database label.this_alignment = This alignment warn.invalid_format = This is not a valid database file format. The current supported formats are Fasta, Stockholm and Pfam. 
diff --git a/src/jalview/hmmer/HMMERParamStore.java b/src/jalview/hmmer/HMMERParamStore.java index f7c33b1..1aff332 100644 --- a/src/jalview/hmmer/HMMERParamStore.java +++ b/src/jalview/hmmer/HMMERParamStore.java @@ -146,28 +146,61 @@ public final class HMMERParamStore implements ParamDatastoreI 100, 0, 100000)); args.add(new RadioChoiceParameter( MessageManager.getString(HMMSearch.REPORTING_CUTOFF_KEY), null, - Arrays.asList(MessageManager.getString(HMMSearch.CUTOFF_NONE), - MessageManager.getString(HMMSearch.CUTOFF_EVALUE), + Arrays.asList(MessageManager.getString(HMMSearch.CUTOFF_EVALUE), MessageManager.getString(HMMSearch.CUTOFF_SCORE)), MessageManager.getString(HMMSearch.CUTOFF_EVALUE))); args.add(new LogarithmicParameter( - MessageManager.getString(HMMSearch.SEQ_EVALUE_KEY), - MessageManager.getString("label.seq_e_value_desc"), false, 1D, + MessageManager.getString(HMMSearch.REPORTING_SEQ_EVALUE_KEY), + MessageManager.getString("label.reporting_seq_e_value_desc"), + false, 1D, 1E-100, 10D)); args.add(new LogarithmicParameter( - MessageManager.getString(HMMSearch.DOM_EVALUE_KEY), - MessageManager.getString("label.dom_e_value_desc"), false, 1D, + MessageManager.getString(HMMSearch.REPORTING_DOM_EVALUE_KEY), + MessageManager.getString("label.reporting_dom_e_value_desc"), + false, 1D, 1E-100, 10D)); args.add( new DoubleParameter( - MessageManager.getString(HMMSearch.SEQ_SCORE_KEY), - MessageManager.getString("label.seq_score_desc"), false, + MessageManager + .getString(HMMSearch.REPORTING_SEQ_SCORE_KEY), + MessageManager.getString( + "label.reporting_seq_score_desc"), + false, 0d, 0d, 1000d)); args.add( new DoubleParameter( - MessageManager.getString(HMMSearch.DOM_SCORE_KEY), - MessageManager.getString("label.dom_score_desc"), false, + MessageManager + .getString(HMMSearch.REPORTING_DOM_SCORE_KEY), + MessageManager.getString( + "label.reporting_dom_score_desc"), + false, 0d, 0d, 1000d)); + args.add(new RadioChoiceParameter( + 
MessageManager.getString(HMMSearch.INCLUSION_THRESHOLD_KEY), + null, + Arrays.asList(MessageManager.getString(HMMSearch.CUTOFF_EVALUE), + MessageManager.getString(HMMSearch.CUTOFF_SCORE)), + MessageManager.getString(HMMSearch.CUTOFF_EVALUE))); + args.add(new LogarithmicParameter( + MessageManager.getString(HMMSearch.INCLUSION_SEQ_EVALUE_KEY), + MessageManager.getString("label.inclusion_seq_e_value_desc"), + false, 1D, + 1E-100, 10D)); + args.add(new LogarithmicParameter( + MessageManager.getString(HMMSearch.INCLUSION_DOM_EVALUE_KEY), + MessageManager.getString("label.inclusion_dom_e_value_desc"), + false, 1D, + 1E-100, 10D)); + args.add(new DoubleParameter( + MessageManager.getString(HMMSearch.INCLUSION_SEQ_SCORE_KEY), + MessageManager.getString("label.inclusion_seq_score_desc"), + false, 0d, 0d, + 1000d)); + args.add(new DoubleParameter( + MessageManager.getString(HMMSearch.INCLUSION_DOM_SCORE_KEY), + MessageManager.getString("label.inclusion_dom_score_desc"), + false, 0d, 0d, + 1000d)); } /** @@ -195,21 +228,48 @@ public final class HMMERParamStore implements ParamDatastoreI MessageManager.getString(JackHMMER.CUTOFF_SCORE)), MessageManager.getString(JackHMMER.CUTOFF_EVALUE))); args.add(new LogarithmicParameter( - MessageManager.getString(JackHMMER.SEQ_EVALUE_KEY), - MessageManager.getString("label.seq_e_value_desc"), false, 1D, + MessageManager.getString(JackHMMER.REPORTING_SEQ_EVALUE_KEY), + MessageManager.getString("label.reporting_seq_e_value_desc"), + false, 1D, 1E-38, 10D)); args.add(new LogarithmicParameter( - MessageManager.getString(JackHMMER.DOM_EVALUE_KEY), - MessageManager.getString("label.dom_e_value_desc"), false, 1D, + MessageManager.getString(JackHMMER.REPORTING_DOM_EVALUE_KEY), + MessageManager.getString( + "label.reporting_dom_e_value_desc"), + false, 1D, 1E-38, 10D)); args.add(new DoubleParameter( - MessageManager.getString(JackHMMER.SEQ_SCORE_KEY), - MessageManager.getString("label.seq_score_desc"), false, 0d, 0d, + 
MessageManager.getString(JackHMMER.REPORTING_SEQ_SCORE_KEY), + MessageManager.getString("label.reporting_seq_score_desc"), + false, 0d, 0d, 1000d)); args.add(new DoubleParameter( - MessageManager.getString(JackHMMER.DOM_SCORE_KEY), - MessageManager.getString("label.dom_score_desc"), false, 0d, 0d, + MessageManager.getString(JackHMMER.REPORTING_DOM_SCORE_KEY), + MessageManager.getString("label.reporting_dom_score_desc"), + false, 0d, 0d, 1000d)); + args.add(new RadioChoiceParameter( + MessageManager.getString(HMMSearch.INCLUSION_THRESHOLD_KEY), + null, + Arrays.asList(MessageManager.getString(HMMSearch.CUTOFF_EVALUE), + MessageManager.getString(HMMSearch.CUTOFF_SCORE)), + MessageManager.getString(HmmerCommand.CUTOFF_EVALUE))); + args.add(new LogarithmicParameter( + MessageManager.getString(HMMSearch.INCLUSION_SEQ_EVALUE_KEY), + MessageManager.getString("label.inclusion_seq_e_value_desc"), + false, 1D, 1E-100, 10D)); + args.add(new LogarithmicParameter( + MessageManager.getString(HMMSearch.INCLUSION_DOM_EVALUE_KEY), + MessageManager.getString("label.inclusion_dom_e_value_desc"), + false, 1D, 1E-100, 10D)); + args.add(new DoubleParameter( + MessageManager.getString(HMMSearch.INCLUSION_SEQ_SCORE_KEY), + MessageManager.getString("label.inclusion_seq_score_desc"), + false, 0d, 0d, 1000d)); + args.add(new DoubleParameter( + MessageManager.getString(HMMSearch.INCLUSION_DOM_SCORE_KEY), + MessageManager.getString("label.inclusion_dom_score_desc"), + false, 0d, 0d, 1000d)); } /** diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java index a06f186..f670633 100644 --- a/src/jalview/hmmer/HMMSearch.java +++ b/src/jalview/hmmer/HMMSearch.java @@ -25,15 +25,12 @@ import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Hashtable; import java.util.List; -import java.util.Scanner; import javax.swing.JOptionPane; -public class HMMSearch extends HmmerCommand +public class 
HMMSearch extends Search { - static final String HMMSEARCH = "hmmsearch"; boolean realign = false; @@ -43,13 +40,6 @@ public class HMMSearch extends HmmerCommand int seqsToReturn = Integer.MAX_VALUE; - boolean searchAlignment = true; - - SequenceI[] seqs; - - private String databaseName; - - Hashtable sequencesHash; /** * Constructor for the HMMSearchThread @@ -142,149 +132,6 @@ public class HMMSearch extends HmmerCommand return runCommand(args); } - /** - * Appends command line arguments to the given list, to specify input and - * output files for the search, and any additional options that may have been - * passed from the parameters dialog - * - * @param args - * @param searchOutputFile - * @param hitsAlignmentFile - * @param hmmFile - * @throws IOException - */ - protected void buildArguments(List args, File searchOutputFile, - File hitsAlignmentFile, File hmmFile) throws IOException - { - args.add("-o"); - args.add(getFilePath(searchOutputFile, true)); - args.add("-A"); - args.add(getFilePath(hitsAlignmentFile, true)); - - boolean dbFound = false; - String dbPath = ""; - File databaseFile = null; - - boolean useEvalueCutoff = false; - boolean useScoreCutoff = false; - String seqEvalueCutoff = null; - String domEvalueCutoff = null; - String seqScoreCutoff = null; - String domScoreCutoff = null; - databaseName = "Alignment"; - - if (params != null) - { - for (ArgumentI arg : params) - { - String name = arg.getName(); - if (MessageManager.getString(NUMBER_OF_RESULTS_KEY) - .equals(name)) - { - seqsToReturn = Integer.parseInt(arg.getValue()); - } - else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY) - .equals(name)) - { - realign = true; - } - else if (MessageManager.getString(USE_ACCESSIONS_KEY) - .equals(name)) - { - args.add("--acc"); - } - else if (MessageManager.getString(REPORTING_CUTOFF_KEY) - .equals(name)) - { - if (MessageManager.getString(CUTOFF_EVALUE) - .equals(arg.getValue())) - { - useEvalueCutoff = true; - } - else if 
(MessageManager.getString(CUTOFF_SCORE) - .equals(arg.getValue())) - { - useScoreCutoff = true; - } - } - else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) - { - seqEvalueCutoff = arg.getValue(); - } - else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name)) - { - seqScoreCutoff = arg.getValue(); - } - else if (MessageManager.getString(DOM_EVALUE_KEY) - .equals(name)) - { - domEvalueCutoff = arg.getValue(); - } - else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) - { - domScoreCutoff = arg.getValue(); - } - else if (MessageManager.getString(TRIM_TERMINI_KEY) - .equals(name)) - { - trim = true; - } - else if (MessageManager.getString(DATABASE_KEY).equals(name)) - { - databaseFile = new File(arg.getValue()); - if (!arg.getValue().isEmpty()) - { - searchAlignment = false; - } - } - else if (MessageManager.getString(RETURN_N_NEW_SEQ).equals(name)) - { - returnNoOfNewSeqs = true; - } - } - } - - if (useEvalueCutoff) - { - args.add("-E"); - args.add(seqEvalueCutoff); - args.add("--domE"); - args.add(domEvalueCutoff); - } - else if (useScoreCutoff) - { - args.add("-T"); - args.add(seqScoreCutoff); - args.add("--domT"); - args.add(domScoreCutoff); - } - -// if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) -// .equals(dbPath)) - if (searchAlignment) - { - /* - * no external database specified for search, so - * export current alignment as 'database' to search, - * excluding any HMM consensus sequences it contains - */ - databaseFile = FileUtils.createTempFile("database", ".sto"); - AlignmentI al = af.getViewport().getAlignment(); - AlignmentI copy = new Alignment(al); - deleteHmmSequences(copy); - - if (searchAlignment) - { - sequencesHash = stashSequences(copy.getSequencesArray()); - } - - exportStockholm(copy.getSequencesArray(), databaseFile, null); - - } - - args.add(getFilePath(hmmFile, true)); - args.add(getFilePath(databaseFile, true)); - } /** * Imports the data from the temporary file to which the output of hmmsearch @@ 
-466,100 +313,4 @@ public class HMMSearch extends HmmerCommand } } - /** - * Reads in the scores table output by hmmsearch and adds annotation to - * sequences for E-value and bit score - * - * @param inputTableTemp - * @throws IOException - */ - void readTable(File inputTableTemp) throws IOException - { - BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); - String line = ""; - while (!line.startsWith("Query:")) - { - line = br.readLine(); - } - while (!line.contains("-------")) - { - line = br.readLine(); - } - line = br.readLine(); - - int index = 0; - while (!" ------ inclusion threshold ------".equals(line) - && !"".equals(line)) - { - Scanner scanner = new Scanner(line); - String evalue = scanner.next(); - String score = scanner.next(); - checkSequenceOrder(index, scanner); - SequenceI seq = seqs[index]; - addScoreAnnotations(evalue, score, seq); - scanner.close(); - line = br.readLine(); - index++; - } - - br.close(); - } - - - protected void addScoreAnnotations(String eValue, String bitScore, - SequenceI seq) - { - String label = "Search Scores"; - String description = "Full sequence bit score and E-Value"; - - try - { - AlignmentAnnotation annot = new AlignmentAnnotation(label, - description, null); - - annot.label = label; - annot.description = description; - - annot.setCalcId(HMMSEARCH); - - double dEValue = Double.parseDouble(eValue); - annot.setEValue(dEValue); - - double dBitScore = Double.parseDouble(bitScore); - annot.setBitScore(dBitScore); - - annot.setSequenceRef(seq); - seq.addAlignmentAnnotation(annot); - } catch (NumberFormatException e) - { - System.err.println("Error parsing " + label + " from " + eValue - + " & " + bitScore); - } - } - - private void checkSequenceOrder(int index, Scanner scanner) - { - String seqName = null; - - for (int i = 0; i < 8; i++) - { - seqName = scanner.next(); - } - - if (!seqs[index].getName().equals(seqName)) - { - SequenceI temp = seqs[index]; - - for (int j = 0; j < seqs.length; j++) - { - 
if (seqs[j].getName().equals(seqName)) - { - seqs[index] = seqs[j]; - seqs[j] = temp; - break; - } - } - } - } - } diff --git a/src/jalview/hmmer/HmmerCommand.java b/src/jalview/hmmer/HmmerCommand.java index a30c524..b5698eb 100644 --- a/src/jalview/hmmer/HmmerCommand.java +++ b/src/jalview/hmmer/HmmerCommand.java @@ -72,16 +72,26 @@ public abstract class HmmerCommand implements Runnable static final String CUTOFF_EVALUE = "label.evalue"; - static final String SEQ_EVALUE_KEY = "label.seq_evalue"; + static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue"; - static final String DOM_EVALUE_KEY = "label.dom_evalue"; + static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue"; - static final String SEQ_SCORE_KEY = "label.seq_score"; + static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score"; - static final String DOM_SCORE_KEY = "label.dom_score"; + static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score"; + + static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue"; + + static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue"; + + static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score"; + + static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score"; static final String ARG_TRIM = "--trim"; + static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold"; + /** * Constructor * diff --git a/src/jalview/hmmer/JackHMMER.java b/src/jalview/hmmer/JackHMMER.java index e6bb5f4..d822439 100644 --- a/src/jalview/hmmer/JackHMMER.java +++ b/src/jalview/hmmer/JackHMMER.java @@ -2,7 +2,6 @@ package jalview.hmmer; import jalview.bin.Cache; import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; @@ -20,29 +19,12 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import 
java.util.ArrayList; -import java.util.Hashtable; import java.util.List; -import java.util.Scanner; import javax.swing.JOptionPane; -public class JackHMMER extends HmmerCommand +public class JackHMMER extends Search { - static final String JACKHMMER = "jackhmmer"; - - boolean realign = false; - - boolean trim = false; - - int seqsToReturn = Integer.MAX_VALUE; - - SequenceI[] seqs; - - private String databaseName; - - boolean searchAlignment = true; - - Hashtable sequencesHash; /** * Constructor for the JackhmmerThread @@ -136,123 +118,6 @@ public class JackHMMER extends HmmerCommand } /** - * Appends command line arguments to the given list, to specify input and output - * files for the search, and any additional options that may have been passed - * from the parameters dialog - * - * @param args - * @param searchOutputFile - * @param hitsAlignmentFile - * @param seqFile - * @throws IOException - */ - protected void buildArguments(List args, File searchOutputFile, - File hitsAlignmentFile, File seqFile) throws IOException - { - args.add("-o"); - args.add(getFilePath(searchOutputFile, true)); - args.add("-A"); - args.add(getFilePath(hitsAlignmentFile, true)); - - File databaseFile = null; - - boolean useEvalueCutoff = false; - boolean useScoreCutoff = false; - String seqEvalueCutoff = null; - String domEvalueCutoff = null; - String seqScoreCutoff = null; - String domScoreCutoff = null; - databaseName = "Alignment"; - - if (params != null) - { - for (ArgumentI arg : params) - { - String name = arg.getName(); - - if (MessageManager.getString(REPORTING_CUTOFF_KEY) - .equals(name)) - { - if (MessageManager.getString(CUTOFF_EVALUE) - .equals(arg.getValue())) - { - useEvalueCutoff = true; - } - else if (MessageManager.getString(CUTOFF_SCORE) - .equals(arg.getValue())) - { - useScoreCutoff = true; - } - } - else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) - { - seqEvalueCutoff = arg.getValue(); - } - else if 
(MessageManager.getString(SEQ_SCORE_KEY).equals(name)) - { - seqScoreCutoff = arg.getValue(); - } - else if (MessageManager.getString(DOM_EVALUE_KEY).equals(name)) - { - domEvalueCutoff = arg.getValue(); - } - else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) - { - domScoreCutoff = arg.getValue(); - } - else if (MessageManager.getString(DATABASE_KEY).equals(name)) - { - databaseFile = new File(arg.getValue()); - if (!arg.getValue().isEmpty()) - { - searchAlignment = false; - } - } - } - } - - if (useEvalueCutoff) - { - args.add("-E"); - args.add(seqEvalueCutoff); - args.add("--domE"); - args.add(domEvalueCutoff); - } - else if (useScoreCutoff) - { - args.add("-T"); - args.add(seqScoreCutoff); - args.add("--domT"); - args.add(domScoreCutoff); - } - - // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) - // .equals(dbPath)) - if (searchAlignment) - { - /* - * no external database specified for search, so - * export current alignment as 'database' to search - */ - databaseFile = FileUtils.createTempFile("database", ".sto"); - AlignmentI al = af.getViewport().getAlignment(); - AlignmentI copy = new Alignment(al); - - deleteHmmSequences(copy); - - if (searchAlignment) - { - sequencesHash = stashSequences(copy.getSequencesArray()); - } - - exportStockholm(copy.getSequencesArray(), databaseFile, null); - } - - args.add(getFilePath(seqFile, true)); - args.add(getFilePath(databaseFile, true)); - } - - /** * Imports the data from the temporary file to which the output of jackhmmer was * directed. 
*/ @@ -280,7 +145,7 @@ public class JackHMMER extends HmmerCommand readTable(searchOutputFile); - int seqCount = Math.min(seqs.length, seqsToReturn); + int seqCount = seqs.length; AlignmentI al = new Alignment(seqs); @@ -303,103 +168,6 @@ public class JackHMMER extends HmmerCommand } } - /** - * Reads in the scores table output by jackhmmer and adds annotation to - * sequences for E-value and bit score - * - * @param inputTableTemp - * @throws IOException - */ - void readTable(File inputTableTemp) throws IOException - { - BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); - String line = ""; - while (!line.startsWith("Query:")) - { - line = br.readLine(); - } - while (!line.contains("-------")) - { - line = br.readLine(); - } - line = br.readLine(); - - int index = 0; - while (!" ------ inclusion threshold ------".equals(line) - && !"".equals(line)) - { - - Scanner scanner = new Scanner(line); - String evalue = scanner.next(); - evalue = scanner.next(); - checkSequenceOrder(index, scanner); - SequenceI seq = seqs[index]; - String score = scanner.next(); - addScoreAnnotations(evalue, score, seq); - scanner.close(); - line = br.readLine(); - index++; - } - - br.close(); - } - - private void checkSequenceOrder(int index, Scanner scanner) - { - String seqName = null; - - for (int i = 0; i < 8; i++) - { - seqName = scanner.next(); - } - - if (!seqs[index + 1].getName().equals(seqName)) - { - SequenceI temp = seqs[index + 1]; - - for (int j = 0; j < seqs.length; j++) - { - if (seqs[j].getName().equals(seqName)) - { - seqs[index + 1] = seqs[j]; - seqs[j] = temp; - break; - } - } - } - } - - protected void addScoreAnnotations(String eValue, String bitScore, - SequenceI seq) - { - String label = "Search Scores"; - String description = "Full sequence bit score and E-Value"; - - try - { - AlignmentAnnotation annot = new AlignmentAnnotation(label, - description, null); - - annot.label = label; - annot.description = description; - - 
annot.setCalcId(JACKHMMER); - - double dEValue = Double.parseDouble(eValue); - annot.setEValue(dEValue); - - double dBitScore = Double.parseDouble(bitScore); - annot.setBitScore(dBitScore); - - annot.setSequenceRef(seq); - seq.addAlignmentAnnotation(annot); - - } catch (NumberFormatException e) - { - System.err.println("Error parsing " + label + " from " + eValue - + " & " + bitScore); - } - } diff --git a/src/jalview/hmmer/Search.java b/src/jalview/hmmer/Search.java new file mode 100644 index 0000000..24bd6b7 --- /dev/null +++ b/src/jalview/hmmer/Search.java @@ -0,0 +1,294 @@ +package jalview.hmmer; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.util.FileUtils; +import jalview.util.MessageManager; +import jalview.ws.params.ArgumentI; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Hashtable; +import java.util.List; +import java.util.Scanner; + +public abstract class Search extends HmmerCommand +{ + + static final String JACKHMMER = "jackhmmer"; + + static final String HMMSEARCH = "hmmsearch"; + + boolean realign = false; + + boolean trim = false; + + SequenceI[] seqs; + + String databaseName; + + boolean searchAlignment = true; + + Hashtable sequencesHash; + + public Search(AlignFrame alignFrame, List args) + { + super(alignFrame, args); + } + + @Override + public void run() + { + } + + /** + * Reads in the scores table output by jackhmmer and adds annotation to + * sequences for E-value and bit score + * + * @param inputTableTemp + * @throws IOException + */ + void readTable(File inputTableTemp) throws IOException + { + BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); + String line = ""; + while (!line.startsWith("Query:")) + { + line = br.readLine(); + } + while (!line.contains("-------")) + { + 
line = br.readLine(); + } + line = br.readLine(); + + int index = 0; + + while (!" ------ inclusion threshold ------".equals(line) + && !"".equals(line)) + { + + Scanner scanner = new Scanner(line); + String evalue = scanner.next(); + + if (evalue.equals("+") || evalue.equals("-")) + { + evalue = scanner.next(); + } + + String score = scanner.next(); + checkSequenceOrder(index, scanner); + SequenceI seq = seqs[index]; + addScoreAnnotations(evalue, score, seq); + scanner.close(); + line = br.readLine(); + index++; + } + + br.close(); + } + + void checkSequenceOrder(int index, Scanner scanner) + { + String seqName = null; + + for (int i = 0; i < 7; i++) + { + seqName = scanner.next(); + } + + if (!seqs[index].getName().equals(seqName)) + { + SequenceI temp = seqs[index]; + + for (int j = 0; j < seqs.length; j++) + { + if (seqs[j].getName().equals(seqName)) + { + seqs[index] = seqs[j]; + seqs[j] = temp; + break; + } + } + } + } + + void addScoreAnnotations(String eValue, String bitScore, SequenceI seq) + { + String label = "Search Scores"; + String description = "Full sequence bit score and E-Value"; + + try + { + AlignmentAnnotation annot = new AlignmentAnnotation(label, + description, null); + + annot.label = label; + annot.description = description; + + annot.setCalcId(JACKHMMER); + + double dEValue = Double.parseDouble(eValue); + annot.setEValue(dEValue); + + double dBitScore = Double.parseDouble(bitScore); + annot.setBitScore(dBitScore); + + annot.setSequenceRef(seq); + seq.addAlignmentAnnotation(annot); + + } catch (NumberFormatException e) + { + System.err.println("Error parsing " + label + " from " + eValue + + " & " + bitScore); + } + } + + void buildArguments(List args, File searchOutputFile, + File hitsAlignmentFile, File queryFile) throws IOException + { + args.add("-o"); + args.add(getFilePath(searchOutputFile, true)); + args.add("-A"); + args.add(getFilePath(hitsAlignmentFile, true)); + + File databaseFile = null; + + boolean useEvalueCutoff = false; + 
boolean useScoreCutoff = false; + String seqReportingEvalueCutoff = null; + String domReportingEvalueCutoff = null; + String seqReportingScoreCutoff = null; + String domReportingScoreCutoff = null; + String seqInclusionEvalueCutoff = null; + String domInclusionEvalueCutoff = null; + String seqInclusionScoreCutoff = null; + String domInclusionScoreCutoff = null; + databaseName = "Alignment"; + + if (params != null) + { + for (ArgumentI arg : params) + { + String name = arg.getName(); + + if (MessageManager.getString(REPORTING_CUTOFF_KEY).equals(name)) + { + if (MessageManager.getString(CUTOFF_EVALUE) + .equals(arg.getValue())) + { + useEvalueCutoff = true; + } + else if (MessageManager.getString(CUTOFF_SCORE) + .equals(arg.getValue())) + { + useScoreCutoff = true; + } + } + else if (MessageManager.getString(REPORTING_SEQ_EVALUE_KEY) + .equals(name)) + { + seqReportingEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(REPORTING_SEQ_SCORE_KEY) + .equals(name)) + { + seqReportingScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(REPORTING_DOM_EVALUE_KEY) + .equals(name)) + { + domReportingEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(REPORTING_DOM_SCORE_KEY) + .equals(name)) + { + domReportingScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(INCLUSION_SEQ_EVALUE_KEY) + .equals(name)) + { + seqInclusionEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(INCLUSION_SEQ_SCORE_KEY) + .equals(name)) + { + seqInclusionScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(INCLUSION_DOM_EVALUE_KEY) + .equals(name)) + { + domInclusionEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(INCLUSION_DOM_SCORE_KEY) + .equals(name)) + { + domInclusionScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(DATABASE_KEY).equals(name)) + { + databaseFile = new File(arg.getValue()); + if (!arg.getValue().isEmpty()) + { + searchAlignment = 
false; + } + } + } + } + + if (useEvalueCutoff) + { + args.add("-E"); + args.add(seqReportingEvalueCutoff); + args.add("--domE"); + args.add(domReportingEvalueCutoff); + + args.add("--incE"); + args.add(seqInclusionEvalueCutoff); + args.add("--incdomE"); + args.add(domInclusionEvalueCutoff); + } + else if (useScoreCutoff) + { + args.add("-T"); + args.add(seqReportingScoreCutoff); + args.add("--domT"); + args.add(domReportingScoreCutoff); + + args.add("--incT"); + args.add(seqInclusionScoreCutoff); + args.add("--incdomT"); + args.add(domInclusionScoreCutoff); + } + + // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) + // .equals(dbPath)) + if (searchAlignment) + { + /* + * no external database specified for search, so + * export current alignment as 'database' to search + */ + databaseFile = FileUtils.createTempFile("database", ".sto"); + AlignmentI al = af.getViewport().getAlignment(); + AlignmentI copy = new Alignment(al); + + deleteHmmSequences(copy); + + if (searchAlignment) + { + sequencesHash = stashSequences(copy.getSequencesArray()); + } + + exportStockholm(copy.getSequencesArray(), databaseFile, null); + } + + args.add(getFilePath(queryFile, true)); + args.add(getFilePath(databaseFile, true)); + } +} diff --git a/src/jalview/schemes/HMMMatchScoreColourScheme.java b/src/jalview/schemes/HMMMatchScoreColourScheme.java index 0ee83a5..96eb26a 100644 --- a/src/jalview/schemes/HMMMatchScoreColourScheme.java +++ b/src/jalview/schemes/HMMMatchScoreColourScheme.java @@ -212,10 +212,11 @@ public class HMMMatchScoreColourScheme extends ResidueColourScheme .log(getHmm().getMatchEmissionProbability(column, symbol) / hmm.getBackgroundFrequencies().get(symbol)); - if (!probabilities.get(symbol).get(lengthBin) + if (!probabilities.containsKey(symbol) + || !probabilities.get(symbol).get(lengthBin) .containsKey(format(llr))) { - return Color.white; + return new Color(140, 140, 140); } -- 1.7.10.2