package jalview.hmmer; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.Desktop; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileParse; import jalview.io.StockholmFile; import jalview.util.FileUtils; import jalview.util.MessageManager; import jalview.ws.params.ArgumentI; import jalview.ws.params.simple.BooleanOption; import jalview.ws.params.simple.Option; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Scanner; import javax.swing.JOptionPane; public class HMMSearch extends HmmerCommand { static final String HMMSEARCH = "hmmsearch"; /* * constants for i18n lookup of passed parameter names */ static final String DATABASE_KEY = "label.database"; static final String THIS_ALIGNMENT_KEY = "label.this_alignment"; static final String USE_ACCESSIONS_KEY = "label.use_accessions"; static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs"; static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results"; static final String TRIM_TERMINI_KEY = "label.trim_termini"; static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff"; static final String CUTOFF_NONE = "None"; static final String CUTOFF_SCORE = "Score"; static final String CUTOFF_EVALUE = "E-Value"; static final String SEQ_EVALUE_KEY = "label.seq_evalue"; static final String DOM_EVALUE_KEY = "label.dom_evalue"; static final String SEQ_SCORE_KEY = "label.seq_score"; static final String DOM_SCORE_KEY = "label.dom_score"; boolean realign = false; boolean trim = false; int seqsToReturn = Integer.MAX_VALUE; SequenceI[] seqs; private String databaseName; /** * Constructor for the HMMSearchThread * * @param af */ public HMMSearch(AlignFrame af, List args) { super(af, args); } /** * Runs the HMMSearchThread: the data on the alignment or group is exported, * then the command is executed in the command line and then the data is * imported and displayed in a new frame. Call this method directly to execute * synchronously, or via start() in a new Thread for asynchronously. */ @Override public void run() { HiddenMarkovModel hmm = getHmmProfile(); if (hmm == null) { // shouldn't happen if we got this far Cache.log.error("Error: no hmm for hmmsearch"); return; } SequenceI hmmSeq = hmm.getConsensusSequence(); long msgId = System.currentTimeMillis(); af.setProgressBar(MessageManager.getString("status.running_hmmsearch"), msgId); try { File hmmFile = FileUtils.createTempFile("hmm", ".hmm"); File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment", ".sto"); File searchOutputFile = FileUtils.createTempFile("searchOutput", ".sto"); exportHmm(hmm, hmmFile.getAbsoluteFile()); boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile); if (!ran) { JvOptionPane.showInternalMessageDialog(af, MessageManager .formatMessage("warn.command_failed", "hmmsearch")); return; } importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile); // TODO make realignment of search results a step at this level // and make it conditional on this.realign } catch (IOException | InterruptedException e) { e.printStackTrace(); } finally { af.setProgressBar("", msgId); } } /** * Executes an hmmsearch with the given hmm as input. The database to be * searched is a local file as specified by the 'Database' parameter, or the * current alignment (written to file) if none is specified. * * @param searchOutputFile * @param hitsAlignmentFile * @param hmmFile * * @return * @throws IOException */ private boolean runCommand(File searchOutputFile, File hitsAlignmentFile, File hmmFile) throws IOException { String command = getCommandPath(HMMSEARCH); if (command == null) { return false; } List args = new ArrayList<>(); args.add(command); buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile); return runCommand(args); } /** * Appends command line arguments to the given list, to specify input and * output files for the search, and any additional options that may have been * passed from the parameters dialog * * @param args * @param searchOutputFile * @param hitsAlignmentFile * @param hmmFile * @throws IOException */ protected void buildArguments(List args, File searchOutputFile, File hitsAlignmentFile, File hmmFile) throws IOException { args.add("-o"); args.add(getFilePath(searchOutputFile, true)); args.add("-A"); args.add(getFilePath(hitsAlignmentFile, true)); boolean dbFound = false; String dbPath = ""; File databaseFile = null; boolean useEvalueCutoff = false; boolean useScoreCutoff = false; String seqEvalueCutoff = null; String domEvalueCutoff = null; String seqScoreCutoff = null; String domScoreCutoff = null; databaseName = "Alignment"; boolean searchAlignment = false; if (params != null) { for (ArgumentI arg : params) { String name = arg.getName(); if (MessageManager.getString(NUMBER_OF_RESULTS_KEY) .equals(name)) { seqsToReturn = Integer.parseInt(arg.getValue()); } else if (MessageManager.getString("action.search").equals(name)) { searchAlignment = arg.getValue().equals( MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY)); } else if (MessageManager.getString(DATABASE_KEY).equals(name)) { dbPath = arg.getValue(); int pos = dbPath.lastIndexOf(File.separator); databaseName = dbPath.substring(pos + 1); databaseFile = new File(dbPath); } else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY) .equals(name)) { realign = true; } else if (MessageManager.getString(USE_ACCESSIONS_KEY) .equals(name)) { args.add("--acc"); } else if (MessageManager.getString(REPORTING_CUTOFF_KEY) .equals(name)) { if (CUTOFF_EVALUE.equals(arg.getValue())) { useEvalueCutoff = true; } else if (CUTOFF_SCORE.equals(arg.getValue())) { useScoreCutoff = true; } } else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) { seqEvalueCutoff = arg.getValue(); } else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name)) { seqScoreCutoff = arg.getValue(); } else if (MessageManager.getString(DOM_EVALUE_KEY) .equals(name)) { domEvalueCutoff = arg.getValue(); } else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) { domScoreCutoff = arg.getValue(); } else if (MessageManager.getString(TRIM_TERMINI_KEY) .equals(name)) { trim = true; } else if (MessageManager.getString(DATABASE_KEY).equals(name)) { dbFound = true; dbPath = arg.getValue(); if (!MessageManager.getString(THIS_ALIGNMENT_KEY) .equals(dbPath)) { int pos = dbPath.lastIndexOf(File.separator); databaseName = dbPath.substring(pos + 1); databaseFile = new File(dbPath); } } } } if (useEvalueCutoff) { args.add("-E"); args.add(seqEvalueCutoff); args.add("--domE"); args.add(domEvalueCutoff); } else if (useScoreCutoff) { args.add("-T"); args.add(seqScoreCutoff); args.add("--domT"); args.add(domScoreCutoff); } // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) // .equals(dbPath)) if (searchAlignment) { /* * no external database specified for search, so * export current alignment as 'database' to search, * excluding any HMM consensus sequences it contains */ databaseFile = FileUtils.createTempFile("database", ".sto"); AlignmentI al = af.getViewport().getAlignment(); AlignmentI copy = new Alignment(al); List hmms = copy.getHmmSequences(); for (SequenceI hmmSeq : hmms) { copy.deleteSequence(hmmSeq); } exportStockholm(copy.getSequencesArray(), databaseFile, null); } args.add(getFilePath(hmmFile, true)); args.add(getFilePath(databaseFile, true)); } /** * Imports the data from the temporary file to which the output of hmmsearch * was directed. The results are optionally realigned using hmmalign. * * @param hmmSeq */ private void importData(SequenceI hmmSeq, File inputAlignmentTemp, File hmmTemp, File searchOutputFile) throws IOException, InterruptedException { BufferedReader br = new BufferedReader( new FileReader(inputAlignmentTemp)); try { if (br.readLine() == null) { JOptionPane.showMessageDialog(af, MessageManager.getString("label.no_sequences_found")); return; } StockholmFile file = new StockholmFile(new FileParse( inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE)); seqs = file.getSeqsAsArray(); // look for PP cons and ref seq in alignment only annotation AlignmentAnnotation modelpos = null, ppcons = null; for (AlignmentAnnotation aa : file.getAnnotations()) { if (aa.sequenceRef == null) { if (aa.label.equals("Reference Positions")) // RF feature type in // stockholm parser { modelpos = aa; } if (aa.label.equals("Posterior Probability")) { ppcons = aa; } } } readTable(searchOutputFile); int seqCount = Math.min(seqs.length, seqsToReturn); SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1]; hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen hmmAndSeqs[0] = hmmSeq; System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount); if (modelpos != null) { // TODO need - get ungapped sequence method hmmSeq.setSequence( hmmSeq.getDatasetSequence().getSequenceAsString()); Annotation[] refpos = modelpos.annotations; // insert gaps to match with refseq positions int gc = 0, lcol = 0; for (int c = 0; c < refpos.length; c++) { if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter))) { if (gc > 0) { hmmSeq.insertCharAt(lcol + 1, gc, '-'); } gc = 0; lcol = c; } else { gc++; } } } if (realign) { realignResults(hmmAndSeqs); } else { AlignmentI al = new Alignment(hmmAndSeqs); if (ppcons != null) { al.addAnnotation(ppcons); } if (modelpos != null) { al.addAnnotation(modelpos); } AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); String ttl = "hmmSearch of " + databaseName + " using " + hmmSeq.getName(); Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); } hmmTemp.delete(); inputAlignmentTemp.delete(); searchOutputFile.delete(); } finally { if (br != null) { br.close(); } } } /** * Realigns the given sequences using hmmalign, to the HMM profile sequence * which is the first in the array, and opens the results in a new frame * * @param hmmAndSeqs */ protected void realignResults(SequenceI[] hmmAndSeqs) { /* * and align the search results to the HMM profile */ AlignmentI al = new Alignment(hmmAndSeqs); AlignFrame frame = new AlignFrame(al, 1, 1); List alignArgs = new ArrayList<>(); String alignTo = hmmAndSeqs[0].getName(); List options = Collections.singletonList(alignTo); Option option = new Option(MessageManager.getString("label.use_hmm"), "", true, alignTo, alignTo, options, null); alignArgs.add(option); if (trim) { alignArgs.add(new BooleanOption( MessageManager.getString(TRIM_TERMINI_KEY), MessageManager.getString("label.trim_termini_desc"), true, true, true, null)); } HmmerCommand hmmalign = new HMMAlign(frame, alignArgs); hmmalign.run(); } /** * Reads in the scores table output by hmmsearch and adds annotation to * sequences for E-value and bit score * * @param inputTableTemp * @throws IOException */ void readTable(File inputTableTemp) throws IOException { BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); String line = ""; while (!line.startsWith("Query:")) { line = br.readLine(); } while (!line.contains("-------")) { line = br.readLine(); } line = br.readLine(); int index = 0; while (!" ------ inclusion threshold ------".equals(line) && !"".equals(line)) { SequenceI seq = seqs[index]; AlignmentAnnotation pp = seq .getAlignmentAnnotations("", "Posterior Probability") .get(0); Scanner scanner = new Scanner(line); String str = scanner.next(); addScoreAnnotation(str, seq, "hmmsearch E-value", "Full sequence E-value", pp); str = scanner.next(); addScoreAnnotation(str, seq, "hmmsearch Score", "Full sequence bit score", pp); seq.removeAlignmentAnnotation(pp); scanner.close(); line = br.readLine(); index++; } br.close(); } /** * A helper method that adds one score-only (non-positional) annotation to a * sequence * * @param value * @param seq * @param label * @param description */ protected void addScoreAnnotation(String value, SequenceI seq, String label, String description) { addScoreAnnotation(value, seq, label, description, null); } /** * A helper method that adds one score-only (non-positional) annotation to a * sequence * * @param value * @param seq * @param label * @param description * @param pp * existing posterior probability annotation - values copied to new * annotation row */ protected void addScoreAnnotation(String value, SequenceI seq, String label, String description, AlignmentAnnotation pp) { try { AlignmentAnnotation annot = null; if (pp == null) { new AlignmentAnnotation(label, description, null); } else { annot = new AlignmentAnnotation(pp); annot.label = label; annot.description = description; } annot.setCalcId(HMMSEARCH); double eValue = Double.parseDouble(value); annot.setScore(eValue); annot.setSequenceRef(seq); seq.addAlignmentAnnotation(annot); } catch (NumberFormatException e) { System.err.println("Error parsing " + label + " from " + value); } } }