package jalview.hmmer; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.Desktop; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileParse; import jalview.io.StockholmFile; import jalview.util.FileUtils; import jalview.util.MessageManager; import jalview.ws.params.ArgumentI; import jalview.ws.params.simple.BooleanOption; import jalview.ws.params.simple.Option; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Hashtable; import java.util.List; import java.util.Scanner; import javax.swing.JOptionPane; public class HMMSearch extends HmmerCommand { static final String HMMSEARCH = "hmmsearch"; boolean realign = false; boolean trim = false; boolean returnNoOfNewSeqs = false; int seqsToReturn = Integer.MAX_VALUE; boolean searchAlignment = true; SequenceI[] seqs; private String databaseName; Hashtable sequencesHash; /** * Constructor for the HMMSearchThread * * @param af */ public HMMSearch(AlignFrame af, List args) { super(af, args); } /** * Runs the HMMSearchThread: the data on the alignment or group is exported, * then the command is executed in the command line and then the data is * imported and displayed in a new frame. Call this method directly to execute * synchronously, or via start() in a new Thread for asynchronously. */ @Override public void run() { HiddenMarkovModel hmm = getHmmProfile(); if (hmm == null) { // shouldn't happen if we got this far Cache.log.error("Error: no hmm for hmmsearch"); return; } SequenceI hmmSeq = hmm.getConsensusSequence(); long msgId = System.currentTimeMillis(); af.setProgressBar(MessageManager.getString("status.running_search"), msgId); try { File hmmFile = FileUtils.createTempFile("hmm", ".hmm"); File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment", ".sto"); File searchOutputFile = FileUtils.createTempFile("searchOutput", ".sto"); exportHmm(hmm, hmmFile.getAbsoluteFile()); boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile); if (!ran) { JvOptionPane.showInternalMessageDialog(af, MessageManager .formatMessage("warn.command_failed", "hmmsearch")); return; } importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile); // TODO make realignment of search results a step at this level // and make it conditional on this.realign } catch (IOException | InterruptedException e) { e.printStackTrace(); } finally { af.setProgressBar("", msgId); } } /** * Executes an hmmsearch with the given hmm as input. The database to be * searched is a local file as specified by the 'Database' parameter, or the * current alignment (written to file) if none is specified. * * @param searchOutputFile * @param hitsAlignmentFile * @param hmmFile * * @return * @throws IOException */ private boolean runCommand(File searchOutputFile, File hitsAlignmentFile, File hmmFile) throws IOException { String command = getCommandPath(HMMSEARCH); if (command == null) { return false; } List args = new ArrayList<>(); args.add(command); buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile); return runCommand(args); } /** * Appends command line arguments to the given list, to specify input and * output files for the search, and any additional options that may have been * passed from the parameters dialog * * @param args * @param searchOutputFile * @param hitsAlignmentFile * @param hmmFile * @throws IOException */ protected void buildArguments(List args, File searchOutputFile, File hitsAlignmentFile, File hmmFile) throws IOException { args.add("-o"); args.add(getFilePath(searchOutputFile, true)); args.add("-A"); args.add(getFilePath(hitsAlignmentFile, true)); boolean dbFound = false; String dbPath = ""; File databaseFile = null; boolean useEvalueCutoff = false; boolean useScoreCutoff = false; String seqEvalueCutoff = null; String domEvalueCutoff = null; String seqScoreCutoff = null; String domScoreCutoff = null; databaseName = "Alignment"; if (params != null) { for (ArgumentI arg : params) { String name = arg.getName(); if (MessageManager.getString(NUMBER_OF_RESULTS_KEY) .equals(name)) { seqsToReturn = Integer.parseInt(arg.getValue()); } else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY) .equals(name)) { realign = true; } else if (MessageManager.getString(USE_ACCESSIONS_KEY) .equals(name)) { args.add("--acc"); } else if (MessageManager.getString(REPORTING_CUTOFF_KEY) .equals(name)) { if (CUTOFF_EVALUE.equals(arg.getValue())) { useEvalueCutoff = true; } else if (CUTOFF_SCORE.equals(arg.getValue())) { useScoreCutoff = true; } } else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) { seqEvalueCutoff = arg.getValue(); } else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name)) { seqScoreCutoff = arg.getValue(); } else if (MessageManager.getString(DOM_EVALUE_KEY) .equals(name)) { domEvalueCutoff = arg.getValue(); } else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) { domScoreCutoff = arg.getValue(); } else if (MessageManager.getString(TRIM_TERMINI_KEY) .equals(name)) { trim = true; } else if (MessageManager.getString(DATABASE_KEY).equals(name)) { dbFound = true; dbPath = arg.getValue(); if (!MessageManager.getString(THIS_ALIGNMENT_KEY) .equals(dbPath)) { int pos = dbPath.lastIndexOf(File.separator); databaseName = dbPath.substring(pos + 1); databaseFile = new File(dbPath); } searchAlignment = false; } else if (MessageManager.getString(RETURN_N_NEW_SEQ).equals(name)) { returnNoOfNewSeqs = true; } } } if (useEvalueCutoff) { args.add("-E"); args.add(seqEvalueCutoff); args.add("--domE"); args.add(domEvalueCutoff); } else if (useScoreCutoff) { args.add("-T"); args.add(seqScoreCutoff); args.add("--domT"); args.add(domScoreCutoff); } // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) // .equals(dbPath)) if (searchAlignment) { /* * no external database specified for search, so * export current alignment as 'database' to search, * excluding any HMM consensus sequences it contains */ databaseFile = FileUtils.createTempFile("database", ".sto"); AlignmentI al = af.getViewport().getAlignment(); AlignmentI copy = new Alignment(al); deleteHmmSequences(copy); sequencesHash = stashSequences(copy.getSequencesArray()); exportStockholm(copy.getSequencesArray(), databaseFile, null); } args.add(getFilePath(hmmFile, true)); args.add(getFilePath(databaseFile, true)); } /** * Imports the data from the temporary file to which the output of hmmsearch * was directed. The results are optionally realigned using hmmalign. * * @param hmmSeq */ private void importData(SequenceI hmmSeq, File inputAlignmentTemp, File hmmTemp, File searchOutputFile) throws IOException, InterruptedException { BufferedReader br = new BufferedReader( new FileReader(inputAlignmentTemp)); try { if (br.readLine() == null) { JOptionPane.showMessageDialog(af, MessageManager.getString("label.no_sequences_found")); return; } StockholmFile file = new StockholmFile(new FileParse( inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE)); seqs = file.getSeqsAsArray(); if (searchAlignment) { recoverSequences(sequencesHash, seqs); } // look for PP cons and ref seq in alignment only annotation AlignmentAnnotation modelpos = null, ppcons = null; for (AlignmentAnnotation aa : file.getAnnotations()) { if (aa.sequenceRef == null) { if (aa.label.equals("Reference Positions")) // RF feature type in // stockholm parser { modelpos = aa; } if (aa.label.equals("Posterior Probability")) { ppcons = aa; } } } readTable(searchOutputFile); int seqCount = Math.min(seqs.length, seqsToReturn); SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1]; hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen hmmAndSeqs[0] = hmmSeq; System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount); if (modelpos != null) { // TODO need - get ungapped sequence method hmmSeq.setSequence( hmmSeq.getDatasetSequence().getSequenceAsString()); Annotation[] refpos = modelpos.annotations; // insert gaps to match with refseq positions int gc = 0, lcol = 0; for (int c = 0; c < refpos.length; c++) { if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter))) { if (gc > 0) { hmmSeq.insertCharAt(lcol + 1, gc, '-'); } gc = 0; lcol = c; } else { gc++; } } } if (realign) { realignResults(hmmAndSeqs); } else { AlignmentI al = new Alignment(hmmAndSeqs); if (ppcons != null) { al.addAnnotation(ppcons); } if (modelpos != null) { al.addAnnotation(modelpos); } AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); String ttl = "hmmSearch of " + databaseName + " using " + hmmSeq.getName(); Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); if (returnNoOfNewSeqs) { int nNew = checkForNewSequences(); JvOptionPane.showMessageDialog(af.alignPanel, nNew + " " + MessageManager.getString("label.new_returned")); } } hmmTemp.delete(); inputAlignmentTemp.delete(); searchOutputFile.delete(); } finally { if (br != null) { br.close(); } } } private int checkForNewSequences() { int nNew = seqs.length; for (SequenceI resultSeq : seqs) { for (SequenceI aliSeq : alignment.getSequencesArray()) { if (resultSeq.getName().equals(aliSeq.getName())) { nNew--; break; } } } return nNew; } /** * Realigns the given sequences using hmmalign, to the HMM profile sequence * which is the first in the array, and opens the results in a new frame * * @param hmmAndSeqs */ protected void realignResults(SequenceI[] hmmAndSeqs) { /* * and align the search results to the HMM profile */ AlignmentI al = new Alignment(hmmAndSeqs); AlignFrame frame = new AlignFrame(al, 1, 1); List alignArgs = new ArrayList<>(); String alignTo = hmmAndSeqs[0].getName(); List options = Collections.singletonList(alignTo); Option option = new Option(MessageManager.getString("label.use_hmm"), "", true, alignTo, alignTo, options, null); alignArgs.add(option); if (trim) { alignArgs.add(new BooleanOption( MessageManager.getString(TRIM_TERMINI_KEY), MessageManager.getString("label.trim_termini_desc"), true, true, true, null)); } HmmerCommand hmmalign = new HMMAlign(frame, alignArgs); hmmalign.run(); if (returnNoOfNewSeqs) { int nNew = checkForNewSequences(); JvOptionPane.showMessageDialog(frame.alignPanel, nNew + " " + MessageManager.getString("label.new_returned")); } } /** * Reads in the scores table output by hmmsearch and adds annotation to * sequences for E-value and bit score * * @param inputTableTemp * @throws IOException */ void readTable(File inputTableTemp) throws IOException { BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); String line = ""; while (!line.startsWith("Query:")) { line = br.readLine(); } while (!line.contains("-------")) { line = br.readLine(); } line = br.readLine(); int index = 0; while (!" ------ inclusion threshold ------".equals(line) && !"".equals(line)) { SequenceI seq = seqs[index]; Scanner scanner = new Scanner(line); String evalue = scanner.next(); String score = scanner.next(); addScoreAnnotations(evalue, score, seq); scanner.close(); line = br.readLine(); index++; } br.close(); } protected void addScoreAnnotations(String eValue, String bitScore, SequenceI seq) { String label = "Search Scores"; String description = "Full sequence bit score and E-Value"; try { AlignmentAnnotation annot = new AlignmentAnnotation(label, description, null); annot.label = label; annot.description = description; annot.setCalcId(HMMSEARCH); double dEValue = Double.parseDouble(eValue); annot.setEValue(dEValue); double dBitScore = Double.parseDouble(bitScore); annot.setBitScore(dBitScore); annot.setSequenceRef(seq); seq.addAlignmentAnnotation(annot); } catch (NumberFormatException e) { System.err.println("Error parsing " + label + " from " + eValue + " & " + bitScore); } } }