package jalview.hmmer; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.Desktop; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileParse; import jalview.io.StockholmFile; import jalview.util.FileUtils; import jalview.util.MessageManager; import jalview.ws.params.ArgumentI; import jalview.ws.params.simple.BooleanOption; import jalview.ws.params.simple.Option; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import javax.swing.JOptionPane; public class HMMSearch extends Search { boolean realign = false; boolean trim = false; boolean returnNoOfNewSeqs = false; int seqsToReturn = Integer.MAX_VALUE; /** * Constructor for the HMMSearchThread * * @param af */ public HMMSearch(AlignFrame af, List args) { super(af, args); } /** * Runs the HMMSearchThread: the data on the alignment or group is exported, * then the command is executed in the command line and then the data is * imported and displayed in a new frame. Call this method directly to execute * synchronously, or via start() in a new Thread for asynchronously. */ @Override public void run() { HiddenMarkovModel hmm = getHmmProfile(); if (hmm == null) { // shouldn't happen if we got this far Cache.log.error("Error: no hmm for hmmsearch"); return; } SequenceI hmmSeq = hmm.getConsensusSequence(); long msgId = System.currentTimeMillis(); af.setProgressBar(MessageManager.getString("status.running_search"), msgId); try { File hmmFile = FileUtils.createTempFile("hmm", ".hmm"); File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment", ".sto"); File searchOutputFile = FileUtils.createTempFile("searchOutput", ".sto"); exportHmm(hmm, hmmFile.getAbsoluteFile()); boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile); if (!ran) { JvOptionPane.showInternalMessageDialog(af, MessageManager .formatMessage("warn.command_failed", "hmmsearch")); return; } importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile); // TODO make realignment of search results a step at this level // and make it conditional on this.realign } catch (IOException | InterruptedException e) { e.printStackTrace(); } finally { af.setProgressBar("", msgId); } } /** * Executes an hmmsearch with the given hmm as input. The database to be * searched is a local file as specified by the 'Database' parameter, or the * current alignment (written to file) if none is specified. * * @param searchOutputFile * @param hitsAlignmentFile * @param hmmFile * * @return * @throws IOException */ private boolean runCommand(File searchOutputFile, File hitsAlignmentFile, File hmmFile) throws IOException { String command = getCommandPath(HMMSEARCH); if (command == null) { return false; } List args = new ArrayList<>(); args.add(command); buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile); return runCommand(args); } /** * Imports the data from the temporary file to which the output of hmmsearch * was directed. The results are optionally realigned using hmmalign. * * @param hmmSeq */ private void importData(SequenceI hmmSeq, File inputAlignmentTemp, File hmmTemp, File searchOutputFile) throws IOException, InterruptedException { BufferedReader br = new BufferedReader( new FileReader(inputAlignmentTemp)); try { if (br.readLine() == null) { JOptionPane.showMessageDialog(af, MessageManager.getString("label.no_sequences_found")); return; } StockholmFile file = new StockholmFile(new FileParse( inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE)); seqs = file.getSeqsAsArray(); readDomainTable(searchOutputFile, false); if (searchAlignment) { recoverSequences(sequencesHash, seqs); } // look for PP cons and ref seq in alignment only annotation AlignmentAnnotation modelpos = null, ppcons = null; for (AlignmentAnnotation aa : file.getAnnotations()) { if (aa.sequenceRef == null) { if (aa.label.equals("Reference Positions")) // RF feature type in // stockholm parser { modelpos = aa; } if (aa.label.equals("Posterior Probability")) { ppcons = aa; } } } int seqCount = Math.min(seqs.length, seqsToReturn); SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1]; hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen hmmAndSeqs[0] = hmmSeq; System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount); if (modelpos != null) { // TODO need - get ungapped sequence method hmmSeq.setSequence( hmmSeq.getDatasetSequence().getSequenceAsString()); Annotation[] refpos = modelpos.annotations; // insert gaps to match with refseq positions int gc = 0, lcol = 0; for (int c = 0; c < refpos.length; c++) { if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter))) { if (gc > 0) { hmmSeq.insertCharAt(lcol + 1, gc, '-'); } gc = 0; lcol = c; } else { gc++; } } } if (realign) { realignResults(hmmAndSeqs); } else { AlignmentI al = new Alignment(hmmAndSeqs); if (ppcons != null) { al.addAnnotation(ppcons); } if (modelpos != null) { al.addAnnotation(modelpos); } AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); String ttl = "hmmSearch of " + databaseName + " using " + hmmSeq.getName(); Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); if (returnNoOfNewSeqs) { int nNew = checkForNewSequences(); JvOptionPane.showMessageDialog(af.alignPanel, nNew + " " + MessageManager.getString("label.new_returned")); } } hmmTemp.delete(); inputAlignmentTemp.delete(); searchOutputFile.delete(); } finally { if (br != null) { br.close(); } } } private int checkForNewSequences() { int nNew = seqs.length; for (SequenceI resultSeq : seqs) { for (SequenceI aliSeq : alignment.getSequencesArray()) { if (resultSeq.getName().equals(aliSeq.getName())) { nNew--; break; } } } return nNew; } /** * Realigns the given sequences using hmmalign, to the HMM profile sequence * which is the first in the array, and opens the results in a new frame * * @param hmmAndSeqs */ protected void realignResults(SequenceI[] hmmAndSeqs) { /* * and align the search results to the HMM profile */ AlignmentI al = new Alignment(hmmAndSeqs); AlignFrame frame = new AlignFrame(al, 1, 1); List alignArgs = new ArrayList<>(); String alignTo = hmmAndSeqs[0].getName(); List options = Collections.singletonList(alignTo); Option option = new Option(MessageManager.getString("label.use_hmm"), "", true, alignTo, alignTo, options, null); alignArgs.add(option); if (trim) { alignArgs.add(new BooleanOption( MessageManager.getString(TRIM_TERMINI_KEY), MessageManager.getString("label.trim_termini_desc"), true, true, true, null)); } HmmerCommand hmmalign = new HMMAlign(frame, alignArgs); hmmalign.run(); if (returnNoOfNewSeqs) { int nNew = checkForNewSequences(); JvOptionPane.showMessageDialog(frame.alignPanel, nNew + " " + MessageManager.getString("label.new_returned")); } } }