X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fhmmer%2FHMMSearch.java;fp=src%2Fjalview%2Fhmmer%2FHMMSearch.java;h=6cda4216c552c992911e2d389b7497c43a0a6325;hb=7692386ccfe778075dd83a753d30a7a27fe507be;hp=0000000000000000000000000000000000000000;hpb=97264690f394f98cf13cd1d834a53e8956fb1b0e;p=jalview.git diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java new file mode 100644 index 0000000..6cda421 --- /dev/null +++ b/src/jalview/hmmer/HMMSearch.java @@ -0,0 +1,475 @@ +package jalview.hmmer; + +import jalview.bin.Cache; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.HiddenMarkovModel; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.gui.Desktop; +import jalview.gui.JvOptionPane; +import jalview.io.DataSourceType; +import jalview.io.FileParse; +import jalview.io.StockholmFile; +import jalview.util.FileUtils; +import jalview.util.MessageManager; +import jalview.ws.params.ArgumentI; +import jalview.ws.params.simple.BooleanOption; +import jalview.ws.params.simple.Option; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Scanner; + +import javax.swing.JOptionPane; + +public class HMMSearch extends HmmerCommand +{ + static final String HMMSEARCH = "hmmsearch"; + + /* + * constants for i18n lookup of passed parameter names + */ + static final String DATABASE_KEY = "label.database"; + + static final String THIS_ALIGNMENT_KEY = "label.this_alignment"; + + static final String USE_ACCESSIONS_KEY = "label.use_accessions"; + + static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs"; + + static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results"; + + static final String TRIM_TERMINI_KEY = "label.trim_termini"; + + static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff"; + + static final String CUTOFF_NONE = "None"; + + static final String CUTOFF_SCORE = "Score"; + + static final String CUTOFF_EVALUE = "E-Value"; + + static final String SEQ_EVALUE_KEY = "label.seq_evalue"; + + static final String DOM_EVALUE_KEY = "label.dom_evalue"; + + static final String SEQ_SCORE_KEY = "label.seq_score"; + + static final String DOM_SCORE_KEY = "label.dom_score"; + + boolean realign = false; + + boolean trim = false; + + int seqsToReturn = Integer.MAX_VALUE; + + SequenceI[] seqs; + + private String databaseName; + + /** + * Constructor for the HMMSearchThread + * + * @param af + */ + public HMMSearch(AlignFrame af, List args) + { + super(af, args); + } + + /** + * Runs the HMMSearchThread: the data on the alignment or group is exported, + * then the command is executed in the command line and then the data is + * imported and displayed in a new frame. Call this method directly to execute + * synchronously, or via start() in a new Thread for asynchronously. + */ + @Override + public void run() + { + HiddenMarkovModel hmm = getHmmProfile(); + if (hmm == null) + { + // shouldn't happen if we got this far + Cache.log.error("Error: no hmm for hmmsearch"); + return; + } + + SequenceI hmmSeq = hmm.getConsensusSequence(); + long msgId = System.currentTimeMillis(); + af.setProgressBar(MessageManager.getString("status.running_hmmsearch"), + msgId); + + try + { + File hmmFile = FileUtils.createTempFile("hmm", ".hmm"); + File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment", + ".sto"); + File searchOutputFile = FileUtils.createTempFile("searchOutput", + ".sto"); + + exportHmm(hmm, hmmFile.getAbsoluteFile()); + + boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile); + if (!ran) + { + JvOptionPane.showInternalMessageDialog(af, MessageManager + .formatMessage("warn.command_failed", "hmmsearch")); + return; + } + + importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile); + // TODO make realignment of search results a step at this level + // and make it conditional on this.realign + } catch (IOException | InterruptedException e) + { + e.printStackTrace(); + } + finally + { + af.setProgressBar("", msgId); + } + } + + /** + * Executes an hmmsearch with the given hmm as input. The database to be + * searched is a local file as specified by the 'Database' parameter, or the + * current alignment (written to file) if none is specified. + * + * @param searchOutputFile + * @param hitsAlignmentFile + * @param hmmFile + * + * @return + * @throws IOException + */ + private boolean runCommand(File searchOutputFile, File hitsAlignmentFile, + File hmmFile) throws IOException + { + String command = getCommandPath(HMMSEARCH); + if (command == null) + { + return false; + } + + List args = new ArrayList<>(); + args.add(command); + buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile); + + return runCommand(args); + } + + /** + * Appends command line arguments to the given list, to specify input and + * output files for the search, and any additional options that may have been + * passed from the parameters dialog + * + * @param args + * @param searchOutputFile + * @param hitsAlignmentFile + * @param hmmFile + * @throws IOException + */ + protected void buildArguments(List args, File searchOutputFile, + File hitsAlignmentFile, File hmmFile) throws IOException + { + args.add("-o"); + args.add(getFilePath(searchOutputFile)); + args.add("-A"); + args.add(getFilePath(hitsAlignmentFile)); + + boolean dbFound = false; + String dbPath = ""; + File databaseFile = null; + + boolean useEvalueCutoff = false; + boolean useScoreCutoff = false; + String seqEvalueCutoff = null; + String domEvalueCutoff = null; + String seqScoreCutoff = null; + String domScoreCutoff = null; + databaseName = "Alignment"; + boolean searchAlignment = false; + + if (params != null) + { + for (ArgumentI arg : params) + { + String name = arg.getName(); + if (MessageManager.getString(NUMBER_OF_RESULTS_KEY) + .equals(name)) + { + seqsToReturn = Integer.parseInt(arg.getValue()); + } + else if (MessageManager.getString("action.search").equals(name)) + { + searchAlignment = arg.getValue().equals( + MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY)); + } + else if (MessageManager.getString(DATABASE_KEY).equals(name)) + { + dbPath = arg.getValue(); + int pos = dbPath.lastIndexOf(File.separator); + databaseName = dbPath.substring(pos + 1); + databaseFile = new File(dbPath); + } + else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY) + .equals(name)) + { + realign = true; + } + else if (MessageManager.getString(USE_ACCESSIONS_KEY) + .equals(name)) + { + args.add("--acc"); + } + else if (MessageManager.getString(REPORTING_CUTOFF_KEY) + .equals(name)) + { + if (CUTOFF_EVALUE.equals(arg.getValue())) + { + useEvalueCutoff = true; + } + else if (CUTOFF_SCORE.equals(arg.getValue())) + { + useScoreCutoff = true; + } + } + else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) + { + seqEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name)) + { + seqScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(DOM_EVALUE_KEY) + .equals(name)) + { + domEvalueCutoff = arg.getValue(); + } + else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) + { + domScoreCutoff = arg.getValue(); + } + else if (MessageManager.getString(TRIM_TERMINI_KEY) + .equals(name)) + { + trim = true; + } + else if (MessageManager.getString(DATABASE_KEY).equals(name)) + { + dbFound = true; + dbPath = arg.getValue(); + if (!MessageManager.getString(THIS_ALIGNMENT_KEY) + .equals(dbPath)) + { + int pos = dbPath.lastIndexOf(File.separator); + databaseName = dbPath.substring(pos + 1); + databaseFile = new File(dbPath); + } + } + } + } + + if (useEvalueCutoff) + { + args.add("-E"); + args.add(seqEvalueCutoff); + args.add("--domE"); + args.add(domEvalueCutoff); + } + else if (useScoreCutoff) + { + args.add("-T"); + args.add(seqScoreCutoff); + args.add("--domT"); + args.add(domScoreCutoff); + } + +// if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) +// .equals(dbPath)) + if (searchAlignment) + { + /* + * no external database specified for search, so + * export current alignment as 'database' to search, + * excluding any HMM consensus sequences it contains + */ + databaseFile = FileUtils.createTempFile("database", ".sto"); + AlignmentI al = af.getViewport().getAlignment(); + AlignmentI copy = new Alignment(al); + List hmms = copy.getHmmSequences(); + for (SequenceI hmmSeq : hmms) + { + copy.deleteSequence(hmmSeq); + } + exportStockholm(copy.getSequencesArray(), databaseFile, null); + } + + args.add(getFilePath(hmmFile)); + args.add(getFilePath(databaseFile)); + } + + /** + * Imports the data from the temporary file to which the output of hmmsearch + * was directed. The results are optionally realigned using hmmalign. + * + * @param hmmSeq + */ + private void importData(SequenceI hmmSeq, File inputAlignmentTemp, + File hmmTemp, File searchOutputFile) + throws IOException, InterruptedException + { + BufferedReader br = new BufferedReader( + new FileReader(inputAlignmentTemp)); + try + { + if (br.readLine() == null) + { + JOptionPane.showMessageDialog(af, + MessageManager.getString("label.no_sequences_found")); + return; + } + StockholmFile file = new StockholmFile(new FileParse( + inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE)); + seqs = file.getSeqsAsArray(); + + readTable(searchOutputFile); + + int seqCount = Math.min(seqs.length, seqsToReturn); + SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1]; + hmmAndSeqs[0] = hmmSeq; + System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount); + + if (realign) + { + realignResults(hmmAndSeqs); + } + else + { + AlignmentI al = new Alignment(hmmAndSeqs); + AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + String ttl = "hmmSearch of " + databaseName + " using " + + hmmSeq.getName(); + Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + } + + hmmTemp.delete(); + inputAlignmentTemp.delete(); + searchOutputFile.delete(); + } finally + { + if (br != null) + { + br.close(); + } + } + } + + /** + * Realigns the given sequences using hmmalign, to the HMM profile sequence + * which is the first in the array, and opens the results in a new frame + * + * @param hmmAndSeqs + */ + protected void realignResults(SequenceI[] hmmAndSeqs) + { + /* + * and align the search results to the HMM profile + */ + AlignmentI al = new Alignment(hmmAndSeqs); + AlignFrame frame = new AlignFrame(al, 1, 1); + List alignArgs = new ArrayList<>(); + String alignTo = hmmAndSeqs[0].getName(); + List options = Collections.singletonList(alignTo); + Option option = new Option(MessageManager.getString("label.use_hmm"), + "", true, alignTo, alignTo, options, null); + alignArgs.add(option); + if (trim) + { + alignArgs.add(new BooleanOption( + MessageManager.getString(TRIM_TERMINI_KEY), + MessageManager.getString("label.trim_termini_desc"), true, + true, true, null)); + } + HmmerCommand hmmalign = new HMMAlign(frame, alignArgs); + hmmalign.run(); + } + + /** + * Reads in the scores table output by hmmsearch and adds annotation to + * sequences for E-value and bit score + * + * @param inputTableTemp + * @throws IOException + */ + void readTable(File inputTableTemp) throws IOException + { + BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); + String line = ""; + while (!line.startsWith("Query:")) + { + line = br.readLine(); + } + for (int i = 0; i < 5; i++) + { + line = br.readLine(); + } + + int index = 0; + while (!" ------ inclusion threshold ------".equals(line) + && !"".equals(line)) + { + SequenceI seq = seqs[index]; + Scanner scanner = new Scanner(line); + String str = scanner.next(); + addScoreAnnotation(str, seq, "hmmsearch E-value", + "Full sequence E-value"); + str = scanner.next(); + addScoreAnnotation(str, seq, "hmmsearch Score", + "Full sequence bit score"); + scanner.close(); + line = br.readLine(); + index++; + } + + br.close(); + } + + /** + * A helper method that adds one score-only (non-positional) annotation to a + * sequence + * + * @param value + * @param seq + * @param label + * @param description + */ + protected void addScoreAnnotation(String value, SequenceI seq, + String label, String description) + { + try + { + AlignmentAnnotation annot = new AlignmentAnnotation(label, + description, null); + annot.setCalcId(HMMSEARCH); + double eValue = Double.parseDouble(value); + annot.setScore(eValue); + annot.setSequenceRef(seq); + seq.addAlignmentAnnotation(annot); + } catch (NumberFormatException e) + { + System.err.println("Error parsing " + label + " from " + value); + } + } + +}