X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fhmmer%2FHMMSearch.java;h=64802c7dc206053582e22e2d79225d15a630cf8f;hb=79d76d41dc80b2c5a71201f26f832af337b7809f;hp=6f73f2fab8d9f1b8ba0ae7abc72db650bfd030bb;hpb=1ed4ac38ab7e097a051afaa25853434f1cfb6d4c;p=jalview.git diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java index 6f73f2f..64802c7 100644 --- a/src/jalview/hmmer/HMMSearch.java +++ b/src/jalview/hmmer/HMMSearch.java @@ -1,5 +1,6 @@ package jalview.hmmer; +import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -7,6 +8,7 @@ import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; +import jalview.gui.Desktop; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileParse; @@ -15,12 +17,14 @@ import jalview.util.FileUtils; import jalview.util.MessageManager; import jalview.ws.params.ArgumentI; import jalview.ws.params.simple.BooleanOption; +import jalview.ws.params.simple.Option; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Scanner; @@ -38,6 +42,8 @@ public class HMMSearch extends HmmerCommand SequenceI[] seqs; + private String databaseName; + /** * Constructor for the HMMSearchThread * @@ -57,17 +63,17 @@ public class HMMSearch extends HmmerCommand @Override public void run() { - HiddenMarkovModel hmm = af.getSelectedHMM(); + HiddenMarkovModel hmm = getHmmProfile(); if (hmm == null) { - JOptionPane.showMessageDialog(af, - MessageManager.getString("warn.no_selected_hmm")); + // shouldn't happen if we got this far + Cache.log.error("Error: no hmm for hmmsearch"); return; } - SequenceI hmmSeq = af.getSelectedHMMSequence(); + SequenceI hmmSeq = hmm.getConsensusSequence(); long msgId = System.currentTimeMillis(); - af.setProgressBar(MessageManager.getString("status.running_hmmsearch"), + af.setProgressBar(MessageManager.getString("status.running_search"), msgId); try @@ -83,8 +89,8 @@ public class HMMSearch extends HmmerCommand boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile); if (!ran) { - JvOptionPane.showInternalMessageDialog(af, - MessageManager.getString("warn.hmmsearch_failed")); + JvOptionPane.showInternalMessageDialog(af, MessageManager + .formatMessage("warn.command_failed", "hmmsearch")); return; } @@ -124,107 +130,162 @@ public class HMMSearch extends HmmerCommand List args = new ArrayList<>(); args.add(command); + buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile); + + return runCommand(args); + } + + /** + * Appends command line arguments to the given list, to specify input and + * output files for the search, and any additional options that may have been + * passed from the parameters dialog + * + * @param args + * @param searchOutputFile + * @param hitsAlignmentFile + * @param hmmFile + * @throws IOException + */ + protected void buildArguments(List args, File searchOutputFile, + File hitsAlignmentFile, File hmmFile) throws IOException + { args.add("-o"); - args.add(searchOutputFile.getAbsolutePath()); + args.add(getFilePath(searchOutputFile, true)); args.add("-A"); - args.add(hitsAlignmentFile.getAbsolutePath()); + args.add(getFilePath(hitsAlignmentFile, true)); boolean dbFound = false; String dbPath = ""; File databaseFile = null; + boolean useEvalueCutoff = false; + boolean useScoreCutoff = false; + String seqEvalueCutoff = null; + String domEvalueCutoff = null; + String seqScoreCutoff = null; + String domScoreCutoff = null; + databaseName = "Alignment"; + boolean searchAlignment = false; + if (params != null) { for (ArgumentI arg : params) { String name = arg.getName(); - if (MessageManager.getString("label.number_of_results") + if (MessageManager.getString(NUMBER_OF_RESULTS_KEY) .equals(name)) { seqsToReturn = Integer.parseInt(arg.getValue()); } - else if (MessageManager.getString("label.auto_align_seqs") + else if (MessageManager.getString("action.search").equals(name)) + { + searchAlignment = arg.getValue().equals( + MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY)); + } + else if (MessageManager.getString(DATABASE_KEY).equals(name)) + { + dbPath = arg.getValue(); + int pos = dbPath.lastIndexOf(File.separator); + databaseName = dbPath.substring(pos + 1); + databaseFile = new File(dbPath); + } + else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY) .equals(name)) { - realign = true; // TODO: not used + realign = true; } - else if (MessageManager.getString("label.use_accessions") + else if (MessageManager.getString(USE_ACCESSIONS_KEY) .equals(name)) { args.add("--acc"); } - else if (MessageManager.getString("label.seq_e_value").equals(name)) + else if (MessageManager.getString(REPORTING_CUTOFF_KEY) + .equals(name)) { - args.add("--incE"); - args.add(arg.getValue()); + if (CUTOFF_EVALUE.equals(arg.getValue())) + { + useEvalueCutoff = true; + } + else if (CUTOFF_SCORE.equals(arg.getValue())) + { + useScoreCutoff = true; + } + } + else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name)) + { + seqEvalueCutoff = arg.getValue(); } - else if (MessageManager.getString("label.seq_score").equals(name)) + else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name)) { - args.add("-incT"); - args.add(arg.getValue()); + seqScoreCutoff = arg.getValue(); } - else if (MessageManager.getString("label.dom_e_value_desc") + else if (MessageManager.getString(DOM_EVALUE_KEY) .equals(name)) { - args.add("--incdomE"); - args.add(arg.getValue()); + domEvalueCutoff = arg.getValue(); } - else if (MessageManager.getString("label.dom_score").equals(name)) + else if (MessageManager.getString(DOM_SCORE_KEY).equals(name)) { - args.add("--incdomT"); - args.add(arg.getValue()); + domScoreCutoff = arg.getValue(); } - else if (MessageManager.getString("label.trim_termini") + else if (MessageManager.getString(TRIM_TERMINI_KEY) .equals(name)) { trim = true; } - else if (MessageManager.getString("label.database").equals(name)) + else if (MessageManager.getString(DATABASE_KEY).equals(name)) { dbFound = true; dbPath = arg.getValue(); - if (!MessageManager.getString("label.this_alignment") + if (!MessageManager.getString(THIS_ALIGNMENT_KEY) .equals(dbPath)) { + int pos = dbPath.lastIndexOf(File.separator); + databaseName = dbPath.substring(pos + 1); databaseFile = new File(dbPath); } } } } - if (!dbFound || MessageManager.getString("label.this_alignment") - .equals(dbPath)) + if (useEvalueCutoff) + { + args.add("-E"); + args.add(seqEvalueCutoff); + args.add("--domE"); + args.add(domEvalueCutoff); + } + else if (useScoreCutoff) + { + args.add("-T"); + args.add(seqScoreCutoff); + args.add("--domT"); + args.add(domScoreCutoff); + } + +// if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY) +// .equals(dbPath)) + if (searchAlignment) { /* * no external database specified for search, so - * export current alignment as 'database' to search + * export current alignment as 'database' to search, + * excluding any HMM consensus sequences it contains */ databaseFile = FileUtils.createTempFile("database", ".sto"); AlignmentI al = af.getViewport().getAlignment(); AlignmentI copy = new Alignment(al); - SequenceI hmms = copy.getHmmConsensus(); - if (hmms != null) - { - copy.deleteSequence(hmms); - } + deleteHmmSequences(copy); exportStockholm(copy.getSequencesArray(), databaseFile, null); - // StockholmFile stoFile = new StockholmFile(copy); - // stoFile.setSeqs(copy.getSequencesArray()); - // String alignmentString = stoFile.print(); - // PrintWriter writer = new PrintWriter(databaseFile); - // writer.print(alignmentString); - // writer.close(); } - args.add(hmmFile.getAbsolutePath()); - args.add(databaseFile.getAbsolutePath()); - - return runCommand(args); + args.add(getFilePath(hmmFile, true)); + args.add(getFilePath(databaseFile, true)); } /** * Imports the data from the temporary file to which the output of hmmsearch - * is directed. + * was directed. The results are optionally realigned using hmmalign. * * @param hmmSeq */ @@ -245,28 +306,78 @@ public class HMMSearch extends HmmerCommand StockholmFile file = new StockholmFile(new FileParse( inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE)); seqs = file.getSeqsAsArray(); - + // look for PP cons and ref seq in alignment only annotation + AlignmentAnnotation modelpos = null, ppcons = null; + for (AlignmentAnnotation aa : file.getAnnotations()) + { + if (aa.sequenceRef == null) + { + if (aa.label.equals("Reference Positions")) // RF feature type in + // stockholm parser + { + modelpos = aa; + } + if (aa.label.equals("Posterior Probability")) + { + ppcons = aa; + } + } + } readTable(searchOutputFile); int seqCount = Math.min(seqs.length, seqsToReturn); SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1]; + hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen hmmAndSeqs[0] = hmmSeq; System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount); - - AlignmentI alignment = new Alignment(hmmAndSeqs); - AlignFrame frame = new AlignFrame(alignment, 1, 1); - frame.setSelectedHMMSequence(hmmSeq); - List alignArgs = new ArrayList<>(); - if (trim) + if (modelpos != null) { - alignArgs.add(new BooleanOption( - MessageManager.getString("label.trim_termini"), - MessageManager.getString("label.trim_termini_desc"), true, - true, true, null)); + // TODO need - get ungapped sequence method + hmmSeq.setSequence( + hmmSeq.getDatasetSequence().getSequenceAsString()); + Annotation[] refpos = modelpos.annotations; + // insert gaps to match with refseq positions + int gc = 0, lcol = 0; + for (int c = 0; c < refpos.length; c++) + { + if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter))) + { + if (gc > 0) + { + hmmSeq.insertCharAt(lcol + 1, gc, '-'); + } + gc = 0; + lcol = c; + } + else + { + gc++; + } + } } - HMMAlign hmmalign = new HMMAlign(frame, alignArgs); - hmmalign.run(); - frame = null; + if (realign) + { + realignResults(hmmAndSeqs); + } + else + { + AlignmentI al = new Alignment(hmmAndSeqs); + if (ppcons != null) + { + al.addAnnotation(ppcons); + } + if (modelpos != null) + { + al.addAnnotation(modelpos); + } + AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + String ttl = "hmmSearch of " + databaseName + " using " + + hmmSeq.getName(); + Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + } + hmmTemp.delete(); inputAlignmentTemp.delete(); searchOutputFile.delete(); @@ -279,6 +390,43 @@ public class HMMSearch extends HmmerCommand } } + /** + * Realigns the given sequences using hmmalign, to the HMM profile sequence + * which is the first in the array, and opens the results in a new frame + * + * @param hmmAndSeqs + */ + protected void realignResults(SequenceI[] hmmAndSeqs) + { + /* + * and align the search results to the HMM profile + */ + AlignmentI al = new Alignment(hmmAndSeqs); + AlignFrame frame = new AlignFrame(al, 1, 1); + List alignArgs = new ArrayList<>(); + String alignTo = hmmAndSeqs[0].getName(); + List options = Collections.singletonList(alignTo); + Option option = new Option(MessageManager.getString("label.use_hmm"), + "", true, alignTo, alignTo, options, null); + alignArgs.add(option); + if (trim) + { + alignArgs.add(new BooleanOption( + MessageManager.getString(TRIM_TERMINI_KEY), + MessageManager.getString("label.trim_termini_desc"), true, + true, true, null)); + } + HmmerCommand hmmalign = new HMMAlign(frame, alignArgs); + hmmalign.run(); + } + + /** + * Reads in the scores table output by hmmsearch and adds annotation to + * sequences for E-value and bit score + * + * @param inputTableTemp + * @throws IOException + */ void readTable(File inputTableTemp) throws IOException { BufferedReader br = new BufferedReader(new FileReader(inputTableTemp)); @@ -287,31 +435,21 @@ public class HMMSearch extends HmmerCommand { line = br.readLine(); } - for (int i = 0; i < 5; i++) + while (!line.contains("-------")) { line = br.readLine(); } + line = br.readLine(); int index = 0; while (!" ------ inclusion threshold ------".equals(line) && !"".equals(line)) { + SequenceI seq = seqs[index]; Scanner scanner = new Scanner(line); - - String str = scanner.next(); // full sequence eValue score - float eValue = Float.parseFloat(str); - int seqLength = seqs[index].getLength(); - Annotation[] annots = new Annotation[seqLength]; - for (int j = 0; j < seqLength; j++) - { - annots[j] = new Annotation(eValue); - } - AlignmentAnnotation annot = new AlignmentAnnotation("E-value", - "Score", annots); - annot.setScore(Double.parseDouble(str)); - annot.setSequenceRef(seqs[index]); - seqs[index].addAlignmentAnnotation(annot); - + String evalue = scanner.next(); + String score = scanner.next(); + addScoreAnnotations(evalue, score, seq); scanner.close(); line = br.readLine(); index++; @@ -320,4 +458,36 @@ public class HMMSearch extends HmmerCommand br.close(); } + + protected void addScoreAnnotations(String eValue, String bitScore, + SequenceI seq) + { + String label = "Search Scores"; + String description = "Full sequence bit score and E-Value"; + + try + { + AlignmentAnnotation annot = new AlignmentAnnotation(label, + description, null); + + annot.label = label; + annot.description = description; + + annot.setCalcId(HMMSEARCH); + + double dEValue = Double.parseDouble(eValue); + annot.setEValue(dEValue); + + double dBitScore = Double.parseDouble(bitScore); + annot.setBitScore(dBitScore); + + annot.setSequenceRef(seq); + seq.addAlignmentAnnotation(annot); + } catch (NumberFormatException e) + { + System.err.println("Error parsing " + label + " from " + eValue + + " & " + bitScore); + } + } + }