3 import jalview.bin.Cache;
4 import jalview.datamodel.Alignment;
5 import jalview.datamodel.AlignmentAnnotation;
6 import jalview.datamodel.AlignmentI;
7 import jalview.datamodel.Annotation;
8 import jalview.datamodel.HiddenMarkovModel;
9 import jalview.datamodel.SequenceI;
10 import jalview.gui.AlignFrame;
11 import jalview.gui.Desktop;
12 import jalview.gui.JvOptionPane;
13 import jalview.io.DataSourceType;
14 import jalview.io.FileParse;
15 import jalview.io.StockholmFile;
16 import jalview.util.FileUtils;
17 import jalview.util.MessageManager;
18 import jalview.ws.params.ArgumentI;
19 import jalview.ws.params.simple.BooleanOption;
20 import jalview.ws.params.simple.Option;
22 import java.io.BufferedReader;
24 import java.io.FileReader;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Collections;
28 import java.util.List;
29 import java.util.Scanner;
31 import javax.swing.JOptionPane;
33 public class HMMSearch extends HmmerCommand
35 static final String HMMSEARCH = "hmmsearch";
37 boolean realign = false;
41 int seqsToReturn = Integer.MAX_VALUE;
45 private String databaseName;
48 * Constructor for the HMMSearch command
52 public HMMSearch(AlignFrame af, List<ArgumentI> args)
58 * Runs the hmmsearch command: the data on the alignment or group is exported,
59 * then the command is executed on the command line, and then the results are
60 * imported and displayed in a new frame. Call this method directly to execute
61 * synchronously, or via start() in a new Thread to execute asynchronously.
66 HiddenMarkovModel hmm = getHmmProfile();
69 // shouldn't happen if we got this far
70 Cache.log.error("Error: no hmm for hmmsearch");
74 SequenceI hmmSeq = hmm.getConsensusSequence();
75 long msgId = System.currentTimeMillis();
76 af.setProgressBar(MessageManager.getString("status.running_search"),
81 File hmmFile = FileUtils.createTempFile("hmm", ".hmm");
82 File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
84 File searchOutputFile = FileUtils.createTempFile("searchOutput",
87 exportHmm(hmm, hmmFile.getAbsoluteFile());
89 boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
92 JvOptionPane.showInternalMessageDialog(af, MessageManager
93 .formatMessage("warn.command_failed", "hmmsearch"));
97 importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile);
98 // TODO make realignment of search results a step at this level
99 // and make it conditional on this.realign
100 } catch (IOException | InterruptedException e)
106 af.setProgressBar("", msgId);
111 * Executes an hmmsearch with the given hmm as input. The database to be
112 * searched is a local file as specified by the 'Database' parameter, or the
113 * current alignment (written to file) if none is specified.
115 * @param searchOutputFile
116 * @param hitsAlignmentFile
120 * @throws IOException
122 private boolean runCommand(File searchOutputFile, File hitsAlignmentFile,
123 File hmmFile) throws IOException
125 String command = getCommandPath(HMMSEARCH);
131 List<String> args = new ArrayList<>();
133 buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
135 return runCommand(args);
139 * Appends command line arguments to the given list, to specify input and
140 * output files for the search, and any additional options that may have been
141 * passed from the parameters dialog
144 * @param searchOutputFile
145 * @param hitsAlignmentFile
147 * @throws IOException
149 protected void buildArguments(List<String> args, File searchOutputFile,
150 File hitsAlignmentFile, File hmmFile) throws IOException
153 args.add(getFilePath(searchOutputFile, true));
155 args.add(getFilePath(hitsAlignmentFile, true));
157 boolean dbFound = false;
159 File databaseFile = null;
161 boolean useEvalueCutoff = false;
162 boolean useScoreCutoff = false;
163 String seqEvalueCutoff = null;
164 String domEvalueCutoff = null;
165 String seqScoreCutoff = null;
166 String domScoreCutoff = null;
167 databaseName = "Alignment";
168 boolean searchAlignment = false;
172 for (ArgumentI arg : params)
174 String name = arg.getName();
175 if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
178 seqsToReturn = Integer.parseInt(arg.getValue());
180 else if (MessageManager.getString("action.search").equals(name))
182 searchAlignment = arg.getValue().equals(
183 MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY));
185 else if (MessageManager.getString(DATABASE_KEY).equals(name))
187 dbPath = arg.getValue();
188 int pos = dbPath.lastIndexOf(File.separator);
189 databaseName = dbPath.substring(pos + 1);
190 databaseFile = new File(dbPath);
192 else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
197 else if (MessageManager.getString(USE_ACCESSIONS_KEY)
202 else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
205 if (CUTOFF_EVALUE.equals(arg.getValue()))
207 useEvalueCutoff = true;
209 else if (CUTOFF_SCORE.equals(arg.getValue()))
211 useScoreCutoff = true;
214 else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
216 seqEvalueCutoff = arg.getValue();
218 else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
220 seqScoreCutoff = arg.getValue();
222 else if (MessageManager.getString(DOM_EVALUE_KEY)
225 domEvalueCutoff = arg.getValue();
227 else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
229 domScoreCutoff = arg.getValue();
231 else if (MessageManager.getString(TRIM_TERMINI_KEY)
236 else if (MessageManager.getString(DATABASE_KEY).equals(name))
239 dbPath = arg.getValue();
240 if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
243 int pos = dbPath.lastIndexOf(File.separator);
244 databaseName = dbPath.substring(pos + 1);
245 databaseFile = new File(dbPath);
254 args.add(seqEvalueCutoff);
256 args.add(domEvalueCutoff);
258 else if (useScoreCutoff)
261 args.add(seqScoreCutoff);
263 args.add(domScoreCutoff);
266 // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
271 * no external database specified for search, so
272 * export current alignment as 'database' to search,
273 * excluding any HMM consensus sequences it contains
275 databaseFile = FileUtils.createTempFile("database", ".sto");
276 AlignmentI al = af.getViewport().getAlignment();
277 AlignmentI copy = new Alignment(al);
278 List<SequenceI> hmms = copy.getHmmSequences();
279 for (SequenceI hmmSeq : hmms)
281 copy.deleteSequence(hmmSeq);
283 exportStockholm(copy.getSequencesArray(), databaseFile, null);
286 args.add(getFilePath(hmmFile, true));
287 args.add(getFilePath(databaseFile, true));
291 * Imports the data from the temporary file to which the output of hmmsearch
292 * was directed. The results are optionally realigned using hmmalign.
296 private void importData(SequenceI hmmSeq, File inputAlignmentTemp,
297 File hmmTemp, File searchOutputFile)
298 throws IOException, InterruptedException
300 BufferedReader br = new BufferedReader(
301 new FileReader(inputAlignmentTemp));
304 if (br.readLine() == null)
306 JOptionPane.showMessageDialog(af,
307 MessageManager.getString("label.no_sequences_found"));
310 StockholmFile file = new StockholmFile(new FileParse(
311 inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
312 seqs = file.getSeqsAsArray();
313 // look for PP cons and ref seq in alignment only annotation
314 AlignmentAnnotation modelpos = null, ppcons = null;
315 for (AlignmentAnnotation aa : file.getAnnotations())
317 if (aa.sequenceRef == null)
319 if (aa.label.equals("Reference Positions")) // RF feature type in
324 if (aa.label.equals("Posterior Probability"))
330 readTable(searchOutputFile);
332 int seqCount = Math.min(seqs.length, seqsToReturn);
333 SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
334 hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
335 hmmAndSeqs[0] = hmmSeq;
336 System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
337 if (modelpos != null)
339 // TODO need - get ungapped sequence method
341 hmmSeq.getDatasetSequence().getSequenceAsString());
342 Annotation[] refpos = modelpos.annotations;
343 // insert gaps to match with refseq positions
344 int gc = 0, lcol = 0;
345 for (int c = 0; c < refpos.length; c++)
347 if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
351 hmmSeq.insertCharAt(lcol + 1, gc, '-');
364 realignResults(hmmAndSeqs);
368 AlignmentI al = new Alignment(hmmAndSeqs);
371 al.addAnnotation(ppcons);
373 if (modelpos != null)
375 al.addAnnotation(modelpos);
377 AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
378 AlignFrame.DEFAULT_HEIGHT);
379 String ttl = "hmmSearch of " + databaseName + " using "
381 Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
382 AlignFrame.DEFAULT_HEIGHT);
386 inputAlignmentTemp.delete();
387 searchOutputFile.delete();
398 * Realigns the given sequences using hmmalign, to the HMM profile sequence
399 * which is the first in the array, and opens the results in a new frame
403 protected void realignResults(SequenceI[] hmmAndSeqs)
406 * and align the search results to the HMM profile
408 AlignmentI al = new Alignment(hmmAndSeqs);
409 AlignFrame frame = new AlignFrame(al, 1, 1);
410 List<ArgumentI> alignArgs = new ArrayList<>();
411 String alignTo = hmmAndSeqs[0].getName();
412 List<String> options = Collections.singletonList(alignTo);
413 Option option = new Option(MessageManager.getString("label.use_hmm"),
414 "", true, alignTo, alignTo, options, null);
415 alignArgs.add(option);
418 alignArgs.add(new BooleanOption(
419 MessageManager.getString(TRIM_TERMINI_KEY),
420 MessageManager.getString("label.trim_termini_desc"), true,
423 HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
428 * Reads in the scores table output by hmmsearch and adds annotation to
429 * sequences for E-value and bit score
431 * @param inputTableTemp
432 * @throws IOException
434 void readTable(File inputTableTemp) throws IOException
436 BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
438 while (!line.startsWith("Query:"))
440 line = br.readLine();
442 while (!line.contains("-------"))
444 line = br.readLine();
446 line = br.readLine();
449 while (!" ------ inclusion threshold ------".equals(line)
452 SequenceI seq = seqs[index];
453 Scanner scanner = new Scanner(line);
454 String evalue = scanner.next();
455 String score = scanner.next();
456 addScoreAnnotations(evalue, score, seq);
458 line = br.readLine();
466 protected void addScoreAnnotations(String eValue, String bitScore,
469 String label = "Search Scores";
470 String description = "Full sequence bit score and E-Value";
474 AlignmentAnnotation annot = new AlignmentAnnotation(label,
478 annot.description = description;
480 annot.setCalcId(HMMSEARCH);
482 double dEValue = Double.parseDouble(eValue);
483 annot.setEValue(dEValue);
485 double dBitScore = Double.parseDouble(bitScore);
486 annot.setBitScore(dBitScore);
488 annot.setSequenceRef(seq);
489 seq.addAlignmentAnnotation(annot);
490 } catch (NumberFormatException e)
492 System.err.println("Error parsing " + label + " from " + eValue