3 import jalview.bin.Cache;
4 import jalview.datamodel.Alignment;
5 import jalview.datamodel.AlignmentAnnotation;
6 import jalview.datamodel.AlignmentI;
7 import jalview.datamodel.Annotation;
8 import jalview.datamodel.HiddenMarkovModel;
9 import jalview.datamodel.SequenceI;
10 import jalview.gui.AlignFrame;
11 import jalview.gui.Desktop;
12 import jalview.gui.JvOptionPane;
13 import jalview.io.DataSourceType;
14 import jalview.io.FileParse;
15 import jalview.io.StockholmFile;
16 import jalview.util.FileUtils;
17 import jalview.util.MessageManager;
18 import jalview.ws.params.ArgumentI;
19 import jalview.ws.params.simple.BooleanOption;
20 import jalview.ws.params.simple.Option;
22 import java.io.BufferedReader;
24 import java.io.FileReader;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Collections;
28 import java.util.Hashtable;
29 import java.util.List;
30 import java.util.Scanner;
32 import javax.swing.JOptionPane;
34 public class HMMSearch extends HmmerCommand
36 static final String HMMSEARCH = "hmmsearch";
38 boolean realign = false;
42 boolean returnNoOfNewSeqs = false;
44 int seqsToReturn = Integer.MAX_VALUE;
46 boolean searchAlignment = true;
50 private String databaseName;
52 Hashtable sequencesHash;
55 * Constructor for HMMSearch
59 public HMMSearch(AlignFrame af, List<ArgumentI> args)
65 * Runs HMMSearch: the data on the alignment or group is exported,
66 * then the command is executed in the command line and then the data is
67 * imported and displayed in a new frame. Call this method directly to execute
68 * synchronously, or via start() in a new Thread to execute asynchronously.
73 HiddenMarkovModel hmm = getHmmProfile();
76 // shouldn't happen if we got this far
77 Cache.log.error("Error: no hmm for hmmsearch");
81 SequenceI hmmSeq = hmm.getConsensusSequence();
82 long msgId = System.currentTimeMillis();
83 af.setProgressBar(MessageManager.getString("status.running_search"),
88 File hmmFile = FileUtils.createTempFile("hmm", ".hmm");
89 File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
91 File searchOutputFile = FileUtils.createTempFile("searchOutput",
94 exportHmm(hmm, hmmFile.getAbsoluteFile());
96 boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
99 JvOptionPane.showInternalMessageDialog(af, MessageManager
100 .formatMessage("warn.command_failed", "hmmsearch"));
104 importData(hmmSeq, hitsAlignmentFile, hmmFile, searchOutputFile);
105 // TODO make realignment of search results a step at this level
106 // and make it conditional on this.realign
107 } catch (IOException | InterruptedException e)
113 af.setProgressBar("", msgId);
118 * Executes an hmmsearch with the given hmm as input. The database to be
119 * searched is a local file as specified by the 'Database' parameter, or the
120 * current alignment (written to file) if none is specified.
122 * @param searchOutputFile
123 * @param hitsAlignmentFile
127 * @throws IOException
129 private boolean runCommand(File searchOutputFile, File hitsAlignmentFile,
130 File hmmFile) throws IOException
132 String command = getCommandPath(HMMSEARCH);
138 List<String> args = new ArrayList<>();
140 buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
142 return runCommand(args);
146 * Appends command line arguments to the given list, to specify input and
147 * output files for the search, and any additional options that may have been
148 * passed from the parameters dialog
151 * @param searchOutputFile
152 * @param hitsAlignmentFile
154 * @throws IOException
156 protected void buildArguments(List<String> args, File searchOutputFile,
157 File hitsAlignmentFile, File hmmFile) throws IOException
160 args.add(getFilePath(searchOutputFile, true));
162 args.add(getFilePath(hitsAlignmentFile, true));
164 boolean dbFound = false;
166 File databaseFile = null;
168 boolean useEvalueCutoff = false;
169 boolean useScoreCutoff = false;
170 String seqEvalueCutoff = null;
171 String domEvalueCutoff = null;
172 String seqScoreCutoff = null;
173 String domScoreCutoff = null;
174 databaseName = "Alignment";
178 for (ArgumentI arg : params)
180 String name = arg.getName();
181 if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
184 seqsToReturn = Integer.parseInt(arg.getValue());
186 else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
191 else if (MessageManager.getString(USE_ACCESSIONS_KEY)
196 else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
199 if (CUTOFF_EVALUE.equals(arg.getValue()))
201 useEvalueCutoff = true;
203 else if (CUTOFF_SCORE.equals(arg.getValue()))
205 useScoreCutoff = true;
208 else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
210 seqEvalueCutoff = arg.getValue();
212 else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
214 seqScoreCutoff = arg.getValue();
216 else if (MessageManager.getString(DOM_EVALUE_KEY)
219 domEvalueCutoff = arg.getValue();
221 else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
223 domScoreCutoff = arg.getValue();
225 else if (MessageManager.getString(TRIM_TERMINI_KEY)
230 else if (MessageManager.getString(DATABASE_KEY).equals(name))
233 dbPath = arg.getValue();
234 if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
237 int pos = dbPath.lastIndexOf(File.separator);
238 databaseName = dbPath.substring(pos + 1);
239 databaseFile = new File(dbPath);
241 searchAlignment = false;
243 else if (MessageManager.getString(RETURN_N_NEW_SEQ).equals(name))
245 returnNoOfNewSeqs = true;
253 args.add(seqEvalueCutoff);
255 args.add(domEvalueCutoff);
257 else if (useScoreCutoff)
260 args.add(seqScoreCutoff);
262 args.add(domScoreCutoff);
265 // if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
270 * no external database specified for search, so
271 * export current alignment as 'database' to search,
272 * excluding any HMM consensus sequences it contains
274 databaseFile = FileUtils.createTempFile("database", ".sto");
275 AlignmentI al = af.getViewport().getAlignment();
276 AlignmentI copy = new Alignment(al);
277 deleteHmmSequences(copy);
279 sequencesHash = stashSequences(copy.getSequencesArray());
281 exportStockholm(copy.getSequencesArray(), databaseFile, null);
285 args.add(getFilePath(hmmFile, true));
286 args.add(getFilePath(databaseFile, true));
290 * Imports the data from the temporary file to which the output of hmmsearch
291 * was directed. The results are optionally realigned using hmmalign.
295 private void importData(SequenceI hmmSeq, File inputAlignmentTemp,
296 File hmmTemp, File searchOutputFile)
297 throws IOException, InterruptedException
299 BufferedReader br = new BufferedReader(
300 new FileReader(inputAlignmentTemp));
303 if (br.readLine() == null)
305 JOptionPane.showMessageDialog(af,
306 MessageManager.getString("label.no_sequences_found"));
309 StockholmFile file = new StockholmFile(new FileParse(
310 inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
311 seqs = file.getSeqsAsArray();
315 recoverSequences(sequencesHash, seqs);
318 // look for PP cons and ref seq in alignment only annotation
319 AlignmentAnnotation modelpos = null, ppcons = null;
320 for (AlignmentAnnotation aa : file.getAnnotations())
322 if (aa.sequenceRef == null)
324 if (aa.label.equals("Reference Positions")) // RF feature type in
329 if (aa.label.equals("Posterior Probability"))
335 readTable(searchOutputFile);
337 int seqCount = Math.min(seqs.length, seqsToReturn);
338 SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
339 hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
340 hmmAndSeqs[0] = hmmSeq;
341 System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
342 if (modelpos != null)
344 // TODO: need a 'get ungapped sequence' method
346 hmmSeq.getDatasetSequence().getSequenceAsString());
347 Annotation[] refpos = modelpos.annotations;
348 // insert gaps to match with refseq positions
349 int gc = 0, lcol = 0;
350 for (int c = 0; c < refpos.length; c++)
352 if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
356 hmmSeq.insertCharAt(lcol + 1, gc, '-');
370 realignResults(hmmAndSeqs);
374 AlignmentI al = new Alignment(hmmAndSeqs);
377 al.addAnnotation(ppcons);
379 if (modelpos != null)
381 al.addAnnotation(modelpos);
383 AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
384 AlignFrame.DEFAULT_HEIGHT);
385 String ttl = "hmmSearch of " + databaseName + " using "
387 Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
388 AlignFrame.DEFAULT_HEIGHT);
390 int nNew = checkForNewSequences();
391 JvOptionPane.showMessageDialog(af.alignPanel, nNew + " "
392 + MessageManager.getString("label.new_returned"));
398 inputAlignmentTemp.delete();
399 searchOutputFile.delete();
409 private int checkForNewSequences()
411 int nNew = seqs.length;
413 for (SequenceI resultSeq : seqs)
415 for (SequenceI aliSeq : alignment.getSequencesArray())
417 if (resultSeq.getName().equals(aliSeq.getName()))
429 * Realigns the given sequences using hmmalign, to the HMM profile sequence
430 * which is the first in the array, and opens the results in a new frame
434 protected void realignResults(SequenceI[] hmmAndSeqs)
437 * and align the search results to the HMM profile
439 AlignmentI al = new Alignment(hmmAndSeqs);
440 AlignFrame frame = new AlignFrame(al, 1, 1);
441 List<ArgumentI> alignArgs = new ArrayList<>();
442 String alignTo = hmmAndSeqs[0].getName();
443 List<String> options = Collections.singletonList(alignTo);
444 Option option = new Option(MessageManager.getString("label.use_hmm"),
445 "", true, alignTo, alignTo, options, null);
446 alignArgs.add(option);
449 alignArgs.add(new BooleanOption(
450 MessageManager.getString(TRIM_TERMINI_KEY),
451 MessageManager.getString("label.trim_termini_desc"), true,
454 HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
457 int nNew = checkForNewSequences();
458 JvOptionPane.showMessageDialog(frame.alignPanel,
459 nNew + " " + MessageManager.getString("label.new_returned"));
463 * Reads in the scores table output by hmmsearch and adds annotation to
464 * sequences for E-value and bit score
466 * @param inputTableTemp
467 * @throws IOException
469 void readTable(File inputTableTemp) throws IOException
471 BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
473 while (!line.startsWith("Query:"))
475 line = br.readLine();
477 while (!line.contains("-------"))
479 line = br.readLine();
481 line = br.readLine();
484 while (!" ------ inclusion threshold ------".equals(line)
487 SequenceI seq = seqs[index];
488 Scanner scanner = new Scanner(line);
489 String evalue = scanner.next();
490 String score = scanner.next();
491 addScoreAnnotations(evalue, score, seq);
493 line = br.readLine();
501 protected void addScoreAnnotations(String eValue, String bitScore,
504 String label = "Search Scores";
505 String description = "Full sequence bit score and E-Value";
509 AlignmentAnnotation annot = new AlignmentAnnotation(label,
513 annot.description = description;
515 annot.setCalcId(HMMSEARCH);
517 double dEValue = Double.parseDouble(eValue);
518 annot.setEValue(dEValue);
520 double dBitScore = Double.parseDouble(bitScore);
521 annot.setBitScore(dBitScore);
523 annot.setSequenceRef(seq);
524 seq.addAlignmentAnnotation(annot);
525 } catch (NumberFormatException e)
527 System.err.println("Error parsing " + label + " from " + eValue