3 import jalview.analysis.SeqsetUtils;
4 import jalview.analysis.SeqsetUtils.SequenceInfo;
5 import jalview.bin.Cache;
6 import jalview.bin.Console;
7 import jalview.datamodel.Alignment;
8 import jalview.datamodel.AlignmentAnnotation;
9 import jalview.datamodel.AlignmentI;
10 import jalview.datamodel.AnnotatedCollectionI;
11 import jalview.datamodel.Annotation;
12 import jalview.datamodel.HiddenMarkovModel;
13 import jalview.datamodel.SequenceGroup;
14 import jalview.datamodel.SequenceI;
15 import jalview.gui.AlignFrame;
16 import jalview.gui.JvOptionPane;
17 import jalview.gui.Preferences;
18 import jalview.io.FastaFile;
19 import jalview.io.HMMFile;
20 import jalview.io.StockholmFile;
21 import jalview.util.FileUtils;
22 import jalview.util.MessageManager;
23 import jalview.util.Platform;
24 import jalview.ws.params.ArgumentI;
26 import java.io.BufferedReader;
28 import java.io.IOException;
29 import java.io.InputStreamReader;
30 import java.io.PrintWriter;
31 import java.nio.file.Paths;
32 import java.util.ArrayList;
33 import java.util.Hashtable;
34 import java.util.List;
38 * Base class for hmmbuild, hmmalign and hmmsearch
43 public abstract class HmmerCommand implements Runnable
45 public static final String HMMBUILD = "hmmbuild";
47 protected final AlignFrame af;
49 protected final AlignmentI alignment;
51 protected final List<ArgumentI> params;
54 * constants for i18n lookup of passed parameter names
56 static final String DATABASE_KEY = "label.database";
58 static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
60 static final String USE_ACCESSIONS_KEY = "label.use_accessions";
62 static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
64 static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
66 static final String NUMBER_OF_ITERATIONS = "label.number_of_iterations";
68 static final String TRIM_TERMINI_KEY = "label.trim_termini";
70 static final String RETURN_N_NEW_SEQ = "label.check_for_new_sequences";
72 static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
74 static final String CUTOFF_NONE = "label.default";
76 static final String CUTOFF_SCORE = "label.score";
78 static final String CUTOFF_EVALUE = "label.evalue";
80 static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue";
82 static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue";
84 static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score";
86 static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score";
88 static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue";
90 static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue";
92 static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score";
94 static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score";
96 static final String ARG_TRIM = "--trim";
98 static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold";
/**
 * Creates a command bound to the given alignment window and parameter list.
 * The alignment worked on is the one held by the window's viewport.
 *
 * @param alignFrame
 *          the alignment window this command is run from
 * @param args
 *          the parameters supplied for the command
 */
public HmmerCommand(AlignFrame alignFrame, List<ArgumentI> args)
{
  this.af = alignFrame;
  this.params = args;
  this.alignment = alignFrame.getViewport().getAlignment();
}
114 * Answers true if preference HMMER_PATH is set, and its value is the path to
115 * a directory that contains an executable <code>hmmbuild</code> or
116 * <code>hmmbuild.exe</code>, else false
120 public static boolean isHmmerAvailable()
122 File exec = FileUtils.getExecutable(HMMBUILD,
123 Cache.getProperty(Preferences.HMMER_PATH));
128 * Uniquifies the sequences when exporting and stores their details in a
133 protected Map<String, SequenceInfo> stashSequences(SequenceI[] seqs)
135 return SeqsetUtils.uniquify(seqs, true);
139 * Restores the sequence data lost by uniquifying
141 * @param sequencesHash
144 protected void recoverSequences(Map<String, SequenceInfo> sequencesHash, SequenceI[] seqs)
146 SeqsetUtils.deuniquify(sequencesHash, seqs);
150 * Runs a command as a separate process and waits for it to complete. Answers
151 * true if the process return status is zero, else false.
154 * the executable command and any arguments to it
155 * @throws IOException
157 public boolean runCommand(List<String> commands)
160 List<String> args = Platform.isWindowsAndNotJS() ? wrapWithCygwin(commands)
165 ProcessBuilder pb = new ProcessBuilder(args);
166 pb.redirectErrorStream(true); // merge syserr to sysout
167 if (Platform.isWindowsAndNotJS())
169 String path = pb.environment().get("Path");
170 path = jalview.bin.Cache.getProperty("CYGWIN_PATH") + ";" + path;
171 pb.environment().put("Path", path);
173 final Process p = pb.start();
174 new Thread(new Runnable()
179 BufferedReader input = new BufferedReader(
180 new InputStreamReader(p.getInputStream()));
183 String line = input.readLine();
186 System.out.println(line);
187 line = input.readLine();
189 } catch (IOException e)
197 int exitValue = p.exitValue();
200 Console.error("Command failed, return code = " + exitValue);
201 Console.error("Command/args were: " + args.toString());
203 return exitValue == 0; // 0 is success, by convention
204 } catch (Exception e)
212 * Converts the given command to a Cygwin "bash" command wrapper. The hmmer
213 * command and any arguments to it are converted into a single parameter to the
218 protected List<String> wrapWithCygwin(List<String> commands)
220 File bash = FileUtils.getExecutable("bash",
221 Cache.getProperty(Preferences.CYGWIN_PATH));
224 Console.error("Cygwin shell not found");
228 List<String> wrapped = new ArrayList<>();
229 // wrapped.add("C:\Users\tva\run");
230 wrapped.add(bash.getAbsolutePath());
234 * combine hmmbuild/search/align and arguments to a single string
236 StringBuilder sb = new StringBuilder();
237 for (String cmd : commands)
239 sb.append(" ").append(cmd);
241 wrapped.add(sb.toString());
247 * Exports an alignment, and reference (RF) annotation if present, to the
248 * specified file, in Stockholm format, removing all HMM sequences
253 * @throws IOException
255 public void exportStockholm(SequenceI[] seqs, File toFile,
256 AnnotatedCollectionI annotated)
263 AlignmentI newAl = new Alignment(seqs);
265 if (!newAl.isAligned())
270 if (toFile != null && annotated != null)
272 AlignmentAnnotation[] annots = annotated.getAlignmentAnnotation();
275 for (AlignmentAnnotation annot : annots)
277 if (annot.label.contains("Reference") || "RF".equals(annot.label))
279 AlignmentAnnotation newRF;
280 if (annot.annotations.length > newAl.getWidth())
282 Annotation[] rfAnnots = new Annotation[newAl.getWidth()];
283 System.arraycopy(annot.annotations, 0, rfAnnots, 0,
285 newRF = new AlignmentAnnotation("RF", "Reference Positions",
290 newRF = new AlignmentAnnotation(annot);
292 newAl.addAnnotation(newRF);
298 for (SequenceI seq : newAl.getSequencesArray())
300 if (seq.getAnnotation() != null)
302 for (AlignmentAnnotation ann : seq.getAnnotation())
304 seq.removeAlignmentAnnotation(ann);
309 StockholmFile file = new StockholmFile(newAl);
310 String output = file.print(seqs, false);
311 PrintWriter writer = new PrintWriter(toFile);
312 writer.println(output);
317 * Answers the full path to the given hmmer executable, or null if file cannot
318 * be found or is not executable
321 * command short name e.g. hmmalign
323 * @throws IOException
325 protected String getCommandPath(String cmd)
328 String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH);
329 // ensure any symlink to the directory is resolved:
330 binariesFolder = Paths.get(binariesFolder).toRealPath().toString();
331 File file = FileUtils.getExecutable(cmd, binariesFolder);
332 if (file == null && af != null)
334 JvOptionPane.showInternalMessageDialog(af, MessageManager
335 .formatMessage("label.executable_not_found", cmd));
338 return file == null ? null : getFilePath(file, true);
342 * Exports an HMM to the specified file
346 * @throws IOException
348 public void exportHmm(HiddenMarkovModel hmm, File hmmFile)
353 HMMFile file = new HMMFile(hmm);
354 PrintWriter writer = new PrintWriter(hmmFile);
355 writer.print(file.print());
362 * Exports a sequence to the specified file
366 * @throws IOException
368 public void exportSequence(SequenceI seq, File seqFile) throws IOException
372 FastaFile file = new FastaFile();
373 PrintWriter writer = new PrintWriter(seqFile);
374 writer.print(file.print(new SequenceI[] { seq }, false));
380 * Answers the HMM profile for the profile sequence the user selected (default
381 * is just the first HMM sequence in the alignment)
385 protected HiddenMarkovModel getHmmProfile()
387 String alignToParamName = MessageManager.getString("label.use_hmm");
388 for (ArgumentI arg : params)
390 String name = arg.getName();
391 if (name.equals(alignToParamName))
393 String seqName = arg.getValue();
394 SequenceI hmmSeq = alignment.findName(seqName);
395 if (hmmSeq.hasHMMProfile())
397 return hmmSeq.getHMM();
405 * Answers the query sequence the user selected (default is just the first
406 * sequence in the alignment)
410 protected SequenceI getSequence()
412 String alignToParamName = MessageManager
413 .getString("label.use_sequence");
414 for (ArgumentI arg : params)
416 String name = arg.getName();
417 if (name.equals(alignToParamName))
419 String seqName = arg.getValue();
420 SequenceI seq = alignment.findName(seqName);
428 * Answers an absolute path to the given file, in a format suitable for
429 * processing by a hmmer command. On a Windows platform, the native Windows file
430 * path is converted to Cygwin format, by replacing '\'with '/' and drive letter
431 * X with /cygdrive/x.
435 * True if file is to be read/written from within the Cygwin
436 * shell. Should be false for any imports.
439 protected String getFilePath(File resultFile, boolean isInCygwin)
441 String path = resultFile.getAbsolutePath();
442 if (Platform.isWindowsAndNotJS() && isInCygwin)
444 // the first backslash escapes '\' for the regular expression argument
445 path = path.replaceAll("\\" + File.separator, "/");
446 int colon = path.indexOf(':');
449 String drive = path.substring(0, colon);
450 path = path.replaceAll(drive + ":", "/cygdrive/" + drive);
458 * A helper method that deletes any HMM consensus sequence from the given
459 * collection, and from the parent alignment if <code>ac</code> is a subgroup
463 void deleteHmmSequences(AnnotatedCollectionI ac)
465 List<SequenceI> hmmSeqs = ac.getHmmSequences();
466 for (SequenceI hmmSeq : hmmSeqs)
468 if (ac instanceof SequenceGroup)
470 ((SequenceGroup) ac).deleteSequence(hmmSeq, false);
471 AnnotatedCollectionI context = ac.getContext();
472 if (context != null && context instanceof AlignmentI)
474 ((AlignmentI) context).deleteSequence(hmmSeq);
479 ((AlignmentI) ac).deleteSequence(hmmSeq);
485 * Sets the names of any duplicates within the given sequences to include their
486 * respective lengths. Deletes any duplicates that have the same name after this
491 void renameDuplicates(AlignmentI al)
494 SequenceI[] seqs = al.getSequencesArray();
495 List<Boolean> wasRenamed = new ArrayList<>();
497 for (SequenceI seq : seqs)
499 wasRenamed.add(false);
502 for (int i = 0; i < seqs.length; i++)
504 for (int j = 0; j < seqs.length; j++)
506 if (seqs[i].getName().equals(seqs[j].getName()) && i != j
507 && !wasRenamed.get(j))
510 wasRenamed.set(i, true);
511 String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd();
512 // setting sequence name to include range - to differentiate between
513 // sequences of the same name. Currently have to include the range twice
514 // because the range is removed (once) when setting the name
515 // TODO come up with a better way of doing this
516 seqs[j].setName(seqs[j].getName() + range + range);
520 if (wasRenamed.get(i))
522 String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd();
523 seqs[i].setName(seqs[i].getName() + range + range);
527 for (int i = 0; i < seqs.length; i++)
529 for (int j = 0; j < seqs.length; j++)
531 if (seqs[i].getName().equals(seqs[j].getName()) && i != j)
533 al.deleteSequence(j);