package jalview.hmmer; import jalview.analysis.SeqsetUtils; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.JvOptionPane; import jalview.gui.Preferences; import jalview.io.FastaFile; import jalview.io.HMMFile; import jalview.io.StockholmFile; import jalview.util.FileUtils; import jalview.util.MessageManager; import jalview.util.Platform; import jalview.ws.params.ArgumentI; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Hashtable; import java.util.List; /** * Base class for hmmbuild, hmmalign and hmmsearch * * @author TZVanaalten * */ public abstract class HmmerCommand implements Runnable { public static final String HMMBUILD = "hmmbuild"; protected final AlignFrame af; protected final AlignmentI alignment; protected final List params; /* * constants for i18n lookup of passed parameter names */ static final String DATABASE_KEY = "label.database"; static final String THIS_ALIGNMENT_KEY = "label.this_alignment"; static final String USE_ACCESSIONS_KEY = "label.use_accessions"; static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs"; static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results"; static final String NUMBER_OF_ITERATIONS = "label.number_of_iterations"; static final String TRIM_TERMINI_KEY = "label.trim_termini"; static final String RETURN_N_NEW_SEQ = "label.check_for_new_sequences"; static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff"; static final String CUTOFF_NONE = "label.default"; static final String CUTOFF_SCORE = "label.score"; static final String CUTOFF_EVALUE = "label.evalue"; static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue"; static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue"; static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score"; static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score"; static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue"; static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue"; static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score"; static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score"; static final String ARG_TRIM = "--trim"; static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold"; /** * Constructor * * @param alignFrame * @param args */ public HmmerCommand(AlignFrame alignFrame, List args) { af = alignFrame; alignment = af.getViewport().getAlignment(); params = args; } /** * Answers true if preference HMMER_PATH is set, and its value is the path to * a directory that contains an executable hmmbuild or * hmmbuild.exe, else false * * @return */ public static boolean isHmmerAvailable() { File exec = FileUtils.getExecutable(HMMBUILD, Cache.getProperty(Preferences.HMMER_PATH)); return exec != null; } /** * Uniquifies the sequences when exporting and stores their details in a * hashtable * * @param seqs */ protected Hashtable stashSequences(SequenceI[] seqs) { return SeqsetUtils.uniquify(seqs, true); } /** * Restores the sequence data lost by uniquifying * * @param hashtable * @param seqs */ protected void recoverSequences(Hashtable hashtable, SequenceI[] seqs) { SeqsetUtils.deuniquify(hashtable, seqs); } /** * Runs a command as a separate process and waits for it to complete. Answers * true if the process return status is zero, else false. * * @param commands * the executable command and any arguments to it * @throws IOException */ public boolean runCommand(List commands) throws IOException { List args = Platform.isWindowsAndNotJS() ? wrapWithCygwin(commands) : commands; try { ProcessBuilder pb = new ProcessBuilder(args); pb.redirectErrorStream(true); // merge syserr to sysout if (Platform.isWindowsAndNotJS()) { String path = pb.environment().get("Path"); path = jalview.bin.Cache.getProperty("CYGWIN_PATH") + ";" + path; pb.environment().put("Path", path); } final Process p = pb.start(); new Thread(new Runnable() { @Override public void run() { BufferedReader input = new BufferedReader( new InputStreamReader(p.getInputStream())); try { String line = input.readLine(); while (line != null) { System.out.println(line); line = input.readLine(); } } catch (IOException e) { e.printStackTrace(); } } }).start(); p.waitFor(); int exitValue = p.exitValue(); if (exitValue != 0) { Cache.log.error("Command failed, return code = " + exitValue); Cache.log.error("Command/args were: " + args.toString()); } return exitValue == 0; // 0 is success, by convention } catch (Exception e) { e.printStackTrace(); return false; } } /** * Converts the given command to a Cygwin "bash" command wrapper. The hmmer * command and any arguments to it are converted into a single parameter to the * bash command. * * @param commands */ protected List wrapWithCygwin(List commands) { File bash = FileUtils.getExecutable("bash", Cache.getProperty(Preferences.CYGWIN_PATH)); if (bash == null) { Cache.log.error("Cygwin shell not found"); return commands; } List wrapped = new ArrayList<>(); // wrapped.add("C:\Users\tva\run"); wrapped.add(bash.getAbsolutePath()); wrapped.add("-c"); /* * combine hmmbuild/search/align and arguments to a single string */ StringBuilder sb = new StringBuilder(); for (String cmd : commands) { sb.append(" ").append(cmd); } wrapped.add(sb.toString()); return wrapped; } /** * Exports an alignment, and reference (RF) annotation if present, to the * specified file, in Stockholm format, removing all HMM sequences * * @param seqs * @param toFile * @param annotated * @throws IOException */ public void exportStockholm(SequenceI[] seqs, File toFile, AnnotatedCollectionI annotated) throws IOException { if (seqs == null) { return; } AlignmentI newAl = new Alignment(seqs); if (!newAl.isAligned()) { newAl.padGaps(); } if (toFile != null && annotated != null) { AlignmentAnnotation[] annots = annotated.getAlignmentAnnotation(); if (annots != null) { for (AlignmentAnnotation annot : annots) { if (annot.label.contains("Reference") || "RF".equals(annot.label)) { AlignmentAnnotation newRF; if (annot.annotations.length > newAl.getWidth()) { Annotation[] rfAnnots = new Annotation[newAl.getWidth()]; System.arraycopy(annot.annotations, 0, rfAnnots, 0, rfAnnots.length); newRF = new AlignmentAnnotation("RF", "Reference Positions", rfAnnots); } else { newRF = new AlignmentAnnotation(annot); } newAl.addAnnotation(newRF); } } } } for (SequenceI seq : newAl.getSequencesArray()) { if (seq.getAnnotation() != null) { for (AlignmentAnnotation ann : seq.getAnnotation()) { seq.removeAlignmentAnnotation(ann); } } } StockholmFile file = new StockholmFile(newAl); String output = file.print(seqs, false); PrintWriter writer = new PrintWriter(toFile); writer.println(output); writer.close(); } /** * Answers the full path to the given hmmer executable, or null if file cannot * be found or is not executable * * @param cmd * command short name e.g. hmmalign * @return * @throws IOException */ protected String getCommandPath(String cmd) throws IOException { String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH); // ensure any symlink to the directory is resolved: binariesFolder = Paths.get(binariesFolder).toRealPath().toString(); File file = FileUtils.getExecutable(cmd, binariesFolder); if (file == null && af != null) { JvOptionPane.showInternalMessageDialog(af, MessageManager .formatMessage("label.executable_not_found", cmd)); } return file == null ? null : getFilePath(file, true); } /** * Exports an HMM to the specified file * * @param hmm * @param hmmFile * @throws IOException */ public void exportHmm(HiddenMarkovModel hmm, File hmmFile) throws IOException { if (hmm != null) { HMMFile file = new HMMFile(hmm); PrintWriter writer = new PrintWriter(hmmFile); writer.print(file.print()); writer.close(); } } // TODO is needed? /** * Exports a sequence to the specified file * * @param hmm * @param hmmFile * @throws IOException */ public void exportSequence(SequenceI seq, File seqFile) throws IOException { if (seq != null) { FastaFile file = new FastaFile(); PrintWriter writer = new PrintWriter(seqFile); writer.print(file.print(new SequenceI[] { seq }, false)); writer.close(); } } /** * Answers the HMM profile for the profile sequence the user selected (default * is just the first HMM sequence in the alignment) * * @return */ protected HiddenMarkovModel getHmmProfile() { String alignToParamName = MessageManager.getString("label.use_hmm"); for (ArgumentI arg : params) { String name = arg.getName(); if (name.equals(alignToParamName)) { String seqName = arg.getValue(); SequenceI hmmSeq = alignment.findName(seqName); if (hmmSeq.hasHMMProfile()) { return hmmSeq.getHMM(); } } } return null; } /** * Answers the query sequence the user selected (default is just the first * sequence in the alignment) * * @return */ protected SequenceI getSequence() { String alignToParamName = MessageManager .getString("label.use_sequence"); for (ArgumentI arg : params) { String name = arg.getName(); if (name.equals(alignToParamName)) { String seqName = arg.getValue(); SequenceI seq = alignment.findName(seqName); return seq; } } return null; } /** * Answers an absolute path to the given file, in a format suitable for * processing by a hmmer command. On a Windows platform, the native Windows file * path is converted to Cygwin format, by replacing '\'with '/' and drive letter * X with /cygdrive/x. * * @param resultFile * @param isInCygwin * True if file is to be read/written from within the Cygwin * shell. Should be false for any imports. * @return */ protected String getFilePath(File resultFile, boolean isInCygwin) { String path = resultFile.getAbsolutePath(); if (Platform.isWindowsAndNotJS() && isInCygwin) { // the first backslash escapes '\' for the regular expression argument path = path.replaceAll("\\" + File.separator, "/"); int colon = path.indexOf(':'); if (colon > 0) { String drive = path.substring(0, colon); path = path.replaceAll(drive + ":", "/cygdrive/" + drive); } } return path; } /** * A helper method that deletes any HMM consensus sequence from the given * collection, and from the parent alignment if ac is a subgroup * * @param ac */ void deleteHmmSequences(AnnotatedCollectionI ac) { List hmmSeqs = ac.getHmmSequences(); for (SequenceI hmmSeq : hmmSeqs) { if (ac instanceof SequenceGroup) { ((SequenceGroup) ac).deleteSequence(hmmSeq, false); AnnotatedCollectionI context = ac.getContext(); if (context != null && context instanceof AlignmentI) { ((AlignmentI) context).deleteSequence(hmmSeq); } } else { ((AlignmentI) ac).deleteSequence(hmmSeq); } } } /** * Sets the names of any duplicates within the given sequences to include their * respective lengths. Deletes any duplicates that have the same name after this * step * * @param seqs */ void renameDuplicates(AlignmentI al) { SequenceI[] seqs = al.getSequencesArray(); List wasRenamed = new ArrayList<>(); for (SequenceI seq : seqs) { wasRenamed.add(false); } for (int i = 0; i < seqs.length; i++) { for (int j = 0; j < seqs.length; j++) { if (seqs[i].getName().equals(seqs[j].getName()) && i != j && !wasRenamed.get(j)) { wasRenamed.set(i, true); String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd(); // setting sequence name to include range - to differentiate between // sequences of the same name. Currently have to include the range twice // because the range is removed (once) when setting the name // TODO come up with a better way of doing this seqs[j].setName(seqs[j].getName() + range + range); } } if (wasRenamed.get(i)) { String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd(); seqs[i].setName(seqs[i].getName() + range + range); } } for (int i = 0; i < seqs.length; i++) { for (int j = 0; j < seqs.length; j++) { if (seqs[i].getName().equals(seqs[j].getName()) && i != j) { al.deleteSequence(j); } } } } }