X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fhmmer%2FHmmerCommand.java;h=f38be0296b7145db164072a7af541d9d00b0b4fa;hb=72404f561ccd190eab1990874f017bbe94bf9e10;hp=a9d7018d4582fba4518a99ab91a026c91a57b03e;hpb=6dda78dd65a8f6f25356d9fe95078afbba4eadcd;p=jalview.git diff --git a/src/jalview/hmmer/HmmerCommand.java b/src/jalview/hmmer/HmmerCommand.java index a9d7018..f38be02 100644 --- a/src/jalview/hmmer/HmmerCommand.java +++ b/src/jalview/hmmer/HmmerCommand.java @@ -1,5 +1,6 @@ package jalview.hmmer; +import jalview.analysis.SeqsetUtils; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -7,19 +8,26 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; +import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.JvOptionPane; import jalview.gui.Preferences; +import jalview.io.FastaFile; import jalview.io.HMMFile; import jalview.io.StockholmFile; +import jalview.util.FileUtils; import jalview.util.MessageManager; +import jalview.util.Platform; +import jalview.ws.params.ArgumentI; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; +import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Hashtable; import java.util.List; @@ -29,74 +37,135 @@ import java.util.List; * @author TZVanaalten * */ -public class HmmerCommand +public abstract class HmmerCommand implements Runnable { public static final String HMMBUILD = "hmmbuild"; - public String JALVIEWDIRECTORY = System.getProperty("user.dir") - + File.separator; + protected final AlignFrame af; - public String OUTPUTALIGNMENT; + protected final AlignmentI alignment; - public final String SPACE = " "; + protected final List params; - public final String ALLCOL = "--allcol "; + /* + * constants for i18n lookup of passed parameter names + */ + static final String DATABASE_KEY = "label.database"; + + static final String THIS_ALIGNMENT_KEY = "label.this_alignment"; + + static final String USE_ACCESSIONS_KEY = "label.use_accessions"; + + static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs"; + + static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results"; + + static final String TRIM_TERMINI_KEY = "label.trim_termini"; + + static final String RETURN_N_NEW_SEQ = "label.check_for_new_sequences"; + + static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff"; + + static final String CUTOFF_NONE = "label.default"; + + static final String CUTOFF_SCORE = "label.score"; - public final String TRIM = "--trim "; + static final String CUTOFF_EVALUE = "label.evalue"; - public final String FORCEAMINO = "--amino "; + static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue"; - public final String FORCEDNA = "--dna "; + static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue"; - public final String FORCERNA = "--rna "; + static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score"; - Hashtable hash = new Hashtable(); + static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score"; - List hmmSeqs; + static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue"; - protected AlignFrame af; + static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue"; + static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score"; + + static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score"; + + static final String ARG_TRIM = "--trim"; + + static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold"; + + /** + * Constructor + * + * @param alignFrame + * @param args + */ + public HmmerCommand(AlignFrame alignFrame, List args) + { + af = alignFrame; + alignment = af.getViewport().getAlignment(); + params = args; + } + + /** + * Answers true if preference HMMER_PATH is set, and its value is the path to + * a directory that contains an executable hmmbuild or + * hmmbuild.exe, else false + * + * @return + */ public static boolean isHmmerAvailable() { - File exec = getExecutable(HMMBUILD, Cache.getProperty(Preferences.HMMER_PATH)); + File exec = FileUtils.getExecutable(HMMBUILD, + Cache.getProperty(Preferences.HMMER_PATH)); return exec != null; } /** * Uniquifies the sequences when exporting and stores their details in a - * hashtable. + * hashtable * * @param seqs */ - public void uniquifySequences(SequenceI[] seqs) + protected Hashtable stashSequences(SequenceI[] seqs) { - hash = jalview.analysis.SeqsetUtils.uniquify(seqs, true); + return SeqsetUtils.uniquify(seqs, true); } /** - * Recover the sequence data lost by uniquifying. + * Restores the sequence data lost by uniquifying * + * @param hashtable * @param seqs */ - public void recoverSequenceNames(SequenceI[] seqs) + protected void recoverSequences(Hashtable hashtable, SequenceI[] seqs) { - jalview.analysis.SeqsetUtils.deuniquify(hash, seqs); + SeqsetUtils.deuniquify(hashtable, seqs); } /** - * Runs a command in the command line. + * Runs a command as a separate process and waits for it to complete. Answers + * true if the process return status is zero, else false. * - * @param command + * @param commands + * the executable command and any arguments to it * @throws IOException - * @throws InterruptedException */ - public boolean runCommand(String command) - throws IOException, InterruptedException + public boolean runCommand(List commands) + throws IOException { + List args = Platform.isWindows() ? wrapWithCygwin(commands) + : commands; + try { - final Process p = Runtime.getRuntime().exec(command); - + ProcessBuilder pb = new ProcessBuilder(args); + pb.redirectErrorStream(true); // merge syserr to sysout + if (Platform.isWindows()) + { + String path = pb.environment().get("Path"); + path = jalview.bin.Cache.getProperty("CYGWIN_PATH") + ";" + path; + pb.environment().put("Path", path); + } + final Process p = pb.start(); new Thread(new Runnable() { @Override @@ -104,13 +173,13 @@ public class HmmerCommand { BufferedReader input = new BufferedReader( new InputStreamReader(p.getInputStream())); - String line = null; - try { - while ((line = input.readLine()) != null) + String line = input.readLine(); + while (line != null) { System.out.println(line); + line = input.readLine(); } } catch (IOException e) { @@ -120,30 +189,85 @@ public class HmmerCommand }).start(); p.waitFor(); + int exitValue = p.exitValue(); + if (exitValue != 0) + { + Cache.log.error("Command failed, return code = " + exitValue); + Cache.log.error("Command/args were: " + args.toString()); + } + return exitValue == 0; // 0 is success, by convention } catch (Exception e) { e.printStackTrace(); return false; } - return true; } /** - * Exports an alignment and/or HMM to the specified file. + * Converts the given command to a Cygwin "bash" command wrapper. The hmmer + * command and any arguments to it are converted into a single parameter to the + * bash command. + * + * @param commands + */ + protected List wrapWithCygwin(List commands) + { + File bash = FileUtils.getExecutable("bash", + Cache.getProperty(Preferences.CYGWIN_PATH)); + if (bash == null) + { + Cache.log.error("Cygwin shell not found"); + return commands; + } + + List wrapped = new ArrayList<>(); + // wrapped.add("C:\Users\tva\run"); + wrapped.add(bash.getAbsolutePath()); + wrapped.add("-c"); + + /* + * combine hmmbuild/search/align and arguments to a single string + */ + StringBuilder sb = new StringBuilder(); + for (String cmd : commands) + { + sb.append(" ").append(cmd); + } + wrapped.add(sb.toString()); + + return wrapped; + } + + /** + * Exports an alignment, and reference (RF) annotation if present, to the + * specified file, in Stockholm format, removing all HMM sequences * - * @param alignment + * @param seqs + * @param toFile + * @param annotated * @throws IOException */ - public void exportData(SequenceI[] seqs, File stoLocation, - HiddenMarkovModel hmm, File hmmLocation, AnnotatedCollectionI al) + public void exportStockholm(SequenceI[] seqs, File toFile, + AnnotatedCollectionI annotated) throws IOException { - if (seqs != null) + if (seqs == null) { - AlignmentI newAl = new Alignment(seqs); - if (stoLocation != null && al != null) + return; + } + AlignmentI newAl = new Alignment(seqs); + + if (!newAl.isAligned()) + { + newAl.padGaps(); + } + + if (toFile != null && annotated != null) + { + AlignmentAnnotation[] annots = annotated.getAlignmentAnnotation(); + if (annots != null) { - for (AlignmentAnnotation annot : al.getAlignmentAnnotation()) + for (AlignmentAnnotation annot : annots) { if (annot.label.contains("Reference") || "RF".equals(annot.label)) { @@ -164,88 +288,247 @@ public class HmmerCommand } } } + } - StockholmFile file = new StockholmFile(newAl); - String output = file.print(seqs, false); - PrintWriter writer = new PrintWriter(stoLocation); - writer.println(output); - writer.close(); + for (SequenceI seq : newAl.getSequencesArray()) + { + if (seq.getAnnotation() != null) + { + for (AlignmentAnnotation ann : seq.getAnnotation()) + { + seq.removeAlignmentAnnotation(ann); + } + } + } + + StockholmFile file = new StockholmFile(newAl); + String output = file.print(seqs, false); + PrintWriter writer = new PrintWriter(toFile); + writer.println(output); + writer.close(); + } + + /** + * Answers the full path to the given hmmer executable, or null if file cannot + * be found or is not executable + * + * @param cmd + * command short name e.g. hmmalign + * @return + * @throws IOException + */ + protected String getCommandPath(String cmd) + throws IOException + { + String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH); + // ensure any symlink to the directory is resolved: + binariesFolder = Paths.get(binariesFolder).toRealPath().toString(); + File file = FileUtils.getExecutable(cmd, binariesFolder); + if (file == null && af != null) + { + JvOptionPane.showInternalMessageDialog(af, MessageManager + .formatMessage("label.executable_not_found", cmd)); } + return file == null ? null : getFilePath(file, true); + } + + /** + * Exports an HMM to the specified file + * + * @param hmm + * @param hmmFile + * @throws IOException + */ + public void exportHmm(HiddenMarkovModel hmm, File hmmFile) + throws IOException + { if (hmm != null) { HMMFile file = new HMMFile(hmm); - PrintWriter writer = new PrintWriter(hmmLocation); + PrintWriter writer = new PrintWriter(hmmFile); writer.print(file.print()); writer.close(); } } + // TODO is needed? /** - * Returns the list of HMM sequences removed + * Exports a sequence to the specified file + * + * @param hmm + * @param hmmFile + * @throws IOException + */ + public void exportSequence(SequenceI seq, File seqFile) throws IOException + { + if (seq != null) + { + FastaFile file = new FastaFile(); + PrintWriter writer = new PrintWriter(seqFile); + writer.print(file.print(new SequenceI[] { seq }, false)); + writer.close(); + } + } + + /** + * Answers the HMM profile for the profile sequence the user selected (default + * is just the first HMM sequence in the alignment) * * @return */ - public List getHmmSeqs() + protected HiddenMarkovModel getHmmProfile() { - return hmmSeqs; + String alignToParamName = MessageManager.getString("label.use_hmm"); + for (ArgumentI arg : params) + { + String name = arg.getName(); + if (name.equals(alignToParamName)) + { + String seqName = arg.getValue(); + SequenceI hmmSeq = alignment.findName(seqName); + if (hmmSeq.hasHMMProfile()) + { + return hmmSeq.getHMM(); + } + } + } + return null; } /** - * Sets the list of removed HMM sequences + * Answers the query sequence the user selected (default is just the first + * sequence in the alignment) * - * @param hmmSeqs + * @return */ - public void setHmmSeqs(List hmmSeqs) + protected SequenceI getSequence() { - this.hmmSeqs = hmmSeqs; + String alignToParamName = MessageManager + .getString("label.use_sequence"); + for (ArgumentI arg : params) + { + String name = arg.getName(); + if (name.equals(alignToParamName)) + { + String seqName = arg.getValue(); + SequenceI seq = alignment.findName(seqName); + return seq; + } + } + return null; } /** - * Answers the full path to the given hmmer executable, or null if file cannot - * be found or is not executable + * Answers an absolute path to the given file, in a format suitable for + * processing by a hmmer command. On a Windows platform, the native Windows file + * path is converted to Cygwin format, by replacing '\'with '/' and drive letter + * X with /cygdrive/x. * - * @param cmd - * command short name e.g. hmmalign + * @param resultFile + * @param isInCygwin + * True if file is to be read/written from within the Cygwin + * shell. Should be false for any imports. * @return */ - protected String getCommandRoot(String cmd) + protected String getFilePath(File resultFile, boolean isInCygwin) { - String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH); - File file = getExecutable(cmd, binariesFolder); - if (file == null && af != null) + String path = resultFile.getAbsolutePath(); + if (Platform.isWindows() && isInCygwin) { - JvOptionPane.showInternalMessageDialog(af, - MessageManager.getString("warn.hmm_command_failed")); + // the first backslash escapes '\' for the regular expression argument + path = path.replaceAll("\\" + File.separator, "/"); + int colon = path.indexOf(':'); + if (colon > 0) + { + String drive = path.substring(0, colon); + path = path.replaceAll(drive + ":", "/cygdrive/" + drive); + } } - return file == null ? null : file.getAbsolutePath(); + return path; } /** - * Answers the executable file for the given hmmer command, or null if not - * found or not executable. The path to the executable is the command name - * prefixed by the hmmer binaries folder path, optionally with .exe appended. + * A helper method that deletes any HMM consensus sequence from the given + * collection, and from the parent alignment if ac is a subgroup * - * @param cmd - * hmmer command short name, for example hmmbuild - * @param binaryPath - * parent folder containing hmmer executables - * @return + * @param ac */ - public static File getExecutable(String cmd, String binaryPath) + void deleteHmmSequences(AnnotatedCollectionI ac) { - File file = new File(binaryPath, cmd); - if (!file.canExecute()) + List hmmSeqs = ac.getHmmSequences(); + for (SequenceI hmmSeq : hmmSeqs) { - file = new File(binaryPath, cmd + ".exe"); + if (ac instanceof SequenceGroup) { - if (!file.canExecute()) + ((SequenceGroup) ac).deleteSequence(hmmSeq, false); + AnnotatedCollectionI context = ac.getContext(); + if (context != null && context instanceof AlignmentI) { - file = null; + ((AlignmentI) context).deleteSequence(hmmSeq); } } + else + { + ((AlignmentI) ac).deleteSequence(hmmSeq); + } } - return file; } + + /** + * Sets the names of any duplicates within the given sequences to include their + * respective lengths. Deletes any duplicates that have the same name after this + * step + * + * @param seqs + */ + void renameDuplicates(AlignmentI al) + { + + SequenceI[] seqs = al.getSequencesArray(); + List wasRenamed = new ArrayList<>(); + + for (SequenceI seq : seqs) + { + wasRenamed.add(false); + } + + for (int i = 0; i < seqs.length; i++) + { + for (int j = 0; j < seqs.length; j++) + { + if (seqs[i].getName().equals(seqs[j].getName()) && i != j + && !wasRenamed.get(j)) + { + + wasRenamed.set(i, true); + String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd(); + // setting sequence name to include range - to differentiate between + // sequences of the same name. Currently have to include the range twice + // because the range is removed (once) when setting the name + // TODO come up with a better way of doing this + seqs[j].setName(seqs[j].getName() + range + range); + } + + } + if (wasRenamed.get(i)) + { + String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd(); + seqs[i].setName(seqs[i].getName() + range + range); + } + } + + for (int i = 0; i < seqs.length; i++) + { + for (int j = 0; j < seqs.length; j++) + { + if (seqs[i].getName().equals(seqs[j].getName()) && i != j) + { + al.deleteSequence(j); + } + } + } + } + }