package jalview.hmmer;
+import jalview.analysis.SeqsetUtils;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.Annotation;
import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
import jalview.gui.JvOptionPane;
import jalview.gui.Preferences;
+import jalview.io.FastaFile;
import jalview.io.HMMFile;
import jalview.io.StockholmFile;
+import jalview.util.FileUtils;
import jalview.util.MessageManager;
+import jalview.util.Platform;
+import jalview.ws.params.ArgumentI;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
+import java.nio.file.Paths;
+import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
* @author TZVanaalten
*
*/
-public class HmmerCommand
+public abstract class HmmerCommand implements Runnable
{
public static final String HMMBUILD = "hmmbuild";
- public String JALVIEWDIRECTORY = System.getProperty("user.dir")
- + File.separator;
+ protected final AlignFrame af;
- public String OUTPUTALIGNMENT;
+ protected final AlignmentI alignment;
- public final String SPACE = " ";
+ protected final List<ArgumentI> params;
- public final String ALLCOL = "--allcol ";
+ /*
+ * constants for i18n lookup of passed parameter names
+ */
+ static final String DATABASE_KEY = "label.database";
+
+ static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
+
+ static final String USE_ACCESSIONS_KEY = "label.use_accessions";
+
+ static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
+
+ static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
+
+ static final String TRIM_TERMINI_KEY = "label.trim_termini";
+
+ static final String RETURN_N_NEW_SEQ = "label.check_for_new_sequences";
+
+ static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
+
+ static final String CUTOFF_NONE = "label.default";
+
+ static final String CUTOFF_SCORE = "label.score";
- public final String TRIM = "--trim ";
+ static final String CUTOFF_EVALUE = "label.evalue";
- public final String FORCEAMINO = "--amino ";
+ static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue";
- public final String FORCEDNA = "--dna ";
+ static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue";
- public final String FORCERNA = "--rna ";
+ static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score";
- Hashtable hash = new Hashtable();
+ static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score";
- List<SequenceI> hmmSeqs;
+ static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue";
- protected AlignFrame af;
+ static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue";
+ static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score";
+
+ static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score";
+
+ static final String ARG_TRIM = "--trim";
+
+ static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold";
+
+ /**
+  * Creates a command bound to the given alignment window, the alignment held
+  * by that window's viewport, and the supplied arguments
+  *
+  * @param alignFrame
+  *          the window whose viewport supplies the alignment
+  * @param args
+  *          the parameters for this command
+  */
+ public HmmerCommand(AlignFrame alignFrame, List<ArgumentI> args)
+ {
+   this.af = alignFrame;
+   this.params = args;
+   this.alignment = alignFrame.getViewport().getAlignment();
+ }
+
+ /**
+ * Answers true if preference HMMER_PATH is set, and its value is the path to
+ * a directory that contains an executable <code>hmmbuild</code> or
+ * <code>hmmbuild.exe</code>, else false
+ *
+ * @return true if <code>FileUtils.getExecutable</code> answers a non-null
+ *         file for <code>hmmbuild</code> under the HMMER_PATH preference
+ */
public static boolean isHmmerAvailable()
{
- File exec = getExecutable(HMMBUILD, Cache.getProperty(Preferences.HMMER_PATH));
+ File exec = FileUtils.getExecutable(HMMBUILD,
+ Cache.getProperty(Preferences.HMMER_PATH));
return exec != null;
}
/**
* Uniquifies the sequences when exporting and stores their details in a
- * hashtable.
+ * hashtable
*
* @param seqs
+ * @return the hashtable answered by <code>SeqsetUtils.uniquify</code>;
+ *         presumably the one to hand back to <code>recoverSequences</code>
*/
- public void uniquifySequences(SequenceI[] seqs)
+ protected Hashtable stashSequences(SequenceI[] seqs)
{
- hash = jalview.analysis.SeqsetUtils.uniquify(seqs, true);
+ return SeqsetUtils.uniquify(seqs, true);
}
/**
- * Recover the sequence data lost by uniquifying.
+ * Restores the sequence data lost by uniquifying
*
+ * @param hashtable
+ *          the table passed on to <code>SeqsetUtils.deuniquify</code>;
+ *          presumably the one answered by <code>stashSequences</code>
* @param seqs
+ *          the sequences passed on to <code>SeqsetUtils.deuniquify</code>
*/
- public void recoverSequenceNames(SequenceI[] seqs)
+ protected void recoverSequences(Hashtable hashtable, SequenceI[] seqs)
{
- jalview.analysis.SeqsetUtils.deuniquify(hash, seqs);
+ SeqsetUtils.deuniquify(hashtable, seqs);
}
/**
- * Runs a command in the command line.
+ * Runs a command as a separate process and waits for it to complete. Answers
+ * true if the process return status is zero, else false.
+ * Also answers false if any exception is thrown while starting or waiting
+ * for the process (the exception is caught and its stack trace printed). On
+ * Windows the command is first wrapped by <code>wrapWithCygwin</code>.
*
- * @param command
+ * @param commands
+ * the executable command and any arguments to it
* @throws IOException
- * @throws InterruptedException
*/
- public boolean runCommand(String command)
- throws IOException, InterruptedException
+ public boolean runCommand(List<String> commands)
+ throws IOException
{
+ List<String> args = Platform.isWindows() ? wrapWithCygwin(commands)
+ : commands;
+
try
{
- final Process p = Runtime.getRuntime().exec(command);
-
+ ProcessBuilder pb = new ProcessBuilder(args);
+ pb.redirectErrorStream(true); // merge syserr to sysout
+ // on Windows, prepend the CYGWIN_PATH property value to the child
+ // process's "Path" environment variable
+ // NOTE(review): the key is the literal "CYGWIN_PATH" here, whereas
+ // wrapWithCygwin uses Preferences.CYGWIN_PATH - confirm they are the same;
+ // an unset property or variable would be concatenated as the text "null"
+ if (Platform.isWindows())
+ {
+ String path = pb.environment().get("Path");
+ path = jalview.bin.Cache.getProperty("CYGWIN_PATH") + ";" + path;
+ pb.environment().put("Path", path);
+ }
+ final Process p = pb.start();
+ // echo the process's (merged) output to stdout on a separate thread
new Thread(new Runnable()
{
@Override
{
BufferedReader input = new BufferedReader(
new InputStreamReader(p.getInputStream()));
- String line = null;
-
try
{
- while ((line = input.readLine()) != null)
+ String line = input.readLine();
+ while (line != null)
{
System.out.println(line);
+ line = input.readLine();
}
} catch (IOException e)
{
}).start();
p.waitFor();
+ int exitValue = p.exitValue();
+ if (exitValue != 0)
+ {
+ Cache.log.error("Command failed, return code = " + exitValue);
+ Cache.log.error("Command/args were: " + args.toString());
+ }
+ return exitValue == 0; // 0 is success, by convention
+ // NOTE(review): the catch below also traps InterruptedException from
+ // waitFor() without restoring the thread's interrupt status
} catch (Exception e)
{
e.printStackTrace();
return false;
}
- return true;
}
/**
- * Exports an alignment and/or HMM to the specified file.
+ * Wraps the given command for execution by the Cygwin "bash" shell. The
+ * result is the bash executable, "-c", and the hmmer command with its
+ * arguments joined (each preceded by a space) into a single parameter. If no
+ * bash executable is found under the Cygwin path preference, an error is
+ * logged and the commands are answered unchanged.
+ *
+ * @param commands
+ *          the executable command and any arguments to it
+ * @return the wrapped command, or <code>commands</code> if bash is not found
+ */
+ protected List<String> wrapWithCygwin(List<String> commands)
+ {
+   String cygwinFolder = Cache.getProperty(Preferences.CYGWIN_PATH);
+   File shell = FileUtils.getExecutable("bash", cygwinFolder);
+   if (shell == null)
+   {
+     Cache.log.error("Cygwin shell not found");
+     return commands;
+   }
+
+   /*
+    * combine hmmbuild/search/align and arguments to a single string;
+    * every token, including the first, is preceded by a space
+    */
+   StringBuilder joined = new StringBuilder();
+   for (int i = 0; i < commands.size(); i++)
+   {
+     joined.append(' ').append(commands.get(i));
+   }
+
+   List<String> bashCommand = new ArrayList<>();
+   bashCommand.add(shell.getAbsolutePath());
+   bashCommand.add("-c");
+   bashCommand.add(joined.toString());
+   return bashCommand;
+ }
+
+ /**
+ * Exports an alignment, and reference (RF) annotation if present, to the
+ * specified file, in Stockholm format, removing all HMM sequences
+ * NOTE(review): no removal of HMM sequences is visible in this method -
+ * confirm whether the caller is expected to do that
*
- * @param alignment
+ * @param seqs
+ *          the sequences to export; nothing is written if this is null
+ * @param toFile
+ *          the file the Stockholm output is written to
+ * @param annotated
+ *          the collection whose alignment annotations are scanned for a
+ *          label containing "Reference" or equal to "RF"
* @throws IOException
*/
- public void exportData(SequenceI[] seqs, File stoLocation,
- HiddenMarkovModel hmm, File hmmLocation, AnnotatedCollectionI al)
+ public void exportStockholm(SequenceI[] seqs, File toFile,
+ AnnotatedCollectionI annotated)
throws IOException
{
- if (seqs != null)
+ if (seqs == null)
{
- AlignmentI newAl = new Alignment(seqs);
- if (stoLocation != null && al != null)
+ return;
+ }
+ AlignmentI newAl = new Alignment(seqs);
+
+ // pad with gaps if the new alignment reports that it is not aligned
+ if (!newAl.isAligned())
+ {
+ newAl.padGaps();
+ }
+
+ if (toFile != null && annotated != null)
+ {
+ AlignmentAnnotation[] annots = annotated.getAlignmentAnnotation();
+ if (annots != null)
{
- for (AlignmentAnnotation annot : al.getAlignmentAnnotation())
+ for (AlignmentAnnotation annot : annots)
{
if (annot.label.contains("Reference") || "RF".equals(annot.label))
{
}
}
}
+ }
- StockholmFile file = new StockholmFile(newAl);
- String output = file.print(seqs, false);
- PrintWriter writer = new PrintWriter(stoLocation);
- writer.println(output);
- writer.close();
+ // strip the sequence-associated annotations from the sequences being
+ // exported
+ for (SequenceI seq : newAl.getSequencesArray())
+ {
+ if (seq.getAnnotation() != null)
+ {
+ for (AlignmentAnnotation ann : seq.getAnnotation())
+ {
+ seq.removeAlignmentAnnotation(ann);
+ }
+ }
+ }
+
+ StockholmFile file = new StockholmFile(newAl);
+ String output = file.print(seqs, false);
+ // NOTE(review): toFile is null-checked above but used unconditionally here
+ PrintWriter writer = new PrintWriter(toFile);
+ writer.println(output);
+ writer.close();
+ }
+
+ /**
+ * Answers the full path to the given hmmer executable, or null if file cannot
+ * be found or is not executable. Null is also answered (rather than a
+ * NullPointerException thrown) if the HMMER_PATH preference is not set. If
+ * no executable is found and an alignment window is available, a message
+ * dialog is shown. The path answered is formatted by
+ * <code>getFilePath(file, true)</code>.
+ *
+ * @param cmd
+ * command short name e.g. hmmalign
+ * @return
+ * @throws IOException
+ *           if the configured binaries folder cannot be resolved to a real
+ *           path
+ */
+ protected String getCommandPath(String cmd)
+ throws IOException
+ {
+ String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH);
+ File file = null;
+ if (binariesFolder != null)
+ {
+ // ensure any symlink to the directory is resolved:
+ binariesFolder = Paths.get(binariesFolder).toRealPath().toString();
+ file = FileUtils.getExecutable(cmd, binariesFolder);
+ }
+ if (file == null && af != null)
+ {
+ JvOptionPane.showInternalMessageDialog(af, MessageManager
+ .formatMessage("label.executable_not_found", cmd));
}
+ return file == null ? null : getFilePath(file, true);
+ }
+
+ /**
+ * Exports an HMM to the specified file
+ * Does nothing if the HMM is null.
+ *
+ * @param hmm
+ *          the model to write, via <code>HMMFile.print()</code>
+ * @param hmmFile
+ *          the file to write to
+ * @throws IOException
+ */
+ public void exportHmm(HiddenMarkovModel hmm, File hmmFile)
+ throws IOException
+ {
if (hmm != null)
{
HMMFile file = new HMMFile(hmm);
- PrintWriter writer = new PrintWriter(hmmLocation);
+ // NOTE(review): the writer is not closed if file.print() below throws
+ PrintWriter writer = new PrintWriter(hmmFile);
writer.print(file.print());
writer.close();
}
}
+ // TODO is needed?
/**
- * Returns the list of HMM sequences removed
+ * Exports a sequence to the specified file, as formatted by
+ * <code>FastaFile.print</code>. Does nothing if the sequence is null.
+ *
+ * @param seq
+ *          the sequence to write
+ * @param seqFile
+ *          the file to write to
+ * @throws IOException
+ *           if the file cannot be opened for writing
+ */
+ public void exportSequence(SequenceI seq, File seqFile) throws IOException
+ {
+   if (seq == null)
+   {
+     return;
+   }
+   // format before opening the file, so that a failure while formatting
+   // neither truncates the file nor leaves a writer open
+   String fasta = new FastaFile().print(new SequenceI[] { seq }, false);
+   try (PrintWriter writer = new PrintWriter(seqFile))
+   {
+     writer.print(fasta);
+   }
+ }
+
+ /**
+ * Answers the HMM profile for the profile sequence the user selected (default
+ * is just the first HMM sequence in the alignment)
+ * <p>
+ * The sequence is the one whose name is the value of the parameter named by
+ * message key "label.use_hmm". Answers null if there is no such parameter,
+ * or if no matching parameter names a sequence that is found in the
+ * alignment and has an HMM profile.
*
* @return
*/
- public List<SequenceI> getHmmSeqs()
+ protected HiddenMarkovModel getHmmProfile()
{
- return hmmSeqs;
+ String alignToParamName = MessageManager.getString("label.use_hmm");
+ for (ArgumentI arg : params)
+ {
+ String name = arg.getName();
+ if (name.equals(alignToParamName))
+ {
+ String seqName = arg.getValue();
+ SequenceI hmmSeq = alignment.findName(seqName);
+ // guard against a name that matches no sequence in the alignment
+ if (hmmSeq != null && hmmSeq.hasHMMProfile())
+ {
+ return hmmSeq.getHMM();
+ }
+ }
+ }
+ return null;
}
/**
- * Sets the list of removed HMM sequences
+ * Answers the query sequence the user selected (default is just the first
+ * sequence in the alignment)
*
- * @param hmmSeqs
+ * @return the result of looking up, by name in the alignment, the value of
+ *         the first parameter named by message key "label.use_sequence";
+ *         null if there is no such parameter
*/
- public void setHmmSeqs(List<SequenceI> hmmSeqs)
+ protected SequenceI getSequence()
{
- this.hmmSeqs = hmmSeqs;
+ String alignToParamName = MessageManager
+ .getString("label.use_sequence");
+ for (ArgumentI arg : params)
+ {
+ String name = arg.getName();
+ if (name.equals(alignToParamName))
+ {
+ String seqName = arg.getValue();
+ // NOTE(review): presumably null if no sequence has this name - confirm
+ SequenceI seq = alignment.findName(seqName);
+ return seq;
+ }
+ }
+ return null;
}
/**
- * Answers the full path to the given hmmer executable, or null if file cannot
- * be found or is not executable
+ * Answers an absolute path to the given file, in a format suitable for
+ * processing by a hmmer command. On a Windows platform, the native Windows file
+ * path is converted to Cygwin format, by replacing '\' with '/' and drive letter
+ * X with /cygdrive/x.
+ * NOTE(review): the code keeps the drive letter's case as found in the path
+ * (it is not lower-cased) - confirm this is acceptable to Cygwin
*
- * @param cmd
- * command short name e.g. hmmalign
+ * @param resultFile
+ *          the file whose absolute path is wanted
+ * @param isInCygwin
+ * True if file is to be read/written from within the Cygwin
+ * shell. Should be false for any imports.
* @return
*/
- protected String getCommandRoot(String cmd)
+ protected String getFilePath(File resultFile, boolean isInCygwin)
{
- String binariesFolder = Cache.getProperty(Preferences.HMMER_PATH);
- File file = getExecutable(cmd, binariesFolder);
- if (file == null && af != null)
+ String path = resultFile.getAbsolutePath();
+ if (Platform.isWindows() && isInCygwin)
{
- JvOptionPane.showInternalMessageDialog(af,
- MessageManager.getString("warn.hmm_command_failed"));
+ // the first backslash escapes '\' for the regular expression argument
+ path = path.replaceAll("\\" + File.separator, "/");
+ // everything before the first ':' is taken to be the drive
+ int colon = path.indexOf(':');
+ if (colon > 0)
+ {
+ String drive = path.substring(0, colon);
+ path = path.replaceAll(drive + ":", "/cygdrive/" + drive);
+ }
}
- return file == null ? null : file.getAbsolutePath();
+ return path;
}
/**
- * Answers the executable file for the given hmmer command, or null if not
- * found or not executable. The path to the executable is the command name
- * prefixed by the hmmer binaries folder path, optionally with .exe appended.
+ * A helper method that deletes any HMM consensus sequence from the given
+ * collection, and from the parent alignment if <code>ac</code> is a subgroup
*
- * @param cmd
- * hmmer command short name, for example hmmbuild
- * @param binaryPath
- * parent folder containing hmmer executables
- * @return
+ * @param ac
+ *          a SequenceGroup, or else a collection that is cast to AlignmentI
*/
- public static File getExecutable(String cmd, String binaryPath)
+ void deleteHmmSequences(AnnotatedCollectionI ac)
{
- File file = new File(binaryPath, cmd);
- if (!file.canExecute())
+ // NOTE(review): sequences are deleted from ac while iterating this list;
+ // confirm getHmmSequences() answers a copy rather than a live view
+ List<SequenceI> hmmSeqs = ac.getHmmSequences();
+ for (SequenceI hmmSeq : hmmSeqs)
{
- file = new File(binaryPath, cmd + ".exe");
+ if (ac instanceof SequenceGroup)
{
- if (!file.canExecute())
+ ((SequenceGroup) ac).deleteSequence(hmmSeq, false);
+ // also delete from the group's context, when that is an alignment
+ AnnotatedCollectionI context = ac.getContext();
+ if (context != null && context instanceof AlignmentI)
{
- file = null;
+ ((AlignmentI) context).deleteSequence(hmmSeq);
}
}
+ else
+ {
+ ((AlignmentI) ac).deleteSequence(hmmSeq);
+ }
}
- return file;
}
+
+ /**
+  * Sets the names of any duplicates within the given alignment's sequences to
+  * include their respective ranges (start-end). Of any sequences that still
+  * share a name after this step, only the first is kept; the others are
+  * deleted from the alignment.
+  *
+  * @param al
+  *          the alignment whose sequences are renamed and de-duplicated
+  */
+ void renameDuplicates(AlignmentI al)
+ {
+   SequenceI[] seqs = al.getSequencesArray();
+   boolean[] wasRenamed = new boolean[seqs.length];
+
+   for (int i = 0; i < seqs.length; i++)
+   {
+     for (int j = 0; j < seqs.length; j++)
+     {
+       if (seqs[i].getName().equals(seqs[j].getName()) && i != j
+               && !wasRenamed[j])
+       {
+         wasRenamed[i] = true;
+         String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd();
+         // setting sequence name to include range - to differentiate between
+         // sequences of the same name. Currently have to include the range
+         // twice because the range is removed (once) when setting the name
+         // TODO come up with a better way of doing this
+         seqs[j].setName(seqs[j].getName() + range + range);
+       }
+     }
+     if (wasRenamed[i])
+     {
+       String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd();
+       seqs[i].setName(seqs[i].getName() + range + range);
+     }
+   }
+
+   /*
+    * Delete later sequences whose name has already been seen. Deletion is by
+    * sequence reference: deleting by index while walking the snapshot array
+    * would act on positions that shift after each deletion, and would
+    * remove both members of a same-named pair.
+    */
+   java.util.Set<String> namesKept = new java.util.HashSet<>();
+   for (SequenceI seq : seqs)
+   {
+     if (!namesKept.add(seq.getName()))
+     {
+       al.deleteSequence(seq);
+     }
+   }
+ }
+
}