package jalview.hmmer;

import jalview.api.AlignViewportI;
import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.ResidueCount;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FileParse;
import jalview.io.HMMFile;
import jalview.util.FileUtils;
import jalview.util.MessageManager;
import jalview.ws.params.ArgumentI;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

/**
 * A class that runs the hmmbuild command as a separate process.
 * 
 * @author gmcarstairs
 *
 */
public class HMMBuild extends HmmerCommand
{
  static final String ARG_AMINO = "--amino";

  static final String ARG_DNA = "--dna";

  static final String ARG_RNA = "--rna";

  /**
   * Constructor
   * 
   * @param alignFrame
   *          the alignment frame the command operates on
   * @param args
   *          the hmmbuild parameters chosen by the user
   */
  public HMMBuild(AlignFrame alignFrame, List<ArgumentI> args)
  {
    super(alignFrame, args);
  }

  /**
   * Builds a HMM from an alignment (and/or groups), then imports and adds it to
   * the alignment (and/or groups). Call this method directly to execute
   * synchronously, or via start() in a new Thread for asynchronously.
   * <p>
   * Does nothing (other than logging an error) if there are no parameters.
   */
  @Override
  public void run()
  {
    if (params == null || params.isEmpty())
    {
      Cache.log.error("No parameters to HMMBuild!|");
      return;
    }

    // the timestamp doubles as the identifier of the progress bar message
    long msgID = System.currentTimeMillis();
    af.setProgressBar(MessageManager.getString("status.running_hmmbuild"),
            msgID);

    AlignViewportI viewport = af.getViewport();
    try
    {
      /*
       * run hmmbuild for alignment and/or groups as selected
       */
      List<AnnotatedCollectionI> runBuildFor = parseParameters(viewport);

      for (AnnotatedCollectionI grp : runBuildFor)
      {
        runHMMBuild(grp);
      }
    } finally
    {
      // always clear the progress bar and refresh, even if a build failed
      af.setProgressBar("", msgID);
      viewport.alignmentChanged(af.alignPanel);
      af.buildColourMenu(); // to enable HMMER colour schemes
    }
  }

  /**
   * Scans the parameters to determine whether to run hmmbuild for the whole
   * alignment or specified subgroup(s) or both
   * 
   * @param viewport
   *          the viewport supplying the alignment view(s) to build from
   * @return the alignment and/or groups for which hmmbuild should be run; this
   *         is the whole alignment if no 'hmmbuild for' parameter is present
   */
  protected List<AnnotatedCollectionI> parseParameters(
          AlignViewportI viewport)
  {
    List<AnnotatedCollectionI> runBuildFor = new ArrayList<>();
    boolean foundArg = false;

    for (ArgumentI arg : params)
    {
      String name = arg.getName();
      if (MessageManager.getString("label.hmmbuild_for").equals(name))
      {
        foundArg = true;
        String value = arg.getValue();
        if (MessageManager.getString("label.alignment").equals(value))
        {
          runBuildFor.add(viewport.getAlignmentView(false)
                  .getVisibleAlignment('-'));
        }
        else if (MessageManager.getString("label.groups_and_alignment")
                .equals(value))
        {
          AlignmentView av = viewport.getAlignmentView(true);
          runBuildFor.add(av.getVisibleAlignment('-'));
          runBuildFor.addAll(av.getVisibleGroups('-'));
        }
        else if (MessageManager.getString("label.groups").equals(value))
        {
          AlignmentView av = viewport.getAlignmentView(false);
          runBuildFor.addAll(av.getVisibleGroups('-'));
        }
        else if (MessageManager.getString("label.selected_group")
                .equals(value))
        {
          AlignmentView av = viewport.getAlignmentView(true);
          runBuildFor.add(av.getVisibleAlignment('-'));
        }
      }
      else if (MessageManager.getString("label.use_reference")
              .equals(name))
      {
        // todo disable this option if no RF annotation on alignment
        if (!af.getViewport().hasReferenceAnnotation())
        {
          // warn only - the build still proceeds
          JvOptionPane.showInternalMessageDialog(af, MessageManager
                  .getString("warn.no_reference_annotation"));
          // return;
        }
      }
    }

    /*
     * default is to build for the whole alignment
     */
    if (!foundArg)
    {
      runBuildFor.add(alignment);
    }

    return runBuildFor;
  }

  /**
   * Runs hmmbuild on the given sequences (alignment or group). The sequences
   * are exported to a temporary Stockholm file, hmmbuild is run on it, and the
   * resulting HMM is imported. Both temporary files are deleted afterwards.
   * 
   * @param ac
   *          the alignment or group to build the HMM for
   */
  private void runHMMBuild(AnnotatedCollectionI ac)
  {
    File hmmFile = null;
    File alignmentFile = null;
    try
    {
      hmmFile = FileUtils.createTempFile("hmm", ".hmm");
      alignmentFile = FileUtils.createTempFile("output", ".sto");

      if (ac instanceof Alignment)
      {
        AlignmentI al = (Alignment) ac;
        // todo pad gaps in an unaligned SequenceGroup as well?
        if (!al.isAligned())
        {
          al.padGaps();
        }
      }

      deleteHmmSequences(ac);

      List<SequenceI> copy = new ArrayList<>();
      if (ac instanceof Alignment)
      {
        copy.addAll(ac.getSequences());
      }
      else
      {
        SequenceI[] sel = ((SequenceGroup) ac)
                .getSelectionAsNewSequences((AlignmentI) ac.getContext());
        for (SequenceI seq : sel)
        {
          copy.add(seq);
        }
      }
      // TODO rather than copy alignment data we should anonymize in situ -
      // export/File import could use anonymization hash to reinstate references
      // at import level ?

      SequenceI[] copyArray = copy.toArray(new SequenceI[copy.size()]);
      Hashtable sequencesHash = stashSequences(copyArray);

      exportStockholm(copyArray, alignmentFile, ac, false);

      recoverSequences(sequencesHash, copy.toArray(new SequenceI[] {}));

      boolean ran = runCommand(alignmentFile, hmmFile, ac);
      if (!ran)
      {
        JvOptionPane.showInternalMessageDialog(af, MessageManager
                .formatMessage("warn.command_failed", "hmmbuild"));
        return;
      }
      importData(hmmFile, ac);
    } catch (Exception e)
    {
      e.printStackTrace();
    } finally
    {
      if (hmmFile != null)
      {
        hmmFile.delete();
      }
      if (alignmentFile != null)
      {
        alignmentFile.delete();
      }
    }
  }

  /**
   * Constructs and executes the hmmbuild command as a separate process
   * 
   * @param sequencesFile
   *          the alignment from which the HMM is built
   * @param hmmFile
   *          the output file to which the HMM is written
   * @param group
   *          alignment or group for which the hmm is generated
   * 
   * @return true if the command was run, false if the hmmbuild executable was
   *         not found (otherwise the result of running the command)
   * @throws IOException
   */
  private boolean runCommand(File sequencesFile, File hmmFile,
          AnnotatedCollectionI group) throws IOException
  {
    String cmd = getCommandPath(HMMBUILD);
    if (cmd == null)
    {
      return false; // executable not found
    }
    List<String> args = new ArrayList<>();
    args.add(cmd);

    /*
     * HMM name (will be given to consensus sequence) is
     * - group name with _HMM appended (if for a group)
     * - else as specified by the "HMM Name" input parameter if set
     * - else align frame title (if shorter than 15 characters)
     * - else "Alignment_HMM"
     * NOTE(review): a group name currently overrides an explicit "HMM Name"
     * parameter - confirm this is the intended priority
     */
    String name = "";

    if (params != null)
    {
      for (ArgumentI arg : params)
      {
        String argName = arg.getName();
        switch (argName)
        {
        case "HMM Name":
          // guard against a parameter that has no value set
          String value = arg.getValue();
          name = value == null ? "" : value.trim();
          break;
        case "Use Reference Annotation":
          args.add("--hand");
          break;
        }
      }
    }

    if (group instanceof SequenceGroup)
    {
      name = ((SequenceGroup) group).getName() + "_HMM";
    }

    if ("".equals(name))
    {
      if (af != null && af.getTitle().length() < 15)
      {
        name = af.getTitle();
      }
      else
      {
        name = "Alignment_HMM";
      }
    }

    args.add("-n");
    // spaces are replaced so the name is passed as a single token
    args.add(name.replace(' ', '_'));
    if (!alignment.isNucleotide())
    {
      args.add(ARG_AMINO); // TODO check for rna
    }
    else
    {
      args.add(ARG_DNA);
    }

    args.add(getFilePath(hmmFile, true));
    args.add(getFilePath(sequencesFile, true));

    return runCommand(args);
  }

  /**
   * Imports the .hmm file produced by hmmbuild, and inserts the HMM consensus
   * sequence (with attached HMM profile) as the first sequence in the alignment
   * or group for which it was generated. Does nothing if the file is empty or
   * no consensus sequence could be read from it.
   * 
   * @param hmmFile
   *          the file written by hmmbuild
   * @param ac
   *          (optional) the group for which the hmm was generated
   * @throws IOException
   */
  private void importData(File hmmFile, AnnotatedCollectionI ac)
          throws IOException
  {
    if (hmmFile.length() == 0L)
    {
      Cache.log.error("Error: hmmbuild produced empty hmm file");
      return;
    }

    HMMFile file = new HMMFile(
            new FileParse(hmmFile.getAbsolutePath(), DataSourceType.FILE));
    SequenceI hmmSeq = file.getHMM().getConsensusSequence();

    /*
     * check for failure before the sequence is first dereferenced
     */
    if (hmmSeq == null)
    {
      // hmmbuild failure not detected earlier
      return;
    }

    ResidueCount counts = new ResidueCount(alignment.getSequences());
    hmmSeq.getHMM().setBackgroundFrequencies(counts);

    if (ac instanceof SequenceGroup)
    {
      SequenceGroup grp = (SequenceGroup) ac;
      char gapChar = alignment.getGapCharacter();
      // pad with gaps either side so the consensus lines up with the group
      hmmSeq.insertCharAt(0, ac.getStartRes(), gapChar);
      hmmSeq.insertCharAt(ac.getEndRes() + 1,
              alignment.getWidth() - ac.getEndRes() - 1, gapChar);
      // insert the consensus just above the first sequence of the group
      SequenceI topSeq = grp.getSequencesInOrder(alignment)[0];
      int topIndex = alignment.findIndex(topSeq);
      alignment.insertSequenceAt(topIndex, hmmSeq);
      ac.setSeqrep(hmmSeq);
      grp.addSequence(hmmSeq, false);
    }
    else
    {
      alignment.insertSequenceAt(0, hmmSeq);
    }
  }
}