package jalview.hmmer;
+import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
+import jalview.gui.Desktop;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FileParse;
import jalview.io.StockholmFile;
+import jalview.util.FileUtils;
import jalview.util.MessageManager;
import jalview.ws.params.ArgumentI;
import jalview.ws.params.simple.BooleanOption;
+import jalview.ws.params.simple.Option;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Scanner;
SequenceI[] seqs;
+ private String databaseName;
+
/**
* Constructor for the HMMSearchThread
*
@Override
public void run()
{
- HiddenMarkovModel hmm = af.getSelectedHMM();
+ HiddenMarkovModel hmm = getHmmProfile();
if (hmm == null)
{
- JOptionPane.showMessageDialog(af,
- MessageManager.getString("warn.no_selected_hmm"));
+ // shouldn't happen if we got this far
+ Cache.log.error("Error: no hmm for hmmsearch");
return;
}
- SequenceI hmmSeq = af.getSelectedHMMSequence();
+ SequenceI hmmSeq = hmm.getConsensusSequence();
long msgId = System.currentTimeMillis();
- af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
+ af.setProgressBar(MessageManager.getString("status.running_search"),
msgId);
try
{
- File hmmFile = createTempFile("hmm", ".hmm");
- File hitsAlignmentFile = createTempFile("hitAlignment", ".sto");
- File searchOutputFile = createTempFile("searchOutput", ".sto");
+ File hmmFile = FileUtils.createTempFile("hmm", ".hmm");
+ File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
+ ".sto");
+ File searchOutputFile = FileUtils.createTempFile("searchOutput",
+ ".sto");
exportHmm(hmm, hmmFile.getAbsoluteFile());
boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, hmmFile);
if (!ran)
{
- JvOptionPane.showInternalMessageDialog(af,
- MessageManager.getString("warn.hmmsearch_failed"));
+ JvOptionPane.showInternalMessageDialog(af, MessageManager
+ .formatMessage("warn.command_failed", "hmmsearch"));
return;
}
List<String> args = new ArrayList<>();
args.add(command);
+ buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
+
+ return runCommand(args);
+ }
+
+ /**
+ * Appends command line arguments to the given list, to specify input and
+ * output files for the search, and any additional options that may have been
+ * passed from the parameters dialog
+ *
+ * @param args
+ * @param searchOutputFile
+ * @param hitsAlignmentFile
+ * @param hmmFile
+ * @throws IOException
+ */
+ protected void buildArguments(List<String> args, File searchOutputFile,
+ File hitsAlignmentFile, File hmmFile) throws IOException
+ {
args.add("-o");
- args.add(searchOutputFile.getAbsolutePath());
+ args.add(getFilePath(searchOutputFile, true));
args.add("-A");
- args.add(hitsAlignmentFile.getAbsolutePath());
+ args.add(getFilePath(hitsAlignmentFile, true));
boolean dbFound = false;
String dbPath = "";
File databaseFile = null;
+ boolean useEvalueCutoff = false;
+ boolean useScoreCutoff = false;
+ String seqEvalueCutoff = null;
+ String domEvalueCutoff = null;
+ String seqScoreCutoff = null;
+ String domScoreCutoff = null;
+ databaseName = "Alignment";
+ boolean searchAlignment = false;
+
if (params != null)
{
for (ArgumentI arg : params)
{
String name = arg.getName();
- if (MessageManager.getString("label.number_of_results")
+ if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
.equals(name))
{
seqsToReturn = Integer.parseInt(arg.getValue());
}
- else if (MessageManager.getString("label.auto_align_seqs")
+ else if (MessageManager.getString("action.search").equals(name))
+ {
+ searchAlignment = arg.getValue().equals(
+ MessageManager.getString(HMMSearch.THIS_ALIGNMENT_KEY));
+ }
+ else if (MessageManager.getString(DATABASE_KEY).equals(name))
+ {
+ dbPath = arg.getValue();
+ int pos = dbPath.lastIndexOf(File.separator);
+ databaseName = dbPath.substring(pos + 1);
+ databaseFile = new File(dbPath);
+ }
+ else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
.equals(name))
{
- realign = true; // TODO: not used
+ realign = true;
}
- else if (MessageManager.getString("label.use_accessions")
+ else if (MessageManager.getString(USE_ACCESSIONS_KEY)
.equals(name))
{
args.add("--acc");
}
- else if (MessageManager.getString("label.seq_e_value").equals(name))
+ else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
+ .equals(name))
{
- args.add("--incE");
- args.add(arg.getValue());
+ if (CUTOFF_EVALUE.equals(arg.getValue()))
+ {
+ useEvalueCutoff = true;
+ }
+ else if (CUTOFF_SCORE.equals(arg.getValue()))
+ {
+ useScoreCutoff = true;
+ }
}
- else if (MessageManager.getString("label.seq_score").equals(name))
+ else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
{
- args.add("-incT");
- args.add(arg.getValue());
+ seqEvalueCutoff = arg.getValue();
}
- else if (MessageManager.getString("label.dom_e_value_desc")
+ else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
+ {
+ seqScoreCutoff = arg.getValue();
+ }
+ else if (MessageManager.getString(DOM_EVALUE_KEY)
.equals(name))
{
- args.add("--incdomE");
- args.add(arg.getValue());
+ domEvalueCutoff = arg.getValue();
}
- else if (MessageManager.getString("label.dom_score").equals(name))
+ else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
{
- args.add("--incdomT");
- args.add(arg.getValue());
+ domScoreCutoff = arg.getValue();
}
- else if (MessageManager.getString("label.trim_termini")
+ else if (MessageManager.getString(TRIM_TERMINI_KEY)
.equals(name))
{
trim = true;
}
- else if (MessageManager.getString("label.database").equals(name))
+ else if (MessageManager.getString(DATABASE_KEY).equals(name))
{
dbFound = true;
dbPath = arg.getValue();
- if (!MessageManager.getString("label.this_alignment")
+ if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
.equals(dbPath))
{
+ int pos = dbPath.lastIndexOf(File.separator);
+ databaseName = dbPath.substring(pos + 1);
databaseFile = new File(dbPath);
}
}
}
}
- if (!dbFound || MessageManager.getString("label.this_alignment")
- .equals(dbPath))
+ if (useEvalueCutoff)
+ {
+ args.add("-E");
+ args.add(seqEvalueCutoff);
+ args.add("--domE");
+ args.add(domEvalueCutoff);
+ }
+ else if (useScoreCutoff)
+ {
+ args.add("-T");
+ args.add(seqScoreCutoff);
+ args.add("--domT");
+ args.add(domScoreCutoff);
+ }
+
+// if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
+// .equals(dbPath))
+ if (searchAlignment)
{
/*
* no external database specified for search, so
- * export current alignment as 'database' to search
+ * export current alignment as 'database' to search,
+ * excluding any HMM consensus sequences it contains
*/
- databaseFile = createTempFile("database", ".sto");
+ databaseFile = FileUtils.createTempFile("database", ".sto");
AlignmentI al = af.getViewport().getAlignment();
AlignmentI copy = new Alignment(al);
- SequenceI hmms = copy.getHmmConsensus();
- if (hmms != null)
+ List<SequenceI> hmms = copy.getHmmSequences();
+ for (SequenceI hmmSeq : hmms)
{
- copy.deleteSequence(hmms);
+ copy.deleteSequence(hmmSeq);
}
exportStockholm(copy.getSequencesArray(), databaseFile, null);
- // StockholmFile stoFile = new StockholmFile(copy);
- // stoFile.setSeqs(copy.getSequencesArray());
- // String alignmentString = stoFile.print();
- // PrintWriter writer = new PrintWriter(databaseFile);
- // writer.print(alignmentString);
- // writer.close();
}
- args.add(hmmFile.getAbsolutePath());
- args.add(databaseFile.getAbsolutePath());
-
- return runCommand(args);
+ args.add(getFilePath(hmmFile, true));
+ args.add(getFilePath(databaseFile, true));
}
/**
* Imports the data from the temporary file to which the output of hmmsearch
- * is directed.
+ * was directed. The results are optionally realigned using hmmalign.
*
* @param hmmSeq
*/
StockholmFile file = new StockholmFile(new FileParse(
inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
seqs = file.getSeqsAsArray();
-
+ // look for PP cons and ref seq in alignment only annotation
+ AlignmentAnnotation modelpos = null, ppcons = null;
+ for (AlignmentAnnotation aa : file.getAnnotations())
+ {
+ if (aa.sequenceRef == null)
+ {
+ if (aa.label.equals("Reference Positions")) // RF feature type in
+ // stockholm parser
+ {
+ modelpos = aa;
+ }
+ if (aa.label.equals("Posterior Probability"))
+ {
+ ppcons = aa;
+ }
+ }
+ }
readTable(searchOutputFile);
int seqCount = Math.min(seqs.length, seqsToReturn);
SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
+ hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
hmmAndSeqs[0] = hmmSeq;
System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
-
- AlignmentI alignment = new Alignment(hmmAndSeqs);
- AlignFrame frame = new AlignFrame(alignment, 1, 1);
- frame.setSelectedHMMSequence(hmmSeq);
- List<ArgumentI> alignArgs = new ArrayList<>();
- if (trim)
+ if (modelpos != null)
+ {
+ // TODO need - get ungapped sequence method
+ hmmSeq.setSequence(
+ hmmSeq.getDatasetSequence().getSequenceAsString());
+ Annotation[] refpos = modelpos.annotations;
+ // insert gaps to match with refseq positions
+ int gc = 0, lcol = 0;
+ for (int c = 0; c < refpos.length; c++)
+ {
+ if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
+ {
+ if (gc > 0)
+ {
+ hmmSeq.insertCharAt(lcol + 1, gc, '-');
+ }
+ gc = 0;
+ lcol = c;
+ }
+ else
+ {
+ gc++;
+ }
+ }
+ }
+ if (realign)
+ {
+ realignResults(hmmAndSeqs);
+ }
+ else
{
- alignArgs.add(new BooleanOption(
- MessageManager.getString("label.trim_termini"),
- MessageManager.getString("label.trim_termini_desc"), true,
- true, true, null));
+ AlignmentI al = new Alignment(hmmAndSeqs);
+ if (ppcons != null)
+ {
+ al.addAnnotation(ppcons);
+ }
+ if (modelpos != null)
+ {
+ al.addAnnotation(modelpos);
+ }
+ AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
+ AlignFrame.DEFAULT_HEIGHT);
+ String ttl = "hmmSearch of " + databaseName + " using "
+ + hmmSeq.getName();
+ Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
+ AlignFrame.DEFAULT_HEIGHT);
}
- HMMAlign hmmalign = new HMMAlign(frame, alignArgs);
- hmmalign.run();
- frame = null;
+
hmmTemp.delete();
inputAlignmentTemp.delete();
searchOutputFile.delete();
}
}
+ /**
+ * Realigns the given sequences using hmmalign, to the HMM profile sequence
+ * which is the first in the array, and opens the results in a new frame
+ *
+ * @param hmmAndSeqs
+ */
+ protected void realignResults(SequenceI[] hmmAndSeqs)
+ {
+ /*
+ * and align the search results to the HMM profile
+ */
+ AlignmentI al = new Alignment(hmmAndSeqs);
+ AlignFrame frame = new AlignFrame(al, 1, 1);
+ List<ArgumentI> alignArgs = new ArrayList<>();
+ String alignTo = hmmAndSeqs[0].getName();
+ List<String> options = Collections.singletonList(alignTo);
+ Option option = new Option(MessageManager.getString("label.use_hmm"),
+ "", true, alignTo, alignTo, options, null);
+ alignArgs.add(option);
+ if (trim)
+ {
+ alignArgs.add(new BooleanOption(
+ MessageManager.getString(TRIM_TERMINI_KEY),
+ MessageManager.getString("label.trim_termini_desc"), true,
+ true, true, null));
+ }
+ HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
+ hmmalign.run();
+ }
+
+ /**
+ * Reads in the scores table output by hmmsearch and adds annotation to
+ * sequences for E-value and bit score
+ *
+ * @param inputTableTemp
+ * @throws IOException
+ */
void readTable(File inputTableTemp) throws IOException
{
BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
{
line = br.readLine();
}
- for (int i = 0; i < 5; i++)
+ while (!line.contains("-------"))
{
line = br.readLine();
}
+ line = br.readLine();
int index = 0;
while (!" ------ inclusion threshold ------".equals(line)
&& !"".equals(line))
{
+ SequenceI seq = seqs[index];
+ AlignmentAnnotation pp = seq
+ .getAlignmentAnnotations("", "Posterior Probability")
+ .get(0);
Scanner scanner = new Scanner(line);
-
- String str = scanner.next(); // full sequence eValue score
- float eValue = Float.parseFloat(str);
- int seqLength = seqs[index].getLength();
- Annotation[] annots = new Annotation[seqLength];
- for (int j = 0; j < seqLength; j++)
- {
- annots[j] = new Annotation(eValue);
- }
- AlignmentAnnotation annot = new AlignmentAnnotation("E-value",
- "Score", annots);
- annot.setScore(Double.parseDouble(str));
- annot.setSequenceRef(seqs[index]);
- seqs[index].addAlignmentAnnotation(annot);
-
+ String str = scanner.next();
+ addScoreAnnotation(str, seq, "hmmsearch E-value",
+ "Full sequence E-value", pp);
+ str = scanner.next();
+ addScoreAnnotation(str, seq, "hmmsearch Score",
+ "Full sequence bit score", pp);
+ seq.removeAlignmentAnnotation(pp);
scanner.close();
line = br.readLine();
index++;
br.close();
}
+ /**
+ * A helper method that adds one score-only (non-positional) annotation to a
+ * sequence
+ *
+ * @param value
+ * @param seq
+ * @param label
+ * @param description
+ */
+ protected void addScoreAnnotation(String value, SequenceI seq,
+ String label, String description)
+ {
+ addScoreAnnotation(value, seq, label, description, null);
+ }
+
+ /**
+ * A helper method that adds one score-only (non-positional) annotation to a
+ * sequence
+ *
+ * @param value
+ * @param seq
+ * @param label
+ * @param description
+ * @param pp
+ * existing posterior probability annotation - values copied to new
+ * annotation row
+ */
+ protected void addScoreAnnotation(String value, SequenceI seq,
+ String label, String description, AlignmentAnnotation pp)
+ {
+ try
+ {
+ AlignmentAnnotation annot = null;
+ if (pp == null)
+ {
+ new AlignmentAnnotation(label,
+ description, null);
+ }
+ else
+ {
+ annot = new AlignmentAnnotation(pp);
+ annot.label = label;
+ annot.description = description;
+ }
+ annot.setCalcId(HMMSEARCH);
+ double eValue = Double.parseDouble(value);
+ annot.setScore(eValue);
+ annot.setSequenceRef(seq);
+ seq.addAlignmentAnnotation(annot);
+ } catch (NumberFormatException e)
+ {
+ System.err.println("Error parsing " + label + " from " + value);
+ }
+ }
+
}