import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Scanner;
import javax.swing.JOptionPane;
-public class HMMSearch extends HmmerCommand
+public class HMMSearch extends Search
{
- static final String HMMSEARCH = "hmmsearch";
-
- /*
- * constants for i18n lookup of passed parameter names
- */
- static final String DATABASE_KEY = "label.database";
-
- static final String THIS_ALIGNMENT_KEY = "label.this_alignment";
-
- static final String USE_ACCESSIONS_KEY = "label.use_accessions";
-
- static final String AUTO_ALIGN_SEQS_KEY = "label.auto_align_seqs";
-
- static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results";
-
- static final String TRIM_TERMINI_KEY = "label.trim_termini";
-
- static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff";
-
- static final String CUTOFF_NONE = "None";
-
- static final String CUTOFF_SCORE = "Score";
-
- static final String CUTOFF_EVALUE = "E-Value";
-
- static final String SEQ_EVALUE_KEY = "label.seq_evalue";
-
- static final String DOM_EVALUE_KEY = "label.dom_evalue";
-
- static final String SEQ_SCORE_KEY = "label.seq_score";
-
- static final String DOM_SCORE_KEY = "label.dom_score";
boolean realign = false;
boolean trim = false;
- int seqsToReturn = Integer.MAX_VALUE;
+ boolean returnNoOfNewSeqs = false;
- SequenceI[] seqs;
+ int seqsToReturn = Integer.MAX_VALUE;
- private String databaseName;
/**
* Constructor for the HMMSearchThread
SequenceI hmmSeq = hmm.getConsensusSequence();
long msgId = System.currentTimeMillis();
- af.setProgressBar(MessageManager.getString("status.running_hmmsearch"),
+ af.setProgressBar(MessageManager.getString("status.running_search"),
msgId);
try
List<String> args = new ArrayList<>();
args.add(command);
- args.add("-o");
- args.add(getFilePath(searchOutputFile));
- args.add("-A");
- args.add(getFilePath(hitsAlignmentFile));
-
- boolean dbFound = false;
- String dbPath = "";
- File databaseFile = null;
-
- boolean useEvalueCutoff = false;
- boolean useScoreCutoff = false;
- String seqEvalueCutoff = null;
- String domEvalueCutoff = null;
- String seqScoreCutoff = null;
- String domScoreCutoff = null;
- databaseName = "Alignment";
-
- if (params != null)
- {
- for (ArgumentI arg : params)
- {
- String name = arg.getName();
- if (MessageManager.getString(NUMBER_OF_RESULTS_KEY)
- .equals(name))
- {
- seqsToReturn = Integer.parseInt(arg.getValue());
- }
- else if (MessageManager.getString(AUTO_ALIGN_SEQS_KEY)
- .equals(name))
- {
- realign = true;
- }
- else if (MessageManager.getString(USE_ACCESSIONS_KEY)
- .equals(name))
- {
- args.add("--acc");
- }
- else if (MessageManager.getString(REPORTING_CUTOFF_KEY)
- .equals(name))
- {
- if (CUTOFF_EVALUE.equals(arg.getValue()))
- {
- useEvalueCutoff = true;
- }
- else if (CUTOFF_SCORE.equals(arg.getValue()))
- {
- useScoreCutoff = true;
- }
- }
- else if (MessageManager.getString(SEQ_EVALUE_KEY).equals(name))
- {
- seqEvalueCutoff = arg.getValue();
- }
- else if (MessageManager.getString(SEQ_SCORE_KEY).equals(name))
- {
- seqScoreCutoff = arg.getValue();
- }
- else if (MessageManager.getString(DOM_EVALUE_KEY)
- .equals(name))
- {
- domEvalueCutoff = arg.getValue();
- }
- else if (MessageManager.getString(DOM_SCORE_KEY).equals(name))
- {
- domScoreCutoff = arg.getValue();
- }
- else if (MessageManager.getString(TRIM_TERMINI_KEY)
- .equals(name))
- {
- trim = true;
- }
- else if (MessageManager.getString(DATABASE_KEY).equals(name))
- {
- dbFound = true;
- dbPath = arg.getValue();
- if (!MessageManager.getString(THIS_ALIGNMENT_KEY)
- .equals(dbPath))
- {
- int pos = dbPath.lastIndexOf(File.separator);
- databaseName = dbPath.substring(pos + 1);
- databaseFile = new File(dbPath);
- }
- }
- }
- }
-
- if (useEvalueCutoff)
- {
- args.add("-E");
- args.add(seqEvalueCutoff);
- args.add("--domE");
- args.add(domEvalueCutoff);
- }
- else if (useScoreCutoff)
- {
- args.add("-T");
- args.add(seqScoreCutoff);
- args.add("--domT");
- args.add(domScoreCutoff);
- }
-
- if (!dbFound || MessageManager.getString(THIS_ALIGNMENT_KEY)
- .equals(dbPath))
- {
- /*
- * no external database specified for search, so
- * export current alignment as 'database' to search,
- * excluding any HMM consensus sequences it contains
- */
- databaseFile = FileUtils.createTempFile("database", ".sto");
- AlignmentI al = af.getViewport().getAlignment();
- AlignmentI copy = new Alignment(al);
- List<SequenceI> hmms = copy.getHmmSequences();
- for (SequenceI hmmSeq : hmms)
- {
- copy.deleteSequence(hmmSeq);
- }
- exportStockholm(copy.getSequencesArray(), databaseFile, null);
- }
-
- args.add(getFilePath(hmmFile));
- args.add(getFilePath(databaseFile));
+ buildArguments(args, searchOutputFile, hitsAlignmentFile, hmmFile);
return runCommand(args);
}
+
/**
* Imports the data from the temporary file to which the output of hmmsearch
* was directed. The results are optionally realigned using hmmalign.
inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
seqs = file.getSeqsAsArray();
- readTable(searchOutputFile);
+ readDomainTable(searchOutputFile, false);
+
+ if (searchAlignment)
+ {
+ recoverSequences(sequencesHash, seqs);
+ }
+
+ // look for PP cons and ref seq in alignment only annotation
+ AlignmentAnnotation modelpos = null, ppcons = null;
+ for (AlignmentAnnotation aa : file.getAnnotations())
+ {
+ if (aa.sequenceRef == null)
+ {
+ if (aa.label.equals("Reference Positions")) // RF feature type in
+ // stockholm parser
+ {
+ modelpos = aa;
+ }
+ if (aa.label.equals("Posterior Probability"))
+ {
+ ppcons = aa;
+ }
+ }
+ }
+
int seqCount = Math.min(seqs.length, seqsToReturn);
SequenceI[] hmmAndSeqs = new SequenceI[seqCount + 1];
+ hmmSeq = hmmSeq.deriveSequence(); // otherwise all bad things happen
hmmAndSeqs[0] = hmmSeq;
System.arraycopy(seqs, 0, hmmAndSeqs, 1, seqCount);
+ if (modelpos != null)
+ {
+ // TODO need - get ungapped sequence method
+ hmmSeq.setSequence(
+ hmmSeq.getDatasetSequence().getSequenceAsString());
+ Annotation[] refpos = modelpos.annotations;
+ // insert gaps to match with refseq positions
+ int gc = 0, lcol = 0;
+ for (int c = 0; c < refpos.length; c++)
+ {
+ if (refpos[c] != null && ("x".equals(refpos[c].displayCharacter)))
+ {
+ if (gc > 0)
+ {
+ hmmSeq.insertCharAt(lcol + 1, gc, '-');
+ }
+ gc = 0;
+ lcol = c;
+ }
+ else
+ {
+ gc++;
+ }
+ }
+ }
if (realign)
{
else
{
AlignmentI al = new Alignment(hmmAndSeqs);
+ if (ppcons != null)
+ {
+ al.addAnnotation(ppcons);
+ }
+ if (modelpos != null)
+ {
+ al.addAnnotation(modelpos);
+ }
AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
AlignFrame.DEFAULT_HEIGHT);
String ttl = "hmmSearch of " + databaseName + " using "
+ hmmSeq.getName();
Desktop.addInternalFrame(alignFrame, ttl, AlignFrame.DEFAULT_WIDTH,
AlignFrame.DEFAULT_HEIGHT);
+
+ if (returnNoOfNewSeqs)
+ {
+ int nNew = checkForNewSequences();
+ JvOptionPane.showMessageDialog(af.alignPanel, nNew + " "
+ + MessageManager.getString("label.new_returned"));
+ }
+
}
+
hmmTemp.delete();
inputAlignmentTemp.delete();
searchOutputFile.delete();
}
}
+ /**
+  * Counts the search results whose name does not match the name of any
+  * sequence in {@code alignment}.
+  * <p>
+  * The names already present in the alignment are collected once into a set,
+  * so the alignment's sequence array is fetched a single time instead of once
+  * per result, and each result costs one set lookup rather than a linear scan.
+  *
+  * @return the number of sequences in {@code seqs} whose name is not shared
+  *         with any sequence of {@code alignment}
+  */
+ private int checkForNewSequences()
+ {
+   // fully qualified so that this method needs no additional import
+   java.util.Set<String> knownNames = new java.util.HashSet<>();
+   for (SequenceI aliSeq : alignment.getSequencesArray())
+   {
+     knownNames.add(aliSeq.getName());
+   }
+
+   int nNew = 0;
+   for (SequenceI resultSeq : seqs)
+   {
+     // a result counts as new only if no alignment sequence has its name
+     if (!knownNames.contains(resultSeq.getName()))
+     {
+       nNew++;
+     }
+   }
+
+   return nNew;
+ }
+
/**
* Realigns the given sequences using hmmalign, to the HMM profile sequence
* which is the first in the array, and opens the results in a new frame
}
HmmerCommand hmmalign = new HMMAlign(frame, alignArgs);
hmmalign.run();
- }
-
- /**
- * Reads in the scores table output by hmmsearch and adds annotation to
- * sequences for E-value and bit score
- *
- * @param inputTableTemp
- * @throws IOException
- */
- void readTable(File inputTableTemp) throws IOException
- {
- BufferedReader br = new BufferedReader(new FileReader(inputTableTemp));
- String line = "";
- while (!line.startsWith("Query:"))
- {
- line = br.readLine();
- }
- for (int i = 0; i < 5; i++)
- {
- line = br.readLine();
- }
- int index = 0;
- while (!" ------ inclusion threshold ------".equals(line)
- && !"".equals(line))
- {
- SequenceI seq = seqs[index];
- Scanner scanner = new Scanner(line);
- String str = scanner.next();
- addScoreAnnotation(str, seq, "hmmsearch E-value",
- "Full sequence E-value");
- str = scanner.next();
- addScoreAnnotation(str, seq, "hmmsearch Score",
- "Full sequence bit score");
- scanner.close();
- line = br.readLine();
- index++;
- }
-
- br.close();
- }
-
- /**
- * A helper method that adds one score-only (non-positional) annotation to a
- * sequence
- *
- * @param value
- * @param seq
- * @param label
- * @param description
- */
- protected void addScoreAnnotation(String value, SequenceI seq,
- String label, String description)
- {
- try
- {
- AlignmentAnnotation annot = new AlignmentAnnotation(label,
- description, null);
- annot.setCalcId(HMMSEARCH);
- double eValue = Double.parseDouble(value);
- annot.setScore(eValue);
- annot.setSequenceRef(seq);
- seq.addAlignmentAnnotation(annot);
- } catch (NumberFormatException e)
+ if (returnNoOfNewSeqs)
{
- System.err.println("Error parsing " + label + " from " + value);
+ int nNew = checkForNewSequences();
+ JvOptionPane.showMessageDialog(frame.alignPanel,
+ nNew + " " + MessageManager.getString("label.new_returned"));
}
}