X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fhmmer%2FHmmerCommand.java;h=e24100811d3ea8fcfdce13e46dcd4b4114038980;hb=841485c28403b4a8e00c9bddae9caa9411667b8a;hp=85f64bfa7c5ee72c48f9f95ca186d76c0b71e1b5;hpb=1beac3545a78d4c5c3274dbb53296708d693efe0;p=jalview.git diff --git a/src/jalview/hmmer/HmmerCommand.java b/src/jalview/hmmer/HmmerCommand.java index 85f64bf..e241008 100644 --- a/src/jalview/hmmer/HmmerCommand.java +++ b/src/jalview/hmmer/HmmerCommand.java @@ -1,6 +1,7 @@ package jalview.hmmer; import jalview.analysis.SeqsetUtils; +import jalview.analysis.SeqsetUtils.SequenceInfo; import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -8,6 +9,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.Annotation; import jalview.datamodel.HiddenMarkovModel; +import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; import jalview.gui.JvOptionPane; @@ -22,7 +24,6 @@ import jalview.ws.params.ArgumentI; import java.io.BufferedReader; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; @@ -30,6 +31,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Hashtable; import java.util.List; +import java.util.Map; /** * Base class for hmmbuild, hmmalign and hmmsearch @@ -60,26 +62,40 @@ public abstract class HmmerCommand implements Runnable static final String NUMBER_OF_RESULTS_KEY = "label.number_of_results"; + static final String NUMBER_OF_ITERATIONS = "label.number_of_iterations"; + static final String TRIM_TERMINI_KEY = "label.trim_termini"; + static final String RETURN_N_NEW_SEQ = "label.check_for_new_sequences"; + static final String REPORTING_CUTOFF_KEY = "label.reporting_cutoff"; - static final String CUTOFF_NONE = "None"; + static final String CUTOFF_NONE = "label.default"; + + static final String CUTOFF_SCORE = "label.score"; + + static final String CUTOFF_EVALUE = "label.evalue"; + + static final String REPORTING_SEQ_EVALUE_KEY = "label.reporting_seq_evalue"; - static final String CUTOFF_SCORE = "Score"; + static final String REPORTING_DOM_EVALUE_KEY = "label.reporting_dom_evalue"; - static final String CUTOFF_EVALUE = "E-Value"; + static final String REPORTING_SEQ_SCORE_KEY = "label.reporting_seq_score"; - static final String SEQ_EVALUE_KEY = "label.seq_evalue"; + static final String REPORTING_DOM_SCORE_KEY = "label.reporting_dom_score"; - static final String DOM_EVALUE_KEY = "label.dom_evalue"; + static final String INCLUSION_SEQ_EVALUE_KEY = "label.inclusion_seq_evalue"; - static final String SEQ_SCORE_KEY = "label.seq_score"; + static final String INCLUSION_DOM_EVALUE_KEY = "label.inclusion_dom_evalue"; - static final String DOM_SCORE_KEY = "label.dom_score"; + static final String INCLUSION_SEQ_SCORE_KEY = "label.inclusion_seq_score"; + + static final String INCLUSION_DOM_SCORE_KEY = "label.inclusion_dom_score"; static final String ARG_TRIM = "--trim"; + static final String INCLUSION_THRESHOLD_KEY = "label.inclusion_threshold"; + /** * Constructor * @@ -113,7 +129,7 @@ public abstract class HmmerCommand implements Runnable * * @param seqs */ - protected Hashtable stashSequences(SequenceI[] seqs) + protected Map stashSequences(SequenceI[] seqs) { return SeqsetUtils.uniquify(seqs, true); } @@ -121,12 +137,12 @@ public abstract class HmmerCommand implements Runnable /** * Restores the sequence data lost by uniquifying * - * @param hashtable + * @param sequencesHash * @param seqs */ - protected void recoverSequences(Hashtable hashtable, SequenceI[] seqs) + protected void recoverSequences(Map sequencesHash, SequenceI[] seqs) { - SeqsetUtils.deuniquify(hashtable, seqs); + SeqsetUtils.deuniquify(sequencesHash, seqs); } /** @@ -140,14 +156,14 @@ public abstract class HmmerCommand implements Runnable public boolean runCommand(List commands) throws IOException { - List args = Platform.isWindows() ? wrapWithCygwin(commands) + List args = Platform.isWindowsAndNotJS() ? wrapWithCygwin(commands) : commands; try { ProcessBuilder pb = new ProcessBuilder(args); pb.redirectErrorStream(true); // merge syserr to sysout - if (Platform.isWindows()) + if (Platform.isWindowsAndNotJS()) { String path = pb.environment().get("Path"); path = jalview.bin.Cache.getProperty("CYGWIN_PATH") + ";" + path; @@ -228,7 +244,7 @@ public abstract class HmmerCommand implements Runnable /** * Exports an alignment, and reference (RF) annotation if present, to the - * specified file, in Stockholm format + * specified file, in Stockholm format, removing all HMM sequences * * @param seqs * @param toFile @@ -236,13 +252,15 @@ public abstract class HmmerCommand implements Runnable * @throws IOException */ public void exportStockholm(SequenceI[] seqs, File toFile, - AnnotatedCollectionI annotated) throws IOException + AnnotatedCollectionI annotated) + throws IOException { if (seqs == null) { return; } AlignmentI newAl = new Alignment(seqs); + if (!newAl.isAligned()) { newAl.padGaps(); @@ -276,6 +294,17 @@ public abstract class HmmerCommand implements Runnable } } + for (SequenceI seq : newAl.getSequencesArray()) + { + if (seq.getAnnotation() != null) + { + for (AlignmentAnnotation ann : seq.getAnnotation()) + { + seq.removeAlignmentAnnotation(ann); + } + } + } + StockholmFile file = new StockholmFile(newAl); String output = file.print(seqs, false); PrintWriter writer = new PrintWriter(toFile); @@ -283,23 +312,6 @@ public abstract class HmmerCommand implements Runnable writer.close(); } - public void exportFasta(SequenceI[] seqs, File toFile) - { - FastaFile file = new FastaFile(); - String output = file.print(seqs, false); - PrintWriter writer; - try - { - writer = new PrintWriter(toFile); - writer.println(output); - writer.close(); - } catch (FileNotFoundException e) - { - e.printStackTrace(); - } - - } - /** * Answers the full path to the given hmmer executable, or null if file cannot * be found or is not executable @@ -389,8 +401,8 @@ public abstract class HmmerCommand implements Runnable } /** - * Answers the HMM profile for the profile sequence the user selected (default - * is just the first HMM sequence in the alignment) + * Answers the query sequence the user selected (default is just the first + * sequence in the alignment) * * @return */ @@ -426,7 +438,7 @@ public abstract class HmmerCommand implements Runnable protected String getFilePath(File resultFile, boolean isInCygwin) { String path = resultFile.getAbsolutePath(); - if (Platform.isWindows() && isInCygwin) + if (Platform.isWindowsAndNotJS() && isInCygwin) { // the first backslash escapes '\' for the regular expression argument path = path.replaceAll("\\" + File.separator, "/"); @@ -440,4 +452,87 @@ public abstract class HmmerCommand implements Runnable return path; } + + /** + * A helper method that deletes any HMM consensus sequence from the given + * collection, and from the parent alignment if ac is a subgroup + * + * @param ac + */ + void deleteHmmSequences(AnnotatedCollectionI ac) + { + List hmmSeqs = ac.getHmmSequences(); + for (SequenceI hmmSeq : hmmSeqs) + { + if (ac instanceof SequenceGroup) + { + ((SequenceGroup) ac).deleteSequence(hmmSeq, false); + AnnotatedCollectionI context = ac.getContext(); + if (context != null && context instanceof AlignmentI) + { + ((AlignmentI) context).deleteSequence(hmmSeq); + } + } + else + { + ((AlignmentI) ac).deleteSequence(hmmSeq); + } + } + } + + /** + * Sets the names of any duplicates within the given sequences to include their + * respective lengths. Deletes any duplicates that have the same name after this + * step + * + * @param seqs + */ + void renameDuplicates(AlignmentI al) + { + + SequenceI[] seqs = al.getSequencesArray(); + List wasRenamed = new ArrayList<>(); + + for (SequenceI seq : seqs) + { + wasRenamed.add(false); + } + + for (int i = 0; i < seqs.length; i++) + { + for (int j = 0; j < seqs.length; j++) + { + if (seqs[i].getName().equals(seqs[j].getName()) && i != j + && !wasRenamed.get(j)) + { + + wasRenamed.set(i, true); + String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd(); + // setting sequence name to include range - to differentiate between + // sequences of the same name. Currently have to include the range twice + // because the range is removed (once) when setting the name + // TODO come up with a better way of doing this + seqs[j].setName(seqs[j].getName() + range + range); + } + + } + if (wasRenamed.get(i)) + { + String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd(); + seqs[i].setName(seqs[i].getName() + range + range); + } + } + + for (int i = 0; i < seqs.length; i++) + { + for (int j = 0; j < seqs.length; j++) + { + if (seqs[i].getName().equals(seqs[j].getName()) && i != j) + { + al.deleteSequence(j); + } + } + } + } + }