From 3e49da65865c812f4b2b81f40ef65c983f14aa83 Mon Sep 17 00:00:00 2001 From: tva Date: Fri, 5 Jul 2019 10:58:52 +0100 Subject: [PATCH] JAL-2629 fix file format issues preventing hmmsearch, jackhmmer running --- src/jalview/hmmer/HMMAlign.java | 2 +- src/jalview/hmmer/HMMBuild.java | 2 +- src/jalview/hmmer/HMMSearch.java | 8 +++- src/jalview/hmmer/HmmerCommand.java | 76 +++++++++++++++++++++++------------ src/jalview/hmmer/JackHMMER.java | 14 +++++-- 5 files changed, 69 insertions(+), 33 deletions(-) diff --git a/src/jalview/hmmer/HMMAlign.java b/src/jalview/hmmer/HMMAlign.java index d66ec33..13bbba4 100644 --- a/src/jalview/hmmer/HMMAlign.java +++ b/src/jalview/hmmer/HMMAlign.java @@ -87,7 +87,7 @@ public class HMMAlign extends HmmerCommand File alignmentFile = FileUtils.createTempFile("output", ".sto"); File resultFile = FileUtils.createTempFile("input", ".sto"); - exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null); + exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null, false); exportHmm(hmm, modelFile.getAbsoluteFile()); boolean ran = runCommand(modelFile, alignmentFile, resultFile); diff --git a/src/jalview/hmmer/HMMBuild.java b/src/jalview/hmmer/HMMBuild.java index 974d2b2..85abd50 100644 --- a/src/jalview/hmmer/HMMBuild.java +++ b/src/jalview/hmmer/HMMBuild.java @@ -202,7 +202,7 @@ public class HMMBuild extends HmmerCommand SequenceI[] copyArray = copy.toArray(new SequenceI[copy.size()]); Hashtable sequencesHash = stashSequences(copyArray); - exportStockholm(copyArray, alignmentFile, ac); + exportStockholm(copyArray, alignmentFile, ac, false); recoverSequences(sequencesHash, copy.toArray(new SequenceI[] {})); diff --git a/src/jalview/hmmer/HMMSearch.java b/src/jalview/hmmer/HMMSearch.java index 64802c7..b40b079 100644 --- a/src/jalview/hmmer/HMMSearch.java +++ b/src/jalview/hmmer/HMMSearch.java @@ -276,7 +276,13 @@ public class HMMSearch extends HmmerCommand AlignmentI al = af.getViewport().getAlignment(); AlignmentI copy = new Alignment(al); deleteHmmSequences(copy); - exportStockholm(copy.getSequencesArray(), databaseFile, null); + + SequenceI[] seqs = copy.getSequencesArray(); + + // hmmsearch fails if duplicate sequence names in file + renameDuplicates(seqs); + + exportStockholm(copy.getSequencesArray(), databaseFile, null, true); } args.add(getFilePath(hmmFile, true)); diff --git a/src/jalview/hmmer/HmmerCommand.java b/src/jalview/hmmer/HmmerCommand.java index 79fcb4c..dd6adc8 100644 --- a/src/jalview/hmmer/HmmerCommand.java +++ b/src/jalview/hmmer/HmmerCommand.java @@ -23,7 +23,6 @@ import jalview.ws.params.ArgumentI; import java.io.BufferedReader; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; @@ -237,13 +236,15 @@ public abstract class HmmerCommand implements Runnable * @throws IOException */ public void exportStockholm(SequenceI[] seqs, File toFile, - AnnotatedCollectionI annotated) throws IOException + AnnotatedCollectionI annotated, boolean removeSS) + throws IOException { if (seqs == null) { return; } AlignmentI newAl = new Alignment(seqs); + if (!newAl.isAligned()) { newAl.padGaps(); @@ -277,6 +278,24 @@ public abstract class HmmerCommand implements Runnable } } + for (SequenceI seq : newAl.getSequencesArray()) + { + if (removeSS && seq.getAnnotation() != null) + { + for (AlignmentAnnotation ann : seq.getAnnotation()) + { + // TODO investigate how to make hmmsearch and jackhmmer work with annotations + /* + if (ann.label.equals("Secondary Structure")) + { + seq.removeAlignmentAnnotation(ann); + } + */ + seq.removeAlignmentAnnotation(ann); + } + } + } + StockholmFile file = new StockholmFile(newAl); String output = file.print(seqs, false); PrintWriter writer = new PrintWriter(toFile); @@ -285,30 +304,6 @@ public abstract class HmmerCommand implements Runnable } /** - * Exports the given alignment withotu any anotations to a fasta file - * - * @param seqs - * @param toFile - */ - public void exportFasta(AlignmentI al, File toFile) - { - FastaFile file = new FastaFile(); - - String output = file.print(al.getSequencesArray(), false); - PrintWriter writer; - try - { - writer = new PrintWriter(toFile); - writer.println(output); - writer.close(); - } catch (FileNotFoundException e) - { - e.printStackTrace(); - } - - } - - /** * Answers the full path to the given hmmer executable, or null if file cannot * be found or is not executable * @@ -475,4 +470,33 @@ public abstract class HmmerCommand implements Runnable } } } + + void renameDuplicates(SequenceI[] seqs) + { + // rename duplicate sequences, hmmsearch fails db contains duplicates + for (int i = 0; i < seqs.length; i++) + { + boolean renamed = false; + for (int j = 0; j < seqs.length; j++) + { + renamed = true; + if (seqs[i].getName().equals(seqs[j].getName()) && i != j) + { + String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd(); + // setting sequence name to include range - to differentiate between + // sequences of the same name. Currently have to include the range twice + // because the range is removed (once) when setting the name + // TODO come up with a better way of doing this + seqs[j].setName(seqs[j].getName() + range + range); + } + + } + if (renamed) + { + String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd(); + seqs[i].setName(seqs[i].getName() + range + range); + } + } + } + } diff --git a/src/jalview/hmmer/JackHMMER.java b/src/jalview/hmmer/JackHMMER.java index 16dcebb..58b1b75 100644 --- a/src/jalview/hmmer/JackHMMER.java +++ b/src/jalview/hmmer/JackHMMER.java @@ -72,13 +72,14 @@ public class JackHMMER extends HmmerCommand try { - File seqFile = FileUtils.createTempFile("seq", ".fa"); + File seqFile = FileUtils.createTempFile("seq", ".sto"); File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment", ".sto"); File searchOutputFile = FileUtils.createTempFile("searchOutput", ".txt"); - exportSequence(seq, seqFile.getAbsoluteFile()); + exportStockholm(new SequenceI[] { seq }, seqFile.getAbsoluteFile(), + null, true); boolean ran = runCommand(searchOutputFile, hitsAlignmentFile, seqFile); @@ -244,11 +245,16 @@ public class JackHMMER extends HmmerCommand * no external database specified for search, so * export current alignment as 'database' to search */ - databaseFile = FileUtils.createTempFile("database", ".fa"); + databaseFile = FileUtils.createTempFile("database", ".sto"); AlignmentI al = af.getViewport().getAlignment(); AlignmentI copy = new Alignment(al); + deleteHmmSequences(copy); - exportFasta(copy, databaseFile); + + // jackhmmer fails if file contains duplicate sequence names + renameDuplicates(copy.getSequencesArray()); + + exportStockholm(copy.getSequencesArray(), databaseFile, null, true); } args.add(getFilePath(seqFile, true)); -- 1.7.10.2