JAL-2629 fix file format issues preventing hmmsearch, jackhmmer running
author tva <tva@10.205.251.175>
Fri, 5 Jul 2019 09:58:52 +0000 (10:58 +0100)
committer tva <tva@10.205.251.175>
Fri, 5 Jul 2019 09:58:52 +0000 (10:58 +0100)
src/jalview/hmmer/HMMAlign.java
src/jalview/hmmer/HMMBuild.java
src/jalview/hmmer/HMMSearch.java
src/jalview/hmmer/HmmerCommand.java
src/jalview/hmmer/JackHMMER.java

index d66ec33..13bbba4 100644 (file)
@@ -87,7 +87,7 @@ public class HMMAlign extends HmmerCommand
         File alignmentFile = FileUtils.createTempFile("output", ".sto");
         File resultFile = FileUtils.createTempFile("input", ".sto");
 
-        exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null);
+        exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null, false);
         exportHmm(hmm, modelFile.getAbsoluteFile());
 
         boolean ran = runCommand(modelFile, alignmentFile, resultFile);
index 974d2b2..85abd50 100644 (file)
@@ -202,7 +202,7 @@ public class HMMBuild extends HmmerCommand
       SequenceI[] copyArray = copy.toArray(new SequenceI[copy.size()]);
       Hashtable sequencesHash = stashSequences(copyArray);
 
-      exportStockholm(copyArray, alignmentFile, ac);
+      exportStockholm(copyArray, alignmentFile, ac, false);
 
       recoverSequences(sequencesHash, copy.toArray(new SequenceI[] {}));
 
index 64802c7..b40b079 100644 (file)
@@ -276,7 +276,13 @@ public class HMMSearch extends HmmerCommand
       AlignmentI al = af.getViewport().getAlignment();
       AlignmentI copy = new Alignment(al);
       deleteHmmSequences(copy);
-      exportStockholm(copy.getSequencesArray(), databaseFile, null);
+
+      SequenceI[] seqs = copy.getSequencesArray();
+
+      // hmmsearch fails if duplicate sequence names in file
+      renameDuplicates(seqs);
+
+      exportStockholm(copy.getSequencesArray(), databaseFile, null, true);
     }
 
     args.add(getFilePath(hmmFile, true));
index 79fcb4c..dd6adc8 100644 (file)
@@ -23,7 +23,6 @@ import jalview.ws.params.ArgumentI;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.PrintWriter;
@@ -237,13 +236,15 @@ public abstract class HmmerCommand implements Runnable
    * @throws IOException
    */
   public void exportStockholm(SequenceI[] seqs, File toFile,
-          AnnotatedCollectionI annotated) throws IOException
+          AnnotatedCollectionI annotated, boolean removeSS)
+          throws IOException
   {
     if (seqs == null)
     {
       return;
     }
     AlignmentI newAl = new Alignment(seqs);
+
     if (!newAl.isAligned())
     {
       newAl.padGaps();
@@ -277,6 +278,24 @@ public abstract class HmmerCommand implements Runnable
       }
     }
 
+    for (SequenceI seq : newAl.getSequencesArray())
+    {
+      if (removeSS && seq.getAnnotation() != null)
+      {
+        for (AlignmentAnnotation ann : seq.getAnnotation())
+        {
+          // TODO investigate how to make hmmsearch and jackhmmer work with annotations
+          /*
+          if (ann.label.equals("Secondary Structure"))
+          {
+            seq.removeAlignmentAnnotation(ann);
+          }
+          */
+          seq.removeAlignmentAnnotation(ann);
+        }
+      }
+    }
+
     StockholmFile file = new StockholmFile(newAl);
     String output = file.print(seqs, false);
     PrintWriter writer = new PrintWriter(toFile);
@@ -285,30 +304,6 @@ public abstract class HmmerCommand implements Runnable
   }
 
   /**
-   * Exports the given alignment withotu any anotations to a fasta file
-   * 
-   * @param seqs
-   * @param toFile
-   */
-  public void exportFasta(AlignmentI al, File toFile)
-  {
-    FastaFile file = new FastaFile();
-
-    String output = file.print(al.getSequencesArray(), false);
-    PrintWriter writer;
-    try
-    {
-      writer = new PrintWriter(toFile);
-      writer.println(output);
-      writer.close();
-    } catch (FileNotFoundException e)
-    {
-      e.printStackTrace();
-    }
-
-  }
-
-  /**
    * Answers the full path to the given hmmer executable, or null if file cannot
    * be found or is not executable
    * 
@@ -475,4 +470,33 @@ public abstract class HmmerCommand implements Runnable
       }
     }
   }
+
+  /**
+   * Appends "/start-end" to the name of each sequence that shares its name
+   * with another sequence in seqs; uniquely named sequences are untouched.
+   */
+  void renameDuplicates(SequenceI[] seqs)
+  {
+    for (int i = 0; i < seqs.length; i++)
+    {
+      boolean renamed = false;
+      for (int j = 0; j < seqs.length; j++)
+      {
+        if (i != j && seqs[i].getName().equals(seqs[j].getName()))
+        {
+          renamed = true;
+          String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd();
+          // the range is appended twice because setting the name strips it
+          // once (as noted by the original author) - TODO find a better way
+          seqs[j].setName(seqs[j].getName() + range + range);
+        }
+      }
+      if (renamed)
+      {
+        String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd();
+        seqs[i].setName(seqs[i].getName() + range + range);
+      }
+    }
+  }
+
 }
index 16dcebb..58b1b75 100644 (file)
@@ -72,13 +72,14 @@ public class JackHMMER extends HmmerCommand
 
     try
     {
-      File seqFile = FileUtils.createTempFile("seq", ".fa");
+      File seqFile = FileUtils.createTempFile("seq", ".sto");
       File hitsAlignmentFile = FileUtils.createTempFile("hitAlignment",
               ".sto");
       File searchOutputFile = FileUtils.createTempFile("searchOutput",
               ".txt");
 
-      exportSequence(seq, seqFile.getAbsoluteFile());
+      exportStockholm(new SequenceI[] { seq }, seqFile.getAbsoluteFile(),
+              null, true);
 
       boolean ran = runCommand(searchOutputFile, hitsAlignmentFile,
               seqFile);
@@ -244,11 +245,16 @@ public class JackHMMER extends HmmerCommand
        * no external database specified for search, so
        * export current alignment as 'database' to search
        */
-      databaseFile = FileUtils.createTempFile("database", ".fa");
+      databaseFile = FileUtils.createTempFile("database", ".sto");
       AlignmentI al = af.getViewport().getAlignment();
       AlignmentI copy = new Alignment(al);
+
       deleteHmmSequences(copy);
-      exportFasta(copy, databaseFile);
+
+      // jackhmmer fails if file contains duplicate sequence names
+      renameDuplicates(copy.getSequencesArray());
+
+      exportStockholm(copy.getSequencesArray(), databaseFile, null, true);
     }
 
     args.add(getFilePath(seqFile, true));