File alignmentFile = FileUtils.createTempFile("output", ".sto");
File resultFile = FileUtils.createTempFile("input", ".sto");
- exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null, false);
+ exportStockholm(seqs, alignmentFile.getAbsoluteFile(), null);
exportHmm(hmm, modelFile.getAbsoluteFile());
boolean ran = runCommand(modelFile, alignmentFile, resultFile);
// TODO rather than copy alignment data we should anonymize in situ -
// export/File import could use anonymization hash to reinstate references
// at import level ?
+
SequenceI[] copyArray = copy.toArray(new SequenceI[copy.size()]);
Hashtable sequencesHash = stashSequences(copyArray);
- exportStockholm(copyArray, alignmentFile, ac, false);
+ exportStockholm(copyArray, alignmentFile, ac);
recoverSequences(sequencesHash, copy.toArray(new SequenceI[] {}));
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Hashtable;
import java.util.List;
import java.util.Scanner;
private String databaseName;
+ Hashtable sequencesHash;
+
/**
* Constructor for the HMMSearchThread
*
AlignmentI copy = new Alignment(al);
deleteHmmSequences(copy);
- SequenceI[] seqs = copy.getSequencesArray();
+ sequencesHash = stashSequences(copy.getSequencesArray());
- // hmmsearch fails if duplicate sequence names in file
- renameDuplicates(seqs);
+ exportStockholm(copy.getSequencesArray(), databaseFile, null);
- exportStockholm(copy.getSequencesArray(), databaseFile, null, true);
}
args.add(getFilePath(hmmFile, true));
StockholmFile file = new StockholmFile(new FileParse(
inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
seqs = file.getSeqsAsArray();
+
+ recoverSequences(sequencesHash, seqs);
+
// look for PP cons and ref seq in alignment only annotation
AlignmentAnnotation modelpos = null, ppcons = null;
for (AlignmentAnnotation aa : file.getAnnotations())
* @throws IOException
*/
public void exportStockholm(SequenceI[] seqs, File toFile,
- AnnotatedCollectionI annotated, boolean removeSS)
+ AnnotatedCollectionI annotated)
throws IOException
{
if (seqs == null)
for (SequenceI seq : newAl.getSequencesArray())
{
- if (removeSS && seq.getAnnotation() != null)
+ if (seq.getAnnotation() != null)
{
for (AlignmentAnnotation ann : seq.getAnnotation())
{
- // TODO investigate how to make hmmsearch and jackhmmer work with annotations
- /*
- if (ann.label.equals("Secondary Structure"))
- {
- seq.removeAlignmentAnnotation(ann);
- }
- */
seq.removeAlignmentAnnotation(ann);
}
}
}
}
- void renameDuplicates(SequenceI[] seqs)
+ /**
+ * Renames sequences of the given alignment that share a name with another
+ * sequence: each affected sequence gets its "/start-end" range appended to its
+ * name (the range is appended twice). Afterwards deleteSequence is called on
+ * the alignment for entries whose names are still equal after the renaming
+ * step.
+ *
+ * NOTE(review): the deletion pass works from the array fetched before any
+ * deletion happens, and its name comparison holds for both orderings of an
+ * equal-named pair; confirm that the indices passed to deleteSequence still
+ * refer to the intended sequences and that one copy of each duplicate is kept.
+ *
+ * @param al
+ *          the alignment whose sequences are checked; sequences are renamed
+ *          via setName and entries are removed via deleteSequence
+ */
+ void renameDuplicates(AlignmentI al)
{
- // rename duplicate sequences, hmmsearch fails db contains duplicates
+
+ SequenceI[] seqs = al.getSequencesArray();
+ List<Boolean> wasRenamed = new ArrayList<>(); // one flag per entry of seqs, filled with false below
+
+ for (SequenceI seq : seqs)
+ {
+ wasRenamed.add(false);
+ }
+
for (int i = 0; i < seqs.length; i++)
{
- boolean renamed = false;
for (int j = 0; j < seqs.length; j++)
{
- renamed = true;
- if (seqs[i].getName().equals(seqs[j].getName()) && i != j)
+ if (seqs[i].getName().equals(seqs[j].getName()) && i != j
+ && !wasRenamed.get(j))
{
+
+ wasRenamed.set(i, true); // entry i receives the range suffix after the inner loop
String range = "/" + seqs[j].getStart() + "-" + seqs[j].getEnd();
// setting sequence name to include range - to differentiate between
// sequences of the same name. Currently have to include the range twice
}
}
- if (renamed)
+ if (wasRenamed.get(i))
{
String range = "/" + seqs[i].getStart() + "-" + seqs[i].getEnd();
seqs[i].setName(seqs[i].getName() + range + range);
}
}
+
+ for (int i = 0; i < seqs.length; i++) // second pass: handle names that still collide after renaming
+ {
+ for (int j = 0; j < seqs.length; j++)
+ {
+ if (seqs[i].getName().equals(seqs[j].getName()) && i != j) // symmetric: true for (i, j) and for (j, i)
+ {
+ al.deleteSequence(j); // NOTE(review): j is an index into the pre-deletion array - verify it still matches the alignment
+ }
+ }
+ }
}
}
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Hashtable;
import java.util.List;
import java.util.Scanner;
private String databaseName;
+ Hashtable sequencesHash;
+
/**
* Constructor for the JackhmmerThread
*
".txt");
exportStockholm(new SequenceI[] { seq }, seqFile.getAbsoluteFile(),
- null, true);
+ null);
boolean ran = runCommand(searchOutputFile, hitsAlignmentFile,
seqFile);
deleteHmmSequences(copy);
- // jackhmmer fails if file contains duplicate sequence names
- renameDuplicates(copy.getSequencesArray());
+ sequencesHash = stashSequences(copy.getSequencesArray());
- exportStockholm(copy.getSequencesArray(), databaseFile, null, true);
+ exportStockholm(copy.getSequencesArray(), databaseFile, null);
}
args.add(getFilePath(seqFile, true));
inputAlignmentTemp.getAbsolutePath(), DataSourceType.FILE));
seqs = file.getSeqsAsArray();
+ recoverSequences(sequencesHash, seqs);
+
readTable(searchOutputFile);
int seqCount = Math.min(seqs.length, seqsToReturn);