********************************************************************/
/*
- * RCS $Id: mymain.c 255 2011-06-22 15:59:07Z fabian $
+ * RCS $Id: mymain.c 296 2014-10-07 12:15:41Z fabian $
*/
#ifdef HAVE_CONFIG_H
#include "mymain.h"
-/* hhalign (parameters) */
-#include "hhalign/general.h"
-
typedef struct {
/* Sequence input
* info for background-HMM creation */
bool bDealignInputSeqs;
+ /** Sequence input format
+ */
+ int iSeqInFormat;
+
/* profiles: : pre-aligned sequences, whose alignment will not be changed
*/
/** profile 1: pre-aligned sequence input. not directly used by Align() */
char *pcProfile1Infile ;
/** profile 2: pre-aligned sequence input. not directly used by Align() */
char *pcProfile2Infile;
-
+ /** profiles that contain no gaps are rejected, force them */
+ bool bIsProfile;
+ /** up to version 1.1.1 Kimura distance was default, change default, make Kimura optional */
+ /*bool bUseKimura; */
+ /** distance matrix output is default, allow %-ID output */
+ bool bPercentID;
+
/** Input limitations
*/
/** maximum allowed number of input sequences */
/** force overwriting of files */
bool bForceFileOverwrite;
+ /* line wrapping, FS, r274 -> */
+ int iWrap;
+ /* residue number in Clustal format, FS, r274 -> */
+ bool bResno;
+ /* output order, FS, r274 -> */
+ int iOutputOrder;
+
/* multithreading
*/
/** number of threads */
int iThreads;
+ /* pseudo-count file
+ */
+ char *pcPseudoFile;
/* logging
*/
char *pcLogFile;
/* log-file used for non-essential logging in prLog */
FILE *prLogFile = NULL;
+const char *CITATION = " Sievers F, Wilm A, Dineen D, Gibson TJ, Karplus K, Li W, Lopez R, McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG."
+ "\n"
+ " Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega."
+ "\n"
+ " Mol Syst Biol. 2011 Oct 11;7:539. doi: 10.1038/msb.2011.75. PMID: 21988835.";
+
/**
void
SetDefaultUserOpts(cmdline_opts_t *opts)
{
-
assert(NULL != opts);
opts->iSeqType = SEQTYPE_UNKNOWN;
opts->bDealignInputSeqs = FALSE;
opts->pcProfile1Infile = NULL;
opts->pcProfile2Infile = NULL;
-
+ opts->bIsProfile = FALSE;
+ opts->aln_opts.bUseKimura = FALSE;
+ opts->aln_opts.bPercID = FALSE;
+ opts->aln_opts.pcHMMBatch = NULL;
+
opts->iMaxNumSeq = INT_MAX;
opts->iMaxSeqLen = INT_MAX;
opts->pcAlnOutfile = NULL;
opts->iAlnOutFormat = MSAFILE_A2M;
opts->bForceFileOverwrite = FALSE;
+ opts->iWrap = 60;
+ opts->bResno = FALSE;
+ opts->iOutputOrder = INPUT_ORDER;
#ifdef HAVE_OPENMP
/* defaults to # of CPUs */
opts->iThreads = 1;
#endif
+ opts->pcPseudoFile = NULL;
opts->pcLogFile = NULL;
SetDefaultAlnOpts(& opts->aln_opts);
}
-/* end of SetDefaultAlnOpts() */
+/* end of SetDefaultUserOpts() */
/* keep in same order as in struct. FIXME could this be derived from argtable?
*/
- /* we only allow protein anyway: fprintf(prFile, "seq-type = %s\n", SeqTypeToStr(opts->iSeqType)); */
+ fprintf(prFile, "seq-type = %s\n", SeqTypeToStr(opts->iSeqType));
+ fprintf(prFile, "seq-in-fmt = %s\n", SeqfileFormat2String(opts->iSeqInFormat));
fprintf(prFile, "option: seq-in = %s\n",
NULL != opts->pcSeqInfile? opts->pcSeqInfile: "(null)");
fprintf(prFile, "option: dealign = %d\n", opts->bDealignInputSeqs);
NULL != opts->pcProfile1Infile? opts->pcProfile1Infile: "(null)");
fprintf(prFile, "option: profile2 = %s\n",
NULL != opts->pcProfile2Infile? opts->pcProfile2Infile: "(null)");
+ /*fprintf(prFile, "option: percent-id = %d\n", opts->aln_opts.bPercID);*/
+ fprintf(prFile, "option: is-profile = %d\n", opts->bIsProfile);
+ /*fprintf(prFile, "option: use-kimura = %d\n", opts->aln_opts.bUseKimura);*/
+
fprintf(prFile, "option: max-num-seq = %d\n", opts->iMaxNumSeq);
fprintf(prFile, "option: max-seq-len = %d\n", opts->iMaxSeqLen);
fprintf(prFile, "option: aln-out-file = %s\n",
NULL != opts->pcAlnOutfile? opts->pcAlnOutfile: "(null)");
fprintf(prFile, "option: aln-out-format = %s\n", SeqfileFormat2String(opts->iAlnOutFormat));
fprintf(prFile, "option: force-file-overwrite = %d\n", opts->bForceFileOverwrite);
+ fprintf(prFile, "option: line wrap = %d\n", opts->iWrap);
+ fprintf(prFile, "option: print residue numbers = %d\n", opts->bResno);
+ fprintf(prFile, "option: order alignment like input/tree = %d\n", opts->iOutputOrder);
+
fprintf(prFile, "option: threads = %d\n", opts->iThreads);
+ fprintf(prFile, "option: PseudoFile = %s\n", opts->pcPseudoFile);
fprintf(prFile, "option: logFile = %s\n", opts->pcLogFile);
}
/* end of PrintUserOpts */
if (NULL != user_opts->pcLogFile) {
CKFREE(user_opts->pcLogFile);
}
+ if (NULL != user_opts->pcPseudoFile) {
+ CKFREE(user_opts->pcPseudoFile);
+ }
FreeAlnOpts(& user_opts->aln_opts);
"You requested alignment output to stdout and verbose logging.",
" Alignment and log messages would get mixed up.");
}
- /* distance matrix output impossible in mbed mode, only have clusters, FS, r254 -> */
+ /* distance matrix output impossible in mBed mode, only have clusters, FS, r254 -> */
+#if 1
+ /* allow distance matrix output if initial mBed but subsequent full matrix during iteration, FS, r274 -> */
+ if (NULL != opts->aln_opts.pcDistmatOutfile){
+ if ( (TRUE == opts->aln_opts.bUseMbed) && (opts->aln_opts.iNumIterations < 1) ){
+ Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
+ }
+ if ( (TRUE == opts->aln_opts.bUseMbed) && (TRUE == opts->aln_opts.bUseMbedForIteration) ){
+ Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
+ }
+ if ( (TRUE == opts->aln_opts.bUseMbed) && (opts->aln_opts.iNumIterations > 0) &&
+ (opts->aln_opts.iMaxGuidetreeIterations < 1) ){
+ Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
+ }
+ }
+#else
if ( (NULL != opts->aln_opts.pcDistmatOutfile) && (TRUE == opts->aln_opts.bUseMbed) ) {
Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
}
+#endif
+
+ /* percentage identity cannot be printed in Kimura mode */
+ if ( (TRUE == opts->aln_opts.bUseKimura) && (TRUE == opts->aln_opts.bPercID) ){
+ Log(&rLog, LOG_FATAL, "Percentage Identity cannot be calculated if Kimura Distances are used.");
+ }
+
+ /* iteration destroys effect of pile-up */
+ if ( (TRUE == opts->aln_opts.bPileup) && (opts->aln_opts.iNumIterations > 0) ){
+ Log(&rLog, LOG_WARN, "Iteration destroys effect of pile-up.");
+ }
+
+ AlnOptsLogicCheck(& opts->aln_opts);
}
/* end of UserOptsLogicCheck */
struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
/*min*/ 0, /*max*/ 128,
"HMM input files");
+ struct arg_file *opt_hmm_batch = arg_file0(NULL, "hmm-batch", "<file>", /* FS, r291 -> */
+ "specify HMMs for individual sequences");
struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
"Dealign input sequences");
- struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
- "{Protein,RNA,DNA}",
- "Force a sequence type (default: auto)");
struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
"<file>",
"Pre-aligned multiple sequence file (aligned columns will be kept fix)");
struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
"<file>",
"Pre-aligned multiple sequence file (aligned columns will be kept fix)");
+ struct arg_lit *opt_isprofile = arg_lit0(NULL, "is-profile",
+ "disable check if profile, force profile (default no)");
+ struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
+ "{Protein, RNA, DNA}",
+ "Force a sequence type (default: auto)");
+/* struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein",
+ "Set sequence type to protein even if Clustal guessed nucleic acid"); */
+ struct arg_str *opt_infmt = arg_str0(NULL, "infmt",
+ "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
+ "Forced sequence input file format (default: auto)");
+ struct arg_lit *opt_resno = arg_lit0(NULL, "residuenumber,resno",
+ "in Clustal format print residue numbers (default no)");
struct arg_rem *rem_guidetree = arg_rem(NULL, "\nClustering:");
struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
"Use Mbed-like clustering also during iteration");
*/
- /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
+ struct arg_lit *opt_pileup = arg_lit0(NULL, "pileup",
+ "Sequentially align sequences");
struct arg_lit *opt_full = arg_lit0(NULL, "full",
"Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
"Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");
+ struct arg_int *opt_clustersize = arg_int0(NULL, "cluster-size", "<n>",
+ "soft maximum of sequences in sub-clusters"); /* FS, r274 -> */
+
+ struct arg_file *opt_clustfile = arg_file0(NULL, "clustering-out",
+ "<file>",
+ "Clustering output file"); /* FS, r274 -> */
+ struct arg_int *opt_trans = arg_int0(NULL, "trans", "<n>", "use transitivity (default: 0)");
+ struct arg_file *opt_posteriorfile = arg_file0(NULL, "posterior-out",
+ "<file>",
+ "Posterior probability output file"); /* FS, r288 -> */
+ struct arg_lit *opt_usekimura = arg_lit0(NULL, "use-kimura",
+ "use Kimura distance correction for aligned sequences (default no)");
+ struct arg_lit *opt_percentid = arg_lit0(NULL, "percent-id",
+ "convert distances into percent identities (default no)");
+
struct arg_str *opt_clustering = arg_str0("c", "clustering",
"{UPGMA}",
"Clustering method for guide tree");
struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
"{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
"MSA output file format (default: fasta)");
+ struct arg_int *opt_wrap = arg_int0(NULL, "wrap", "<n>",
+ "number of residues before line-wrap in output");
+ struct arg_str *opt_output_order = arg_str0(NULL, "output-order",
+ "{input-order,tree-order}",
+ "MSA output order like in input/guide-tree");
struct arg_rem *rem_iteration = arg_rem(NULL, "\nIteration:");
/* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
"<n>", "Number of (combined guide-tree/HMM) iterations");
struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
- "<n>", "Maximum number guidetree iterations");
+ "<n>", "Maximum number of guidetree iterations");
struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
"<n>", "Maximum number of HMM iterations");
"Set options automatically (might overwrite some of your options)");
struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>",
"Number of processors to use");
+ struct arg_file *opt_pseudo = arg_file0(NULL, "pseudo", "<file>",
+ "Input file for pseudo-count parameters");
struct arg_file *opt_logfile = arg_file0("l", "log",
"<file>",
"Log all non-essential output to this file");
struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);
-
struct arg_end *opt_end = arg_end(10); /* maximum number of errors
* to store */
void *argtable[] = {rem_seq_input,
opt_seqin,
opt_hmm_in,
+ opt_hmm_batch, /* FS, r291 -> */
opt_dealign,
-#if 0
- /* unused since we only support protein for now */
- opt_seqtype,
-#endif
opt_profile1,
opt_profile2,
-
+ opt_isprofile, /* FS, r282 ->*/
+ opt_seqtype,
+ /* opt_force_protein, */
+ opt_infmt,
rem_guidetree,
#if 0
/* no other options then default available or not implemented */
opt_distmat_out,
opt_guidetree_in,
opt_guidetree_out,
+ opt_pileup, /* FS, r288 -> */
opt_full, /* FS, r250 -> */
opt_full_iter, /* FS, r250 -> */
+ opt_clustersize, /* FS, r274 -> */
+ opt_clustfile, /* FS, r274 -> */
+ opt_trans, /* FS, r290 -> */
+ opt_posteriorfile, /* FS, r288 -> */
+ opt_usekimura, /* FS, r282 ->*/
+ opt_percentid, /* FS, r282 ->*/
#if 0
/* no other options then default available */
opt_clustering,
rem_aln_output,
opt_outfile,
opt_outfmt,
+ opt_resno, /* FS, 274 -> */
+ opt_wrap, /* FS, 274 -> */
+ opt_output_order, /* FS, 274 -> */
rem_iteration,
opt_num_iterations,
rem_misc,
opt_autooptions,
opt_threads,
+ opt_pseudo,
opt_logfile,
opt_help,
opt_verbose,
printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);
printf("\n");
+ printf("If you like Clustal-Omega please cite:\n%s\n", CITATION);
+ printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n");
+ /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */
+ printf("\n");
printf("Check http://www.clustal.org for more information and updates.\n");
-
- /*printf("\n");
- printf("FIXME more info e.g. how it works, pointers to references etc...\n");
- FIXME which paper to cite etc
- */
-
printf("\n");
printf("Usage: %s", basename(argv[0]));
user_opts->aln_opts.bUseMbedForIteration = FALSE;
}
- user_opts->bForceFileOverwrite = opt_force->count;
+ if (opt_pileup->count){
+ user_opts->aln_opts.bPileup = TRUE;
+ }
+ user_opts->bForceFileOverwrite = opt_force->count;
/* log-file
*/
}
+ /* pseudo-count-file
+ */
+ if (opt_pseudo->count > 0) {
+ user_opts->pcPseudoFile = CkStrdup(opt_pseudo->filename[0]);
+ if (! FileExists(user_opts->pcPseudoFile)) {
+ Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcPseudoFile);
+ }
+ }
+
+
/* normal sequence input (no profile)
*/
if (opt_seqin->count > 0) {
user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
}
+ /* Output format
+ */
+ if (opt_infmt->count > 0) {
+ /* avoid gcc warning about discarded qualifier */
+ char *tmp = (char *)opt_infmt->sval[0];
+ user_opts->iSeqInFormat = String2SeqfileFormat(tmp);
+ } else {
+ user_opts->iSeqInFormat = SQFILE_UNKNOWN;
+ }
+
+
/* Sequence type
*/
if (opt_seqtype->count > 0) {
Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
}
}
+/* if (opt_force_protein->count > 0) {
+ user_opts->iSeqType = SEQTYPE_PROTEIN;
+ } */
/* Profile input
*/
}
}
+ if (opt_isprofile->count){
+ user_opts->bIsProfile = TRUE;
+ }
+ if (opt_usekimura->count){
+ user_opts->aln_opts.bUseKimura = TRUE;
+ }
+ if (opt_percentid->count){
+ user_opts->aln_opts.bPercID = TRUE;
+ }
+
/* HMM input
*/
}
+ /* HMM Batch, FS, r291 ->
+ */
+ user_opts->aln_opts.pcHMMBatch = NULL;
+ if (opt_hmm_batch->count>0){
+ user_opts->aln_opts.pcHMMBatch = CkStrdup(opt_hmm_batch->filename[0]);
+ if (! FileExists(user_opts->aln_opts.pcHMMBatch)){
+ Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcHMMBatch);
+ }
+ }
+
/* Pair distance method
*/
if (opt_pairdist->count > 0) {
}
}
-
+ if (opt_clustersize->count > 0){ /* FS, r274 -> */
+ if (opt_clustersize->ival[0] > 0){
+ user_opts->aln_opts.iClustersizes = opt_clustersize->ival[0];
+ }
+ }
+
+ if (opt_clustfile->count > 0){ /* FS, r274 -> */
+ user_opts->aln_opts.pcClustfile = CkStrdup(opt_clustfile->filename[0]);
+ /*if (! FileExists(user_opts->aln_opts.pcClustfile)) {
+ Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcClustfile);
+ }*/
+ }
+
+ if (opt_trans->count > 0){ /* FS, r290 -> */
+ user_opts->aln_opts.iTransitivity = opt_trans->ival[0];
+ }
+ if (opt_posteriorfile->count > 0){ /* FS, r288 -> */
+ user_opts->aln_opts.pcPosteriorfile = CkStrdup(opt_posteriorfile->filename[0]);
+ /*if (! FileExists(user_opts->aln_opts.pcPosteriorfile)) {
+ Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcPosteriorfile);
+ }*/
+ }
+
+
/* Guidetree input
*/
if (opt_guidetree_in->count > 0) {
/* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
*/
if (opt_macram->count > 0) { /* FS, r240 -> r241 */
- user_opts->aln_opts.iMacRam = opt_macram->ival[0];
+ user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0];
}
+ /* Number of residues in output before line-wrap
+ */
+ if (opt_wrap->count > 0) { /* FS, r274 -> */
+ user_opts->iWrap = opt_wrap->ival[0];
+ }
+
+ user_opts->bResno = opt_resno->count;
+
+ /* output-order
+ * like input (INPUT_ORDER = 0) or tree (TREE_ORDER = 1)
+ * if output-order format not valid use INPUT_ORDER
+ */
+ if (opt_output_order->count > 0){
+ user_opts->iOutputOrder = (0 == strcmp(opt_output_order->sval[0], "input-order")) ? INPUT_ORDER :
+ (0 == strcmp(opt_output_order->sval[0], "tree-order")) ? TREE_ORDER : INPUT_ORDER;
+ }
arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
mseq_t *prMSeq = NULL;
mseq_t *prMSeqProfile1 = NULL;
mseq_t *prMSeqProfile2 = NULL;
- cmdline_opts_t cmdline_opts;
- hhalign_para rHhalignPara = {0};
+ cmdline_opts_t cmdline_opts = {0};
/* Must happen first: setup logger */
LogDefaultSetup(&rLog);
/*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/
- Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME); /* FS, r237 -> 238 */
+ /*Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME);*/ /* FS, r237 -> 238 */
SetDefaultUserOpts(&(cmdline_opts));
PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts);
PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts));
}
- /* write relevant command-line options for hhalign
- *
- */
- rHhalignPara.iMacRamMB = cmdline_opts.aln_opts.iMacRam;
+#if 1
+ if (NULL != cmdline_opts.pcPseudoFile){
+ ReadPseudoCountParams(&cmdline_opts.aln_opts.rHhalignPara, cmdline_opts.pcPseudoFile);
+ }
+#endif
/* Read sequence file
*
if (NULL != cmdline_opts.pcSeqInfile) {
NewMSeq(&prMSeq);
if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile,
- cmdline_opts.iSeqType,
- cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
+ cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
+ cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs,
+ cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) {
Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
}
#if TRACE
Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align",
cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s");
}
+ /* if there are fewer sequences than target size of clusters,
+ * then mBed is unnecessary, FS, r283-> */
+ if ( (prMSeq) && (prMSeq->nseqs <= cmdline_opts.aln_opts.iClustersizes) ){
+ cmdline_opts.aln_opts.bUseMbed = FALSE;
+ cmdline_opts.aln_opts.bUseMbedForIteration = FALSE;
+ Log(&rLog, LOG_INFO, "not more sequences (%d) than cluster-size (%d), turn off mBed",
+ prMSeq->nseqs, cmdline_opts.aln_opts.iClustersizes);
+ }
/* Dealign if requested and neccessary
*/
if (NULL != prMSeq) {
- if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) {
+ if (/*TRUE == prMSeq->aligned &&*/ cmdline_opts.bDealignInputSeqs) {
Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested.");
DealignMSeq(prMSeq);
}
if (NULL != cmdline_opts.pcProfile1Infile) {
NewMSeq(&prMSeqProfile1);
if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile,
- cmdline_opts.iSeqType,
- cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
+ cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
+ cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs,
+ cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) {
Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
cmdline_opts.pcProfile1Infile);
}
if (NULL != cmdline_opts.pcProfile2Infile) {
NewMSeq(&prMSeqProfile2);
if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile,
- cmdline_opts.iSeqType,
- cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
+ cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
+ cmdline_opts.bIsProfile, cmdline_opts.bDealignInputSeqs,
+ cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen, cmdline_opts.aln_opts.pcHMMBatch)) {
Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
cmdline_opts.pcProfile2Infile);
}
Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
cmdline_opts.pcProfile2Infile);
}*/
- if (FALSE == prMSeqProfile1->aligned) {
+ if (FALSE == prMSeqProfile2->aligned) {
Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
cmdline_opts.pcProfile2Infile);
}
* (ii) profile profile alignment
*
*/
+
if (NULL != prMSeq) {
- if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts, rHhalignPara)) {
+
+ if (2 == prMSeq->nseqs){
+ /* if there are only 2 sequences then the order does not matter */
+ /* this is important, because for pair-wise alignment we don't do
+ * tree indexing*, and trying to use tree-indexing during output
+ * will cause a segmentation fault */
+ cmdline_opts.iOutputOrder = INPUT_ORDER;
+ }
+ if (TREE_ORDER == cmdline_opts.iOutputOrder){
+ /* this is a crutch, if tree_order==NULL use input order,
+ * otherwise use guide-tree-order */
+ prMSeq->tree_order = (int *)CKMALLOC(prMSeq->nseqs * sizeof(int));
+ }
+ if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts)) {
Log(&rLog, LOG_FATAL, "An error occured during the alignment");
}
- if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile,
- cmdline_opts.iAlnOutFormat)) {
- Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
+ if (cmdline_opts.aln_opts.iMaxHMMIterations >= 0){
+ if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile,
+ cmdline_opts.iAlnOutFormat, cmdline_opts.iWrap, cmdline_opts.bResno)) {
+ Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
+ }
}
#if 0
{
} else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) {
- if (AlignProfiles(prMSeqProfile1, prMSeqProfile2, rHhalignPara)) {
+ if (AlignProfiles(prMSeqProfile1, prMSeqProfile2,
+ cmdline_opts.aln_opts.rHhalignPara)) {
Log(&rLog, LOG_FATAL, "An error occured during the alignment");
}
if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile,
- cmdline_opts.iAlnOutFormat)) {
+ cmdline_opts.iAlnOutFormat, cmdline_opts.iWrap, cmdline_opts.bResno)) {
Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
}
}