1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
6 * Copyright (C) 2010 University College Dublin
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This file is part of Clustal-Omega.
15 ********************************************************************/
18 * RCS $Id: mymain.c 255 2011-06-22 15:59:07Z fabian $
29 #include <argtable2.h>
32 #include <libgen.h> /* for basename only */
35 #include "clustal-omega.h"
39 /* hhalign (parameters) */
40 #include "hhalign/general.h"
46 /** sequence type (from cmdline arg) */
48 /** sequence input file. not directly used by Align() */
50 /** Dealign sequences on input. Otherwise we use the alignment
51 * info for background-HMM creation */
52 bool bDealignInputSeqs;
54 /* profiles: : pre-aligned sequences, whose alignment will not be changed
56 /** profile 1: pre-aligned sequence input. not directly used by Align() */
57 char *pcProfile1Infile ;
58 /** profile 2: pre-aligned sequence input. not directly used by Align() */
59 char *pcProfile2Infile;
63 /** maximum allowed number of input sequences */
65 /** maximum allowed input sequence length */
70 /** alignment output file */
72 /** alignment output format */
74 /** force overwriting of files */
75 bool bForceFileOverwrite;
79 /** number of threads */
88 /* changes here will have to be reflected in SetDefaultUserOpts(),
89 * FreeUserOpts(), PrintUserOpts() and UserOptsLogicCheck() etc
95 /* log-file used for non-essential logging in prLog */
96 FILE *prLogFile = NULL;
101 * @brief Sets default user/commandline options
104 * The option structure to initialise
108 SetDefaultUserOpts(cmdline_opts_t *opts)
111 assert(NULL != opts);
113 opts->iSeqType = SEQTYPE_UNKNOWN;
114 opts->pcSeqInfile = NULL;
115 opts->bDealignInputSeqs = FALSE;
116 opts->pcProfile1Infile = NULL;
117 opts->pcProfile2Infile = NULL;
119 opts->iMaxNumSeq = INT_MAX;
120 opts->iMaxSeqLen = INT_MAX;
122 opts->pcAlnOutfile = NULL;
123 opts->iAlnOutFormat = MSAFILE_A2M;
124 opts->bForceFileOverwrite = FALSE;
127 /* defaults to # of CPUs */
128 opts->iThreads = omp_get_max_threads();
133 opts->pcLogFile = NULL;
135 SetDefaultAlnOpts(& opts->aln_opts);
137 /* end of SetDefaultAlnOpts() */
143 * @brief FIXME add doc
147 PrintUserOpts(FILE *prFile, cmdline_opts_t *opts) {
149 /* keep in same order as in struct. FIXME could this be derived from argtable?
151 /* we only allow protein anyway: fprintf(prFile, "seq-type = %s\n", SeqTypeToStr(opts->iSeqType)); */
152 fprintf(prFile, "option: seq-in = %s\n",
153 NULL != opts->pcSeqInfile? opts->pcSeqInfile: "(null)");
154 fprintf(prFile, "option: dealign = %d\n", opts->bDealignInputSeqs);
155 fprintf(prFile, "option: profile1 = %s\n",
156 NULL != opts->pcProfile1Infile? opts->pcProfile1Infile: "(null)");
157 fprintf(prFile, "option: profile2 = %s\n",
158 NULL != opts->pcProfile2Infile? opts->pcProfile2Infile: "(null)");
159 fprintf(prFile, "option: max-num-seq = %d\n", opts->iMaxNumSeq);
160 fprintf(prFile, "option: max-seq-len = %d\n", opts->iMaxSeqLen);
161 fprintf(prFile, "option: aln-out-file = %s\n",
162 NULL != opts->pcAlnOutfile? opts->pcAlnOutfile: "(null)");
163 fprintf(prFile, "option: aln-out-format = %s\n", SeqfileFormat2String(opts->iAlnOutFormat));
164 fprintf(prFile, "option: force-file-overwrite = %d\n", opts->bForceFileOverwrite);
165 fprintf(prFile, "option: threads = %d\n", opts->iThreads);
166 fprintf(prFile, "option: logFile = %s\n", opts->pcLogFile);
168 /* end of PrintUserOpts */
173 * @brief Frees user opt members allocated during parsing
175 * @param[out] user_opts
176 * user options whose members are to free
178 * @see ParseCommandLine()
182 FreeUserOpts(cmdline_opts_t *user_opts)
185 if (NULL != user_opts->pcSeqInfile) {
186 CKFREE(user_opts->pcSeqInfile);
188 if (NULL != user_opts->pcProfile1Infile) {
189 CKFREE(user_opts->pcProfile1Infile);
191 if (NULL != user_opts->pcProfile2Infile) {
192 CKFREE(user_opts->pcProfile2Infile);
194 if (NULL != user_opts->pcAlnOutfile) {
195 CKFREE(user_opts->pcAlnOutfile);
197 if (NULL != user_opts->pcLogFile) {
198 CKFREE(user_opts->pcLogFile);
201 FreeAlnOpts(& user_opts->aln_opts);
205 /* end of FreeUserOpts() */
211 * @brief Do quick&dirty logic check of used options and call Log(&rLog, LOG_FATAL, ) in case
212 * of any inconsistencies
215 * option structure to check
219 UserOptsLogicCheck(cmdline_opts_t *opts)
224 if (NULL == opts->pcSeqInfile) {
225 if (NULL == opts->pcProfile1Infile && NULL == opts->pcProfile2Infile) {
226 Log(&rLog, LOG_FATAL, "No sequence input was provided. For more information try: --help");
229 if (NULL != opts->pcProfile1Infile && NULL != opts->pcProfile2Infile) {
230 Log(&rLog, LOG_FATAL, "Can't align two profile alignments AND a 'normal' sequence file");
233 /* if a profile was given it should always be no 1, not 2 */
234 if (NULL == opts->pcProfile1Infile && NULL != opts->pcProfile2Infile) {
235 Log(&rLog, LOG_FATAL, "Got a second profile, but no first one.");
240 if (rLog.iLogLevelEnabled < LOG_WARN && NULL==opts->pcAlnOutfile && NULL==opts->pcLogFile) {
241 Log(&rLog, LOG_FATAL, "%s %s",
242 "You requested alignment output to stdout and verbose logging.",
243 " Alignment and log messages would get mixed up.");
245 /* distance matrix output impossible in mbed mode, only have clusters, FS, r254 -> */
246 if ( (NULL != opts->aln_opts.pcDistmatOutfile) && (TRUE == opts->aln_opts.bUseMbed) ) {
247 Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
250 /* end of UserOptsLogicCheck */
255 * @brief Parse command line parameters. Will exit if help/usage etc
256 * are called or or call Log(&rLog, LOG_FATAL, ) if an error was detected.
258 * @param[out] user_opts
259 * User parameter struct, with defaults already set.
267 ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv)
270 /* argtable command line parsing:
272 * http://argtable.sourceforge.net/doc/argtable2-intro.html
274 * basic structure is: arg_xxxN:
275 * xxx can be int, lit, db1, str, rex, file or date
276 * If N = 0, arguments may appear zero-or-once; N = 1 means
277 * exactly once, N = n means up to maxcount times
280 * @note: changes here, might also affect main.cpp:ConvertOldCmdLine()
284 struct arg_rem *rem_seq_input = arg_rem(NULL, "\nSequence Input:");
285 struct arg_file *opt_seqin = arg_file0("i", "in,infile",
287 "Multiple sequence input file (- for stdin)");
288 struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
289 /*min*/ 0, /*max*/ 128,
291 struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
292 "Dealign input sequences");
293 struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
295 "Force a sequence type (default: auto)");
296 struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
298 "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
299 struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
301 "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
304 struct arg_rem *rem_guidetree = arg_rem(NULL, "\nClustering:");
305 struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
307 "Pairwise alignment distance measure");
308 struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in",
310 "Pairwise distance matrix input file (skips distance computation)");
311 struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out",
313 "Pairwise distance matrix output file");
314 struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in",
316 "Guide tree input file (skips distance computation and guide-tree clustering step)");
317 struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out",
319 "Guide tree output file");
320 /* AW: mbed is default since at least R253
321 struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed",
322 "Fast, Mbed-like clustering for guide-tree calculation");
323 struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
324 "Use Mbed-like clustering also during iteration");
326 /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
327 struct arg_lit *opt_full = arg_lit0(NULL, "full",
328 "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
329 struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
330 "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");
332 struct arg_str *opt_clustering = arg_str0("c", "clustering",
334 "Clustering method for guide tree");
337 struct arg_rem *rem_aln_output = arg_rem(NULL, "\nAlignment Output:");
338 struct arg_file *opt_outfile = arg_file0("o", "out,outfile",
340 "Multiple sequence alignment output file (default: stdout)");
341 struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
342 "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
343 "MSA output file format (default: fasta)");
346 struct arg_rem *rem_iteration = arg_rem(NULL, "\nIteration:");
347 struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter",
348 /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
349 "<n>", "Number of (combined guide-tree/HMM) iterations");
350 struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
351 "<n>", "Maximum number guidetree iterations");
352 struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
353 "<n>", "Maximum number of HMM iterations");
356 struct arg_rem *rem_limits = arg_rem(NULL, "\nLimits (will exit early, if exceeded):");
357 struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>",
358 "Maximum allowed number of sequences");
359 struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>",
360 "Maximum allowed sequence length");
363 struct arg_rem *rem_misc = arg_rem(NULL, "\nMiscellaneous:");
365 struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto",
366 "Set options automatically (might overwrite some of your options)");
367 struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>",
368 "Number of processors to use");
369 struct arg_file *opt_logfile = arg_file0("l", "log",
371 "Log all non-essential output to this file");
372 struct arg_lit *opt_help = arg_lit0("h", "help",
373 "Print this help and exit");
374 struct arg_lit *opt_version = arg_lit0(NULL, "version",
375 "Print version information and exit");
376 struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version",
377 "Print long version information and exit");
378 struct arg_lit *opt_verbose = arg_litn("v", "verbose",
380 "Verbose output (increases if given multiple times)");
381 struct arg_lit *opt_force = arg_lit0(NULL, "force",
382 "Force file overwriting");
383 struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
384 NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);
387 struct arg_end *opt_end = arg_end(10); /* maximum number of errors
390 void *argtable[] = {rem_seq_input,
395 /* unused since we only support protein for now */
403 /* no other options then default available or not implemented */
410 opt_full, /* FS, r250 -> */
411 opt_full_iter, /* FS, r250 -> */
413 /* no other options then default available */
422 opt_max_guidetree_iterations,
423 opt_max_hmm_iterations,
438 opt_macram, /* FS, r240 -> r241 */
444 /* Verify the argtable[] entries were allocated sucessfully
446 if (arg_nullcheck(argtable)) {
447 Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)");
450 /* Parse the command line as defined by argtable[]
452 nerrors = arg_parse(argc, argv, argtable);
454 /* Special case: '--help' takes precedence over error reporting
456 if (opt_help->count > 0) {
457 printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);
460 printf("Check http://www.clustal.org for more information and updates.\n");
463 printf("FIXME more info e.g. how it works, pointers to references etc...\n");
464 FIXME which paper to cite etc
469 printf("Usage: %s", basename(argv[0]));
470 arg_print_syntax(stdout,argtable, "\n");
473 printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n",
475 printf("See below for a list of all options.\n");
477 arg_print_glossary(stdout, argtable, " %-25s %s\n");
478 arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
482 /* Special case: '--version' takes precedence over error reporting
484 if (opt_version->count > 0) {
485 printf("%s\n", PACKAGE_VERSION);
486 arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
490 /* Special case: '--long-version' takes precedence over error reporting
492 if (opt_long_version->count > 0) {
493 char zcLongVersion[1024];
494 PrintLongVersion(zcLongVersion, sizeof(zcLongVersion));
495 printf("%s\n", zcLongVersion);
496 arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
500 /* If the parser returned any errors then display them and exit
503 /* Display the error details contained in the arg_end struct.*/
504 arg_print_errors(stdout, opt_end, PACKAGE);
505 fprintf(stderr, "For more information try: %s --help\n", argv[0]);
506 arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
511 /* ------------------------------------------------------------
513 * Command line successfully parsed. Now transfer values to
514 * user_opts. While doing so, make sure that given input files
515 * exist and given output files are writable do not exist, or if
516 * they do, should be overwritten.
518 * No logic checks here! They are done in a different function
520 * ------------------------------------------------------------*/
523 /* not part of user_opts because it declared in src/util.h */
524 if (0 == opt_verbose->count) {
525 rLog.iLogLevelEnabled = LOG_WARN;
526 } else if (1 == opt_verbose->count) {
527 rLog.iLogLevelEnabled = LOG_INFO;
528 } else if (2 == opt_verbose->count) {
529 rLog.iLogLevelEnabled = LOG_VERBOSE;
530 } else if (3 == opt_verbose->count) {
531 rLog.iLogLevelEnabled = LOG_DEBUG;
534 user_opts->aln_opts.bAutoOptions = opt_autooptions->count;
536 user_opts->bDealignInputSeqs = opt_dealign->count;
538 /* NOTE: full distance matrix used to be default - there was
539 --mbed flag but no --full flag after r250 decided that mBed
540 should be default - now need --full flag to turn off mBed.
541 wanted to retain mBed Boolean, so simply added --full flag. if
542 both flags set (erroneously) want --mbed to overwrite --full,
543 therefore do --full 1st, the --mbed, FS, r250 */
544 if (opt_full->count){
545 user_opts->aln_opts.bUseMbed = FALSE;
548 if (opt_full_iter->count){
549 user_opts->aln_opts.bUseMbedForIteration = FALSE;
552 user_opts->bForceFileOverwrite = opt_force->count;
558 if (opt_logfile->count > 0) {
559 user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]);
561 /* warn if already exists or not writable */
562 if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) {
563 Log(&rLog, LOG_FATAL, "%s '%s'. %s",
564 "Cowardly refusing to overwrite already existing file",
565 user_opts->pcLogFile,
566 "Use --force to force overwriting.");
568 if (! FileIsWritable(user_opts->pcLogFile)) {
569 Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
570 user_opts->pcLogFile);
575 /* normal sequence input (no profile)
577 if (opt_seqin->count > 0) {
578 user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]);
583 /* maximum number of sequences */
584 if (opt_max_seq->count > 0) {
585 user_opts->iMaxNumSeq = opt_max_seq->ival[0];
588 /* maximum sequence length */
589 if (opt_max_seqlen->count > 0) {
590 user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
595 if (opt_seqtype->count > 0) {
596 if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) {
597 user_opts->iSeqType = SEQTYPE_PROTEIN;
598 } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) {
599 user_opts->iSeqType = SEQTYPE_RNA;
600 } else if (STR_NC_EQ(opt_seqtype->sval[0], "dna")) {
601 user_opts->iSeqType = SEQTYPE_DNA;
603 Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
609 if (opt_profile1->count > 0) {
610 user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]);
611 if (! FileExists(user_opts->pcProfile1Infile)) {
612 Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile);
616 if (opt_profile2->count > 0) {
617 user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]);
618 if (! FileExists(user_opts->pcProfile2Infile)) {
619 Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile);
626 user_opts->aln_opts.iHMMInputFiles = 0;
627 user_opts->aln_opts.ppcHMMInput = NULL;
628 if (opt_hmm_in->count>0) {
630 user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count;
631 user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC(
632 user_opts->aln_opts.iHMMInputFiles * sizeof(char*));
633 for (iAux=0; iAux<opt_hmm_in->count; iAux++) {
634 user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]);
635 if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) {
636 Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]);
642 /* Pair distance method
644 if (opt_pairdist->count > 0) {
645 if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) {
646 user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE;
648 Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]);
653 /* Distance matrix input
655 if (opt_distmat_in->count > 0) {
656 user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]);
657 if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) {
658 Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile);
663 /* Distance matrix output
665 if (opt_distmat_out->count > 0) {
666 user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]);
668 /* warn if already exists or not writable */
669 if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) {
670 Log(&rLog, LOG_FATAL, "%s '%s'. %s",
671 "Cowardly refusing to overwrite already existing file",
672 user_opts->aln_opts.pcDistmatOutfile,
673 "Use --force to force overwriting.");
675 if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) {
676 Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
677 user_opts->aln_opts.pcDistmatOutfile);
684 if (opt_clustering->count > 0) {
685 if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) {
686 user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA;
688 Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]);
695 if (opt_guidetree_in->count > 0) {
696 user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]);
697 if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) {
698 Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile);
705 if (opt_guidetree_out->count > 0) {
706 user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]);
708 /* warn if already exists or not writable */
709 if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) {
710 Log(&rLog, LOG_FATAL, "%s '%s'. %s",
711 "Cowardly refusing to overwrite already existing file",
712 user_opts->aln_opts.pcGuidetreeOutfile,
713 "Use --force to force overwriting.");
715 if (! FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) {
716 Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
717 user_opts->aln_opts.pcGuidetreeOutfile);
722 /* max guidetree iterations
724 if (opt_max_guidetree_iterations->count > 0) {
725 user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0];
729 /* max guidetree iterations
731 if (opt_max_hmm_iterations->count > 0) {
732 user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0];
735 /* number of iterations
737 if (opt_num_iterations->count > 0) {
738 if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) {
739 Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment.");
740 user_opts->aln_opts.bIterationsAuto = TRUE;
744 user_opts->aln_opts.bIterationsAuto = FALSE;
745 for (iAux=0; iAux<(int)strlen(opt_num_iterations->sval[0]); iAux++) {
746 if (! isdigit(opt_num_iterations->sval[0][iAux])) {
747 Log(&rLog, LOG_FATAL, "Couldn't iteration parameter: %s",
748 opt_num_iterations->sval[0]);
751 user_opts->aln_opts.iNumIterations = atoi(opt_num_iterations->sval[0]);
758 if (opt_outfile->count > 0) {
759 user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]);
761 /* warn if already exists or not writable */
762 if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) {
763 Log(&rLog, LOG_FATAL, "%s '%s'. %s",
764 "Cowardly refusing to overwrite already existing file",
765 user_opts->pcAlnOutfile,
766 "Use --force to force overwriting.");
768 if (! FileIsWritable(user_opts->pcAlnOutfile)) {
769 Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
770 user_opts->pcAlnOutfile);
777 if (opt_outfmt->count > 0) {
778 /* avoid gcc warning about discarded qualifier */
779 char *tmp = (char *)opt_outfmt->sval[0];
780 user_opts->iAlnOutFormat = String2SeqfileFormat(tmp);
781 if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) {
782 Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]);
789 if (opt_threads->count > 0) {
790 if (opt_threads->ival[0] <= 0) {
791 Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.",
792 opt_threads->ival[0]);
794 user_opts->iThreads = opt_threads->ival[0];
798 if (opt_threads->count > 0) {
799 if (opt_threads->ival[0] > 1) {
800 Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.",
801 opt_threads->ival[0], PACKAGE_NAME);
807 /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
809 if (opt_macram->count > 0) { /* FS, r240 -> r241 */
810 user_opts->aln_opts.iMacRam = opt_macram->ival[0];
815 arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
817 UserOptsLogicCheck(user_opts);
821 /* end of ParseCommandLine() */
828 * @brief the 'real' main function
832 MyMain(int argc, char **argv)
834 mseq_t *prMSeq = NULL;
835 mseq_t *prMSeqProfile1 = NULL;
836 mseq_t *prMSeqProfile2 = NULL;
837 cmdline_opts_t cmdline_opts;
838 hhalign_para rHhalignPara = {0};
840 /* Must happen first: setup logger */
841 LogDefaultSetup(&rLog);
843 /*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/
844 Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME); /* FS, r237 -> 238 */
846 SetDefaultUserOpts(&(cmdline_opts));
848 ParseCommandLine(&cmdline_opts, argc, argv);
850 if (NULL != cmdline_opts.pcLogFile) {
851 prLogFile = fopen(cmdline_opts.pcLogFile, "w");
852 LogSetFP(&rLog, LOG_INFO, prLogFile);
853 LogSetFP(&rLog, LOG_VERBOSE, prLogFile);
854 LogSetFP(&rLog, LOG_DEBUG, prLogFile);
857 InitClustalOmega(cmdline_opts.iThreads);
859 if (rLog.iLogLevelEnabled < LOG_INFO) {
860 PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts);
861 PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts));
863 /* write relevant command-line options for hhalign
866 rHhalignPara.iMacRamMB = cmdline_opts.aln_opts.iMacRam;
868 /* Read sequence file
871 if (NULL != cmdline_opts.pcSeqInfile) {
873 if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile,
874 cmdline_opts.iSeqType,
875 cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
876 Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
881 for (iAux=0; iAux<prMSeq->nseqs; iAux++) {
882 Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", iAux, prMSeq->seq[iAux]);
883 LogSqInfo(&prMSeq->sqinfo[iAux]);
888 /* k-tuple pairwise distance calculation seg-faults if
889 * only one sequence, simply exit early.
890 * note that for profile/profile alignment prMSeq is NULL
892 if (prMSeq && (prMSeq->nseqs <= 1)){
893 Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align",
894 cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s");
897 /* Dealign if requested and neccessary
899 if (NULL != prMSeq) {
900 if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) {
901 Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested.");
910 if (NULL != cmdline_opts.pcProfile1Infile) {
911 NewMSeq(&prMSeqProfile1);
912 if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile,
913 cmdline_opts.iSeqType,
914 cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
915 Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
916 cmdline_opts.pcProfile1Infile);
918 /* FIXME: commented out. FS, r240 -> r241
919 * for explanation see below */
920 /*if (1==prMSeqProfile1->nseqs) {
921 Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
922 cmdline_opts.pcProfile1Infile);
924 if (FALSE == prMSeqProfile1->aligned) {
925 Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
926 cmdline_opts.pcProfile1Infile);
935 if (NULL != cmdline_opts.pcProfile2Infile) {
936 NewMSeq(&prMSeqProfile2);
937 if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile,
938 cmdline_opts.iSeqType,
939 cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
940 Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
941 cmdline_opts.pcProfile2Infile);
943 /* FIXME: there is no (clean) way to align a single sequence to a profile.
944 * if we go down the -i route, it causes a seg-fault in the pair-wise
945 * k-tuple distance calculation. However, single sequences can be
946 * understood as 1-profiles. Therefore we have to allow for 1-profiles.
949 /*if (1==prMSeqProfile2->nseqs) {
950 Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
951 cmdline_opts.pcProfile2Infile);
953 if (FALSE == prMSeqProfile1->aligned) {
954 Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
955 cmdline_opts.pcProfile2Infile);
960 /* Depending on the input we got perform
962 * (i) normal alignment: seq + optional profile
964 * (ii) profile profile alignment
967 if (NULL != prMSeq) {
968 if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts, rHhalignPara)) {
969 Log(&rLog, LOG_FATAL, "An error occured during the alignment");
972 if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile,
973 cmdline_opts.iAlnOutFormat)) {
974 Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
978 bool bSampling = FALSE; /* better set to TRUE for many sequences */
979 bool bReportAll = TRUE;
980 AliStat(prMSeq, bSampling, bReportAll);
985 } else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) {
986 if (AlignProfiles(prMSeqProfile1, prMSeqProfile2, rHhalignPara)) {
987 Log(&rLog, LOG_FATAL, "An error occured during the alignment");
989 if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile,
990 cmdline_opts.iAlnOutFormat)) {
991 Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
998 if (NULL != prMSeq) {
1001 if (NULL != prMSeqProfile1) {
1002 FreeMSeq(&prMSeqProfile1);
1004 if (NULL != prMSeqProfile2) {
1005 FreeMSeq(&prMSeqProfile2);
1008 FreeUserOpts(&cmdline_opts);
1010 Log(&rLog, LOG_DEBUG, "Successful program exit");
1012 if (NULL != cmdline_opts.pcLogFile) {
1015 return EXIT_SUCCESS;
1017 /* end of MyMain() */