1 /*********************************************************************
2 * Clustal Omega - Multiple sequence alignment
4 * Copyright (C) 2010 University College Dublin
6 * Clustal-Omega is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This file is part of Clustal-Omega.
13 ********************************************************************/
16 * RCS $Id: seq.h 296 2014-10-07 12:15:41Z fabian $
19 #ifndef CLUSTALO_SEQ_H
20 #define CLUSTALO_SEQ_H
22 #include "squid/squid.h"
28 * int-encoded sequence types.
29 * these are in sync with squid's seqtypes and only used for
/* Sequence-type codes. Each SEQTYPE_* name is a plain alias for the
 * identifier on its right (kOtherSeq, kDNA, kRNA, kAmino). Those identifiers
 * are not defined in this header; presumably they come from the included
 * "squid/squid.h" -- confirm there before changing any value. */
32 #define SEQTYPE_UNKNOWN kOtherSeq
33 #define SEQTYPE_DNA kDNA
34 #define SEQTYPE_RNA kRNA
35 #define SEQTYPE_PROTEIN kAmino
37 /* Alphabets are defined in squid.h: AMINO_ALPHABET, DNA_ALPHABET,
38 * RNA_ALPHABET (all uppercase)
/* Single-character codes: 'X' for amino-acid sequences and 'N' for
 * nucleotide sequences. Judging by the names these are the "any residue"
 * wildcard symbols of the respective alphabets -- TODO confirm at use sites. */
40 #define AMINOACID_ANY 'X'
41 #define NUCLEOTIDE_ANY 'N'
44 * @brief structure for storing multiple sequences
48 int nseqs; /**< number of sequences */
49 int seqtype; /**< sequence type */
50 char *filename; /**< input file / source of sequences */
51 bool aligned; /**< true if all seqs are same length **/
53 /** (working) sequence residues as char pointer.
54 * range for first index: 0--nseqs-1.
55 * changes during alignment.
59 /** original sequence residues as char pointer.
60 * range for first index: 0--nseqs-1.
61 * only set during input
65 /** order in which sequences appear in guide-tree
70 * @brief Squid's sequence info structure.
71 * Index range: 0--nseqs-1.
73 * extra data are available:
77 * char name[SQINFO_NAMELEN];
79 * database identifier:
80 * char id[SQINFO_NAMELEN];
82 * database accession no:
83 * char acc[SQINFO_NAMELEN];
86 * char desc[SQINFO_DESCLEN];
88 * length of this seq, incl gaps in our case!:
91 * start position on source seq (valid range: 1..len):
94 * end position on source seq (valid range: 1..len):
97 * original length of source seq:
100 * kRNA, kDNA, kAmino, or kOtherSeq:
103 * secondary structure string (index range: 0..len-1):
106 * percent side chain surface access (index range: 0..len-1):
115 /* HMM batch information */
116 char ***pppcHMMBNames;
/* Declarations of the routines that operate on mseq_t, the structure for
 * storing multiple sequences. Only the parameter lists below are certain;
 * every "presumably" is inferred from a name and should be confirmed
 * against the implementation.
 *
 *  AliStat          - takes an mseq_t and the flags bSampling / bReportAll;
 *                     presumably reports alignment statistics.
 *  AddSeq           - takes a pointer to the destination mseq_t pointer, a
 *                     sequence name and a residue string; presumably appends
 *                     one sequence.
 *  SeqSwap          - takes an mseq_t and two ints i, j; presumably swaps the
 *                     sequences at those indices.
 *  DealignMSeq      - takes an mseq_t; presumably removes gaps from all of
 *                     its sequences.
 *  SeqTypeToStr     - takes an int seqtype; presumably maps a SEQTYPE_* code
 *                     to a printable name.
 *  ReadSequences    - takes an mseq_t, a sequence file name, sequence type
 *                     and format codes, the flags bIsProfile and
 *                     bDealignInputSeqs, the limits iMaxNumSeq and
 *                     iMaxSeqLen, and pcHMMBatch.
 *  NewMSeq/FreeMSeq - both take mseq_t **; presumably allocate and release
 *                     the structure through the caller's pointer.
 *  CopyMSeq         - takes a pointer to the destination mseq_t pointer and a
 *                     source mseq_t.
 *  LogSqInfo        - takes a SQINFO pointer; presumably logs its contents.
 *  FindSeqName      - takes a sequence name and an mseq_t; presumably looks
 *                     the name up in the set.
 *  WriteAlignment   - takes an mseq_t, an output file name, an MSA file
 *                     format code, iWrap and bResno.
 *  DealignSeq       - takes a non-const char *seq; presumably edits the
 *                     string in place -- TODO confirm.
 *  ShuffleMSeq      - takes an mseq_t; presumably randomises sequence order.
 *  SortMSeqByLength - takes an mseq_t and a char cOrder; the accepted cOrder
 *                     values are not visible here.
 *  JoinMSeqs        - takes a pointer to the destination mseq_t pointer and
 *                     an mseq_t to add.
 *  SeqsAreAligned   - takes an mseq_t and the flags bIsProfile /
 *                     bDealignInputSeqs.
 */
121 AliStat(mseq_t *prMSeq, bool bSampling, bool bReportAll);
124 AddSeq(mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes);
127 SeqSwap(mseq_t *mseq, int i, int j);
130 DealignMSeq(mseq_t *mseq);
133 SeqTypeToStr(int seqtype);
136 ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile,
137 int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
138 int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch);
141 NewMSeq(mseq_t **mseq);
144 FreeMSeq(mseq_t **mseq);
147 CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc);
150 LogSqInfo(SQINFO *sqinfo);
153 FindSeqName(char *seqname, mseq_t *mseq);
156 WriteAlignment(mseq_t *mseq, const char *aln_outfile, int msafile_format, int iWrap, bool bResno);
159 DealignSeq(char *seq);
162 ShuffleMSeq(mseq_t *prMSeq);
165 SortMSeqByLength(mseq_t *prMSeq, const char cOrder);
168 JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd);
171 SeqsAreAligned(mseq_t *prMSeq, bool bIsProfile, bool bDealignInputSeqs);