1 /*********************************************************************
2 * Clustal Omega - Multiple sequence alignment
4 * Copyright (C) 2010 University College Dublin
6 * Clustal-Omega is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This file is part of Clustal-Omega.
13 ********************************************************************/
16 * RCS $Id: seq.h 234 2011-04-13 05:26:16Z andreas $
19 #ifndef CLUSTALO_SEQ_H
20 #define CLUSTALO_SEQ_H
22 #include "squid/squid.h"
28 * int-encoded sequence types.
29 * these are in sync with squid's seqtypes and only used for
32 #define SEQTYPE_UNKNOWN kOtherSeq /* alias for squid's kOtherSeq */
33 #define SEQTYPE_DNA kDNA /* alias for squid's kDNA */
34 #define SEQTYPE_RNA kRNA /* alias for squid's kRNA */
35 #define SEQTYPE_PROTEIN kAmino /* alias for squid's kAmino */
37 /* Alphabets are defined in squid.h: AMINO_ALPHABET, DNA_ALPHABET,
38 * RNA_ALPHABET (all uppercase)
40 #define AMINOACID_ANY 'X' /* single-character code; presumably the "any/unknown residue" symbol for protein sequences -- TODO confirm against callers */
41 #define NUCLEOTIDE_ANY 'N' /* single-character code; presumably the "any/unknown base" symbol for DNA/RNA sequences -- TODO confirm against callers */
44 * @brief structure for storing multiple sequences
48 int nseqs; /**< number of sequences */
49 int seqtype; /**< sequence type */
50 char *filename; /**< input file / source of sequences */
51 bool aligned; /**< true if all seqs are same length **/
53 /** (working) sequence residues as char pointer.
54 * range for first index: 0--nseqs-1.
55 * changes during alignment.
59 /** original sequence residues as char pointer.
60 * range for first index: 0--nseqs-1.
61 * only set during input
66 * @brief Squid's sequence info structure.
67 * Index range: 0--nseqs-1.
69 * extra data are available:
73 * char name[SQINFO_NAMELEN];
75 * database identifier:
76 * char id[SQINFO_NAMELEN];
78 * database accession no:
79 * char acc[SQINFO_NAMELEN];
82 * char desc[SQINFO_DESCLEN];
84 * length of this seq, incl gaps in our case!:
87 * start position on source seq (valid range: 1..len):
90 * end position on source seq (valid range: 1..len):
93 * original length of source seq:
96 * kRNA, kDNA, kAmino, or kOtherSeq:
99 * secondary structure string (index range: 0..len-1):
102 * percent side chain surface access (index range: 0..len-1):
113 AliStat(mseq_t *prMSeq, bool bSampling, bool bReportAll);
116 AddSeq(mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes);
119 SeqSwap(mseq_t *mseq, int i, int j);
122 DealignMSeq(mseq_t *mseq);
125 SeqTypeToStr(int seqtype);
128 ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile, int iSeqType,
129 int iMaxNumSeq, int iMaxSeqLen);
132 NewMSeq(mseq_t **mseq);
135 FreeMSeq(mseq_t **mseq);
138 CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc);
141 LogSqInfo(SQINFO *sqinfo);
144 FindSeqName(char *seqname, mseq_t *mseq);
147 WriteAlignment(mseq_t *mseq, const char *aln_outfile, int msafile_format);
150 DealignSeq(char *seq);
153 ShuffleMSeq(mseq_t *prMSeq);
156 SortMSeqByLength(mseq_t *prMSeq, const char cOrder);
159 JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd);
162 SeqsAreAligned(mseq_t *prMSeq);