binaries/src/clustalo/src/clustal/seq.h

   1 /*********************************************************************
   2  * Clustal Omega - Multiple sequence alignment
   3  *
   4  * Copyright (C) 2010 University College Dublin
   5  *
   6  * Clustal-Omega is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License as
   8  * published by the Free Software Foundation; either version 2 of the
   9  * License, or (at your option) any later version.
  10  *
  11  * This file is part of Clustal-Omega.
  12  *
  13  ********************************************************************/
  14
  15 /*
  16  *  RCS $Id: seq.h 296 2014-10-07 12:15:41Z fabian $
  17  */
  18
  19 #ifndef CLUSTALO_SEQ_H
  20 #define CLUSTALO_SEQ_H
  21
  22 #include "squid/squid.h"
  23
  24 #include "util.h"
  25
  26
  27 /**
  28  * Int-encoded sequence types.
  29  * These are aliases for squid's seqtype constants (kOtherSeq, kDNA,
  30  * kRNA, kAmino) and are only used for convenience here.
  31  */
  32 #define SEQTYPE_UNKNOWN kOtherSeq
  33 #define SEQTYPE_DNA kDNA
  34 #define SEQTYPE_RNA kRNA
  35 #define SEQTYPE_PROTEIN kAmino
  36
  37 /* Alphabets are defined in squid.h: AMINO_ALPHABET, DNA_ALPHABET,
  38  * RNA_ALPHABET (all uppercase).
  39  * NOTE(review): the two characters below are presumably the "any residue" wildcard symbols for protein and nucleotide sequences -- confirm. */
  40 #define AMINOACID_ANY 'X'
  41 #define NUCLEOTIDE_ANY 'N'
  42
  43 /**
  44  * @brief Structure for storing multiple sequences and their metadata.
  45  * The per-sequence arrays seq, orig_seq and sqinfo are indexed 0..nseqs-1.
  46  */
  47 typedef struct {
  48     int nseqs; /**< number of sequences */
  49     int seqtype; /**< sequence type; NOTE(review): presumably one of the SEQTYPE_* values -- confirm */
  50     char *filename; /**< input file / source of sequences */
  51     bool aligned; /**< true if all seqs are same length */
  52
  53     /** (working) sequence residues as char pointer.
  54      * range for first index: 0..nseqs-1.
  55      * changes during alignment.
  56      */
  57     char **seq;
  58
  59     /** original sequence residues as char pointer.
  60      * range for first index: 0..nseqs-1.
  61      * only set during input.
  62      */
  63     char **orig_seq;
  64
  65   /** order in which sequences appear in guide-tree.
  66    * NOTE(review): element count and ownership are not visible here -- confirm. */
  67   int *tree_order;
  68
  69     /**
  70      * @brief Squid's sequence info structure.
  71      * Index range: 0..nseqs-1.
  72      *
  73      * extra data are available:
  74      * int flags;
  75      *
  76      * name:
  77      * char name[SQINFO_NAMELEN];
  78      *
  79      * database identifier:
  80      * char id[SQINFO_NAMELEN];
  81      *
  82      * database accession no:
  83      * char acc[SQINFO_NAMELEN];
  84      *
  85      * description:
  86      * char desc[SQINFO_DESCLEN];
  87      *
  88      * length of this seq, incl gaps in our case!:
  89      * int len;
  90      *
  91      * start position on source seq (valid range: 1..len):
  92      * int start;
  93      *
  94      * end position on source seq (valid range: 1..len):
  95      * int stop;
  96      *
  97      * original length of source seq:
  98      * int olen;
  99      *
 100      * kRNA, kDNA, kAmino, or kOtherSeq:
 101      * int type;
 102      *
 103      * secondary structure string (index range: 0..len-1):
 104      * char *ss;
 105      *
 106      * percent side chain surface access (index range: 0..len-1):
 107      * char *sa;
 108      *
 109      * @see squid.h
 110      * @see LogSqInfo()
 111      *
 112      */
 113     SQINFO *sqinfo;
 114
 115   /* HMM batch information. NOTE(review): array layout and ownership are not visible in this header -- confirm. */
 116   char ***pppcHMMBNames;
 117   int **ppiHMMBindex;
 118 } mseq_t;
 119 /** NOTE(review): presumably reports alignment statistics for prMSeq; the meaning of bSampling and bReportAll is not visible in this header -- confirm. */
 120 extern void
 121 AliStat(mseq_t *prMSeq, bool bSampling, bool bReportAll);
 122 /** NOTE(review): presumably appends a sequence (name pcSeqName, residues pcSeqRes) to *prMSeqDest_p; the mseq_t ** parameter suggests the destination pointer itself may be (re)assigned -- confirm. */
 123 extern void
 124 AddSeq(mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes);
 125 /** NOTE(review): presumably exchanges the entries at indices i and j of mseq (valid indices would then be 0..nseqs-1) -- confirm. */
 126 extern void
 127 SeqSwap(mseq_t *mseq, int i, int j);
 128 /** NOTE(review): presumably removes alignment gaps from every sequence in mseq -- confirm which fields are updated. */
 129 extern void
 130 DealignMSeq(mseq_t *mseq);
 131 /** Returns a read-only string (const char *) for an int-encoded sequence type. NOTE(review): presumably expects one of the SEQTYPE_* values -- confirm. */
 132 extern const char *
 133 SeqTypeToStr(int seqtype);
 134 /** Returns an int. NOTE(review): presumably a status code for reading pcSeqFile into prMSeq_p, with iMaxNumSeq / iMaxSeqLen as input limits and pcHMMBatch naming HMM batch input; the return convention and flag semantics are not visible in this header -- confirm. */
 135 extern int
 136 ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile,
 137               int iSeqType,  int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
 138               int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch);
 139 /** NOTE(review): presumably allocates and initialises a new mseq_t and stores it in *mseq -- confirm. */
 140 extern void
 141 NewMSeq(mseq_t **mseq);
 142 /** NOTE(review): presumably releases *mseq and the data it owns; the mseq_t ** parameter suggests *mseq may be reset afterwards -- confirm. */
 143 extern void
 144 FreeMSeq(mseq_t **mseq);
 145 /** NOTE(review): presumably creates a copy of prMSeqSrc and stores it in *prMSeqDest_p -- confirm who allocates the destination. */
 146 extern void
 147 CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc);
 148 /** NOTE(review): presumably logs the fields of one SQINFO record -- confirm the log level and destination. */
 149 extern void
 150 LogSqInfo(SQINFO *sqinfo);
 151 /** Returns an int. NOTE(review): presumably the index of the sequence named seqname in mseq; the value returned when no sequence matches is not visible in this header -- confirm. */
 152 extern int
 153 FindSeqName(char *seqname, mseq_t *mseq);
 154 /** Returns an int. NOTE(review): presumably writes mseq to aln_outfile in format msafile_format, with iWrap / bResno controlling line wrapping and residue numbering; the return convention is not visible in this header -- confirm. */
 155 extern int
 156 WriteAlignment(mseq_t *mseq, const char *aln_outfile, int msafile_format, int iWrap, bool bResno);
 157 /** Takes a non-const char *. NOTE(review): presumably removes gap characters from seq in place -- confirm. */
 158 extern void
 159 DealignSeq(char *seq);
 160 /** NOTE(review): presumably randomises the order of the sequences in prMSeq -- confirm the random source and which arrays are permuted. */
 161 extern void
 162 ShuffleMSeq(mseq_t *prMSeq);
 163 /** NOTE(review): presumably reorders the sequences in prMSeq by length; the accepted values of cOrder are not visible in this header -- confirm. */
 164 extern void
 165 SortMSeqByLength(mseq_t *prMSeq, const char cOrder);
 166 /** NOTE(review): presumably appends the sequences of prMSeqToAdd to *prMSeqDest_p -- confirm ownership of the added data. */
 167 extern void
 168 JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd);
 169 /** Returns a bool. NOTE(review): presumably true when the sequences in prMSeq are treated as aligned, taking bIsProfile and bDealignInputSeqs into account -- confirm the exact rule. */
 170 extern bool
 171 SeqsAreAligned(mseq_t *prMSeq, bool bIsProfile, bool bDealignInputSeqs);
 172
 173 #endif