+++ /dev/null
-/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-
-/*********************************************************************
- * Clustal Omega - Multiple sequence alignment
- *
- * Copyright (C) 2010 University College Dublin
- *
- * Clustal-Omega is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This file is part of Clustal-Omega.
- *
- ********************************************************************/
-
-/*
- * RCS $Id: hhalignment.h 154 2010-11-09 18:29:05Z fabian $
- */
-
-// hhalignment.h
-
-class Alignment /* Multiple sequence alignment: holds the alignment-wide
- counts, the indices of the special >ss_/>sa annotation
- sequences, the sequences kept for display, and declares
- the read / filter / frequency / write operations that
- work on them. */
-{
-public:
- int L; // number of match states of alignment
- int N_in; // total number of sequences in alignment
- int N_filtered; /* number of sequences remaining after sequence
- identity filtering */
- int N_ss; // number of >ss_ or >sa sequences
-
- int kss_dssp; /* index of sequence with secondary structure
- by DSSP; -1 if no >ss_dssp line was found */
- int ksa_dssp; /* index of sequence with solvent accessibility
- by DSSP; -1 if no >sa_dssp line was found */
- int kss_pred; /* index of sequence with predicted secondary
- structure; -1 if no >ss_pred line was found */
- int kss_conf; /* index of sequence with confidence values of
- prediction; -1 if no >ss_conf line was found */
- int kfirst; // index of first real sequence
-
- char* longname; /* full name of first sequence of original alignment
- (NAME field) */
- char name[NAMELEN]; // HMM name = first word in longname, in lower case
- char fam[NAMELEN]; // family ID (derived from name) (FAM field)
- char file[NAMELEN]; /* rootname (without path, with extension) of the
- alignment file that is used to construct the HMM */
-
- int n_display; /* number of sequences to be displayed
- (INCLUDING >ss_pred, >ss_conf, >ss_dssp sequences) */
- char** sname; // sname[k] = name of display sequence k (first seq=0, first char=0)
- char** seq; // seq[k] = residues of display sequence k (first char=1)
- int* l; // l[i] = position of i'th match state in alignment
-
- char* keep; /* keep[k]=1 if sequence k is included in amino acid
- frequencies; 0 otherwise (first=0) */
-
- double *pdExWeight; /* external sequence weight as given by tree FIXME (FS) */
-
- Alignment(int maxseq=MAXSEQ, int maxres=/*MAXRES*/par.maxResLen); // maxres defaults to par.maxResLen (the MAXRES constant is commented out)
- ~Alignment();
-
- /* Read alignment from stream inf into X (uncompressed) as ASCII characters.
- NOTE(review): the roles of infile and of the optional line argument are
- not visible from this declaration; confirm against the definition. */
- void Read(FILE* inf, char infile[NAMELEN], char* line=NULL);
-#ifdef CLUSTALO /* the two members below exist only in CLUSTALO builds */
- void Transfer(char **ppcProf, int iCnt); // NOTE(review): undocumented; presumably takes over iCnt sequences from ppcProf -- confirm
- void ClobberGlobal(); // NOTE(review): undocumented; effect on global state not visible here -- confirm
-#endif
-
- /* Convert ASCII to numbers between 0 and 20, throw out all insert states,
- record their number in I[k][i] and store the sequences to be displayed
- in sname[k] and seq[k] */
- void Compress(const char infile[NAMELEN]);
-
- /* Apply sequence identity filter; every parameter after max_seqid is
- optional and defaults to 0 */
- inline int FilterForDisplay(int max_seqid, int coverage=0, int qid=0, float qsc=0, int N=0);
- inline int Filter(int max_seqid, int coverage=0, int qid=0, float qsc=0, int N=0);
- int Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, int seqid2, int Ndiff); // NOTE(review): undocumented; presumably the filter variant working on a caller-supplied keep[] -- confirm
-
- bool FilterNeff(); /* MR1 */
- float filter_by_qsc(float qsc, char* dummy); /* MR1 */
-
- /* Filter alignment for min score per column with core query profile,
- defined by min_coverage_core and min_seqid_core.
- NOTE(review): the parameters are actually named coverage_core, qsc_core
- and coresc; the names in the sentence above look stale -- confirm. */
- int HomologyFilter(int coverage_core, float qsc_core, float coresc);
-
- // Calculate amino acid frequencies q.p[i][a] and transition probabilities q.tr[i][a] from the alignment
- void FrequenciesAndTransitions(HMM& q, char* in=NULL);
-
- // Calculate frequencies q.f[i][a] and transitions q.tr[i][a] (a=MM,MI,MD) with position-specific subalignments
- void Amino_acid_frequencies_and_transitions_from_M_state(HMM& q, char* in);
-
- // Calculate transitions q.tr[i][a] (a=DM,DD) with position-specific subalignments
- void Transitions_from_D_state(HMM& q, char* in);
-
- /* Calculate transitions q.tr[i][a] (a=DM,DD) with position-specific subalignments.
- NOTE(review): this text is identical to the comment on
- Transitions_from_D_state; for the I state it presumably should read
- (a=IM,II) -- confirm against the definition. */
- void Transitions_from_I_state(HMM& q, char* in);
-
- // Write alignment without insert states to alignment file alnfile
- void WriteWithoutInsertsToFile(char* alnfile);
-
- // Write alignment to alignment file alnfile; format is optional (default NULL)
- void WriteToFile(char* alnfile, const char format[]=NULL);
-
- // Read a3m slave alignment of hit from ta3mfile and merge it into the (query) master alignment
- void MergeMasterSlave(Hit& hit, char ta3mfile[]);
-
- // Read a3m alignment of hit from ta3mfile and merge-combine it with the query alignment
- void Merge(Hit& hit, char ta3mfile[]);
-
- // Add a sequence to Qali; Ik is optional (default NULL)
- void AddSequence(char Xk[], int Ik[]=NULL);
-
- // Determine matrix of position-specific weights w[k][i] for multiple alignment
- void GetPositionSpecificWeights(float* w[]);
-
- char readCommentLine; // set to 1 if a comment line with '#' is read (MR1)
-
-private:
- char** X; // X[k][i] contains column i of sequence k in alignment (first seq=0, first char=1) (0-3: ARND ..., 20:X, 21:GAP)
- short unsigned int** I; // I[k][i] contains the number of inserts AFTER match state i (first=0)
- char* display; // display[k]=1 if sequence k will be displayed in output alignments; 0 otherwise (first=0)
- float* wg; // wg[k] = global weight of sequence k
- int* nseqs; // nseqs[i] = number of sequences in subalignment i (only for DEBUGGING)
- int* nres; // nres[k] = number of residues in sequence k
- int* first; // first[k] = first residue in sequence k
- int* last; // last[k] = last residue in sequence k
- int* ksort; // index for sorting sequences: X[ksort[k]]
- int FilterWithCoreHMM(char in[], float coresc, HMM& qcore); // NOTE(review): undocumented private helper; presumably used by HomologyFilter -- confirm
-};