+++ /dev/null
-/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-
-/*********************************************************************
- * Clustal Omega - Multiple sequence alignment
- *
- * Copyright (C) 2010 University College Dublin
- *
- * Clustal-Omega is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This file is part of Clustal-Omega.
- *
- ********************************************************************/
-
-/*
- * RCS $Id: hhhalfalignment-C.h 227 2011-03-28 17:03:09Z fabian $
- */
-
-// hhfullalignment.C
-
-#ifndef MAIN
-#define MAIN
-#include <iostream> // cin, cout, cerr
-#include <fstream> // ofstream, ifstream
-#include <stdio.h> // printf
-#include <stdlib.h> // exit
-#include <string> // strcmp, strstr
-#include <math.h> // sqrt, pow
-#include <limits.h> // INT_MIN
-#include <float.h> // FLT_MIN
-#include <time.h> // clock
-#include <ctype.h> // islower, isdigit etc
-using std::ios;
-using std::ifstream;
-using std::ofstream;
-using std::cout;
-using std::cerr;
-using std::endl;
-#include "util-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
-#include "list.h" // list data structure
-#include "hash.h" // hash data structure
-#include "hhdecl-C.h" // constants, class
-#include "hhutil-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
-#include "hhhmm.h" // class HMM
-#include "hhalignment.h" // class Alignment
-#include "hhhit.h"
-#endif
-
-/////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////
-// Methods of class HalfAlignment
-/////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////
-
-
-
-/////////////////////////////////////////////////////////////////////////////
-// Constructor
-HalfAlignment::HalfAlignment(int maxseqdis)
-{
- n=0;
- sname=seq=NULL;
- nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons= -1;
- h = new(int[maxseqdis]); //h[k] = next position of sequence k to be written
- s = new(char*[maxseqdis]); //s[k][h] = character in column h, sequence k of output alignment
- l = new(int*[maxseqdis]); //counts non-gap residues: l[k][i] = index of last residue AT OR BEFORE match state i in seq k
- m = new(int*[maxseqdis]); //counts positions: m[k][i] = position of match state i in string seq[k]
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-// Destructor
-HalfAlignment::~HalfAlignment()
-{
- Unset();
- delete[] h; h = NULL;
- delete[] s; s = NULL;
- delete[] l; l = NULL;
- delete[] m; m = NULL;
-}
-
-
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Free memory in HalfAlignment arrays s[][], l[][], and m[][]
- */
-void
-HalfAlignment::Unset()
-{
- // Free memory for alignment characters and residue counts
- for (int k=0; k<n; k++)
- {
- delete[] s[k]; s[k] = NULL;
- delete[] l[k]; l[k] = NULL;
- delete[] m[k]; m[k] = NULL;
- }
- n=0;
- sname=seq=NULL;
- nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons= -1;
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Prepare a2m/a3m alignment:
- * Calculate l[k][i] (residue indices) and m[k][i] (position in seq[k])
-*/
-void
-HalfAlignment::Set(char* name, char** seq_in, char** sname_in, int n_in, int L_in, int n1, int n2, int n3, int n4, int nc, int L_in2/*<--FS*/)
-{
- int i; /* counts match states in seq[k] */
- int ll; /* counts residues LEFT from or at current position in seq[k] */
- int mm; /* counts postions in string seq[k] */
- int k; /* counts sequences */
- char c;
- char warned=0;
-
- nss_dssp=n1; nss_pred=n2; nss_conf=n3; nsa_dssp=n4; ncons=nc;
- seq=seq_in; /* flat copy of sequences */
- sname=sname_in; /* flat copy of sequence names */
- n=n_in;
- L=L_in;
- pos=0;
-
- /* Allocate memory for alignment characters and residue counts */
- for (k=0; k<n; k++) {
- s[k]=new char[LINELEN];
- l[k]=new int[L+10+L_in2/*<--FS*/];
- m[k]=new int[L+10+L_in2/*<--FS*/];
- if (!s[k] || !l[k] || !m[k]) MemoryError("space for formatting HMM-HMM alignment");
- h[k]=0; //starting positions in alignment = 0
- } /* k <= 0 < n (= n_in) */
-
- for (k=0; k<n; k++) {
- m[k][0]=0; // 0'th match state (virtual) is begin state at 0
- //if k is consensus sequence
- if (k==nc) {
- for (i=1; i<=L; i++) m[k][i]=l[k][i]=i;
- m[k][L+1]=l[k][L+1]=L;
- continue;
- }
- i=1; mm=1; ll=1;
- while ((c=seq[k][mm]))
- {
- if (MatchChr(c)==c) //count match/delete states
- {
- l[k][i]=ll;
- m[k][i]=mm;
- i++;
- }
- if (WordChr(c)) ll++; //index of next residue
- mm++;
- }
- l[k][i]=ll-1; //set l[k][L+1] eq number of residues in seq k (-1 since there is no residue at L+1st match state)
- m[k][i]=mm; //set m[k][L+1]
-
- if ((i-1)!=L && !warned)
- {
- cerr<<"Warning: sequence "<<sname[k]<<" in HMM "<<name<<" has "<<i<<" match states but should have "<<L<<"\n";
- warned=1;
- }
- } /* k <= 0 < n (= n_in) */
- //DEBUG
- if (v>=5)
- {
- printf(" i chr m l\n");
- for(i=0;i<=L+1;i++) printf("%3i %1c %3i %3i\n",i,seq[0][m[0][i]],m[0][i],l[0][i]);
- printf("\n");
- }
-
-} /*** end HalfAlignment::Set() ***/
-
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Fill in insert states following match state i (without inserting '.' to fill up)
- */
-void
-HalfAlignment::AddInserts(int i)
-{
- for (int k=0; k<n; k++) // for all sequences...
- for (int mm=m[k][i]+1; mm<m[k][i+1]; mm++) // for all inserts between match state i and i+1...
- s[k][h[k]++]=seq[k][mm]; // fill inserts into output alignment s[k]
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Fill up alignment with gaps '.' to generate flush end (all h[k] equal)
- */
-void
-HalfAlignment::FillUpGaps()
-{
- int k; //counts sequences
- pos=0;
-
- // Determine max position h[k]
- for (k=0; k<n; k++) pos = imax(h[k],pos);
-
- // Fill in gaps up to pos
- for (k=0; k<n; k++)
- {
- for (int hh=h[k]; hh<pos; hh++) s[k][hh]='.';
- h[k]=pos;
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Fill in insert states following match state i and fill up gaps with '.'
- */
-void
-HalfAlignment::AddInsertsAndFillUpGaps(int i)
-{
- AddInserts(i);
- FillUpGaps();
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Add gap column '.'
- */
-void
-HalfAlignment::AddChar(char c)
-{
- for (int k=0; k<n; k++) s[k][h[k]++]=c;
- pos++;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Add match state column i as is
- */
-void
-HalfAlignment::AddColumn(int i)
-{
- for (int k=0; k<n; k++) s[k][h[k]++]=seq[k][m[k][i]];
- pos++;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Add match state column i as insert state
- */
-void
-HalfAlignment::AddColumnAsInsert(int i)
-{
- char c;
- for (int k=0; k<n; k++)
- if ((c=seq[k][m[k][i]])!='-' && (c<'0' || c>'9'))
- s[k][h[k]++]=InsertChr(c);
- pos++;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Remove all characters c from template sequences
- */
-void
-HalfAlignment::RemoveChars(char c)
-{
- int k,h,hh;
- for (k=0; k<n; k++)
- {
- for (h=hh=0; h<pos; h++)
- if (s[k][h]!=c) s[k][hh++]=s[k][h];
- s[k][++hh]='\0';
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Transform alignment sequences from A3M to A2M (insert ".")
- */
-void
-HalfAlignment::BuildFASTA()
-{
- AddInserts(0);
- FillUpGaps();
- for (int i=1; i<=L; i++)
- {
- AddColumn(i);
- AddInserts(i);
- FillUpGaps();
- }
- ToFASTA();
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Transform alignment sequences from A3M to A2M (insert ".")
- */
-void
-HalfAlignment::BuildA2M()
-{
- AddInserts(0);
- FillUpGaps();
- for (int i=1; i<=L; i++)
- {
- AddColumn(i);
- AddInserts(i);
- FillUpGaps();
- }
- AddChar('\0');
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Transform alignment sequences from A3M to A2M (insert ".")
- */
-void
-HalfAlignment::BuildA3M()
-{
- AddInserts(0);
- for (int i=1; i<=L; i++)
- {
- AddColumn(i);
- AddInserts(i);
- }
- AddChar('\0');
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Transform alignment sequences from A2M to FASTA ( lowercase to uppercase and '.' to '-')
- */
-void
-HalfAlignment::ToFASTA()
-{
- for (int k=0; k<n; k++)
- {
- uprstr(s[k]);
- strtr(s[k],".","-");
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Align query (HalfAlignment) to template (i.e. hit) match state structure
- */
-void
-HalfAlignment::AlignToTemplate(Hit& hit)
-{
- int i,j;
- int step; // column of the HMM-HMM alignment (first:nstep, last:1)
- char state;
-
- if(0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
- // If in global mode: Add part of alignment before first MM state
- AddInserts(0); // Fill in insert states before first match state
- for (i=1; i<hit.i[hit.nsteps]; i++)
- {
- AddColumnAsInsert(i);
- AddInserts(i);
- if (par.outformat<=2) FillUpGaps();
- }
- }
-
- // Add endgaps (First state must be an MM state!!)
- for (j=1; j<hit.j[hit.nsteps]; j++)
- {
- AddChar('-');
- }
-
- // Add alignment between first and last MM state
- for (step=hit.nsteps; step>=1; step--)
- {
- state = hit.states[step];
- i = hit.i[step];
-
- switch(state)
- {
- case MM: //MM pair state (both query and template in Match state)
- AddColumn(i);
- AddInserts(i);
- break;
- case DG: //D- state
- case MI: //MI state
- AddColumnAsInsert(i);
- AddInserts(i);
- break;
- case GD: //-D state
- case IM: //IM state
- AddChar('-');
- break;
- }
- if (par.outformat<=2) FillUpGaps();
-
- }
-
- if(0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
-
- // If in global mode: Add part of alignment after last MM state
- for (i=hit.i[1]+1; i<=L; i++)
- {
- AddColumnAsInsert(i);
- AddInserts(i);
- if (par.outformat==2) FillUpGaps();
- }
- }
-
- // Add endgaps
- for (j=hit.j[1]+1; j<=hit.L; j++)
- {
- AddChar('-');
- }
-
- // Add end-of-string character
- AddChar('\0');
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Write the a2m/a3m alignment into alnfile
- */
-void
-HalfAlignment::Print(char* alnfile)
-{
- int k; //counts sequences
- int omitted=0; // counts number of sequences with no residues in match states
- FILE *outf;
- if (strcmp(alnfile,"stdout"))
- {
- if (par.append) outf=fopen(alnfile,"a"); else outf=fopen(alnfile,"w");
- if (!outf) OpenFileError(alnfile);
- }
- else
- outf = stdout;
- if (v>=3) cout<<"Writing alignment to "<<alnfile<<"\n";
-
- for (k=0; k<n; k++)
- {
- // Print sequence only if it contains at least one residue in a match state
- if (1) //strpbrk(s[k],"ABCDEFGHIKLMNPQRSTUVWXYZ1234567890"))
- {
- fprintf(outf,">%s\n",sname[k]);
- fprintf(outf,"%s\n",s[k]);
- } else {
- omitted++;
- if (v>=3) printf("%-14.14s contains no residue in match state. Omitting sequence\n",sname[k]);
- }
- }
- if (v>=2 && omitted) printf("Omitted %i sequences in %s which contained no residue in match state\n",omitted,alnfile);
- fclose(outf);
-}
-
-
-/** EOF hhhalfalignment-C.h **/