--- /dev/null
+/*
+ Fast, but crude, pairwise structural Alignments of RNA sequences
+
+ Possible structures of each RNA are encoded in a linear
+ "probability profile", by computing for each base the probability
+ of being unpaired, or paired upstream or downstream. These profiles
+ can be aligned using standard string alignment.
+
+ The is an extension of the old method in ProfileDist.c with the
+ following changes:
+ - use sequence as well as structure profile for scoring
+ - use similarity alignment instead of distance (maybe add local alinment)
+ - use affine gap costs
+
+ C Ivo L Hofacker, Vienna RNA Package
+*/
+/* Last changed Time-stamp: <2004-08-02 10:11:13 ivo> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+#include "dist_vars.h"
+#include "fold_vars.h"
+#include "part_func.h"
+#include "utils.h"
+#include "profiledist.h"
+#include "ProfileAln.h"
+
+
+/*@unused@*/
+static char rcsid[] = "$Id: ProfileAln.c,v 1.5 2006/01/18 13:00:30 ivo Exp $";
+
+#define EQUAL(x,y) (fabs((x)-(y)) <= fabs(x)*2*FLT_EPSILON)
+
+PRIVATE int *alignment[2];
+
+PRIVATE void sprint_aligned_bppm(const float *T1, const char *seq1,
+ const float *T2, const char *seq2);
+PRIVATE double PrfEditScore(const float *p1, const float *p2,
+ char c1, char c2);
+PRIVATE double average(double x, double y);
+
+PRIVATE double open=-1.5, ext=-0.666; /* defaults from clustalw */
+PRIVATE double seqw=0.5;
+PRIVATE int free_ends=1; /* whether to use free end gaps */
+
+/*---------------------------------------------------------------------------*/
+
+PRIVATE float **newmat(int l1, int l2) {
+ float **a;
+ int i;
+ a = (float **) space((l1+1)*sizeof(float *));
+ for (i=0; i<=l1; i++) a[i] = (float *) space((l2+1)*sizeof(float));
+ return a;
+}
+
+PUBLIC float profile_aln(const float *T1, const char *seq1,
+ const float *T2, const char *seq2)
+{
+ /* align the 2 probability profiles T1, T2 */
+ /* This is like a Needleman-Wunsch alignment, with affine gap-costs
+ ala Gotoh. The score looks at both seq and pair profile */
+
+ float **S, **E, **F, tot_score;
+ int i, j, length1, length2;
+
+ length1 = strlen(seq1);
+ length2 = strlen(seq2);
+ S = newmat(length1, length2);
+ E = newmat(length1, length2);
+ F = newmat(length1, length2);
+
+ E[0][0] = F[0][0] = open - ext;
+ S[0][0] = 0;
+ for (i=1; i<=length1; i++) F[i][0] = -9999; /* impossible */
+ for (j=1; j<=length2; j++) E[0][j] = -9999; /* impossible */
+ if (!free_ends) {
+ for (i=1; i<=length1; i++) S[i][0] = E[i][0] = E[i-1][0] +ext;
+ for (j=1; j<=length2; j++) S[0][j] = F[0][j] = F[0][j-1] +ext;
+ }
+
+ for (i=1; i<=length1; i++) {
+ for (j=1; j<=length2; j++) {
+ float M;
+ E[i][j] = MAX2(E[i-1][j]+ext, S[i-1][j]+open);
+ F[i][j] = MAX2(F[i][j-1]+ext, S[i][j-1]+open);
+ M = S[i-1][j-1] + PrfEditScore(T1+3*i,T2+3*j, seq1[i-1], seq2[j-1]);
+ S[i][j] = MAX3(M, E[i][j], F[i][j]);
+ }
+ }
+
+ if (edit_backtrack) {
+ double score=0;
+ char state = 'S';
+ int pos, i,j;
+ alignment[0] = (int *) space((length1+length2+1)*sizeof(int));
+ alignment[1] = (int *) space((length1+length2+1)*sizeof(int));
+
+ pos = length1+length2;
+ i = length1;
+ j = length2;
+
+ tot_score = S[length1][length2];
+
+ if (free_ends) {
+ /* find starting point for backtracking,
+ search for highest entry in last row or column */
+ int imax=0;
+ for (i=1; i<=length1; i++) {
+ if (S[i][length2]>score) {
+ score=S[i][length2];
+ imax=i;
+ }
+ }
+ for (j=1; j<=length2; j++) {
+ if (S[length1][j]>score) {
+ score=S[length1][j];
+ imax=-j;
+ }
+ }
+ if (imax<0) {
+ for (j=length2; j> -imax; j--) {
+ alignment[0][pos] = 0;
+ alignment[1][pos--] = j;
+ }
+ i=length1;
+ } else {
+ for (i=length1; i>imax; i--) {
+ alignment[0][pos] = i;
+ alignment[1][pos--] = 0;
+ }
+ j=length2;
+ }
+ tot_score=score;
+ }
+
+ while (i>0 && j>0) {
+ switch (state) {
+ case 'E':
+ score = E[i][j];
+ alignment[0][pos] = i;
+ alignment[1][pos--] = 0;
+ if (EQUAL(score, S[i-1][j] + open)) state = 'S';
+ i--;
+ break;
+ case 'F':
+ score = F[i][j];
+ alignment[0][pos] = 0;
+ alignment[1][pos--] = j;
+ if (EQUAL(score, S[i][j-1] + open)) state = 'S';
+ j--;
+ break;
+ case 'S':
+ score = S[i][j];
+ if (EQUAL(score, E[i][j])) state = 'E';
+ else if (EQUAL(score, F[i][j])) state = 'F';
+ else if (EQUAL(score, S[i-1][j-1] +
+ PrfEditScore(T1+3*i,T2+3*j, seq1[i-1], seq2[j-1]))) {
+ alignment[0][pos] = i;
+ alignment[1][pos--] = j;
+ i--; j--;
+ }
+ else nrerror("backtrack of alignment failed");
+ break;
+ }
+ }
+
+ for (; j>0; j--) {
+ alignment[0][pos] = 0;
+ alignment[1][pos--] = j;
+ }
+ for (; i>0; i--) {
+ alignment[0][pos] = i;
+ alignment[1][pos--] = 0;
+ }
+
+ for(i=pos+1; i<=length1+length2; i++){
+ alignment[0][i-pos] = alignment[0][i];
+ alignment[1][i-pos] = alignment[1][i];
+ }
+ alignment[0][0] = length1+length2-pos; /* length of alignment */
+
+ sprint_aligned_bppm(T1,seq1, T2,seq2);
+ free(alignment[0]);
+ free(alignment[1]);
+ }
+ for (i=0; i<=length1; i++) {
+ free(S[i]); free(E[i]); free(F[i]);
+ }
+ free(S); free(E); free(F);
+
+ return tot_score;
+}
+
+
+/*---------------------------------------------------------------------------*/
+PRIVATE inline double average(double x, double y) {
+ /*
+ As in Bonhoeffer et al (1993) 'RNA Multi Structure Landscapes',
+ Eur. Biophys. J. 22: 13-24 we have chosen the geometric mean.
+ */
+ return (float) sqrt(x*y);
+}
+
+PRIVATE double PrfEditScore(const float *p1, const float *p2, char c1, char c2)
+{
+ double score;
+ int k;
+
+ for(score=0.,k=0; k<3; k++)
+ score += average(p1[k],p2[k]);
+
+ score *= (1- seqw);
+ if (c1==c2) score += seqw;
+ else if (((c1=='A') && (c2=='G')) ||
+ ((c1=='G') && (c2=='A')) ||
+ ((c1=='C') && (c2=='U')) ||
+ ((c1=='U') && (c2=='C')))
+ score += 0.5*seqw;
+ else score -= 0.9*seqw;
+ return score;
+}
+
+/*---------------------------------------------------------------------------*/
+
+PRIVATE void sprint_aligned_bppm(const float *T1, const char *seq1,
+ const float *T2, const char *seq2) {
+ int i, length;
+ length = alignment[0][0];
+ for (i=0; i<4; i++) {
+ if (aligned_line[i] != NULL) free(aligned_line[i]);
+ aligned_line[i] = (char *) space((length+1)*sizeof(char));
+ }
+ for(i=1; i<=length; i++){
+ if (alignment[0][i]==0)
+ aligned_line[0][i-1] = aligned_line[2][i-1] = '_';
+ else {
+ aligned_line[0][i-1] = bppm_symbol(T1+alignment[0][i]*3);
+ aligned_line[2][i-1] = seq1[alignment[0][i]-1];
+ }
+ if (alignment[1][i]==0)
+ aligned_line[1][i-1] = aligned_line[3][i-1] = '_';
+ else {
+ aligned_line[1][i-1] = bppm_symbol(T2+alignment[1][i]*3);
+ aligned_line[3][i-1] = seq2[alignment[1][i]-1];
+ }
+ }
+}
+
+PUBLIC int set_paln_params(double gap_open, double gap_ext,
+ double seq_weight, int freeends) {
+ open = (gap_open>0) ? -gap_open : gap_open;
+ ext = (gap_ext>0) ? -gap_ext : gap_ext;
+ if (open > ext) fprintf(stderr, "Gap extension penalty is smaller than "
+ "gap open. Do you realy want this?\n");
+ seqw = seq_weight;
+ if (seqw<0) {
+ seqw = 0;
+ fprintf(stderr, "Sequence weight set to 0 (must be in [0..1])\n");
+ } else
+ if (seqw>1) {
+ seqw = 1;
+ fprintf(stderr, "Sequence weight set to 1 (must be in [0..1])\n");
+ }
+ free_ends = (freeends) ? 1 : 0;
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/