2 Functions for handling the Base Pair Probability Matrix
3 Peter F Stadler, Ivo L Hofacker
6 /* Last changed Time-stamp: <2002-11-07 12:46:16 ivo> */
13 #include "dist_vars.h"
14 #include "fold_vars.h"
15 #include "part_func.h"
17 #include "profiledist.h"
20 static char rcsid[] = "$Id: ProfileDist.c,v 1.6 2002/11/07 11:49:59 ivo Exp $";
22 PRIVATE int *alignment[2];
24 PRIVATE void sprint_aligned_bppm(const float *T1, const float *T2);
25 PRIVATE double PrfEditCost(int i, int j, const float *T1, const float *T2);
26 PRIVATE double average(double x, double y);
28 /*---------------------------------------------------------------------------*/
30 PUBLIC float profile_edit_distance(const float *T1, const float *T2)
32 /* align the 2 probability profiles T1, T2 */
33 /* This is like a Needleman-Wunsch alignment,
34 we should really use affine gap-costs ala Gotoh */
37 short **i_point, **j_point;
39 int i, j, i1, j1, pos, length1,length2;
40 float minus, plus, change, temp;
42 length1 = (int) T1[0];
43 length2 = (int) T2[0];
44 distance = (float **) space((length1 +1)*sizeof(float *));
46 i_point = (short **) space((length1 +1)*sizeof(short *));
47 j_point = (short **) space((length1 +1)*sizeof(short *));
49 for(i=0; i<= length1; i++){
50 distance[i] = (float *) space( (length2+1)*sizeof(float));
52 i_point[i] = (short *) space( (length2+1)*sizeof(short));
53 j_point[i] = (short *) space( (length2+1)*sizeof(short));
57 for(i = 1; i <= length1; i++) {
58 distance[i][0] = distance[i-1][0]+PrfEditCost(i,0,T1,T2);
59 if(edit_backtrack){ i_point[i][0] = (short) i-1; j_point[i][0] = 0; }
61 for(j = 1; j <= length2; j++) {
62 distance[0][j] = distance[0][j-1]+PrfEditCost(0,j,T1,T2);
63 if(edit_backtrack){ i_point[0][j] = 0; j_point[0][j] = (short) j-1; }
65 for (i = 1; i <= length1; i++) {
66 for (j = 1; j <= length2 ; j++) {
67 minus = distance[i-1][j] + PrfEditCost(i,0,T1,T2);
68 plus = distance[i][j-1] + PrfEditCost(0,j,T1,T2);
69 change = distance[i-1][j-1]+ PrfEditCost(i,j,T1,T2);
71 distance[i][j] = MIN3(minus, plus, change);
72 /* printf("%g ", distance[i][j]); */
75 if(distance[i][j] == change) {
76 i_point[i][j]= (short)i-1; j_point[i][j]= (short) j-1; }
77 else if(distance[i][j] == plus) {
78 i_point[i][j]= (short)i ; j_point[i][j]= (short)j-1; }
80 i_point[i][j]= (short)i-1; j_point[i][j]= (short)j ; }
86 temp = distance[length1][length2];
87 for(i=0;i<=length1;i++)
92 alignment[0] = (int *) space((length1+length2+1)*sizeof(int));
93 alignment[1] = (int *) space((length1+length2+1)*sizeof(int));
95 pos = length1+length2;
98 while( (i>0)||(j>0) ) {
101 if( ((i-i1)==1)&&((j-j1)==1) ) { /* substitution */
102 alignment[0][pos] = i;
103 alignment[1][pos] = j;
105 if( ((i-i1)==1)&&(j==j1) ) { /* Deletion in [1] */
106 alignment[0][pos] = i;
107 alignment[1][pos] = 0;
109 if( (i==i1)&&((j-j1)==1) ) { /* Deletion in [0] */
110 alignment[0][pos] = 0;
111 alignment[1][pos] = j;
117 for(i=pos+1; i<=length1+length2; i++){
118 alignment[0][i-pos] = alignment[0][i];
119 alignment[1][i-pos] = alignment[1][i];
121 alignment[0][0] = length1+length2-pos; /* length of alignment */
123 for(i=0; i<=length1; i++){
124 free(i_point[i]); free(j_point[i]);
126 free(i_point); free(j_point);
127 sprint_aligned_bppm(T1,T2);
136 /*---------------------------------------------------------------------------*/
138 PRIVATE double PrfEditCost(int i, int j, const float *T1, const float *T2)
144 if ((int) T2[1] != kmax) nrerror("inconsistent Profiles in PrfEditCost");
147 for(dist = 0. ,k=0 ; k<kmax ; k++)
148 dist += T2[j*kmax+k];
151 for(dist = 0. ,k=0 ; k<kmax ; k++)
152 dist += T1[i*kmax+k];
155 for(dist = 2.,k=0; k<kmax; k++)
156 dist -= 2.*average(T1[i*kmax+k],T2[j*kmax+k]);
161 /*---------------------------------------------------------------------------*/
163 PRIVATE double average(double x, double y)
165 /* can be essentially anything that fulfils :
167 2.) a(x,y) >= 0 for 0<= x,y <= 1
168 3.) a(x,y) <= (x+y)/2
169 4.) a(x,x) >= a(x,y) for 0<= x,y <= 1
170 As in Bonhoeffer et al (1993) 'RNA Multi Structure Landscapes',
171 Eur. Biophys. J. 22: 13-24 we have chosen the geometric mean.
176 a = (float) sqrt(x*y);
180 /*---------------------------------------------------------------------------*/
182 PUBLIC float *Make_bp_profile_bppm(FLT_OR_DBL *bppm, int length){
185 float *P; /* P[i*3+0] unpaired, P[i*3+1] upstream, P[i*3+2] downstream p */
186 int *index = get_iindx((unsigned) length);
188 P = (float *) space((length+1)*3*sizeof(float));
189 /* indices start at 1 use first entries to store length and dimension */
190 P[0] = (float) length;
193 for( i=1; i<length; i++)
194 for( j=i+1; j<=length; j++ ) {
195 P[i*L+1] += bppm[index[i]-j];
196 P[j*L+2] += bppm[index[i]-j];
198 for( i=1; i<=length; i++)
199 P[i*3+0] = 1 - P[i*3+1] - P[i*3+2];
206 /*---------------------------------------------------------------------------*/
208 PRIVATE void sprint_aligned_bppm(const float *T1, const float *T2)
211 length = alignment[0][0];
212 aligned_line[0] = (char *) space((length+1)*sizeof(char));
213 aligned_line[1] = (char *) space((length+1)*sizeof(char));
214 for(i=1; i<=length; i++){
215 if(alignment[0][i] ==0) aligned_line[0][i-1] = '_';
216 else { aligned_line[0][i-1] = bppm_symbol(T1+alignment[0][i]*3); }
217 if(alignment[1][i] ==0) aligned_line[1][i-1] = '_';
218 else { aligned_line[1][i-1] = bppm_symbol(T2+alignment[1][i]*3); }
222 /*---------------------------------------------------------------------------*/
224 PUBLIC void print_bppm(const float *T)
227 for(i=1; i<=( (int)T[0]); i++)
228 printf("%c",bppm_symbol(T+i*3));
232 /*---------------------------------------------------------------------------*/
234 PUBLIC void free_profile(float *T)
239 /*###########################################*/
240 /*# deprecated functions below #*/
241 /*###########################################*/
243 PUBLIC float *Make_bp_profile(int length){
244 return Make_bp_profile_bppm(pr, length);