#include #include #include #include #include #include #include "fold.h" #include "move_set.h" /* maximum degeneracy value - if degeneracy is greater than this, program segfaults*/ #define MAX_DEGEN 100 #define MINGAP 3 #define space(a) malloc(a) #define bool int #define true 1 #define false 0 int compare(short *lhs, short *rhs) { /* printf("%d ", (int)lhs[0]); */ int i=1; char l=0,r=0; while (i<=lhs[0]) { l = (lhs[i]==0?'.':(lhs[i]r); } int find_min(short *arr[MAX_DEGEN], int begin, int end) { short *min = arr[begin]; short min_num = begin; int i; for (i=begin+1; ifirst[0]) return 1; else return 0; } void copy_arr(short *dest, short *src) { if (!src || !dest) { fprintf(stderr, "Empty pointer in copying\n"); return; } memcpy(dest, src, sizeof(short)*(src[0]+1)); } short *allocopy(short *src) { short *res = (short*) space(sizeof(short)*(src[0]+1)); copy_arr(res, src); return res; } /* ############################## DECLARATION #####################################*/ /* private functions & declarations*/ static int cnt_move = 0; int count_move() {return cnt_move;} /*check if base is lone*/ int lone_base(short *pt, int i); /* can move be done on this structure? (move is in the Enc)*/ int check_insert(struct_en *str, int i, int j); /* internal struct with moves, sequence, degeneracy and options*/ typedef struct _Encoded { /* sequence*/ short *s0; short *s1; char *seq; /* moves*/ int bp_left; int bp_right; int bp_left2; /* if noLP is enabled (and for shift moves)*/ int bp_right2; /* options*/ int noLP; int verbose_lvl; int first; int shift; /* degeneracy*/ int begin_unpr; int begin_pr; int end_unpr; int end_pr; short *processed[MAX_DEGEN]; short *unprocessed[MAX_DEGEN]; int current_en; /* moves in random (needs to be freed afterwards)*/ int *moves_from; int *moves_to; int num_moves; /* function for flooding */ int (*funct) (struct_en*, struct_en*); } Encoded; /* frees all things allocated by degeneracy...*/ void free_degen(Encoded *Enc) { int i; for (i=Enc->begin_unpr; iend_unpr; i++) { if (Enc->unprocessed[i]) { free(Enc->unprocessed[i]); Enc->unprocessed[i]=NULL; } } for (i=Enc->begin_pr; iend_pr; i++) { if (Enc->processed[i]) { free(Enc->processed[i]); Enc->processed[i]=NULL; } } Enc->begin_pr=0; Enc->begin_unpr=0; Enc->end_pr=0; Enc->end_unpr=0; } /* ############################## IMPLEMENTATION #####################################*/ /* reads a line no matter how long*/ char* my_getline(FILE *fp) { char s[512], *line, *cp; line = NULL; do { if(fgets(s, 512, fp) == NULL) break; cp = strchr(s, '\n'); if(cp != NULL) *cp = '\0'; if(line == NULL) line = (char *) calloc(strlen(s) + 1, sizeof(char)); else line = (char *) realloc(line, strlen(s) + strlen(line) + 1); strcat (line, s); } while (cp == NULL); return (line); } inline void do_move(short *pt, int bp_left, int bp_right) { /* delete*/ if (bp_left<0) { pt[-bp_left]=0; pt[-bp_right]=0; } else { /* insert*/ pt[bp_left]=bp_right; pt[bp_right]=bp_left; } } /* done with all structures along the way to deepest*/ int update_deepest(Encoded *Enc, struct_en *str, struct_en *min) { /* apply move + get its energy*/ int tmp_en; tmp_en = str->energy + energy_of_move_pt(str->structure, Enc->s0, Enc->s1, Enc->bp_left, Enc->bp_right); do_move(str->structure, Enc->bp_left, Enc->bp_right); if (Enc->bp_left2 != 0) { tmp_en += energy_of_move_pt(str->structure, Enc->s0, Enc->s1, Enc->bp_left2, Enc->bp_right2); do_move(str->structure, Enc->bp_left2, Enc->bp_right2); } int last_en = str->energy; str->energy = tmp_en; /* use f_point if we have it */ if (Enc->funct) { int end = Enc->funct(str, min); /* undo moves */ if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2); do_move(str->structure, -Enc->bp_left, -Enc->bp_right); str->energy = last_en; Enc->bp_left=0; Enc->bp_right=0; Enc->bp_left2=0; Enc->bp_right2=0; return (end?1:0); } if (Enc->verbose_lvl>1) { fprintf(stderr, " "); print_str(stderr, str->structure); fprintf(stderr, " %d\n", tmp_en); } /* better deepest*/ if (tmp_en < min->energy) { min->energy = tmp_en; copy_arr(min->structure, str->structure); /* delete degeneracy*/ free_degen(Enc); /* undo moves*/ if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2); do_move(str->structure, -Enc->bp_left, -Enc->bp_right); str->energy = last_en; Enc->bp_left=0; Enc->bp_right=0; Enc->bp_left2=0; Enc->bp_right2=0; return 1; } /* degeneracy*/ if ((str->energy == min->energy) && (Enc->current_en == min->energy)) { int found = 0; int i; for (i=Enc->begin_pr; iend_pr; i++) { if (equals(Enc->processed[i], str->structure)) { found = 1; break; } } for (i=Enc->begin_unpr; !found && iend_pr; i++) { if (equals(Enc->unprocessed[i], str->structure)) { found = 1; break; } } if (!found) { Enc->unprocessed[Enc->end_unpr]=allocopy(str->structure); Enc->end_unpr++; } } /* undo moves*/ if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2); do_move(str->structure, -Enc->bp_left, -Enc->bp_right); str->energy = last_en; Enc->bp_left=0; Enc->bp_right=0; Enc->bp_left2=0; Enc->bp_right2=0; return 0; } /* deletions move set*/ int deletions(Encoded *Enc, struct_en *str, struct_en *minim) { int cnt = 0; short *pt = str->structure; int len = pt[0]; int i; for (i=1; i<=len; i++) { if (pt[i]>pt[pt[i]]) { /* '('*/ Enc->bp_left=-i; Enc->bp_right=-pt[i]; /*if nolp enabled, make (maybe) 2nd delete*/ if (Enc->noLP) { int lone = -1; if (lone_base(pt, i-1)) lone=i-1; else if (lone_base(pt, i+1)) lone=i+1; else if (lone_base(pt, pt[i]-1)) lone=pt[i]-1; else if (lone_base(pt, pt[i]+1)) lone=pt[i]+1; /* check*/ if (lone != -1 && (pt[lone]==0 || pt[pt[lone]]==0)) { fprintf(stderr, "WARNING: pt[%d(or %d)]!=\'.\'", lone, pt[lone]); } if (lone != -1) { Enc->bp_left2=-lone-1; Enc->bp_right2=-pt[lone]-1; } if (!lone_base(pt, pt[lone]-1) && !lone_base(pt, pt[lone]+1)) { cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } else { /* nolp not enabled*/ cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } } return cnt; } /* compatible base pair?*/ inline bool compat(char a, char b) { if (a=='A' && b=='U') return true; if (a=='C' && b=='G') return true; if (a=='G' && b=='U') return true; if (a=='U' && b=='A') return true; if (a=='G' && b=='C') return true; if (a=='U' && b=='G') return true; /* and with T's*/ if (a=='A' && b=='T') return true; if (a=='T' && b=='A') return true; if (a=='G' && b=='T') return true; if (a=='T' && b=='G') return true; return false; } /* try insert base pair (i,j)*/ inline bool try_insert(const short *pt, const char *seq, int i, int j) { if (i<=0 || j<=0 || i>pt[0] || j>pt[0]) return false; return (j-i>MINGAP && pt[j]==0 && pt[i]==0 && compat(seq[i-1], seq[j-1])); } /* try insert base pair (i,j) */ inline bool try_insert_seq(const char *seq, int i, int j) { if (i<=0 || j<=0) return false; return (j-i>MINGAP && compat(seq[i-1], seq[j-1])); } /* insertions move set*/ int insertions(Encoded *Enc, struct_en *str, struct_en *minim) { int cnt = 0; short *pt = str->structure; int len = pt[0]; int i,j; for (i=1; i<=len; i++) { if (pt[i]==0) { for (j=i+1; j<=len; j++) { /* end if found closing bracket*/ if (pt[j]!=0 && pt[j]j) { /*'('*/ j = pt[j]; continue; } /* if conditions are met, do insert*/ if (try_insert(pt, Enc->seq, i, j)) { Enc->bp_left=i; Enc->bp_right=j; if (Enc->noLP) { /* if lone bases occur, try inserting one another base*/ if (lone_base(pt, i) || lone_base(pt, j)) { /* inside*/ if (try_insert(pt, Enc->seq, i+1, j-1)) { Enc->bp_left2=i+1; Enc->bp_right2=j-1; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } else /*outside*/ if (try_insert(pt, Enc->seq, i-1, j+1)) { Enc->bp_left2=i-1; Enc->bp_right2=j+1; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } else { cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } else { cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } } } } return cnt; } /*shift move set*/ int shifts(Encoded *Enc, struct_en *str, struct_en *minim) { int cnt = 0; int brack_num = 0; short *pt = str->structure; int len = pt[0]; int i, k; for (i=1; i<=len; i++) { if (pt[i]!=0 && pt[i]>i) { /*'('*/ int j=pt[i]; /* outer switch left*/ if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, outer switch left\n", brack_num+1, i); for (k=i-1; k>0; k--) { if (pt[k]!=0 && pt[k]>k/*'('*/) break; if (pt[k]!=0 && pt[k]MINGAP && compat(Enc->seq[k-1], Enc->seq[j-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=k; Enc->bp_right2=j; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } /* switch (i,j) to (k,i)*/ if (i-k>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=k; Enc->bp_right2=i; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } /* outer switch right*/ if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, outer switch right\n", brack_num+1, i); for (k=j+1; k<=len; k++) { if (pt[k]!=0 && pt[k]k/*'('*/) { k = pt[k]; continue; } /* check*/ if (pt[k]!=0) { fprintf(stderr, "WARNING: \'%c\'should be \'.\' at pos %d!\n", pt[k], k); } /* switch (i,j) to (i,k)*/ if (k-i>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=i; Enc->bp_right2=k; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } /* switch (i,j) to (j,k)*/ if (k-j>MINGAP && compat(Enc->seq[j-1], Enc->seq[k-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=j; Enc->bp_right2=k; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, inner switch\n", brack_num+1, i); /* inner switch*/ for (k=i+1; kk/*'('*/) { k=pt[k]; continue; } /* left switch (i,j) to (k,j)*/ if (j-k>MINGAP && compat(Enc->seq[k-1], Enc->seq[j-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=k; Enc->bp_right2=j; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } /* right switch (i,j) to (i,k)*/ if (k-i>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) { Enc->bp_left=-i; Enc->bp_right=-j; Enc->bp_left2=i; Enc->bp_right2=k; cnt += update_deepest(Enc, str, minim); /* in case useFirst is on and structure is found, end*/ if (Enc->first && cnt > 0) return cnt; } } /* end inner switch for*/ brack_num++; } /* end if (pt[i]=='(')*/ } /* end for in switches*/ return cnt; } /* move to deepest (or first) neighbour*/ int move_set(Encoded *Enc, struct_en *str) { /* count how many times called*/ cnt_move++; /* count better neighbours*/ int cnt = 0; /* deepest descent*/ struct_en min; min.structure = allocopy(str->structure); min.energy = str->energy; Enc->current_en = str->energy; if (Enc->verbose_lvl>0) { fprintf(stderr, " start of MS:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); } /* if using first dont do all of them*/ bool end = false; /* insertions*/ if (!end) cnt += insertions(Enc, str, &min); if (Enc->first && cnt>0) end = true; if (Enc->verbose_lvl>1) fprintf(stderr, "\n"); /* deletions*/ if (!end) cnt += deletions(Enc, str, &min); if (Enc->first && cnt>0) end = true; /* shifts (only if enabled + noLP disabled)*/ if (!end && Enc->shift && !Enc->noLP) { cnt += shifts(Enc, str, &min); if (Enc->first && cnt>0) end = true; } /* if degeneracy occurs, solve it!*/ if (!end && (Enc->end_unpr - Enc->begin_unpr)>0) { Enc->processed[Enc->end_pr] = str->structure; Enc->end_pr++; str->structure = Enc->unprocessed[Enc->begin_unpr]; Enc->unprocessed[Enc->begin_unpr]=NULL; Enc->begin_unpr++; cnt += move_set(Enc, str); } else { /* write output to str*/ copy_arr(str->structure, min.structure); str->energy = min.energy; } /* release minimal*/ free(min.structure); /* resolve degeneracy in local minima*/ if ((Enc->end_pr - Enc->begin_pr)>0) { Enc->processed[Enc->end_pr]=str->structure; Enc->end_pr++; int min = find_min(Enc->processed, Enc->begin_pr, Enc->end_pr); short *tmp = Enc->processed[min]; Enc->processed[min] = Enc->processed[Enc->begin_pr]; Enc->processed[Enc->begin_pr] = tmp; str->structure = Enc->processed[Enc->begin_pr]; Enc->begin_pr++; free_degen(Enc); } if (Enc->verbose_lvl>1 && !(Enc->first)) { fprintf(stderr, "\n end of MS:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); } return cnt; } void construct_moves(Encoded *Enc, short *structure) { /* generate all possible moves (less than n^2)*/ Enc->num_moves = 0; int i; for (i=1; i<=structure[0]; i++) { if (structure[i]!=0) { if (structure[i]moves_from[Enc->num_moves]=-i; Enc->moves_to[Enc->num_moves]=-structure[i]; Enc->num_moves++; /* fprintf(stderr, "add d(%d, %d)\n", i, str.structure[i]); */ } else { int j; for (j=i+1; j<=structure[0]; j++) { /* fprintf(stderr, "check (%d, %d)\n", i, j); */ if (structure[j]==0) { if (try_insert_seq(Enc->seq,i,j)) { Enc->moves_from[Enc->num_moves]=i; Enc->moves_to[Enc->num_moves]=j; Enc->num_moves++; /* fprintf(stderr, "add i(%d, %d)\n", i, j); */ continue; } } else if (structure[j]>j) { /* '(' */ j = structure[j]; } else break; } } } /* permute them */ for (i=0; inum_moves-1; i++) { int rnd = rand(); rnd = rnd % (Enc->num_moves-i) + i; int swp; swp = Enc->moves_from[i]; Enc->moves_from[i]=Enc->moves_from[rnd]; Enc->moves_from[rnd]=swp; swp = Enc->moves_to[i]; Enc->moves_to[i]=Enc->moves_to[rnd]; Enc->moves_to[rnd]=swp; } } int move_rset(Encoded *Enc, struct_en *str) { /* count how many times called*/ cnt_move++; /* count better neighbours*/ int cnt = 0; /* deepest descent*/ struct_en min; min.structure = allocopy(str->structure); min.energy = str->energy; Enc->current_en = str->energy; if (Enc->verbose_lvl>0) { fprintf(stderr, " start of MR:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); } /* construct and permute possible moves */ construct_moves(Enc, str->structure); /* find first lower one*/ int i; for (i=0; inum_moves; i++) { Enc->bp_left = Enc->moves_from[i]; Enc->bp_right = Enc->moves_to[i]; cnt = update_deepest(Enc, str, &min); if (cnt) break; } /* if degeneracy occurs, solve it!*/ if (!cnt && (Enc->end_unpr - Enc->begin_unpr)>0) { Enc->processed[Enc->end_pr] = str->structure; Enc->end_pr++; str->structure = Enc->unprocessed[Enc->begin_unpr]; Enc->unprocessed[Enc->begin_unpr]=NULL; Enc->begin_unpr++; cnt += move_rset(Enc, str); } else { /* write output to str*/ copy_arr(str->structure, min.structure); str->energy = min.energy; } /* release minimal*/ free(min.structure); /* resolve degeneracy in local minima*/ if ((Enc->end_pr - Enc->begin_pr)>0) { Enc->processed[Enc->end_pr]=str->structure; Enc->end_pr++; int min = find_min(Enc->processed, Enc->begin_pr, Enc->end_pr); short *tmp = Enc->processed[min]; Enc->processed[min] = Enc->processed[Enc->begin_pr]; Enc->processed[Enc->begin_pr] = tmp; str->structure = Enc->processed[Enc->begin_pr]; Enc->begin_pr++; free_degen(Enc); } return cnt; } /*check if base is lone*/ int lone_base(short *pt, int i) { if (i<=0 || i>pt[0]) return 0; /* is not a base pair*/ if (pt[i]==0) return 0; /* base is lone:*/ if (i-1>0) { /* is base pair and is the same bracket*/ if (pt[i-1]!=0 && ((pt[i-1]str[str[i]]) { /* '('*/ if (i+1==str[0] || str[i+1]==0 || str[i+1]str[str[i+1]]) i++; } if (str[i]str[str[i+1]]) { return i; } else while (i+1!=str[0] && str[i+1]!=0 && str[i+1]structure); fprintf(out, " %6.2f\n", str->energy/100.0); } void print_str(FILE *out, short *str) { int i; for (i=1; i<=str[0]; i++) { if (str[i]==0) fprintf(out, "."); else if (str[i]