12 /* maximum degeneracy value - if degeneracy is greater than this, program segfaults*/
17 #define space(a) malloc(a)
23 int compare(short *lhs, short *rhs)
26 /* printf("%d ", (int)lhs[0]); */
31 l = (lhs[i]==0?'.':(lhs[i]<lhs[lhs[i]]?'(':')'));
32 r = (rhs[i]==0?'.':(rhs[i]<rhs[rhs[i]]?'(':')'));
37 return (i<=lhs[0] && l>r);
40 int find_min(short *arr[MAX_DEGEN], int begin, int end) {
41 short *min = arr[begin];
42 short min_num = begin;
45 for (i=begin+1; i<end; i++) {
46 if (compare(arr[i], min)) {
54 int equals(const short *first, const short *second)
57 while (i<=first[0] && first[i]==second[i]) {
60 if (i>first[0]) return 1;
64 void copy_arr(short *dest, short *src)
67 fprintf(stderr, "Empty pointer in copying\n");
70 memcpy(dest, src, sizeof(short)*(src[0]+1));
73 short *allocopy(short *src)
75 short *res = (short*) space(sizeof(short)*(src[0]+1));
80 /* ############################## DECLARATION #####################################*/
81 /* private functions & declarations*/
83 static int cnt_move = 0;
84 int count_move() {return cnt_move;}
86 /*check if base is lone*/
87 int lone_base(short *pt, int i);
89 /* can move be done on this structure? (move is in the Enc)*/
90 int check_insert(struct_en *str, int i, int j);
92 /* internal struct with moves, sequence, degeneracy and options*/
93 typedef struct _Encoded {
103 int bp_left2; /* if noLP is enabled (and for shift moves)*/
117 short *processed[MAX_DEGEN];
118 short *unprocessed[MAX_DEGEN];
121 /* moves in random (needs to be freed afterwards)*/
126 /* function for flooding */
127 int (*funct) (struct_en*, struct_en*);
132 /* frees all things allocated by degeneracy...*/
133 void free_degen(Encoded *Enc)
136 for (i=Enc->begin_unpr; i<Enc->end_unpr; i++) {
137 if (Enc->unprocessed[i]) {
138 free(Enc->unprocessed[i]);
139 Enc->unprocessed[i]=NULL;
142 for (i=Enc->begin_pr; i<Enc->end_pr; i++) {
143 if (Enc->processed[i]) {
144 free(Enc->processed[i]);
145 Enc->processed[i]=NULL;
154 /* ############################## IMPLEMENTATION #####################################*/
156 /* reads a line no matter how long*/
157 char* my_getline(FILE *fp)
159 char s[512], *line, *cp;
162 if(fgets(s, 512, fp) == NULL) break;
163 cp = strchr(s, '\n');
164 if(cp != NULL) *cp = '\0';
165 if(line == NULL) line = (char *) calloc(strlen(s) + 1, sizeof(char));
166 else line = (char *) realloc(line, strlen(s) + strlen(line) + 1);
168 } while (cp == NULL);
172 inline void do_move(short *pt, int bp_left, int bp_right)
179 pt[bp_left]=bp_right;
180 pt[bp_right]=bp_left;
184 /* done with all structures along the way to deepest*/
185 int update_deepest(Encoded *Enc, struct_en *str, struct_en *min)
187 /* apply move + get its energy*/
189 tmp_en = str->energy + energy_of_move_pt(str->structure, Enc->s0, Enc->s1, Enc->bp_left, Enc->bp_right);
190 do_move(str->structure, Enc->bp_left, Enc->bp_right);
191 if (Enc->bp_left2 != 0) {
192 tmp_en += energy_of_move_pt(str->structure, Enc->s0, Enc->s1, Enc->bp_left2, Enc->bp_right2);
193 do_move(str->structure, Enc->bp_left2, Enc->bp_right2);
195 int last_en = str->energy;
196 str->energy = tmp_en;
199 /* use f_point if we have it */
201 int end = Enc->funct(str, min);
204 if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2);
205 do_move(str->structure, -Enc->bp_left, -Enc->bp_right);
206 str->energy = last_en;
215 if (Enc->verbose_lvl>1) { fprintf(stderr, " "); print_str(stderr, str->structure); fprintf(stderr, " %d\n", tmp_en); }
218 if (tmp_en < min->energy) {
219 min->energy = tmp_en;
220 copy_arr(min->structure, str->structure);
222 /* delete degeneracy*/
226 if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2);
227 do_move(str->structure, -Enc->bp_left, -Enc->bp_right);
228 str->energy = last_en;
237 if ((str->energy == min->energy) && (Enc->current_en == min->energy)) {
240 for (i=Enc->begin_pr; i<Enc->end_pr; i++) {
241 if (equals(Enc->processed[i], str->structure)) {
246 for (i=Enc->begin_unpr; !found && i<Enc->end_pr; i++) {
247 if (equals(Enc->unprocessed[i], str->structure)) {
254 Enc->unprocessed[Enc->end_unpr]=allocopy(str->structure);
260 if (Enc->bp_left2!=0) do_move(str->structure, -Enc->bp_left2, -Enc->bp_right2);
261 do_move(str->structure, -Enc->bp_left, -Enc->bp_right);
262 str->energy = last_en;
271 /* deletions move set*/
272 int deletions(Encoded *Enc, struct_en *str, struct_en *minim)
275 short *pt = str->structure;
279 for (i=1; i<=len; i++) {
280 if (pt[i]>pt[pt[i]]) { /* '('*/
282 Enc->bp_right=-pt[i];
284 /*if nolp enabled, make (maybe) 2nd delete*/
287 if (lone_base(pt, i-1)) lone=i-1;
288 else if (lone_base(pt, i+1)) lone=i+1;
289 else if (lone_base(pt, pt[i]-1)) lone=pt[i]-1;
290 else if (lone_base(pt, pt[i]+1)) lone=pt[i]+1;
293 if (lone != -1 && (pt[lone]==0 || pt[pt[lone]]==0)) {
294 fprintf(stderr, "WARNING: pt[%d(or %d)]!=\'.\'", lone, pt[lone]);
298 Enc->bp_left2=-lone-1;
299 Enc->bp_right2=-pt[lone]-1;
301 if (!lone_base(pt, pt[lone]-1) && !lone_base(pt, pt[lone]+1)) {
302 cnt += update_deepest(Enc, str, minim);
303 /* in case useFirst is on and structure is found, end*/
304 if (Enc->first && cnt > 0) return cnt;
306 } else { /* nolp not enabled*/
307 cnt += update_deepest(Enc, str, minim);
308 /* in case useFirst is on and structure is found, end*/
309 if (Enc->first && cnt > 0) return cnt;
316 /* compatible base pair?*/
317 inline bool compat(char a, char b) {
318 if (a=='A' && b=='U') return true;
319 if (a=='C' && b=='G') return true;
320 if (a=='G' && b=='U') return true;
321 if (a=='U' && b=='A') return true;
322 if (a=='G' && b=='C') return true;
323 if (a=='U' && b=='G') return true;
325 if (a=='A' && b=='T') return true;
326 if (a=='T' && b=='A') return true;
327 if (a=='G' && b=='T') return true;
328 if (a=='T' && b=='G') return true;
332 /* try insert base pair (i,j)*/
333 inline bool try_insert(const short *pt, const char *seq, int i, int j)
335 if (i<=0 || j<=0 || i>pt[0] || j>pt[0]) return false;
336 return (j-i>MINGAP && pt[j]==0 && pt[i]==0 && compat(seq[i-1], seq[j-1]));
339 /* try insert base pair (i,j) */
340 inline bool try_insert_seq(const char *seq, int i, int j)
342 if (i<=0 || j<=0) return false;
343 return (j-i>MINGAP && compat(seq[i-1], seq[j-1]));
346 /* insertions move set*/
347 int insertions(Encoded *Enc, struct_en *str, struct_en *minim)
350 short *pt = str->structure;
354 for (i=1; i<=len; i++) {
356 for (j=i+1; j<=len; j++) {
357 /* end if found closing bracket*/
358 if (pt[j]!=0 && pt[j]<j) break; /*')'*/
359 if (pt[j]!=0 && pt[j]>j) { /*'('*/
363 /* if conditions are met, do insert*/
364 if (try_insert(pt, Enc->seq, i, j)) {
369 /* if lone bases occur, try inserting one another base*/
370 if (lone_base(pt, i) || lone_base(pt, j)) {
372 if (try_insert(pt, Enc->seq, i+1, j-1)) {
375 cnt += update_deepest(Enc, str, minim);
376 /* in case useFirst is on and structure is found, end*/
377 if (Enc->first && cnt > 0) return cnt;
379 if (try_insert(pt, Enc->seq, i-1, j+1)) {
382 cnt += update_deepest(Enc, str, minim);
383 /* in case useFirst is on and structure is found, end*/
384 if (Enc->first && cnt > 0) return cnt;
387 cnt += update_deepest(Enc, str, minim);
388 /* in case useFirst is on and structure is found, end*/
389 if (Enc->first && cnt > 0) return cnt;
392 cnt += update_deepest(Enc, str, minim);
393 /* in case useFirst is on and structure is found, end*/
394 if (Enc->first && cnt > 0) return cnt;
404 int shifts(Encoded *Enc, struct_en *str, struct_en *minim)
408 short *pt = str->structure;
412 for (i=1; i<=len; i++) {
413 if (pt[i]!=0 && pt[i]>i) { /*'('*/
416 /* outer switch left*/
417 if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, outer switch left\n", brack_num+1, i);
418 for (k=i-1; k>0; k--) {
419 if (pt[k]!=0 && pt[k]>k/*'('*/) break;
420 if (pt[k]!=0 && pt[k]<k/*')'*/) {
426 fprintf(stderr, "WARNING: \'%c\'should be \'.\' at pos %d!\n", pt[k], k);
429 /* switch (i,j) to (k,j)*/
430 if (j-k>MINGAP && compat(Enc->seq[k-1], Enc->seq[j-1])) {
435 cnt += update_deepest(Enc, str, minim);
436 /* in case useFirst is on and structure is found, end*/
437 if (Enc->first && cnt > 0) return cnt;
440 /* switch (i,j) to (k,i)*/
441 if (i-k>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) {
446 cnt += update_deepest(Enc, str, minim);
447 /* in case useFirst is on and structure is found, end*/
448 if (Enc->first && cnt > 0) return cnt;
453 /* outer switch right*/
454 if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, outer switch right\n", brack_num+1, i);
455 for (k=j+1; k<=len; k++) {
456 if (pt[k]!=0 && pt[k]<k/*')'*/) break;
457 if (pt[k]!=0 && pt[k]>k/*'('*/) {
464 fprintf(stderr, "WARNING: \'%c\'should be \'.\' at pos %d!\n", pt[k], k);
466 /* switch (i,j) to (i,k)*/
467 if (k-i>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) {
472 cnt += update_deepest(Enc, str, minim);
473 /* in case useFirst is on and structure is found, end*/
474 if (Enc->first && cnt > 0) return cnt;
476 /* switch (i,j) to (j,k)*/
477 if (k-j>MINGAP && compat(Enc->seq[j-1], Enc->seq[k-1])) {
482 cnt += update_deepest(Enc, str, minim);
483 /* in case useFirst is on and structure is found, end*/
484 if (Enc->first && cnt > 0) return cnt;
488 if (Enc->verbose_lvl>1) fprintf(stderr, "%2d bracket %2d position, inner switch\n", brack_num+1, i);
490 for (k=i+1; k<j; k++) {
491 /* jump to end of the sub-bracketing*/
492 if (pt[k]!=0 && pt[k]>k/*'('*/) {
497 /* left switch (i,j) to (k,j)*/
498 if (j-k>MINGAP && compat(Enc->seq[k-1], Enc->seq[j-1])) {
503 cnt += update_deepest(Enc, str, minim);
504 /* in case useFirst is on and structure is found, end*/
505 if (Enc->first && cnt > 0) return cnt;
508 /* right switch (i,j) to (i,k)*/
509 if (k-i>MINGAP && compat(Enc->seq[i-1], Enc->seq[k-1])) {
514 cnt += update_deepest(Enc, str, minim);
515 /* in case useFirst is on and structure is found, end*/
516 if (Enc->first && cnt > 0) return cnt;
518 } /* end inner switch for*/
520 } /* end if (pt[i]=='(')*/
521 } /* end for in switches*/
525 /* move to deepest (or first) neighbour*/
526 int move_set(Encoded *Enc, struct_en *str)
528 /* count how many times called*/
531 /* count better neighbours*/
536 min.structure = allocopy(str->structure);
537 min.energy = str->energy;
538 Enc->current_en = str->energy;
540 if (Enc->verbose_lvl>0) { fprintf(stderr, " start of MS:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); }
542 /* if using first dont do all of them*/
545 if (!end) cnt += insertions(Enc, str, &min);
546 if (Enc->first && cnt>0) end = true;
547 if (Enc->verbose_lvl>1) fprintf(stderr, "\n");
550 if (!end) cnt += deletions(Enc, str, &min);
551 if (Enc->first && cnt>0) end = true;
553 /* shifts (only if enabled + noLP disabled)*/
554 if (!end && Enc->shift && !Enc->noLP) {
555 cnt += shifts(Enc, str, &min);
556 if (Enc->first && cnt>0) end = true;
559 /* if degeneracy occurs, solve it!*/
560 if (!end && (Enc->end_unpr - Enc->begin_unpr)>0) {
561 Enc->processed[Enc->end_pr] = str->structure;
563 str->structure = Enc->unprocessed[Enc->begin_unpr];
564 Enc->unprocessed[Enc->begin_unpr]=NULL;
566 cnt += move_set(Enc, str);
568 /* write output to str*/
569 copy_arr(str->structure, min.structure);
570 str->energy = min.energy;
575 /* resolve degeneracy in local minima*/
576 if ((Enc->end_pr - Enc->begin_pr)>0) {
577 Enc->processed[Enc->end_pr]=str->structure;
580 int min = find_min(Enc->processed, Enc->begin_pr, Enc->end_pr);
581 short *tmp = Enc->processed[min];
582 Enc->processed[min] = Enc->processed[Enc->begin_pr];
583 Enc->processed[Enc->begin_pr] = tmp;
584 str->structure = Enc->processed[Enc->begin_pr];
589 if (Enc->verbose_lvl>1 && !(Enc->first)) { fprintf(stderr, "\n end of MS:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); }
594 void construct_moves(Encoded *Enc, short *structure)
596 /* generate all possible moves (less than n^2)*/
599 for (i=1; i<=structure[0]; i++) {
600 if (structure[i]!=0) {
601 if (structure[i]<i) continue;
602 Enc->moves_from[Enc->num_moves]=-i;
603 Enc->moves_to[Enc->num_moves]=-structure[i];
605 /* fprintf(stderr, "add d(%d, %d)\n", i, str.structure[i]); */
608 for (j=i+1; j<=structure[0]; j++) {
609 /* fprintf(stderr, "check (%d, %d)\n", i, j); */
610 if (structure[j]==0) {
611 if (try_insert_seq(Enc->seq,i,j)) {
612 Enc->moves_from[Enc->num_moves]=i;
613 Enc->moves_to[Enc->num_moves]=j;
615 /* fprintf(stderr, "add i(%d, %d)\n", i, j); */
618 } else if (structure[j]>j) { /* '(' */
626 for (i=0; i<Enc->num_moves-1; i++) {
628 rnd = rnd % (Enc->num_moves-i) + i;
630 swp = Enc->moves_from[i];
631 Enc->moves_from[i]=Enc->moves_from[rnd];
632 Enc->moves_from[rnd]=swp;
633 swp = Enc->moves_to[i];
634 Enc->moves_to[i]=Enc->moves_to[rnd];
635 Enc->moves_to[rnd]=swp;
639 int move_rset(Encoded *Enc, struct_en *str)
641 /* count how many times called*/
644 /* count better neighbours*/
649 min.structure = allocopy(str->structure);
650 min.energy = str->energy;
651 Enc->current_en = str->energy;
653 if (Enc->verbose_lvl>0) { fprintf(stderr, " start of MR:\n "); print_str(stderr, str->structure); fprintf(stderr, " %d\n\n", str->energy); }
655 /* construct and permute possible moves */
656 construct_moves(Enc, str->structure);
658 /* find first lower one*/
660 for (i=0; i<Enc->num_moves; i++) {
661 Enc->bp_left = Enc->moves_from[i];
662 Enc->bp_right = Enc->moves_to[i];
663 cnt = update_deepest(Enc, str, &min);
667 /* if degeneracy occurs, solve it!*/
668 if (!cnt && (Enc->end_unpr - Enc->begin_unpr)>0) {
669 Enc->processed[Enc->end_pr] = str->structure;
671 str->structure = Enc->unprocessed[Enc->begin_unpr];
672 Enc->unprocessed[Enc->begin_unpr]=NULL;
674 cnt += move_rset(Enc, str);
676 /* write output to str*/
677 copy_arr(str->structure, min.structure);
678 str->energy = min.energy;
683 /* resolve degeneracy in local minima*/
684 if ((Enc->end_pr - Enc->begin_pr)>0) {
685 Enc->processed[Enc->end_pr]=str->structure;
688 int min = find_min(Enc->processed, Enc->begin_pr, Enc->end_pr);
689 short *tmp = Enc->processed[min];
690 Enc->processed[min] = Enc->processed[Enc->begin_pr];
691 Enc->processed[Enc->begin_pr] = tmp;
692 str->structure = Enc->processed[Enc->begin_pr];
700 /*check if base is lone*/
701 int lone_base(short *pt, int i)
703 if (i<=0 || i>pt[0]) return 0;
704 /* is not a base pair*/
705 if (pt[i]==0) return 0;
709 /* is base pair and is the same bracket*/
710 if (pt[i-1]!=0 && ((pt[i-1]<pt[pt[i-1]]) == (pt[i]<pt[pt[i]]))) return 0;
714 if (pt[i+1]!=0 && ((pt[i-1]<pt[pt[i-1]]) == (pt[i]<pt[pt[i]]))) return 0;
720 /* if the structure has lone pairs*/
721 int find_lone_pair(short* str)
724 for(i=1; i<str[0]; i++) {
725 if (str[i]==0) continue; /* '.'*/
727 if (str[i]>str[str[i]]) { /* '('*/
728 if (i+1==str[0] || str[i+1]==0 || str[i+1]<str[str[i+1]]) {
730 } else while (i+1!=str[0] && str[i+1]!=0 && str[i+1]>str[str[i+1]]) i++;
733 if (str[i]<str[str[i]]) { /* ')'*/
734 if (i+1==str[0] || str[i+1]==0 || str[i+1]>str[str[i+1]]) {
736 } else while (i+1!=str[0] && str[i+1]!=0 && str[i+1]<str[str[i+1]]) i++;
744 int move_deepest( char *string,
767 enc.verbose_lvl=verbosity_level;
782 for (i=0; i<MAX_DEGEN; i++) enc.processed[i]=enc.unprocessed[i]=NULL;
785 str.structure = allocopy(ptable);
786 str.energy = energy_of_structure_pt(enc.seq, str.structure, enc.s0, enc.s1, 0);
788 while (move_set(&enc, &str)!=0) {
793 copy_arr(ptable, str.structure);
799 int move_first( char *string,
822 enc.verbose_lvl=verbosity_level;
837 for (i=0; i<MAX_DEGEN; i++) enc.processed[i]=enc.unprocessed[i]=NULL;
840 str.structure = allocopy(ptable);
841 str.energy = energy_of_structure_pt(enc.seq, str.structure, enc.s0, enc.s1, 0);
843 while (move_set(&enc, &str)!=0) {
848 copy_arr(ptable, str.structure);
854 int move_rand( char *string,
877 enc.verbose_lvl=verbosity_level;
891 /* allocate memory for moves */
892 enc.moves_from = (int*) space(ptable[0]*ptable[0]*sizeof(int));
893 enc.moves_to = (int*) space(ptable[0]*ptable[0]*sizeof(int));
896 for (i=0; i<MAX_DEGEN; i++) enc.processed[i]=enc.unprocessed[i]=NULL;
899 str.structure = allocopy(ptable);
900 str.energy = energy_of_structure_pt(enc.seq, str.structure, enc.s0, enc.s1, 0);
902 while (move_rset(&enc, &str)!=0) {
907 copy_arr(ptable, str.structure);
909 free(enc.moves_from);
915 int browse_neighs( char *string,
922 int (*funct) (struct_en*, struct_en*))
939 enc.verbose_lvl=verbosity_level;
954 for (i=0; i<MAX_DEGEN; i++) enc.processed[i]=enc.unprocessed[i]=NULL;
957 str.structure = allocopy(ptable);
958 str.energy = energy_of_structure_pt(enc.seq, str.structure, enc.s0, enc.s1, 0);
960 move_set(&enc, &str);
963 copy_arr(ptable, str.structure);
970 void print_stren(FILE *out, struct_en *str) {
971 print_str(out, str->structure);
972 fprintf(out, " %6.2f\n", str->energy/100.0);
975 void print_str(FILE *out, short *str) {
977 for (i=1; i<=str[0]; i++) {
978 if (str[i]==0) fprintf(out, ".");
979 else if (str[i]<i) fprintf(out, ")");
980 else fprintf(out, "(");