2 Last changed Time-stamp: <2008-07-04 16:15:50 ulim>
3 $Id: RNAup.c,v 1.5 2008/07/04 14:27:09 ivo Exp $
5 Ineractive Access to cofolding routines
22 #include "fold_vars.h"
25 #include "part_func.h"
26 #include "part_func_up.h"
28 #include "energy_const.h"
29 #include "RNAup_cmdl.h"
32 static char rcsid[] = "$Id: RNAup.c,v 1.5 2008/07/04 14:27:09 ivo Exp $";
34 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
35 #define MAX(x,y) (((x)>(y)) ? (x) : (y))
36 #define EQUAL(A,B) (fabs((A)-(B)) < 1000*DBL_EPSILON)
37 #define ULENGTH_MAXIMUM 21
38 #define COMMANDLINE_PARAMETERS_INIT_LENGTH 1024
40 PRIVATE void tokenize(char *line, char **seq1, char **seq2);
42 PRIVATE void seperate_bp(char **inter,int len1,char **intra_l,char **intra_s);
43 PRIVATE void print_interaction(interact *Int, char *s1, char *s2, pu_contrib *p_c, pu_contrib *p_c2, int w, int incr3, int incr5);
44 PRIVATE void print_unstru(pu_contrib *p_c, int w);
45 PRIVATE int compare_unpaired_values(const void *p1, const void *p2);
46 PRIVATE int move_useless_unpaired_values(const void *p1, const void *p2);
47 PRIVATE void adjustUnpairedValues(int ***unpaired_values); /* this function sorts and cleans up the unpaired values given at command line */
48 PRIVATE void appendCmdlParameter(char **param_dest, const char *parameter, int *param_dest_length);
50 /* defaults for -u and -w */
51 PRIVATE int default_u; /* -u options for plotting: plot pr_unpaired for 4 nucleotides */
54 /*--------------------------------------------------------------------------*/
55 int main(int argc, char *argv[]){
56 struct RNAup_args_info args_info;
57 unsigned int input_type, up_mode;
58 char temp_name[512], my_contrib[10], up_out[250], name[512];
59 char fname1[FILENAME_MAX_LENGTH], fname2[FILENAME_MAX_LENGTH], fname_target[FILENAME_MAX_LENGTH];
60 char *ParamFile, *ns_bases, *c, *structure;
61 char *head, *input_string, *s1, *s2, *s3, *s_target, *cstruc1, *cstruc2, *cstruc_target, *cstruc_combined;
62 char cmdl_tmp[2048], *cmdl_parameters, *orig_s1, *orig_s2, *orig_target;
63 int cmdl_parameters_length;
64 int i, j, length1, length2, l, length_target, sym, r, istty, rotated;
65 double energy, min_en;
69 int **unpaired_values; /* very new array that contains the different ulength values */
70 int ulength_num = 0; /* number of ulength values given on commandline */
72 /* variables for output */
73 pu_contrib *unstr_out, *unstr_short, *unstr_target, *contrib1, *contrib2;
77 /* commandline parameters */
78 int w = 25; /* length of region of interaction */
79 int incr3 = 0; /* add x unpaired bases after 3'end of short RNA*/
80 int incr5 = 0; /* add x unpaired bases after 5'end of short RNA*/
81 int unstr; /* length of unpaired region for output*/
82 int longerSeqFirst = 1; /* rotate input seq. to ensure that first sequence is the longer one (RNA_UP_MODE_2) */
83 int header = 1; /* print header in output file */
84 int output = 1; /* create output file */
86 /* more default settings for RNAup */
87 up_mode = RNA_UP_MODE_1; /* default RNAup mode, single sequence unpaired probabilities */
94 /* early initializing */
98 input_string = s1 = s2 = s3 = s_target = cstruc1 = cstruc2 = cstruc_target = cstruc_combined = NULL;
99 length1 = length2 = length_target = 0;
101 unstr_out = unstr_short = unstr_target = contrib1 = contrib2 = NULL;
102 structure = ParamFile = ns_bases = head = orig_s1 = orig_s2 = orig_target = NULL;
103 fname_target[0] = '\0';
104 /* allocate init length for commandline parameter string */
105 cmdl_parameters_length = COMMANDLINE_PARAMETERS_INIT_LENGTH;
106 cmdl_parameters = (char *)space(sizeof(char) * cmdl_parameters_length);
107 sprintf(cmdl_parameters, "RNAup ");
110 #############################################
111 # check the command line prameters
112 #############################################
114 if(RNAup_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
116 /* do not create header */
117 if(args_info.no_header_given) header = 0;
120 if(args_info.temp_given){
121 temperature = args_info.temp_arg;
122 /* collect parameter if header is needed */
124 sprintf(cmdl_tmp, "-T %f ", temperature);
125 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
129 /* structure constraint */
130 if(args_info.constraint_given){
132 /* collect parameter if header is needed */
134 appendCmdlParameter(&cmdl_parameters, "-C ", &cmdl_parameters_length);
137 /* do not take special tetra loop energies into account */
138 if(args_info.noTetra_given){
141 appendCmdlParameter(&cmdl_parameters, "-4 ", &cmdl_parameters_length);
144 /* set dangle model */
145 if(args_info.dangles_given){
146 if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
147 warn_user("required dangle model not implemented, falling back to default dangles=2");
149 dangles = args_info.dangles_arg;
152 sprintf(cmdl_tmp, "-d %d ", dangles);
153 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
157 /* do not allow weak pairs */
158 if(args_info.noLP_given){
161 appendCmdlParameter(&cmdl_parameters, "--noLP ", &cmdl_parameters_length);
164 /* do not allow wobble pairs (GU) */
165 if(args_info.noGU_given){
168 appendCmdlParameter(&cmdl_parameters, "--noGU ", &cmdl_parameters_length);
171 /* do not allow weak closing pairs (AU,GU) */
172 if(args_info.noClosingGU_given){
175 appendCmdlParameter(&cmdl_parameters, "--noClosingGU ", &cmdl_parameters_length);
178 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
179 if(args_info.noconv_given){
182 appendCmdlParameter(&cmdl_parameters, "--noconv ", &cmdl_parameters_length);
185 /* set energy model */
186 if(args_info.energyModel_given){
187 energy_set = args_info.energyModel_arg;
189 sprintf(cmdl_tmp, "-e %d ", energy_set);
190 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
193 /* take another energy parameter set */
194 if(args_info.paramFile_given){
195 ParamFile = strdup(args_info.paramFile_arg);
197 sprintf(cmdl_tmp, "-P %s ", ParamFile);
198 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
202 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
203 if(args_info.nsp_given){
204 ns_bases = strdup(args_info.nsp_arg);
206 sprintf(cmdl_tmp, "--nsp %s ", ns_bases);
207 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
211 /* set pf scaling factor */
212 if(args_info.pfScale_given){
213 sfact = args_info.pfScale_arg;
215 sprintf(cmdl_tmp, "-S %f ", sfact);
216 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
220 /* set the maximal length of interaction region */
221 if(args_info.window_given){
222 w = args_info.window_arg;
224 sprintf(cmdl_tmp, "-w %d ", w);
225 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
229 /* do not make an output file */
230 if(args_info.no_output_file_given){
234 /* set mode to unpaired regions in both RNAs */
235 if(args_info.include_both_given){
236 up_mode = RNA_UP_MODE_3;
238 appendCmdlParameter(&cmdl_parameters, "--include_both ", &cmdl_parameters_length);
241 /* set interaction mode 1 (pairwise interaction) */
242 if(args_info.interaction_pairwise_given){
243 up_mode = RNA_UP_MODE_2;
245 appendCmdlParameter(&cmdl_parameters, "--interaction_pairwise ", &cmdl_parameters_length);
248 /* set interaction mode 2 (first sequence interacts with all others) */
249 if(args_info.interaction_first_given){
250 up_mode = RNA_UP_MODE_3;
252 appendCmdlParameter(&cmdl_parameters, "--interaction_first ", &cmdl_parameters_length);
255 /* extend unpaired region 5' */
256 if(args_info.extend5_given){
257 incr5 = args_info.extend5_arg;
259 sprintf(cmdl_tmp, "-5 %d ", incr5);
260 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
264 /* extend unpaired region 3' */
265 if(args_info.extend3_given){
266 incr3 = args_info.extend3_arg;
268 sprintf(cmdl_tmp, "-3 %d ", incr3);
269 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
273 /* set contribution output */
274 if(args_info.contributions_given){
275 strncpy(my_contrib, args_info.contributions_arg, 10);
277 sprintf(cmdl_tmp, "-c %s ", my_contrib);
278 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
282 /* set length(s) of unpaired (unstructured) region(s) */
284 i = (args_info.ulength_given == 0) ? 1 : args_info.ulength_given;
286 /* here's the very new way of treating multiple ulength values/ranges */
287 unpaired_values = (int **)space(sizeof(int *) * (i + 1));
289 if(header && args_info.ulength_given)
290 appendCmdlParameter(&cmdl_parameters, "-u ", &cmdl_parameters_length);
292 for(i = 0; i < args_info.ulength_given; i++){
293 unpaired_values[++ulength_num] = (int *)space(2 * sizeof(int));
294 /* we got a ulength range... */
295 if(sscanf(args_info.ulength_arg[i], "%d-%d", &min, &max) == 2){
296 if(min > max){ tmp = min; min = max; max = tmp;}
298 unpaired_values[ulength_num][0] = min;
299 unpaired_values[ulength_num][1] = -1;
302 unpaired_values[ulength_num][0] = min;
303 unpaired_values[ulength_num][1] = max;
305 max_u = MAX2(max_u, max);
307 else if(sscanf(args_info.ulength_arg[i], "%d", &max) == 1){
308 unpaired_values[ulength_num][0] = max;
309 unpaired_values[ulength_num][1] = -1;
310 max_u = MAX2(max_u, max);
313 if(i < args_info.ulength_given - 1)
314 sprintf(cmdl_tmp, "%s,", args_info.ulength_arg[i]);
316 sprintf(cmdl_tmp, "%s ", args_info.ulength_arg[i]);
317 appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length);
321 /* use default settings */
322 unpaired_values[++ulength_num] = (int *)space(2 * sizeof(int));
323 unpaired_values[ulength_num][0] = 4;
324 unpaired_values[ulength_num][1] = -1;
327 /* store number of entries at position [0][0] */
328 unpaired_values[0] = (int *)space(2 * sizeof(int));
329 unpaired_values[0][0] = ulength_num;
330 unpaired_values[0][1] = -1;
332 /* adjust ranges and values such that everything is non-redundant
333 WARNING: after this step ulength_num may not reflect actual number of ulength values anymore */
334 adjustUnpairedValues(&unpaired_values);
336 /* free allocated memory of command line data structure */
337 RNAup_cmdline_parser_free (&args_info);
340 #############################################
342 #############################################
345 if (ParamFile != NULL)
346 read_parameter_file(ParamFile);
348 if (ns_bases != NULL) {
349 nonstandards = space(33);
357 nonstandards[i++]=*c++;
358 nonstandards[i++]=*c;
359 if ((sym)&&(*c!=*(c-1))) {
360 nonstandards[i++]=*c;
361 nonstandards[i++]=*(c-1);
368 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
369 if ((fold_constrained)&&(istty)) {
370 print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_RND_BRACK);
371 printf("constraints for intramolecular folding only:\n");
372 print_tty_constraint(VRNA_CONSTRAINT_NO_HEADER | VRNA_CONSTRAINT_ANG_BRACK);
373 printf("constraints for cofolding (intermolecular folding) only:\n");
374 print_tty_constraint(VRNA_CONSTRAINT_NO_HEADER | VRNA_CONSTRAINT_PIPE);
377 RT = ((temperature+K0)*GASCONST/1000.0);
379 #############################################
380 # main loop: continue until end of file
381 #############################################
389 ########################################################
390 # handle user input from 'stdin'
391 ########################################################
395 case RNA_UP_MODE_1: /* just calculate the probability of beeing unpaired for the given interval(s) */
396 print_tty_input_seq();
398 case RNA_UP_MODE_2: /* pairwise interaction mode, former -Xp mode */
399 print_tty_input_seq_str("Use either '&' to connect the 2 sequences or give each sequence on an extra line.");
401 case RNA_UP_MODE_3: /* consecutive multi interaction mode ;) first sequence pairs with all following, former -Xf mode */
402 /* either we wait for the first two sequences */
404 print_tty_input_seq_str("Give each sequence on an extra line. "
405 "The first seq. is stored, every other seq. is compared to the first one.");
406 /* or we already have them and wait for the next sequence */
408 print_tty_input_seq_str("Enter another sequence.");
410 default: nrerror("This should never happen (again)");
415 /* extract filename from fasta header if available */
416 while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
417 (void) sscanf(input_string, "%" XSTR(FILENAME_ID_LENGTH) "s", fname1);
418 printf(">%s\n", input_string); /* print fasta header if available */
422 /* break on any error, EOF or quit request */
423 if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
424 /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
426 tokenize(input_string, &s1, &s2); /* this also frees the input_string */
428 length1 = (int)strlen(s1);
429 length2 = (s2) ? (int)strlen(s2) : 0;
431 /* now we got the first and maybe a second sequence */
433 /* check if we have to change the mode we are operating in */
434 if((cut_point != -1) && (up_mode & RNA_UP_MODE_1)){
435 warn_user("Two concatenated sequences given, switching to pairwise interaction mode!");
436 up_mode = RNA_UP_MODE_2;
442 case RNA_UP_MODE_2: if(cut_point == -1)
445 case RNA_UP_MODE_3: if(cut_point == -1){
446 if(s_target == NULL) read_again = 1;
448 else if(s_target != NULL)
450 "After the first sequence (pair): Input a single sequence (no &)!\n"
451 "Each input seq. is compared to the very first seq. given.\n"
458 /* we are in this block only if we just have 1 sequence yet but need a second, too */
460 /* extract filename from fasta header if available */
461 while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
462 (void) sscanf(input_string, "%" XSTR(FILENAME_ID_LENGTH) "s", fname2);
463 printf(">%s\n", input_string); /* print fasta header if available */
466 /* break on any error, EOF or quit request */
467 if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
468 /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
470 tokenize(input_string, &s2, &s3); /* this also frees the input_string */
473 nrerror("Don't confuse me by mixing concatenated (&) with single sequences! Go, have some sleep and then check your input again...");
474 length2 = (int)strlen(s2);
477 /* convert DNA alphabet to RNA if not explicitely switched off */
482 /* store case-unmodified sequence */
483 orig_s1 = strdup(s1);
484 if(s2) orig_s2 = strdup(s2);
485 /* convert sequence to uppercase letters only */
489 /** read structure constraint(s) if necessary */
490 if (fold_constrained) {
491 char *cstruc_tmp = NULL;
494 input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
495 if(input_type & VRNA_INPUT_QUIT){ break;}
496 else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
499 tokenize(input_string, &cstruc1, &cstruc2);
501 else nrerror("constraints missing");
503 /* now that we've got the constraining structure(s) check if the input was valid */
504 if(old_cut != cut_point){
505 nrerror("RNAup -C: mixed single/dual sequence or constraint strings or different cut points");
511 if(up_mode & RNA_UP_MODE_2) read_again = 1;
512 else if((up_mode & RNA_UP_MODE_3) && (s_target == NULL)) read_again = 1;
516 input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
517 if(input_type & VRNA_INPUT_QUIT){ break;}
518 else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
519 tokenize(input_string, &cstruc2, &cstruc_tmp);
521 else nrerror("constraints missing");
524 nrerror("Don't confuse me by mixing concatenated (&) with single sequences! Go, have some sleep and then check your input again...");
527 /* check length(s) of input sequence(s) and constraint(s) */
528 if(strlen(cstruc1) != length1){
529 fprintf(stderr, "%s\n%s\n", s1, cstruc1);
530 nrerror("RNAup -C: constraint string and structure have unequal length");
533 if(strlen(cstruc2) != length2){
534 fprintf(stderr, "%s\n%s\n", s2, cstruc2);
535 nrerror("RNAup -C: constraint string and structure have unequal length");
538 } /* thats all for constraint folding */
540 /* rotate input sequences if upmode>=2 to ensure first sequence is the longer one */
541 if(up_mode & (RNA_UP_MODE_2 | RNA_UP_MODE_3)){
542 if(length1 < length2 ){
544 /* rotate the sequences such that the longer is the first */
545 int l = length2; length2 = length1; length1 = l;
546 char *s = s2; s2 = s1; s1 = s;
547 s = orig_s2; orig_s2 = orig_s1; orig_s1 = s;
548 /* also rotate the file names */
549 char f[FILENAME_MAX_LENGTH];
550 strncpy(f, fname2, FILENAME_ID_LENGTH);
551 strncpy(fname2, fname1, FILENAME_ID_LENGTH);
552 strncpy(fname1, f, FILENAME_ID_LENGTH);
553 /* rotate constraint strings as well */
554 if(fold_constrained){
555 s = cstruc2; cstruc2 = cstruc1; cstruc1 = s;
560 /* check ulength values against sequences given */
562 nrerror("maximum unpaired region exceeds sequence length");
564 if(up_mode & RNA_UP_MODE_3){
565 /* if we haven't seen the target yet, store it now */
566 if(s_target == NULL){
569 orig_target = orig_s2;
572 length_target = length2;
573 strcpy(fname_target, fname2);
574 if(fold_constrained){
575 cstruc_target = cstruc2;
581 orig_target = orig_s1;
582 length_target = length1;
588 strcpy(fname_target, fname1);
589 strcpy(fname1, fname2);
590 if(fold_constrained){
591 cstruc_target = cstruc1;
601 ########################################################
602 # done with 'stdin' handling
603 ########################################################
606 /* compose file names */
608 /* first file name */
609 if (fname1[0] != '\0'){
610 strcpy(up_out, fname1);
611 if(up_mode & (RNA_UP_MODE_2 | RNA_UP_MODE_3)){
612 if(fname2[0] != '\0'){
614 strcat(up_out, fname2);
616 else if(fname_target != '\0'){
618 strcat(up_out, fname_target);
622 strcpy(up_out, "RNA");
625 /* now, up_out has a maximal length of 104, it should be safe to concatenate more chars */
626 if(!(up_mode & RNA_UP_MODE_1)){
627 sprintf(temp_name,"_w%d",w);
628 strncat(up_out, temp_name, 10);
631 structure = (char *) space(sizeof(char) * (MAX2(length_target, MAX2(length1, length2)) + 1));
633 /* begin actual computations */
634 update_fold_params();
636 /* calc mfe of first sequence */
638 strncpy(structure, cstruc1, length1+1);
640 min_en = fold(s1, structure);
642 (void) fflush(stdout);
644 /* calc probability to be unstructured for 1st sequence (in upmode=3 this is not the target!) */
647 if(!(up_mode & RNA_UP_MODE_3)){
648 wplus += incr3 + incr5;
649 /* reset window size if maximum unstructured region is exceeds it */
650 if(max_u > wplus) wplus = max_u;
652 /* reset window size if sequence length is shorter */
653 if(length1 < wplus) wplus = length1;
655 /* calc mfe for first sequence (2nd if upmode = 3) */
656 if(cstruc1 != NULL) strncpy(structure, cstruc1, length1+1);
657 min_en = fold(s1, structure);
658 pf_scale = exp(-(sfact*min_en)/RT/length1);
659 if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
660 if (cstruc1 != NULL) strncpy(structure, cstruc1, length1+1);
661 energy = pf_fold(s1, structure);
662 unstr_out = pf_unstru(s1, wplus);
665 if(fold_constrained && !(up_mode & RNA_UP_MODE_1)){
666 cstruc_combined = (char *)space(sizeof(char) * (length1 + length2 + 1));
667 strncpy(cstruc_combined, cstruc1, length1+1);
668 strcat(cstruc_combined, cstruc2);
671 contrib1 = contrib2 = NULL;
675 case RNA_UP_MODE_1: for (i = 1; i <= unpaired_values[0][0]; i++) {
676 j = unpaired_values[i][0];
677 do print_unstru(unstr_out, j); while(++j <= unpaired_values[i][1]);
679 if(output && header){
680 head = (char *)space(sizeof(char)*(length1 + strlen(cmdl_parameters) + 512));
681 sprintf(head, "# %s\n# %d %s\n# %s", cmdl_parameters, length1, fname1, orig_s1);
683 contrib1 = unstr_out;
685 case RNA_UP_MODE_2: inter_out = pf_interact(s1, s2, unstr_out, NULL, w, cstruc_combined, incr3, incr5);
686 print_interaction(inter_out, orig_s1, orig_s2, unstr_out, NULL, w, incr3, incr5);
687 if(output && header){
688 head = (char *)space(sizeof(char)*(length1 + length2 + strlen(cmdl_parameters) + 1024));
689 sprintf(head, "# %s\n# %d %s\n# %s\n# %d %s\n# %s", cmdl_parameters, length1, fname1, orig_s1, length2, fname2, orig_s2);
691 contrib1 = unstr_out;
693 case RNA_UP_MODE_3: /* calculate prob. unstruct. for target seq */
694 if(unstr_target == NULL){
695 wplus = w + incr3 + incr5;
696 if(max_u > wplus) wplus = max_u;
697 if(length_target < wplus) wplus = length_target;
698 if (cstruc_target != NULL)
699 strncpy(structure, cstruc_target, length_target + 1);
700 min_en = fold(s_target, structure);
701 pf_scale = exp(-(sfact*min_en)/RT/length_target);
702 if (length_target>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
703 if (cstruc_target != NULL)
704 strncpy(structure, cstruc_target, length_target + 1);
705 energy = pf_fold(s_target, structure);
706 unstr_target = pf_unstru(s_target, wplus);
707 free_pf_arrays(); /* for arrays for pf_fold(...) */
709 /* check if target sequence is actually longer than query, if not rotate both sequences */
710 if(length_target < length1){
711 inter_out = pf_interact(s1, s_target, unstr_out, unstr_target, w, cstruc_combined, incr3, incr5);
712 print_interaction(inter_out, orig_s1, orig_target, unstr_out, unstr_target, w, incr3, incr5);
713 contrib1 = unstr_out;
714 contrib2 = unstr_target;
717 inter_out = pf_interact(s_target, s1, unstr_target, unstr_out, w, cstruc_combined, incr3, incr5);
718 print_interaction(inter_out, orig_target, orig_s1, unstr_target, unstr_out, w, incr3, incr5);
719 contrib1 = unstr_target;
720 contrib2 = unstr_out;
722 if(output && header){
723 head = (char *)space(sizeof(char)*(length_target + length1 + strlen(cmdl_parameters) + 1024));
724 sprintf(head, "# %s\n# %d %s\n# %s\n# %d %s\n# %s", cmdl_parameters, length_target, fname_target, orig_target, length1, fname1, orig_s1);
729 /* create additional output */
731 /* how to best compose a reasonable filename */
732 printf("RNAup output in file: ");
733 strcpy(name, up_out);
735 /* since we do not limit the amount of ulength values anymore we just put
736 the maximum length into the filename, the actual printed lengths
737 should be somewhere in the output itself */
738 sprintf(temp_name, "%d.out", unpaired_values[0][0]);
739 strcat(name, temp_name);
742 Up_plot(contrib1, contrib2, inter_out, name, unpaired_values, my_contrib, head, up_mode);
746 ########################################################
748 ########################################################
751 /* we can save the pu contribution structure of the target sequence for the next run */
752 if(unstr_target != NULL){
753 if(length_target < length1) free_pu_contrib_struct(contrib1);
754 else free_pu_contrib_struct(contrib2);
757 if(contrib1 != NULL) free_pu_contrib_struct(contrib1);
758 if(contrib2 != NULL) free_pu_contrib_struct(contrib2);
761 if(inter_out != NULL)
762 free_interact(inter_out);
764 /* free all unnecessary character arrays */
765 if(structure != NULL) free(structure);
766 if(s1 != NULL) free(s1);
767 if(s2 != NULL) free(s2);
768 if(orig_s1) free(orig_s1);
769 if(orig_s2) free(orig_s2);
770 if(cstruc1 != NULL) free(cstruc1);
771 if(cstruc2 != NULL) free(cstruc2);
772 if(head != NULL) free(head);
773 if(cstruc_combined != NULL) free(cstruc_combined);
775 structure = s1 = s2 = orig_s1 = orig_s2 = cstruc1 = cstruc2 = head = cstruc_combined = NULL;
777 free_arrays(); /* for arrays for fold(...) */
779 free(cmdl_parameters);
783 PRIVATE int compare_unpaired_values(const void *p1, const void *p2){
784 if((*(int **)p1)[0] > (*(int **)p2)[0]) return 1;
785 if((*(int **)p1)[0] < (*(int **)p2)[0]) return -1;
789 PRIVATE int move_useless_unpaired_values(const void *p1, const void *p2){
790 if((*(int **)p1)[1] < (*(int **)p2)[1]) return 1;
791 if((*(int **)p1)[0] > (*(int **)p2)[0]) return -1;
795 PRIVATE void adjustUnpairedValues(int ***unpaired_values){
796 int i, last_max, real_count;
798 if(*unpaired_values == NULL) return;
799 if((*unpaired_values)[0][0] <= 0) return;
800 /* sort the ranges array */
801 qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), compare_unpaired_values);
803 last_max = (*unpaired_values)[1][1] != -1 ? (*unpaired_values)[1][1] : (*unpaired_values)[1][0];
805 for(i = 2; i <= (*unpaired_values)[0][0]; i++){
806 if((*unpaired_values)[i][1] == -1){
807 /* we just have a single value */
808 if((*unpaired_values)[i][0] <= last_max)
809 (*unpaired_values)[i][1] = -2; /* mark this entry to be removed */
811 last_max = (*unpaired_values)[i][0];
815 /* we have a range of values */
816 if(((*unpaired_values)[i][0] <= last_max) && ((*unpaired_values)[i][1] <= last_max))
817 (*unpaired_values)[i][1] = -2; /* mark this entry to be removed as the whole range is already covered */
818 else if(((*unpaired_values)[i][0] <= last_max) && ((*unpaired_values)[i][1] > last_max)){
819 (*unpaired_values)[i][0] = last_max + 1;
820 last_max = last_max + 1;
821 if((*unpaired_values)[i][1] == last_max)
822 (*unpaired_values)[i][1] = -1; /* range reduced to single value */
824 last_max = (*unpaired_values)[i][1]; /* maximum of range */
828 last_max = (*unpaired_values)[i][1];
834 /* sort entries again to get rid of useless ones */
835 qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), move_useless_unpaired_values);
837 /* free memory we dont need anymore */
838 for(i = real_count+1; i <= (*unpaired_values)[0][0]; i++)
839 free((*unpaired_values)[i]);
840 (*unpaired_values) = (int **)realloc((*unpaired_values), (real_count + 1) * sizeof(int **));
842 (*unpaired_values)[0][0] = real_count;
843 /* sort the array again */
844 qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), compare_unpaired_values);
849 *** Append a parameter char string to a larger char string savely
851 *** \param param_dest A pointer to the char array where the new parameter will be concatenated to
852 *** \param parameter The new parameter to be concatenated
853 *** \param param_dest_length A pointer to the size of the already allocated space of param_dest
855 PRIVATE void appendCmdlParameter(char **param_dest, const char *parameter, int *param_dest_length){
856 int l = strlen(*param_dest) + strlen(parameter);
857 if(l + 1 > *param_dest_length){
858 /* alloc more space */
859 *param_dest_length = (int)(1.2 * l);
860 *param_dest = xrealloc(*param_dest, *param_dest_length * sizeof(char));
862 strcat(*param_dest, parameter);
866 /* call: tokenize(line,&seq1,&seq2); the sequence string is split at the "&"
867 and the first seq is written in seq1, the second into seq2 */
868 /* using sscanf instead of strcpy get's rid of trainling junk on the input line */
869 void tokenize(char *line, char **seq1, char **seq2) {
873 pos = strchr(line, '&');
875 cut = (int) (pos-line)+1;
876 (*seq1) = (char *) space((cut+1)*sizeof(char));
877 (*seq2) = (char *) space(((strlen(line)-cut)+2)*sizeof(char));
879 if (strchr(pos+1, '&')) nrerror("more than one cut-point in input");
881 (void) sscanf(line, "%s", *seq1);
882 (void) sscanf(pos+1, "%s", *seq2);
884 (*seq1) = (char *) space((strlen(line)+1)*sizeof(char));
886 sscanf(line, "%s", *seq1);
890 if (cut_point==-1) cut_point = cut;
891 else if (cut_point != cut) {
892 fprintf(stderr,"cut_point = %d cut = %d\n", cut_point, cut);
893 nrerror("Sequence and Structure have different cut points.");
900 /* divide the constraints string in intermolecular constrains (inter)
901 and intramolecular constrains within both sequences */
902 /* len1 is the length of the LONGER input seq ! */
903 void seperate_bp(char **inter, int len1, char **intra_l, char **intra_s) {
906 char *temp_inter, *pt_inter;
908 len=strlen((*inter));
909 /* printf("inter\n%s\n",(*inter)); */
911 temp_inter=(char*)space(sizeof(char)*i);
912 /* to make a pair_table convert <|> to (|) */
913 pt_inter=(char*)space(sizeof(char)*i);
914 /* if shorter seq is first seq in constrained string, write the
915 longer one as the first one */
916 temp_inter[strlen((*inter))] = '\0';
917 pt_inter[strlen((*inter))] = '\0';
918 if (cut_point < len1) {
919 /* write the constrain for the longer seq first */
920 for (j=0,i=cut_point-1;i<len;i++,j++) {
921 switch ((*inter)[i]){
931 temp_inter[j] = (*inter)[i];
935 /* then add the constrain for the shorter seq */
936 for (i=0;i< cut_point-1;i++,j++) {
937 switch ((*inter)[i]){
947 temp_inter[j] = (*inter)[i];
952 strcpy((*inter),temp_inter);
954 for (i=0;i<strlen((*inter));i++) {
955 switch ((*inter)[i]){
968 pt = make_pair_table(pt_inter);
970 /* intramolecular structure in longer (_l) and shorter (_s) seq */
971 (*intra_l)=(char*)space(sizeof(char)*(len1+1));
972 (*intra_s)=(char*)space(sizeof(char)*(strlen((*inter))-len1+2));
973 (*intra_l)[len1] = '\0';
974 (*intra_s)[strlen((*inter))-len1+1] = '\0';
975 /* now seperate intermolecular from intramolecular bp */
976 for (i=1;i<=pt[0];i++) {
978 temp_inter[i-1] = (*inter)[i-1];
980 (*intra_l)[i-1] = (*inter)[i-1];
981 if ((*inter)[i-1] == '|')
982 (*intra_l)[i-1] = '.';
984 (*intra_s)[i-cut_point] = (*inter)[i-1];
985 if ((*inter)[i-1] == '|')
986 (*intra_s)[i-cut_point] = '.';
990 /* intermolekular bp */
991 if (pt[i]>=cut_point){
992 temp_inter[i-1] = (*inter)[i-1];
993 (*intra_l)[i-1] = '.';
994 (*intra_s)[pt[i]-cut_point] = '.';
995 } else { /* intramolekular bp */
996 (*intra_l)[i-1] = (*inter)[i-1];
997 temp_inter[i-1] = '.';
999 } else { /* i>=cut_point */
1000 /* intermolekular bp */
1001 if (pt[i] < cut_point){
1002 temp_inter[i-1] = (*inter)[i-1];
1003 /* (*intra_s)[i-1] = '.'; */
1004 } else { /* intramolekular bp */
1005 (*intra_s)[i-cut_point] = (*inter)[i-1];
1006 temp_inter[i-1] = '.';
1012 /* printf("%s -1\n%s -2\n%s -3\n%s -4\n",(*inter),temp_inter,(*intra_l),(*intra_s)); */
1013 strcpy((*inter),temp_inter);
1019 PRIVATE void print_interaction(interact *Int, char *s1, char *s2, pu_contrib *p_c, pu_contrib *p_c2, int w, int incr3, int incr5) {
1020 char *i_long,*i_short;
1021 int i,len, l_l, l_s, len1, end5, end3, i_min, j_min, l1, add_a, add_b,nix_up;
1023 double G_min,Gi_min,Gul, G_sum, Gus, diff;
1028 Gi_min = Int->Gikjl_wo;
1030 len=strlen(s1)+strlen(s2);
1032 /* use duplexfold() to fold the interaction site */
1033 l_l = (Int->i-Int->k+1);
1034 i_long = (char*)space(sizeof(char)*(l_l+1));
1035 l_s = (Int->l-Int->j+1);
1036 i_short = (char*)space(sizeof(char)*(l_s+1));
1038 strncpy(i_long,&s1[Int->k-1],l_l);
1040 strncpy(i_short,&s2[Int->j-1],l_s);
1041 i_short[l_s] = '\0';
1043 mfe = duplexfold(i_long,i_short);
1047 l1 = strchr(mfe.structure, '&')-mfe.structure;
1049 /* printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", mfe.structure, i_min+1-l1,
1050 i_min, j_min, j_min+strlen(mfe.structure)-l1-2, mfe.energy ); */
1052 /* structure by duplexfold is shorter than structure by RNAup:*/
1054 add_a = add_b = 0; /* length difference in longer / shorter sequence*/
1056 if(((i_min+1-l1) - i_min) != ( Int->k - Int->i)) {
1057 add_a = Int->i - Int->k + 2;
1059 if(((j_min+strlen(mfe.structure)-l1-2) - j_min) != (Int->l - Int->j)) {
1060 add_b = Int->l - Int->j +2;
1062 /* printf("add_a %d add_b %d\n",add_a,add_b); */
1063 if( add_a || add_b ) {
1065 if(add_a && add_b == 0) add_b = Int->l - Int->j + 2;
1066 if(add_a == 0 && add_b) add_a = Int->i - Int->k + 2;
1067 struc = (char*)space(sizeof(char)*(add_a+add_b+3));
1068 for(i=0;i<(add_a+add_b-1);i++) {
1069 if(i != l_l) struc[i] = '.';
1070 if(i == l_l) struc[i] = '&';
1074 l1=strlen(mfe.structure);
1075 struc = (char*)space(sizeof(char)*(l1+1));
1076 strcpy(struc,mfe.structure);
1079 end5 = MAX(1,Int->k-incr5);
1080 end3 = MIN(MIN(l_l-1+incr3,w+incr3+incr5),len1);
1081 p_c_S = p_c->H[end5][end3]+p_c->I[end5][end3]+p_c->M[end5][end3]+p_c->E[end5][end3];
1082 Gul = -RT*log(p_c_S);
1085 G_sum = Gi_min + Gul;
1087 /* printf("dG = dGint + dGu_l\n"); */
1088 printf("%s %3d,%-3d : %3d,%-3d (%.2f = %.2f + %.2f)\n",
1089 struc, Int->k, Int->i, Int->j, Int->l, G_min, Gi_min, Gul);
1090 printf("%s&%s\n",i_long,i_short);
1092 p_c_S = p_c2->H[Int->j][(Int->l)-(Int->j)]+
1093 p_c2->I[Int->j][(Int->l)-(Int->j)]+
1094 p_c2->M[Int->j][(Int->l)-(Int->j)]+
1095 p_c2->E[Int->j][(Int->l)-(Int->j)];
1096 Gus = -RT*log(p_c_S);
1097 G_sum = Gi_min + Gul +Gus;
1098 /* printf("dG = dGint + dGu_l + dGu_s\n"); */
1099 printf("%s %3d,%-3d : %3d,%-3d (%.2f = %.2f + %.2f + %.2f)\n",
1100 struc, Int->k, Int->i, Int->j, Int->l, G_min, Gi_min, Gul, Gus);
1101 printf("%s&%s\n",i_long,i_short);
1103 if (!EQUAL(G_min,G_sum)) {
1105 diff = fabs((G_min)-(G_sum));
1106 printf("diff %.18f\n",diff);
1108 if(nix_up) fprintf(stderr,"RNAduplex structure doesn't match any structure of RNAup structure ensemble\n");
1111 free(mfe.structure);
1115 /* print coordinates and free energy for the region of highest accessibility */
1116 PRIVATE void print_unstru(pu_contrib *p_c, int w) {
1117 int i,j,len,min_i,min_j;
1118 double dG_u, min_gu;
1124 for (i=1; i<=len; i++) {
1125 for (j=i; j < MIN((i+w),len+1); j++) {
1127 if ((j-i+1) == w && i+w-1 <= len) {
1128 blubb = p_c->H[i][j-i]+p_c->I[i][j-i]+p_c->M[i][j-i]+p_c->E[i][j-i];
1129 dG_u = -RT*log(blubb);
1130 if (dG_u < min_gu ) {
1138 printf("%4d,%4d \t (%.3f) \t for u=%3d\n",min_i,min_j,min_gu,w);
1140 nrerror("error with prob unpaired");