1 /* Last changed Time-stamp: <2007-10-29 17:34:19 htafer> */
3 Compute duplex structure of two RNA strands
17 #include <sys/types.h>
19 #include "energy_par.h"
25 #include "fold_vars.h"
30 #include "read_epars.h"
31 #include "RNAplex_cmdl.h"
39 /* timer declaration */
40 clock_t Begin; /* initialize Begin */
41 Begin = clock() ; /* start the timer */
45 clock_t EndTimer(clock_t begin)
48 End = clock() ; /* stop the timer */
53 /* --------------------end include timer */
54 extern int subopt_sorted;
55 /* static int print_struc(duplexT const *dup); */
56 static int ** average_accessibility_target(char **names, char **ALN, int number, char *access, double verhaeltnis,const int alignment_length, int binaries);
57 /* static int ** average_accessibility_query(char **names, char **ALN, int number, char *access, double verhaeltnis); */
58 static int get_max_u(const char *s, char delim);
59 static int ** read_plfold_i(char *fname, const int beg, const int end,double verhaeltnis, const int length);
60 /* static int ** read_plfold_j(char *fname, const int beg, const int end,double verhaeltnis); */
61 static int ** read_plfold_i_bin(char *fname, const int beg, const int end,double verhaeltnis, const int length);
62 static int get_sequence_length_from_alignment(char *sequence);
63 /* take as argument a list of hit from an alignment interaction */
64 /* Accessibility can currently not be provided */
65 static void aliprint_struct(FILE *Result, /* result file */
66 FILE *Target, /* target alignment */
68 const int WindowsLength);
69 static void linear_fit(int *a, int *b, int *c, int *d); /* get linear fits for Iopn, Iextension, Bopn, Bextension, I^1open and I^1extension */
72 *** Compute Tm based on silvana's parameters (1999)
74 double probcompute_silvana_98(char *s1, double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration);
75 double probcompute_silvana_04(char *s1, double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration);
76 double probcompute_xia_98(char *s1, double na_concentration, double probe_concentration);
77 double probcompute_sug_95(char *s1, double na_concentration, double probe_concentration);
78 double probcompute_newparameters(char *s1,double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration);
80 static int convert_plfold_i(char *fname);/* convert test accessibility into bin accessibility. */
81 static char rcsid[] = "$Id: rnaplex.c,v 1.10 2007/12/21 15:30:48 htafer Exp $";
83 static char scale[] = "....,....1....,....2....,....3....,....4"
84 "....,....5....,....6....,....7....,....8";
86 /*--------------------------------------------------------------------------*/
88 int main(int argc, char *argv[])
90 struct RNAplex_args_info args_info;
91 #define MAX_NUM_NAMES 500
92 char *temp1[MAX_NUM_NAMES], *temp2[MAX_NUM_NAMES], *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
93 char *s1=NULL, *s2=NULL, *line, *cstruc=NULL, *structure=NULL;
94 char *line_q=NULL, *line_t=NULL;
95 char* tname=NULL;char *qname=NULL;
97 char fname[FILENAME_MAX_LENGTH];
99 char *ns_bases=NULL, *c;
101 FILE *Result=NULL; /* file containing the results */
102 FILE *mRNA=NULL, *sRNA=NULL;
104 char *Resultfile=NULL;
105 int fold_constrained=0;
108 /* double kT, sfact=1.07; */
109 int istty, delta=-INF;
111 int extension_cost=0;
113 int alignment_length=40;
119 *** Defines how many nucleotides has to be added at the begining and end of the target and query sequence in order to generate the structure figure
122 int alignment_mode=0;
124 *** Define scaling factor for the accessibility. Default 1
126 double verhaeltnis=1;
128 *** Probe Tm computation
130 double probe_concentration=0.05;
131 double na_concentration=1;
132 double mg_concentration=0;
133 double k_concentration=0;
134 double tris_concentration=0;
137 #############################################
138 # check the command line parameters
139 #############################################
141 if(RNAplex_cmdline_parser (argc,argv,&args_info)!=0) exit(1);
143 if(args_info.temp_given) temperature = args_info.temp_arg;
145 if(args_info.query_given) qname = strdup(args_info.query_arg);
147 if(args_info.target_given) tname = strdup(args_info.target_arg);
148 /*interaction_length*/
149 alignment_length = args_info.interaction_length_arg;
151 extension_cost = args_info.extension_cost_arg;
153 deltaz = args_info.duplex_distance_arg;
155 delta = (int) (100*args_info.energy_threshold_arg);
157 fast = args_info.fast_folding_arg;
159 if(args_info.accessibility_dir_given) access = strdup(args_info.accessibility_dir_arg);
161 if(args_info.produce_ps_given) {Resultfile=strdup(args_info.produce_ps_arg);redraw = 1;}
163 WindowsLength = args_info.WindowLength_arg;
164 /*scale_accessibility_arg*/
165 verhaeltnis = args_info.scale_accessibility_arg;
167 if(args_info.constraint_given) fold_constrained = 1;
169 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
171 if(args_info.binary_given) binaries = 1;
173 if(args_info.convert_to_bin_given) convert=1;
175 if(args_info.alignment_mode_given) alignment_mode=1;
177 if(args_info.probe_mode_given) probe_mode=1;
178 /*sodium concentration*/
179 na_concentration = args_info.na_concentration_arg;
180 /*magnesium concentration*/
181 mg_concentration = args_info.mg_concentration_arg;
182 /*potassium concentration*/
183 k_concentration = args_info.k_concentration_arg;
184 /*tris concentration*/
185 tris_concentration = args_info.tris_concentration_arg;
186 /*probe concentration*/
187 probe_concentration = args_info.probe_concentration_arg;
189 /*Probe mode Salt concentration*/
190 if (ParamFile != NULL)
191 read_parameter_file(ParamFile);
192 if (ns_bases != NULL) {
193 nonstandards = space(33);
201 nonstandards[i++]=*c++;
202 nonstandards[i++]=*c;
203 if ((sym)&&(*c!=*(c-1))) {
204 nonstandards[i++]=*c;
205 nonstandards[i++]=*(c-1);
211 int il_a,il_b,b_a,b_b;
212 linear_fit(&il_a,&il_b,&b_a,&b_b);
214 *** check if we have two input files
220 *** Here we test if the user wants to convert a bunch of text opening energy files
225 nrerror("No query/target file allowed in Tm probe mode\nPlease pipe your input into RNAplex\n");
229 /* fix temperature to 37C */
231 printf("Probe mode\n");
235 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
236 update_fold_params();
237 P = scale_parameters();
240 /*Initialize parameter */
241 printf("Concentration K:%3.3f TNP:%3.3f Mg:%3.3f Na:%3.3f probe:%3.3f\n\n", k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
242 printf("%100s %7s %7s %7s %7s %7s\n", "sequence", "DDSL98", "DDSL04", "DRSU95", "RRXI98","CURRENT");
244 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
245 if ((line = get_line(stdin))==NULL) break;
246 /* skip empty lines, comment lines, name lines */
247 while ((*line=='*')||(*line=='\0')||(*line=='>')) {
248 printf("%s\n", line);
250 id_s1 = (char*) space (strlen(line)+2);
251 (void) sscanf(line,"%s",id_s1);
252 memmove(id_s1, id_s1+1, strlen(id_s1));
255 if ((line = get_line(stdin))==NULL) {
260 if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
261 s1 = (char *) space(strlen(line)+1);
263 /*compute duplex/entropy energy for the reverse complement*/;
265 Tm = probcompute_silvana_98(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
266 printf("%100s %*.2f ",s1,6,Tm);
267 Tm = probcompute_silvana_04(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
268 printf("%*.2f ",6, Tm);
269 Tm = probcompute_sug_95(line, na_concentration, probe_concentration);
270 printf("%*.2f ",6, Tm);
271 Tm = probcompute_xia_98(line, na_concentration, probe_concentration);
272 printf("%*.2f ",6, Tm);
273 Tm = probcompute_newparameters(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
274 printf("%*.2f\n",6,Tm);
277 RNAplex_cmdline_parser_free (&args_info);
280 if(convert && access) {
282 strcpy(pattern,"_openen");
287 *** Check if the directory where the opening energy files are located exists
290 while((dir=readdir(dfd))!=NULL){
296 strcat(name,dir->d_name);
297 while(name[strPos]!='\0'){
298 if(name[strPos]==pattern[0]){
300 *** Check if the files we are looking ends in openen and not openen_bin,
301 *** in order to avoid that RNAplex converts bin-file to bin-file
303 while(name[strPos+PatternPos]==pattern[PatternPos] && pattern[PatternPos]!='\0' && name[strPos+PatternPos]!='\0'){
306 if(name[strPos+PatternPos]=='\0' && pattern[PatternPos]=='\0'){
308 *** convert_plfold_i is the function that makes the hardwork
310 convert_plfold_i(name);
321 RNAplex_cmdline_parser_free (&args_info);
326 *** Here we check if the user wants to produce PS formatted structure files from existing RNAplex dot-parenthesis-formated results. Depending on the kind of input, Alignments or single sequence we will produce either a color annotated alignment or RNAfold-like structure, respectively.
330 *** Check single sequence case.
332 if(!(alignment_mode) && (tname && qname)) {
333 mRNA=fopen(tname, "r");
334 if(mRNA==NULL){printf("%s: Wrong target file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
335 sRNA=fopen(qname, "r");
336 if(sRNA==NULL){printf("%s: Wrong query file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
337 nrerror("Sorry not implemented yet");
339 *** We have no single sequence case. Check if we have alignments.
341 else if((alignment_mode) && (tname && qname)){
342 mRNA=fopen(tname, "r");
343 if(mRNA==NULL){printf("%s: Wrong target file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
344 sRNA=fopen(qname, "r");
345 if(sRNA==NULL){printf("%s: Wrong query file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
346 Result=fopen(Resultfile, "r");
347 if(sRNA==NULL){printf("%s: Wrong query file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
349 aliprint_struct(Result,mRNA,sRNA,(const int) WindowsLength);
350 RNAplex_cmdline_parser_free (&args_info);
355 *** User was not able to input either two alignments or two single sequence files
357 printf("Please enter either two alignments or single sequence files\n");
358 RNAplex_cmdline_parser_print_help();
362 update_fold_params();
363 if (ParamFile != NULL)
364 read_parameter_file(ParamFile);
365 if (ns_bases != NULL) {
366 nonstandards = space(33);
374 nonstandards[i++]=*c++;
375 nonstandards[i++]=*c;
376 if ((sym)&&(*c!=*(c-1))) {
377 nonstandards[i++]=*c;
378 nonstandards[i++]=*(c-1);
384 int il_a,il_b,b_a,b_b;
385 linear_fit(&il_a,&il_b,&b_a,&b_b);
388 *** check if we have two input files
390 if((qname==NULL && tname) || (qname && tname==NULL)) RNAplex_cmdline_parser_print_help();
391 else if(qname && tname && !(alignment_mode)) {
393 /*free allocated memory of commandline parser*/
394 RNAplex_cmdline_parser_free (&args_info);
396 if(!fold_constrained) {
399 mRNA=fopen(tname, "r");
400 if(mRNA==NULL){printf("%s: Wrong snoRNA file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
401 sRNA=fopen(qname, "r");
402 if(sRNA==NULL){printf("%s: Wrong target file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
403 do { /* main loop: continue until end of file */
404 if ((line_t = get_line(mRNA))==NULL) {
407 /*parse line, get id for further accessibility fetching*/
408 while ((*line_t=='*')||(*line_t=='\0')||(*line_t=='>')) {
414 id_s1 = (char*) space (strlen(line_t)+2);
415 (void) sscanf(line_t,"%s",id_s1);
416 memmove(id_s1, id_s1+1, strlen(id_s1));
423 if ((line_t = get_line(mRNA))==NULL) {
427 /*append N's to the sequence in order to avoid boundary checking*/
428 if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) {
435 s1 = (char *) space(strlen(line_t)+1+20);
436 strcpy(s1,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
439 strcat(s1,"NNNNNNNNNN\0");
442 for (l = 0; l < s1_len ; l++) {
443 s1[l] = toupper(s1[l]);
444 if (!noconv && s1[l] == 'T') s1[l] = 'U';
447 /*read accessibility*/
450 file_s1 = (char *) space(sizeof(char) * (strlen(id_s1)+strlen(access)+20));
451 strcpy(file_s1, access);
452 strcat(file_s1, "/");
453 strcat(file_s1, id_s1);
454 strcat(file_s1, "_openen");
456 access_s1 = read_plfold_i(file_s1,1,s1_len,verhaeltnis,alignment_length);
459 strcat(file_s1,"_bin");
460 access_s1 = read_plfold_i_bin(file_s1,1,s1_len,verhaeltnis,alignment_length);
462 if(access_s1 == NULL){
463 printf("Accessibility file %s not found or corrupt, look at next target RNA\n",file_s1);
472 if ((line_q = get_line(sRNA))==NULL) {
475 while ((*line_q=='*')||(*line_q=='\0')||(*line_q=='>')) {
481 id_s2 = (char*) space (strlen(line_q)+2);
482 (void) sscanf(line_q,"%s",id_s2);
483 memmove(id_s2, id_s2+1, strlen(id_s2));
490 if ((line_q = get_line(sRNA))==NULL) {
494 if ((line_q ==NULL) || (strcmp(line_q, "@") == 0)){
495 if(id_s2){free(id_s2);id_s2=NULL;}
502 s2 = (char *) space(strlen(line_q)+1+20);
503 strcpy(s2,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
506 strcat(s2,"NNNNNNNNNN\0");
509 for (l = 0; l < s2_len ; l++) {
510 s2[l] = toupper(s2[l]);
511 if (!noconv && s2[l] == 'T') s2[l] = 'U';
515 file_s2 = (char *) space(sizeof(char) * (strlen(id_s2)+strlen(access)+20));
516 strcpy(file_s2, access);
517 strcat(file_s2, "/");
518 strcat(file_s2, id_s2);
519 strcat(file_s2, "_openen");
521 access_s2 = read_plfold_i(file_s2,1,s2_len,verhaeltnis,alignment_length);
524 strcat(file_s2,"_bin");
525 access_s2 = read_plfold_i_bin(file_s2,1,s2_len,verhaeltnis,alignment_length);
527 if(access_s2 == NULL){
528 printf("Accessibility file %s not found, look at next target RNA\n",file_s2);
535 printf(">%s\n>%s\n", id_s1, id_s2);
536 double begin = BeginTimer();
537 Lduplexfold_XS(s1,s2, (const int **) access_s1, (const int **) access_s2, delta,alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
540 elapTicks = (EndTimer(begin) - begin);
541 elapMilli = elapTicks/1000;
542 /* printf("Millisecond %.2f\n",elapMilli); */
567 else if(access==NULL){ /* t and q are defined, but no accessibility is provided */
568 mRNA=fopen(tname, "r");
569 if(mRNA==NULL){printf("%s: Wrong snoRNA file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
570 sRNA=fopen(qname, "r");
571 if(sRNA==NULL){printf("%s: Wrong target file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
572 do { /* main loop: continue until end of file */
573 char *id_s1=NULL; /* header of the target file */
574 if ((line_t = get_line(mRNA))==NULL) {
577 /*parse line, get id for further accessibility fetching*/
578 while ((*line_t=='*')||(*line_t=='\0')||(*line_t=='>')) {
580 if(id_s1){ /* in case we have two header the one after the other */
581 free(id_s1); /* free the old header, a put the new one instead */
584 id_s1 = (char*) space (strlen(line_t)+2);
585 (void) sscanf(line_t,"%s",id_s1);
586 memmove(id_s1, id_s1+1, strlen(id_s1));
593 if ((line_t = get_line(mRNA))==NULL) {
597 /*append N's to the sequence in order to avoid boundary checking*/
598 if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) {
605 s1 = (char *) space(strlen(line_t)+1+20);
606 strcpy(s1,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
609 strcat(s1,"NNNNNNNNNN\0");
612 for (l = 0; l < s1_len ; l++) {
613 s1[l] = toupper(s1[l]);
614 if (!noconv && s1[l] == 'T') s1[l] = 'U';
619 if ((line_q = get_line(sRNA))==NULL) {
622 while ((*line_q=='*')||(*line_q=='\0')||(*line_q=='>')) {
628 id_s2 = (char*) space (strlen(line_q)+2);
629 (void) sscanf(line_q,"%s",id_s2);
630 memmove(id_s2, id_s2+1, strlen(id_s2));
637 if ((line_q = get_line(sRNA))==NULL) {
641 if ((line_q ==NULL) || (strcmp(line_q, "@") == 0)){
642 if(id_s2){free(id_s2);}
649 s2 = (char *) space(strlen(line_q)+1+20);
650 strcpy(s2,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
653 strcat(s2,"NNNNNNNNNN\0");
656 for (l = 0; l < s2_len ; l++) {
657 s2[l] = toupper(s2[l]);
658 if (!noconv && s2[l] == 'T') s2[l] = 'U';
660 printf(">%s\n>%s\n", id_s1, id_s2);
661 /* double begin = BeginTimer(); */
662 Lduplexfold(s1,s2,delta,extension_cost,alignment_length, deltaz, fast,il_a, il_b, b_a, b_b);
663 /* float elapTicks; */
664 /* float elapMilli; */
665 /* elapTicks = (EndTimer(begin) - begin); */
666 /* elapMilli = elapTicks/1000; */
667 /* printf("Millisecond %.2f\n",elapMilli); */
685 mRNA=fopen(tname, "r");
686 if(mRNA==NULL){printf("%s: Wrong snoRNA file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
687 sRNA=fopen(qname, "r");
688 if(sRNA==NULL){printf("%s: Wrong target file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
689 do { /* main loop: continue until end of file */
690 if ((line_t = get_line(mRNA))==NULL) {
693 /*parse line, get id for further accessibility fetching*/
694 while ((*line_t=='*')||(*line_t=='\0')||(*line_t=='>')) {
696 if(id_s1){ /* in case we have two header the one after the other */
697 free(id_s1); /* free the old header, a put the new one instead */
700 id_s1 = (char*) space (strlen(line_t)+2);
701 (void) sscanf(line_t,"%s",id_s1);
702 memmove(id_s1, id_s1+1, strlen(id_s1));
709 if ((line_t = get_line(mRNA))==NULL) {
713 /*append N's to the sequence in order to avoid boundary checking*/
714 if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) {
721 s1 = (char *) space(strlen(line_t)+1+20);
722 strcpy(s1,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
725 strcat(s1,"NNNNNNNNNN\0");
728 for (l = 0; l < s1_len ; l++) {
729 s1[l] = toupper(s1[l]);
730 if (!noconv && s1[l] == 'T') s1[l] = 'U';
733 /*read accessibility*/
736 file_s1 = (char *) space(sizeof(char) * (strlen(id_s1)+strlen(access)+20));
737 strcpy(file_s1, access);
738 strcat(file_s1, "/");
739 strcat(file_s1, id_s1);
740 strcat(file_s1, "_openen");
742 access_s1 = read_plfold_i(file_s1,1,s1_len,verhaeltnis,alignment_length);
745 strcat(file_s1,"_bin");
746 access_s1 = read_plfold_i_bin(file_s1,1,s1_len,verhaeltnis,alignment_length);
748 if(access_s1 == NULL){
749 printf("Accessibility file %s not found, look at next target RNA\n",file_s1);
759 if ((line_q = get_line(sRNA))==NULL) {
762 while ((*line_q=='*')||(*line_q=='\0')||(*line_q=='>')) {
768 id_s2 = (char*) space (strlen(line_q)+2);
769 (void) sscanf(line_q,"%s",id_s2);
770 memmove(id_s2, id_s2+1, strlen(id_s2));
777 if ((line_q = get_line(sRNA))==NULL) {
781 if ((line_q ==NULL) || (strcmp(line_q, "@") == 0)){
782 if(id_s2){free(id_s2);}
785 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
786 s2 = (char *) space(strlen(line_q)+1+20);
787 strcpy(s2,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
790 strcat(s2,"NNNNNNNNNN\0");
793 for (l = 0; l < s2_len ; l++) {
794 s2[l] = toupper(s2[l]);
795 if (!noconv && s2[l] == 'T') s2[l] = 'U';
797 structure = (char *) space((unsigned) s2_len+1);
798 cstruc = get_line(sRNA);
800 int dn3=strlen(cstruc)-(s2_len-20);
801 strcpy(structure,"..........");
802 strncat(structure, cstruc, s2_len-20);
804 strcat(structure,"..........\0");
808 strcat(structure,".");
810 strcat(structure,"\0");
815 fprintf(stderr, "constraints missing\n");
817 int a = strchr(structure,'|') - structure;
818 int b = strrchr(structure,'|') - structure;
819 if(alignment_length < b-a+1){
820 nrerror("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
824 file_s2 = (char *) space(sizeof(char) * (strlen(id_s2)+strlen(access)+20));
825 strcpy(file_s2, access);
826 strcat(file_s2, "/");
827 strcat(file_s2, id_s2);
828 strcat(file_s2, "_openen");
830 access_s2 = read_plfold_i(file_s2,1,s2_len,verhaeltnis,alignment_length);
833 strcat(file_s2,"_bin");
834 access_s2 = read_plfold_i_bin(file_s2,1,s2_len,verhaeltnis,alignment_length);
836 if(access_s2 == NULL){
837 printf("Accessibility file %s not found, look at next target RNA\n",file_s2);
838 free(file_s2);free(s2);free(id_s2);
841 printf(">%s\n>%s\n", id_s1, id_s2);
842 Lduplexfold_CXS(s1,s2, (const int **) access_s1, (const int **) access_s2, delta,alignment_length, deltaz, fast,structure,il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
850 free(access_s2);free(structure);
866 else if(access==NULL){ /* t and q are defined, but no accessibility is provided */
868 mRNA=fopen(tname, "r");
869 if(mRNA==NULL){printf("%s: Wrong snoRNA file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
870 sRNA=fopen(qname, "r");
871 if(sRNA==NULL){printf("%s: Wrong target file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
872 do { /* main loop: continue until end of file */
873 if ((line_t = get_line(mRNA))==NULL) {
876 /*parse line, get id for further accessibility fetching*/
877 while ((*line_t=='*')||(*line_t=='\0')||(*line_t=='>')) {
879 if(id_s1){ /* in case we have two header the one after the other */
880 free(id_s1); /* free the old header, a put the new one instead */
883 id_s1 = (char*) space (strlen(line_t)+2);
884 (void) sscanf(line_t,"%s",id_s1);
885 memmove(id_s1, id_s1+1, strlen(id_s1));
892 if ((line_t = get_line(mRNA))==NULL) {
896 /*append N's to the sequence in order to avoid boundary checking*/
897 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
898 if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) {
905 s1 = (char *) space(strlen(line_t)+1+20);
906 strcpy(s1,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
909 strcat(s1,"NNNNNNNNNN\0");
912 for (l = 0; l < s1_len ; l++) {
913 s1[l] = toupper(s1[l]);
914 if (!noconv && s1[l] == 'T') s1[l] = 'U';
919 if ((line_q = get_line(sRNA))==NULL) {
922 while ((*line_q=='*')||(*line_q=='\0')||(*line_q=='>')) {
928 id_s2 = (char*) space (strlen(line_q)+2);
929 (void) sscanf(line_q,"%s",id_s2);
930 memmove(id_s2, id_s2+1, strlen(id_s2));
937 if ((line_q = get_line(sRNA))==NULL) {
941 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
942 if ((line_q ==NULL) || (strcmp(line_q, "@") == 0)){
943 if(id_s2){free(id_s2);}
946 s2 = (char *) space(strlen(line_q)+1+20);
947 strcpy(s2,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
950 strcat(s2,"NNNNNNNNNN\0");
953 for (l = 0; l < s2_len ; l++) {
954 s2[l] = toupper(s2[l]);
955 if (!noconv && s2[l] == 'T') s2[l] = 'U';
957 structure = (char *) space((unsigned) s2_len+1);
958 cstruc = get_line(sRNA);
960 int dn3=strlen(cstruc)-(s2_len-20);
961 strcpy(structure,"..........");
962 strncat(structure, cstruc, s2_len-20);
964 strcat(structure,"..........\0");
968 strcat(structure,".");
970 strcat(structure,"\0");
975 fprintf(stderr, "constraints missing\n");
977 int a = strchr(structure,'|') - structure;
978 int b = strrchr(structure,'|') - structure;
979 if(alignment_length < b-a+1){
980 nrerror("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
982 printf(">%s\n>%s\n", id_s1, id_s2);
983 double begin = BeginTimer();
984 Lduplexfold_C(s1,s2,delta,extension_cost,alignment_length, deltaz, fast,structure,il_a,il_b,b_a, b_b);
987 elapTicks = EndTimer(begin);
988 elapMilli = elapTicks/1000;
989 /* printf("Millisecond %.2f\n",elapMilli); */
990 free(id_s2);free(s2);free(structure);
1002 else if(!qname && !tname && !(alignment_mode)) {
1003 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
1005 if ((fold_constrained) && (istty)) {
1006 printf("Input constraints using the following notation:\n");
1007 printf("| : paired with another base\n");
1008 printf(". : no constraint at all\n");
1009 printf("x : base must not pair\n");
1011 do { /* main loop: continue until end of file */
1012 /* duplexT mfe, *subopt */
1013 char *id_s1=NULL, *id_s2=NULL;
1015 printf("\nInput two sequences (one line each); @ to quit\n");
1016 printf("%s\n", scale);
1020 if ((line = get_line(stdin))==NULL) break;
1021 /* skip empty lines, comment lines, name lines */
1022 while ((*line=='*')||(*line=='\0')||(*line=='>')) {
1023 printf("%s\n", line);
1025 id_s1 = (char*) space (strlen(line)+2);
1026 (void) sscanf(line,"%s",id_s1);
1027 memmove(id_s1, id_s1+1, strlen(id_s1));
1030 if ((line = get_line(stdin))==NULL) {
1035 if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
1037 s1 = (char *) space(strlen(line)+1+20);
1038 strcpy(s1,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1041 strcat(s1,"NNNNNNNNNN\0");
1042 if ((line = get_line(stdin))==NULL) break;
1043 /* skip comment lines and get filenames */
1045 while ((*line=='*')||(*line=='\0')||(*line=='>')) {
1046 printf("%s\n", line);
1048 id_s2 = (char*) space (strlen(line)+2);
1049 (void) sscanf(line,"%s",id_s2);
1050 memmove(id_s2, id_s2+1, strlen(id_s2));
1053 if ((line = get_line(stdin))==NULL) {
1057 if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
1059 s2 = (char *) space(strlen(line)+1+20);
1060 strcpy(s2,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1063 strcat(s2,"NNNNNNNNNN\0");
1069 structure = (char *) space((unsigned) n2+1);
1070 if (fold_constrained) {
1071 cstruc = get_line(stdin);
1072 if (cstruc!=NULL && (cstruc[0]=='>')){
1073 int dn3=strlen(cstruc)-(n2-20);
1074 strcpy(structure,"..........");
1075 strncat(structure, cstruc, n2-20);
1077 strcat(structure,"..........\0");
1081 strcat(structure,".");
1083 strcat(structure,"\0");
1089 fprintf(stderr, "constraints missing\n");
1091 for (l = 0; l < n1 ; l++) {
1092 s1[l] = toupper(s1[l]);
1093 if (!noconv && s1[l] == 'T') s1[l] = 'U';
1095 for (l = 0; l < n2 ; l++) {
1096 s2[l] = toupper(s2[l]);
1097 if (!noconv && s2[l] == 'T') s2[l] = 'U';
1100 printf("lengths = %d,%d\n", strlen(s1), strlen(s2));
1102 if(alignment_length==0){alignment_length=40;}
1105 if(!fold_constrained){
1106 Lduplexfold(s1,s2,delta,extension_cost,alignment_length, deltaz, fast,il_a,il_b,b_a,b_b);
1109 int a = strchr(structure,'|') - structure;
1110 int b = strrchr(structure,'|') - structure;
1111 if(alignment_length < b-a+1){
1112 nrerror("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1114 Lduplexfold_C(s1,s2,delta,extension_cost,alignment_length, deltaz,fast,structure,il_a,il_b,b_a,b_b);
1118 int **access_s1, **access_s2;
1119 char *file_s1, *file_s2;
1121 s1_len = strlen(s1);
1122 s2_len = strlen(s2);
1123 if(!(id_s1 && id_s2)){nrerror("The fasta files has no header information..., cant fetch accessibility file\n");}
1124 file_s1 = (char *) space(sizeof(char) * (strlen(id_s1)+strlen(access)+20));
1125 file_s2 = (char *) space(sizeof(char) * (strlen(id_s2)+strlen(access)+20));
1126 strcpy(file_s1, access); strcpy(file_s2, access);
1127 strcat(file_s1, "/"); strcat(file_s2, "/");
1128 strcat(file_s1, id_s1);strcat(file_s2, id_s2);
1129 strcat(file_s1, "_openen");strcat(file_s2, "_openen");
1131 access_s1 = read_plfold_i(file_s1,1,s1_len,verhaeltnis,alignment_length);
1134 strcat(file_s1,"_bin");
1135 access_s1 = read_plfold_i_bin(file_s1,1,s1_len,verhaeltnis,alignment_length);
1137 if(access_s1 == NULL){
1147 access_s2 = read_plfold_i(file_s2,1,s2_len,verhaeltnis,alignment_length);
1150 strcat(file_s2,"_bin");
1151 access_s2 = read_plfold_i_bin(file_s2,1,s2_len,verhaeltnis,alignment_length);
1153 if(access_s2 == NULL){
1163 if(!fold_constrained){
1164 Lduplexfold_XS(s1,s2, (const int **) access_s1, (const int **) access_s2, delta,alignment_length, deltaz, fast,il_a,il_b,b_a,b_b);/* , target_dead, query_dead); */
1168 int a = strchr(structure,'|') - structure;
1169 int b = strrchr(structure,'|') - structure;
1170 if(alignment_length < b-a+1){
1171 nrerror("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1173 Lduplexfold_CXS(s1,s2, (const int **) access_s1, (const int **) access_s2, delta,alignment_length, deltaz, fast,structure,il_a,il_b,b_a,b_b);/* , target_dead, query_dead); */
1175 i = access_s1[0][0];
1179 i = access_s2[0][0];
1187 free(id_s1);id_s1=NULL;
1188 free(id_s2);id_s2=NULL;
1190 free(s1); free(s2);s1=NULL;s2=NULL;
1193 if(id_s1){free(id_s1);}if(id_s2){free(id_s2);}
1201 /* if(id_s1){free(id_s1);}if(id_s2){free(id_s2);} */
1203 else if(qname && tname && alignment_mode){
1205 mRNA=fopen(tname, "r");
1206 if(mRNA==NULL){printf("%s: Wrong target file name\n", tname); RNAplex_cmdline_parser_free (&args_info);return 0;}
1207 sRNA=fopen(qname, "r");
1208 if(sRNA==NULL){printf("%s: Wrong query file name\n", qname); RNAplex_cmdline_parser_free (&args_info);return 0;}
1209 n_seq = read_clustal(mRNA, temp1, names1);
1210 n_seq2 = read_clustal(sRNA, temp2, names2);
1211 fclose(mRNA); fclose(sRNA);
1212 if (n_seq != n_seq2){
1213 for (i=0; temp1[i]; i++) {
1217 nrerror("unequal number of seqs in alignments");
1219 for(i=0;temp1[i];i++){
1220 AS1[i] = (char*) space((strlen(temp1[i])+21)*sizeof(char));
1221 AS2[i] = (char*) space((strlen(temp2[i])+21)*sizeof(char));
1222 strcpy(AS1[i],"NNNNNNNNNN");
1223 strcpy(AS2[i],"NNNNNNNNNN");
1224 strcat(AS1[i],temp1[i]);
1225 strcat(AS2[i],temp2[i]);
1226 strcat(AS1[i],"NNNNNNNNNN\0");
1227 strcat(AS2[i],"NNNNNNNNNN\0");
1229 for (i=0; temp1[i]; i++) {
1238 aliLduplexfold((const char**) AS1,(const char**) AS2, n_seq*delta, extension_cost, alignment_length, deltaz, fast,il_a,il_b,b_a,b_b);
1241 int** target_access=NULL, **query_access=NULL;
1242 target_access=average_accessibility_target(names1, AS1, n_seq, access,verhaeltnis,alignment_length,binaries); /* get averaged accessibility for alignments */
1243 query_access =average_accessibility_target(names2, AS2, n_seq, access,verhaeltnis,alignment_length,binaries);
1244 if(!(target_access && query_access)){
1245 for (i=0; AS1[i]; i++) {
1246 free(AS1[i]); free(names1[i]);
1247 free(AS2[i]); free(names2[i]);
1249 RNAplex_cmdline_parser_free (&args_info);
1252 aliLduplexfold_XS((const char**)AS1, (const char**)AS2, (const int **) target_access, (const int **) query_access, n_seq*delta, alignment_length, deltaz, fast, il_a, il_b, b_a,b_b);
1253 if(!(target_access==NULL)){
1254 i = target_access[0][0];
1256 free(target_access[i]);
1258 free(target_access);
1260 if(!(query_access==NULL)){
1261 i = query_access[0][0];
1263 free(query_access[i]);
1268 for (i=0; AS1[i]; i++) {
1269 free(AS1[i]); free(names1[i]);
1270 free(AS2[i]); free(names2[i]);
1273 if(access){free(access);access=NULL;}
1274 if(qname){free(tname);access=NULL;}
1275 if(tname){free(qname);access=NULL;}
1276 RNAplex_cmdline_parser_free (&args_info);
1281 static int print_struc(duplexT const *dup) {
1282 /*this function print out the structure of a hybridization site
1283 and return the value from which one can begin to look for the next
1284 non-overlappig hybrid*/
1287 float energy=dup->energy*0.01;
1289 l1 = strchr(dup->structure, '&')-dup->structure;
1290 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", dup->structure, init-l1-1,
1291 init-2, dup->j+l1+2-strlen(dup->structure), dup->j, energy);
1295 static int ** read_plfold_i(char *fname, const int beg, const int end, double verhaeltnis, const int length)
1297 double begin = BeginTimer();
1298 FILE *in=fopen(fname,"r");
1300 fprintf(stderr,"File ' %s ' open error\n",fname);
1311 char tmp[2048]={0x0};
1313 if(fgets(tmp,sizeof(tmp),in)==0){
1314 perror("Empty File");
1316 if(strchr(tmp,'>')){
1317 fprintf(stderr,"file %s is not in RNAplfold format\n",fname);
1320 if(fgets(tmp,sizeof(tmp),in)==0){
1321 fprintf(stderr,"No accessibility data\n");
1325 dim_x=get_max_u(tmp,'\t');
1327 printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n",length,dim_x);
1328 printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1331 access = (int**) space(sizeof(int *) * (dim_x+2));
1332 for(i=0; i< dim_x+2; i++){
1333 access[i] =(int *) space(sizeof(int) * (end_r-beg_r+1)); /* normally +1 */
1335 for(i=0;i<end_r - beg_r +1;i++){
1336 for(j=0;j<dim_x+2;j++){
1340 access[0][0]=dim_x+2;
1341 while(fgets(tmp,sizeof(tmp),in)!=0 && --end_r > 10) /* read a record, before we had --end_r > 10*/
1348 if(sscanf(tmp,"%d\t%n",&seq_pos,&temp)==1){
1352 while(sscanf(tmp+offset,"%f%n",&n,&temp)==1 ){
1354 /* seq_pos - beg allows to get the accessibility right */
1355 access[count][seq_pos - beg +11]= (int) rint( 100 * n); /* round the number */
1356 access[count][seq_pos - beg +11]*=verhaeltnis; /* 10 stands here for the number of nucleotides */
1365 printf("Accessibility files contains %d less entries than expected based on the sequence length\n", end_r - 20);
1366 printf("Please recompute your profiles so that profile length and sequence length match\n");
1372 elapTicks = (EndTimer(begin) - begin);
1373 elapMilli = elapTicks/1000;
1374 /* printf("read_plfold_i Millisecond %.2f\n",elapMilli); */
1378 static int convert_plfold_i(char *fname)
1381 FILE *in=fopen(fname,"r");
1383 fprintf(stderr,"File ' %s ' open error\n",fname);
1386 char tmp[2048]={0x0};
1387 if(fgets(tmp,sizeof(tmp),in)==0){
1388 perror("Empty File");
1390 if(strchr(tmp,'>')){
1391 fprintf(stderr,"file %s is not in RNAplfold format\n",fname);
1394 if(fgets(tmp,sizeof(tmp),in)==0){
1395 fprintf(stderr,"No accessibility data\n");
1399 u_length=get_max_u(tmp,'\t'); /* get the x dimension */
1402 while((c=fgetc(in))!=EOF){
1408 int **access = read_plfold_i(fname,1,length+20,1,u_length);
1410 outname = (char*) space((strlen(fname)+5)*sizeof(char));
1411 strcpy(outname,fname);
1412 strcat(outname,"_bin");
1413 FILE *fp=fopen(outname,"wb");
1416 for (i=0; i< u_length +2 ; i++) {
1417 fwrite(access[i],sizeof(int),length+20,fp);
1420 fseek(fp,0,SEEK_SET);
1421 fwrite(&u_length,sizeof(int),1,fp);
1422 fwrite(&length,sizeof(int),1,fp);
1430 static int ** read_plfold_i_bin(char *fname, const int beg, const int end, double verhaeltnis, const int length)
1432 double begin = BeginTimer();
1433 FILE *fp=fopen(fname,"rb");
1436 fprintf(stderr,"File ' %s ' open error\n",fname);
1440 first_line =(int *) space(sizeof(int) * (end - beg+1)); /* check length of the line LOOK at read_plfold_i */
1441 if(!fread(first_line,sizeof(int),(end-beg)+1,fp)){
1442 fprintf(stderr, "Problem reading size of profile from '%s'/n", fname);/* get the value of the u option */
1446 lim_x=first_line[0];
1447 seqlength=first_line[1]; /* length of the sequence RNAplfold was ran on. */
1449 printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n",length,lim_x);
1450 printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1453 fseek(fp,0,SEEK_SET); /* set pointer to the begining of the file */
1454 int **access; /* here we store the access values */
1455 access = (int**) space(sizeof(int *) * (lim_x+1)); /* !!!! lim_x+1 -> lim_x+2 */
1458 for(count=0; count < lim_x+1; count++){ /* read file */
1459 access[count] = (int*) space(sizeof(int) * (end - beg+1)); /* declare array length */
1460 /* now we should be sure to read the right position */
1461 /* we first need a seek to the right position */
1462 /* 0->9 line begin, + begin */
1463 /* here we need information about the total line length..., we can get this if this is saved by RNAplfold... */
1465 fseek(fp,(beg-1)*sizeof(int),SEEK_CUR); /* go to the desired position, note the 10 offset */
1467 if(!fread(access[count],sizeof(int),(end-beg)+1,fp)){ /* read the needed number of accessibility values */
1468 printf("File '%s' is corrupted \n",fname);
1471 fseek(fp,(seqlength-end+20)*sizeof(int),SEEK_CUR); /* place to the begining of the next file */
1474 fseek(fp,0,SEEK_SET);
1476 access[0][0]+=1; /* !!!!!!!!!!!!!!!!!!!put 2 in case of problem */
1477 fclose(fp); /* close file */
1481 elapTicks = (EndTimer(begin) - begin);
1482 elapMilli = elapTicks/1000;
1483 /* printf("read_plfold_i_bin Millisecond %.2f\n",elapMilli); */ /* return time */
1484 return access; /* return access */
1493 static int get_max_u(const char *s, char delim){
1497 pch = strchr(s, delim);
1500 pch=strchr(pch+1, delim);
1507 static int **average_accessibility_target(char **names, char **ALN, int number, char *access,double verhaeltnis,const int alignment_length,int binaries)
1510 int *** master_access = NULL; /* contains the accessibility arrays for different */
1511 int ** average_access = NULL; /* contains the averaged accessibility */
1512 int aln_size=strlen(ALN[0]); /* aln size -> define size of the averaged accessibility array */
1513 int * index; /* contains the index used for navigating inside the alignments */
1514 long long int begin, end; /* contains the begin and end region to read the accessibility */
1515 index = (int*) space(sizeof(int) * number);
1516 for(i=0; i<number; i++){
1519 master_access = (int ***) space(sizeof(int**) * number);
1520 int dim_x; /* contains the minimal of the maximum u length for all sequences of the alignment */
1523 int location_flag; /* location flag checks if all line contains a location flag or not */
1524 /* if some line contains location information and other not, return a warning but keep going; */
1525 /* The location flag will be set to TRUE if the following pattern is found */
1526 /* sequence-name/[0-9]+-[0-9]+ */
1527 /* |----NAME----||BEG| |END| */
1529 if(sscanf(names[0],"%255[^/]/%lld-%lld",bla, &begin,&end)==3){ /* initialize location_flag; */
1536 for(i=0; i< number; i++) { /* be careful!!!! Name should contain all characters from begin till the "/" character */
1538 file_s1 = (char *) space(sizeof(char) * (strlen(names[i])+strlen(access)+20));
1540 int sequence_length = get_sequence_length_from_alignment(ALN[i]);
1541 end=sequence_length;
1542 if(sscanf(names[i],"%255[^/]/%lld-%lld",bla, &begin,&end)==3){
1543 /* if subsequence and range do not have the same length, stop RNAplex. */
1544 if(end-begin+1 != sequence_length-20){
1545 printf("Your range %lld %lld in sequence %s does not correspond to the sequence length %d\n",begin,end,names[i],sequence_length-20);
1546 printf("Please check your input alignments and rerun RNAplex");
1548 if(master_access != NULL){
1551 j = master_access[a][0][0];
1553 free(master_access[a][j]);
1555 free(master_access[a]);
1558 free(master_access);
1559 free(index);free(file_s1);
1560 return average_access;
1563 end+=20; /* add 20 to the end, in order to take the N's into account */
1564 if(location_flag==0){
1565 fprintf(stderr,"\n!! Line %d in your target alignment contains location information\n",i+1);
1566 fprintf(stderr,"while line %d did not. PLEASE CHECK your alignments!!\n",i);
1567 fprintf(stderr,"RNAplex will continue the target search.\n");
1570 strcpy(file_s1, access);
1571 strcat(file_s1, "/");
1572 strcat(file_s1,bla);
1575 if(location_flag==1){
1576 fprintf(stderr,"\n!! Line %d in your target alignment does not contain location information\n",i+1);
1577 fprintf(stderr,"while line %d in your target alignment did. PLEASE CHECK your alignments!!\n",i);
1578 fprintf(stderr,"RNAplex will continue the target search.\n");
1581 strcpy(file_s1, access);
1582 strcat(file_s1, "/");
1583 strcat(file_s1,names[i]);
1585 strcat(file_s1, "_openen");
1587 master_access[i]=read_plfold_i(file_s1,begin,end,verhaeltnis,alignment_length); /* read */
1590 strcat(file_s1,"_bin");
1591 master_access[i]=read_plfold_i_bin(file_s1,begin,end,verhaeltnis,alignment_length); /* read */
1596 dim_x=MIN2(dim_x, master_access[i][0][0]);
1598 average_access = (int **) space(sizeof(int*) * (dim_x));
1599 for(i=0;i<dim_x;i++){
1600 average_access[i] = (int *) space(sizeof(int) * (aln_size+9)); /* average_access is of the length of the alignment */
1601 /* while master_access is of the length of the sequences. */
1603 average_access[0][0]=dim_x;
1604 for(i=1;i<aln_size;i++){ /* go through all aln position */
1607 for(j=0;j<number; j++){ /* go through all aln members */
1608 if(!(ALN[j][i-1]=='-')) { /* if we have a gap, skip this position */
1609 for(u=1; u<dim_x; u++){ /* go through all u */
1610 if(index[j]<u+10){continue;}
1611 average_access[u][i]+=master_access[j][u][index[j]]; /* index[j] position in sequence j */
1613 index[j]++; /* increase position in sequence */
1619 if(!(count_j>0)){printf("Alignment contains only gap at column %d\n exiting program\n", i); return NULL;}
1620 for(u=1; u<dim_x; u++){
1621 average_access[u][i]=(int) rint(average_access[u][i]/(count_j));
1625 for(i=0; i<number; i++){
1627 j = master_access[i][0][0];
1629 free(master_access[i][j]);
1631 free(master_access[i]);
1633 free(master_access);
1635 return average_access;
1640 *** aliprint_struct generate two postscript files.
1641 *** The first one is a structure annotated alignment a la RNAalifold.
1642 *** The second file is a structural representation of the interaction
1643 *** with colour annotation of the base-pair conservation.
1646 static void aliprint_struct(FILE *Result, /* result file */
1647 FILE *mrna, /* target alignment */
1648 FILE *query, /* query alignment */
1649 const int WindowsLength /* Number of nucleotide around the target sequence */
1653 *** Defines arrays for names and sequences of the query and target alignment
1655 char *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
1656 int n_seq, n_seq2,i,s;
1658 *** Check if target and sequence alignment contains the same number of sequences
1659 *** The user should ensure that the sequence are in the same species order in both the target and query alignment files.
1661 n_seq = read_clustal(mrna, AS1, names1);
1662 n_seq2 = read_clustal(query, AS2, names2);
1663 if (n_seq != n_seq2){
1664 for (i=0; AS1[i]; i++) {
1668 nrerror("unequal number of seqs in alignments");
1671 *** Here we get the length of the target and query alignments. Important for initiliazing the necessary arrays
1673 int target_alignment_length, query_alignment_length;
1674 target_alignment_length=strlen(AS1[0]);
1675 query_alignment_length=strlen(AS2[0]);
1679 *** Initialize files containing sequences
1685 *** Quit if the result file does not exist
1687 if ((result = get_line(Result))==NULL) {
1692 *** If we have a line in the result file that looks like a result line, then parse it
1694 if(strchr(result, '(') && strchr(result,'&') && strchr(result, '(')&& strchr(result, ',') && strchr(result,':') && strchr(result, '-')){
1695 char *structure,*pos;
1697 *** tbegin,tend,qbegin,qend store the duplex boundaries
1698 *** on both the target and query sequences, respectively
1700 int tbegin,tend,qbegin,qend;
1703 *** sbegin, send are the coordinates of the target alignment slide
1707 *** Check in the line where is the first space.
1708 *** This gives us the total length of the duplex
1710 pos = strchr(result, ' ');
1711 posi = (int) (pos - result);
1714 *** Copy the structure in the line into variable structure
1716 structure = (char *) space((length+1) * sizeof(char));
1717 sscanf(result,"%s",structure);
1719 *** Save the coordinates on the target
1721 sscanf(pos, "%10d,%10d", &tbegin,&tend);
1722 pos = strchr(pos, ':');
1723 pos = strchr(pos, ' ');
1725 *** Save the coordinates on the query
1727 sscanf(pos, "%10d,%10d", &qbegin,&qend);
1730 *** To produce the ps files we use the full query alignment
1731 *** and a user defined section of the target coordinates.
1732 *** The size of the target slide is determined by the WindowsLength variable
1734 sbegin=MAX2(tbegin-WindowsLength,0);
1735 send=MIN2(tend+WindowsLength,target_alignment_length);
1737 *** We retrieved the coordinates of the duplex
1738 *** Now we can slide the alignment
1739 *** Target and query will hold the alignments for the target and query sequences
1744 target = (char**) space((n_seq+1)*sizeof(char*));
1745 query = (char**) space((n_seq+1)*sizeof(char*));
1746 join = (char**) space((n_seq+1)*sizeof(char*));
1747 for(s=0;s<n_seq;s++){
1748 target[s] = (char *) space((send-sbegin+2)*sizeof(char));
1749 strncpy(target[s], (AS1[s]+sbegin-1), (send-sbegin+1));
1750 target[s][send-sbegin+1]='\0';
1751 query[s] = (char *) space((query_alignment_length+1)*sizeof(char));
1752 strcpy(query[s], AS2[s]);
1753 query[s][query_alignment_length]='\0';
1754 join[s] = (char *) space((query_alignment_length+(send-sbegin)+3)*sizeof(char));
1755 strncpy(join[s],(AS1[s]+sbegin-1), (send-sbegin+1));
1756 strcat(join[s],"&");
1757 strcat(join[s],AS2[s]);
1759 target[n_seq]=NULL;query[n_seq]=NULL;
1761 *** Get the consensus structure for the query and target sequence
1762 *** This consensus structure is constrained such that the interaction sites are unbound.
1763 *** We define and set the query and target constraint
1765 char * target_constraint; char * query_constraint; char * joint_structure;
1766 target_constraint = (char *) space((unsigned) (send-sbegin+2));
1767 query_constraint = (char *) space((unsigned) (query_alignment_length+1));
1768 joint_structure = (char *) space((unsigned) (send-sbegin+3+query_alignment_length));
1769 for(i=0; i<strlen(target[0]);i++){
1770 if(i>tbegin-sbegin-1 && i<tend-sbegin+1){
1771 target_constraint[i]='x';
1774 target_constraint[i]='.';
1777 for(i=0; i<strlen(AS2[0]);i++) {
1778 if(i>qbegin-2 && i < qend){
1779 query_constraint[i]='x';
1782 query_constraint[i]='.';
1786 *** Now produce structure
1789 alifold((const char **) target, target_constraint);
1790 for(i=0; !(target_constraint[i] == '\0'); i++){
1791 if(target_constraint[i]=='('){
1792 target_constraint[i]='[';
1794 else if(target_constraint[i]==')'){
1795 target_constraint[i]=']';
1798 alifold((const char **) query , query_constraint);
1799 for(i=0; !(query_constraint[i] == '\0'); i++){
1800 if(query_constraint[i]=='('){
1801 query_constraint[i]='<';
1803 else if(query_constraint[i]==')'){
1804 query_constraint[i]='>';
1808 ***Add duplex structure, and produce joint structure
1810 int count_duplex_structure=0;
1811 for(i=tbegin-sbegin; i<tend-sbegin+1;i++){
1812 target_constraint[i]=structure[count_duplex_structure];
1813 count_duplex_structure++;
1815 count_duplex_structure++;
1816 for(i=qbegin-1; i < qend; i++){
1817 query_constraint[i]=structure[count_duplex_structure];
1818 count_duplex_structure++;
1820 strncpy(joint_structure,target_constraint,(send-sbegin+1));
1821 strcat(joint_structure,"&");
1822 strcat(joint_structure,query_constraint);
1824 *** Produce consensus sequence
1829 temp_target = consensus((const char **) target);
1830 temp_query = consensus((const char **) query);
1831 string = (char *) space((strlen(temp_target)+strlen(temp_query)+1)*sizeof(char));
1832 strcpy(string,temp_target);free(temp_target);
1833 strcat(string,temp_query);free(temp_query);
1835 *** now produce output name, based on the first two names of the alignment
1837 int length_name = strlen(names1[0]) + strlen(names2[0])+1;
1838 char *psoutput = (char*) space((length_name + 100)*sizeof(char));
1840 strcpy(psoutput,"annaln_");
1841 strcat(psoutput,names1[0]);
1842 strcat(psoutput,"_");
1843 strcat(psoutput,names2[0]);
1844 strcat(psoutput,"_");
1845 sprintf(str,"%d",tbegin);
1846 strcat(psoutput,str);
1847 strcat(psoutput,"_");
1848 sprintf(str,"%d",tend);
1849 strcat(psoutput,str);
1850 strcat(psoutput,"_");
1851 sprintf(str,"%d",qbegin);
1852 strcat(psoutput,str);
1853 strcat(psoutput,"_");
1854 sprintf(str,"%d",qend);
1855 strcat(psoutput,str);
1856 strcat(psoutput,".ps");
1858 while(psoutput[s] != '\0'){
1859 if(psoutput[s]=='\\' || psoutput[s]=='/'){
1865 *** Produce a structure annotated, colorated alignment
1867 aliPS_color_aln(joint_structure, psoutput, (const char**) join, (const char**) names1);
1870 *** We also need the consensus structure annotated with conservation information
1875 *** First we change the output file name from annaln -> annstr
1877 psoutput[3]='s';psoutput[4]='t';psoutput[5]='r';
1881 *** Now we change the different parenthesis into one
1884 char *joint_structure_parenthesis;
1885 joint_structure_parenthesis=(char *) space((strlen(joint_structure)+1)*sizeof(char));
1886 strcpy(joint_structure_parenthesis,joint_structure);
1887 for(i=0; i<strlen(joint_structure);i++){
1888 if((joint_structure[i]=='[') || (joint_structure[i]=='<')){
1889 joint_structure[i]='(';
1891 else if(joint_structure[i]==']' || joint_structure[i]=='>'){
1892 joint_structure[i]=')';
1895 cut_point=send-sbegin+1;
1897 *** Remove & from the alignment and the consensus structure
1900 for(i=0; i<strlen(joint_structure); i++){
1901 if(joint_structure[i]=='&'){
1904 joint_structure[counter]=joint_structure[i];
1905 joint_structure_parenthesis[counter]=joint_structure_parenthesis[i];
1906 for(s=0;s<n_seq;s++){
1907 join[s][counter]=join[s][i];
1912 string=consensus((const char**) join);
1913 printf("%s\n%s\n%s\n",join[0],string,joint_structure);
1915 //A=annote(joint_structure_parenthesis,(const char**) join);
1916 xrna_plex_plot(string,joint_structure_parenthesis,psoutput);
1923 for (i=0; i<n_seq+1; i++) {
1930 for (i=0; AS1[i]; i++) {
1941 All characters not being included into ATCGUN are considered as gap characters.
1943 static int get_sequence_length_from_alignment(char *sequence){
1945 while(!(*sequence == '\0')){
1946 if(*sequence == 'A' || *sequence== 'T' || *sequence == 'C' || *sequence == 'G' || *sequence == 'U' || *sequence == 'N'){
1954 static void linear_fit(int *il_a, int *il_b, int *b_a, int *b_b){ /*get fit parameters*/
1956 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
1957 update_fold_params();
1958 P = scale_parameters();
1961 int internal_loop_x[25] = {6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30};
1962 int internal_loop_y[25];
1964 for(i=0; i<25; i++){ /* initialize the y-value for internal loops */
1965 internal_loop_y[i] = P->internal_loop[internal_loop_x[i]];
1967 int sumx,sumy,sumxx,sumxy;
1968 sumx=sumy=sumxx=sumxy=0;
1969 double til_a, til_b;
1970 int n=25; /* only take data points till int_loop size <=20; */
1971 /* no measurement exists for larger loop && */
1972 /* such large loop are not expected in RNA-RNA interactions. */
1974 sumx+=internal_loop_x[i];
1975 sumy+=internal_loop_y[i];
1976 sumxx+=internal_loop_x[i]*internal_loop_x[i];
1977 sumxy+=internal_loop_x[i]*internal_loop_y[i];
1979 if((sumxx - (sumx*sumx)/n) < 1e-6){
1981 printf("divisor for internal loop is too small %d\n",(sumxx - (sumx*sumx)/n));
1982 nrerror("Problem in fitting");
1984 til_a = (sumxy - (sumx * sumy)/n)/(sumxx - (sumx*sumx)/n);
1985 til_b = sumy/n - (til_a * sumx/n);
1986 *il_a = (int) til_a;
1987 *il_b = (int) til_b;
1992 int bulge_loop_x[5] = {2,3,4,5,6};
1993 int bulge_loop_y[5];
1995 bulge_loop_y[i] = P->bulge[bulge_loop_x[i]];
1997 sumx=sumy=sumxx=sumxy=0;
2000 sumx+=bulge_loop_x[i];
2001 sumy+=bulge_loop_y[i];
2002 sumxx+=bulge_loop_x[i]*bulge_loop_x[i];
2003 sumxy+=bulge_loop_x[i]*bulge_loop_y[i];
2005 tb_a = (sumxy - (sumx * sumy)/n)/(sumxx - (sumx*sumx)/n);
2006 tb_b = sumy/n - (tb_a * sumx/n);
2010 if((sumxx - (sumx*sumx)/n) < 1e-6){
2011 printf("divisor for bulge loop is too small %d\n",(sumxx - (sumx*sumx)/n));
2012 nrerror("Problem in fitting");
2016 double probcompute_silvana_98(char *s1, double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration){
2017 double d_a = 3.92/100000.0; /*Parameters from the article of Owczarzy*/
2018 double d_b = 9.11/1000000; /*Parameters from the article of Owczarzy*/
2019 double d_c = 6.26/100000; /*Parameters from the article of Owczarzy*/
2020 double d_d = 1.42/100000; /*Parameters from the article of Owczarzy*/
2021 double d_e = 4.82/10000; /*Parameters from the article of Owczarzy*/
2022 double d_f = 5.25/10000; /*Parameters from the article of Owczarzy*/
2023 double d_g = 8.31/100000; /*Parameters from the article of Owczarzy*/
2024 double d_magn_corr_value = 0;
2026 double dH, dS; dS=0; dH=0;
2027 double salt_correction;
2028 double enthalpy[4][4]=
2029 {{-7.9,-8.4,-7.8,-7.2},
2030 {-8.5,-8.0,-10.6,-7.8},
2031 {-8.2,-10.6,-8.0,-8.4},
2032 {-7.2,-8.2,-8.5,-7.9}};
2033 double entropy[4][4]={{
2034 -22.2 ,-22.4, -21.0 , -20.4},
2035 {-22.7 ,-19.9, -27.2 , -21.0},
2036 {-22.2 ,-27.2, -19.9 , -22.4},
2037 {-21.3 ,-22.2, -22.7 , -22.2}
2040 int converted=encode_char(toupper(s1[posst]))-1;
2041 int seqlen=strlen(s1);
2043 /* terminal correction */
2044 if(s1[0]=='G' || s1[0]=='C'){dH+=0.1; dS+=-2.8;d_fgc++;}
2045 if(s1[0]=='A' || s1[0]=='T' || s1[0]=='U'){dH+=2.3; dS+=4.8;}
2046 if(s1[seqlen-1]=='G' || s1[seqlen-1]=='C'){dH+=0.1; dS+=-2.8;}
2047 if(s1[seqlen-1]=='A' || s1[seqlen-1]=='T' || s1[seqlen-1]=='U'){dH+=2.3; dS+=4.8;}
2048 /* compute dH and dH based on sequence */
2049 for(posst=1; posst<seqlen; posst++){
2050 if(toupper(s1[posst])=='G' || toupper(s1[posst])=='C'){d_fgc++;}
2051 int nextconverted=encode_char(toupper(s1[posst]))-1;
2052 dH+=enthalpy[converted][nextconverted];
2053 dS+=entropy[converted][nextconverted];
2054 converted=nextconverted;
2056 d_fgc=d_fgc/((double)(seqlen));
2057 if(mg_concentration==0){
2058 dS+=0.368 * (seqlen-1)*log(na_concentration);
2059 Tm=(1000*dH)/(dS+1.987*log(probe_concentration/4))-273.15;
2063 double single_charged= k_concentration + tris_concentration/2 + na_concentration;
2064 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2065 if(single_charged==0){
2068 d_b * log (mg_concentration) +
2069 d_fgc * (d_c + d_d * log(mg_concentration)) +
2070 1/(2 * ((double)seqlen - 1)) *
2071 (- d_e + d_f * log (mg_concentration) + d_g * pow(log (mg_concentration),2));
2074 if (d_ratio_ions < 0.22) {
2075 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1/100000 * log (single_charged) + 9.40 * 1/1000000 * pow(log (single_charged),2 );
2078 if (d_ratio_ions < 6) {
2080 d_a = 3.92/100000 * (0.843 - 0.352 * sqrt(single_charged) * log (single_charged));
2081 d_d = 1.42/100000 * (1.279 - 4.03/1000 * log (single_charged) - 8.03/1000 * pow(log (single_charged),2));
2082 d_g = 8.31/100000 * (0.486 - 0.258 * log (single_charged) + 5.25/1000 * pow(log (single_charged),3));
2084 d_magn_corr_value = d_a - d_b * log
2085 (mg_concentration) + d_fgc *
2086 (d_c + d_d * log (mg_concentration)) + 1/(2 * ((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log (mg_concentration),2));
2089 d_magn_corr_value = d_a - d_b * log (mg_concentration) + d_fgc * (d_c + d_d * log (mg_concentration)) + 1/(2 *((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log(mg_concentration),2));
2094 double temp_Tm = dH*1000 / (dS + 1.987 * log (probe_concentration/4));
2095 Tm = 1/(1/temp_Tm + d_magn_corr_value) - 273.15;
2099 double probcompute_silvana_04(char *s1, double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration){
2100 double d_a = 3.92/100000.0; /*Parameters from the article of Owczarzy*/
2101 double d_b = 9.11/1000000; /*Parameters from the article of Owczarzy*/
2102 double d_c = 6.26/100000; /*Parameters from the article of Owczarzy*/
2103 double d_d = 1.42/100000; /*Parameters from the article of Owczarzy*/
2104 double d_e = 4.82/10000; /*Parameters from the article of Owczarzy*/
2105 double d_f = 5.25/10000; /*Parameters from the article of Owczarzy*/
2106 double d_g = 8.31/100000; /*Parameters from the article of Owczarzy*/
2107 double d_magn_corr_value = 0;
2109 double dH, dS; dS=0; dH=0;
2110 double salt_correction;
2111 double enthalpy[4][4]=
2112 {{-7.6,-8.4,-7.8,-7.2},
2113 {-8.5,-8.0,-10.6,-7.8},
2114 {-8.2,-9.8,-8.0,-8.4},
2115 {-7.2,-8.2,-8.5,-7.6}};
2116 double entropy[4][4]={{
2117 -21.3 ,-22.4, -21.0 , -20.4},
2118 {-22.7 ,-19.9, -27.2 , -21.0},
2119 {-22.2 ,-24.4, -19.9 , -22.4},
2120 {-21.3 ,-22.2, -22.7 , -21.3}
2123 int converted=encode_char(toupper(s1[posst]))-1;
2124 int seqlen=strlen(s1);
2126 /* terminal correction */
2127 if(s1[0]=='G' || s1[0]=='C'){dH+=0.1; dS+=-2.85;d_fgc++;}
2128 if(s1[0]=='A' || s1[0]=='T' || s1[0]=='U'){dH+=2.4; dS+=4.1;}
2129 if(s1[seqlen-1]=='G' || s1[seqlen-1]=='C'){dH+=0.1; dS+=-2.85;}
2130 if(s1[seqlen-1]=='A' || s1[seqlen-1]=='T' || s1[seqlen-1]=='U'){dH+=2.4; dS+=4.1;}
2131 /* compute dH and dH based on sequence */
2132 for(posst=1; posst<seqlen; posst++){
2133 if(toupper(s1[posst])=='G' || toupper(s1[posst])=='C'){d_fgc++;}
2134 int nextconverted=encode_char(toupper(s1[posst]))-1;
2135 dH+=enthalpy[converted][nextconverted];
2136 dS+=entropy[converted][nextconverted];
2137 converted=nextconverted;
2139 d_fgc=d_fgc/((double)(seqlen));
2140 if(mg_concentration==0){
2141 dS+=0.368 * (seqlen-1)*log(na_concentration);
2142 Tm=(1000*dH)/(dS+1.987*log(probe_concentration/4))-273.15;
2146 double single_charged= k_concentration + tris_concentration/2 + na_concentration;
2147 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2148 if(single_charged==0){
2151 d_b * log (mg_concentration) +
2152 d_fgc * (d_c + d_d * log(mg_concentration)) +
2153 1/(2 * ((double)seqlen - 1)) *
2154 (- d_e + d_f * log (mg_concentration) + d_g * pow(log (mg_concentration),2));
2157 if (d_ratio_ions < 0.22) {
2158 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1/100000 * log (single_charged) + 9.40 * 1/1000000 * pow(log (single_charged),2 );
2161 if (d_ratio_ions < 6) {
2163 d_a = 3.92/100000 * (0.843 - 0.352 * sqrt(single_charged) * log (single_charged));
2164 d_d = 1.42/100000 * (1.279 - 4.03/1000 * log (single_charged) - 8.03/1000 * pow(log (single_charged),2));
2165 d_g = 8.31/100000 * (0.486 - 0.258 * log (single_charged) + 5.25/1000 * pow(log (single_charged),3));
2167 d_magn_corr_value = d_a - d_b * log
2168 (mg_concentration) + d_fgc *
2169 (d_c + d_d * log (mg_concentration)) + 1/(2 * ((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log (mg_concentration),2));
2172 d_magn_corr_value = d_a - d_b * log (mg_concentration) + d_fgc * (d_c + d_d * log (mg_concentration)) + 1/(2 *((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log(mg_concentration),2));
2177 double temp_Tm = dH*1000 / (dS + 1.987 * log (probe_concentration/4));
2178 Tm = 1/(1/temp_Tm + d_magn_corr_value) - 273.15;
2183 double probcompute_xia_98(char *s1, double na_concentration, double probe_concentration){
2184 double dH, dS; dS=0; dH=0;
2185 double salt_correction;
2186 double enthalpy[4][4]=
2187 {{ -6.820, -11.400, -10.480, -9.380},
2188 { -10.440, -13.390, -10.640, -10.480},
2189 { -12.440, -14.880, -13.390, -11.400},
2190 { -7.690, -12.440, -10.440, -6.820}};
2191 double entropy[4][4]=
2192 {{-19.0 , -29.5 , -27.1 , -26.7},
2193 {-26.9 , -32.7 , -26.7 , -27.1},
2194 {-32.5 , -36.9 , -32.7 , -29.5},
2195 {-20.5 , -32.5 , -26.9 , -19.0},
2198 int converted=encode_char(toupper(s1[posst]))-1;
2199 int seqlen=strlen(s1);
2201 /* terminal correction */
2202 if(s1[0]=='G' || s1[0]=='C'){dH+=3.61; dS+=-1.5;}
2203 if(s1[0]=='A' || s1[0]=='T' || s1[0]=='U'){dH+=7.73; dS+=9.0;}
2204 if(s1[seqlen-1]=='G' || s1[seqlen-1]=='C'){dH+=3.61; dS+=-1.5;}
2205 if(s1[seqlen-1]=='A' || s1[seqlen-1]=='T' || s1[seqlen-1]=='U'){dH+=7.73; dS+=9.0;}
2206 /* compute dH and dH based on sequence */
2207 for(posst=1; posst<seqlen; posst++){
2208 int nextconverted=encode_char(toupper(s1[posst]))-1;
2209 dH+=enthalpy[converted][nextconverted];
2210 dS+=entropy[converted][nextconverted];
2211 converted=nextconverted;
2213 dS+=0.368 * (seqlen-1)*log(na_concentration);
2214 Tm=(1000*dH)/(dS+1.987*log(probe_concentration/4))-273.15;
2218 double probcompute_sug_95(char *s1, double na_concentration, double probe_concentration){
2219 double dH, dS; dS=0; dH=0;
2220 double salt_correction;
2221 double enthalpy[4][4]=
2222 {{-11.500, -7.800, -7.000, -8.300},
2223 {-10.400, -12.800, -16.300, -9.100},
2224 { -8.600, -8.000, -9.300, -5.900},
2225 { -7.800, -5.500, -9.000, -7.800}};
2226 double entropy[4][4]=
2227 {{-36.4, -21.6, -19.7, -23.9},
2228 {-28.4, -31.9, -47.1, -23.5},
2229 {-22.9, -17.1, -23.2, -12.3},
2230 {-23.2, -13.5, -26.1, -21.9}};
2232 int converted=encode_char(toupper(s1[posst]))-1;
2233 int seqlen=strlen(s1);
2235 /* terminal correction */
2236 if(s1[0]=='G' || s1[0]=='C'){dH+=0.95; dS+=-1.95;}
2237 if(s1[0]=='A' || s1[0]=='T' || s1[0]=='U'){dH+=0.95; dS+=-1.95;}
2238 if(s1[seqlen-1]=='G' || s1[seqlen-1]=='C'){dH+=0.95; dS+=-1.95;}
2239 if(s1[seqlen-1]=='A' || s1[seqlen-1]=='T' || s1[seqlen-1]=='U'){dH+=0.95; dS+=-1.95;}
2240 /* compute dH and dH based on sequence */
2241 for(posst=1; posst<seqlen; posst++){
2242 int nextconverted=encode_char(toupper(s1[posst]))-1;
2243 dH+=enthalpy[converted][nextconverted];
2244 dS+=entropy[converted][nextconverted];
2245 converted=nextconverted;
2247 /* salt entropy correction von Ahsen 1999 */
2248 dS+=0.368 * (seqlen-1)*log(na_concentration);
2249 Tm=(1000*dH)/(dS+1.987*log(probe_concentration/4))-273.15;
2257 double probcompute_newparameters(char *s1, double k_concentration, double tris_concentration,double mg_concentration, double na_concentration, double probe_concentration){
2258 /* ////////////////////////////////////////// */
2260 /* ////////////////////////////////////////// */
2262 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
2263 update_fold_params();
2264 P = scale_parameters();
2267 /* ///////////////////////////////////////// */
2268 /* Salt Variable Declaration */
2269 /* ///////////////////////////////////////// */
2271 double d_temp; /* melting temperature */
2272 double d_temp_na; /*melting temperature in 1M na+ */
2273 double d_salt_corr_value = 0.0;
2274 double d_magn_corr_value = 0.0;
2276 double d_fgc; /* gc content */
2277 double d_conc_monovalents = k_concentration+na_concentration+tris_concentration/2;
2278 double d_ratio_ions = sqrt(mg_concentration)/d_conc_monovalents;
2279 double d_a = 3.92/100000.0; /*Parameters from the article of Owczarzy*/
2280 double d_b = 9.11/1000000; /*Parameters from the article of Owczarzy*/
2281 double d_c = 6.26/100000; /*Parameters from the article of Owczarzy*/
2282 double d_d = 1.42/100000; /*Parameters from the article of Owczarzy*/
2283 double d_e = 4.82/10000; /*Parameters from the article of Owczarzy*/
2284 double d_f = 5.25/10000; /*Parameters from the article of Owczarzy*/
2285 double d_g = 8.31/100000; /*Parameters from the article of Owczarzy*/
2287 /* ////////////////////////////////////// */
2288 /* Sequence Variable Declaration */
2289 /* ////////////////////////////////////// */
2291 int seqlen = strlen(s1);
2293 int converted=encode_char(toupper(s1[posst]));
2294 int revconverted=abs(5-converted);
2296 /* ////////////////////////////////////// */
2297 /* Energy Variable Declaration */
2298 /* ////////////////////////////////////// */
2303 /* ////////////////////////////////////// */
2305 /* ////////////////////////////////////// */
2310 for(posst=0; posst<seqlen; posst++){
2311 if(s1[posst] == 'G' || s1[posst] == 'C' || s1[posst] == 'g' || s1[posst] == 'c'){
2318 int type=pair[converted][revconverted];
2319 /* Add twice the duplex penalty */
2322 dS=(dH-dG)/(37+K0)*10;
2326 dS+=(TerminalAUdH-P->TerminalAU)/(37+K0)*10;
2328 for(posst=1; posst<seqlen; posst++){
2329 int nextconverted=encode_char(toupper(s1[posst]));
2330 int nextrevconverted=abs(5-nextconverted);
2331 int nexttype=pair[nextconverted][nextrevconverted];
2332 dG+=stack37[rtype[type]][nexttype];
2333 dH+=stackdH[rtype[type]][nexttype];
2334 dS+=(stackdH[rtype[type]][nexttype] - stack37[rtype[type]][nexttype])/(37+K0)*10;
2335 converted=nextconverted;
2336 revconverted=nextrevconverted;
2342 dS+=(TerminalAUdH-P->TerminalAU)/(37+K0)*10;
2344 /* add initiation again because of symmetry. */
2347 if(mg_concentration==0){
2348 dS+=0.368 * (seqlen-1)*log(na_concentration);
2349 double Tm; Tm=(10*dH)/(dS+1.987*log(probe_concentration/4))-273.15;
2353 double single_charged= k_concentration + tris_concentration/2 + na_concentration;
2354 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2355 if(single_charged==0){
2358 d_b * log (mg_concentration) +
2359 d_fgc * (d_c + d_d * log(mg_concentration)) +
2360 1/(2 * ((double)seqlen - 1)) *
2361 (- d_e + d_f * log (mg_concentration) + d_g * pow(log (mg_concentration),2));
2364 if (d_ratio_ions < 0.22) {
2365 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1/100000 * log (single_charged) + 9.40 * 1/1000000 * pow(log (single_charged),2 );
2368 if (d_ratio_ions < 6) {
2370 d_a = 3.92/100000 * (0.843 - 0.352 * sqrt(single_charged) * log (single_charged));
2371 d_d = 1.42/100000 * (1.279 - 4.03/1000 * log (single_charged) - 8.03/1000 * pow(log (single_charged),2));
2372 d_g = 8.31/100000 * (0.486 - 0.258 * log (single_charged) + 5.25/1000 * pow(log (single_charged),3));
2374 d_magn_corr_value = d_a - d_b * log
2375 (mg_concentration) + d_fgc *
2376 (d_c + d_d * log (mg_concentration)) + 1/(2 * ((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log (mg_concentration),2));
2379 d_magn_corr_value = d_a - d_b * log (mg_concentration) + d_fgc * (d_c + d_d * log (mg_concentration)) + 1/(2 *((double)seqlen - 1)) * (- d_e + d_f * log (mg_concentration) + d_g *pow(log(mg_concentration),2));
2384 double temp_Tm = dH*10 / (dS + 1.987 * log (probe_concentration/4));
2385 int Tm = 1/(1/temp_Tm + d_magn_corr_value) - 273.15;