1 /* Last changed Time-stamp: <2006-03-02 22:48:15 ivo> */
3 Local version of RNAalifold
5 c Ivo L Hofacker, Stephan Bernhart
16 #include "part_func.h"
17 #include "fold_vars.h"
24 #include "read_epars.h"
25 #include "RNALalifold_cmdl.h"
28 static const char rcsid[] = "$Id: RNALalifold.c,v 1.1 2007/06/23 09:52:29 ivo Exp $";
31 PRIVATE void usage(void);
32 PRIVATE char *annote(const char *structure, const char *AS[]);
33 PRIVATE void print_pi(const pair_info pi, FILE *file);
34 PRIVATE cpair *make_color_pinfo(const pair_info *pi);
35 PRIVATE cpair *make_color_pinfo2(char **sequences, plist *pl, int n_seq);
37 #define MAX_NUM_NAMES 500
39 int main(int argc, char *argv[]){
40 struct RNALalifold_args_info args_info;
41 char *string, *structure, *ParamFile, *ns_bases, *c;
42 char ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH];
43 int n_seq, i, length, sym, r, maxdist, unchangednc, unchangedcv;
46 double min_en, real_en, sfact;
47 char *AS[MAX_NUM_NAMES]; /* aligned sequences */
48 char *names[MAX_NUM_NAMES]; /* sequence names */
49 FILE *clust_file = stdin;
51 string = structure = ParamFile = ns_bases = NULL;
54 do_backtrack = unchangednc = unchangedcv = 1;
60 #############################################
61 # check the command line parameters
62 #############################################
64 if(RNALalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
66 if(args_info.temp_given) temperature = args_info.temp_arg;
67 /* structure constraint */
68 if(args_info.noTetra_given) tetra_loop=0;
69 /* set dangle model */
70 if(args_info.dangles_given){
71 if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
72 warn_user("required dangle model not implemented, falling back to default dangles=2");
74 dangles = args_info.dangles_arg;
76 /* do not allow weak pairs */
77 if(args_info.noLP_given) noLonelyPairs = 1;
78 /* do not allow wobble pairs (GU) */
79 if(args_info.noGU_given) noGU = 1;
80 /* do not allow weak closing pairs (AU,GU) */
81 if(args_info.noClosingGU_given) no_closingGU = 1;
82 /* set energy model */
83 if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
84 /* take another energy parameter set */
85 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
86 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
87 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
88 /* set pf scaling factor */
89 if(args_info.pfScale_given) sfact = args_info.pfScale_arg;
90 /* partition function settings */
91 if(args_info.partfunc_given){
93 if(args_info.partfunc_arg != -1)
94 do_backtrack = args_info.partfunc_arg;
97 if(args_info.cfactor_given){
98 cv_fact = args_info.cfactor_arg;
102 if(args_info.nfactor_given){
103 nc_fact = args_info.nfactor_arg;
106 /* set the maximum base pair span */
107 if(args_info.span_given) maxdist = args_info.span_arg;
108 /* set the pair probability cutoff */
109 if(args_info.cutoff_given) cutoff = args_info.cutoff_arg;
110 /* calculate most informative sequence */
111 if(args_info.mis_given) mis = 1;
112 if(args_info.csv_given) csv = 1;
113 if(args_info.ribosum_file_given){
114 RibosumFile = strdup(args_info.ribosum_file_arg);
117 if(args_info.ribosum_scoring_given){
122 /* check unnamed options a.k.a. filename of input alignment */
123 if(args_info.inputs_num == 1){
124 clust_file = fopen(args_info.inputs[0], "r");
125 if(clust_file == NULL){
126 fprintf(stderr, "can't open %s\n", args_info.inputs[0]);
130 RNALalifold_cmdline_parser_print_help();
134 /* free allocated memory of command line data structure */
135 RNALalifold_cmdline_parser_free (&args_info);
138 #############################################
140 #############################################
142 if ((ribo==1)&&(unchangednc)) nc_fact=0.5;
143 if ((ribo==1)&&(unchangedcv)) cv_fact=0.6;
145 if (ParamFile != NULL)
146 read_parameter_file(ParamFile);
148 if (ns_bases != NULL) {
149 nonstandards = space(33);
157 nonstandards[i++]=*c++;
158 nonstandards[i++]=*c;
159 if ((sym)&&(*c!=*(c-1))) {
160 nonstandards[i++]=*c;
161 nonstandards[i++]=*(c-1);
168 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
170 if (istty && (clust_file == stdin)) {
171 print_tty_input_seq_str("Input aligned sequences in clustalw format");
174 n_seq = read_clustal(clust_file, AS, names);
175 if (clust_file != stdin) fclose(clust_file);
177 nrerror("no sequences found");
179 length = (int) strlen(AS[0]);
180 if (length<maxdist) {
181 fprintf(stderr, "Alignment length < window size: setting L=%d\n",length);
185 structure = (char *) space((unsigned) length+1);
188 #############################################
190 #############################################
192 update_fold_params();
194 min_en = aliLfold((const char **) AS, structure, maxdist);
196 eos_debug=-1; /* shut off warnings about nonstandard pairs */
197 /* for (i=0; AS[i]!=NULL; i++)
198 s += energy_of_struct(AS[i], structure);
201 string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS);
202 printf("%s\n%s\n", string, structure);
204 printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n",
205 min_en, real_en, min_en - real_en);
207 printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en );
209 strcpy(ffname, "alirna.ps");
210 strcpy(gfname, "alirna.g");
212 /* if (length<=2500) {
214 A = annote(structure, (const char**) AS);
215 (void) PS_rna_plot_a(string, structure, ffname, NULL, A);
218 fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
220 /* {*/ /* free mfe arrays but preserve base_pair for PS_dot_plot */
222 bp = base_pair; base_pair = space(16);
223 free_alifold_arrays(); / * frees base_pair * /
231 mfe_struc = strdup(structure);
233 kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
234 pf_scale = -1;/*exp(-(sfact*min_en)/kT/length);*/
235 if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
238 /* init_alipf_fold(length); */
240 /* energy = alipfW_fold(AS, structure, &pl, maxdist, cutoff); */
243 printf("%s", structure);
244 /*if (!istty) printf(" [%6.2f]\n", energy);
248 /*if ((istty)||(!do_backtrack))
249 printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
251 /* printf(" frequency of mfe structure in ensemble %g\n",
252 exp((energy-min_en)/kT));*/
257 strcpy(ffname, "alifold.out");
258 aliout = fopen(ffname, "w");
260 fprintf(stderr, "can't open %s skipping output\n", ffname);
262 fprintf(aliout, "%d sequence; length of alignment %d\n",
264 fprintf(aliout, "alifold output\n");
266 fprintf(aliout, "%s\n", structure);
268 strcpy(ffname, "alidotL.ps");
269 cp = make_color_pinfo2(AS,pl,n_seq);
270 (void) PS_color_dot_plot_turn(string, cp, ffname, maxdist);
277 (void) fflush(stdout);
280 for (i=0; AS[i]; i++) {
281 free(AS[i]); free(names[i]);
286 PRIVATE void print_pi(const pair_info pi, FILE *file) {
287 const char *pname[8] = {"","CG","GC","GU","UG","AU","UA", "--"};
290 /* numbering starts with 1 in output */
291 fprintf(file, "%5d %5d %2d %5.1f%% %7.3f",
292 pi.i, pi.j, pi.bp[0], 100.*pi.p, pi.ent);
294 if (pi.bp[i]) fprintf(file, " %s:%-4d", pname[i], pi.bp[i]);
295 /* if ((!pi.sym)&&(pi.j>=0)) printf(" *"); */
296 if (!pi.comp) fprintf(file, " +");
300 PRIVATE cpair *make_color_pinfo(const pair_info *pi) {
303 for (n=0; pi[n].i>0; n++);
304 cp = (cpair *) space(sizeof(cpair)*(n+1));
305 for (i=0; i<n; i++) {
310 for (ncomp=0, j=1; j<=6; j++) if (pi[i].bp[j]) ncomp++;
311 cp[i].hue = (ncomp-1.0)/6.2; /* hue<6/6.9 (hue=1 == hue=0) */
312 cp[i].sat = 1 - MIN2( 1.0, pi[i].bp[0]/2.5);
313 cp[i].mfe = pi[i].comp;
320 PRIVATE char *annote(const char *structure, const char *AS[]) {
327 ps = (char *) space(maxl);
328 ptable = make_pair_table(structure);
329 for (i=1; i<=n; i++) {
330 char pps[64], ci='\0', cj='\0';
331 int j, type, pfreq[8] = {0,0,0,0,0,0,0,0}, vi=0, vj=0;
332 if ((j=ptable[i])<i) continue;
333 for (s=0; AS[s]!=NULL; s++) {
334 type = pair[encode_char(AS[s][i-1])][encode_char(AS[s][j-1])];
337 if (AS[s][i-1] != ci) { ci = AS[s][i-1]; vi++;}
338 if (AS[s][j-1] != cj) { cj = AS[s][j-1]; vj++;}
341 if (maxl - strlen(ps) < 128) {
343 ps = realloc(ps, maxl);
344 if (ps==NULL) nrerror("out of memory in realloc");
347 snprintf(pps, 64, "%d %d %d gmark\n", i, j, pfreq[0]);
351 snprintf(pps, 64, "%d cmark\n", i);
355 snprintf(pps, 64, "%d cmark\n", j);
363 /*-------------------------------------------------------------------------*/
365 PRIVATE cpair *make_color_pinfo2(char **sequences, plist *pl, int n_seq) {
369 for (n=0; pl[n].i>0; n++);
370 cp = (cpair *) space(sizeof(cpair)*(n+1));
371 for (i=0; i<n; i++) {
376 for (z=0; z<7; z++) franz[z]=0;
377 for (s=0; s<n_seq; s++) {
378 a=encode_char(toupper(sequences[s][cp[i].i-1]));
379 b=encode_char(toupper(sequences[s][cp[i].j-1]));
380 if ((sequences[s][cp[i].j-1]=='~')||(sequences[s][cp[i].i-1] == '~')) continue;
381 franz[BP_pair[a][b]]++;
383 for (z=1; z<7; z++) {
387 cp[i].hue = (ncomp-1.0)/6.2; /* hue<6/6.9 (hue=1 == hue=0) */
388 cp[i].sat = 1 - MIN2( 1.0, franz[0]/*pi[i].bp[0]*//2.5);
389 /*computation of entropy is sth for the ivo*/
390 /* cp[i].mfe = pi[i].comp; don't have that .. yet*/