1 /*-----------------------------------------------------------
5 / Secondary structure prediction accuracy evaluation
7 / SOV (Segment OVerlap) measure
9 / Copyright by Adam Zemla (11/16/1996)
10 / Email: adamz@llnl.gov
12 /------------------------------------------------------------
14 / Compile: cc sov.c -o sov -lm
16 /------------------------------------------------------------*/
/* Table of accepted residue letters: the 20 amino-acid one-letter codes
 * followed by '-', '?' and 'X' (23 characters in total).  main passes it to
 * read_aa_osec_psec, which searches it through check_aa. */
37 char *letter_AA = "ARNDCQEGHILKMFPSTWYV-?X"; /* 23 chars */
/* Forward declarations.  The `parameters` type and MAXRES are defined on
 * lines that are not part of this listing. */
39 void default_parameters (parameters *);
40 int read_aa_osec_psec (char[MAXRES], char[MAXRES], char[MAXRES], parameters *, char *);
41 float sov (int, char[MAXRES], char[MAXRES], parameters *);
42 float q3 (int, char[MAXRES], char[MAXRES], parameters *);
43 int check_aa (char, char *, int);
/* Program entry point.  The visible statements print a usage message, handle
 * the help options, load the default parameters, copy argv[1] into
 * pdata.fname, read the AA/OSEC/PSEC data, and print the residue listing
 * followed by the Q3 and SOV tables.  Braces, the declaration of pdata and
 * several statements are not shown in this listing.
 * NOTE(review): no return type is visible on the definition line; if none is
 * declared on a preceding line this relies on implicit int, which C99
 * removed. */
46 main (int argc, char *argv[])
48 int i, n_aa, sov_method;
49 char c, aa[MAXRES], osec[MAXRES], psec[MAXRES];
51 float out0, out1, out2, out3;
/* Usage text; the condition that guards it is not visible here. */
54 printf (" Usage: sov <input_data>\n");
55 printf (" HELP: sov -h\n");
/* Help request.  The first test compares only two characters, so any argument
 * that starts with "-h" matches; the other two tests include the terminator
 * and therefore match "help" and "-help" exactly. */
58 if (!strncmp (argv[1], "-h\0", 2) || !strncmp (argv[1], "help\0", 5) || !strncmp (argv[1], "-help\0", 6)) {
/* Shows the README through the shell pager; the relative path is resolved
 * against the current working directory. */
59 system ("more ./README.sov");
64 default_parameters (&pdata);
/* NOTE(review): unbounded copy of a command-line argument; the size of
 * pdata.fname is not visible here -- confirm it cannot overflow. */
66 strcpy (pdata.fname, argv[1]);
68 n_aa = read_aa_osec_psec (aa, osec, psec, &pdata, letter_AA);
/* Second read pass when pdata.input == 1.  The reader overwrites pdata.fname
 * when it meets an AA-OSEC-PSEC line, so this presumably loads the file named
 * there -- TODO confirm where pdata.input is set. */
70 if (pdata.input == 1) {
71 n_aa = read_aa_osec_psec (aa, osec, psec, &pdata, letter_AA);
/* Per-residue pass when pdata.order == 1; the loop body is not shown
 * (presumably swaps the observed and predicted columns -- TODO confirm). */
74 if (pdata.order == 1) {
75 for (i = 0; i < n_aa; i++) {
/* Error text for input without usable data; the guarding condition is not
 * visible. */
83 printf ("\n ERROR! There is no 'AA OSEC PSEC' data in submited prediction.");
84 printf ("\n The submission should contain an observed and predicted");
85 printf ("\n secondary structure in COLUMN format.\n");
/* Echo of the parsed data, one residue per row, numbered from 1. */
89 printf ("\n\n SECONDARY STRUCTURE PREDICTION");
90 printf ("\n NUMBER OF RESIDUES PREDICTED: LENGTH = %d", n_aa);
91 printf ("\n AA OSEC PSEC NUM");
92 for (i = 0; i < n_aa; i++) {
93 printf ("\n %1c %1c %1c %4d", aa[i], osec[i], psec[i], i + 1);
95 printf ("\n -----------------------\n");
96 printf ("\n SECONDARY STRUCTURE PREDICTION ACCURACY EVALUATION. N_AA = %4d\n", n_aa);
97 if (pdata.sov_out >= 1) {
98 printf ("\n SOV parameters: DELTA = %5.2f DELTA-S = %5.2f\n", pdata.sov_delta, pdata.sov_delta_s);
101 printf ("\n ALL HELIX STRAND COIL\n");
/* Four q3 calls with identical visible arguments, one per table column; the
 * selector that q3 reads (pdata.q3_what) is presumably changed on the lines
 * omitted between the calls -- TODO confirm. */
104 out0 = q3 (n_aa, osec, psec, &pdata);
106 out1 = q3 (n_aa, osec, psec, &pdata);
108 out2 = q3 (n_aa, osec, psec, &pdata);
110 out3 = q3 (n_aa, osec, psec, &pdata);
/* Results are multiplied by 100 for printing. */
111 printf ("\n Q3 : %6.1f %6.1f %6.1f %6.1f", out0 * 100.0, out1 * 100.0, out2 * 100.0, out3 * 100.0);
/* Remember the requested method before pdata.sov_method is overwritten below;
 * the conditions around the two overrides are not visible. */
114 sov_method = pdata.sov_method;
117 pdata.sov_method = 1;
119 if (pdata.sov_method == 1) {
/* Same pattern as the Q3 row: one sov call per table column. */
121 out0 = sov (n_aa, osec, psec, &pdata);
123 out1 = sov (n_aa, osec, psec, &pdata);
125 out2 = sov (n_aa, osec, psec, &pdata);
127 out3 = sov (n_aa, osec, psec, &pdata);
128 printf ("\n SOV : %6.1f %6.1f %6.1f %6.1f", out0 * 100.0, out1 * 100.0, out2 * 100.0, out3 * 100.0);
133 pdata.sov_method = 0;
135 if (pdata.sov_method == 0) {
/* First method-0 row: evaluated with sov_delta = 1.0 and printed under the
 * label "delta=50%". */
136 pdata.sov_delta = 1.0;
139 out0 = sov (n_aa, osec, psec, &pdata);
141 out1 = sov (n_aa, osec, psec, &pdata);
143 out2 = sov (n_aa, osec, psec, &pdata);
145 out3 = sov (n_aa, osec, psec, &pdata);
146 printf ("\n SOV (1994 JMB. [delta=50%%]): %6.1f %6.1f %6.1f %6.1f", out0 * 100.0, out1 * 100.0, out2 * 100.0, out3 * 100.0);
/* Second method-0 row: evaluated with sov_delta = 0.0. */
148 pdata.sov_delta = 0.0;
151 out0 = sov (n_aa, osec, psec, &pdata);
153 out1 = sov (n_aa, osec, psec, &pdata);
155 out2 = sov (n_aa, osec, psec, &pdata);
157 out3 = sov (n_aa, osec, psec, &pdata);
158 printf ("\n SOV (1994 JMB. [delta=0]) : %6.1f %6.1f %6.1f %6.1f", out0 * 100.0, out1 * 100.0, out2 * 100.0, out3 * 100.0);
163 printf ("\n -----------------------\n");
168 /*-----------------------------------------------------------
170 / check_aa - checks an amino acid
172 /------------------------------------------------------------*/
/* Scans the first n characters of `letter` for `token`.
 * The return statements are not part of this listing; the callers in
 * read_aa_osec_psec pass n = 23 and treat a result equal to 23 as "unknown
 * amino-acid code", so the function presumably returns the matching index,
 * or n when nothing matches -- TODO confirm. */
174 check_aa (char token, char *letter, int n)
178 for (i = 0; i < n; i++) {
179 if (letter[i] == token)
185 /*-----------------------------------------------------------
187 / read_aa_osec_psec - read secondary structure segments file
189 /------------------------------------------------------------*/
/* Reads the file named by pdata->fname line by line and dispatches on the
 * first token of each line: comment/header lines, parameter lines (SOV-*),
 * a file redirection (AA-OSEC-PSEC), and residue data in either column
 * format (f_seq == 0) or sequence format (f_seq == 1, 2, 3).
 *   aa     - receives the amino-acid letters
 *   sss1   - receives the secondary-structure codes from the first SS column
 *   sss2   - receives the secondary-structure codes from the second SS column
 *   pdata  - parameters; fname is read here and several fields are written
 *   letter - table of accepted amino-acid letters, searched via check_aa
 * main stores the result in n_aa, so the return value is presumably the
 * number of residues read; the return statement is not part of this listing.
 * Declarations of fp and x, and many statements, are also not shown. */
191 read_aa_osec_psec (char aa[MAXRES], char sss1[MAXRES], char sss2[MAXRES], parameters * pdata, char *letter)
193 int i, j, coil, n_aa, n_aa_1, n_aa_2, n_aa_3, f_seq;
195 char line[1000], keyword[250], first[250], second[250], third[250], junk[250];
/* What happens after the error message (exit or return) is not visible. */
198 if ((fp = fopen (pdata->fname, "r")) == NULL) {
199 printf ("\n# error opening file %s for read\n\n", pdata->fname);
/* One input line per iteration; the size passed to fgets matches line[1000]. */
211 while (fgets (line, 1000, fp) != NULL) {
/* Reset the token buffers to a blank placeholder before parsing. */
212 strcpy (keyword, " ");
214 strcpy (second, " ");
/* Skip leading blanks.  Once line[i] == ' ' holds, the '\n' and '\0' tests
 * in the same condition are always true. */
219 while (line[i] == ' ' && line[i] != '\n' && line[i] != '\0' && i < 250)
/* Walk to the end of the first token. */
223 while (line[i] != ' ' && line[i] != '\n' && line[i] != '\0' && i < 250)
/* j is computed on lines not shown (presumably the first token's length);
 * the test keeps the %s conversion below within keyword[250]. */
227 if (j < 250 && j > 0) {
228 sscanf (line, "%s", keyword);
/* Lines starting with '#', "-----", "NUMBER" or "SECONDARY" have empty
 * branches, i.e. they are ignored. */
230 if (!strncmp (keyword, "#", 1)) {
231 } else if (!strncmp (keyword, "-----", 5)) {
232 } else if (!strncmp (keyword, "NUMBER\0", 7)) {
233 } else if (!strncmp (keyword, "SECONDARY\0", 10)) {
/* END marker, honoured only in column format; its body is not shown. */
234 } else if (!strncmp (keyword, "END\0", 4) && f_seq == 0) {
/* AA-OSEC-PSEC <name>: the second token replaces pdata->fname.
 * NOTE(review): %s is unbounded and the size of pdata->fname is not visible;
 * confirm a long token cannot overflow either buffer. */
237 } else if (!strncmp (keyword, "AA-OSEC-PSEC\0", 13)) {
239 sscanf (line, "%s %s", keyword, first);
240 strcpy (pdata->fname, first);
/* Empty line, or keyword still holds the blank placeholder: ignored. */
242 } else if (line[0] == '\n' || !strncmp (keyword, " \0", 4)) {
/* Column header line.  The inner test recognises the order "AA PSEC OSEC";
 * its body is not shown (presumably records that the two SS columns are
 * swapped -- TODO confirm). */
243 } else if (!strncmp (keyword, "AA\0", 3) && f_seq == 0) {
244 sscanf (line, "%s %s %s", keyword, first, second);
245 if (!strncmp (keyword, "AA\0", 3) && !strncmp (first, "PSEC\0", 5) && !strncmp (second, "OSEC\0", 5)) {
/* Parameter lines.  The SOV-DELTA compare includes the terminator, so
 * "SOV-DELTA-S" does not match it and falls through to its own branch.
 * NOTE(review): the sscanf results are not checked; a line without a value
 * stores whatever x or i currently holds. */
248 } else if (!strncmp (keyword, "SOV-DELTA\0", 10)) {
250 sscanf (line, "%s %f", keyword, &x);
251 pdata->sov_delta = x;
252 } else if (!strncmp (keyword, "SOV-DELTA-S\0", 12)) {
254 sscanf (line, "%s %f", keyword, &x);
255 pdata->sov_delta_s = x;
/* NOTE(review): these two compares use n = 9, i.e. only "SOV-METHO" and
 * "SOV-OUTPU" are compared, unlike the exact-match compares above. */
256 } else if (!strncmp (keyword, "SOV-METHOD\0", 9)) {
258 sscanf (line, "%s %d", keyword, &i);
259 pdata->sov_method = i;
260 } else if (!strncmp (keyword, "SOV-OUTPUT\0", 9)) {
262 sscanf (line, "%s %d", keyword, &i);
/* Line starting with '>'; body not shown (presumably switches f_seq to one of
 * the sequence-format states handled below -- TODO confirm). */
264 } else if (line[0] == '>') {
/* Column format: one residue per line. */
269 } else if (f_seq == 0) {
/* Lines prefixed with SSP carry two extra leading fields before the three
 * data columns. */
271 if (!strncmp (keyword, "SSP\0", 4)) {
272 sscanf (line, "%s %s %s %s %s", keyword, junk, first, second, third);
274 printf ("\n ERROR! (line: %d) Check COLUMN format of your prediction!\n", n_aa + 1);
279 sscanf (line, "%s %s %s", first, second, third);
/* Second column -> sss1, third column -> sss2; the store into aa[n_aa] is on
 * a line not shown. */
282 sss1[n_aa] = second[0];
283 sss2[n_aa] = third[0];
/* check_aa returning 23 (the table length) is treated as an unknown code. */
284 if (check_aa (aa[n_aa], letter, 23) == 23) {
285 printf ("\n# ERROR!\n%s", line);
286 printf ("\n# ERROR! (line: %d) Check amino acid code %c\n", n_aa + 1, aa[n_aa]);
/* A blank here presumably means the column was missing, since the buffers
 * are pre-filled with a blank before parsing. */
290 if (sss1[n_aa] == ' ' || sss2[n_aa] == ' ') {
291 printf ("\n# ERROR!\n%s", line);
292 printf ("\n# ERROR! (line: %d) Check secondary structure code\n", n_aa + 1);
/* 'L' is accepted with a warning; the replacement by 'C' announced in the
 * message happens on lines not shown. */
296 if (sss1[n_aa] == 'L') {
299 printf ("\n# WARNING! (line: %d) The 'L' characters are interpreted as 'C' (coil)", n_aa + 1);
303 if (sss2[n_aa] == 'L') {
306 printf ("\n# WARNING! (line: %d) The 'L' characters are interpreted as 'C' (coil)", n_aa + 1);
/* Only C, E and H are valid secondary-structure codes. */
310 if (sss1[n_aa] != 'C' && sss1[n_aa] != 'E' && sss1[n_aa] != 'H') {
311 printf ("\n# ERROR!\n%s", line);
312 printf ("\n# ERROR! (line: %d) Check secondary structure code %c\n", n_aa + 1, sss1[n_aa]);
316 if (sss2[n_aa] != 'C' && sss2[n_aa] != 'E' && sss2[n_aa] != 'H') {
317 printf ("\n# ERROR!\n%s", line);
318 printf ("\n# ERROR! (line: %d) Check secondary structure code %c\n", n_aa + 1, sss2[n_aa]);
/* Capacity check; it sits after the stores above, and the increment of n_aa
 * is not visible -- NOTE(review): confirm no store can reach index MAXRES. */
323 if (n_aa >= MAXRES) {
324 printf ("\n# ERROR! Check number of amino acid lines. (MAX = %d)\n\n", MAXRES);
/* Sequence format, state 1: every non-blank character of the line is stored
 * in sss1.
 * NOTE(review): the loop ends only on '\n'; a '\0' is merely skipped by the
 * inner test and `i < 1000` is not part of the loop condition, so a final
 * line without a newline may run past the end of the string -- confirm
 * against the lines not shown.  The same applies to states 2 and 3 below. */
328 } else if (f_seq == 1) {
330 while (line[i] != '\n') {
331 if (line[i] != ' ' && line[i] != '\t' && line[i] != '\0' && line[i] != '\a' && line[i] != '\b' && line[i] != '\f' && line[i] != '\r' && line[i] != '\v' && i < 1000) {
333 sss1[n_aa] = line[i];
334 if (sss1[n_aa] == 'L') {
337 printf ("\n# WARNING! The 'L' characters are interpreted as 'C' (coil)");
341 if (sss1[n_aa] != 'C' && sss1[n_aa] != 'E' && sss1[n_aa] != 'H') {
342 printf ("\n# ERROR!\n%s", line);
343 printf ("\n# ERROR! Check secondary structure code: %c\n", sss1[n_aa]);
348 if (n_aa >= MAXRES) {
349 printf ("\n# ERROR! Check number of residues. (MAX = %d)\n\n", MAXRES);
/* Sequence format, state 2: same handling, stored in sss2. */
357 } else if (f_seq == 2) {
359 while (line[i] != '\n') {
360 if (line[i] != ' ' && line[i] != '\t' && line[i] != '\0' && line[i] != '\a' && line[i] != '\b' && line[i] != '\f' && line[i] != '\r' && line[i] != '\v' && i < 1000) {
362 sss2[n_aa] = line[i];
363 if (sss2[n_aa] == 'L') {
366 printf ("\n# WARNING! The 'L' characters are interpreted as 'C' (coil)");
370 if (sss2[n_aa] != 'C' && sss2[n_aa] != 'E' && sss2[n_aa] != 'H') {
371 printf ("\n# ERROR!\n%s", line);
372 printf ("\n# ERROR! Check secondary structure code: %c\n", sss2[n_aa]);
377 if (n_aa >= MAXRES) {
378 printf ("\n# ERROR! Check number of residues. (MAX = %d)\n\n", MAXRES);
/* Sequence format, state 3: amino-acid letters, counted separately in n_aa_3
 * and validated against the letter table. */
386 } else if (f_seq == 3) {
388 while (line[i] != '\n') {
389 if (line[i] != ' ' && line[i] != '\t' && line[i] != '\0' && line[i] != '\a' && line[i] != '\b' && line[i] != '\f' && line[i] != '\r' && line[i] != '\v' && i < 1000) {
390 aa[n_aa_3] = line[i];
391 if (check_aa (aa[n_aa_3], letter, 23) == 23) {
392 printf ("\n# ERROR!\n%s", line);
393 printf ("\n# ERROR! (N_res: %d) Check amino acid code %c\n", n_aa_3 + 1, aa[n_aa_3]);
398 if (n_aa_3 >= MAXRES) {
399 printf ("\n# ERROR! Check number of residues. (MAX = %d)\n\n", MAXRES);
/* Consistency check between the counters n_aa_1 and n_aa_2, which are set on
 * lines not shown (presumably the lengths of the two SS sequences). */
408 if (n_aa_1 != n_aa_2) {
409 printf ("\n# ERROR! Check format of your submission.\n");
416 /*-------------------------------------------------------------
418 / default_parameters - default parameters for SOV program
420 /------------------------------------------------------------*/
/* Fills *pdata with the program defaults.  The assignments visible here set
 * sov_method = 1, sov_delta = 1.0 and sov_delta_s = 0.5; any other fields are
 * initialised on lines that are not part of this listing. */
422 default_parameters (parameters * pdata)
426 pdata->sov_method = 1;
427 pdata->sov_delta = 1.0;
428 pdata->sov_delta_s = 0.5;
434 /*-----------------------------------------------------------
436 / sov - evaluate SSp by the Segment OVerlap quantity (SOV)
437 / Input: secondary structure segments
439 /------------------------------------------------------------*/
/* Segment-overlap score between two secondary-structure strings.
 *   n_aa  - number of residues in both strings
 *   sss1  - first string (main passes the observed structure)
 *   sss2  - second string (main passes the predicted structure)
 *   pdata - reads sov_what, sov_method, sov_delta, sov_delta_s and sov_out
 * The prototype declares a float result; main multiplies it by 100 before
 * printing.  The return statement, the declarations of s1, s2, sse and x, and
 * many other statements are not part of this listing. */
441 sov (int n_aa, char sss1[MAXRES], char sss2[MAXRES], parameters * pdata)
443 int i, k, length1, length2, beg_s1, end_s1, beg_s2, end_s2;
444 int j1, j2, k1, k2, minov, maxov, d, d1, d2, n, multiple;
/* sov_what selects one of four evaluation modes; the bodies are not shown
 * (presumably they fill sse[] for the ALL / HELIX / STRAND / COIL columns
 * printed by main -- TODO confirm the mapping). */
453 if (pdata->sov_what == 0) {
458 if (pdata->sov_what == 1) {
463 if (pdata->sov_what == 2) {
468 if (pdata->sov_what == 3) {
474 for (i = 0; i < n_aa; i++) {
/* Only states listed in sse[0..2] are considered. */
476 if (s1 == sse[0] || s1 == sse[1] || s1 == sse[2]) {
/* Extend the current segment of sss1.
 * NOTE(review): the element is read before the bounds test, so sss1[n_aa] is
 * inspected on the last iteration; `i < n_aa && sss1[i] == s1` would avoid
 * that read.  The sss2 loop below has the same shape. */
488 while (sss1[i] == s1 && i < n_aa) {
492 length1 = end_s1 - beg_s1 + 1;
498 while (sss2[k] == s2 && k < n_aa) {
502 length2 = end_s2 - beg_s2 + 1;
503 if (s1 == sse[0] || s1 == sse[1] || s1 == sse[2]) {
/* The two segments have the same state and their residue ranges intersect. */
504 if (s1 == s2 && end_s2 >= beg_s1 && beg_s2 <= end_s1) {
/* Extra handling when `multiple` is positive, applied only with method 1;
 * the body is not shown. */
505 if (multiple > 0 && pdata->sov_method == 1) {
/* Comparisons of the segment start and end positions; the bodies are not
 * shown (presumably they derive minov and maxov -- TODO confirm). */
509 if (beg_s1 > beg_s2) {
516 if (end_s1 < end_s2) {
/* Length-based allowances: each segment length scaled by sov_delta_s and
 * rounded down. */
525 d1 = floor (length1 * pdata->sov_delta_s);
526 d2 = floor (length2 * pdata->sov_delta_s);
/* With method 0 this branch is always taken, regardless of d1 and d2. */
529 if (d1 <= d2 || pdata->sov_method == 0)
/* d larger than maxov - minov; the body is not shown (presumably caps d). */
534 if (d > maxov - minov) {
/* Contribution of this segment pair: (minov + sov_delta * d) * length1;
 * any normalisation happens on lines not shown. */
537 x = pdata->sov_delta * d;
538 x = (minov + x) * length1;
/* Diagnostic for an inconsistent pair; its guarding condition is not shown.
 * Positions are printed 1-based. */
542 printf ("\n ERROR! minov = %-4d maxov = %-4d length = %-4d d = %-4d %4d %4d %4d %4d", minov, maxov, length1, d, beg_s1 + 1, end_s1 + 1, beg_s2 + 1, end_s2 + 1);
/* Trace output, printed only when sov_out == 2. */
544 if (pdata->sov_out == 2) {
545 printf ("\n TEST: minov = %-4d maxov = %-4d length = %-4d d = %-4d %4d %4d %4d %4d", minov, maxov, length1, d, beg_s1 + 1, end_s1 + 1, beg_s2 + 1, end_s2 + 1);
551 if (pdata->sov_out == 2) {
552 printf ("\n TEST: Number of considered residues = %d", n);
562 /*-----------------------------------------------------------
564 / Q3 - evaluate SSp by the residues predicted correctly (Q3)
565 / Input: secondary structure segments
567 /------------------------------------------------------------*/
569 q3 (int n_aa, char sss1[MAXRES], char sss2[MAXRES], parameters * pdata)
579 if (pdata->q3_what == 0) {
584 if (pdata->q3_what == 1) {
589 if (pdata->q3_what == 2) {
594 if (pdata->q3_what == 3) {
602 for (i = 0; i < n_aa; i++) {
604 if (s == sse[0] || s == sse[1] || s == sse[2]) {
606 if (sss1[i] == sss2[i]) {