1 /************************************************************************
2 * Jnet - A consensus neural network protein *
3 * secondary structure prediction method *
5 * Copyright 1999,2009 James Cuff, Jonathan Barber and Christian Cole *
7 ************************************************************************
10 -------------------------------------------------------------------------
11 This program is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 -------------------------------------------------------------------------*/
25 /* Fri Nov 15 15:28:09 GMT 2002 Bug fixes by JDB */
26 /* WARNING: MUST BE PRESENTED WITH UPPERCASE DATA as lowercase 'n'
27 * has a special internal meaning in seq2int
30 /*#define POST *//* Allow post processing of NN output */
31 /*#define FILTER *//* Allow string post processing of final output */
33 /** The profile-specific defines below are not independent of each other.
34 Especially check that the Jury set-up is correct! */
35 #define HMM /* Do HMMer predictions */
36 #define PSSM /* Do PSSM predicitons */
37 /*#define HMMONLY *//* For non-jury positions use the HMM prediction - only works when DOJURY is undefined */
39 /* The allowed value of the integers representing residues */
42 WINAR = 30, /* array size for winar */
43 PSILEN = 20, /* no. of elements per position in PSIBLAST PSSM */
44 PROLEN = 24, /* no. of elements per position for other profiles */
45 HELIX = 1, /* enums for sec. struct. states */
48 MAXSEQNUM = 1000, /* Maximum number of sequences per alignment */
49 MAXSEQLEN = 5000, /* Maximum Sequence Length */
50 MAXBLOCK = 200 /* Maximum block size for human readable output */
60 #include "cmdline/cmdline.h"
62 #include "pssma1.h" /* Uses information from data.psimat, (PSSM) - 100 hidden nodes */
63 #include "pssma2.h" /* Second network for pssma1 - 100 hidden nodes */
64 #include "pssmb1.h" /* Uses information from data.psimat, (PSSM) - 20 hidden nodes */
65 #include "pssmb2.h" /* Second network for pssmb1 - 20 hidden nodes */
67 #include "hmm1.h" /* First network for HMMER profile prediction */
68 #include "hmm2.h" /* Second network for hmm1 */
78 /* Main data structure for information about input data. Not all the fields are
80 typedef struct alldata
82 int lens; /* length of sequences */
83 int profmat[MAXSEQLEN][PROLEN]; /* BLOSUM frequencies from PSIBLAST alignment - unused but can't remove without segfault? */
84 float psimat[MAXSEQLEN][PSILEN]; /* PSIBLAST PSSM */
85 float hmmmat[MAXSEQLEN][PROLEN]; /* results from HMMER */
89 /* Global Vars - used to specify which input profiles have been provided */
93 int HUMAN = 0; /* toggle human readable output */
94 int BLOCK = 60; /* block size for human output */
96 /* PSSM-specific functions */
97 void readpsi (FILE * psifile, alldata * data);
98 void doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30]);
100 /* HMMer-specific functions */
101 void readhmm (FILE * hmmfile, alldata * data);
102 void doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30]);
104 /* General or non-specific functions */
105 void int2pred (const int *pred, char *letpred, const int length);
106 float doconf (const float confa, const float confb, const float confc);
107 void dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data);
108 void dopred (const alldata * data);
109 void test_size (int size, int required_size, char *name);
110 void print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN]);
112 /* Prediction post-processing functions - not be used anymore, but retained for testing */
113 /* Executing controlled by #define FILTER */
114 void filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN]);
115 void StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr);
116 char *Strncat (char *s1, char *s2, int len);
118 /*************************************************************/
120 main (int argc, char **argv)
122 FILE *psifile, *hmmfile;
125 /* Declared in cmdline/cmdline.h */
126 struct gengetopt_args_info args_info;
128 /* Set up the data structure for the input data */
129 if ((data = malloc (sizeof (alldata))) == NULL) {
130 fprintf (stderr, "ERROR! malloc failed at line %i\n", __LINE__);
134 /* Get the command line arguments */
135 if (cmdline_parser (argc, argv, &args_info) != 0)
138 /* check whether TEST STYLE OUTPUT has been requested */
139 if (args_info.test_flag) {
143 /* Print version info - CMDLINE_PARSER_PACKAGE CMDLINE_PARSER_VERSION declared in cmdline/cmdline.h */
144 if (1 == TESTOUTPUT) {
145 fprintf (stderr, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
147 fprintf (stdout, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
150 /* Read the HMMER file. Don't need to check if it is given as is a required option */
151 if ((hmmfile = fopen (args_info.hmmer_arg, "r")) == NULL) {
152 fprintf (stderr, "ERROR: Can't open HMMER file %s\n", args_info.hmmer_arg);
155 if (1 == TESTOUTPUT) {
156 fprintf (stderr, "Found HMM profile data\n");
158 fprintf (stdout, "Found HMM profile data\n");
160 readhmm (hmmfile, data);
161 (void) fclose (hmmfile);
164 /* Read the PSIBLAST PSSM */
165 if (args_info.pssm_given) {
166 if ((psifile = fopen (args_info.pssm_arg, "r")) == NULL) {
167 fprintf (stderr, "ERROR: Can't open PSSM profile file %s\n", args_info.pssm_arg);
170 if (1 == TESTOUTPUT) {
171 fprintf (stderr, "Found PSSM profile file\n");
173 fprintf (stdout, "Found PSSM profile file\n");
175 readpsi (psifile, data);
176 (void) fclose (psifile);
182 /* check whether human readable output is required (set by --human) */
183 if (args_info.human_given) {
185 if (args_info.human_orig != NULL) {
186 printf ("human: %d\n", args_info.human_arg);
187 printf ("human orig: %s\n", args_info.human_orig);
188 BLOCK = args_info.human_arg;
189 if (BLOCK > MAXBLOCK) {
190 fprintf (stderr, "ERROR - argument for --human is too large\n");
196 /* Do the prediction */
197 if (1 == TESTOUTPUT) {
198 fprintf (stderr, "Running final predictions!\n");
200 fprintf (stdout, "Running final predictions!\n");
210 /*************************************************************************/
211 /* Replaces particular runs of secondary structure with something else */
213 filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN])
245 for (i = 0; pairs[i].orig != NULL; i++) {
246 StrReplStr (outp, inp, pairs[i].orig, pairs[i].final);
251 /*************************************************************************/
252 /* Reads in the PSIBLAST PSSM */
254 readpsi (FILE * psifile, alldata * data)
258 while ((getc (psifile)) != EOF) {
261 for (i = 0; i < 20; i++) {
262 int npar = fscanf (psifile, "%f", &tempval);
264 data->psimat[nlines][i] = tempval;
266 fprintf (stderr, "ERROR: Can't read a value from PSSM file");
267 data->psimat[nlines][i] = 0.;
273 /* check the data length matches that of the HMMer data */
274 if (data->lens != nlines - 1) {
275 fprintf (stderr, "ERROR! PSSM data length doesn't match that for HMMer. Are both input files from the same alignment/sequence?\n");
281 /*************************************************************************/
282 /* Reads in HMMER profile that was originally based upon ClustalW alignments */
283 /* Additionally, use this data to define the length of the sequence in the absence of an alignment */
285 readhmm (FILE * hmmfile, alldata * data)
289 while ((getc (hmmfile)) != EOF) {
292 if (nlines > MAXSEQLEN) {
293 fprintf (stderr, "ERROR! the HMMER profile is too long. Can't cope... must quit.\n");
297 for (i = 0; i < 24; i++) {
299 int npar = fscanf (hmmfile, "%f", &tempval);
301 data->hmmmat[nlines][i] = tempval;
303 fprintf (stderr, "ERROR: Can't read a value from HMMER profile file");
304 data->hmmmat[nlines][i] = 0.;
309 data->lens = nlines - 1; /* set the query sequence length */
312 /*************************************************************************/
313 /* Function to test whether the data array for input
314 into the Neural Network function is of the expected size */
316 test_size (int size, int required_size, char *name)
318 if (size != required_size) {
319 fprintf (stderr, "ERROR! input array not of correct size for %s (%i vs. %i)\n", name, size, required_size);
326 /*************************************************************************/
327 /***********************************************************************/
328 /* Functions below here haven't been checked */
330 dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data)
334 for (j = 0, i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++, j++) {
336 if (i >= 0 && i < data->lens) {
337 /* CC - changesd to make it more similar to network training */
339 /* for (k = 0; k < 3; k++)
340 winarss[j][k] = seq2str[i][k]; */
342 if ((seq2str[i][0] > seq2str[i][1]) && (seq2str[i][0] > seq2str[i][2])) {
346 } else if ((seq2str[i][1] > seq2str[i][0]) && (seq2str[i][1] > seq2str[i][2])) {
350 } else if ((seq2str[i][2] > seq2str[i][0]) && (seq2str[i][2] > seq2str[i][1])) {
359 } else if (i < 0 || i >= data->lens) {
360 /* Setting winarss[j][2] to 1 increases q3 accuracy */
361 /* CC - does it really??? */
369 /*************************************************************************/
370 /* Structure to hold data from networks */
371 typedef struct netdata
378 /*************************************************************************/
379 /* main prediction routine */
381 dopred (const alldata * data)
383 extern int NOHMM, NOPSI;
385 float psi2net[MAXSEQLEN][3];
386 float hmmnet[MAXSEQLEN][3];
388 float finalout[MAXSEQLEN][3];
389 int psi2fin[MAXSEQLEN];
390 int hmmfin[MAXSEQLEN];
391 int consfin[MAXSEQLEN];
393 char psi2let[MAXSEQLEN];
394 char hmmlet[MAXSEQLEN];
395 char finlet[MAXSEQLEN];
397 char sollet25[MAXSEQLEN];
398 char sollet5[MAXSEQLEN];
399 char sollet0[MAXSEQLEN];
401 float netprofin3[500];
402 float confidence[MAXSEQLEN]; /* Array for holding the confidence value calculated by doconf() */
403 float psiar[WINAR][WINAR];
404 int winar2[WINAR][WINAR];
406 float solacc25[MAXSEQLEN][2];
407 float solacc5[MAXSEQLEN][2];
408 float solacc0[MAXSEQLEN][2];
410 float winarss[WINAR][4];
412 char letfilt[MAXSEQLEN];
415 char jury[MAXSEQLEN];
417 float netprofin[MAXSEQLEN];
418 memset (netprofin, 0x00, sizeof (float) * MAXSEQLEN);
419 memset (winar2, 0x00, sizeof (winar2[0][0]) * WINAR * WINAR);
425 float seq2str[MAXSEQLEN][3];
427 for (i = 0; i < data->lens; i++) {
431 doprofhmm (data, windows, i, psiar);
433 for (y = 0; y < windows; y++) {
435 for (z = 0; z < PROLEN; z++)
436 netprofin3[j++] = psiar[y][z];
439 test_size (j, hmm1REC.NoOfInput, "hmm1");
441 hmm1 (netprofin3, nn_output, 0);
442 seq2str[i][0] = nn_output[0];
443 seq2str[i][1] = nn_output[1];
444 seq2str[i][2] = nn_output[2];
448 for (i = 0; i < data->lens; i++) {
451 float nn_input[19 * 3];
453 dowinsspsi (seq2str, windows, i, winarss, data);
455 for (y = 0; y < windows; y++) {
457 for (z = 0; z < 3; z++)
458 nn_input[j++] = winarss[y][z];
461 test_size (j, hmm2REC.NoOfInput, "hmm2");
463 hmm2 (nn_input, nn_output, 0);
464 hmmnet[i][0] = nn_output[0];
465 hmmnet[i][1] = nn_output[1];
466 hmmnet[i][2] = nn_output[2];
469 #endif /* end ifdef HMM */
473 for (i = 0; i < data->lens; i++) {
476 doprofhmm (data, windows, i, psiar);
478 for (y = 0; y < windows; y++) {
480 for (z = 0; z < 24; z++)
481 netprofin3[j++] = psiar[y][z];
484 test_size (j, hmmsol25REC.NoOfInput, "hmmsol25");
486 hmmsol25 (netprofin3, nn_output, 0);
487 solacc25[i][0] = nn_output[0];
488 solacc25[i][1] = nn_output[1];
490 test_size (j, hmmsol5REC.NoOfInput, "hmmsol5");
492 hmmsol5 (netprofin3, nn_output, 0);
493 solacc5[i][0] = nn_output[0];
494 solacc5[i][1] = nn_output[1];
496 test_size (j, hmmsol0REC.NoOfInput, "hmmsol0");
498 hmmsol0 (netprofin3, nn_output, 0);
499 solacc0[i][0] = nn_output[0];
500 solacc0[i][1] = nn_output[1];
506 for (i = 0; i < data->lens; i++) {
509 doprofpsi (data, windows, i, psiar);
511 for (y = 0; y < windows; y++) {
513 for (z = 0; z < 20; z++)
514 netprofin3[j++] = psiar[y][z];
517 test_size (j, psisol25REC.NoOfInput, "psisol25");
519 psisol25 (netprofin3, nn_output, 0);
520 solacc25[i][0] += nn_output[0];
521 solacc25[i][1] += nn_output[1];
523 test_size (j, psisol5REC.NoOfInput, "psisol5");
525 psisol5 (netprofin3, nn_output, 0);
526 solacc5[i][0] += nn_output[0];
527 solacc5[i][1] += nn_output[1];
529 test_size (j, psisol0REC.NoOfInput, "psisol0");
531 psisol0 (netprofin3, nn_output, 0);
532 solacc0[i][0] += nn_output[0];
533 solacc0[i][1] += nn_output[1];
537 for (i = 0; i < data->lens; i++) {
538 if (solacc25[i][0] > solacc25[i][1]) {
541 if (solacc25[i][1] > solacc25[i][0]) {
544 if (solacc5[i][0] > solacc5[i][1]) {
547 if (solacc5[i][1] > solacc5[i][0]) {
550 if (solacc0[i][0] > solacc0[i][1]) {
553 if (solacc0[i][1] > solacc0[i][0]) {
559 float seq2str[MAXSEQLEN][3];
562 /* This is for the PSIBLAST PSSM, for which there are two networks for each layer */
563 memset (psi2net, 0x00, MAXSEQLEN * 3);
565 for (t = 0; t < 2; t++) {
569 for (i = 0; i < data->lens; i++) {
573 doprofpsi (data, windows, i, psiar);
575 for (y = 0; y < windows; y++) {
577 for (z = 0; z < 20; z++)
578 netprofin[j++] = psiar[y][z];
583 test_size (j, pssma1REC.NoOfInput, "pssma1");
584 pssma1 (netprofin, nn_output, 0);
586 test_size (j, pssmb1REC.NoOfInput, "pssmb1");
587 pssmb1 (netprofin, nn_output, 0);
590 seq2str[i][0] = nn_output[0];
591 seq2str[i][1] = nn_output[1];
592 seq2str[i][2] = nn_output[2];
597 for (i = 0; i < data->lens; i++) {
600 float nn_input[19 * 3];
602 dowinsspsi (seq2str, windows, i, winarss, data);
604 for (y = 0; y < windows; y++) {
606 for (z = 0; z < 3; z++)
607 nn_input[j++] = winarss[y][z];
612 test_size (j, pssma2REC.NoOfInput, "pssma2");
613 pssma2 (nn_input, nn_output, 0);
615 test_size (j, pssmb2REC.NoOfInput, "pssmb2");
616 pssmb2 (nn_input, nn_output, 0);
619 psi2net[i][0] += nn_output[0];
620 psi2net[i][1] += nn_output[1];
621 psi2net[i][2] += nn_output[2];
624 for (i = 0; i < data->lens; i++) {
630 for (i = 0; i < data->lens; i++) {
635 #endif /* end of ifdef PSSM */
638 /* Work out the final output values for the various outputs
639 * There is some smoothing going on at the ends of the sequences
640 * Also, some of these are being done even if there is no data in them.
645 for (i = 0; i < data->lens; i++) {
646 finalout[i][0] = psi2net[i][0];
647 finalout[i][1] = psi2net[i][1];
648 finalout[i][2] = psi2net[i][2];
652 finalout[i][2] += (5 - i) * 0.2;
653 else if (i >= (data->lens - 5))
654 finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2;
657 if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2])
659 if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2])
661 if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1])
664 int2pred (psi2fin, psi2let, data->lens);
665 #endif /* end PSSM */
669 for (i = 0; i < data->lens; i++) {
670 finalout[i][0] = hmmnet[i][0];
671 finalout[i][1] = hmmnet[i][1];
672 finalout[i][2] = hmmnet[i][2];
676 finalout[i][2] += (5 - i) * 0.2;
677 else if (i >= (data->lens - 5))
678 finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2;
681 if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2])
683 if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2])
685 if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1])
688 int2pred (hmmfin, hmmlet, data->lens);
690 #endif /* end ifdef HMM */
692 /* When all data is available use the 'cons' network to decide all non-jury positions.
693 * Also calculate the confidence values */
696 for (j = 0; j < data->lens; j++) {
698 /* Is there disagreement? */
699 if (psi2let[j] == hmmlet[j]) { /* No - use the PSI networks */
701 consfin[j] = psi2fin[j];
703 confidence[j] = doconf (psi2net[j][0], psi2net[j][1], psi2net[j][2]);
705 } else { /* yes - take the mean propensities of the HMM and PSI networks */
709 finalout[j][0] = hmmnet[j][0];
710 finalout[j][1] = hmmnet[j][1];
711 finalout[j][2] = hmmnet[j][2];
713 finalout[j][0] = (hmmnet[j][0] + psi2net[j][0]) / 2;
714 finalout[j][1] = (hmmnet[j][1] + psi2net[j][1]) / 2;
715 finalout[j][2] = (hmmnet[j][2] + psi2net[j][2]) / 2;
716 #endif /* end HMMONLY */
718 confidence[j] = doconf (finalout[j][0], finalout[j][1], finalout[j][2]);
722 if (finalout[j][0] > finalout[j][1] && finalout[j][0] > finalout[j][2])
724 if (finalout[j][1] > finalout[j][0] && finalout[j][1] > finalout[j][2])
726 if (finalout[j][2] > finalout[j][0] && finalout[j][2] > finalout[j][1])
729 } /* agreement if/else block */
731 } /* end of sequence */
732 consfin[data->lens] = 25;
735 /* end of if (NOHMM==0 && NOPSI==0) */
736 /* Calculate the confidence values */
738 fprintf (stdout, "\n\nWARNING!: Only using the HMM profile\nAccuracy will average 79.6%%\n\n");
739 for (i = 0; i < data->lens; i++) {
740 consfin[i] = hmmfin[i];
742 confidence[i] = doconf ((hmmnet[i][0]), (hmmnet[i][1]), (hmmnet[i][2]));
744 consfin[data->lens] = 25;
748 if (1 == TESTOUTPUT) {
749 fprintf (stderr, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n");
751 fprintf (stdout, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n");
754 int2pred (consfin, finlet, data->lens);
755 finlet[data->lens] = '\0';
758 /* Remove unlikely secondary structure */
759 filter (finlet, letfilt);
760 filter (finlet, letfilt);
761 filter (finlet, letfilt);
763 memcpy (letfilt, finlet, sizeof (char) * MAXSEQLEN);
766 /* output predictions */
767 if (HUMAN) /* print human readable and quit */
768 print_human (data->lens, letfilt, confidence);
770 printf ("\njnetpred:");
771 for (i = 0; i < data->lens; i++)
772 printf ("%c,", letfilt[i]);
773 printf ("\nJNETCONF:");
774 for (i = 0; i < data->lens; i++)
775 printf ("%1.0f,", confidence[i]);
776 printf ("\nJNETSOL25:");
777 for (i = 0; i < data->lens; i++)
778 printf ("%c,", sollet25[i]);
779 printf ("\nJNETSOL5:");
780 for (i = 0; i < data->lens; i++)
781 printf ("%c,", sollet5[i]);
782 printf ("\nJNETSOL0:");
783 for (i = 0; i < data->lens; i++)
784 printf ("%c,", sollet0[i]);
785 printf ("\nJNETHMM:");
786 for (i = 0; i < data->lens; i++)
787 printf ("%c,", hmmlet[i]);
790 printf ("\nJNETPSSM:");
791 for (i = 0; i < data->lens; i++)
792 printf ("%c,", psi2let[i]);
793 printf ("\nJNETJURY:");
794 for (i = 0; i < data->lens; i++)
795 printf ("%c,", jury[i]);
798 printf ("\nJNETPROPE:");
799 for (i = 0; i < data->lens; i++)
800 printf ("%.4f,", hmmnet[i][0]);
801 printf ("\nJNETPROPH:");
802 for (i = 0; i < data->lens; i++)
803 printf ("%.4f,", hmmnet[i][1]);
804 printf ("\nJNETPROPC:");
805 for (i = 0; i < data->lens; i++)
806 printf ("%.4f,", hmmnet[i][2]);
811 /*************************************************************************/
812 /* Calculate confidence as for PHD method (Rost and Sander, JMB, 1993, v232, p584) */
814 doconf (const float confa, const float confb, const float confc)
816 float whichout, maxout, maxnext, outconf;
817 whichout = outconf = maxnext = 0;
820 if (confa > confb && confa > confc) {
824 if (confb > confa && confb > confc) {
828 if (confc > confa && confc > confb) {
851 outconf = 10 * (maxout - maxnext);
858 /*************************************************************************/
860 StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr)
867 while ((ChP2 = strstr (ChP1, FromSStr)) != NULL) {
869 Strncat (s1, ChP1, strlen (ChP1) - strlen (ChP2));
871 ChP1 = ChP2 + strlen (FromSStr);
877 /*************************************************************************/
879 Strncat (char *s1, char *s2, int len)
883 fprintf (stderr, "ERROR! Strncat error!");
887 if (s1 == NULL || s2 == NULL) {
888 fprintf (stderr, "ERROR! Strncat error!");
891 OrigLen = strlen (s1);
892 if (strncat (s1, s2, len) == NULL) {
893 fprintf (stderr, "ERROR! Strncat error!");
897 s1[OrigLen + len] = '\0';
902 /*************************************************************************/
903 /* Finds the data around the current position in a window */
905 doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30])
909 for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) {
911 for (k = 0; k < 20; k++) {
912 psiar[j][k] = data->psimat[i][k];
915 if (i < 0 || i >= data->lens)
916 memset (psiar[j], 0x00, sizeof (psiar[0][0]) * 30);
922 /*************************************************************************/
923 /* Converts an array of predicted structure to a character array, not a null terminated string */
925 int2pred (const int *pred, char *letpred, const int length)
929 for (i = 0; i < length; i++) {
940 /*case 25: letpred[i] = '\0'; break; */
942 fprintf (stderr, "ERROR! unknown secondary structure type '%i' at position %i in int2pred \n", pred[i], i);
949 /*************************************************************************/
951 doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30])
956 for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) {
957 for (k = 0; k < 24; k++)
958 psiar[j][k] = data->hmmmat[i][k];
960 if (i < 0 || i >= data->lens)
961 for (k = 0; k < 24; k++)
967 /*************************************************************************/
968 /* print out a human readable version of the prediction and confidence */
970 print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN])
973 float *confout; /* array pointers to temp storage of output lines */
976 /* allocate memory to array pointers */
977 confout = malloc (BLOCK * sizeof (float));
978 if (confout == NULL) {
979 fprintf (stderr, "ERROR! Out of memory\n");
983 predout = malloc (BLOCK * sizeof (char));
984 if (predout == NULL) {
985 fprintf (stderr, "ERROR! Out of memory\n");
989 for (i = 0; i < length; i++) {
990 confout[j] = conf[i]; /* store current conf value */
991 predout[j] = pred[i]; /* store current prediction value */
993 /* when reached end of block, print stuff out */
994 if (i > 1 && i % BLOCK == 0) {
995 printf ("\n\npred: ");
996 for (k = 0; k < BLOCK; ++k)
997 printf ("%c", predout[k]);
1000 for (k = 0; k < BLOCK; ++k)
1001 printf ("%1.0f", confout[k]);
1003 j = 0; /* zero the block counter */
1009 /* catch any trailing blocks that are smaller than required */
1011 printf ("\n\npred: ");
1012 for (k = 0; k < j; ++k)
1013 printf ("%c", predout[k]);
1015 printf ("\nconf: ");
1016 for (k = 0; k < j; ++k)
1017 printf ("%1.0f", confout[k]);
1020 /* free memory and exit */
1023 exit (EXIT_SUCCESS);