/************************************************************************ * Jnet - A consensus neural network protein * * secondary structure prediction method * * * * Copyright 1999,2009 James Cuff, Jonathan Barber and Christian Cole * * * ************************************************************************ ------------------------------------------------------------------------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -------------------------------------------------------------------------*/ /* Fri Nov 15 15:28:09 GMT 2002 Bug fixes by JDB */ /* WARNING: MUST BE PRESENTED WITH UPPERCASE DATA as lowercase 'n' * has a special internal meaning in seq2int * */ /*#define POST *//* Allow post processing of NN output */ /*#define FILTER *//* Allow string post processing of final output */ /** The profile-specific defines below are not independent of each other. Especially check that the Jury set-up is correct! */ #define HMM /* Do HMMer predictions */ #define PSSM /* Do PSSM predicitons */ /*#define HMMONLY *//* For non-jury positions use the HMM prediction - only works when DOJURY is undefined */ /* The allowed value of the integers representing residues */ enum { WINAR = 30, /* array size for winar */ PSILEN = 20, /* no. of elements per position in PSIBLAST PSSM */ PROLEN = 24, /* no. of elements per position for other profiles */ HELIX = 1, /* enums for sec. struct. states */ SHEET = 0, COIL = 2, MAXSEQNUM = 1000, /* Maximum number of sequences per alignment */ MAXSEQLEN = 5000, /* Maximum Sequence Length */ MAXBLOCK = 200 /* Maximum block size for human readable output */ }; #include #include #include #include #include #include #include "cmdline/cmdline.h" #include "pssma1.h" /* Uses information from data.psimat, (PSSM) - 100 hidden nodes */ #include "pssma2.h" /* Second network for pssma1 - 100 hidden nodes */ #include "pssmb1.h" /* Uses information from data.psimat, (PSSM) - 20 hidden nodes */ #include "pssmb2.h" /* Second network for pssmb1 - 20 hidden nodes */ #include "hmm1.h" /* First network for HMMER profile prediction */ #include "hmm2.h" /* Second network for hmm1 */ #include "hmmsol25.h" #include "hmmsol5.h" #include "hmmsol0.h" #include "psisol25.h" #include "psisol5.h" #include "psisol0.h" /* Main data structure for information about input data. Not all the fields are * used */ typedef struct alldata { int lens; /* length of sequences */ int profmat[MAXSEQLEN][PROLEN]; /* BLOSUM frequencies from PSIBLAST alignment - unused but can't remove without segfault? */ float psimat[MAXSEQLEN][PSILEN]; /* PSIBLAST PSSM */ float hmmmat[MAXSEQLEN][PROLEN]; /* results from HMMER */ } alldata; /* Global Vars - used to specify which input profiles have been provided */ int NOPSI = 0; int NOHMM = 0; int TESTOUTPUT = 0; int HUMAN = 0; /* toggle human readable output */ int BLOCK = 60; /* block size for human output */ /* PSSM-specific functions */ void readpsi (FILE * psifile, alldata * data); void doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30]); /* HMMer-specific functions */ void readhmm (FILE * hmmfile, alldata * data); void doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30]); /* General or non-specific functions */ void int2pred (const int *pred, char *letpred, const int length); float doconf (const float confa, const float confb, const float confc); void dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data); void dopred (const alldata * data); void test_size (int size, int required_size, char *name); void print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN]); /* Prediction post-processing functions - not be used anymore, but retained for testing */ /* Executing controlled by #define FILTER */ void filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN]); void StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr); char *Strncat (char *s1, char *s2, int len); /*************************************************************/ int main (int argc, char **argv) { FILE *psifile, *hmmfile; alldata *data; /* Declared in cmdline/cmdline.h */ struct gengetopt_args_info args_info; /* Set up the data structure for the input data */ if ((data = malloc (sizeof (alldata))) == NULL) { fprintf (stderr, "ERROR! malloc failed at line %i\n", __LINE__); exit (EXIT_FAILURE); } /* Get the command line arguments */ if (cmdline_parser (argc, argv, &args_info) != 0) exit (EXIT_FAILURE); /* check whether TEST STYLE OUTPUT has been requested */ if (args_info.test_flag) { TESTOUTPUT = 1; } /* Print version info - CMDLINE_PARSER_PACKAGE CMDLINE_PARSER_VERSION declared in cmdline/cmdline.h */ if (1 == TESTOUTPUT) { fprintf (stderr, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION); } else { fprintf (stdout, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION); } /* Read the HMMER file. Don't need to check if it is given as is a required option */ if ((hmmfile = fopen (args_info.hmmer_arg, "r")) == NULL) { fprintf (stderr, "ERROR: Can't open HMMER file %s\n", args_info.hmmer_arg); exit (EXIT_FAILURE); } else { if (1 == TESTOUTPUT) { fprintf (stderr, "Found HMM profile data\n"); } else { fprintf (stdout, "Found HMM profile data\n"); } readhmm (hmmfile, data); (void) fclose (hmmfile); } /* Read the PSIBLAST PSSM */ if (args_info.pssm_given) { if ((psifile = fopen (args_info.pssm_arg, "r")) == NULL) { fprintf (stderr, "ERROR: Can't open PSSM profile file %s\n", args_info.pssm_arg); exit (EXIT_FAILURE); } else { if (1 == TESTOUTPUT) { fprintf (stderr, "Found PSSM profile file\n"); } else { fprintf (stdout, "Found PSSM profile file\n"); } readpsi (psifile, data); (void) fclose (psifile); } } else { NOPSI = 1; } /* check whether human readable output is required (set by --human) */ if (args_info.human_given) { HUMAN = 1; if (args_info.human_orig != NULL) { printf ("human: %d\n", args_info.human_arg); printf ("human orig: %s\n", args_info.human_orig); BLOCK = args_info.human_arg; if (BLOCK > MAXBLOCK) { fprintf (stderr, "ERROR - argument for --human is too large\n"); exit (EXIT_FAILURE); } } } /* Do the prediction */ if (1 == TESTOUTPUT) { fprintf (stderr, "Running final predictions!\n"); } else { fprintf (stdout, "Running final predictions!\n"); } dopred (data); free (data); return EXIT_SUCCESS; } /*************************************************************************/ /* Replaces particular runs of secondary structure with something else */ void filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN]) { int i; struct filter_pair { char *orig; char *final; } pairs[] = { { "EHHHE", "EEEEE"}, { "-HHH-", "HHHHH"}, { "EHHH-", "EHHHH"}, { "HHHE-", "HHH--"}, { "-EHHH", "--HHH"}, { "EHHE", "EEEE"}, { "-HEH-", "-HHH-"}, { "EHH-", "EEE-"}, { "-HHE", "-EEE"}, { "-HH-", "----"}, { "HEEH", "EEEE"}, { "-HE", "--E"}, { "EH-", "E--"}, { "-H-", "---"}, { "HEH", "HHH"}, { "-E-E-", "-EEE-"}, { "E-E", "EEE"}, { "H-H", "HHH"}, { "EHE", "EEE"}, { NULL, NULL} }; for (i = 0; pairs[i].orig != NULL; i++) { StrReplStr (outp, inp, pairs[i].orig, pairs[i].final); strcpy (inp, outp); } } /*************************************************************************/ /* Reads in the PSIBLAST PSSM */ void readpsi (FILE * psifile, alldata * data) { int nlines = 0; while ((getc (psifile)) != EOF) { float tempval; int i; for (i = 0; i < 20; i++) { int npar = fscanf (psifile, "%f", &tempval); if (1 == npar) { data->psimat[nlines][i] = tempval; } else { fprintf (stderr, "ERROR: Can't read a value from PSSM file"); data->psimat[nlines][i] = 0.; } } nlines++; } /* check the data length matches that of the HMMer data */ if (data->lens != nlines - 1) { fprintf (stderr, "ERROR! PSSM data length doesn't match that for HMMer. Are both input files from the same alignment/sequence?\n"); exit (EXIT_FAILURE); } } /*************************************************************************/ /* Reads in HMMER profile that was originally based upon ClustalW alignments */ /* Additionally, use this data to define the length of the sequence in the absence of an alignment */ void readhmm (FILE * hmmfile, alldata * data) { int nlines = 0; while ((getc (hmmfile)) != EOF) { int i; if (nlines > MAXSEQLEN) { fprintf (stderr, "ERROR! the HMMER profile is too long. Can't cope... must quit.\n"); exit (EXIT_FAILURE); } for (i = 0; i < 24; i++) { float tempval; int npar = fscanf (hmmfile, "%f", &tempval); if (1 == npar) { data->hmmmat[nlines][i] = tempval; } else { fprintf (stderr, "ERROR: Can't read a value from HMMER profile file"); data->hmmmat[nlines][i] = 0.; } } nlines++; } data->lens = nlines - 1; /* set the query sequence length */ } /*************************************************************************/ /* Function to test whether the data array for input into the Neural Network function is of the expected size */ void test_size (int size, int required_size, char *name) { if (size != required_size) { fprintf (stderr, "ERROR! input array not of correct size for %s (%i vs. %i)\n", name, size, required_size); exit (EXIT_FAILURE); } } /*************************************************************************/ /***********************************************************************/ /* Functions below here haven't been checked */ void dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data) { int i, j; for (j = 0, i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++, j++) { if (i >= 0 && i < data->lens) { /* CC - changesd to make it more similar to network training */ /* for (k = 0; k < 3; k++) winarss[j][k] = seq2str[i][k]; */ if ((seq2str[i][0] > seq2str[i][1]) && (seq2str[i][0] > seq2str[i][2])) { winarss[j][0] = 1.0; winarss[j][1] = 0.0; winarss[j][2] = 0.0; } else if ((seq2str[i][1] > seq2str[i][0]) && (seq2str[i][1] > seq2str[i][2])) { winarss[j][0] = 0.0; winarss[j][1] = 1.0; winarss[j][2] = 0.0; } else if ((seq2str[i][2] > seq2str[i][0]) && (seq2str[i][2] > seq2str[i][1])) { winarss[j][0] = 0.0; winarss[j][1] = 0.0; winarss[j][2] = 1.0; } else { winarss[j][0] = 0.0; winarss[j][1] = 0.0; winarss[j][2] = 1.0; } } else if (i < 0 || i >= data->lens) { /* Setting winarss[j][2] to 1 increases q3 accuracy */ /* CC - does it really??? */ winarss[j][0] = 0.0; winarss[j][1] = 0.0; winarss[j][2] = 0.0; } } } /*************************************************************************/ /* Structure to hold data from networks */ typedef struct netdata { int ps1; float *netin; float *netprofin; } netdata; /*************************************************************************/ /* main prediction routine */ void dopred (const alldata * data) { extern int NOHMM, NOPSI; float psi2net[MAXSEQLEN][3]; float hmmnet[MAXSEQLEN][3]; float finalout[MAXSEQLEN][3]; int psi2fin[MAXSEQLEN]; int hmmfin[MAXSEQLEN]; int consfin[MAXSEQLEN]; char psi2let[MAXSEQLEN]; char hmmlet[MAXSEQLEN]; char finlet[MAXSEQLEN]; char sollet25[MAXSEQLEN]; char sollet5[MAXSEQLEN]; char sollet0[MAXSEQLEN]; float netprofin3[500]; float confidence[MAXSEQLEN]; /* Array for holding the confidence value calculated by doconf() */ float psiar[WINAR][WINAR]; int winar2[WINAR][WINAR]; float solacc25[MAXSEQLEN][2]; float solacc5[MAXSEQLEN][2]; float solacc0[MAXSEQLEN][2]; float winarss[WINAR][4]; char letfilt[MAXSEQLEN]; int i, t; char jury[MAXSEQLEN]; float netprofin[MAXSEQLEN]; memset (netprofin, 0x00, sizeof (float) * MAXSEQLEN); memset (winar2, 0x00, sizeof (winar2[0][0]) * WINAR * WINAR); letfilt[0] = '\0'; #ifdef HMM { float seq2str[MAXSEQLEN][3]; int i, windows = 17; for (i = 0; i < data->lens; i++) { float nn_output[3]; int y, j = 0; doprofhmm (data, windows, i, psiar); for (y = 0; y < windows; y++) { int z; for (z = 0; z < PROLEN; z++) netprofin3[j++] = psiar[y][z]; } test_size (j, hmm1REC.NoOfInput, "hmm1"); hmm1 (netprofin3, nn_output, 0); seq2str[i][0] = nn_output[0]; seq2str[i][1] = nn_output[1]; seq2str[i][2] = nn_output[2]; } windows = 19; for (i = 0; i < data->lens; i++) { int y, j = 0; float nn_output[3]; float nn_input[19 * 3]; dowinsspsi (seq2str, windows, i, winarss, data); for (y = 0; y < windows; y++) { int z; for (z = 0; z < 3; z++) nn_input[j++] = winarss[y][z]; } test_size (j, hmm2REC.NoOfInput, "hmm2"); hmm2 (nn_input, nn_output, 0); hmmnet[i][0] = nn_output[0]; hmmnet[i][1] = nn_output[1]; hmmnet[i][2] = nn_output[2]; } } #endif /* end ifdef HMM */ if (NOHMM == 0) { int i, windows = 17; for (i = 0; i < data->lens; i++) { float nn_output[3]; int y, j = 0; doprofhmm (data, windows, i, psiar); for (y = 0; y < windows; y++) { int z; for (z = 0; z < 24; z++) netprofin3[j++] = psiar[y][z]; } test_size (j, hmmsol25REC.NoOfInput, "hmmsol25"); hmmsol25 (netprofin3, nn_output, 0); solacc25[i][0] = nn_output[0]; solacc25[i][1] = nn_output[1]; test_size (j, hmmsol5REC.NoOfInput, "hmmsol5"); hmmsol5 (netprofin3, nn_output, 0); solacc5[i][0] = nn_output[0]; solacc5[i][1] = nn_output[1]; test_size (j, hmmsol0REC.NoOfInput, "hmmsol0"); hmmsol0 (netprofin3, nn_output, 0); solacc0[i][0] = nn_output[0]; solacc0[i][1] = nn_output[1]; } } if (NOPSI == 0) { int i, windows = 17; for (i = 0; i < data->lens; i++) { float nn_output[3]; int y, j = 0; doprofpsi (data, windows, i, psiar); for (y = 0; y < windows; y++) { int z; for (z = 0; z < 20; z++) netprofin3[j++] = psiar[y][z]; } test_size (j, psisol25REC.NoOfInput, "psisol25"); psisol25 (netprofin3, nn_output, 0); solacc25[i][0] += nn_output[0]; solacc25[i][1] += nn_output[1]; test_size (j, psisol5REC.NoOfInput, "psisol5"); psisol5 (netprofin3, nn_output, 0); solacc5[i][0] += nn_output[0]; solacc5[i][1] += nn_output[1]; test_size (j, psisol0REC.NoOfInput, "psisol0"); psisol0 (netprofin3, nn_output, 0); solacc0[i][0] += nn_output[0]; solacc0[i][1] += nn_output[1]; } } for (i = 0; i < data->lens; i++) { if (solacc25[i][0] > solacc25[i][1]) { sollet25[i] = '-'; } if (solacc25[i][1] > solacc25[i][0]) { sollet25[i] = 'B'; } if (solacc5[i][0] > solacc5[i][1]) { sollet5[i] = '-'; } if (solacc5[i][1] > solacc5[i][0]) { sollet5[i] = 'B'; } if (solacc0[i][0] > solacc0[i][1]) { sollet0[i] = '-'; } if (solacc0[i][1] > solacc0[i][0]) { sollet0[i] = 'B'; } } if (NOPSI == 0) { float seq2str[MAXSEQLEN][3]; int i, windows = 17; /* This is for the PSIBLAST PSSM, for which there are two networks for each layer */ memset (psi2net, 0x00, MAXSEQLEN * 3); #ifdef PSSM for (t = 0; t < 2; t++) { /* First network */ windows = 17; for (i = 0; i < data->lens; i++) { int y, j = 0; float nn_output[3]; doprofpsi (data, windows, i, psiar); for (y = 0; y < windows; y++) { int z; for (z = 0; z < 20; z++) netprofin[j++] = psiar[y][z]; } if (t == 0) { test_size (j, pssma1REC.NoOfInput, "pssma1"); pssma1 (netprofin, nn_output, 0); } else if (t == 1) { test_size (j, pssmb1REC.NoOfInput, "pssmb1"); pssmb1 (netprofin, nn_output, 0); } seq2str[i][0] = nn_output[0]; seq2str[i][1] = nn_output[1]; seq2str[i][2] = nn_output[2]; } /* Second network */ windows = 19; for (i = 0; i < data->lens; i++) { int y, j = 0; float nn_output[3]; float nn_input[19 * 3]; dowinsspsi (seq2str, windows, i, winarss, data); for (y = 0; y < windows; y++) { int z; for (z = 0; z < 3; z++) nn_input[j++] = winarss[y][z]; } if (t == 0) { test_size (j, pssma2REC.NoOfInput, "pssma2"); pssma2 (nn_input, nn_output, 0); } else if (t == 1) { test_size (j, pssmb2REC.NoOfInput, "pssmb2"); pssmb2 (nn_input, nn_output, 0); } psi2net[i][0] += nn_output[0]; psi2net[i][1] += nn_output[1]; psi2net[i][2] += nn_output[2]; } } for (i = 0; i < data->lens; i++) { psi2net[i][0] /= 2; psi2net[i][1] /= 2; psi2net[i][2] /= 2; } #else for (i = 0; i < data->lens; i++) { psi2net[i][0] = 0; psi2net[i][1] = 0; psi2net[i][2] = 0; } #endif /* end of ifdef PSSM */ } /* Work out the final output values for the various outputs * There is some smoothing going on at the ends of the sequences * Also, some of these are being done even if there is no data in them. * */ if (NOPSI == 0) { #ifdef PSSM for (i = 0; i < data->lens; i++) { finalout[i][0] = psi2net[i][0]; finalout[i][1] = psi2net[i][1]; finalout[i][2] = psi2net[i][2]; #ifdef POST if (i <= 4) finalout[i][2] += (5 - i) * 0.2; else if (i >= (data->lens - 5)) finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2; #endif if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2]) psi2fin[i] = SHEET; if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2]) psi2fin[i] = HELIX; if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1]) psi2fin[i] = COIL; } int2pred (psi2fin, psi2let, data->lens); #endif /* end PSSM */ } #ifdef HMM if (NOHMM == 0) { for (i = 0; i < data->lens; i++) { finalout[i][0] = hmmnet[i][0]; finalout[i][1] = hmmnet[i][1]; finalout[i][2] = hmmnet[i][2]; #ifdef POST if (i <= 4) finalout[i][2] += (5 - i) * 0.2; else if (i >= (data->lens - 5)) finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2; #endif if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2]) hmmfin[i] = SHEET; if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2]) hmmfin[i] = HELIX; if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1]) hmmfin[i] = COIL; } int2pred (hmmfin, hmmlet, data->lens); } #endif /* end ifdef HMM */ /* When all data is available use the 'cons' network to decide all non-jury positions. * Also calculate the confidence values */ if (NOPSI == 0) { int j; for (j = 0; j < data->lens; j++) { /* Is there disagreement? */ if (psi2let[j] == hmmlet[j]) { /* No - use the PSI networks */ jury[j] = ' '; consfin[j] = psi2fin[j]; confidence[j] = doconf (psi2net[j][0], psi2net[j][1], psi2net[j][2]); } else { /* yes - take the mean propensities of the HMM and PSI networks */ jury[j] = '*'; #ifdef HMMONLY finalout[j][0] = hmmnet[j][0]; finalout[j][1] = hmmnet[j][1]; finalout[j][2] = hmmnet[j][2]; #else finalout[j][0] = (hmmnet[j][0] + psi2net[j][0]) / 2; finalout[j][1] = (hmmnet[j][1] + psi2net[j][1]) / 2; finalout[j][2] = (hmmnet[j][2] + psi2net[j][2]) / 2; #endif /* end HMMONLY */ confidence[j] = doconf (finalout[j][0], finalout[j][1], finalout[j][2]); consfin[j] = COIL; if (finalout[j][0] > finalout[j][1] && finalout[j][0] > finalout[j][2]) consfin[j] = SHEET; if (finalout[j][1] > finalout[j][0] && finalout[j][1] > finalout[j][2]) consfin[j] = HELIX; if (finalout[j][2] > finalout[j][0] && finalout[j][2] > finalout[j][1]) consfin[j] = COIL; } /* agreement if/else block */ } /* end of sequence */ consfin[data->lens] = 25; } /* end of if (NOHMM==0 && NOPSI==0) */ /* Calculate the confidence values */ if (NOPSI == 1) { fprintf (stdout, "\n\nWARNING!: Only using the HMM profile\nAccuracy will average 79.6%%\n\n"); for (i = 0; i < data->lens; i++) { consfin[i] = hmmfin[i]; confidence[i] = doconf ((hmmnet[i][0]), (hmmnet[i][1]), (hmmnet[i][2])); } consfin[data->lens] = 25; } if (NOPSI == 0) { if (1 == TESTOUTPUT) { fprintf (stderr, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n"); } else { fprintf (stdout, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n"); } } int2pred (consfin, finlet, data->lens); finlet[data->lens] = '\0'; #ifdef FILTER /* Remove unlikely secondary structure */ filter (finlet, letfilt); filter (finlet, letfilt); filter (finlet, letfilt); #else memcpy (letfilt, finlet, sizeof (char) * MAXSEQLEN); #endif /* output predictions */ if (HUMAN) /* print human readable and quit */ print_human (data->lens, letfilt, confidence); printf ("\njnetpred:"); for (i = 0; i < data->lens; i++) printf ("%c,", letfilt[i]); printf ("\nJNETCONF:"); for (i = 0; i < data->lens; i++) printf ("%1.0f,", confidence[i]); printf ("\nJNETSOL25:"); for (i = 0; i < data->lens; i++) printf ("%c,", sollet25[i]); printf ("\nJNETSOL5:"); for (i = 0; i < data->lens; i++) printf ("%c,", sollet5[i]); printf ("\nJNETSOL0:"); for (i = 0; i < data->lens; i++) printf ("%c,", sollet0[i]); printf ("\nJNETHMM:"); for (i = 0; i < data->lens; i++) printf ("%c,", hmmlet[i]); if (NOPSI == 0) { printf ("\nJNETPSSM:"); for (i = 0; i < data->lens; i++) printf ("%c,", psi2let[i]); printf ("\nJNETJURY:"); for (i = 0; i < data->lens; i++) printf ("%c,", jury[i]); } printf ("\nJNETPROPE:"); for (i = 0; i < data->lens; i++) printf ("%.4f,", hmmnet[i][0]); printf ("\nJNETPROPH:"); for (i = 0; i < data->lens; i++) printf ("%.4f,", hmmnet[i][1]); printf ("\nJNETPROPC:"); for (i = 0; i < data->lens; i++) printf ("%.4f,", hmmnet[i][2]); printf ("\n"); } /*************************************************************************/ /* Calculate confidence as for PHD method (Rost and Sander, JMB, 1993, v232, p584) */ float doconf (const float confa, const float confb, const float confc) { float whichout, maxout, maxnext, outconf; whichout = outconf = maxnext = 0; maxout = confc; if (confa > confb && confa > confc) { whichout = 0; maxout = confa; } if (confb > confa && confb > confc) { whichout = 1; maxout = confb; } if (confc > confa && confc > confb) { whichout = 2; maxout = confc; } if (whichout == 0) { if (confb > confc) maxnext = confb; if (confc > confb) maxnext = confc; } if (whichout == 1) { if (confc > confa) maxnext = confc; if (confa > confc) maxnext = confa; } if (whichout == 2) { if (confb > confa) maxnext = confb; if (confa > confb) maxnext = confa; } outconf = 10 * (maxout - maxnext); if (outconf > 9) outconf = 9; return outconf; } /*************************************************************************/ void StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr) { char *ChP1, *ChP2; s1[0] = '\0'; ChP1 = s2; while ((ChP2 = strstr (ChP1, FromSStr)) != NULL) { if (ChP1 != ChP2) Strncat (s1, ChP1, strlen (ChP1) - strlen (ChP2)); strcat (s1, ToSStr); ChP1 = ChP2 + strlen (FromSStr); } strcat (s1, ChP1); return; } /*************************************************************************/ char * Strncat (char *s1, char *s2, int len) { int OrigLen = 0; if (len == 0) { fprintf (stderr, "ERROR! Strncat error!"); return s1; } if (s1 == NULL || s2 == NULL) { fprintf (stderr, "ERROR! Strncat error!"); return NULL; } OrigLen = strlen (s1); if (strncat (s1, s2, len) == NULL) { fprintf (stderr, "ERROR! Strncat error!"); return NULL; } s1[OrigLen + len] = '\0'; return s1; } /*************************************************************************/ /* Finds the data around the current position in a window */ void doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30]) { int i, j = 0; for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) { int k; for (k = 0; k < 20; k++) { psiar[j][k] = data->psimat[i][k]; } if (i < 0 || i >= data->lens) memset (psiar[j], 0x00, sizeof (psiar[0][0]) * 30); j++; } } /*************************************************************************/ /* Converts an array of predicted structure to a character array, not a null terminated string */ void int2pred (const int *pred, char *letpred, const int length) { int i; for (i = 0; i < length; i++) { switch (pred[i]) { case HELIX: letpred[i] = 'H'; break; case SHEET: letpred[i] = 'E'; break; case COIL: letpred[i] = '-'; break; /*case 25: letpred[i] = '\0'; break; */ default: fprintf (stderr, "ERROR! unknown secondary structure type '%i' at position %i in int2pred \n", pred[i], i); exit (EXIT_FAILURE); break; } } } /*************************************************************************/ void doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30]) { int i, j, k; j = k = i = 0; for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) { for (k = 0; k < 24; k++) psiar[j][k] = data->hmmmat[i][k]; if (i < 0 || i >= data->lens) for (k = 0; k < 24; k++) psiar[j][k] = 0; j++; } } /*************************************************************************/ /* print out a human readable version of the prediction and confidence */ void print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN]) { int i, j = 0, k; float *confout; /* array pointers to temp storage of output lines */ char *predout; /* allocate memory to array pointers */ confout = malloc (BLOCK * sizeof (float)); if (confout == NULL) { fprintf (stderr, "ERROR! Out of memory\n"); exit (EXIT_FAILURE); } predout = malloc (BLOCK * sizeof (char)); if (predout == NULL) { fprintf (stderr, "ERROR! Out of memory\n"); exit (EXIT_FAILURE); } for (i = 0; i < length; i++) { confout[j] = conf[i]; /* store current conf value */ predout[j] = pred[i]; /* store current prediction value */ /* when reached end of block, print stuff out */ if (i > 1 && i % BLOCK == 0) { printf ("\n\npred: "); for (k = 0; k < BLOCK; ++k) printf ("%c", predout[k]); printf ("\nconf: "); for (k = 0; k < BLOCK; ++k) printf ("%1.0f", confout[k]); j = 0; /* zero the block counter */ } ++j; } /* catch any trailing blocks that are smaller than required */ if (j) printf ("\n\npred: "); for (k = 0; k < j; ++k) printf ("%c", predout[k]); printf ("\nconf: "); for (k = 0; k < j; ++k) printf ("%1.0f", confout[k]); printf ("\n"); /* free memory and exit */ free (confout); free (predout); exit (EXIT_SUCCESS); }