/************************************************************************
* Jnet - A consensus neural network protein *
* secondary structure prediction method *
* *
* Copyright 1999,2009 James Cuff, Jonathan Barber and Christian Cole *
* *
************************************************************************
-------------------------------------------------------------------------
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-------------------------------------------------------------------------*/
/* Fri Nov 15 15:28:09 GMT 2002 Bug fixes by JDB */
/* WARNING: MUST BE PRESENTED WITH UPPERCASE DATA as lowercase 'n'
* has a special internal meaning in seq2int
* */
/*#define POST *//* Allow post processing of NN output */
/*#define FILTER *//* Allow string post processing of final output */
/** The profile-specific defines below are not independent of each other.
Especially check that the Jury set-up is correct! */
#define HMM /* Do HMMer predictions */
#define PSSM /* Do PSSM predicitons */
/*#define HMMONLY *//* For non-jury positions use the HMM prediction - only works when DOJURY is undefined */
/* The allowed value of the integers representing residues */
enum
{
WINAR = 30, /* array size for winar */
PSILEN = 20, /* no. of elements per position in PSIBLAST PSSM */
PROLEN = 24, /* no. of elements per position for other profiles */
HELIX = 1, /* enums for sec. struct. states */
SHEET = 0,
COIL = 2,
MAXSEQNUM = 1000, /* Maximum number of sequences per alignment */
MAXSEQLEN = 5000, /* Maximum Sequence Length */
MAXBLOCK = 200 /* Maximum block size for human readable output */
};
#include
#include
#include
#include
#include
#include
#include "cmdline/cmdline.h"
#include "pssma1.h" /* Uses information from data.psimat, (PSSM) - 100 hidden nodes */
#include "pssma2.h" /* Second network for pssma1 - 100 hidden nodes */
#include "pssmb1.h" /* Uses information from data.psimat, (PSSM) - 20 hidden nodes */
#include "pssmb2.h" /* Second network for pssmb1 - 20 hidden nodes */
#include "hmm1.h" /* First network for HMMER profile prediction */
#include "hmm2.h" /* Second network for hmm1 */
#include "hmmsol25.h"
#include "hmmsol5.h"
#include "hmmsol0.h"
#include "psisol25.h"
#include "psisol5.h"
#include "psisol0.h"
/* Main data structure for information about input data. Not all the fields are
* used */
typedef struct alldata
{
int lens; /* length of sequences */
int profmat[MAXSEQLEN][PROLEN]; /* BLOSUM frequencies from PSIBLAST alignment - unused but can't remove without segfault? */
float psimat[MAXSEQLEN][PSILEN]; /* PSIBLAST PSSM */
float hmmmat[MAXSEQLEN][PROLEN]; /* results from HMMER */
}
alldata;
/* Global Vars - used to specify which input profiles have been provided */
int NOPSI = 0;
int NOHMM = 0;
int TESTOUTPUT = 0;
int HUMAN = 0; /* toggle human readable output */
int BLOCK = 60; /* block size for human output */
/* PSSM-specific functions */
void readpsi (FILE * psifile, alldata * data);
void doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30]);
/* HMMer-specific functions */
void readhmm (FILE * hmmfile, alldata * data);
void doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30]);
/* General or non-specific functions */
void int2pred (const int *pred, char *letpred, const int length);
float doconf (const float confa, const float confb, const float confc);
void dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data);
void dopred (const alldata * data);
void test_size (int size, int required_size, char *name);
void print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN]);
/* Prediction post-processing functions - not be used anymore, but retained for testing */
/* Executing controlled by #define FILTER */
void filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN]);
void StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr);
char *Strncat (char *s1, char *s2, int len);
/*************************************************************/
int
main (int argc, char **argv)
{
FILE *psifile, *hmmfile;
alldata *data;
/* Declared in cmdline/cmdline.h */
struct gengetopt_args_info args_info;
/* Set up the data structure for the input data */
if ((data = malloc (sizeof (alldata))) == NULL) {
fprintf (stderr, "ERROR! malloc failed at line %i\n", __LINE__);
exit (EXIT_FAILURE);
}
/* Get the command line arguments */
if (cmdline_parser (argc, argv, &args_info) != 0)
exit (EXIT_FAILURE);
/* check whether TEST STYLE OUTPUT has been requested */
if (args_info.test_flag) {
TESTOUTPUT = 1;
}
/* Print version info - CMDLINE_PARSER_PACKAGE CMDLINE_PARSER_VERSION declared in cmdline/cmdline.h */
if (1 == TESTOUTPUT) {
fprintf (stderr, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
} else {
fprintf (stdout, "%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
}
/* Read the HMMER file. Don't need to check if it is given as is a required option */
if ((hmmfile = fopen (args_info.hmmer_arg, "r")) == NULL) {
fprintf (stderr, "ERROR: Can't open HMMER file %s\n", args_info.hmmer_arg);
exit (EXIT_FAILURE);
} else {
if (1 == TESTOUTPUT) {
fprintf (stderr, "Found HMM profile data\n");
} else {
fprintf (stdout, "Found HMM profile data\n");
}
readhmm (hmmfile, data);
(void) fclose (hmmfile);
}
/* Read the PSIBLAST PSSM */
if (args_info.pssm_given) {
if ((psifile = fopen (args_info.pssm_arg, "r")) == NULL) {
fprintf (stderr, "ERROR: Can't open PSSM profile file %s\n", args_info.pssm_arg);
exit (EXIT_FAILURE);
} else {
if (1 == TESTOUTPUT) {
fprintf (stderr, "Found PSSM profile file\n");
} else {
fprintf (stdout, "Found PSSM profile file\n");
}
readpsi (psifile, data);
(void) fclose (psifile);
}
} else {
NOPSI = 1;
}
/* check whether human readable output is required (set by --human) */
if (args_info.human_given) {
HUMAN = 1;
if (args_info.human_orig != NULL) {
printf ("human: %d\n", args_info.human_arg);
printf ("human orig: %s\n", args_info.human_orig);
BLOCK = args_info.human_arg;
if (BLOCK > MAXBLOCK) {
fprintf (stderr, "ERROR - argument for --human is too large\n");
exit (EXIT_FAILURE);
}
}
}
/* Do the prediction */
if (1 == TESTOUTPUT) {
fprintf (stderr, "Running final predictions!\n");
} else {
fprintf (stdout, "Running final predictions!\n");
}
dopred (data);
free (data);
return EXIT_SUCCESS;
}
/*************************************************************************/
/* Replaces particular runs of secondary structure with something else */
void
filter (char inp[MAXSEQLEN], char outp[MAXSEQLEN])
{
int i;
struct filter_pair
{
char *orig;
char *final;
} pairs[] = {
{
"EHHHE", "EEEEE"}, {
"-HHH-", "HHHHH"}, {
"EHHH-", "EHHHH"}, {
"HHHE-", "HHH--"}, {
"-EHHH", "--HHH"}, {
"EHHE", "EEEE"}, {
"-HEH-", "-HHH-"}, {
"EHH-", "EEE-"}, {
"-HHE", "-EEE"}, {
"-HH-", "----"}, {
"HEEH", "EEEE"}, {
"-HE", "--E"}, {
"EH-", "E--"}, {
"-H-", "---"}, {
"HEH", "HHH"}, {
"-E-E-", "-EEE-"}, {
"E-E", "EEE"}, {
"H-H", "HHH"}, {
"EHE", "EEE"}, {
NULL, NULL}
};
for (i = 0; pairs[i].orig != NULL; i++) {
StrReplStr (outp, inp, pairs[i].orig, pairs[i].final);
strcpy (inp, outp);
}
}
/*************************************************************************/
/* Reads in the PSIBLAST PSSM */
void
readpsi (FILE * psifile, alldata * data)
{
int nlines = 0;
while ((getc (psifile)) != EOF) {
float tempval;
int i;
for (i = 0; i < 20; i++) {
int npar = fscanf (psifile, "%f", &tempval);
if (1 == npar) {
data->psimat[nlines][i] = tempval;
} else {
fprintf (stderr, "ERROR: Can't read a value from PSSM file");
data->psimat[nlines][i] = 0.;
}
}
nlines++;
}
/* check the data length matches that of the HMMer data */
if (data->lens != nlines - 1) {
fprintf (stderr, "ERROR! PSSM data length doesn't match that for HMMer. Are both input files from the same alignment/sequence?\n");
exit (EXIT_FAILURE);
}
}
/*************************************************************************/
/* Reads in HMMER profile that was originally based upon ClustalW alignments */
/* Additionally, use this data to define the length of the sequence in the absence of an alignment */
void
readhmm (FILE * hmmfile, alldata * data)
{
int nlines = 0;
while ((getc (hmmfile)) != EOF) {
int i;
if (nlines > MAXSEQLEN) {
fprintf (stderr, "ERROR! the HMMER profile is too long. Can't cope... must quit.\n");
exit (EXIT_FAILURE);
}
for (i = 0; i < 24; i++) {
float tempval;
int npar = fscanf (hmmfile, "%f", &tempval);
if (1 == npar) {
data->hmmmat[nlines][i] = tempval;
} else {
fprintf (stderr, "ERROR: Can't read a value from HMMER profile file");
data->hmmmat[nlines][i] = 0.;
}
}
nlines++;
}
data->lens = nlines - 1; /* set the query sequence length */
}
/*************************************************************************/
/* Function to test whether the data array for input
into the Neural Network function is of the expected size */
void
test_size (int size, int required_size, char *name)
{
if (size != required_size) {
fprintf (stderr, "ERROR! input array not of correct size for %s (%i vs. %i)\n", name, size, required_size);
exit (EXIT_FAILURE);
}
}
/*************************************************************************/
/***********************************************************************/
/* Functions below here haven't been checked */
void
dowinsspsi (float seq2str[MAXSEQLEN][3], int win, int curpos, float winarss[30][4], const alldata * data)
{
int i, j;
for (j = 0, i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++, j++) {
if (i >= 0 && i < data->lens) {
/* CC - changesd to make it more similar to network training */
/* for (k = 0; k < 3; k++)
winarss[j][k] = seq2str[i][k]; */
if ((seq2str[i][0] > seq2str[i][1]) && (seq2str[i][0] > seq2str[i][2])) {
winarss[j][0] = 1.0;
winarss[j][1] = 0.0;
winarss[j][2] = 0.0;
} else if ((seq2str[i][1] > seq2str[i][0]) && (seq2str[i][1] > seq2str[i][2])) {
winarss[j][0] = 0.0;
winarss[j][1] = 1.0;
winarss[j][2] = 0.0;
} else if ((seq2str[i][2] > seq2str[i][0]) && (seq2str[i][2] > seq2str[i][1])) {
winarss[j][0] = 0.0;
winarss[j][1] = 0.0;
winarss[j][2] = 1.0;
} else {
winarss[j][0] = 0.0;
winarss[j][1] = 0.0;
winarss[j][2] = 1.0;
}
} else if (i < 0 || i >= data->lens) {
/* Setting winarss[j][2] to 1 increases q3 accuracy */
/* CC - does it really??? */
winarss[j][0] = 0.0;
winarss[j][1] = 0.0;
winarss[j][2] = 0.0;
}
}
}
/*************************************************************************/
/* Structure to hold data from networks */
typedef struct netdata
{
int ps1;
float *netin;
float *netprofin;
} netdata;
/*************************************************************************/
/* main prediction routine */
void
dopred (const alldata * data)
{
extern int NOHMM, NOPSI;
float psi2net[MAXSEQLEN][3];
float hmmnet[MAXSEQLEN][3];
float finalout[MAXSEQLEN][3];
int psi2fin[MAXSEQLEN];
int hmmfin[MAXSEQLEN];
int consfin[MAXSEQLEN];
char psi2let[MAXSEQLEN];
char hmmlet[MAXSEQLEN];
char finlet[MAXSEQLEN];
char sollet25[MAXSEQLEN];
char sollet5[MAXSEQLEN];
char sollet0[MAXSEQLEN];
float netprofin3[500];
float confidence[MAXSEQLEN]; /* Array for holding the confidence value calculated by doconf() */
float psiar[WINAR][WINAR];
int winar2[WINAR][WINAR];
float solacc25[MAXSEQLEN][2];
float solacc5[MAXSEQLEN][2];
float solacc0[MAXSEQLEN][2];
float winarss[WINAR][4];
char letfilt[MAXSEQLEN];
int i, t;
char jury[MAXSEQLEN];
float netprofin[MAXSEQLEN];
memset (netprofin, 0x00, sizeof (float) * MAXSEQLEN);
memset (winar2, 0x00, sizeof (winar2[0][0]) * WINAR * WINAR);
letfilt[0] = '\0';
#ifdef HMM
{
float seq2str[MAXSEQLEN][3];
int i, windows = 17;
for (i = 0; i < data->lens; i++) {
float nn_output[3];
int y, j = 0;
doprofhmm (data, windows, i, psiar);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < PROLEN; z++)
netprofin3[j++] = psiar[y][z];
}
test_size (j, hmm1REC.NoOfInput, "hmm1");
hmm1 (netprofin3, nn_output, 0);
seq2str[i][0] = nn_output[0];
seq2str[i][1] = nn_output[1];
seq2str[i][2] = nn_output[2];
}
windows = 19;
for (i = 0; i < data->lens; i++) {
int y, j = 0;
float nn_output[3];
float nn_input[19 * 3];
dowinsspsi (seq2str, windows, i, winarss, data);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < 3; z++)
nn_input[j++] = winarss[y][z];
}
test_size (j, hmm2REC.NoOfInput, "hmm2");
hmm2 (nn_input, nn_output, 0);
hmmnet[i][0] = nn_output[0];
hmmnet[i][1] = nn_output[1];
hmmnet[i][2] = nn_output[2];
}
}
#endif /* end ifdef HMM */
if (NOHMM == 0) {
int i, windows = 17;
for (i = 0; i < data->lens; i++) {
float nn_output[3];
int y, j = 0;
doprofhmm (data, windows, i, psiar);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < 24; z++)
netprofin3[j++] = psiar[y][z];
}
test_size (j, hmmsol25REC.NoOfInput, "hmmsol25");
hmmsol25 (netprofin3, nn_output, 0);
solacc25[i][0] = nn_output[0];
solacc25[i][1] = nn_output[1];
test_size (j, hmmsol5REC.NoOfInput, "hmmsol5");
hmmsol5 (netprofin3, nn_output, 0);
solacc5[i][0] = nn_output[0];
solacc5[i][1] = nn_output[1];
test_size (j, hmmsol0REC.NoOfInput, "hmmsol0");
hmmsol0 (netprofin3, nn_output, 0);
solacc0[i][0] = nn_output[0];
solacc0[i][1] = nn_output[1];
}
}
if (NOPSI == 0) {
int i, windows = 17;
for (i = 0; i < data->lens; i++) {
float nn_output[3];
int y, j = 0;
doprofpsi (data, windows, i, psiar);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < 20; z++)
netprofin3[j++] = psiar[y][z];
}
test_size (j, psisol25REC.NoOfInput, "psisol25");
psisol25 (netprofin3, nn_output, 0);
solacc25[i][0] += nn_output[0];
solacc25[i][1] += nn_output[1];
test_size (j, psisol5REC.NoOfInput, "psisol5");
psisol5 (netprofin3, nn_output, 0);
solacc5[i][0] += nn_output[0];
solacc5[i][1] += nn_output[1];
test_size (j, psisol0REC.NoOfInput, "psisol0");
psisol0 (netprofin3, nn_output, 0);
solacc0[i][0] += nn_output[0];
solacc0[i][1] += nn_output[1];
}
}
for (i = 0; i < data->lens; i++) {
if (solacc25[i][0] > solacc25[i][1]) {
sollet25[i] = '-';
}
if (solacc25[i][1] > solacc25[i][0]) {
sollet25[i] = 'B';
}
if (solacc5[i][0] > solacc5[i][1]) {
sollet5[i] = '-';
}
if (solacc5[i][1] > solacc5[i][0]) {
sollet5[i] = 'B';
}
if (solacc0[i][0] > solacc0[i][1]) {
sollet0[i] = '-';
}
if (solacc0[i][1] > solacc0[i][0]) {
sollet0[i] = 'B';
}
}
if (NOPSI == 0) {
float seq2str[MAXSEQLEN][3];
int i, windows = 17;
/* This is for the PSIBLAST PSSM, for which there are two networks for each layer */
memset (psi2net, 0x00, MAXSEQLEN * 3);
#ifdef PSSM
for (t = 0; t < 2; t++) {
/* First network */
windows = 17;
for (i = 0; i < data->lens; i++) {
int y, j = 0;
float nn_output[3];
doprofpsi (data, windows, i, psiar);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < 20; z++)
netprofin[j++] = psiar[y][z];
}
if (t == 0) {
test_size (j, pssma1REC.NoOfInput, "pssma1");
pssma1 (netprofin, nn_output, 0);
} else if (t == 1) {
test_size (j, pssmb1REC.NoOfInput, "pssmb1");
pssmb1 (netprofin, nn_output, 0);
}
seq2str[i][0] = nn_output[0];
seq2str[i][1] = nn_output[1];
seq2str[i][2] = nn_output[2];
}
/* Second network */
windows = 19;
for (i = 0; i < data->lens; i++) {
int y, j = 0;
float nn_output[3];
float nn_input[19 * 3];
dowinsspsi (seq2str, windows, i, winarss, data);
for (y = 0; y < windows; y++) {
int z;
for (z = 0; z < 3; z++)
nn_input[j++] = winarss[y][z];
}
if (t == 0) {
test_size (j, pssma2REC.NoOfInput, "pssma2");
pssma2 (nn_input, nn_output, 0);
} else if (t == 1) {
test_size (j, pssmb2REC.NoOfInput, "pssmb2");
pssmb2 (nn_input, nn_output, 0);
}
psi2net[i][0] += nn_output[0];
psi2net[i][1] += nn_output[1];
psi2net[i][2] += nn_output[2];
}
}
for (i = 0; i < data->lens; i++) {
psi2net[i][0] /= 2;
psi2net[i][1] /= 2;
psi2net[i][2] /= 2;
}
#else
for (i = 0; i < data->lens; i++) {
psi2net[i][0] = 0;
psi2net[i][1] = 0;
psi2net[i][2] = 0;
}
#endif /* end of ifdef PSSM */
}
/* Work out the final output values for the various outputs
* There is some smoothing going on at the ends of the sequences
* Also, some of these are being done even if there is no data in them.
* */
if (NOPSI == 0) {
#ifdef PSSM
for (i = 0; i < data->lens; i++) {
finalout[i][0] = psi2net[i][0];
finalout[i][1] = psi2net[i][1];
finalout[i][2] = psi2net[i][2];
#ifdef POST
if (i <= 4)
finalout[i][2] += (5 - i) * 0.2;
else if (i >= (data->lens - 5))
finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2;
#endif
if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2])
psi2fin[i] = SHEET;
if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2])
psi2fin[i] = HELIX;
if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1])
psi2fin[i] = COIL;
}
int2pred (psi2fin, psi2let, data->lens);
#endif /* end PSSM */
}
#ifdef HMM
if (NOHMM == 0) {
for (i = 0; i < data->lens; i++) {
finalout[i][0] = hmmnet[i][0];
finalout[i][1] = hmmnet[i][1];
finalout[i][2] = hmmnet[i][2];
#ifdef POST
if (i <= 4)
finalout[i][2] += (5 - i) * 0.2;
else if (i >= (data->lens - 5))
finalout[i][2] += (5 - ((data->lens - 1) - i)) * 0.2;
#endif
if (finalout[i][0] > finalout[i][1] && finalout[i][0] > finalout[i][2])
hmmfin[i] = SHEET;
if (finalout[i][1] > finalout[i][0] && finalout[i][1] > finalout[i][2])
hmmfin[i] = HELIX;
if (finalout[i][2] > finalout[i][0] && finalout[i][2] > finalout[i][1])
hmmfin[i] = COIL;
}
int2pred (hmmfin, hmmlet, data->lens);
}
#endif /* end ifdef HMM */
/* When all data is available use the 'cons' network to decide all non-jury positions.
* Also calculate the confidence values */
if (NOPSI == 0) {
int j;
for (j = 0; j < data->lens; j++) {
/* Is there disagreement? */
if (psi2let[j] == hmmlet[j]) { /* No - use the PSI networks */
jury[j] = ' ';
consfin[j] = psi2fin[j];
confidence[j] = doconf (psi2net[j][0], psi2net[j][1], psi2net[j][2]);
} else { /* yes - take the mean propensities of the HMM and PSI networks */
jury[j] = '*';
#ifdef HMMONLY
finalout[j][0] = hmmnet[j][0];
finalout[j][1] = hmmnet[j][1];
finalout[j][2] = hmmnet[j][2];
#else
finalout[j][0] = (hmmnet[j][0] + psi2net[j][0]) / 2;
finalout[j][1] = (hmmnet[j][1] + psi2net[j][1]) / 2;
finalout[j][2] = (hmmnet[j][2] + psi2net[j][2]) / 2;
#endif /* end HMMONLY */
confidence[j] = doconf (finalout[j][0], finalout[j][1], finalout[j][2]);
consfin[j] = COIL;
if (finalout[j][0] > finalout[j][1] && finalout[j][0] > finalout[j][2])
consfin[j] = SHEET;
if (finalout[j][1] > finalout[j][0] && finalout[j][1] > finalout[j][2])
consfin[j] = HELIX;
if (finalout[j][2] > finalout[j][0] && finalout[j][2] > finalout[j][1])
consfin[j] = COIL;
} /* agreement if/else block */
} /* end of sequence */
consfin[data->lens] = 25;
}
/* end of if (NOHMM==0 && NOPSI==0) */
/* Calculate the confidence values */
if (NOPSI == 1) {
fprintf (stdout, "\n\nWARNING!: Only using the HMM profile\nAccuracy will average 79.6%%\n\n");
for (i = 0; i < data->lens; i++) {
consfin[i] = hmmfin[i];
confidence[i] = doconf ((hmmnet[i][0]), (hmmnet[i][1]), (hmmnet[i][2]));
}
consfin[data->lens] = 25;
}
if (NOPSI == 0) {
if (1 == TESTOUTPUT) {
fprintf (stderr, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n");
} else {
fprintf (stdout, "\n\nBoth PSIBLAST and HMM profiles were found\nAccuracy will average 81.6%%\n\n");
}
}
int2pred (consfin, finlet, data->lens);
finlet[data->lens] = '\0';
#ifdef FILTER
/* Remove unlikely secondary structure */
filter (finlet, letfilt);
filter (finlet, letfilt);
filter (finlet, letfilt);
#else
memcpy (letfilt, finlet, sizeof (char) * MAXSEQLEN);
#endif
/* output predictions */
if (HUMAN) /* print human readable and quit */
print_human (data->lens, letfilt, confidence);
printf ("\njnetpred:");
for (i = 0; i < data->lens; i++)
printf ("%c,", letfilt[i]);
printf ("\nJNETCONF:");
for (i = 0; i < data->lens; i++)
printf ("%1.0f,", confidence[i]);
printf ("\nJNETSOL25:");
for (i = 0; i < data->lens; i++)
printf ("%c,", sollet25[i]);
printf ("\nJNETSOL5:");
for (i = 0; i < data->lens; i++)
printf ("%c,", sollet5[i]);
printf ("\nJNETSOL0:");
for (i = 0; i < data->lens; i++)
printf ("%c,", sollet0[i]);
printf ("\nJNETHMM:");
for (i = 0; i < data->lens; i++)
printf ("%c,", hmmlet[i]);
if (NOPSI == 0) {
printf ("\nJNETPSSM:");
for (i = 0; i < data->lens; i++)
printf ("%c,", psi2let[i]);
printf ("\nJNETJURY:");
for (i = 0; i < data->lens; i++)
printf ("%c,", jury[i]);
}
printf ("\nJNETPROPE:");
for (i = 0; i < data->lens; i++)
printf ("%.4f,", hmmnet[i][0]);
printf ("\nJNETPROPH:");
for (i = 0; i < data->lens; i++)
printf ("%.4f,", hmmnet[i][1]);
printf ("\nJNETPROPC:");
for (i = 0; i < data->lens; i++)
printf ("%.4f,", hmmnet[i][2]);
printf ("\n");
}
/*************************************************************************/
/* Calculate confidence as for PHD method (Rost and Sander, JMB, 1993, v232, p584) */
float
doconf (const float confa, const float confb, const float confc)
{
float whichout, maxout, maxnext, outconf;
whichout = outconf = maxnext = 0;
maxout = confc;
if (confa > confb && confa > confc) {
whichout = 0;
maxout = confa;
}
if (confb > confa && confb > confc) {
whichout = 1;
maxout = confb;
}
if (confc > confa && confc > confb) {
whichout = 2;
maxout = confc;
}
if (whichout == 0) {
if (confb > confc)
maxnext = confb;
if (confc > confb)
maxnext = confc;
}
if (whichout == 1) {
if (confc > confa)
maxnext = confc;
if (confa > confc)
maxnext = confa;
}
if (whichout == 2) {
if (confb > confa)
maxnext = confb;
if (confa > confb)
maxnext = confa;
}
outconf = 10 * (maxout - maxnext);
if (outconf > 9)
outconf = 9;
return outconf;
}
/*************************************************************************/
void
StrReplStr (char *s1, char *s2, char *FromSStr, char *ToSStr)
{
char *ChP1, *ChP2;
s1[0] = '\0';
ChP1 = s2;
while ((ChP2 = strstr (ChP1, FromSStr)) != NULL) {
if (ChP1 != ChP2)
Strncat (s1, ChP1, strlen (ChP1) - strlen (ChP2));
strcat (s1, ToSStr);
ChP1 = ChP2 + strlen (FromSStr);
}
strcat (s1, ChP1);
return;
}
/*************************************************************************/
char *
Strncat (char *s1, char *s2, int len)
{
int OrigLen = 0;
if (len == 0) {
fprintf (stderr, "ERROR! Strncat error!");
return s1;
}
if (s1 == NULL || s2 == NULL) {
fprintf (stderr, "ERROR! Strncat error!");
return NULL;
}
OrigLen = strlen (s1);
if (strncat (s1, s2, len) == NULL) {
fprintf (stderr, "ERROR! Strncat error!");
return NULL;
}
s1[OrigLen + len] = '\0';
return s1;
}
/*************************************************************************/
/* Finds the data around the current position in a window */
void
doprofpsi (const alldata * data, const int win, const int curpos, float psiar[30][30])
{
int i, j = 0;
for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) {
int k;
for (k = 0; k < 20; k++) {
psiar[j][k] = data->psimat[i][k];
}
if (i < 0 || i >= data->lens)
memset (psiar[j], 0x00, sizeof (psiar[0][0]) * 30);
j++;
}
}
/*************************************************************************/
/* Converts an array of predicted structure to a character array, not a null terminated string */
void
int2pred (const int *pred, char *letpred, const int length)
{
int i;
for (i = 0; i < length; i++) {
switch (pred[i]) {
case HELIX:
letpred[i] = 'H';
break;
case SHEET:
letpred[i] = 'E';
break;
case COIL:
letpred[i] = '-';
break;
/*case 25: letpred[i] = '\0'; break; */
default:
fprintf (stderr, "ERROR! unknown secondary structure type '%i' at position %i in int2pred \n", pred[i], i);
exit (EXIT_FAILURE);
break;
}
}
}
/*************************************************************************/
void
doprofhmm (const alldata * data, const int win, const int curpos, float psiar[30][30])
{
int i, j, k;
j = k = i = 0;
for (i = (curpos - ((win - 1) / 2)); i <= (curpos + ((win - 1) / 2)); i++) {
for (k = 0; k < 24; k++)
psiar[j][k] = data->hmmmat[i][k];
if (i < 0 || i >= data->lens)
for (k = 0; k < 24; k++)
psiar[j][k] = 0;
j++;
}
}
/*************************************************************************/
/* print out a human readable version of the prediction and confidence */
void
print_human (int length, char pred[MAXSEQLEN], float conf[MAXSEQLEN])
{
int i, j = 0, k;
float *confout; /* array pointers to temp storage of output lines */
char *predout;
/* allocate memory to array pointers */
confout = malloc (BLOCK * sizeof (float));
if (confout == NULL) {
fprintf (stderr, "ERROR! Out of memory\n");
exit (EXIT_FAILURE);
}
predout = malloc (BLOCK * sizeof (char));
if (predout == NULL) {
fprintf (stderr, "ERROR! Out of memory\n");
exit (EXIT_FAILURE);
}
for (i = 0; i < length; i++) {
confout[j] = conf[i]; /* store current conf value */
predout[j] = pred[i]; /* store current prediction value */
/* when reached end of block, print stuff out */
if (i > 1 && i % BLOCK == 0) {
printf ("\n\npred: ");
for (k = 0; k < BLOCK; ++k)
printf ("%c", predout[k]);
printf ("\nconf: ");
for (k = 0; k < BLOCK; ++k)
printf ("%1.0f", confout[k]);
j = 0; /* zero the block counter */
}
++j;
}
/* catch any trailing blocks that are smaller than required */
if (j)
printf ("\n\npred: ");
for (k = 0; k < j; ++k)
printf ("%c", predout[k]);
printf ("\nconf: ");
for (k = 0; k < j; ++k)
printf ("%1.0f", confout[k]);
printf ("\n");
/* free memory and exit */
free (confout);
free (predout);
exit (EXIT_SUCCESS);
}