#include <ctype.h>
#include <math.h>
+#include <ctype.h>
+#include "getquery.h"
+#include "gjutil.h"
#define AA "GAVLIFPSTCMWYNQDEKRH"
typedef struct
{
- char name[1000];
+ char *name;
int le;
char *seq;
double expscore;
void read_mat(char *path, char *fn, double **MAT, double *matave);
int getargs(char *line,char *args[],int max);
void read_ref(char *path, char *fn, double **REF);
-void Get_Histo( P_STR *P, char *path, char *);
+void Get_Histo( P_STR *PARAMS, char *path, char *);
double **DMatrix(int n_rows, int n_cols);
void *my_malloc(size_t size);
-void IUPred(SEQ_STR *SEQ, P_STR *P);
+void IUPred(SEQ_STR *SEQ, P_STR *PARAMS);
void getRegions(SEQ_STR *SEQ );
void Get_Seq(char *fn, SEQ_STR *SEQ);
+void createOutputFiles(int type);
+void closeOutputFiles(int type);
int LC, UC, WS;
int JOIN, DEL;
int Flag_EP;
double EP;
+FILE *shortout;
+FILE *longout;
+FILE *globout;
int main(int argc, char **argv)
{
- P_STR *P;
+ P_STR *PARAMS;
SEQ_STR *SEQ;
int i,j;
int type;
char *path;
-
-
-
- if (argc!=3) {
+
+ if (argc<2) {
printf(" Usage: %s seqfile type \n",argv[0]);
printf(" where type stands for one of the options of \n");
- printf(" \"long\", \"short\" or \"glob\"\n");
+ printf(" \"long\", \"short\", \"glob\" or \"all\"\n");
exit(1);
}
if ((path=getenv("IUPred_PATH"))==NULL) {
printf("# Z. Dosztanyi, V. Csizmok, P. Tompa and I. Simon\n");
printf("# J. Mol. Biol. (2005) 347, 827-839. \n");
printf("#\n");
+ printf("# Modified to work within JABAWS (http://www.compbio.dundee.ac.uk/jabaws) framework by \n");
+ printf("# Peter Troshin (pvtroshin@dundee.ac.uk) and Geoff Barton gjbarton@dundee.ac.uk\n");
+ printf("# June, 2011\n");
+ printf("#\n");
printf("#\n");
-
-
if ((strncmp(argv[2],"long",4))==0) {
else if ((strncmp(argv[2],"glob",4))==0) {
type=2;
}
+ else if ((strncmp(argv[2],"all",3))==0) {
+ type=3;
+ }
else {
- printf("Wrong argument\n");exit(1);
+ printf("No disorder type is given assuming long\n");
+ type=0;
}
-
-
- SEQ=malloc(sizeof(SEQ_STR));
- Get_Seq(argv[1],SEQ);
+
+/* Creating output files depending on the type */
+ createOutputFiles(type);
+
+
+/* Read input file sequence by sequence */
+ FILE *fasta;
+ SEQS *fastaseq;
+ fasta = fopen(argv[1], "r");
+
+ do {
+
+ fastaseq = gseq_fasta(fasta);
+ if(fastaseq==NULL) {
+ break;
+ }
+ SEQ=malloc(sizeof(SEQ_STR));
+
+ //printf("No: %s\n",fastaseq->id );
+ //printf("L: %d\n",fastaseq->slen);
+
+ SEQ->name=fastaseq->id;
+ SEQ->seq=fastaseq->seq;
+ SEQ->le=fastaseq->slen;
+
+#ifdef DEBUG
+ printf("N: %s\n",SEQ->name);
+ printf("S: %s\n",SEQ->seq);
+ printf("L: %d\n",SEQ->le);
+#endif
+
+
if (SEQ->le==0) {printf(" Sequence length 0\n");exit(1);}
#ifdef DEBUG
#endif
- P=malloc(sizeof(P_STR));
- P->CC= DMatrix(AAN,AAN);
-
- if (type==0) {
+ PARAMS=malloc(sizeof(P_STR));
+ PARAMS->CC= DMatrix(AAN,AAN);
+ /* LONG - DISORDER */
+ if (type==0 || type==3) {
LC=1;
UC=100;
WS=10;
Flag_EP=0;
- read_ref(path,"ss",P->CC);
- Get_Histo(P, path, "histo");
+ read_ref(path,"ss",PARAMS->CC);
+ Get_Histo(PARAMS, path, "histo");
+ IUPred(SEQ,PARAMS);
- IUPred(SEQ,P);
+ fprintf(longout, "# %s\n",SEQ->name);
- printf("# Prediction output \n");
- printf("# %s\n",SEQ->name);
for (i=0;i<SEQ->le;i++)
- printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]);
- }
- if (type==1) {
- LC=1;
+ fprintf(longout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]);
+
+}
+/* SHORT - DISORDER */
+ if (type==1 || type==3) {
+ LC=1;
UC=25;
WS=10;
Flag_EP=1;
EP=-1.26;
- read_ref(path,"ss_casp",P->CC);
- Get_Histo(P, path, "histo_casp");
+ read_ref(path,"ss_casp",PARAMS->CC);
+ Get_Histo(PARAMS, path, "histo_casp");
- IUPred(SEQ,P);
+ IUPred(SEQ,PARAMS);
- printf("# Prediction output \n");
- printf("# %s\n",SEQ->name);
+ fprintf(shortout, "# %s\n",SEQ->name);
for (i=0;i<SEQ->le;i++)
- printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]);
- }
- if (type==2) {
+ fprintf(shortout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]);
+}
+
+/* GLOB - GLOBULAR DOMAINS */
+ if (type==2 || type==3) {
char *globseq;
LC=1;
WS=15;
Flag_EP=0;
+ read_ref(path,"ss",PARAMS->CC);
+ Get_Histo(PARAMS,path,"histo");
-
- read_ref(path,"ss",P->CC);
- Get_Histo(P,path,"histo");
-
- IUPred(SEQ,P);
+ IUPred(SEQ,PARAMS);
Min_Ene=DMin_Ene;
JOIN=DJOIN;
globseq=malloc((SEQ->le+1)*sizeof(char));
for (i=0;i<SEQ->le;i++) globseq[i]=tolower(SEQ->seq[i]);
- printf("# Prediction output \n");
- printf("# %s\n",SEQ->name);
- printf("Number of globular domains: %5d \n",SEQ->ngr);
+ fprintf(globout,"# %s\n",SEQ->name);
+ fprintf(globout,"Number of globular domains: %5d \n",SEQ->ngr);
for (i=0;i<SEQ->ngr;i++) {
- printf(" globular domain %5d. %d - %d \n",
+ fprintf(globout," globular domain %5d. %d - %d \n",
i+1,SEQ->gr[i][0]+1,SEQ->gr[i][1]+1);
for (j=SEQ->gr[i][0];j<SEQ->gr[i][1]+1;j++) {
globseq[j]=toupper(globseq[j]);
}
}
- printf(">%s\n",SEQ->name);
+ fprintf(globout,">%s\n",SEQ->name);
for (i=0;i<SEQ->le;i++) {
- if ((i>0)&&(i%60==0)) printf("\n");
- else if ((i>0)&&(i%10==0)) printf(" ");
- printf("%c",globseq[i]);
+ if ((i>0)&&(i%60==0)) fprintf(globout,"\n");
+ else if ((i>0)&&(i%10==0)) fprintf(globout," ");
+ fprintf(globout,"%c",globseq[i]);
}
- printf("\n");
+ fprintf(globout,"\n");
free(globseq);
+ }
-
#ifdef DEBUG
for (i=0;i<SEQ->le;i++)
printf("%5d %c %10.4f\n",i,SEQ->seq[i],SEQ->en[i]);
#endif
- }
-
- free(SEQ->seq);
- free(SEQ->eprof);free(SEQ->en);free(SEQ->smp);
- free(SEQ);
-
+
+
+ free(SEQ->name);
+ free(SEQ->seq);
+ free(SEQ);
+ free(fastaseq);
+
+ } while(fastaseq != NULL);
+
+ fclose(fasta);
+ closeOutputFiles(type);
return 0;
}
-void IUPred(SEQ_STR *SEQ, P_STR *P)
+void closeOutputFiles(int type) {
+ if (type==0) {
+ fclose(longout);
+ }
+ else if (type==1) {
+ fclose(shortout);
+ }
+ else if (type==2) {
+ fclose(globout);
+ }
+ else if (type==3) {
+ fclose(longout);
+ fclose(shortout);
+ fclose(globout);
+ }
+}
+
+void createOutputFiles(int type) {
+ if (type==0) {
+ longout = fopen("out.long", "w");
+ }
+ else if (type==1) {
+ shortout = fopen("out.short", "w");
+ }
+ else if (type==2) {
+ globout = fopen("out.glob", "w");
+ }
+ else if (type==3) {
+ longout = fopen("out.long", "w");
+ shortout = fopen("out.short", "w");
+ globout = fopen("out.glob", "w");
+ }
+}
+
+void IUPred(SEQ_STR *SEQ, P_STR *PARAMS)
{
int i,j, a1, a2, p;
int naa;
double min, max, step;
naa=SEQ->le;
- min=P->min; max=P->max;step=P->step;
+ min=PARAMS->min; max=PARAMS->max;step=PARAMS->step;
SEQ->eprof=malloc(naa*sizeof(double));
for (i=0;i<naa;i++) SEQ->eprof[i]=0;
SEQ->en=malloc(naa*sizeof(double));
for (i=0;i<naa;i++) SEQ->en[i]=0;
SEQ->smp=malloc(naa*sizeof(double));
- for (i=0;i<naa;i++) SEQ->smp[i]=0;
+ for (i=0;i<naa;i++) SEQ->smp[i]=0;
SEQ->expscore=0;
for (i=0;i<naa;i++) {
for (j=0;j<naa;j++) if (((abs(i-j))>LC)&&((abs(i-j))<UC)) {
a2=strchr(AA,((toupper(SEQ->seq[j]))))-AA;
if ((a2<0) || (a2>=AAN)) continue;
- SEQ->eprof[i]+=P->CC[a1][a2];
+ SEQ->eprof[i]+=PARAMS->CC[a1][a2];
n2++;
}
SEQ->expscore+=SEQ->eprof[i]/(naa*n2);
SEQ->eprof[i]/=n2;
}
-
+
if (Flag_EP==0) {
for (i=0;i<naa;i++) {
n2=0;
if (SEQ->smp[i]>=max-2*step) SEQ->en[i]=0;
if ((SEQ->smp[i]>min+2*step)&&(SEQ->smp[i]<max-2*step)) {
p=(int)((SEQ->smp[i]-min)*(1.0/step));
- SEQ->en[i]=P->distro[p];
+ SEQ->en[i]=PARAMS->distro[p];
}
#ifdef DEBUG
i,SEQ->eprof[i], SEQ->smp[i],SEQ->en[i]);
#endif
-
}
}
}
-void Get_Histo(P_STR *P, char *path, char *fn)
+void Get_Histo(P_STR *PARAMS, char *path, char *fn)
{
FILE *f;
char ln[ML];
fscanf(f,"%*s %lf %lf %d\n",&min, &max, &nb);
- P->distro=malloc(nb*sizeof(double ));
- for (i=0;i<nb;i++) P->distro[i]=0;
+ PARAMS->distro=malloc(nb*sizeof(double ));
+ for (i=0;i<nb;i++) PARAMS->distro[i]=0;
for (i=0,set=0;i<nb;i++) {
sscanf(ln,"%*s %lf %*s %*s %lf\n", &c,&v);
if ((set==0)&&(v<=0.5)) {set=1;cutoff=c;}
- P->distro[i]=v;
+ PARAMS->distro[i]=v;
}
fclose(f);
- P->max=max;
- P->min=min;
- P->nb=nb;
- P->cutoff=cutoff;
+ PARAMS->max=max;
+ PARAMS->min=min;
+ PARAMS->nb=nb;
+ PARAMS->cutoff=cutoff;
-
- P->step=(max-min)/nb;
- P->cutoff-=P->step;
+ PARAMS->step=(max-min)/nb;
+ PARAMS->cutoff-=PARAMS->step;
}
}
+/*
void Get_Seq(char *fn, SEQ_STR *SEQ)
{
char line[ML];
#ifdef DEBUG
printf("%s %5d\n%s\n",SEQ->name,SEQ->le,SEQ->seq);
#endif
-
}
-
+*/