X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Fiupred%2Fiupred.c;h=c5e25a2abde2a159a5aee3faf25643df14dd42ab;hb=HEAD;hp=d3b65ac009e043eb7196e979d3185602483bef55;hpb=9c5df7960abf895efe5978b6d9185cc50596c22b;p=jabaws.git diff --git a/binaries/src/iupred/iupred.c b/binaries/src/iupred/iupred.c index d3b65ac..c5e25a2 100644 --- a/binaries/src/iupred/iupred.c +++ b/binaries/src/iupred/iupred.c @@ -4,6 +4,9 @@ #include #include +#include +#include "getquery.h" +#include "gjutil.h" #define AA "GAVLIFPSTCMWYNQDEKRH" @@ -25,7 +28,7 @@ typedef struct { - char name[1000]; + char *name; int le; char *seq; double expscore; @@ -51,12 +54,14 @@ typedef struct void read_mat(char *path, char *fn, double **MAT, double *matave); int getargs(char *line,char *args[],int max); void read_ref(char *path, char *fn, double **REF); -void Get_Histo( P_STR *P, char *path, char *); +void Get_Histo( P_STR *PARAMS, char *path, char *); double **DMatrix(int n_rows, int n_cols); void *my_malloc(size_t size); -void IUPred(SEQ_STR *SEQ, P_STR *P); +void IUPred(SEQ_STR *SEQ, P_STR *PARAMS); void getRegions(SEQ_STR *SEQ ); void Get_Seq(char *fn, SEQ_STR *SEQ); +void createOutputFiles(int type); +void closeOutputFiles(int type); int LC, UC, WS; @@ -64,24 +69,26 @@ double Min_Ene; int JOIN, DEL; int Flag_EP; double EP; +FILE *shortout; +FILE *longout; +FILE *globout; int main(int argc, char **argv) { - P_STR *P; + P_STR *PARAMS; SEQ_STR *SEQ; int i,j; int type; char *path; - - - - if (argc!=3) { + + if (argc<2) { printf(" Usage: %s seqfile type \n",argv[0]); printf(" where type stands for one of the options of \n"); - printf(" \"long\", \"short\" or \"glob\"\n"); + printf(" \"long\", \"short\", \"glob\". If no type is provided the program assumes all types\n"); exit(1); } + if ((path=getenv("IUPred_PATH"))==NULL) { fprintf(stderr,"IUPred_PATH environment variable is not set\n"); path="./"; @@ -93,12 +100,16 @@ int main(int argc, char **argv) printf("# Z. Dosztanyi, V. Csizmok, P. Tompa and I. Simon\n"); printf("# J. Mol. Biol. (2005) 347, 827-839. \n"); printf("#\n"); + printf("# Modified to work within JABAWS (http://www.compbio.dundee.ac.uk/jabaws) framework by \n"); + printf("# Peter Troshin (pvtroshin@dundee.ac.uk) and Geoff Barton gjbarton@dundee.ac.uk\n"); + printf("# June, 2011\n"); + printf("#\n"); printf("#\n"); - - - - if ((strncmp(argv[2],"long",4))==0) { + if(argv[2]==NULL) { + type=3; + } + else if ((strncmp(argv[2],"long",4))==0) { type=0; } else if ((strncmp(argv[2],"short",5))==0) { @@ -108,12 +119,45 @@ int main(int argc, char **argv) type=2; } else { - printf("Wrong argument\n");exit(1); + printf("Cannot recognise disorder type assuming long\n"); + type=0; } - - - SEQ=malloc(sizeof(SEQ_STR)); - Get_Seq(argv[1],SEQ); + + FILE *fasta; + + if ((fasta=fopen(argv[1],"r"))==NULL) { + printf("Could not open %s\n",argv[1]),exit(1); + } + +/* Creating output files depending on the type */ + createOutputFiles(type); + +/* Read input file sequence by sequence */ + + SEQS *fastaseq; + + do { + + fastaseq = gseq_fasta(fasta); + if(fastaseq==NULL) { + break; + } + SEQ=malloc(sizeof(SEQ_STR)); + + //printf("No: %s\n",fastaseq->id ); + //printf("L: %d\n",fastaseq->slen); + + SEQ->name=fastaseq->id; + SEQ->seq=fastaseq->seq; + SEQ->le=fastaseq->slen; + +#ifdef DEBUG + printf("N: %s\n",SEQ->name); + printf("S: %s\n",SEQ->seq); + printf("L: %d\n",SEQ->le); +#endif + + if (SEQ->le==0) {printf(" Sequence length 0\n");exit(1);} #ifdef DEBUG @@ -121,45 +165,47 @@ int main(int argc, char **argv) #endif - P=malloc(sizeof(P_STR)); - P->CC= DMatrix(AAN,AAN); - - if (type==0) { + PARAMS=malloc(sizeof(P_STR)); + PARAMS->CC= DMatrix(AAN,AAN); + /* LONG - DISORDER */ + if (type==0 || type==3) { LC=1; UC=100; WS=10; Flag_EP=0; - read_ref(path,"ss",P->CC); - Get_Histo(P, path, "histo"); + read_ref(path,"ss",PARAMS->CC); + Get_Histo(PARAMS, path, "histo"); + IUPred(SEQ,PARAMS); - IUPred(SEQ,P); + fprintf(longout, "# %s\n",SEQ->name); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); for (i=0;ile;i++) - printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); - } - if (type==1) { - LC=1; + fprintf(longout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); + +} +/* SHORT - DISORDER */ + if (type==1 || type==3) { + LC=1; UC=25; WS=10; Flag_EP=1; EP=-1.26; - read_ref(path,"ss_casp",P->CC); - Get_Histo(P, path, "histo_casp"); + read_ref(path,"ss_casp",PARAMS->CC); + Get_Histo(PARAMS, path, "histo_casp"); - IUPred(SEQ,P); + IUPred(SEQ,PARAMS); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); + fprintf(shortout, "# %s\n",SEQ->name); for (i=0;ile;i++) - printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); - } - if (type==2) { + fprintf(shortout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); +} + +/* GLOB - GLOBULAR DOMAINS */ + if (type==2 || type==3) { char *globseq; LC=1; @@ -167,12 +213,10 @@ int main(int argc, char **argv) WS=15; Flag_EP=0; + read_ref(path,"ss",PARAMS->CC); + Get_Histo(PARAMS,path,"histo"); - - read_ref(path,"ss",P->CC); - Get_Histo(P,path,"histo"); - - IUPred(SEQ,P); + IUPred(SEQ,PARAMS); Min_Ene=DMin_Ene; JOIN=DJOIN; @@ -182,42 +226,80 @@ int main(int argc, char **argv) globseq=malloc((SEQ->le+1)*sizeof(char)); for (i=0;ile;i++) globseq[i]=tolower(SEQ->seq[i]); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); - printf("Number of globular domains: %5d \n",SEQ->ngr); + fprintf(globout,"# %s\n",SEQ->name); + fprintf(globout,"Number of globular domains: %5d \n",SEQ->ngr); for (i=0;ingr;i++) { - printf(" globular domain %5d. %d - %d \n", + fprintf(globout," globular domain %5d. %d - %d \n", i+1,SEQ->gr[i][0]+1,SEQ->gr[i][1]+1); for (j=SEQ->gr[i][0];jgr[i][1]+1;j++) { globseq[j]=toupper(globseq[j]); } } - printf(">%s\n",SEQ->name); + fprintf(globout,">%s\n",SEQ->name); for (i=0;ile;i++) { - if ((i>0)&&(i%60==0)) printf("\n"); - else if ((i>0)&&(i%10==0)) printf(" "); - printf("%c",globseq[i]); + if ((i>0)&&(i%60==0)) fprintf(globout,"\n"); + else if ((i>0)&&(i%10==0)) fprintf(globout," "); + fprintf(globout,"%c",globseq[i]); } - printf("\n"); + fprintf(globout,"\n"); free(globseq); + } - #ifdef DEBUG for (i=0;ile;i++) printf("%5d %c %10.4f\n",i,SEQ->seq[i],SEQ->en[i]); #endif - } - - free(SEQ->seq); - free(SEQ->eprof);free(SEQ->en);free(SEQ->smp); - free(SEQ); - + + + free(SEQ->name); + free(SEQ->seq); + free(SEQ); + free(fastaseq); + + } while(fastaseq != NULL); + + fclose(fasta); + closeOutputFiles(type); return 0; } -void IUPred(SEQ_STR *SEQ, P_STR *P) +void closeOutputFiles(int type) { + if (type==0) { + fclose(longout); + } + else if (type==1) { + fclose(shortout); + } + else if (type==2) { + fclose(globout); + } + else if (type==3) { + fclose(longout); + fclose(shortout); + fclose(globout); + } +} + +void createOutputFiles(int type) { + if (type==0) { + longout = fopen("out.long", "w"); + } + else if (type==1) { + shortout = fopen("out.short", "w"); + } + else if (type==2) { + globout = fopen("out.glob", "w"); + } + else if (type==3) { + longout = fopen("out.long", "w"); + shortout = fopen("out.short", "w"); + globout = fopen("out.glob", "w"); + } +} + +void IUPred(SEQ_STR *SEQ, P_STR *PARAMS) { int i,j, a1, a2, p; int naa; @@ -225,15 +307,15 @@ void IUPred(SEQ_STR *SEQ, P_STR *P) double min, max, step; naa=SEQ->le; - min=P->min; max=P->max;step=P->step; + min=PARAMS->min; max=PARAMS->max;step=PARAMS->step; SEQ->eprof=malloc(naa*sizeof(double)); for (i=0;ieprof[i]=0; SEQ->en=malloc(naa*sizeof(double)); for (i=0;ien[i]=0; SEQ->smp=malloc(naa*sizeof(double)); - for (i=0;ismp[i]=0; + for (i=0;ismp[i]=0; SEQ->expscore=0; for (i=0;iLC)&&((abs(i-j))seq[j]))))-AA; if ((a2<0) || (a2>=AAN)) continue; - SEQ->eprof[i]+=P->CC[a1][a2]; + SEQ->eprof[i]+=PARAMS->CC[a1][a2]; n2++; } SEQ->expscore+=SEQ->eprof[i]/(naa*n2); SEQ->eprof[i]/=n2; } - + if (Flag_EP==0) { for (i=0;ismp[i]>=max-2*step) SEQ->en[i]=0; if ((SEQ->smp[i]>min+2*step)&&(SEQ->smp[i]smp[i]-min)*(1.0/step)); - SEQ->en[i]=P->distro[p]; + SEQ->en[i]=PARAMS->distro[p]; } #ifdef DEBUG @@ -287,7 +369,6 @@ void IUPred(SEQ_STR *SEQ, P_STR *P) i,SEQ->eprof[i], SEQ->smp[i],SEQ->en[i]); #endif - } } @@ -388,7 +469,7 @@ void getRegions(SEQ_STR *SEQ ) } -void Get_Histo(P_STR *P, char *path, char *fn) +void Get_Histo(P_STR *PARAMS, char *path, char *fn) { FILE *f; char ln[ML]; @@ -410,8 +491,8 @@ void Get_Histo(P_STR *P, char *path, char *fn) fscanf(f,"%*s %lf %lf %d\n",&min, &max, &nb); - P->distro=malloc(nb*sizeof(double )); - for (i=0;idistro[i]=0; + PARAMS->distro=malloc(nb*sizeof(double )); + for (i=0;idistro[i]=0; for (i=0,set=0;idistro[i]=v; + PARAMS->distro[i]=v; } fclose(f); - P->max=max; - P->min=min; - P->nb=nb; - P->cutoff=cutoff; - + PARAMS->max=max; + PARAMS->min=min; + PARAMS->nb=nb; + PARAMS->cutoff=cutoff; - P->step=(max-min)/nb; - P->cutoff-=P->step; + PARAMS->step=(max-min)/nb; + PARAMS->cutoff-=PARAMS->step; } @@ -594,6 +674,7 @@ double **DMatrix(int n_rows, int n_cols) } +/* void Get_Seq(char *fn, SEQ_STR *SEQ) { char line[ML]; @@ -632,8 +713,7 @@ void Get_Seq(char *fn, SEQ_STR *SEQ) #ifdef DEBUG printf("%s %5d\n%s\n",SEQ->name,SEQ->le,SEQ->seq); #endif - } - +*/