3 /* Rob Russell's attempt to make a COILS program */
5 main(int argc, char *argv[]) {
20 char *seq,*title,*ident;
33 mode = 0; /* 0 = column mode, 1 = fasta, 2 = concise */
36 if((env=getenv("COILSDIR"))==NULL) {
37 fprintf(stderr,"error: environment variable COILSDIR must be set\n");
41 strcpy(&heptfile[0],env);
42 strcpy(&heptfile[strlen(heptfile)],"/new.mat");
45 for(i=1; i<argc; ++i) {
46 if(argv[i][0]!='-') exit_error();
47 if(strcmp(&argv[i][1],"m")==0) {
48 if((i+1)>=argc) exit_error();
49 strcpy(&heptfile[0],argv[i+1]);
51 } else if(strcmp(&argv[i][1],"win")==0) {
52 if((i+1)>=argc) exit_error();
53 sscanf(argv[i+1],"%d",&window);
55 } else if(strcmp(&argv[i][1],"c")==0) {
57 } else if(strcmp(&argv[i][1],"min_seg")==0) {
58 if((i+1)>=argc) exit_error();
59 sscanf(argv[i+1],"%d",&min_seg);
61 } else if(strcmp(&argv[i][1],"c")==0) {
63 } else if((strcmp(&argv[i][1],"f")==0) || (strcmp(&argv[i][1],"fasta")==0)) {
65 } else if((strcmp(&argv[i][1],"min_P")==0)) {
66 if((i+1)>=argc) exit_error();
67 sscanf(argv[i+1],"%f",&min_P);
69 } else if(strcmp(&argv[i][1],"help")==0) {
71 } else if(strcmp(&argv[i][1],"w")==0) {
73 } else if(strcmp(&argv[i][1],"V")==0 || strcmp(&argv[i][1],"v")==0) {
76 fprintf(stderr," can't understand flag/field %s\n",argv[i]);
81 if(verb) printf("Matrix file is %s\n",heptfile);
83 if((MAT=fopen(heptfile,"r"))==NULL) {
84 fprintf(stderr,"Error reading %s\n",heptfile);
89 for(i=0; i<strlen(AAs); ++i) if(AAs[i]!='_') {
91 printf("AA %c %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f\n",AAs[i],
92 h->m[pt][0],h->m[pt][1],h->m[pt][2],h->m[pt][3],h->m[pt][4],
93 h->m[pt][5],h->m[pt][6]);
95 for(i=0; i<h->n; ++i) {
96 printf("Window %4d %1d %f %f %f %f %f\n",
97 h->f[i].win,h->f[i].w,h->f[i].m_cc,h->f[i].sd_cc,h->f[i].m_g,h->f[i].sd_g,h->f[i].sc);
101 /* See if there is a file for our chosen window length/weight scheme */
103 for(i=0; i<h->n; ++i) {
104 if((h->f[i].win == window) && (h->f[i].w == weighted)) { /* match */
105 if(verb) printf("Found fitting data for win %4d w %d\n",window,weighted);
110 /* Read in a sequence from the standard input */
112 ident = (char*) malloc(100*sizeof(char));
113 title = (char*) malloc(100000*sizeof(char));
114 buff = (char*) malloc(100000*sizeof(char));
117 while(fgets(buff,99999,stdin)!=NULL) {
118 /* There is a memory problem - the matrix data gets corrupted under OSF after this fgets */
119 for(i=0; i<strlen(buff); ++i) if(buff[i]=='\n' || buff[i]=='\r') buff[i]='\0';
122 pred_coils(seq,ident,title,h,window,which,weighted,mode,min_P,&t,&tc,min_seg);
127 while((buff[i]!=' ') && (buff[i]!='\0') && (buff[i]!='\n') && (buff[i]!='\r')) { ident[i-1]=buff[i]; i++; }
130 seq = (char*)malloc(sizeof(char));
132 while(buff[i]!='\n' && buff[i]!='\0' && buff[i]!='\r') {
133 title[j]=buff[i]; i++;
139 /* printf("Adding |%s| to |%s| = \n",buff,seq); */
140 seq=(char*)realloc(seq,(seqlen+strlen(buff)+1)*sizeof(char));
141 strcpy(&seq[seqlen],buff);
143 /* printf(" |%s|\n",seq); */
147 pred_coils(seq,ident,title,h,window,which,weighted,mode,min_P,&t,&tc,min_seg);
150 fprintf(stderr,"%8d sequences %8d aas %8d in coil\n",nseq,t,tc);
151 free(title); free(ident);
158 fprintf(stderr,"NCOILS: R.B. Russell, A.N. Lupas, 1999\n");
159 fprintf(stderr," based on Lupas, Van Dyck & Stock (1991) Science 252,1162-1164\n");
160 fprintf(stderr,"format: ncoils [options] < [sequence file]\n");
161 fprintf(stderr," -f or -fasta [fasta output - coils as 'x', like '-x' in seg]\n");
162 fprintf(stderr," -c [concise mode - which sequences have any coils (and how many)]\n");
163 fprintf(stderr," -min_seg <int> [for concise mode - only report sequence if >= min coil segments]\n");
164 fprintf(stderr," -min_P <float> [minimum P to define coil segment; DEFAULT = 0.5]\n");
165 fprintf(stderr," -win <int> [window size; DEFAULT = 21]\n");
166 fprintf(stderr," -w [weight heptad positions a&d the same as b,c,e,f,g]\n");
167 fprintf(stderr," -v [verbose/debug mode - print extra junk]\n");
168 fprintf(stderr," -max_seq_len <int> [longest sequence tolerated; DEFAULT = 100 000]\n");
172 void pred_coils(char *seq,char *ident,char *title,struct hept_pref *h,int win, int which, int weighted,int mode, float min_P, int *t, int *tc, int min_seg) {
177 int total_coil_segments;
181 float this_score,Gg,Gcc,power;
190 score = (float*)malloc(len*sizeof(float));
191 P = (float*)malloc(len*sizeof(float));
192 hept_seq = (char*)malloc(len*sizeof(char));
194 /* printf("Sequence is %s length is %d\n",seq,len); */
195 for(i=0; i<len; ++i) { P[i]=0.0; score[i] = 0.0; hept_seq[i] = 'x'; }
197 for(i=0; i<(len-win+1); ++i) {
200 for(j=0; ((j<win) && ((i+j)<len)); ++j) {
201 aa_pt = (int)(seq[i+j]-'A');
202 if((aa_pt>=0) && (aa_pt<26) && (AAs[aa_pt]!='_')) {
203 pos = j%7; /* where are we in the heptad? pos modulus 7 */
204 /* printf("AA %c in hept %c %7.5f\n",seq[i+j],('a'+pos),h->m[aa_pt][pos]); */
205 if(weighted && (pos==0 || pos==3)) { power = 2.5; }
206 else { power = 1.0; }
208 if(h->m[aa_pt][pos]!=-1) {
209 this_score*=pow(h->m[aa_pt][pos],power);
211 this_score*=pow(h->smallest,power);
216 this_score = pow(this_score,(1/(float)actual_win));
220 for(j=0; ((j<win) && ((i+j)<len)); ++j) {
221 aa_pt = (int)(seq[i+j]-'A');
222 if((aa_pt>=0) && (aa_pt<26) && (AAs[aa_pt]!='_')) {
223 pos = j%7; /* where are we in the heptad? pos modulus 7 */
224 if(this_score>score[i+j]) { score[i+j]=this_score; hept_seq[i+j]='a'+pos; }
231 printf(">%s %s\n",ident,title);
234 total_coil_segments=0;
235 for(i=0; i<len; ++i) {
237 t1 = 1/(h->f[which].sd_cc);
238 t2 = (score[i]-(h->f[which].m_cc))/h->f[which].sd_cc;
242 Gcc = t1 * exp(-0.5*t4);
243 /* printf("Gcc %f %f %f %f %f\n",t1cc,t2cc,t3cc,t4cc,Gcc); */
244 t1 = 1/(h->f[which].sd_g);
245 t2 = (score[i]-(h->f[which].m_g))/h->f[which].sd_g;
249 Gg = t1 * exp(-0.5*t4);
250 /* printf("Gg %f %f %f %f %f\n",t1g,t2g,t3g,t4g,Gg); */
251 P[i] = Gcc/(h->f[which].sc*Gg+Gcc);
254 if((i==0) || (P[i-1]<min_P)) { total_coil_segments++; }
259 if(P[i]>=min_P) { printf("x"); }
260 else { printf("%c",seq[i]); }
261 if(((i+1)%60)==0) { printf("\n"); }
263 printf("%4d %c %c %7.3f %7.3f (%7.3f %7.3f)\n",i+1,seq[i],hept_seq[i],score[i],P[i],Gcc,Gg);
266 if(mode==1) { printf("\n"); }
267 if((mode==2) && (are_there_coils==1) && (total_coil_segments>=min_seg)) {
268 if(total_coil_segments==1) {
269 printf("Pred %4d coil segment : %s %s\n",total_coil_segments,ident,title);
271 printf("Pred %4d coil segments : %s %s\n",total_coil_segments,ident,title);
275 free(P); free(score); free(hept_seq);