Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / mafft / core / contrafoldwrap.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 static char *whereiscontrafold;
6
7 void unknown_n( char *out, char *in )
8 {
9         while( *in )
10         {
11                 if( *in == 'a' || *in == 'A' )
12                         *out = 'A';
13                 else if( *in == 't' || *in == 'T' || *in == 'u' || *in == 'U' )
14                         *out = 'U';
15                 else if( *in == 'g' || *in == 'G' )
16                         *out = 'G';
17                 else if( *in == 'c' || *in == 'C' )
18                         *out = 'C';
19                 else if( *in == '-' )
20                         *out = '-';
21                 else
22                         *out = 'N';
23
24                 out++;
25                 in++;
26         }
27         *out = 0;
28 }
29
30 void outcontrafold( FILE *fp, RNApair **pairprob, int length )
31 {
32         int i;
33         RNApair *pt;
34         for( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ )
35         {
36                 if( pt->bestpos > i ) 
37                         fprintf( fp, "%d %d %f\n", i, pt->bestpos, pt->bestscore );
38         }
39 }
40
41 #if 1
42 static void readcontrafold( FILE *fp, RNApair **pairprob, int length )
43 {
44         char gett[10000];
45         int *pairnum;
46         char *pt;
47         int i;
48         int left, right;
49         float prob;
50
51         pairnum = (int *)calloc( length, sizeof( int ) );
52         for( i=0; i<length; i++ ) pairnum[i] = 0;
53
54         while( 1 )
55         {
56                 if( feof( fp ) ) break;
57                 fgets( gett, 9999, fp );
58
59 //              fprintf( stderr, "gett=%s\n", gett );
60
61                 pt = gett;
62
63                 sscanf( gett, "%d ", &left );
64                 left--;
65
66 //              fprintf( stderr, "left=%d\n", left );
67                 pt = strchr( pt, ' ' ) + 1;
68 //              fprintf( stderr, "pt=%s\n", pt );
69
70                 while( (pt = strchr( pt, ' ' ) ) )
71                 {
72                         pt++;
73 //                      fprintf( stderr, "pt=%s\n", pt );
74                         sscanf( pt, "%d:%f", &right, &prob );
75                         right--;
76
77 //                      fprintf( stderr, "%d-%d, %f\n", left, right, prob );
78
79                         pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );
80                         pairprob[left][pairnum[left]].bestscore = prob;
81                         pairprob[left][pairnum[left]].bestpos = right;
82                         pairnum[left]++;
83                         pairprob[left][pairnum[left]].bestscore = -1.0;
84                         pairprob[left][pairnum[left]].bestpos = -1;
85 //                      fprintf( stderr, "%d-%d, %f\n", left, right, prob );
86
87                         pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );
88                         pairprob[right][pairnum[right]].bestscore = prob;
89                         pairprob[right][pairnum[right]].bestpos = left;
90                         pairnum[right]++;
91                         pairprob[right][pairnum[right]].bestscore = -1.0;
92                         pairprob[right][pairnum[right]].bestpos = -1;
93 //                      fprintf( stderr, "%d-%d, %f\n", right, left, prob );
94                 }
95         }
96         free( pairnum );
97 }
98 #endif
99
100 void arguments( int argc, char *argv[] )
101 {
102     int c;
103         inputfile = NULL;
104         dorp = NOTSPECIFIED;
105         kimuraR = NOTSPECIFIED;
106         pamN = NOTSPECIFIED;
107         whereiscontrafold = NULL;
108
109     while( --argc > 0 && (*++argv)[0] == '-' )
110         {
111         while ( (c = *++argv[0]) )
112                 {
113             switch( c )
114             {
115                                 case 'i':
116                                         inputfile = *++argv;
117                                         fprintf( stderr, "inputfile = %s\n", inputfile );
118                                         --argc;
119                                         goto nextoption;
120                                 case 'd':
121                                         whereiscontrafold = *++argv;
122                                         fprintf( stderr, "whereiscontrafold = %s\n", whereiscontrafold );
123                                         --argc;
124                                         goto nextoption;
125                 default:
126                     fprintf( stderr, "illegal option %c\n", c );
127                     argc = 0;
128                     break;
129             }
130                 }
131                 nextoption:
132                         ;
133         }
134     if( argc != 0 ) 
135     {
136         fprintf( stderr, "options: Check source file !\n" );
137         exit( 1 );
138     }
139 }
140
141
142 int main( int argc, char *argv[] )
143 {
144         static char com[10000];
145         static int  *nlen;      
146         int left, right;
147         int res;
148         static char **name, **seq, **nogap;
149         static int **gapmap;
150         static int *order;
151         int i, j;
152         FILE *infp;
153         RNApair ***pairprob;
154         RNApair **alnpairprob;
155         RNApair *pairprobpt;
156         RNApair *pt;
157         int *alnpairnum;
158         float prob;
159         int adpos;
160
161         arguments( argc, argv );
162
163         if( inputfile )
164         {
165                 infp = fopen( inputfile, "r" );
166                 if( !infp )
167                 {
168                         fprintf( stderr, "Cannot open %s\n", inputfile );
169                         exit( 1 );
170                 }
171         }
172         else
173                 infp = stdin;
174
175         if( !whereiscontrafold )
176                 whereiscontrafold = "";
177
178         getnumlen( infp );
179         rewind( infp );
180
181         if( dorp != 'd' )
182         {
183                 fprintf( stderr, "nuc only\n" );
184                 exit( 1 );
185         }
186
187         seq = AllocateCharMtx( njob, nlenmax*2+1 );
188         nogap = AllocateCharMtx( njob, nlenmax*2+1 );
189         gapmap = AllocateIntMtx( njob, nlenmax*2+1 );
190         order = AllocateIntVec( njob );
191         name = AllocateCharMtx( njob, B+1 );
192     nlen = AllocateIntVec( njob );
193         pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
194         alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) );
195         alnpairnum = AllocateIntVec( nlenmax );
196
197         for( i=0; i<nlenmax; i++ ) alnpairnum[i] = 0;
198
199         readData_pointer( infp, name, nlen, seq );
200         fclose( infp );
201
202         for( i=0; i<njob; i++ )
203         {
204                 pairprob[i] = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) );
205                 for( j=0; j<nlenmax; j++ )
206                 {
207                         pairprob[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
208                         pairprob[i][j][0].bestpos = -1;
209                         pairprob[i][j][0].bestscore = -1.0;
210                 }
211                 unknown_n( nogap[i], seq[i] );
212                 order[i] = i;
213         }
214         for( j=0; j<nlenmax; j++ )
215         {
216                 alnpairprob[j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
217                 alnpairprob[j][0].bestpos = -1;
218                 alnpairprob[j][0].bestscore = -1.0;
219         }
220
221
222         constants( njob, seq );
223
224         fprintf( stderr, "running contrafold\n" );
225         for( i=0; i<njob; i++ )
226         {
227                 fprintf( stderr, "%d / %d\n", i+1, njob );
228                 commongappick_record( 1, nogap+i, gapmap[i] );
229                 infp = fopen( "_contrafoldin", "w" );
230                 fprintf( infp, ">in\n%s\n", nogap[i] );
231                 fclose( infp );
232 #if 0 // contrafold v1
233                 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 > _contrafoldout", whereiscontrafold );
234 #else // contrafold v2
235                 sprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01   _contrafoldout", whereiscontrafold );
236 #endif
237                 res = system( com );
238                 if( res )
239                 {
240                         fprintf( stderr, "error in contrafold\n" );
241                         fprintf( stderr, "=================================================================\n" );
242                         fprintf( stderr, "=================================================================\n" );
243                         fprintf( stderr, "==\n" );
244                         fprintf( stderr, "== This version of MAFFT supports CONTRAfold v2.02.\n" );
245                         fprintf( stderr, "== If you have a lower version of CONTRAfold installed in the\n" );
246                         fprintf( stderr, "== %s directory,\n", whereiscontrafold );
247                         fprintf( stderr, "== please update it!\n" );
248                         fprintf( stderr, "==\n" );
249                         fprintf( stderr, "=================================================================\n" );
250                         fprintf( stderr, "=================================================================\n" );
251                         exit( 1 );
252                 }
253
254
255                 infp = fopen( "_contrafoldout", "r" );
256                 readcontrafold( infp, pairprob[i], nlenmax );
257                 fclose( infp );
258                 fprintf( stdout, ">%d\n", i );
259                 outcontrafold( stdout, pairprob[i], nlenmax );
260         }
261
262         for( i=0; i<njob; i++ )
263         {
264                 for( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ )
265                 {
266                         left = gapmap[i][j];
267                         right = gapmap[i][pairprobpt->bestpos];
268                         prob = pairprobpt->bestscore;
269
270                         for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ )
271                                 if( pt->bestpos == right ) break;
272
273                         if( pt->bestpos == -1 )
274                         {
275                                 alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) );
276                                 adpos = alnpairnum[left];
277                                 alnpairnum[left]++;
278                                 alnpairprob[left][adpos].bestscore = 0.0;
279                                 alnpairprob[left][adpos].bestpos = right;
280                                 alnpairprob[left][adpos+1].bestscore = -1.0;
281                                 alnpairprob[left][adpos+1].bestpos = -1;
282                                 pt = alnpairprob[left]+adpos;
283                         }
284                         else
285                                 adpos = pt-alnpairprob[left];
286
287                         pt->bestscore += prob;
288                         if( pt->bestpos != right )
289                         {
290                                 fprintf( stderr, "okashii!\n" );
291                                 exit( 1 );
292                         }
293 //                      fprintf( stderr, "adding %d-%d, %f\n", left, right, prob );
294                 }
295         }
296         return( 0 );
297
298 #if 0
299         fprintf( stdout, "result=\n" );
300
301         for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ )
302         {
303                 pairprobpt->bestscore /= (float)njob;
304                 left = i;
305                 right = pairprobpt->bestpos;
306                 prob = pairprobpt->bestscore;
307                 fprintf( stdout, "%d-%d, %f\n", left, right, prob );
308         }
309
310         return( 0 );
311 #endif
312 }