Fix core WST file
[jabaws.git] / binaries / src / mafft / core / sextet5.c
1 #include "mltaln.h"
2 #include "mtxutl.h"
3
4 #define DEBUG 0
5 #define TEST  0
6
7 #define END_OF_VEC -1
8
9 static int maxl;
10 static int tsize;
11
12 void arguments( int argc, char *argv[] )
13 {
14         int c;
15
16         inputfile = NULL;
17         disopt = 0;
18         scoremtx = 1;
19         nblosum = 62;
20         dorp = NOTSPECIFIED;
21
22     while( --argc > 0 && (*++argv)[0] == '-' )
23         {
24         while ( ( c = *++argv[0] ) )
25                 {
26             switch( c )
27             {
28                                 case 'i':
29                                         inputfile = *++argv;
30                                         fprintf( stderr, "inputfile = %s\n", inputfile );
31                                         --argc;
32                                         goto nextoption;
33                                 case 'D':
34                                         dorp = 'd';
35                                         break;
36                                 case 'P':
37                                         dorp = 'p';
38                                         break;
39                                 case 'I':
40                                         disopt = 1;
41                                         break;
42                 default:
43                     fprintf( stderr, "illegal option %c\n", c );
44                     argc = 0;
45                     break;
46             }
47                 }
48                 nextoption:
49                         ;
50         }
51     if( argc != 0 )
52     {
53         fprintf( stderr, "options: -i\n" );
54         exit( 1 );
55     }
56 }
57
58 void seq_grp_nuc( int *grp, char *seq )
59 {
60         int tmp;
61         while( *seq )
62         {
63                 tmp = amino_grp[(int)*seq++];
64                 if( tmp < 4 )
65                         *grp++ = tmp;
66                 else
67                         fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) );
68         }
69         *grp = END_OF_VEC;
70 }
71
72 void seq_grp( int *grp, char *seq )
73 {
74         int tmp;
75         while( *seq )
76         {
77                 tmp = amino_grp[(int)*seq++];
78                 if( tmp < 6 )
79                         *grp++ = tmp;
80                 else
81                         fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) );
82         }
83         *grp = END_OF_VEC;
84 }
85
86 void makecompositiontable_p( short *table, int *pointt )
87 {
88         int point;
89
90         while( ( point = *pointt++ ) != END_OF_VEC )
91                 table[point]++;
92 }
93
94 int commonsextet_p( short *table, int *pointt )
95 {
96         int value = 0;
97         short tmp;
98         int point;
99         static short *memo = NULL;
100         static int *ct = NULL;
101         static int *cp;
102
103         if( !memo )
104         {
105                 memo = (short *)calloc( tsize, sizeof( short ) );
106                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
107                 ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) );
108                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
109         }
110
111         cp = ct;
112         while( ( point = *pointt++ ) != END_OF_VEC )
113         {
114                 tmp = memo[point]++;
115                 if( tmp < table[point] )
116                         value++;
117                 if( tmp == 0 ) *cp++ = point;
118 //              fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize );
119         }
120         *cp = END_OF_VEC;
121         
122         cp =  ct;
123         while( *cp != END_OF_VEC )
124                 memo[*cp++] = 0;
125
126         return( value );
127 }
128
129 void makepointtable_nuc( int *pointt, int *n )
130 {
131         int point;
132         register int *p;
133
134         p = n;
135         point  = *n++ *  1024;
136         point += *n++ *   256;
137         point += *n++ *    64;
138         point += *n++ *    16;
139         point += *n++ *     4;
140         point += *n++;
141         *pointt++ = point;
142
143         while( *n != END_OF_VEC )
144         {
145                 point -= *p++ * 1024;
146                 point *= 4;
147                 point += *n++;
148                 *pointt++ = point;
149         }
150         *pointt = END_OF_VEC;
151 }
152
153 void makepointtable( int *pointt, int *n )
154 {
155         int point;
156         register int *p;
157
158         p = n;
159         point  = *n++ *  7776;
160         point += *n++ *  1296;
161         point += *n++ *   216;
162         point += *n++ *    36;
163         point += *n++ *     6;
164         point += *n++;
165         *pointt++ = point;
166
167         while( *n != END_OF_VEC )
168         {
169                 point -= *p++ * 7776;
170                 point *= 6;
171                 point += *n++;
172                 *pointt++ = point;
173         }
174         *pointt = END_OF_VEC;
175 }
176
177 int main( int argc, char **argv )
178 {
179         int i, j;
180         FILE *fp, *infp;
181         char **seq;
182         int *grpseq;
183         char *tmpseq;
184         int  **pointt;
185         static char name[M][B];
186         static int nlen[M];
187         double **mtx;
188         double **mtx2;
189         double score, score0;
190         static short *table1;
191         char b[B];
192
193         arguments( argc, argv );
194
195         if( inputfile )
196         {
197                 infp = fopen( inputfile, "r" );
198                 if( !infp )
199                 {
200                         fprintf( stderr, "Cannot open %s\n", inputfile );
201                         exit( 1 );
202                 }
203         }
204         else
205                 infp = stdin;
206
207 #if 0
208         PreRead( stdin, &njob, &nlenmax );
209 #else
210         getnumlen( infp );
211 #endif
212         rewind( infp );
213         if( njob < 2 )
214         {
215                 fprintf( stderr, "At least 2 sequences should be input!\n"
216                                                  "Only %d sequence found.\n", njob );
217                 exit( 1 );
218         }
219
220         tmpseq = AllocateCharVec( nlenmax+1 );
221         seq = AllocateCharMtx( njob, nlenmax+1 );
222         grpseq = AllocateIntVec( nlenmax+1 );
223         pointt = AllocateIntMtx( njob, nlenmax+1 );
224         mtx = AllocateDoubleMtx( njob, njob );
225         mtx2 = AllocateDoubleMtx( njob, njob );
226         pamN = NOTSPECIFIED;
227
228 #if 0
229         FRead( infp, name, nlen, seq );
230 #else
231         readData( infp, name, nlen, seq );
232 #endif
233
234         fclose( infp );
235
236         constants( njob, seq );
237
238         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
239         else              tsize = (int)pow( 6, 6 );
240
241         maxl = 0;
242         for( i=0; i<njob; i++ ) 
243         {
244                 gappick0( tmpseq, seq[i] );
245                 nlen[i] = strlen( tmpseq );
246                 if( nlen[i] < 6 )
247                 {
248                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
249                         exit( 1 );
250                 }
251                 if( nlen[i] > maxl ) maxl = nlen[i];
252                 if( dorp == 'd' ) /* nuc */
253                 {
254                         seq_grp_nuc( grpseq, tmpseq );
255                         makepointtable_nuc( pointt[i], grpseq );
256                 }
257                 else                 /* amino */
258                 {
259                         seq_grp( grpseq, tmpseq );
260                         makepointtable( pointt[i], grpseq );
261                 }
262         }
263         for( i=0; i<njob; i++ )
264         {
265                 table1 = (short *)calloc( tsize, sizeof( short ) );
266                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
267                 if( i % 10 == 0 )
268                 {
269                         fprintf( stderr, "%4d / %4d\r", i+1, njob );
270                 }
271                 makecompositiontable_p( table1, pointt[i] );
272
273                 for( j=i; j<njob; j++ ) 
274                 {
275                         score = (double)commonsextet_p( table1, pointt[j] );
276                         mtx[i][j] = score;
277                 } 
278                 free( table1 );
279         }
280         for( i=0; i<njob; i++ )
281         {
282                 score0 = mtx[i][i];
283                 for( j=0; j<njob; j++ ) 
284                         mtx2[i][j] = ( score0 - mtx[MIN(i,j)][MAX(i,j)] ) / score0 * 3.0;
285         }
286         for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) 
287         {
288 #if TEST
289                 double jscore;
290                 jscore = mtx[i][j] / ( MIN( strlen( seq[i] ), strlen( seq[j] ) ) - 2 );
291                 fprintf( stdout, "jscore = %f\n", jscore );
292
293                 fprintf( stdout, "mtx2[%d][%d] = %f, mtx2[%d][%d] = %f\n", i, j, mtx2[i][j], j, i, mtx2[j][i] );
294 #endif
295                 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] );
296 #if TEST
297                 fprintf( stdout, "sonokekka mtx2[%d][%d] %f\n", i, j, mtx2[i][j] );
298 #endif
299         }
300
301         if( disopt )
302         {
303                 for( i=0; i<njob; i++ ) 
304                 {
305                         sprintf( b, "=lgth = %04d", nlen[i] );
306                         strins( b, name[i] );
307                 }
308         }
309                 
310         fp = fopen( "hat2", "w" );
311         if( !fp ) ErrorExit( "Cannot open hat2." );
312         WriteHat2( fp, njob, name, mtx2 );
313         fclose( fp );
314
315         fprintf( stderr, "\n" );
316         SHOWVERSION;
317         exit( 0 );
318 }