JWS-112 Bumping version of Mafft to version 7.310.
[jabaws.git] / binaries / src / mafft / core / sextet5.c
1 #include "mltaln.h"
2 #include "mtxutl.h"
3
4 #define DEBUG 0
5 #define TEST  0
6
7 #define END_OF_VEC -1
8
9
10 void arguments( int argc, char *argv[] )
11 {
12         int c;
13
14         inputfile = NULL;
15         disopt = 0;
16         scoremtx = 1;
17         nblosum = 62;
18         dorp = NOTSPECIFIED;
19
20     while( --argc > 0 && (*++argv)[0] == '-' )
21         {
22         while ( ( c = *++argv[0] ) )
23                 {
24             switch( c )
25             {
26                                 case 'i':
27                                         inputfile = *++argv;
28                                         fprintf( stderr, "inputfile = %s\n", inputfile );
29                                         --argc;
30                                         goto nextoption;
31                                 case 'D':
32                                         dorp = 'd';
33                                         break;
34                                 case 'P':
35                                         dorp = 'p';
36                                         break;
37                                 case 'I':
38                                         disopt = 1;
39                                         break;
40                 default:
41                     fprintf( stderr, "illegal option %c\n", c );
42                     argc = 0;
43                     break;
44             }
45                 }
46                 nextoption:
47                         ;
48         }
49     if( argc != 0 )
50     {
51         fprintf( stderr, "options: -i\n" );
52         exit( 1 );
53     }
54 }
55
56 void seq_grp_nuc( int *grp, char *seq )
57 {
58         int tmp;
59         while( *seq )
60         {
61                 tmp = amino_grp[(int)*seq++];
62                 if( tmp < 4 )
63                         *grp++ = tmp;
64                 else
65                         fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) );
66         }
67         *grp = END_OF_VEC;
68 }
69
70 void seq_grp( int *grp, char *seq )
71 {
72         int tmp;
73         while( *seq )
74         {
75                 tmp = amino_grp[(int)*seq++];
76                 if( tmp < 6 )
77                         *grp++ = tmp;
78                 else
79                         fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) );
80         }
81         *grp = END_OF_VEC;
82 }
83
84 void makecompositiontable_p( short *table, int *pointt )
85 {
86         int point;
87
88         while( ( point = *pointt++ ) != END_OF_VEC )
89                 table[point]++;
90 }
91
92 static int localcommonsextet_p( short *table, int *pointt )
93 {
94         int value = 0;
95         short tmp;
96         int point;
97         static short *memo = NULL;
98         static int *ct = NULL;
99         static int *cp;
100
101         if( !memo )
102         {
103                 memo = (short *)calloc( tsize, sizeof( short ) );
104                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
105                 ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) );
106                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
107         }
108
109         cp = ct;
110         while( ( point = *pointt++ ) != END_OF_VEC )
111         {
112                 tmp = memo[point]++;
113                 if( tmp < table[point] )
114                         value++;
115                 if( tmp == 0 ) *cp++ = point;
116 //              fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize );
117         }
118         *cp = END_OF_VEC;
119         
120         cp =  ct;
121         while( *cp != END_OF_VEC )
122                 memo[*cp++] = 0;
123
124         return( value );
125 }
126
127 void makepointtable_nuc( int *pointt, int *n )
128 {
129         int point;
130         register int *p;
131
132         p = n;
133         point  = *n++ *  1024;
134         point += *n++ *   256;
135         point += *n++ *    64;
136         point += *n++ *    16;
137         point += *n++ *     4;
138         point += *n++;
139         *pointt++ = point;
140
141         while( *n != END_OF_VEC )
142         {
143                 point -= *p++ * 1024;
144                 point *= 4;
145                 point += *n++;
146                 *pointt++ = point;
147         }
148         *pointt = END_OF_VEC;
149 }
150
151 void makepointtable( int *pointt, int *n )
152 {
153         int point;
154         register int *p;
155
156         p = n;
157         point  = *n++ *  7776;
158         point += *n++ *  1296;
159         point += *n++ *   216;
160         point += *n++ *    36;
161         point += *n++ *     6;
162         point += *n++;
163         *pointt++ = point;
164
165         while( *n != END_OF_VEC )
166         {
167                 point -= *p++ * 7776;
168                 point *= 6;
169                 point += *n++;
170                 *pointt++ = point;
171         }
172         *pointt = END_OF_VEC;
173 }
174
175 int main( int argc, char **argv )
176 {
177         int i, j;
178         FILE *fp, *infp;
179         char **seq;
180         int *grpseq;
181         char *tmpseq;
182         int  **pointt;
183         static char **name;
184         static int nlen[M];
185         double **mtx;
186         double **mtx2;
187         double score, score0;
188         static short *table1;
189         char b[B];
190
191         arguments( argc, argv );
192
193         if( inputfile )
194         {
195                 infp = fopen( inputfile, "r" );
196                 if( !infp )
197                 {
198                         fprintf( stderr, "Cannot open %s\n", inputfile );
199                         exit( 1 );
200                 }
201         }
202         else
203                 infp = stdin;
204
205 #if 0
206         PreRead( stdin, &njob, &nlenmax );
207 #else
208         getnumlen( infp );
209 #endif
210         rewind( infp );
211         if( njob < 2 )
212         {
213                 fprintf( stderr, "At least 2 sequences should be input!\n"
214                                                  "Only %d sequence found.\n", njob );
215                 exit( 1 );
216         }
217
218         name = AllocateCharMtx( njob, B+1 );
219         tmpseq = AllocateCharVec( nlenmax+1 );
220         seq = AllocateCharMtx( njob, nlenmax+1 );
221         grpseq = AllocateIntVec( nlenmax+1 );
222         pointt = AllocateIntMtx( njob, nlenmax+1 );
223         mtx = AllocateDoubleMtx( njob, njob );
224         mtx2 = AllocateDoubleMtx( njob, njob );
225         pamN = NOTSPECIFIED;
226
227 #if 0
228         FRead( infp, name, nlen, seq );
229 #else
230         readData_pointer( infp, name, nlen, seq );
231 #endif
232
233         fclose( infp );
234
235         constants( njob, seq );
236
237         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
238         else              tsize = (int)pow( 6, 6 );
239
240         maxl = 0;
241         for( i=0; i<njob; i++ ) 
242         {
243                 gappick0( tmpseq, seq[i] );
244                 nlen[i] = strlen( tmpseq );
245                 if( nlen[i] < 6 )
246                 {
247                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
248                         exit( 1 );
249                 }
250                 if( nlen[i] > maxl ) maxl = nlen[i];
251                 if( dorp == 'd' ) /* nuc */
252                 {
253                         seq_grp_nuc( grpseq, tmpseq );
254                         makepointtable_nuc( pointt[i], grpseq );
255                 }
256                 else                 /* amino */
257                 {
258                         seq_grp( grpseq, tmpseq );
259                         makepointtable( pointt[i], grpseq );
260                 }
261         }
262         for( i=0; i<njob; i++ )
263         {
264                 table1 = (short *)calloc( tsize, sizeof( short ) );
265                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
266                 if( i % 10 == 0 )
267                 {
268                         fprintf( stderr, "%4d / %4d\r", i+1, njob );
269                 }
270                 makecompositiontable_p( table1, pointt[i] );
271
272                 for( j=i; j<njob; j++ ) 
273                 {
274                         score = (double)localcommonsextet_p( table1, pointt[j] );
275                         mtx[i][j] = score;
276                 } 
277                 free( table1 );
278         }
279         for( i=0; i<njob; i++ )
280         {
281                 score0 = mtx[i][i];
282                 for( j=0; j<njob; j++ ) 
283                         mtx2[i][j] = ( score0 - mtx[MIN(i,j)][MAX(i,j)] ) / score0 * 3.0;
284         }
285         for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) 
286         {
287 #if TEST
288                 double jscore;
289                 jscore = mtx[i][j] / ( MIN( strlen( seq[i] ), strlen( seq[j] ) ) - 2 );
290                 fprintf( stdout, "jscore = %f\n", jscore );
291
292                 fprintf( stdout, "mtx2[%d][%d] = %f, mtx2[%d][%d] = %f\n", i, j, mtx2[i][j], j, i, mtx2[j][i] );
293 #endif
294                 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] );
295 #if TEST
296                 fprintf( stdout, "sonokekka mtx2[%d][%d] %f\n", i, j, mtx2[i][j] );
297 #endif
298         }
299
300         if( disopt )
301         {
302                 for( i=0; i<njob; i++ ) 
303                 {
304                         sprintf( b, "=lgth = %04d", nlen[i] );
305                         strins( b, name[i] );
306                 }
307         }
308                 
309         fp = fopen( "hat2", "w" );
310         if( !fp ) ErrorExit( "Cannot open hat2." );
311         WriteHat2_pointer( fp, njob, name, mtx2 );
312         fclose( fp );
313
314         fprintf( stderr, "\n" );
315         SHOWVERSION;
316         exit( 0 );
317 }