11 static char outputformat;
12 static float lenfaca, lenfacb, lenfacc, lenfacd;
14 #define PLENFACB 10000
15 #define PLENFACC 10000
22 void arguments( int argc, char *argv[] )
33 while( --argc > 0 && (*++argv)[0] == '-' )
35 while ( (c = *++argv[0]) )
41 fprintf( stderr, "inputfile = %s\n", inputfile );
54 fprintf( stderr, "illegal option %c\n", c );
62 if( inputfile == NULL )
66 fprintf( stderr, "inputfile = %s\n", inputfile );
70 fprintf( stderr, "Usage: mafft-distance [-PD] [-i inputfile] inputfile > outputfile\n" );
75 void seq_grp_nuc( int *grp, char *seq )
81 tmp = amino_grp[(int)*seq++];
85 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
90 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
96 void seq_grp( int *grp, char *seq )
102 tmp = amino_grp[(int)*seq++];
106 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
109 if( grp - grpbk < 6 )
111 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
117 void makecompositiontable_p( short *table, int *pointt )
121 while( ( point = *pointt++ ) != END_OF_VEC )
125 int commonsextet_p( short *table, int *pointt )
130 static short *memo = NULL;
131 static int *ct = NULL;
139 memo = (short *)calloc( tsize, sizeof( short ) );
140 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
141 ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) );
142 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
146 while( ( point = *pointt++ ) != END_OF_VEC )
149 if( tmp < table[point] )
151 if( tmp == 0 ) *cp++ = point;
152 // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize );
157 while( *cp != END_OF_VEC )
163 void makepointtable_nuc( int *pointt, int *n )
183 while( *n != END_OF_VEC )
185 point -= *p++ * 1024;
190 *pointt = END_OF_VEC;
193 void makepointtable( int *pointt, int *n )
206 point += *n++ * 1296;
213 while( *n != END_OF_VEC )
215 point -= *p++ * 7776;
220 *pointt = END_OF_VEC;
223 int main( int argc, char **argv )
235 static short *table1;
236 float longer, shorter;
240 arguments( argc, argv );
244 infp = fopen( inputfile, "r" );
247 fprintf( stderr, "Cannot open %s\n", inputfile );
255 PreRead( stdin, &njob, &nlenmax );
262 fprintf( stderr, "At least 2 sequences should be input!\n"
263 "Only %d sequence found.\n", njob );
267 tmpseq = AllocateCharVec( nlenmax+1 );
268 seq = AllocateCharMtx( njob, nlenmax+1 );
269 grpseq = AllocateIntVec( nlenmax+1 );
270 pointt = AllocateIntMtx( njob, nlenmax+1 );
271 mtxself = AllocateDoubleVec( njob );
273 name = AllocateCharMtx( njob, B );
274 nlen = AllocateIntVec( njob );
277 FRead( infp, name, nlen, seq );
279 readData_pointer( infp, name, nlen, seq );
284 constants( njob, seq );
286 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
287 else tsize = (int)pow( 6, 6 );
305 for( i=0; i<njob; i++ )
307 gappick0( tmpseq, seq[i] );
308 nlen[i] = strlen( tmpseq );
311 // fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
314 if( nlen[i] > maxl ) maxl = nlen[i];
315 if( dorp == 'd' ) /* nuc */
317 seq_grp_nuc( grpseq, tmpseq );
318 makepointtable_nuc( pointt[i], grpseq );
322 seq_grp( grpseq, tmpseq );
323 makepointtable( pointt[i], grpseq );
326 fprintf( stderr, "\nCalculating i-i scores ... " );
327 for( i=0; i<njob; i++ )
329 table1 = (short *)calloc( tsize, sizeof( short ) );
330 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
331 makecompositiontable_p( table1, pointt[i] );
333 score = commonsextet_p( table1, pointt[i] );
338 fprintf( stderr, "done.\n" );
339 fprintf( stderr, "\nCalculating i-j scores ... \n" );
340 if( outputformat == 'p' ) fprintf( stdout, "%-5d", njob );
341 for( i=0; i<njob; i++ )
343 if( outputformat == 'p' ) fprintf( stdout, "\n%-9d ", i+1 );
344 table1 = (short *)calloc( tsize, sizeof( short ) );
345 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
348 fprintf( stderr, "%4d / %4d\r", i+1, njob );
350 makecompositiontable_p( table1, pointt[i] );
353 if( outputformat == 'p' ) initj = 0;
355 for( j=initj; j<njob; j++ )
357 if( nlen[i] > nlen[j] )
359 longer=(float)nlen[i];
360 shorter=(float)nlen[j];
364 longer=(float)nlen[j];
365 shorter=(float)nlen[i];
367 // lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
368 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
370 // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
371 score = commonsextet_p( table1, pointt[j] );
372 bunbo = MIN( mtxself[i], mtxself[j] );
373 if( outputformat == 'p' )
376 fprintf( stdout, " %8.6f", 1.0 );
378 fprintf( stdout, " %8.6f", ( 1.0 - score / bunbo ) * lenfac );
379 if( j % 7 == 6 ) fprintf( stdout, "\n" );
384 fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, 1.0, nlen[i], nlen[j] );
386 fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] );
388 // fprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
389 // score = (double)commonsextet_p( table1, pointt[j] );
390 // fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] );
397 fprintf( stderr, "\n" );
398 if( outputformat == 'p' ) fprintf( stdout, "\n" );