11 static float lenfaca, lenfacb, lenfacc, lenfacd;
13 #define PLENFACB 10000
14 #define PLENFACC 10000
21 void arguments( int argc, char *argv[] )
31 while( --argc > 0 && (*++argv)[0] == '-' )
33 while ( (c = *++argv[0]) )
39 fprintf( stderr, "inputfile = %s\n", inputfile );
49 fprintf( stderr, "illegal option %c\n", c );
57 if( inputfile == NULL )
61 fprintf( stderr, "inputfile = %s\n", inputfile );
65 fprintf( stderr, "Usage: mafft-distance [-PD] [-i inputfile] inputfile > outputfile\n" );
70 void seq_grp_nuc( int *grp, char *seq )
76 tmp = amino_grp[(int)*seq++];
80 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
85 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
91 void seq_grp( int *grp, char *seq )
97 tmp = amino_grp[(int)*seq++];
101 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
104 if( grp - grpbk < 6 )
106 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
112 void makecompositiontable_p( short *table, int *pointt )
116 while( ( point = *pointt++ ) != END_OF_VEC )
120 int commonsextet_p( short *table, int *pointt )
125 static short *memo = NULL;
126 static int *ct = NULL;
134 memo = (short *)calloc( tsize, sizeof( short ) );
135 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
136 ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) );
137 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
141 while( ( point = *pointt++ ) != END_OF_VEC )
144 if( tmp < table[point] )
146 if( tmp == 0 ) *cp++ = point;
147 // fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize );
152 while( *cp != END_OF_VEC )
158 void makepointtable_nuc( int *pointt, int *n )
178 while( *n != END_OF_VEC )
180 point -= *p++ * 1024;
185 *pointt = END_OF_VEC;
188 void makepointtable( int *pointt, int *n )
201 point += *n++ * 1296;
208 while( *n != END_OF_VEC )
210 point -= *p++ * 7776;
215 *pointt = END_OF_VEC;
218 int main( int argc, char **argv )
230 static short *table1;
231 float longer, shorter;
235 arguments( argc, argv );
239 infp = fopen( inputfile, "r" );
242 fprintf( stderr, "Cannot open %s\n", inputfile );
250 PreRead( stdin, &njob, &nlenmax );
257 fprintf( stderr, "At least 2 sequences should be input!\n"
258 "Only %d sequence found.\n", njob );
262 tmpseq = AllocateCharVec( nlenmax+1 );
263 seq = AllocateCharMtx( njob, nlenmax+1 );
264 grpseq = AllocateIntVec( nlenmax+1 );
265 pointt = AllocateIntMtx( njob, nlenmax+1 );
266 mtxself = AllocateDoubleVec( njob );
268 name = AllocateCharMtx( njob, B );
269 nlen = AllocateIntVec( njob );
272 FRead( infp, name, nlen, seq );
274 readData_pointer( infp, name, nlen, seq );
279 constants( njob, seq );
281 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
282 else tsize = (int)pow( 6, 6 );
300 for( i=0; i<njob; i++ )
302 gappick0( tmpseq, seq[i] );
303 nlen[i] = strlen( tmpseq );
306 // fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
309 if( nlen[i] > maxl ) maxl = nlen[i];
310 if( dorp == 'd' ) /* nuc */
312 seq_grp_nuc( grpseq, tmpseq );
313 makepointtable_nuc( pointt[i], grpseq );
317 seq_grp( grpseq, tmpseq );
318 makepointtable( pointt[i], grpseq );
321 fprintf( stderr, "\nCalculating i-i scores ... " );
322 for( i=0; i<njob; i++ )
324 table1 = (short *)calloc( tsize, sizeof( short ) );
325 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
326 makecompositiontable_p( table1, pointt[i] );
328 score = commonsextet_p( table1, pointt[i] );
333 fprintf( stderr, "done.\n" );
334 fprintf( stderr, "\nCalculating i-j scores ... \n" );
335 for( i=0; i<njob; i++ )
337 table1 = (short *)calloc( tsize, sizeof( short ) );
338 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
341 fprintf( stderr, "%4d / %4d\r", i+1, njob );
343 makecompositiontable_p( table1, pointt[i] );
346 for( j=i+1; j<njob; j++ )
348 if( nlen[i] > nlen[j] )
350 longer=(float)nlen[i];
351 shorter=(float)nlen[j];
355 longer=(float)nlen[j];
356 shorter=(float)nlen[i];
358 // lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
359 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
361 // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
362 score = commonsextet_p( table1, pointt[j] );
363 bunbo = MIN( mtxself[i], mtxself[j] );
365 fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, 1.0, nlen[i], nlen[j] );
367 fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] );
368 // fprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
369 // score = (double)commonsextet_p( table1, pointt[j] );
370 // fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] );
377 fprintf( stderr, "\n" );