8 int howmanyx( char *s )
15 if( !strchr( "atgcuATGCU", *s ) ) val++;
22 if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++;
28 void arguments( int argc, char *argv[] )
36 while( --argc > 0 && (*++argv)[0] == '-' )
38 while ( (c = *++argv[0]) )
44 fprintf( stderr, "inputfile = %s\n", inputfile );
51 fprintf( stderr, "illegal option %c\n", c );
62 fprintf( stderr, "options: -i\n" );
67 int main( int argc, char *argv[] )
74 char **seq = NULL; // by D.Mathog
76 static char name[M][B];
77 static char name1[M][B];
92 int pid = (int)getpid();
93 LocalHom **localhomtable, *tmpptr;
95 home = getenv( "HOME" );
96 #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */
101 if( home ) fprintf( stderr, "home = %s\n", home );
103 if( !home ) home = "";
104 sprintf( queryfile, "%s/tmp/query-%d", home, pid );
105 sprintf( datafile, "%s/tmp/data-%d", home, pid );
106 sprintf( fastafile, "%s/tmp/fasta-%d", home, pid );
107 sprintf( hat2file, "hat2-%d", pid );
110 arguments( argc, argv );
114 infp = fopen( inputfile, "r" );
117 fprintf( stderr, "Cannot open %s\n", inputfile );
124 PreRead( infp, &njob, &nlenmax );
140 constants( njob, seq );
144 seq = AllocateCharMtx( njob, nlenmax+1 );
145 seq1 = AllocateCharMtx( 2, nlenmax+1 );
146 mtx = AllocateDoubleMtx( njob, njob );
147 mtx2 = AllocateDoubleMtx( njob, njob );
148 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
149 for( i=0; i<njob; i++)
151 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
152 for( j=0; j<njob; j++)
154 localhomtable[i][j].start1 = -1;
155 localhomtable[i][j].end1 = -1;
156 localhomtable[i][j].start2 = -1;
157 localhomtable[i][j].end2 = -1;
158 localhomtable[i][j].opt = -1.0;
159 localhomtable[i][j].next = NULL;
165 FRead( infp, name, nlen, seq );
167 readData( infp, name, nlen, seq );
171 if( scoremtx == -1 ) ktuple = 6;
174 for( i=0; i<njob; i++ )
176 gappick0( seq1[0], seq[i] );
177 strcpy( seq[i], seq1[0] );
179 for( j=0; j<njob; j++ )
181 sprintf( name1[j], "+==========+%d ", j );
185 for( i=0; i<njob; i++ )
187 // fprintf( stderr, "### i = %d\n", i );
191 fprintf( stderr, "formatting .. " );
192 hat2p = fopen( datafile, "w" );
193 if( !hat2p ) ErrorExit( "Cannot open datafile." );
194 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );
198 sprintf( com, "formatdb -p f -i %s -o F", datafile );
200 sprintf( com, "formatdb -i %s -o F", datafile );
202 fprintf( stderr, "done.\n" );
205 hat2p = fopen( queryfile, "w" );
206 if( !hat2p ) ErrorExit( "Cannot open queryfile." );
207 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i );
212 sprintf( com, "blastall -e 1e10 -p blastn -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile );
214 sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile );
216 if( res ) ErrorExit( "error in fasta" );
219 hat2p = fopen( fastafile, "r" );
221 ErrorExit( "file 'fasta.$$' does not exist\n" );
222 res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] );
226 for( j=0; j<njob; j++ )
228 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
230 if( tmpptr->opt == -1.0 ) continue;
231 // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
236 if( res < njob-i+i%10 )
238 fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 );
245 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ )
246 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] );
249 fprintf( stderr, "query : %4d / %d\n", i+1, njob );
253 fprintf( stderr, "##### writing hat3\n" );
254 hat3p = fopen( "hat3", "w" );
255 if( !hat3p ) ErrorExit( "Cannot open hat3." );
256 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ )
258 // fprintf( stderr, "mtx[%d][%d] = %f, mtx[%d][%d] = %f\n", i, j, mtx[i][j], j, i, mtx[j][i] );
259 if( i == j ) continue;
260 if( mtx[i][j] == mtx[j][i] && i > j ) continue;
261 if( mtx[j][i] > mtx[i][j] ) continue;
262 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
264 if( tmpptr->opt == -1.0 ) continue;
265 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );
271 for( i=0; i<njob; i++ )
273 // fprintf( stderr, "### i = %d\n", i );
274 hat2p = fopen( datafile, "w" );
275 if( !hat2p ) ErrorExit( "Cannot open datafile." );
276 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );
280 // nlen1[0] = nlen[i];
282 hat2p = fopen( queryfile, "w" );
283 if( !hat2p ) ErrorExit( "Cannot open queryfile." );
284 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i );
289 sprintf( com, "fasta34 -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );
291 sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );
293 if( res ) ErrorExit( "error in fasta" );
296 hat2p = fopen( fastafile, "r" );
298 ErrorExit( "file 'fasta.$$' does not exist\n" );
299 res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] );
303 fprintf( stderr, "count (fasta34 -z 3) = %d\n", res );
309 for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j];
315 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ )
316 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] );
319 fprintf( stderr, "query : %4d\r", i+1 );
325 for( i=0; i<njob; i++ )
330 for( j=0; j<njob; j++ )
335 for( j=0; j<njob; j++ )
337 mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0;
338 // fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] );
342 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
344 // fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] );
345 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] );
351 if( i < njob-1 ) for( jj=i+1; jj<njob; jj++ )
352 fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] );
356 for( i=0; i<njob; i++ ) name[i][0] = '=';
360 strcpy( b, name[0] );
361 sprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b );
363 strins( b, name[0] );
365 for( i=1; i<njob; i++ )
367 strcpy( b, name[i] );
368 sprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b );
370 strins( b, name[i] );
375 hat2p = fopen( hat2file, "w" );
376 if( !hat2p ) ErrorExit( "Cannot open hat2." );
377 WriteHat2( hat2p, njob, name, mtx2 );
381 sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile );
385 sprintf( com, ALNDIR "/supgsdl < %s", hat2file );
387 if( res ) ErrorExit( "error in spgsdl" );
390 sprintf( com, "mv %s hat2", hat2file );
392 if( res ) ErrorExit( "error in mv" );