6 #define TSUYOSAFACTOR 100
16 if( *pt == '\n' ) *pt = 0;
19 int searchused( char *q, char **keys, int n )
24 // fprintf( stderr, "%s ? %s\n", q, names[i] );
25 if( !strcmp( q, keys[i] ) ) return( i );
30 void arguments( int argc, char *argv[] )
69 ppenalty = NOTSPECIFIED;
70 ppenalty_OP = NOTSPECIFIED;
71 ppenalty_ex = NOTSPECIFIED;
72 ppenalty_EX = NOTSPECIFIED;
73 poffset = NOTSPECIFIED;
74 kimuraR = NOTSPECIFIED;
77 fftWinSize = NOTSPECIFIED;
78 fftThreshold = NOTSPECIFIED;
80 while( --argc > 0 && (*++argv)[0] == '-' )
82 while ( ( c = *++argv[0] ) )
88 fprintf( stderr, "inputfile = %s\n", inputfile );
93 fprintf( stderr, "pairfile = %s\n", pairfile );
97 nhomologs = atoi( *++argv );
98 fprintf( stderr, "nhomologs = %d\n", nhomologs );
108 fprintf( stderr, "illegal option %c\n", c );
118 cut = atof( (*argv) );
123 fprintf( stderr, "options: Check source file !\n" );
126 if( tbitr == 1 && outgap == 0 )
128 fprintf( stderr, "conflicting options : o, m or u\n" );
131 if( alg == 'C' && outgap == 0 )
133 fprintf( stderr, "conflicting options : C, o\n" );
138 int countamino( char *s, int end )
142 if( *s++ != '-' ) val++;
146 static void pairalign( char name[M][B], int nlen[M], char **seq, double *effarr, int alloclen )
149 static char dumm1[B], dumm0[B];
153 static double *effarr1 = NULL;
154 static double *effarr2 = NULL;
156 LocalHom **localhomtable, *tmpptr;
157 float pscore = 0.0; // by D.Mathog, aguess
158 char *aseq = NULL; // by D.Mathog
159 char **usedseqs = NULL; // by D.Mathog
160 char **usednames = NULL; // by D.Mathog
164 tsuyosa = (double)nhomologs * (nhomologs-1) / njob * TSUYOSAFACTOR;
165 fprintf( stderr, "tsuyosa = %f\n", tsuyosa );
166 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
167 for( i=0; i<njob; i++)
169 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
170 for( j=0; j<njob; j++)
172 localhomtable[i][j].start1 = -1;
173 localhomtable[i][j].end1 = -1;
174 localhomtable[i][j].start2 = -1;
175 localhomtable[i][j].end2 = -1;
176 localhomtable[i][j].opt = -1.0;
177 localhomtable[i][j].next = NULL;
181 if( effarr1 == NULL )
183 effarr1 = AllocateDoubleVec( njob );
184 effarr2 = AllocateDoubleVec( njob );
185 pseq = AllocateCharMtx( 2, nlenmax*9+1 );
186 aseq = AllocateCharVec( nlenmax*9+1 );
187 usedseqs = AllocateCharMtx( njob, nlenmax*9+1 );
188 usednames = AllocateCharMtx( njob, B );
195 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
199 for( i=0; i<njob; i++ )
200 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
204 // writePre( njob, name, nlen, aseq, 0 );
206 fprintf( stderr, "opening %s\n", pairfile );
207 tmpfp = fopen( pairfile, "r" );
210 fprintf( stderr, "Cannot open %s\n", pairfile );
213 searchKUorWA( tmpfp );
214 hat3p = fopen( "hat3", "w" );
215 if( !hat3p ) ErrorExit( "Cannot open hat3." );
219 res = fgets( dumm0, B-1, tmpfp );
225 load1SeqWithoutName_new( tmpfp, pseq[0] );
226 gappick0( aseq, pseq[0] );
227 i = searchused( aseq, usedseqs, nused );
230 strcpy( usednames[nused], dumm0+1 );
231 strcpy( usedseqs[nused], aseq );
235 fprintf( stderr, "i = %d\n", i );
237 res = fgets( dumm1, B-1, tmpfp );
241 fprintf( stderr, "ERROR: The number of sequences in %s must be even.\n", pairfile );
244 load1SeqWithoutName_new( tmpfp, pseq[1] );
245 gappick0( aseq, pseq[1] );
246 j = searchused( aseq, usedseqs, nused );
249 strcpy( usednames[nused], dumm1+1 );
250 strcpy( usedseqs[nused], aseq );
254 fprintf( stderr, "j = %d\n", j );
256 if( strlen( pseq[0] ) != strlen( pseq[1] ) )
258 fprintf( stderr, "Not aligned, %s - %s\n", dumm0, dumm1 );
263 fprintf( stderr, "adding %d-%d\n", i, j );
264 putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ) );
265 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
267 if( tmpptr->opt == -1.0 ) continue;
268 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );
274 for( i=0; i<nused; i++ )
275 fprintf( stdout, ">%s\n%s\n", usednames[i], usedseqs[i] );
279 fprintf( stderr, "##### writing hat3\n" );
280 hat3p = fopen( "hat3", "w" );
281 if( !hat3p ) ErrorExit( "Cannot open hat3." );
283 for( i=0; i<ilim; i++ )
285 for( j=i+1; j<njob; j++ )
287 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
289 if( tmpptr->opt == -1.0 ) continue;
290 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
297 fprintf( stderr, "calling FreeLocalHomTable\n" );
299 FreeLocalHomTable( localhomtable, njob );
301 fprintf( stderr, "done. FreeLocalHomTable\n" );
305 static void WriteOptions( FILE *fp )
308 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
311 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
312 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
313 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
315 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
316 if( use_fft ) fprintf( fp, "FFT on\n" );
318 fprintf( fp, "tree-base method\n" );
319 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
320 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
321 if( tbitr || tbweight )
323 fprintf( fp, "iterate at each step\n" );
324 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
325 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
326 if( tbweight ) fprintf( fp, " weighted\n" );
330 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
333 fprintf( fp, "Algorithm A\n" );
334 else if( alg == 'A' )
335 fprintf( fp, "Algorithm A+\n" );
336 else if( alg == 'S' )
337 fprintf( fp, "Apgorithm S\n" );
338 else if( alg == 'C' )
339 fprintf( fp, "Apgorithm A+/C\n" );
341 fprintf( fp, "Unknown algorithm\n" );
343 if( treemethod == 'x' )
344 fprintf( fp, "Tree = UPGMA (3).\n" );
345 else if( treemethod == 's' )
346 fprintf( fp, "Tree = UPGMA (2).\n" );
347 else if( treemethod == 'p' )
348 fprintf( fp, "Tree = UPGMA (1).\n" );
350 fprintf( fp, "Unknown tree.\n" );
354 fprintf( fp, "FFT on\n" );
356 fprintf( fp, "Basis : 4 nucleotides\n" );
360 fprintf( fp, "Basis : Polarity and Volume\n" );
362 fprintf( fp, "Basis : 20 amino acids\n" );
364 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
365 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
368 fprintf( fp, "FFT off\n" );
373 int main( int argc, char *argv[] )
376 static char name[M][B], **seq;
384 arguments( argc, argv );
388 infp = fopen( inputfile, "r" );
391 fprintf( stderr, "Cannot open %s\n", inputfile );
400 fprintf( stderr, "Usage: %s -p pairfile -i inputfile \n", argv[0] );
409 fprintf( stderr, "At least 2 sequences should be input!\n"
410 "Only %d sequence found.\n", njob );
414 seq = AllocateCharMtx( njob, nlenmax*9+1 );
415 bseq = AllocateCharMtx( njob, nlenmax*9+1 );
416 alloclen = nlenmax*9;
418 eff = AllocateDoubleVec( njob );
421 Read( name, nlen, seq );
423 readData( infp, name, nlen, seq );
427 constants( njob, seq );
430 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
437 WriteOptions( trap_g );
442 fprintf( stderr, "Illeagal character %c\n", c );
446 // writePre( njob, name, nlen, seq, 0 );
448 for( i=0; i<njob; i++ ) eff[i] = 1.0;
451 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
454 pairalign( name, nlen, bseq, eff, alloclen );
456 fprintf( trap_g, "done.\n" );
458 fprintf( stderr, "closing trap_g\n" );
463 fprintf( stderr, "OSHIMAI\n" );