static char *orderfile;
static int format;
static int namelen;
+static int extendedalphabet;
static void fillspace( char *seq, int lenmax )
{
void setmark_clustal( int nlen, int nseq, char **seq, char *mark )
{
- int i, j, k;
+ int i, j, k, nalpha;
+ char firstletter;
+ char *strong[9];
+ char *weaker[11];
+ int nstrong, nweaker;
+ char s;
- char *strong[] = {
- "STA",
- "NEQK",
- "NHQK",
- "NDEQ",
- "QHRK",
- "MILV",
- "MILF",
- "HY",
- "FYW",
- };
- int nstrong = 9;
- char *weaker[] = {
- "CSA",
- "ATV",
- "SAG",
- "STNK",
- "STPA",
- "SGND",
- "SNDEQK",
- "NDEQHK",
- "NEQHRK",
- "FVLIM",
- "HFY",
- };
- int nweaker = 11;
+ if( dorp == 'd' )
+ {
+ strong[0] = "TU";
+ nstrong = 1;
+ weaker[0] = "AG";
+ weaker[1] = "CT";
+ weaker[2] = "CU";
+ nweaker = 2;
+ nalpha = 10;
+ }
+ else
+ {
+ strong[0] = "STA";
+ strong[1] = "NEQK";
+ strong[2] = "NHQK";
+ strong[3] = "NDEQ";
+ strong[4] = "QHRK";
+ strong[5] = "MILV";
+ strong[6] = "MILF";
+ strong[7] = "HY";
+ strong[8] = "FYW";
+ nstrong = 9;
+ weaker[0] = "CSA";
+ weaker[1] = "ATV";
+ weaker[2] = "SAG";
+ weaker[3] = "STNK";
+ weaker[4] = "STPA";
+ weaker[5] = "SGND";
+ weaker[6] = "SNDEQK";
+ weaker[7] = "NDEQHK";
+ weaker[8] = "NEQHRK";
+ weaker[9] = "FVLIM";
+ weaker[10] = "HFY";
+ nweaker = 11;
+ nalpha = 20;
+ }
for( i=0; i<nlen; i++ )
{
mark[i] = ' ';
for( j=0; j<nseq; j++ )
- if( '-' == seq[j][i] ) break;
- if( j != nseq )
{
- continue;
+ s = seq[j][i];
+ if( '-' == s || ' ' == s ) break;
}
- for( j=0; j<nseq; j++ )
- if( toupper( seq[0][i] ) != toupper( seq[j][i] ) ) break;
- if( j == nseq )
+ if( j != nseq )
{
- mark[i] = '*';
continue;
}
- for( k=0; k<nstrong; k++ )
+ if( extendedalphabet )
{
+ firstletter = seq[0][i];
+ if( amino_n[(unsigned char)firstletter] < 0 ) continue;
+
for( j=0; j<nseq; j++ )
+ if( seq[j][i] != firstletter ) break;
+ if( j == nseq )
{
- if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+ mark[i] = '*';
+ continue;
}
- if( j == nseq ) break;
- }
- if( k < nstrong )
- {
- mark[i] = ':';
- continue;
}
- for( k=0; k<nweaker; k++ )
+ else
{
+ firstletter = toupper( seq[0][i] );
+ if( amino_n[(unsigned char)firstletter] >= nalpha || amino_n[(unsigned char)firstletter] < 0 ) continue;
+
for( j=0; j<nseq; j++ )
+ if( toupper( seq[j][i] ) != firstletter ) break;
+ if( j == nseq )
{
- if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+ mark[i] = '*';
+ continue;
+ }
+ for( k=0; k<nstrong; k++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+ }
+ if( j == nseq ) break;
+ }
+ if( k < nstrong )
+ {
+ mark[i] = ':';
+ continue;
+ }
+ for( k=0; k<nweaker; k++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+ }
+ if( j == nseq ) break;
+ }
+ if( k < nweaker )
+ {
+ mark[i] = '.';
+ continue;
}
- if( j == nseq ) break;
- }
- if( k < nweaker )
- {
- mark[i] = '.';
- continue;
}
}
mark[nlen] = 0;
continue;
}
for( j=0; j<nseq; j++ )
- if( amino_grp[(int)seq[0][i]] != amino_grp[(int)seq[j][i]] ) break;
+ if( amino_grp[(unsigned char)seq[0][i]] != amino_grp[(unsigned char)seq[j][i]] ) break;
if( j == nseq )
{
mark[i] = '.';
void arguments( int argc, char *argv[] )
{
int c;
- namelen = 15;
+ namelen = -1;
scoremtx = 1;
nblosum = 62;
dorp = NOTSPECIFIED;
comment = NULL;
orderfile = NULL;
format = 'c';
+ extendedalphabet = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
--argc;
goto nextoption;
case 'n':
- namelen = atoi( *++argv );
+ namelen = myatoi( *++argv );
fprintf( stderr, "namelen = %d\n", namelen );
--argc;
goto nextoption;
case 'y':
format = 'y';
break;
+ case 'E':
+ extendedalphabet = 1;
+ nblosum = -2;
+ break;
+ case 'N':
+ extendedalphabet = 0;
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
if( orderfile )
{
orderfp = fopen( orderfile, "r" );
- if( !orderfile )
+ if( !orderfp )
{
fprintf( stderr, "Cannot open %s\n", orderfile );
exit( 1 );
else if( format == 'c' )
clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen );
else if( format == 'y' )
- phylipout_pointer( stdout, njob, nlenmax, seq, name, order );
+ phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen );
else
fprintf( stderr, "Unknown format\n" );