JWS-112 Bumping version of Mafft to version 7.310.
[jabaws.git] / binaries / src / mafft / core / f2cl.c
index d8c9f2c..31d1b6c 100644 (file)
@@ -7,6 +7,7 @@ static char *comment;
 static char *orderfile;
 static int format;
 static int namelen;
+static int extendedalphabet;
 
 static void fillspace( char *seq, int lenmax )
 {
@@ -19,76 +20,113 @@ static void fillspace( char *seq, int lenmax )
 
 void setmark_clustal( int nlen, int nseq, char **seq, char *mark )
 {
-       int i, j, k;
+       int i, j, k, nalpha;
+       char firstletter;
+       char *strong[9];
+       char *weaker[11];
+       int nstrong, nweaker;
+       char s;
 
-       char *strong[] = { 
-                                       "STA",
-                                       "NEQK",
-                                       "NHQK",
-                                       "NDEQ",
-                                       "QHRK",
-                                       "MILV",
-                                       "MILF",
-                                       "HY",
-                                       "FYW",
-                                 };
-       int nstrong = 9;
-       char *weaker[] = { 
-                                       "CSA",
-                                       "ATV",
-                                       "SAG",
-                                       "STNK",
-                                       "STPA",
-                                       "SGND",
-                                       "SNDEQK",
-                                       "NDEQHK",
-                                       "NEQHRK",
-                                       "FVLIM",
-                                       "HFY",
-                                 };
-       int nweaker = 11;
+       if( dorp == 'd' ) 
+       {
+               strong[0] = "TU";
+               nstrong = 1;
+               weaker[0] = "AG";
+               weaker[1] = "CT";
+               weaker[2] = "CU";
+               nweaker = 2;
+               nalpha = 10;
+       }
+       else
+       {
+               strong[0] = "STA";
+               strong[1] = "NEQK";
+               strong[2] = "NHQK";
+               strong[3] = "NDEQ";
+               strong[4] = "QHRK";
+               strong[5] = "MILV";
+               strong[6] = "MILF";
+               strong[7] = "HY";
+               strong[8] = "FYW";
+               nstrong = 9;
+               weaker[0] = "CSA";
+               weaker[1] = "ATV";
+               weaker[2] = "SAG";
+               weaker[3] = "STNK";
+               weaker[4] = "STPA";
+               weaker[5] = "SGND";
+               weaker[6] = "SNDEQK";
+               weaker[7] = "NDEQHK";
+               weaker[8] = "NEQHRK";
+               weaker[9] = "FVLIM";
+               weaker[10] = "HFY";
+               nweaker = 11;
+               nalpha = 20;
+       }
 
        for( i=0; i<nlen; i++ )
        {
                mark[i] = ' ';
                for( j=0; j<nseq; j++ )
-                       if( '-' == seq[j][i] ) break;
-               if( j != nseq ) 
                {
-                       continue;
+                       s = seq[j][i];
+                       if( '-' == s || ' ' == s ) break;
                }
-               for( j=0; j<nseq; j++ )
-                       if( toupper( seq[0][i] ) != toupper( seq[j][i] ) ) break;
-               if( j == nseq ) 
+               if( j != nseq ) 
                {
-                       mark[i] = '*';
                        continue;
                }
-               for( k=0; k<nstrong; k++ )
+               if( extendedalphabet )
                {
+                       firstletter = seq[0][i];
+                       if( amino_n[(unsigned char)firstletter] < 0 ) continue;
+       
                        for( j=0; j<nseq; j++ )
+                               if( seq[j][i] != firstletter ) break;
+                       if( j == nseq ) 
                        {
-                               if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+                               mark[i] = '*';
+                               continue;
                        }
-                       if( j == nseq ) break;
-               }
-               if( k < nstrong )
-               {
-                       mark[i] = ':';
-                       continue;
                }
-               for( k=0; k<nweaker; k++ )
+               else 
                {
+                       firstletter = toupper( seq[0][i] );
+                       if( amino_n[(unsigned char)firstletter] >= nalpha || amino_n[(unsigned char)firstletter] < 0 ) continue;
+       
                        for( j=0; j<nseq; j++ )
+                               if( toupper( seq[j][i] ) != firstletter ) break;
+                       if( j == nseq ) 
                        {
-                               if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+                               mark[i] = '*';
+                               continue;
+                       }
+                       for( k=0; k<nstrong; k++ )
+                       {
+                               for( j=0; j<nseq; j++ )
+                               {
+                                       if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+                               }
+                               if( j == nseq ) break;
+                       }
+                       if( k < nstrong )
+                       {
+                               mark[i] = ':';
+                               continue;
+                       }
+                       for( k=0; k<nweaker; k++ )
+                       {
+                               for( j=0; j<nseq; j++ )
+                               {
+                                       if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+                               }
+                               if( j == nseq ) break;
+                       }
+                       if( k < nweaker )
+                       {
+                               mark[i] = '.';
+                               continue;
                        }
-                       if( j == nseq ) break;
-               }
-               if( k < nweaker )
-               {
-                       mark[i] = '.';
-                       continue;
                }
        }
        mark[nlen] = 0;
@@ -115,7 +153,7 @@ void setmark( int nlen, int nseq, char **seq, char *mark )
                        continue;
                }
                for( j=0; j<nseq; j++ )
-                       if( amino_grp[(int)seq[0][i]] != amino_grp[(int)seq[j][i]] ) break;
+                       if( amino_grp[(unsigned char)seq[0][i]] != amino_grp[(unsigned char)seq[j][i]] ) break;
                if( j == nseq ) 
                {
                        mark[i] = '.';
@@ -128,7 +166,7 @@ void setmark( int nlen, int nseq, char **seq, char *mark )
 void arguments( int argc, char *argv[] )
 {
     int c;
-       namelen = 15;
+       namelen = -1;
        scoremtx = 1;
        nblosum = 62;
        dorp = NOTSPECIFIED;
@@ -138,6 +176,7 @@ void arguments( int argc, char *argv[] )
        comment = NULL;
        orderfile = NULL;
        format = 'c';
+       extendedalphabet = 0;
 
     while( --argc > 0 && (*++argv)[0] == '-' )
        {
@@ -161,7 +200,7 @@ void arguments( int argc, char *argv[] )
                                        --argc;
                                        goto nextoption;
                                case 'n':
-                                       namelen = atoi( *++argv );
+                                       namelen = myatoi( *++argv );
                                        fprintf( stderr, "namelen = %d\n", namelen );
                                        --argc;
                                        goto nextoption;
@@ -171,6 +210,13 @@ void arguments( int argc, char *argv[] )
                                case 'y':
                                        format = 'y';
                                        break;
+                               case 'E':
+                                       extendedalphabet = 1;
+                                       nblosum = -2;
+                                       break;
+                               case 'N':
+                                       extendedalphabet = 0;
+                                       break;
                 default:
                     fprintf( stderr, "illegal option %c\n", c );
                     argc = 0;
@@ -227,7 +273,7 @@ int main( int argc, char *argv[] )
        if( orderfile )
        {
                orderfp = fopen( orderfile, "r" );
-               if( !orderfile )
+               if( !orderfp )
                {
                        fprintf( stderr, "Cannot open %s\n", orderfile );
                        exit( 1 );
@@ -264,7 +310,7 @@ int main( int argc, char *argv[] )
        else if( format == 'c' )
                clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen );
        else if( format == 'y' )
-               phylipout_pointer( stdout, njob, nlenmax, seq, name, order );
+               phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen );
        else
                fprintf( stderr, "Unknown format\n" );