forester/archive/RIO/others/hmmer/squid/stockholm.c

   1 /*****************************************************************
   2  * HMMER - Biological sequence analysis with profile HMMs
   3  * Copyright (C) 1992-1999 Washington University School of Medicine
   4  * All Rights Reserved
   5  *
   6  *     This source code is distributed under the terms of the
   7  *     GNU General Public License. See the files COPYING and LICENSE
   8  *     for details.
   9  *****************************************************************/
  10
  11 /* stockholm.c
  12  * SRE, Fri May 28 15:46:41 1999
  13  *
  14  * Reading/writing of Stockholm format multiple sequence alignments.
  15  *
  16  * example of API:
  17  *
  18  * MSA     *msa;
  19  * FILE    *fp;        -- opened for write with fopen()
  20  * MSAFILE *afp;       -- opened for read with MSAFileOpen()
  21  *
  22  * while ((msa = ReadStockholm(afp)) != NULL)
  23  *   {
  24  *      WriteStockholm(fp, msa);
  25  *      MSAFree(msa);
  26  *   }
  27  *
  28  * RCS $Id: stockholm.c,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $
  29  */
  30 #include <stdio.h>
  31 #include <string.h>
  32 #include "squid.h"
  33 #include "msa.h"
  34
  35 static int  parse_gf(MSA *msa, char *buf);
  36 static int  parse_gs(MSA *msa, char *buf);
  37 static int  parse_gc(MSA *msa, char *buf);
  38 static int  parse_gr(MSA *msa, char *buf);
  39 static int  parse_comment(MSA *msa, char *buf);
  40 static int  parse_sequence(MSA *msa, char *buf);
  41 static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl);
  42
  43 #ifdef TESTDRIVE_STOCKHOLM
  44 /*****************************************************************
  45  * stockholm.c test driver:
  46  * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm
  47  *
  48  */
  49 int
  50 main(int argc, char **argv)
  51 {
  52   MSAFILE *afp;
  53   MSA     *msa;
  54   char    *file;
  55
  56   file = argv[1];
  57
  58   if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)
  59     Die("Couldn't open %s\n", file);
  60
  61   while ((msa = ReadStockholm(afp)) != NULL)
  62     {
  63       WriteStockholm(stdout, msa);
  64       MSAFree(msa);
  65     }
  66
  67   MSAFileClose(afp);
  68   exit(0);
  69 }
  70 /******************************************************************/
  71 #endif /* testdriver */
  72
  73
  74 /* Function: ReadStockholm()
  75  * Date:     SRE, Fri May 21 17:33:10 1999 [St. Louis]
  76  *
  77  * Purpose:  Parse the next alignment from an open Stockholm
  78  *           format alignment file. Return the alignment, or
  79  *           NULL if there are no more alignments in the file.
  80  *
  81  * Args:     afp  - open alignment file
  82  *
  83  * Returns:  MSA *   - an alignment object.
  84  *                     caller responsible for an MSAFree()
  85  *           NULL if no more alignments
  86  *
  87  * Diagnostics:
  88  *           Will Die() here with a (potentially) useful message
  89  *           if a parsing error occurs
  90  */
  91 MSA *
  92 ReadStockholm(MSAFILE *afp)
  93 {
  94   MSA   *msa;
  95   char  *s;
  96   int    status;
  97
  98   if (feof(afp->f)) return NULL;
  99
 100   /* Initialize allocation of the MSA.
 101    */
 102   msa = MSAAlloc(10, 0);
 103
 104   /* Check the magic Stockholm header line.
 105    * We have to skip blank lines here, else we perceive
 106    * trailing blank lines in a file as a format error when
 107    * reading in multi-record mode.
 108    */
 109   do {
 110     if ((s = MSAFileGetLine(afp)) == NULL) {
 111       MSAFree(msa);
 112       return NULL;
 113     }
 114   } while (IsBlankline(s));
 115
 116   if (strncmp(s, "# STOCKHOLM 1.", 14) != 0)
 117     Die("\
 118 File %s doesn't appear to be in Stockholm format.\n\
 119 Assuming there isn't some other problem with your file (it is an\n\
 120 alignment file, right?), please either:\n\
 121   a) use the Babelfish format autotranslator option (-B, usually);\n\
 122   b) specify the file's format with the --informat option; or\n\
 123   a) reformat the alignment to Stockholm format.\n",
 124         afp->fname);
 125
 126   /* Read the alignment file one line at a time.
 127    */
 128   while ((s = MSAFileGetLine(afp)) != NULL)
 129     {
 130       while (*s == ' ' || *s == '\t') s++;  /* skip leading whitespace */
 131
 132       if (*s == '#') {
 133         if      (strncmp(s, "#=GF", 4) == 0)   status = parse_gf(msa, s);
 134         else if (strncmp(s, "#=GS", 4) == 0)   status = parse_gs(msa, s);
 135         else if (strncmp(s, "#=GC", 4) == 0)   status = parse_gc(msa, s);
 136         else if (strncmp(s, "#=GR", 4) == 0)   status = parse_gr(msa, s);
 137         else                                   status = parse_comment(msa, s);
 138       }
 139       else if (strncmp(s, "//",   2) == 0)   break;
 140       else if (*s == '\n')                   continue;
 141       else                                   status = parse_sequence(msa, s);
 142
 143       if (status == 0)
 144         Die("Stockholm format parse error: line %d of file %s while reading alignment %s",
 145             afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name);
 146     }
 147
 148   if (s == NULL && msa->nseq != 0)
 149     Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name);
 150
 151   if (s == NULL && msa->nseq == 0) {
 152                                 /* probably just some junk at end of file */
 153       MSAFree(msa);
 154       return NULL;
 155     }
 156
 157   MSAVerifyParse(msa);
 158   return msa;
 159 }
 160
 161
 162 /* Function: WriteStockholm()
 163  * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis]
 164  *
 165  * Purpose:  Write an alignment in standard multi-block
 166  *           Stockholm format to an open file. A wrapper
 167  *           for actually_write_stockholm().
 168  *
 169  * Args:     fp  - file that's open for writing
 170  *           msa - alignment to write
 171  *
 172  * Returns:  (void)
 173  */
 174 void
 175 WriteStockholm(FILE *fp, MSA *msa)
 176 {
 177   actually_write_stockholm(fp, msa, 50); /* 50 char per block */
 178 }
 179
 180 /* Function: WriteStockholmOneBlock()
 181  * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis]
 182  *
 183  * Purpose:  Write an alignment in Pfam's single-block
 184  *           Stockholm format to an open file. A wrapper
 185  *           for actually_write_stockholm().
 186  *
 187  * Args:     fp  - file that's open for writing
 188  *           msa - alignment to write
 189  *
 190  * Returns:  (void)
 191  */
 192 void
 193 WriteStockholmOneBlock(FILE *fp, MSA *msa)
 194 {
 195   actually_write_stockholm(fp, msa, msa->alen); /* one big block */
 196 }
 197
 198
 199 /* Function: actually_write_stockholm()
 200  * Date:     SRE, Fri May 21 17:39:22 1999 [St. Louis]
 201  *
 202  * Purpose:  Write an alignment in Stockholm format to
 203  *           an open file. This is the function that actually
 204  *           does the work. The API's WriteStockholm()
 205  *           and WriteStockholmOneBlock() are wrappers.
 206  *
 207  * Args:     fp    - file that's open for writing
 208  *           msa   - alignment to write
 209  *           cpl   - characters to write per line in alignment block
 210  *
 211  * Returns:  (void)
 212  */
 213 static void
 214 actually_write_stockholm(FILE *fp, MSA *msa, int cpl)
 215 {
 216   int  i, j;
 217   int  len = 0;
 218   int  namewidth;
 219   int  typewidth = 0;           /* markup tags are up to 5 chars long */
 220   int  markupwidth = 0;         /* #=GR, #=GC are four char wide + 1 space */
 221   char buf[256];
 222   int  currpos;
 223   char *s, *tok;
 224
 225   /* Figure out how much space we need for name + markup
 226    * to keep the alignment in register. Required by Stockholm
 227    * spec, even though our Stockholm parser doesn't care (Erik's does).
 228    */
 229   namewidth = 0;
 230   for (i = 0; i < msa->nseq; i++)
 231     if ((len = strlen(msa->sqname[i])) > namewidth)
 232       namewidth = len;
 233
 234   /* Figure out how much space we need for markup tags
 235    *   markupwidth = always 4 if we're doing markup:  strlen("#=GR")
 236    *   typewidth   = longest markup tag
 237    */
 238   if (msa->ss      != NULL) { markupwidth = 4; typewidth = 2; }
 239   if (msa->sa      != NULL) { markupwidth = 4; typewidth = 2; }
 240   for (i = 0; i < msa->ngr; i++)
 241     if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len;
 242
 243   if (msa->rf      != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; }
 244   if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
 245   if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
 246   for (i = 0; i < msa->ngc; i++)
 247     if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len;
 248
 249
 250   /* Magic Stockholm header
 251    */
 252   fprintf(fp, "# STOCKHOLM 1.0\n");
 253
 254   /* Free text comments
 255    */
 256   for (i = 0;  i < msa->ncomment; i++)
 257     fprintf(fp, "# %s\n", msa->comment[i]);
 258   if (msa->ncomment > 0) fprintf(fp, "\n");
 259
 260   /* GF section: per-file annotation
 261    */
 262   if (msa->name  != NULL)       fprintf(fp, "#=GF ID    %s\n", msa->name);
 263   if (msa->acc   != NULL)       fprintf(fp, "#=GF AC    %s\n", msa->acc);
 264   if (msa->desc  != NULL)       fprintf(fp, "#=GF DE    %s\n", msa->desc);
 265   if (msa->au    != NULL)       fprintf(fp, "#=GF AU    %s\n", msa->au);
 266   if (msa->flags & MSA_SET_GA)  fprintf(fp, "#=GF GA    %.1f %.1f\n", msa->ga1, msa->ga2);
 267   if (msa->flags & MSA_SET_NC)  fprintf(fp, "#=GF TC    %.1f %.1f\n", msa->nc1, msa->nc2);
 268   if (msa->flags & MSA_SET_TC)  fprintf(fp, "#=GF TC    %.1f %.1f\n", msa->tc1, msa->tc2);
 269   for (i = 0; i < msa->ngf; i++)
 270     fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]);
 271   fprintf(fp, "\n");
 272
 273
 274   /* GS section: per-sequence annotation
 275    */
 276   if (msa->flags & MSA_SET_WGT)
 277     {
 278       for (i = 0; i < msa->nseq; i++)
 279         fprintf(fp, "#=GS %-*.*s WT    %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]);
 280       fprintf(fp, "\n");
 281     }
 282   if (msa->sqacc != NULL)
 283     {
 284       for (i = 0; i < msa->nseq; i++)
 285         if (msa->sqacc[i] != NULL)
 286           fprintf(fp, "#=GS %-*.*s AC    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]);
 287       fprintf(fp, "\n");
 288     }
 289   if (msa->sqdesc != NULL)
 290     {
 291       for (i = 0; i < msa->nseq; i++)
 292         if (msa->sqdesc[i] != NULL)
 293           fprintf(fp, "#=GS %*.*s DE    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]);
 294       fprintf(fp, "\n");
 295     }
 296   for (i = 0; i < msa->ngs; i++)
 297     {
 298       /* Multiannotated GS tags are possible; for example,
 299        *     #=GS foo DR PDB; 1xxx;
 300        *     #=GS foo DR PDB; 2yyy;
 301        * These are stored, for example, as:
 302        *     msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;"
 303        * and must be decomposed.
 304        */
 305       for (j = 0; j < msa->nseq; j++)
 306         if (msa->gs[i][j] != NULL)
 307           {
 308             s = msa->gs[i][j];
 309             while ((tok = sre_strtok(&s, "\n", NULL)) != NULL)
 310               fprintf(fp, "#=GS %*.*s %5s %s\n", namewidth, namewidth,
 311                       msa->sqname[j], msa->gs_tag[i], tok);
 312           }
 313       fprintf(fp, "\n");
 314     }
 315
 316   /* Alignment section:
 317    * contains aligned sequence, #=GR annotation, and #=GC annotation
 318    */
 319   for (currpos = 0; currpos < msa->alen; currpos += cpl)
 320     {
 321       if (currpos > 0) fprintf(fp, "\n");
 322       for (i = 0; i < msa->nseq; i++)
 323         {
 324           strncpy(buf, msa->aseq[i] + currpos, cpl);
 325           buf[cpl] = '\0';
 326           fprintf(fp, "%-*.*s  %s\n", namewidth+typewidth+markupwidth, namewidth+typewidth+markupwidth,
 327                   msa->sqname[i], buf);
 328
 329           if (msa->ss != NULL && msa->ss[i] != NULL) {
 330             strncpy(buf, msa->ss[i] + currpos, cpl);
 331             buf[cpl] = '\0';
 332             fprintf(fp, "#=GR %-*.*s SS     %s\n", namewidth, namewidth, msa->sqname[i], buf);
 333           }
 334           if (msa->sa != NULL && msa->sa[i] != NULL) {
 335             strncpy(buf, msa->sa[i] + currpos, cpl);
 336             buf[cpl] = '\0';
 337             fprintf(fp, "#=GR %-*.*s SA     %s\n", namewidth, namewidth, msa->sqname[i], buf);
 338           }
 339           for (j = 0; j < msa->ngr; j++)
 340             if (msa->gr[j][i] != NULL) {
 341               strncpy(buf, msa->gr[j][i] + currpos, cpl);
 342               buf[cpl] = '\0';
 343               fprintf(fp, "#=GR %-*.*s %5s  %s\n",
 344                       namewidth, namewidth, msa->sqname[i], msa->gr_tag[j], buf);
 345             }
 346         }
 347       if (msa->ss_cons != NULL) {
 348         strncpy(buf, msa->ss_cons + currpos, cpl);
 349         buf[cpl] = '\0';
 350         fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SS_cons", buf);
 351       }
 352
 353       if (msa->sa_cons != NULL) {
 354         strncpy(buf, msa->sa_cons + currpos, cpl);
 355         buf[cpl] = '\0';
 356         fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SA_cons", buf);
 357       }
 358
 359       if (msa->rf != NULL) {
 360         strncpy(buf, msa->rf + currpos, cpl);
 361         buf[cpl] = '\0';
 362         fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "RF", buf);
 363       }
 364       for (j = 0; j < msa->ngc; j++) {
 365         strncpy(buf, msa->gc[j] + currpos, cpl);
 366         buf[cpl] = '\0';
 367         fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth,
 368                 msa->gc_tag[j], buf);
 369       }
 370     }
 371   fprintf(fp, "//\n");
 372 }
 373
 374
 375
 376
 377
 378 /* Format of a GF line:
 379  *    #=GF <featurename> <text>
 380  */
 381 static int
 382 parse_gf(MSA *msa, char *buf)
 383 {
 384   char *gf;
 385   char *featurename;
 386   char *text;
 387   char *s;
 388
 389   s = buf;
 390   if ((gf          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 391   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 392   if ((text        = sre_strtok(&s, "\n",       NULL)) == NULL) return 0;
 393   while (*text && (*text == ' ' || *text == '\t')) text++;
 394
 395   if      (strcmp(featurename, "ID") == 0)
 396     msa->name                 = sre_strdup(text, -1);
 397   else if (strcmp(featurename, "AC") == 0)
 398     msa->acc                  = sre_strdup(text, -1);
 399   else if (strcmp(featurename, "DE") == 0)
 400     msa->desc                 = sre_strdup(text, -1);
 401   else if (strcmp(featurename, "AU") == 0)
 402     msa->au                   = sre_strdup(text, -1);
 403   else if (strcmp(featurename, "GA") == 0)
 404     {
 405       s = text;
 406       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 407       msa->ga1 = atof(text);
 408       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 409       msa->ga2 = atof(text);
 410       msa->flags |= MSA_SET_GA;
 411     }
 412   else if (strcmp(featurename, "NC") == 0)
 413     {
 414       s = text;
 415       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 416       msa->nc1 = atof(text);
 417       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 418       msa->nc2 = atof(text);
 419       msa->flags |= MSA_SET_NC;
 420     }
 421   else if (strcmp(featurename, "TC") == 0)
 422     {
 423       s = text;
 424       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 425       msa->tc1 = atof(text);
 426       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 427       msa->tc2 = atof(text);
 428       msa->flags |= MSA_SET_TC;
 429     }
 430   else
 431     MSAAddGF(msa, featurename, text);
 432
 433   return 1;
 434 }
 435
 436
 437 /* Format of a GS line:
 438  *    #=GS <seqname> <featurename> <text>
 439  */
 440 static int
 441 parse_gs(MSA *msa, char *buf)
 442 {
 443   char *gs;
 444   char *seqname;
 445   char *featurename;
 446   char *text;
 447   int   seqidx;
 448   char *s;
 449
 450   s = buf;
 451   if ((gs          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 452   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 453   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 454   if ((text        = sre_strtok(&s, "\n",       NULL)) == NULL) return 0;
 455   while (*text && (*text == ' ' || *text == '\t')) text++;
 456
 457   /* GS usually follows another GS; guess lastidx+1
 458    */
 459   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
 460   msa->lastidx = seqidx;
 461
 462   if (strcmp(featurename, "WT") == 0)
 463     {
 464       msa->wgt[seqidx]          = atof(text);
 465       msa->flags |= MSA_SET_WGT;
 466     }
 467
 468   else if (strcmp(featurename, "AC") == 0)
 469     MSASetSeqAccession(msa, seqidx, text);
 470
 471   else if (strcmp(featurename, "DE") == 0)
 472     MSASetSeqDescription(msa, seqidx, text);
 473
 474   else
 475     MSAAddGS(msa, featurename, seqidx, text);
 476
 477   return 1;
 478 }
 479
 480 /* Format of a GC line:
 481  *    #=GC <featurename> <text>
 482  */
 483 static int
 484 parse_gc(MSA *msa, char *buf)
 485 {
 486   char *gc;
 487   char *featurename;
 488   char *text;
 489   char *s;
 490   int   len;
 491
 492   s = buf;
 493   if ((gc          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 494   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 495   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
 496
 497   if (strcmp(featurename, "SS_cons") == 0)
 498     sre_strcat(&(msa->ss_cons), -1, text, len);
 499   else if (strcmp(featurename, "SA_cons") == 0)
 500     sre_strcat(&(msa->sa_cons), -1, text, len);
 501   else if (strcmp(featurename, "RF") == 0)
 502     sre_strcat(&(msa->rf), -1, text, len);
 503   else
 504     MSAAppendGC(msa, featurename, text);
 505
 506   return 1;
 507 }
 508
 509 /* Format of a GR line:
 510  *    #=GR <seqname> <featurename> <text>
 511  */
 512 static int
 513 parse_gr(MSA *msa, char *buf)
 514 {
 515   char *gr;
 516   char *seqname;
 517   char *featurename;
 518   char *text;
 519   int   seqidx;
 520   int   len;
 521   int   j;
 522   char *s;
 523
 524   s = buf;
 525   if ((gr          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 526   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 527   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 528   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
 529
 530   /* GR usually follows sequence it refers to; guess msa->lastidx */
 531   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx);
 532   msa->lastidx = seqidx;
 533
 534   if (strcmp(featurename, "SS") == 0)
 535     {
 536       if (msa->ss == NULL)
 537         {
 538           msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
 539           msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
 540           for (j = 0; j < msa->nseqalloc; j++)
 541             {
 542               msa->ss[j]    = NULL;
 543               msa->sslen[j] = 0;
 544             }
 545         }
 546       msa->sslen[seqidx] = sre_strcat(&(msa->ss[seqidx]), msa->sslen[seqidx], text, len);
 547     }
 548   else if (strcmp(featurename, "SA") == 0)
 549     {
 550       if (msa->sa == NULL)
 551         {
 552           msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
 553           msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
 554           for (j = 0; j < msa->nseqalloc; j++)
 555             {
 556               msa->sa[j]    = NULL;
 557               msa->salen[j] = 0;
 558             }
 559         }
 560       msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len);
 561     }
 562   else
 563     MSAAppendGR(msa, featurename, seqidx, text);
 564
 565   return 1;
 566 }
 567
 568
 569 /* comments are simply stored verbatim, not parsed
 570  */
 571 static int
 572 parse_comment(MSA *msa, char *buf)
 573 {
 574   char *s;
 575   char *comment;
 576
 577   s = buf + 1;                                 /* skip leading '#' */
 578   if (*s == '\n') { *s = '\0'; comment = s; }  /* deal with blank comment */
 579   else if ((comment = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
 580
 581   MSAAddComment(msa, comment);
 582   return 1;
 583 }
 584
 585 static int
 586 parse_sequence(MSA *msa, char *buf)
 587 {
 588   char *s;
 589   char *seqname;
 590   char *text;
 591   int   seqidx;
 592   int   len;
 593
 594   s = buf;
 595   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
 596   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
 597
 598   /* seq usually follows another seq; guess msa->lastidx +1 */
 599   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
 600   msa->lastidx = seqidx;
 601
 602   msa->sqlen[seqidx] = sre_strcat(&(msa->aseq[seqidx]), msa->sqlen[seqidx], text, len);
 603   return 1;
 604 }
 605
 606
 607