1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
/* phylip.c
 * SRE, Mon Jun 14 14:08:33 1999 [St. Louis]
 *
 * Import/export of PHYLIP interleaved multiple sequence alignment
 * files.
 *
 * RCS $Id: phylip.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $
 */
#ifdef TESTDRIVE_PHYLIP
/*****************************************************************
 * phylip.c test driver:
 *
 * Opens the alignment file named by the first command-line
 * argument, prints the format code recorded on the open file
 * handle, then reads alignments with ReadPhylip() and echoes each
 * one to stdout with WritePhylip().
 *****************************************************************/
int
main(int argc, char **argv)
{
  MSAFILE *afp;                 /* open alignment file            */
  MSA     *msa;                 /* alignment currently being echoed */
  char    *file;                /* name of the file to read       */

  /* argv[1] is only valid when an argument was actually supplied. */
  if (argc < 2)
    Die("Usage: testdriver <PHYLIP alignment file>\n");
  file = argv[1];

  if ((afp = MSAFileOpen(file, MSAFILE_UNKNOWN, NULL)) == NULL)
    Die("Couldn't open %s\n", file);

  printf("format %d\n", afp->format);

  while ((msa = ReadPhylip(afp)) != NULL)
    {
      WritePhylip(stdout, msa);
      MSAFree(msa);             /* ReadPhylip() hands ownership to the caller */
    }

  MSAFileClose(afp);
  return 0;
}
/******************************************************************/
#endif /* testdrive_phylip */
60 /* Function: ReadPhylip()
61 * Date: SRE, Fri Jun 18 12:59:37 1999 [Sanger Centre]
63 * Purpose: Parse an alignment from an open Phylip format
64 * alignment file. Phylip is a single-alignment format.
65 * Return the alignment, or NULL if we have no data.
67 * Args: afp - open alignment file
69 * Returns: MSA * - an alignment object
70 * Caller responsible for an MSAFree()
71 * NULL if no more alignments
74 ReadPhylip(MSAFILE *afp)
78 char name[11]; /* seq name max len = 10 char */
80 int idx; /* index of current sequence */
84 if (feof(afp->f)) return NULL;
86 /* Skip until we see a nonblank line; it's the header,
87 * containing nseq/alen
90 while ((s = MSAFileGetLine(afp)) != NULL)
92 if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue;
93 if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL)
94 Die("Failed to parse nseq/alen from first line of PHYLIP file %s\n", afp->fname);
95 if (! IsInt(s1) || ! IsInt(s2))
96 Die("nseq and/or alen not an integer in first line of PHYLIP file %s\n", afp->fname);
102 msa = MSAAlloc(nseq, 0);
105 while ((s = MSAFileGetLine(afp)) != NULL)
107 /* ignore blank lines. nonblank lines start w/ nonblank char */
108 if (isspace(*s)) continue;
109 /* First block has seq names */
111 strncpy(name, s, 10);
113 GKIStoreKey(msa->index, name);
114 msa->sqname[idx] = sre_strdup(name, -1);
117 /* be careful of trailing whitespace on lines */
118 if ((s1 = sre_strtok(&s, WHITESPACE, &slen)) == NULL)
119 Die("Failed to parse sequence at line %d of PHYLIP file %s\n",
120 afp->linenumber, afp->fname);
121 msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], s1, slen);
124 if (idx == nseq) { idx = 0; nblock++; }
127 MSAVerifyParse(msa); /* verifies; sets alen, wgt; frees sqlen[] */
133 /* Function: WritePhylip()
134 * Date: SRE, Fri Jun 18 12:07:41 1999 [Sanger Centre]
136 * Purpose: Write an alignment in Phylip format to an open file.
138 * Args: fp - file that's open for writing.
139 * msa - alignment to write.
144 WritePhylip(FILE *fp, MSA *msa)
146 int idx; /* counter for sequences */
147 int cpl = 50; /* 50 seq char per line */
148 char buf[51]; /* buffer for writing seq */
151 /* First line has nseq, alen
153 fprintf(fp, " %d %d\n", msa->nseq, msa->alen);
155 /* Alignment section.
156 * PHYLIP is a multiblock format, blocks (optionally) separated
157 * by blanks; names only attached to first block. Names are
158 * restricted to ten char; we achieve this by simple truncation (!).
159 * (Do we need to convert gap characters from our ./- convention?)
161 for (pos = 0; pos < msa->alen; pos += cpl)
163 if (pos > 0) fprintf(fp, "\n");
165 for (idx = 0; idx < msa->nseq; idx++)
167 strncpy(buf, msa->aseq[idx] + pos, cpl);
169 if (pos > 0) fprintf(fp, "%s\n", buf);
170 else fprintf(fp, "%-10.10s%s\n", msa->sqname[idx], buf);