13 /* Macro's added by Patrick, mucho neato, aparently ifndef aren't POSIX
16 #define MAX(A, B) ((A) > (B) ? (A) : (B))
20 #define MIN(A, B) ((A) < (B) ? (A) : (B))
23 /* Macro's added by Jon, allows checking of NULL pointers from memory
24 * allocations without function calls, but how can you return values? */
26 #define MALLOC(PTR, SIZE) do { PTR = malloc(SIZE); if (PTR == NULL) fatal_sys_error("malloc returned NULL"); } while (0)
30 #define REALLOC(PTR, SIZE) do { PTR = realloc(PTR, SIZE); if (PTR == NULL && SIZE != 0) fatal_sys_error("realloc returned NULL"); } while (0)
33 /* Structure for sequences from FASTA files */
42 void fatal_error (char *message);
43 FILE * xfopen (const char *path, const char *mode);
44 void * xmalloc (size_t size);
45 void * xrealloc (void *ptr, size_t size);
46 int diff (char a, char b);
47 float pairwise (struct fasta *a, struct fasta *b);
48 void populate (struct fasta *a);
49 void check_length (struct fasta **array);
50 struct fasta ** read_fasta (FILE *fh);
51 char ** do_pairwise (struct fasta **array);
52 void free_fasta (struct fasta *);
53 int jsprintf (char *str, const char *const fmt, ...);
56 free_fasta (struct fasta *seq) {
63 fatal_error (char *message) {
64 fprintf(stderr, "%s", message);
69 fatal_sys_error (char *message) {
75 xfopen (const char *path, const char *mode) {
77 fh = fopen(path, mode);
79 fatal_sys_error("fopen returned NULL");
84 xmalloc (size_t size) {
86 ptr = (void *) malloc(size);
88 fatal_sys_error("malloc returned NULL");
93 xrealloc (void *ptr, size_t size) {
94 ptr = (void *) realloc(ptr, size);
95 if (ptr == NULL && size != 0)
96 fatal_sys_error("realloc returned NULL");
100 /* If the same resdidue return 1 else return 0 */
102 diff (char a, char b) {
103 if (a == b && a == '-')
110 /* Does the pairwise comparison
111 * Compares the regions of the alignment that overlap, common gaps are
112 * ignored, and the number of common residues divided by the length of
113 * the shortest sequence is the identity.
116 pairwise (struct fasta *a, struct fasta *b) {
118 int start, end, numres, i, id = 0;
120 /* If the sequences don't overlap then the seq ID is 0 */
121 if (a->end < b->start || a->start > b->end)
124 start = MIN( a->start, b->start );
125 end = MAX( a->end, b->end );
126 numres = MAX( a->numres, b->numres );
128 for (i = start; i < end; i++)
129 id += diff(a->seq[i], b->seq[i]);
131 result = 100 * (float) id / (float) numres;
136 /* "Populate" the rest of the fasta structure with information
137 * Gets the start position of the sequence (first non-gap character)
138 * Gets the end position (the last non-gap character)
139 * Gets the number of residues in the sequence (that aren't gaps)
140 * Gaps are represented by '-'
143 populate (struct fasta *a) {
145 int len = strlen(a->seq);
149 for (i = 0; i <= len; i++) {
150 if (a->seq[i] != '-') {
155 for (i = len; i > 0; i--) {
156 if (a->seq[i] != '-') {
161 for (i = a->start; i < a->end; i++) {
162 if (a->seq[i] != '-') {
168 /* Makes sure that all of the sequences are the same length */
170 check_length (struct fasta **array) {
173 if (array[0] != NULL)
174 length = strlen(array[0]->seq);
176 fatal_error("check_length() not passed an array of fasta structs\n");
179 for (i = 0; array[i] != NULL; i++) {
180 if (length != (int) strlen(array[i]->seq)) {
181 fatal_error("Not all of the sequences are the same length\n");
186 /* Reads in a FASTA file and returns an array of fasta structures */
188 read_fasta (FILE *fh) {
189 int i, j, k, c, filesize = 0;
191 struct fasta **array;
192 struct stat file_stat;
194 array = (struct fasta **) malloc(ARRAY * sizeof(struct fasta *));
196 /* Find the size of the file and allocate it */
197 fstat(fileno(fh), &file_stat);
198 if (! S_ISREG(file_stat.st_mode)) {
199 fatal_error("Input file is not a regular file");
201 file = (char *) malloc(sizeof(char) * (file_stat.st_size + 1));
203 /* Read in the file and null terminate it */
204 while ((c = getc(fh)) != EOF) {
205 file[filesize++] = c;
207 file[filesize] = '\0';
209 /* Parse the FASTA file into an array of structures */
210 for (i = 0, j = 0, k = 0; i < filesize; i++) {
212 /* Begining of FASTA record */
213 if (file[i] == '>') {
215 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * (j + ARRAY));
217 /* Initalise the structure */
218 array[j] = (struct fasta *) xmalloc(sizeof(struct fasta));
219 array[j]->id = (char *) xmalloc(sizeof(char));
220 array[j]->seq = (char *) xmalloc(sizeof(char));
221 array[j]->numres = 0;
223 /* Read in the ID, terminated by a newline or NULL (end of string) */
225 while (file[i] != '\0' && file[i] != '\n') {
226 if (k % FILEBUF == 0)
227 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (FILEBUF + k));
228 array[j]->id[k++] = file[i++];
230 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * k + 1);
231 array[j]->id[k] = '\0';
234 /* read in the sequence, terminated by a '>' */
235 for (k = 0; file[i] != '\0' && file[i] != '>'; i++) {
236 if (file[i] != '\n') {
237 if (k % FILEBUF == 0)
238 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (FILEBUF + k));
239 array[j]->seq[k++] = file[i];
242 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * k + 1);
243 array[j]->seq[k] = '\0';
252 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * (j + 1));
255 /* find the start and end points for the alignments */
256 for (i = 0; array[i] != NULL; i++)
262 do_pairwise (struct fasta **array) {
263 int j = 0, i = 0, offset = 0, size;
266 /* Find the size of the output and preallocate the space in the output
269 while (array[i] != NULL)
272 /* Number of IDs, each ID, then the upper corner distances, then for
274 size = 1 + i + (i * i - i) / 2 + 1;
276 output = (char **) xmalloc(sizeof(char *) * size);
278 for (i = 0; i < size - 1; i++) {
279 output[i] = (char *) xmalloc(sizeof(char) * FILEBUF);
280 memset(output[i], 0x00, sizeof(char) * FILEBUF);
282 output[size - 1] = NULL;
285 /* The number of IDs */
287 while (array[i] != NULL)
289 jsprintf(output[offset++], "%i", i);
291 /* If this statement is above the other one then it core dumps!? */
294 for (i = 0; array[i] != NULL; i++)
295 jsprintf(output[offset++], "%s", array[i]->id);
297 /* Upper corner pairwise comparisons */
298 for (i = 0; array[i] != NULL; i++) {
299 for (j = i + 1; array[j] != NULL; j++) {
300 jsprintf(output[offset++], "%f", pairwise(array[i], array[j]));
307 /* Grows buffer dynamically to make input fit inside it.
308 * Requires malloc'd pointer to string.
309 * Returns length of string.
312 jsprintf (char *str, const char *const fmt, ...) {
317 str = (char *) realloc(str, sizeof(char) * FILEBUF);
319 for (i = 1; (size = vsnprintf(str, sizeof(char) * FILEBUF * i, fmt, ap)) == -1; i++) {
321 fatal_error("Reached 100 iterations of vsnprinft, it's probably blown up. Check input");
322 str = (char *) xrealloc(str, sizeof(char) * FILEBUF * i);
325 str = (char *) xrealloc(str, sizeof(char) * size + 1);