00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <errno.h>
00026 #include <string.h>
00027 #include "pairwise.h"
00028
00029 #define ARRAY 50
00030 #define FILEBUF 1000
00031
00032
00033
00034 #if !defined(MAX)
00035 #define MAX(A, B) ((A) > (B) ? (A) : (B))
00036 #endif
00037
00038 #if !defined(MIN)
00039 #define MIN(A, B) ((A) < (B) ? (A) : (B))
00040 #endif
00041
00042
00043
00044 #if !defined(MALLOC)
00045 #define MALLOC(PTR, SIZE) do { PTR = malloc(SIZE); if (PTR == NULL) fatal_sys_error("malloc returned NULL"); } while (0)
00046 #endif
00047
00048 #if !defined(REALLOC)
00049 #define REALLOC(PTR, SIZE) do { PTR = realloc(PTR, SIZE); if (PTR == NULL && SIZE != 0) fatal_sys_error("realloc returned NULL"); } while (0)
00050 #endif
00051
00052 void
00053 fatal_error (char *message) {
00054 printf(message);
00055 exit(EXIT_FAILURE);
00056 }
00057
00058 void
00059 fatal_sys_error (char *message) {
00060 perror(message);
00061 exit(EXIT_FAILURE);
00062 }
00063
00064 FILE *
00065 xfopen (const char *path, const char *mode) {
00066 FILE *fh;
00067 fh = fopen(path, mode);
00068 if (fh == NULL)
00069 fatal_sys_error("fopen returned NULL");
00070 return fh;
00071 }
00072
00073 void *
00074 xmalloc (size_t size) {
00075 void *ptr;
00076 ptr = (void *) malloc(size);
00077 if (ptr == NULL)
00078 fatal_sys_error("malloc returned NULL");
00079 return ptr;
00080 }
00081
00082 void *
00083 xrealloc (void *ptr, size_t size) {
00084 ptr = (void *) realloc(ptr, size);
00085 if (ptr == NULL && size != 0)
00086 fatal_sys_error("realloc returned NULL");
00087 return ptr;
00088 }
00089
00090
00091 int
00092 diff (char a, char b) {
00093 if (a == b && a == '-')
00094 return 0;
00095 if (a == b)
00096 return 1;
00097 return 0;
00098 }
00099
00100
00101
00102
00103
00104
00105 float
00106 pairwise (struct fasta *a, struct fasta *b) {
00107 float result;
00108 int start, end, numres, i, id = 0;
00109
00110
00111 if (a->end < b->start || a->start > b->end)
00112 return 0;
00113
00114 start = MIN( a->start, b->start );
00115 end = MAX( a->end, b->end );
00116 numres = MAX( a->numres, b->numres );
00117
00118 for (i = start; i < end; i++)
00119 id += diff(a->seq[i], b->seq[i]);
00120
00121 result = 100 * (float) id / (float) numres;
00122
00123 return result;
00124 }
00125
00126
00127
00128
00129
00130
00131
00132 void
00133 populate (struct fasta *a) {
00134 int i;
00135 int len = strlen(a->seq);
00136
00137 a->numres = 0;
00138
00139 for (i = 0; i <= len; i++) {
00140 if (a->seq[i] != '-') {
00141 a->start = i;
00142 break;
00143 }
00144 }
00145 for (i = len; i > 0; i--) {
00146 if (a->seq[i] != '-') {
00147 a->end = i;
00148 break;
00149 }
00150 }
00151 for (i = a->start; i < a->end; i++) {
00152 if (a->seq[i] != '-') {
00153 a->numres++;
00154 }
00155 }
00156 }
00157
00158
00159 void
00160 check_length (struct fasta **array) {
00161 int i, length;
00162
00163 if (array[0] != NULL)
00164 length = strlen(array[0]->seq);
00165 else {
00166 fprintf(stderr, "check_length() not passed an array of fasta structs\n");
00167 return;
00168 }
00169
00170 for (i = 0; array[i] != NULL; i++) {
00171 if (length != strlen(array[i]->seq)) {
00172 fatal_error("Not all of the sequences are the same length\n");
00173 }
00174 }
00175 }
00176
00177
00178 struct fasta **
00179 read_fasta (FILE *fh) {
00180 int i, j, k, c, filesize = 1;
00181 char *file;
00182 struct fasta **array;
00183
00184 array = (struct fasta **) xmalloc(ARRAY * sizeof(struct fasta *));
00185 file = (char *) xmalloc(sizeof(char));
00186
00187
00188 file = xrealloc(file, sizeof(char) * filesize);
00189 file[filesize] = '\0';
00190
00191
00192 while ((c = getc(fh)) != EOF) {
00193 if (filesize % FILEBUF)
00194 file = xrealloc(file, sizeof(char) * (FILEBUF + filesize));
00195
00196 file[filesize] = c;
00197 filesize++;
00198 }
00199
00200
00201 for (i = 0, j = 0, k = 0; i < filesize; i++) {
00202 if (file[i] == '>') {
00203 if (j % ARRAY == 0)
00204 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * (j + ARRAY));
00205
00206 array[j] = (struct fasta *) xmalloc(sizeof(struct fasta));
00207 array[j]->id = (char *) xmalloc(sizeof(char));
00208 array[j]->seq = (char *) xmalloc(sizeof(char));
00209 array[j]->numres = 0;
00210
00211 i++;
00212 while (file[i] != '\0' && file[i] != '\n') {
00213 if (k % ARRAY == 0)
00214 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k));
00215 array[j]->id[k] = file[i];
00216 k++; i++;
00217 }
00218 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k));
00219 array[j]->id[k] = '\0';
00220 k = 0;
00221
00222 while (file[i] != '\0' && file[i] != '>') {
00223 if (file[i] == '\n') {
00224 i++;
00225 continue;
00226 }
00227 if (k % ARRAY == 0)
00228 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k));
00229 array[j]->seq[k] = file[i];
00230 k++; i++;
00231 }
00232 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k));
00233 array[j]->seq[k] = '\0';
00234 k = 0;
00235 i--;
00236 j++;
00237 }
00238 }
00239
00240 free(file);
00241 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * j);
00242 array[j] = NULL;
00243
00244
00245 for (i = 0; array[i] != NULL; i++) {
00246 populate(array[i]);
00247 }
00248
00249 return array;
00250 }
00251
00252 int
00253 main (int argc, char **argv) {
00254 FILE *fh;
00255 struct fasta **array;
00256 int i = 0, j;
00257
00258
00259 if (argc == 2)
00260 fh = xfopen(argv[1], "r");
00261 else
00262 fh = stdin;
00263
00264 array = read_fasta(fh);
00265 fclose(fh);
00266
00267 check_length(array);
00268
00269
00270 while (array[i] != NULL)
00271 i++;
00272
00273 fprintf(stdout, "%i\n", i);
00274
00275 for (i = 0; array[i] != NULL; i++)
00276 fprintf(stdout, "%s\n", array[i]->id);
00277
00278
00279 for (i = 0; array[i] != NULL; i++) {
00280 for (j = i + 1; array[j] != NULL; j++) {
00281 printf("%f\n", pairwise(array[i], array[j]));
00282 }
00283 }
00284
00285 return EXIT_SUCCESS;
00286 }