X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=sources%2Fpairwise%2Fdocs%2Fhtml%2Fpairwise_8c-source.html;fp=sources%2Fpairwise%2Fdocs%2Fhtml%2Fpairwise_8c-source.html;h=1f87d07719fa7ecf39b08b8b42f7c73b2c632554;hb=81362e35a140cd040e948b921053e74267f8a6e3;hp=0000000000000000000000000000000000000000;hpb=2cf032f4b987ba747c04159965aed78e3820d942;p=jpred.git diff --git a/sources/pairwise/docs/html/pairwise_8c-source.html b/sources/pairwise/docs/html/pairwise_8c-source.html new file mode 100644 index 0000000..1f87d07 --- /dev/null +++ b/sources/pairwise/docs/html/pairwise_8c-source.html @@ -0,0 +1,299 @@ + +
+00001 /* Pairwise identity program. +00002 * +00003 * $Id +00004 * +00005 * Reads in a gapped alignment sequences in a FASTA file format. +00006 * Assume gaps are represented by '-' characters. +00007 * +00008 * For each pair calculates the sequence percentage identity, see comments +00009 * for pairwise() for details. +00010 * +00011 * Output is in the format for OC to read in, i.e. the number of sequences, +00012 * followed by the sequence ID's, followed by the pairwise comparisons. +00013 * +00014 * Thu Dec 5 14:38:43 GMT 2002 Jon +00015 * Added checking for eqilength sequences +00016 * +00017 * Thu Jul 24 12:14:04 BST 2003 Jon +00018 * Moved FASTA reading into read_fasta function +00019 * Checked into CVS +00020 * +00021 * */ +00022 +00023 #include <stdio.h> +00024 #include <stdlib.h> +00025 #include <errno.h> +00026 #include <string.h> +00027 #include "pairwise.h" +00028 +00029 #define ARRAY 50 +00030 #define FILEBUF 1000 +00031 +00032 /* Macro's added by Patrick, mucho neato, aparently ifndef aren't POSIX +00033 * anymore */ +00034 #if !defined(MAX) +00035 #define MAX(A, B) ((A) > (B) ? (A) : (B)) +00036 #endif +00037 +00038 #if !defined(MIN) +00039 #define MIN(A, B) ((A) < (B) ? (A) : (B)) +00040 #endif +00041 +00042 /* Macro's added by Jon, allows checking of NULL pointers from memory +00043 * allocations without function calls, but how can you return values? */ +00044 #if !defined(MALLOC) +00045 #define MALLOC(PTR, SIZE) do { PTR = malloc(SIZE); if (PTR == NULL) fatal_sys_error("malloc returned NULL"); } while (0) +00046 #endif +00047 +00048 #if !defined(REALLOC) +00049 #define REALLOC(PTR, SIZE) do { PTR = realloc(PTR, SIZE); if (PTR == NULL && SIZE != 0) fatal_sys_error("realloc returned NULL"); } while (0) +00050 #endif +00051 +00052 void +00053 fatal_error (char *message) { +00054 printf(message); +00055 exit(EXIT_FAILURE); +00056 } +00057 +00058 void +00059 fatal_sys_error (char *message) { +00060 perror(message); +00061 exit(EXIT_FAILURE); +00062 } +00063 +00064 FILE * +00065 xfopen (const char *path, const char *mode) { +00066 FILE *fh; +00067 fh = fopen(path, mode); +00068 if (fh == NULL) +00069 fatal_sys_error("fopen returned NULL"); +00070 return fh; +00071 } +00072 +00073 void * +00074 xmalloc (size_t size) { +00075 void *ptr; +00076 ptr = (void *) malloc(size); +00077 if (ptr == NULL) +00078 fatal_sys_error("malloc returned NULL"); +00079 return ptr; +00080 } +00081 +00082 void * +00083 xrealloc (void *ptr, size_t size) { +00084 ptr = (void *) realloc(ptr, size); +00085 if (ptr == NULL && size != 0) +00086 fatal_sys_error("realloc returned NULL"); +00087 return ptr; +00088 } +00089 +00090 /* If the same resdidue return 1 else return 0 */ +00091 int +00092 diff (char a, char b) { +00093 if (a == b && a == '-') +00094 return 0; +00095 if (a == b) +00096 return 1; +00097 return 0; +00098 } +00099 +00100 /* Does the pairwise comparison +00101 * Compares the regions of the alignment that overlap, common gaps are +00102 * ignored, and the number of common residues divided by the length of +00103 * the shortest sequence is the identity. +00104 * */ +00105 float +00106 pairwise (struct fasta *a, struct fasta *b) { +00107 float result; +00108 int start, end, numres, i, id = 0; +00109 +00110 /* If the sequences don't overlap then the seq ID is 0 */ +00111 if (a->end < b->start || a->start > b->end) +00112 return 0; +00113 +00114 start = MIN( a->start, b->start ); +00115 end = MAX( a->end, b->end ); +00116 numres = MAX( a->numres, b->numres ); +00117 +00118 for (i = start; i < end; i++) +00119 id += diff(a->seq[i], b->seq[i]); +00120 +00121 result = 100 * (float) id / (float) numres; +00122 +00123 return result; +00124 } +00125 +00126 /* "Populate" the rest of the fasta structure with information +00127 * Gets the start position of the sequence (first non-gap character) +00128 * Gets the end position (the last non-gap character) +00129 * Gets the number of residues in the sequence (that aren't gaps) +00130 * Gaps are represented by '-' +00131 * */ +00132 void +00133 populate (struct fasta *a) { +00134 int i; +00135 int len = strlen(a->seq); +00136 +00137 a->numres = 0; +00138 +00139 for (i = 0; i <= len; i++) { +00140 if (a->seq[i] != '-') { +00141 a->start = i; +00142 break; +00143 } +00144 } +00145 for (i = len; i > 0; i--) { +00146 if (a->seq[i] != '-') { +00147 a->end = i; +00148 break; +00149 } +00150 } +00151 for (i = a->start; i < a->end; i++) { +00152 if (a->seq[i] != '-') { +00153 a->numres++; +00154 } +00155 } +00156 } +00157 +00158 /* Makes sure that all of the sequences are the same length */ +00159 void +00160 check_length (struct fasta **array) { +00161 int i, length; +00162 +00163 if (array[0] != NULL) +00164 length = strlen(array[0]->seq); +00165 else { +00166 fprintf(stderr, "check_length() not passed an array of fasta structs\n"); +00167 return; +00168 } +00169 +00170 for (i = 0; array[i] != NULL; i++) { +00171 if (length != strlen(array[i]->seq)) { +00172 fatal_error("Not all of the sequences are the same length\n"); +00173 } +00174 } +00175 } +00176 +00177 /* Reads in a FASTA file and returns an array of fasta structures */ +00178 struct fasta ** +00179 read_fasta (FILE *fh) { +00180 int i, j, k, c, filesize = 1; +00181 char *file; +00182 struct fasta **array; +00183 +00184 array = (struct fasta **) xmalloc(ARRAY * sizeof(struct fasta *)); +00185 file = (char *) xmalloc(sizeof(char)); +00186 +00187 /* Allocate initial space for the file */ +00188 file = xrealloc(file, sizeof(char) * filesize); +00189 file[filesize] = '\0'; +00190 +00191 /* Read in the file */ +00192 while ((c = getc(fh)) != EOF) { +00193 if (filesize % FILEBUF) +00194 file = xrealloc(file, sizeof(char) * (FILEBUF + filesize)); +00195 +00196 file[filesize] = c; +00197 filesize++; +00198 } +00199 +00200 /* Parse the FASTA file into an array of structures */ +00201 for (i = 0, j = 0, k = 0; i < filesize; i++) { +00202 if (file[i] == '>') { +00203 if (j % ARRAY == 0) +00204 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * (j + ARRAY)); +00205 +00206 array[j] = (struct fasta *) xmalloc(sizeof(struct fasta)); +00207 array[j]->id = (char *) xmalloc(sizeof(char)); +00208 array[j]->seq = (char *) xmalloc(sizeof(char)); +00209 array[j]->numres = 0; +00210 +00211 i++; +00212 while (file[i] != '\0' && file[i] != '\n') { +00213 if (k % ARRAY == 0) +00214 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k)); +00215 array[j]->id[k] = file[i]; +00216 k++; i++; +00217 } +00218 array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k)); +00219 array[j]->id[k] = '\0'; +00220 k = 0; +00221 +00222 while (file[i] != '\0' && file[i] != '>') { +00223 if (file[i] == '\n') { +00224 i++; +00225 continue; +00226 } +00227 if (k % ARRAY == 0) +00228 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k)); +00229 array[j]->seq[k] = file[i]; +00230 k++; i++; +00231 } +00232 array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k)); +00233 array[j]->seq[k] = '\0'; +00234 k = 0; +00235 i--; +00236 j++; +00237 } +00238 } +00239 +00240 free(file); +00241 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * j); +00242 array[j] = NULL; +00243 +00244 /* find the start and end points for the alignments */ +00245 for (i = 0; array[i] != NULL; i++) { +00246 populate(array[i]); +00247 } +00248 +00249 return array; +00250 } +00251 +00252 int +00253 main (int argc, char **argv) { +00254 FILE *fh; +00255 struct fasta **array; +00256 int i = 0, j; +00257 +00258 /* Read in the FASTA file */ +00259 if (argc == 2) +00260 fh = xfopen(argv[1], "r"); +00261 else +00262 fh = stdin; +00263 +00264 array = read_fasta(fh); +00265 fclose(fh); +00266 +00267 check_length(array); +00268 +00269 /* start the OC output */ +00270 while (array[i] != NULL) +00271 i++; +00272 +00273 fprintf(stdout, "%i\n", i); +00274 +00275 for (i = 0; array[i] != NULL; i++) +00276 fprintf(stdout, "%s\n", array[i]->id); +00277 +00278 /* do the pairwise comparison */ +00279 for (i = 0; array[i] != NULL; i++) { +00280 for (j = i + 1; array[j] != NULL; j++) { +00281 printf("%f\n", pairwise(array[i], array[j])); +00282 } +00283 } +00284 +00285 return EXIT_SUCCESS; +00286 } +