X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=sources%2Fpairwise%2Fdocs%2Fhtml%2Fpairwise_8c-source.html;fp=sources%2Fpairwise%2Fdocs%2Fhtml%2Fpairwise_8c-source.html;h=1f87d07719fa7ecf39b08b8b42f7c73b2c632554;hb=81362e35a140cd040e948b921053e74267f8a6e3;hp=0000000000000000000000000000000000000000;hpb=2cf032f4b987ba747c04159965aed78e3820d942;p=jpred.git diff --git a/sources/pairwise/docs/html/pairwise_8c-source.html b/sources/pairwise/docs/html/pairwise_8c-source.html new file mode 100644 index 0000000..1f87d07 --- /dev/null +++ b/sources/pairwise/docs/html/pairwise_8c-source.html @@ -0,0 +1,299 @@ + + +pairwise: pairwise.c Source File + + + +
Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals
+

pairwise.c

Go to the documentation of this file.
00001 /* Pairwise identity program.
+00002  *
+00003  * $Id
+00004  *
+00005  * Reads in a gapped alignment sequences in a FASTA file format.
+00006  * Assume gaps are represented by '-' characters.
+00007  *
+00008  * For each pair calculates the sequence percentage identity, see comments
+00009  * for pairwise() for details.
+00010  *
+00011  * Output is in the format for OC to read in, i.e. the number of sequences,
+00012  * followed by the sequence ID's, followed by the pairwise comparisons.
+00013  *
+00014  * Thu Dec  5 14:38:43 GMT 2002 Jon
+00015  * Added checking for eqilength sequences
+00016  *
+00017  * Thu Jul 24 12:14:04 BST 2003 Jon
+00018  * Moved FASTA reading into read_fasta function
+00019  * Checked into CVS
+00020  *
+00021  * */
+00022 
+00023 #include <stdio.h>
+00024 #include <stdlib.h>
+00025 #include <errno.h>
+00026 #include <string.h>
+00027 #include "pairwise.h"
+00028 
+00029 #define ARRAY 50
+00030 #define FILEBUF 1000
+00031 
+00032 /* Macro's added by Patrick, mucho neato, aparently ifndef aren't POSIX
+00033  * anymore */
+00034 #if !defined(MAX)
+00035 #define MAX(A, B)       ((A) > (B) ? (A) : (B))
+00036 #endif
+00037 
+00038 #if !defined(MIN)
+00039 #define MIN(A, B)       ((A) < (B) ? (A) : (B))
+00040 #endif
+00041 
+00042 /* Macro's added by Jon, allows checking of NULL pointers from memory
+00043  * allocations without function calls, but how can you return values? */
+00044 #if !defined(MALLOC)
+00045 #define MALLOC(PTR, SIZE) do { PTR = malloc(SIZE); if (PTR == NULL) fatal_sys_error("malloc returned NULL"); } while (0)
+00046 #endif
+00047 
+00048 #if !defined(REALLOC)
+00049 #define REALLOC(PTR, SIZE) do { PTR = realloc(PTR, SIZE); if (PTR == NULL && SIZE != 0) fatal_sys_error("realloc returned NULL"); } while (0)
+00050 #endif
+00051 
+00052 void
+00053 fatal_error (char *message) {
+00054         printf(message);
+00055         exit(EXIT_FAILURE);
+00056 }
+00057 
+00058 void
+00059 fatal_sys_error (char *message) {
+00060         perror(message);
+00061         exit(EXIT_FAILURE);
+00062 }
+00063 
+00064 FILE *
+00065 xfopen (const char *path, const char *mode) {
+00066         FILE *fh;
+00067         fh = fopen(path, mode);
+00068         if (fh == NULL)
+00069                 fatal_sys_error("fopen returned NULL");
+00070         return fh;
+00071 }
+00072 
+00073 void *
+00074 xmalloc (size_t size) {
+00075         void *ptr;
+00076         ptr = (void *) malloc(size);
+00077         if (ptr == NULL)
+00078                 fatal_sys_error("malloc returned NULL");
+00079         return ptr;
+00080 }
+00081 
+00082 void *
+00083 xrealloc (void *ptr, size_t size) {
+00084         ptr = (void *) realloc(ptr, size);
+00085         if (ptr == NULL && size != 0)
+00086                 fatal_sys_error("realloc returned NULL");
+00087         return ptr;
+00088 }
+00089 
+00090 /* If the same resdidue return 1 else return 0 */
+00091 int 
+00092 diff (char a, char b) {
+00093         if (a == b && a == '-')
+00094                 return 0;
+00095         if (a == b)
+00096                 return 1;
+00097         return 0;
+00098 }
+00099 
+00100 /* Does the pairwise comparison
+00101  * Compares the regions of the alignment that overlap, common gaps are
+00102  * ignored, and the number of common residues divided by the length of
+00103  * the shortest sequence is the identity.
+00104  * */
+00105 float
+00106 pairwise (struct fasta *a, struct fasta *b) {
+00107         float result;
+00108         int start, end, numres, i, id = 0;
+00109         
+00110         /* If the sequences don't overlap then the seq ID is 0 */
+00111         if (a->end < b->start || a->start > b->end)
+00112                 return 0;
+00113 
+00114         start = MIN( a->start, b->start );
+00115         end = MAX( a->end, b->end );
+00116         numres = MAX( a->numres, b->numres );
+00117 
+00118         for (i = start; i < end; i++)
+00119                 id += diff(a->seq[i], b->seq[i]);
+00120 
+00121         result = 100 * (float) id / (float) numres;
+00122 
+00123         return result;
+00124 }
+00125 
+00126 /* "Populate" the rest of the fasta structure with information
+00127  * Gets the start position of the sequence (first non-gap character)
+00128  * Gets the end position (the last non-gap character)
+00129  * Gets the number of residues in the sequence (that aren't gaps)
+00130  * Gaps are represented by '-'
+00131  * */
+00132 void
+00133 populate (struct fasta *a) {
+00134         int i;
+00135         int len = strlen(a->seq);
+00136 
+00137         a->numres = 0;
+00138         
+00139         for (i = 0; i <= len; i++) {
+00140                 if (a->seq[i] != '-') {
+00141                         a->start = i;
+00142                         break;
+00143                 }
+00144         }
+00145         for (i = len; i > 0; i--) {
+00146                 if (a->seq[i] != '-') {
+00147                         a->end = i;
+00148                         break;
+00149                 }
+00150         }
+00151         for (i = a->start; i < a->end; i++) {
+00152                 if (a->seq[i] != '-') {
+00153                         a->numres++;
+00154                 }
+00155         }
+00156 }
+00157 
+00158 /* Makes sure that all of the sequences are the same length */
+00159 void
+00160 check_length (struct fasta **array) {
+00161         int i, length;
+00162 
+00163         if (array[0] != NULL)
+00164                 length = strlen(array[0]->seq);
+00165         else {
+00166                 fprintf(stderr, "check_length() not passed an array of fasta structs\n");
+00167                 return;
+00168         }
+00169 
+00170         for (i = 0; array[i] != NULL; i++) {
+00171                 if (length != strlen(array[i]->seq)) {
+00172                         fatal_error("Not all of the sequences are the same length\n");
+00173                 }
+00174         }
+00175 }
+00176 
+00177 /* Reads in a FASTA file and returns an array of fasta structures */
+00178 struct fasta **
+00179 read_fasta (FILE *fh) {
+00180         int i, j, k, c, filesize = 1;
+00181         char *file;
+00182         struct fasta **array;
+00183 
+00184         array = (struct fasta **) xmalloc(ARRAY * sizeof(struct fasta *));
+00185         file = (char *) xmalloc(sizeof(char));
+00186 
+00187         /* Allocate initial space for the file */
+00188         file = xrealloc(file, sizeof(char) * filesize);
+00189         file[filesize] = '\0';
+00190 
+00191         /* Read in the file */
+00192         while ((c = getc(fh)) != EOF) {
+00193                 if (filesize % FILEBUF)
+00194                         file = xrealloc(file, sizeof(char) * (FILEBUF + filesize));
+00195 
+00196                 file[filesize] = c;
+00197                 filesize++;
+00198         }
+00199 
+00200         /* Parse the FASTA file into an array of structures */
+00201         for (i = 0, j = 0, k = 0; i < filesize; i++) {
+00202                 if (file[i] == '>') {
+00203                         if (j % ARRAY == 0)
+00204                                 array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * (j + ARRAY));
+00205 
+00206                         array[j] = (struct fasta *) xmalloc(sizeof(struct fasta));
+00207                         array[j]->id = (char *) xmalloc(sizeof(char));
+00208                         array[j]->seq = (char *) xmalloc(sizeof(char));
+00209                         array[j]->numres = 0;
+00210 
+00211                         i++;
+00212                         while (file[i] != '\0' && file[i] != '\n') {
+00213                                 if (k % ARRAY == 0)
+00214                                         array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k));
+00215                                 array[j]->id[k] = file[i];
+00216                                 k++; i++;
+00217                         }
+00218                         array[j]->id = (char *) xrealloc(array[j]->id, sizeof(char) * (ARRAY + k));
+00219                         array[j]->id[k] = '\0';
+00220                         k = 0;
+00221 
+00222                         while (file[i] != '\0' && file[i] != '>') {
+00223                                 if (file[i] == '\n') {
+00224                                         i++;
+00225                                         continue;
+00226                                 }
+00227                                 if (k % ARRAY == 0)
+00228                                         array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k));
+00229                                 array[j]->seq[k] = file[i];
+00230                                 k++; i++;
+00231                         }
+00232                         array[j]->seq = (char *) xrealloc(array[j]->seq, sizeof(char) * (ARRAY + k));
+00233                         array[j]->seq[k] = '\0';
+00234                         k = 0;
+00235                         i--;
+00236                         j++;
+00237                 }
+00238         }
+00239 
+00240         free(file);
+00241         array = (struct fasta **) xrealloc(array, sizeof(struct fasta *) * j);
+00242         array[j] = NULL;
+00243 
+00244         /* find the start and end points for the alignments */
+00245         for (i = 0; array[i] != NULL; i++) {
+00246                 populate(array[i]);
+00247         }
+00248 
+00249         return array;
+00250 }
+00251 
+00252 int
+00253 main (int argc, char **argv) {
+00254         FILE *fh;
+00255         struct fasta **array;
+00256         int i = 0, j;
+00257 
+00258         /* Read in the FASTA file */
+00259         if (argc == 2) 
+00260                 fh = xfopen(argv[1], "r");
+00261         else
+00262                 fh = stdin;
+00263 
+00264         array = read_fasta(fh);
+00265         fclose(fh);
+00266 
+00267         check_length(array);
+00268         
+00269         /* start the OC output */
+00270         while (array[i] != NULL)
+00271                 i++;
+00272 
+00273         fprintf(stdout, "%i\n", i);
+00274 
+00275         for (i = 0; array[i] != NULL; i++)
+00276                 fprintf(stdout, "%s\n", array[i]->id);
+00277         
+00278         /* do the pairwise comparison */
+00279         for (i = 0; array[i] != NULL; i++) {
+00280                 for (j = i + 1; array[j] != NULL; j++) {
+00281                         printf("%f\n", pairwise(array[i], array[j]));
+00282                 }
+00283         }
+00284 
+00285         return EXIT_SUCCESS;
+00286 }
+

Generated on Thu Jul 24 12:17:49 2003 for pairwise by + +doxygen +1.3.2
+ +