--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
+<title>pairwise: pairwise.c Source File</title>
+<link href="doxygen.css" rel="stylesheet" type="text/css">
+</head><body>
+<!-- Generated by Doxygen 1.3.2 -->
+<div class="qindex"><a class="qindex" href="main.html">Main Page</a> | <a class="qindex" href="classes.html">Alphabetical List</a> | <a class="qindex" href="annotated.html">Data Structures</a> | <a class="qindex" href="files.html">File List</a> | <a class="qindex" href="functions.html">Data Fields</a> | <a class="qindex" href="globals.html">Globals</a></div>
+<h1>pairwise.c</h1><a href="pairwise_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre>00001 <span class="comment">/* Pairwise identity program.</span>
+00002 <span class="comment"> *</span>
+00003 <span class="comment"> * $Id</span>
+00004 <span class="comment"> *</span>
+00005 <span class="comment"> * Reads in a gapped alignment sequences in a FASTA file format.</span>
+00006 <span class="comment"> * Assume gaps are represented by '-' characters.</span>
+00007 <span class="comment"> *</span>
+00008 <span class="comment"> * For each pair calculates the sequence percentage identity, see comments</span>
+00009 <span class="comment"> * for pairwise() for details.</span>
+00010 <span class="comment"> *</span>
+00011 <span class="comment"> * Output is in the format for OC to read in, i.e. the number of sequences,</span>
+00012 <span class="comment"> * followed by the sequence ID's, followed by the pairwise comparisons.</span>
+00013 <span class="comment"> *</span>
+00014 <span class="comment"> * Thu Dec 5 14:38:43 GMT 2002 Jon</span>
+00015 <span class="comment"> * Added checking for eqilength sequences</span>
+00016 <span class="comment"> *</span>
+00017 <span class="comment"> * Thu Jul 24 12:14:04 BST 2003 Jon</span>
+00018 <span class="comment"> * Moved FASTA reading into read_fasta function</span>
+00019 <span class="comment"> * Checked into CVS</span>
+00020 <span class="comment"> *</span>
+00021 <span class="comment"> * */</span>
+00022
+00023 <span class="preprocessor">#include <stdio.h></span>
+00024 <span class="preprocessor">#include <stdlib.h></span>
+00025 <span class="preprocessor">#include <errno.h></span>
+00026 <span class="preprocessor">#include <string.h></span>
+00027 <span class="preprocessor">#include "<a class="code" href="pairwise_8h.html">pairwise.h</a>"</span>
+00028
+<a name="l00029"></a><a class="code" href="pairwise_8c.html#a0">00029</a> <span class="preprocessor">#define ARRAY 50</span>
+<a name="l00030"></a><a class="code" href="pairwise_8c.html#a1">00030</a> <span class="preprocessor"></span><span class="preprocessor">#define FILEBUF 1000</span>
+00031 <span class="preprocessor"></span>
+00032 <span class="comment">/* Macro's added by Patrick, mucho neato, aparently ifndef aren't POSIX</span>
+00033 <span class="comment"> * anymore */</span>
+00034 <span class="preprocessor">#if !defined(MAX)</span>
+<a name="l00035"></a><a class="code" href="pairwise_8c.html#a2">00035</a> <span class="preprocessor"></span><span class="preprocessor">#define MAX(A, B) ((A) > (B) ? (A) : (B))</span>
+00036 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+00037 <span class="preprocessor"></span>
+00038 <span class="preprocessor">#if !defined(MIN)</span>
+<a name="l00039"></a><a class="code" href="pairwise_8c.html#a3">00039</a> <span class="preprocessor"></span><span class="preprocessor">#define MIN(A, B) ((A) < (B) ? (A) : (B))</span>
+00040 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+00041 <span class="preprocessor"></span>
+00042 <span class="comment">/* Macro's added by Jon, allows checking of NULL pointers from memory</span>
+00043 <span class="comment"> * allocations without function calls, but how can you return values? */</span>
+00044 <span class="preprocessor">#if !defined(MALLOC)</span>
+<a name="l00045"></a><a class="code" href="pairwise_8c.html#a4">00045</a> <span class="preprocessor"></span><span class="preprocessor">#define MALLOC(PTR, SIZE) do { PTR = malloc(SIZE); if (PTR == NULL) fatal_sys_error("malloc returned NULL"); } while (0)</span>
+00046 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+00047 <span class="preprocessor"></span>
+00048 <span class="preprocessor">#if !defined(REALLOC)</span>
+<a name="l00049"></a><a class="code" href="pairwise_8c.html#a5">00049</a> <span class="preprocessor"></span><span class="preprocessor">#define REALLOC(PTR, SIZE) do { PTR = realloc(PTR, SIZE); if (PTR == NULL && SIZE != 0) fatal_sys_error("realloc returned NULL"); } while (0)</span>
+00050 <span class="preprocessor"></span><span class="preprocessor">#endif</span>
+00051 <span class="preprocessor"></span>
+00052 <span class="keywordtype">void</span>
+<a name="l00053"></a><a class="code" href="pairwise_8c.html#a6">00053</a> <a class="code" href="pairwise_8c.html#a6">fatal_error</a> (<span class="keywordtype">char</span> *message) {
+00054 printf(message);
+00055 exit(EXIT_FAILURE);
+00056 }
+00057
+00058 <span class="keywordtype">void</span>
+<a name="l00059"></a><a class="code" href="pairwise_8c.html#a7">00059</a> <a class="code" href="pairwise_8c.html#a7">fatal_sys_error</a> (<span class="keywordtype">char</span> *message) {
+00060 perror(message);
+00061 exit(EXIT_FAILURE);
+00062 }
+00063
+00064 FILE *
+<a name="l00065"></a><a class="code" href="pairwise_8c.html#a8">00065</a> <a class="code" href="pairwise_8c.html#a8">xfopen</a> (<span class="keyword">const</span> <span class="keywordtype">char</span> *path, <span class="keyword">const</span> <span class="keywordtype">char</span> *mode) {
+00066 FILE *fh;
+00067 fh = fopen(path, mode);
+00068 <span class="keywordflow">if</span> (fh == NULL)
+00069 <a class="code" href="pairwise_8c.html#a7">fatal_sys_error</a>(<span class="stringliteral">"fopen returned NULL"</span>);
+00070 <span class="keywordflow">return</span> fh;
+00071 }
+00072
+00073 <span class="keywordtype">void</span> *
+<a name="l00074"></a><a class="code" href="pairwise_8c.html#a9">00074</a> <a class="code" href="pairwise_8c.html#a9">xmalloc</a> (size_t size) {
+00075 <span class="keywordtype">void</span> *ptr;
+00076 ptr = (<span class="keywordtype">void</span> *) malloc(size);
+00077 <span class="keywordflow">if</span> (ptr == NULL)
+00078 <a class="code" href="pairwise_8c.html#a7">fatal_sys_error</a>(<span class="stringliteral">"malloc returned NULL"</span>);
+00079 <span class="keywordflow">return</span> ptr;
+00080 }
+00081
+00082 <span class="keywordtype">void</span> *
+<a name="l00083"></a><a class="code" href="pairwise_8c.html#a10">00083</a> <a class="code" href="pairwise_8c.html#a10">xrealloc</a> (<span class="keywordtype">void</span> *ptr, size_t size) {
+00084 ptr = (<span class="keywordtype">void</span> *) realloc(ptr, size);
+00085 <span class="keywordflow">if</span> (ptr == NULL && size != 0)
+00086 <a class="code" href="pairwise_8c.html#a7">fatal_sys_error</a>(<span class="stringliteral">"realloc returned NULL"</span>);
+00087 <span class="keywordflow">return</span> ptr;
+00088 }
+00089
+00090 <span class="comment">/* If the same resdidue return 1 else return 0 */</span>
+00091 <span class="keywordtype">int</span>
+<a name="l00092"></a><a class="code" href="pairwise_8c.html#a11">00092</a> <a class="code" href="pairwise_8c.html#a11">diff</a> (<span class="keywordtype">char</span> a, <span class="keywordtype">char</span> b) {
+00093 <span class="keywordflow">if</span> (a == b && a == <span class="charliteral">'-'</span>)
+00094 <span class="keywordflow">return</span> 0;
+00095 <span class="keywordflow">if</span> (a == b)
+00096 <span class="keywordflow">return</span> 1;
+00097 <span class="keywordflow">return</span> 0;
+00098 }
+00099
+00100 <span class="comment">/* Does the pairwise comparison</span>
+00101 <span class="comment"> * Compares the regions of the alignment that overlap, common gaps are</span>
+00102 <span class="comment"> * ignored, and the number of common residues divided by the length of</span>
+00103 <span class="comment"> * the shortest sequence is the identity.</span>
+00104 <span class="comment"> * */</span>
+00105 <span class="keywordtype">float</span>
+<a name="l00106"></a><a class="code" href="pairwise_8c.html#a12">00106</a> <a class="code" href="pairwise_8c.html#a12">pairwise</a> (<span class="keyword">struct</span> <a class="code" href="structfasta.html">fasta</a> *a, <span class="keyword">struct</span> <a class="code" href="structfasta.html">fasta</a> *b) {
+00107 <span class="keywordtype">float</span> result;
+00108 <span class="keywordtype">int</span> start, end, numres, i, id = 0;
+00109
+00110 <span class="comment">/* If the sequences don't overlap then the seq ID is 0 */</span>
+00111 <span class="keywordflow">if</span> (a-><a class="code" href="structfasta.html#o3">end</a> < b-><a class="code" href="structfasta.html#o2">start</a> || a-><a class="code" href="structfasta.html#o2">start</a> > b-><a class="code" href="structfasta.html#o3">end</a>)
+00112 <span class="keywordflow">return</span> 0;
+00113
+00114 start = <a class="code" href="pairwise_8c.html#a3">MIN</a>( a-><a class="code" href="structfasta.html#o2">start</a>, b-><a class="code" href="structfasta.html#o2">start</a> );
+00115 end = <a class="code" href="pairwise_8c.html#a2">MAX</a>( a-><a class="code" href="structfasta.html#o3">end</a>, b-><a class="code" href="structfasta.html#o3">end</a> );
+00116 numres = <a class="code" href="pairwise_8c.html#a2">MAX</a>( a-><a class="code" href="structfasta.html#o4">numres</a>, b-><a class="code" href="structfasta.html#o4">numres</a> );
+00117
+00118 <span class="keywordflow">for</span> (i = start; i < end; i++)
+00119 id += <a class="code" href="pairwise_8c.html#a11">diff</a>(a-><a class="code" href="structfasta.html#o1">seq</a>[i], b-><a class="code" href="structfasta.html#o1">seq</a>[i]);
+00120
+00121 result = 100 * (<span class="keywordtype">float</span>) id / (<span class="keywordtype">float</span>) numres;
+00122
+00123 <span class="keywordflow">return</span> result;
+00124 }
+00125
+00126 <span class="comment">/* "Populate" the rest of the fasta structure with information</span>
+00127 <span class="comment"> * Gets the start position of the sequence (first non-gap character)</span>
+00128 <span class="comment"> * Gets the end position (the last non-gap character)</span>
+00129 <span class="comment"> * Gets the number of residues in the sequence (that aren't gaps)</span>
+00130 <span class="comment"> * Gaps are represented by '-'</span>
+00131 <span class="comment"> * */</span>
+00132 <span class="keywordtype">void</span>
+<a name="l00133"></a><a class="code" href="pairwise_8c.html#a13">00133</a> <a class="code" href="pairwise_8c.html#a13">populate</a> (<span class="keyword">struct</span> <a class="code" href="structfasta.html">fasta</a> *a) {
+00134 <span class="keywordtype">int</span> i;
+00135 <span class="keywordtype">int</span> len = strlen(a-><a class="code" href="structfasta.html#o1">seq</a>);
+00136
+00137 a-><a class="code" href="structfasta.html#o4">numres</a> = 0;
+00138
+00139 <span class="keywordflow">for</span> (i = 0; i <= len; i++) {
+00140 <span class="keywordflow">if</span> (a-><a class="code" href="structfasta.html#o1">seq</a>[i] != <span class="charliteral">'-'</span>) {
+00141 a-><a class="code" href="structfasta.html#o2">start</a> = i;
+00142 <span class="keywordflow">break</span>;
+00143 }
+00144 }
+00145 <span class="keywordflow">for</span> (i = len; i > 0; i--) {
+00146 <span class="keywordflow">if</span> (a-><a class="code" href="structfasta.html#o1">seq</a>[i] != <span class="charliteral">'-'</span>) {
+00147 a-><a class="code" href="structfasta.html#o3">end</a> = i;
+00148 <span class="keywordflow">break</span>;
+00149 }
+00150 }
+00151 <span class="keywordflow">for</span> (i = a-><a class="code" href="structfasta.html#o2">start</a>; i < a-><a class="code" href="structfasta.html#o3">end</a>; i++) {
+00152 <span class="keywordflow">if</span> (a-><a class="code" href="structfasta.html#o1">seq</a>[i] != <span class="charliteral">'-'</span>) {
+00153 a-><a class="code" href="structfasta.html#o4">numres</a>++;
+00154 }
+00155 }
+00156 }
+00157
+00158 <span class="comment">/* Makes sure that all of the sequences are the same length */</span>
+00159 <span class="keywordtype">void</span>
+<a name="l00160"></a><a class="code" href="pairwise_8c.html#a14">00160</a> <a class="code" href="pairwise_8c.html#a14">check_length</a> (<span class="keyword">struct</span> <a class="code" href="structfasta.html">fasta</a> **array) {
+00161 <span class="keywordtype">int</span> i, length;
+00162
+00163 <span class="keywordflow">if</span> (array[0] != NULL)
+00164 length = strlen(array[0]->seq);
+00165 <span class="keywordflow">else</span> {
+00166 fprintf(stderr, <span class="stringliteral">"check_length() not passed an array of fasta structs\n"</span>);
+00167 <span class="keywordflow">return</span>;
+00168 }
+00169
+00170 <span class="keywordflow">for</span> (i = 0; array[i] != NULL; i++) {
+00171 <span class="keywordflow">if</span> (length != strlen(array[i]->seq)) {
+00172 <a class="code" href="pairwise_8c.html#a6">fatal_error</a>(<span class="stringliteral">"Not all of the sequences are the same length\n"</span>);
+00173 }
+00174 }
+00175 }
+00176
+00177 <span class="comment">/* Reads in a FASTA file and returns an array of fasta structures */</span>
+00178 <span class="keyword">struct </span><a class="code" href="structfasta.html">fasta</a> **
+<a name="l00179"></a><a class="code" href="pairwise_8c.html#a15">00179</a> <a class="code" href="pairwise_8c.html#a15">read_fasta</a> (FILE *fh) {
+00180 <span class="keywordtype">int</span> i, j, k, c, filesize = 1;
+00181 <span class="keywordtype">char</span> *file;
+00182 <span class="keyword">struct </span><a class="code" href="structfasta.html">fasta</a> **array;
+00183
+00184 array = (<span class="keyword">struct </span><a class="code" href="structfasta.html">fasta</a> **) <a class="code" href="pairwise_8c.html#a9">xmalloc</a>(<a class="code" href="pairwise_8c.html#a0">ARRAY</a> * sizeof(struct fasta *));
+00185 file = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a9">xmalloc</a>(<span class="keyword">sizeof</span>(<span class="keywordtype">char</span>));
+00186
+00187 <span class="comment">/* Allocate initial space for the file */</span>
+00188 file = <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(file, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * filesize);
+00189 file[filesize] = <span class="charliteral">'\0'</span>;
+00190
+00191 <span class="comment">/* Read in the file */</span>
+00192 <span class="keywordflow">while</span> ((c = getc(fh)) != EOF) {
+00193 <span class="keywordflow">if</span> (filesize % <a class="code" href="pairwise_8c.html#a1">FILEBUF</a>)
+00194 file = <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(file, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * (<a class="code" href="pairwise_8c.html#a1">FILEBUF</a> + filesize));
+00195
+00196 file[filesize] = c;
+00197 filesize++;
+00198 }
+00199
+00200 <span class="comment">/* Parse the FASTA file into an array of structures */</span>
+00201 <span class="keywordflow">for</span> (i = 0, j = 0, k = 0; i < filesize; i++) {
+00202 <span class="keywordflow">if</span> (file[i] == <span class="charliteral">'>'</span>) {
+00203 <span class="keywordflow">if</span> (j % <a class="code" href="pairwise_8c.html#a0">ARRAY</a> == 0)
+00204 array = (<span class="keyword">struct </span>fasta **) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array, sizeof(struct fasta *) * (j + <a class="code" href="pairwise_8c.html#a0">ARRAY</a>));
+00205
+00206 array[j] = (<span class="keyword">struct </span>fasta *) <a class="code" href="pairwise_8c.html#a9">xmalloc</a>(sizeof(struct fasta));
+00207 array[j]-><a class="code" href="structfasta.html#o0">id</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a9">xmalloc</a>(<span class="keyword">sizeof</span>(<span class="keywordtype">char</span>));
+00208 array[j]-><a class="code" href="structfasta.html#o1">seq</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a9">xmalloc</a>(<span class="keyword">sizeof</span>(<span class="keywordtype">char</span>));
+00209 array[j]-><a class="code" href="structfasta.html#o4">numres</a> = 0;
+00210
+00211 i++;
+00212 <span class="keywordflow">while</span> (file[i] != <span class="charliteral">'\0'</span> && file[i] != <span class="charliteral">'\n'</span>) {
+00213 <span class="keywordflow">if</span> (k % <a class="code" href="pairwise_8c.html#a0">ARRAY</a> == 0)
+00214 array[j]-><a class="code" href="structfasta.html#o0">id</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array[j]-><a class="code" href="structfasta.html#o0">id</a>, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * (<a class="code" href="pairwise_8c.html#a0">ARRAY</a> + k));
+00215 array[j]-><a class="code" href="structfasta.html#o0">id</a>[k] = file[i];
+00216 k++; i++;
+00217 }
+00218 array[j]-><a class="code" href="structfasta.html#o0">id</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array[j]-><a class="code" href="structfasta.html#o0">id</a>, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * (<a class="code" href="pairwise_8c.html#a0">ARRAY</a> + k));
+00219 array[j]-><a class="code" href="structfasta.html#o0">id</a>[k] = <span class="charliteral">'\0'</span>;
+00220 k = 0;
+00221
+00222 <span class="keywordflow">while</span> (file[i] != <span class="charliteral">'\0'</span> && file[i] != <span class="charliteral">'>'</span>) {
+00223 <span class="keywordflow">if</span> (file[i] == <span class="charliteral">'\n'</span>) {
+00224 i++;
+00225 <span class="keywordflow">continue</span>;
+00226 }
+00227 <span class="keywordflow">if</span> (k % <a class="code" href="pairwise_8c.html#a0">ARRAY</a> == 0)
+00228 array[j]-><a class="code" href="structfasta.html#o1">seq</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array[j]-><a class="code" href="structfasta.html#o1">seq</a>, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * (<a class="code" href="pairwise_8c.html#a0">ARRAY</a> + k));
+00229 array[j]-><a class="code" href="structfasta.html#o1">seq</a>[k] = file[i];
+00230 k++; i++;
+00231 }
+00232 array[j]-><a class="code" href="structfasta.html#o1">seq</a> = (<span class="keywordtype">char</span> *) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array[j]-><a class="code" href="structfasta.html#o1">seq</a>, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span>) * (<a class="code" href="pairwise_8c.html#a0">ARRAY</a> + k));
+00233 array[j]-><a class="code" href="structfasta.html#o1">seq</a>[k] = <span class="charliteral">'\0'</span>;
+00234 k = 0;
+00235 i--;
+00236 j++;
+00237 }
+00238 }
+00239
+00240 free(file);
+00241 array = (<span class="keyword">struct </span>fasta **) <a class="code" href="pairwise_8c.html#a10">xrealloc</a>(array, sizeof(struct fasta *) * j);
+00242 array[j] = NULL;
+00243
+00244 <span class="comment">/* find the start and end points for the alignments */</span>
+00245 <span class="keywordflow">for</span> (i = 0; array[i] != NULL; i++) {
+00246 <a class="code" href="pairwise_8c.html#a13">populate</a>(array[i]);
+00247 }
+00248
+00249 <span class="keywordflow">return</span> array;
+00250 }
+00251
+00252 <span class="keywordtype">int</span>
+<a name="l00253"></a><a class="code" href="pairwise_8c.html#a16">00253</a> <a class="code" href="pairwise_8c.html#a16">main</a> (<span class="keywordtype">int</span> argc, <span class="keywordtype">char</span> **argv) {
+00254 FILE *fh;
+00255 <span class="keyword">struct </span><a class="code" href="structfasta.html">fasta</a> **array;
+00256 <span class="keywordtype">int</span> i = 0, j;
+00257
+00258 <span class="comment">/* Read in the FASTA file */</span>
+00259 <span class="keywordflow">if</span> (argc == 2)
+00260 fh = <a class="code" href="pairwise_8c.html#a8">xfopen</a>(argv[1], <span class="stringliteral">"r"</span>);
+00261 <span class="keywordflow">else</span>
+00262 fh = stdin;
+00263
+00264 array = <a class="code" href="pairwise_8c.html#a15">read_fasta</a>(fh);
+00265 fclose(fh);
+00266
+00267 <a class="code" href="pairwise_8c.html#a14">check_length</a>(array);
+00268
+00269 <span class="comment">/* start the OC output */</span>
+00270 <span class="keywordflow">while</span> (array[i] != NULL)
+00271 i++;
+00272
+00273 fprintf(stdout, <span class="stringliteral">"%i\n"</span>, i);
+00274
+00275 <span class="keywordflow">for</span> (i = 0; array[i] != NULL; i++)
+00276 fprintf(stdout, <span class="stringliteral">"%s\n"</span>, array[i]->id);
+00277
+00278 <span class="comment">/* do the pairwise comparison */</span>
+00279 <span class="keywordflow">for</span> (i = 0; array[i] != NULL; i++) {
+00280 <span class="keywordflow">for</span> (j = i + 1; array[j] != NULL; j++) {
+00281 printf(<span class="stringliteral">"%f\n"</span>, <a class="code" href="pairwise_8c.html#a12">pairwise</a>(array[i], array[j]));
+00282 }
+00283 }
+00284
+00285 <span class="keywordflow">return</span> EXIT_SUCCESS;
+00286 }
+</pre></div><hr size="1"><address style="align: right;"><small>Generated on Thu Jul 24 12:17:49 2003 for pairwise by
+<a href="http://www.doxygen.org/index.html">
+<img src="doxygen.png" alt="doxygen" align="middle" border=0 >
+</a>1.3.2 </small></address>
+</body>
+</html>