1 Overall structure of the fasta3 program. (Some functions
2 are different for translated comparisons FASTX, FASTY, TFASTX, TFASTY.)
4 main() { /* complib.c structure */
6 /* get command line arguments, set up initial parameter values */
7 initenv (argc, argv, &m_msg, &pst,&aa0[0],outtty);
9 /* allocate space for sequence arrays */
10 /* get the query file name if not on command line */
12 m_msg.n0 = getseq (m_msg.tname,aa0[0], MAXTOT, m_msg.libstr,&pst.dnaseq,
15 /* reset some parameters if DNA */
16 resetp (aa0[0], m_msg.n0, &m_msg, &pst);
18 /* get a library name if not on command line */
19 libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
20 /* use library name to build list of library files */
21 libselect(m_msg.lname, &m_msg);
23 /* get additional options (ktup, prss-window) if not specified */
24 query_parm (&m_msg, &pst);
26 /* do final parameter initializations */
27 last_init(&m_msg, &pst);
29 /* set up structures for saved scores[20000], statistics[50000] */
32 /* initialize the comparison function */
33 init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
35 /* open the library */
36 for (iln = 0; iln < m_msg.nln; iln++) {
37 if (openlib(m_msg.lbnames[iln],m_msg)!=1) {continue;}
40 /* get the library sequence and do the comparison */
41 while ((n1=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) {
42 do_work (aa0[itt], m_msg.n0, aa1, n1, itt, &pst, f_str[itt], &rst);
45 /* save the scores for statistics */
48 /* all done with all libraries */
49 process_hist(stats,nstats,pst);
51 /* sort the scores by z-value */
52 sortbestz (bptr, nbest);
54 /* sort the scores by E-value */
55 sortbeste (bptr, nbest);
57 /* print the histogram */
58 prhist (stdout,m_msg,pst,gstring2);
60 /* show the high scoring sequences */
61 showbest (stdout, aa0, aa1, maxn, bptr, nbest, qlib, &m_msg, pst,
64 /* show the high-scoring alignments */
65 showalign(outfd, aa0, aa1, maxn, bptr, nbest, qlib, m_msg, pst,
68 /* that's all folks !!! */
72 complib.c /* version set as mp_verstr */
75 printsum() /* prints summary of run (residues, entries, time) */
76 void fsigint() /* sets up interrupt handler for HUP not used */
81 void selectbest() /* select best 15000/20000 based on raw score */
82 void selectbestz() /* select best 15000/20000 based on z-score */
83 void sortbest() /* sort based on raw score */
84 void sortbestz() /* sort based on z-score */
85 void sortbeste() /* sort based on E() score - different from z-score for DNA */
87 prhist() /* print histogram */
89 shuffle() /* shuffle sequence (prss) */
90 wshuffle() /* window shuffle */
95 void showbest() /* present list of high scoring sequences */
100 void showalign() /* show list of high-scoring alignments */
101 void do_show() /* show an individual alignment */
102 void initseq() /* setup seqc0/seqc1 which contain alignment characters */
103 void freeseq() /* free them up */
108 time_t s_time() /* get the time in usecs */
109 void ptime() /* print elapsed time */
114 initpam () /* read in PAM matrix or change default array */
115 void mk_n_pam() /* make DNA pam from +5/-3 values */
119 void initenv() /* read environment variables, general options */
121 initfa.c /* version set as "verstr" */
123 alloc_pam() /* allocate 2D pam array */
124 initpam2() /* fill it up from 1D pam triangle */
125 f_initenv() /* function-specific environment variables */
126 f_getopt() /* function-specific options */
127 f_getarg() /* function-specific argument - ktup */
128 resetp() /* reset scoring matrix, optional parameters for DNA-DNA */
129 reseta() /* reset scoring matrix, optional parameters for prot-DNA */
130 query_parm() /* ask for additional program arguments (ktup) */
131 last_init() /* last chance to set up parameters based on query,lib,parms */
132 f_initpam() /* not used - could set parameters from pam matrix */
137 process_hist() /* do statistics calculations */
139 proc_hist_r() /* regression fit z=1, also used by z=5 */
140 float find_z() /* gives z-score for score, length, mu, rho, var */
141 float find_zr() /* gives z-score for score, length, mu, rho, var */
142 fit_llen() /* first estimate of mu, rho, var */
143 fit_llens() /* second estimate of mu, rho, var, mu2, rho2 */
145 proc_hist_r2() /* regression_i fit z=4 */
146 float find_zr2() /* gives z-score for score, length, mu, rho, mu2, rho2 */
147 fit_llen2() /* iterative estimate of mu, rho, var */
149 proc_hist_ln() /* ln()-scaled z=2 */ /* no longer used */
150 float find_zl() /* gives z-score from ln()-scaled scores */
152 proc_hist_ml() /* estimate lambda, K using Maximum Likelihood */
153 float find_ze() /* z-score from lambda, K */
155 proc_hist_n() /* no length-scaling z=0 */
156 float find_zn() /* gives z-score from mu, var (no scaling) */
158 proc_hist_a() /* Altschul-Gish params z= 3 */
159 ag_parm() /* match pst.pamfile name, look_p() */
160 look_p() /* lookup Lambda, K, H given param struct */
163 eq_s() /* returns (double)score (available for length correction) */
164 ln_s() /* returns (double)score * ln(200)/ln(length) */
166 proc_hist_r() /* regression fit z=1, also used by z=5 */
167 alloc_hist() /* set up arrays for score vs length */
168 free_hist() /* free them */
169 inithist() /* calls alloc_hist(), sets some other globals */
170 addhist() /* update score vs length hist */
171 inithistz() /* initialize displayed (z-score) histogram hist[]*/
172 addhistz() /* add to hist[], increment num_db_entries */
173 addhistzp() /* add to hist[], don't change num_db_entries */
174 prune_hist() /* remove scores from score vs length */
175 update_db_size() /* num_db_entries = nlib - ntrimmed */
176 set_db_size() /* -Z db_size; set nlib */
178 double z_to_E() /* z-value to E() (extreme value distribution) */
179 double zs_to_E() /* z-score (mu=50, sigma=10) to E() */
180 double zs_to_bit() /* z-score to BLAST2 bit score */
182 float E_to_zs() /* E() to z-score */
183 double zs_to_Ec() /* z-score to num_db_entries*(1 - P(zs)) */
185 summ_stats() /* put stat summary in string */
186 vsort() /* not used, does shell sort */
187 calc_ks() /* does Kolmogorov-Smirnov calculation for histogram */
189 dropnfa.c /* contains worker comparison functions */
191 init_work() /* set up struct f_struct fstr - hash query */
192 get_param() /* actually prints parameters to string */
193 close_work() /* clean up fstr */
194 do_work() /* do a comparison */
195 do_fasta() /* use the fasta() function */
196 savemax() /* save the best region during scan */
197 spam() /* rescan the best regions */
198 sconn() /* try to connect the best regions for initn */
199 kssort() /* sort by score */
200 kpsort() /* sort by left end pos */
201 shscore() /* best self-score */
202 dmatch() /* do band alignment for opt score */
203 FLOCAL_ALIGN() /* fast band score-only */
205 do_opt() /* do an "optimized" comparison */
207 do_walign() /* put an alignment into res[] for calcons() */
208 sw_walign() /* SW alignment driver - find boundaries */
209 ALIGN() /* actual alignment driver */
210 nw_align() /* recursive global alignment */
211 CHECK_SCORE() /* double check */
212 DISPLAY() /* Miller's display routine */
214 bd_walign() /* band alignment driver for DNA */
215 LOCAL_ALIGN() /* find boundaries in band */
216 B_ALIGN() /* produce band alignment */
217 bg_align() /* recursively produce band alignment */
218 BCHECK_SCORE() /* double check */
220 calcons() /* calculate ascii alignment seqc0,seqc1 from res[]*/
221 calc_id() /* calculate % identity with no alignment */
225 getseq() /* get a query (prot or DNA) */
226 getntseq() /* get a nt query (for fastx, fasty) */
227 gettitle() /* get a description */
229 int openlib() /* open a library */
230 closelib() /* close it */
231 GETLIB() /* get a fasta-format next library entry */
232 RANLIB() /* jump back in, get description, position for getlib() */
234 lgetlib() /* get a Genbank flat-file format next library entry */
235 lranlib() /* jump back in, get description, position for lgetlib() */
237 pgetlib() /* get CODATA format next library entry */
238 pranlib() /* jump back in, get description, position for pgetlib() */
240 egetlib() /* get EMBL format next library entry */
241 eranlib() /* jump back in, get description, position for egetlib() */
243 igetlib() /* get Intelligenetics format next library entry */
244 iranlib() /* jump back in, get description, position for igetlib() */
246 vgetlib() /* get PIR/VMS/GCG format next library entry */
247 vranlib() /* jump back in, get description, position for vgetlib() */
249 gcg_getlib() /* get GCG binary format next library entry */
250 gcg_ranlib() /* jump back in, get description, position for gcg_getlib() */
252 int scanseq() /* find %ACGT */
254 revcomp() /* do reverse complement */
255 sf_sort() /* sort superfamily numbers */
259 discons() /* display alignment from seqc0, seqc1 */
260 disgraph() /* display graphical representation, -m 4,5 */
261 aancpy() /* copy a binary sequence to ascii */
264 iidex() /* lookup ascii-encoding of residue */
265 cal_coord() /* calculate coordinates of alignment ends */
293 irand(time) /* initialize random number generator */
294 nrand(n) /* get a number 0 - n */
299 void do_url1() /* setup search links */