--- /dev/null
+#!/bin/sh
+# This is a shell archive (produced by shar 3.49)
+# To extract the files from this archive, save it to a file, remove
+# everything above the "!/bin/sh" line above, and type "sh file_name".
+#
+# made 04/29/2007 13:53 UTC by wrp@wrpsun2.bioch.Virginia.EDU
+# Source directory /home2.t2/users/wrp/fa_cvs/fasta-34.26.5
+#
+# existing files will NOT be overwritten unless -c is specified
+#
+# This shar contains:
+# length mode name
+# ------ ---------- ------------------------------------------
+# 1018 -rw-r--r-- COPYRIGHT
+# 373 -rw-r--r-- FASTA_LIST
+# 4638 -rw-r--r-- FileDlog.c
+# 1580 -rw-r--r-- Makefile
+# 831 -rw-r--r-- Makefile.NetBSD
+# 684 -rw-r--r-- Makefile.cray_pvp
+# 8454 -rw-r--r-- Makefile.fcom
+# 2135 -rw-r--r-- Makefile.freebsd
+# 1484 -rw-r--r-- Makefile.hpux_it
+# 740 -rw-r--r-- Makefile.ibm
+# 1580 -rw-r--r-- Makefile.linux
+# 1577 -rw-r--r-- Makefile.linux_mysql
+# 1581 -rw-r--r-- Makefile.linux_pgsql
+# 1548 -rw-r--r-- Makefile.linux_sql
+# 1671 -rw-r--r-- Makefile.linux_sse2
+# 13073 -rw-r--r-- Makefile.mpcom
+# 1600 -rw-r--r-- Makefile.mpi4
+# 1602 -rw-r--r-- Makefile.mpi4_bluegene
+# 1509 -rw-r--r-- Makefile.mpi4_sql
+# 8182 -rwxr-xr-x Makefile.nm_fcom
+# 27480 -rwxr-xr-x Makefile.nm_pcom
+# 905 -rwxr-xr-x Makefile.nmk_icl
+# 2116 -rw-r--r-- Makefile.os_x
+# 1917 -rw-r--r-- Makefile.os_x86
+# 1922 -rw-r--r-- Makefile.pLinux
+# 1946 -rw-r--r-- Makefile.pLinux_sql
+# 24893 -rw-r--r-- Makefile.pcom
+# 13214 -rw-r--r-- Makefile.pvcom
+# 1344 -rw-r--r-- Makefile.pvm4
+# 1264 -rw-r--r-- Makefile.pvm4_sql
+# 1238 -rw-r--r-- Makefile.sgi
+# 1150 -rw-r--r-- Makefile.sun
+# 1264 -rw-r--r-- Makefile.sun_x86
+# 9746 -rw-r--r-- Makefile.tc
+# 1304 -rw-r--r-- Makefile34.common
+# 1330 -rw-r--r-- Makefile34.common_sql
+# 765 -rwxr-xr-x Makefile34.nmk_com
+# 1311 -rw-r--r-- Makefile34m.common
+# 1395 -rw-r--r-- Makefile34m.common_mysql
+# 1407 -rw-r--r-- Makefile34m.common_pgsql
+# 1406 -rw-r--r-- Makefile34m.common_sql
+# 722 -rw-r--r-- README
+# 2614 -rw-r--r-- README.versions
+# 1332 -rw-r--r-- Readme.Mac
+# 321 -rw-r--r-- a_mark.h
+# 504 -rw-r--r-- aamap.h
+# 3021 -rw-r--r-- ag_stats.c
+# 758 -rw-r--r-- aln_structs.h
+# 10311 -rw-r--r-- alt_parms.h
+# 2319 -rw-r--r-- altlib.h
+# 10085 -rw-r--r-- apam.c
+# 1922 -rw-r--r-- blosum45.mat
+# 1921 -rw-r--r-- blosum50.mat
+# 1922 -rw-r--r-- blosum62.mat
+# 1924 -rw-r--r-- blosum80.mat
+# 2528 -rw-r--r-- bovgh.seq
+# 986 -rw-r--r-- bovprl.seq
+# 11467 -rw-r--r-- c_dispn.c
+# 3492 -rw-r--r-- checkevent.c
+# 55202 -rw-r--r-- comp_lib.c
+# 21270 -rw-r--r-- compacc.c
+# 536 -rw-r--r-- create_seq_demo.sql
+# 81 -rw-r--r-- cvs_id
+# 6955 -rw-r--r-- dec_pthr_subs.c
+# 1116 -rw-r--r-- dec_pthr_subs.h
+# 3530 -rw-r--r-- defs.h
+# 976 -rw-r--r-- dna.mat
+# 10740 -rw-r--r-- doinit.c
+# 3226 -rw-r--r-- drop_func.h
+# 48853 -rw-r--r-- dropff2.c
+# 59078 -rw-r--r-- dropfs2.c
+# 73324 -rw-r--r-- dropfx.c
+# 77360 -rw-r--r-- dropfz2.c
+# 55870 -rw-r--r-- dropgsw.c
+# 677 -rw-r--r-- dropgsw.h
+# 70110 -rw-r--r-- dropnfa.c
+# 1882 -rw-r--r-- dropnfa.h
+# 34172 -rw-r--r-- dropnsw.c
+# 1286 -rw-r--r-- egmsmg.aa
+# 13742 -rw-r--r-- faatran.c
+# 1959 -rw-r--r-- fast_new
+# 529 -rw-r--r-- fasta.defaults
+# 1670 -rw-r--r-- fasta.options
+# 49762 -rw-r--r-- fasta20.doc
+# 10345 -rw-r--r-- fasta3.1
+# 177 -rw-r--r-- fasta3.rsp
+# 41617 -rw-r--r-- fasta3x.doc
+# 39642 -rw-r--r-- fasta3x.me
+# 9645 -rw-r--r-- fasta_func.doc
+# 4824 -rw-r--r-- fastf3.1
+# 2173 -rw-r--r-- fastlibs
+# 4556 -rw-r--r-- fasts3.1
+# 203 -rw-r--r-- fasts3.rsp
+# 1036 -rw-r--r-- getenv.c
+# 1174 -rw-r--r-- getopt.c
+# 9431 -rw-r--r-- getseq.c
+# 806 -rw-r--r-- grou_drome.pseg
+# 18633 -rw-r--r-- gst.nlib
+# 1405 -rw-r--r-- gst.seq
+# 300 -rw-r--r-- gtm1_human.aa
+# 291 -rw-r--r-- gtt1_drome.aa
+# 247 -rw-r--r-- h10_human.aa
+# 691 -rw-r--r-- h_altlib.h
+# 225 -rw-r--r-- hahu.aa
+# 1466 -rw-r--r-- hostacc.c
+# 7118 -rw-r--r-- hsgstm1b.gcg
+# 2788 -rw-r--r-- hsgstm1b.seq
+# 674 -rw-r--r-- htime.c
+# 1323 -rw-r--r-- humgstd.seq
+# 2210 -rw-r--r-- idn_aa.mat
+# 54882 -rw-r--r-- initfa.c
+# 13727 -rw-r--r-- karlin.c
+# 4128 -rw-r--r-- last_tat.c
+# 271 -rw-r--r-- lcbo.aa
+# 7638 -rw-r--r-- lib_sel.c
+# 5150 -rw-r--r-- list_db.c
+# 10617 -rw-r--r-- llgetaa.c
+# 56 -rw-r--r-- m1r.aa
+# 50 -rw-r--r-- m2.aa
+# 312 -rwxr-xr-x make_osx_univ.sh
+# 948 -rw-r--r-- map_db.1
+# 10852 -rw-r--r-- map_db.c
+# 212 -rw-r--r-- mchu.aa
+# 2255 -rw-r--r-- md_10.mat
+# 2256 -rw-r--r-- md_20.mat
+# 2255 -rw-r--r-- md_40.mat
+# 284 -rw-r--r-- mgstm1.aa
+# 310 -rw-r--r-- mgstm1.aaa
+# 1220 -rw-r--r-- mgstm1.e05
+# 1122 -rw-r--r-- mgstm1.eeq
+# 1116 -rw-r--r-- mgstm1.esq
+# 406 -rw-r--r-- mgstm1.gcg
+# 282 -rw-r--r-- mgstm1.lc
+# 677 -rw-r--r-- mgstm1.nt
+# 160 -rw-r--r-- mgstm1.nts
+# 259 -rw-r--r-- mgstm1.raa
+# 1167 -rw-r--r-- mgstm1.rev
+# 1158 -rw-r--r-- mgstm1.seq
+# 1286 -rw-r--r-- mgtt2_x.seq
+# 3057 -rw-r--r-- mm_file.h
+# 21318 -rw-r--r-- mmgetaa.c
+# 43 -rw-r--r-- ms1.aa
+# 1085 -rw-r--r-- msg.h
+# 17780 -rw-r--r-- mshowalign.c
+# 14393 -rw-r--r-- mshowbest.c
+# 2361 -rw-r--r-- mu.lib
+# 953 -rw-r--r-- musplfm.aa
+# 1042 -rw-r--r-- mw.h
+# 2047 -rw-r--r-- mwkw.aa
+# 500 -rw-r--r-- mwrtc1.aa
+# 1294 -rw-r--r-- myosin_bp.aa
+# 340 -rw-r--r-- mysql_demo1.sql
+# 381 -rw-r--r-- mysql_demo_pv.sql
+# 16406 -rw-r--r-- mysql_lib.c
+# 26 -rw-r--r-- n0.aa
+# 47 -rw-r--r-- n1.aa
+# 692 -rw-r--r-- n2.aa
+# 1482 -rw-r--r-- n2_fs.lib
+# 178 -rw-r--r-- n2s.aa
+# 243 -rw-r--r-- n2t.aa
+# 330 -rw-r--r-- n_fs.lib
+# 882 -rw-r--r-- ncbl2_head.h
+# 42930 -rw-r--r-- ncbl2_mlib.c
+# 1034 -rw-r--r-- ncbl_head.h
+# 12694 -rw-r--r-- ncbl_lib.c
+# 217 -rw-r--r-- ngt.aa
+# 111 -rw-r--r-- ngts.aa
+# 36301 -rw-r--r-- nmgetlib.c
+# 2452 -rwxr-xr-x nr_to_sql.pl
+# 566 -rw-r--r-- nrand.c
+# 533 -rw-r--r-- nrand48.c
+# 532 -rw-r--r-- nrandom.c
+# 385 -rw-r--r-- oohu.aa
+# 401 -rw-r--r-- oohu.raa
+# 55578 -rw-r--r-- p2_complib.c
+# 37611 -rw-r--r-- p2_workcomp.c
+# 1096 -rw-r--r-- p_mw.h
+# 1922 -rw-r--r-- pam120.mat
+# 1923 -rw-r--r-- pam250.mat
+# 3002 -rw-r--r-- param.h
+# 16978 -rw-r--r-- pgsql_lib.c
+# 230 -rw-r--r-- pirpsd.sql
+# 11147 -rw-r--r-- print_pssm.c
+# 340 -rw-r--r-- prio_atepa.aa
+# 2741 -rw-r--r-- prot_test.lib
+# 2786 -rw-r--r-- prot_test.lseg
+# 4969 -rw-r--r-- prss3.1
+# 119 -rw-r--r-- prss3.rsp
+# 317 -rw-r--r-- psql_demo.sql
+# 366 -rw-r--r-- psql_demo1.sql
+# 336 -rw-r--r-- psql_demo_pv.sql
+# 26268 -rw-r--r-- pssm_asn_subs.c
+# 1301 -rw-r--r-- pthr_subs.h
+# 7689 -rw-r--r-- pthr_subs2.c
+# 6657 -rw-r--r-- pvcomp.1
+# 914 -rw-r--r-- qrhuld.aa
+# 339 -rw-r--r-- randtest.c
+# 1184 -rw-r--r-- re_getlib.c
+# 1994 -rw-r--r-- readme.mpi_3.3
+# 1404 -rw-r--r-- readme.pvm_3.2
+# 7535 -rw-r--r-- readme.pvm_3.3
+# 3539 -rw-r--r-- readme.pvm_3.4
+# 1070 -rw-r--r-- readme.v30
+# 1871 -rw-r--r-- readme.v30t6
+# 5283 -rw-r--r-- readme.v30t7
+# 4461 -rw-r--r-- readme.v31t0
+# 3632 -rw-r--r-- readme.v31t1
+# 15841 -rw-r--r-- readme.v32t0
+# 50697 -rw-r--r-- readme.v33t0
+# 66121 -rw-r--r-- readme.v34t0
+# 2402 -rw-r--r-- readme.w32
+# 16277 -rw-r--r-- res_stats.c
+# 998 -rw-r--r-- rna.mat
+# 1427 -rw-r--r-- sc_to_e.c
+# 69722 -rw-r--r-- scaleswn.c
+# 37581 -rw-r--r-- scaleswt.c
+# 5247 -rw-r--r-- search.html
+# 2033 -rw-r--r-- showrss.c
+# 12412 -rw-r--r-- showsum.c
+# 113815 -rw-r--r-- smith_waterman_altivec.c
+# 1144 -rw-r--r-- smith_waterman_altivec.h
+# 12106 -rw-r--r-- smith_waterman_sse2.c
+# 1723 -rwxr-xr-x smith_waterman_sse2.h
+# 4279 -rw-r--r-- structs.h
+# 12998 -rw-r--r-- tatstats.c
+# 4126 -rw-r--r-- tatstats.h
+# 2891 -rw-r--r-- test.bat
+# 2996 -rwxr-xr-x test.sh
+# 2775 -rwxr-xr-x test2.bat
+# 2429 -rwxr-xr-x test_osx.sh
+# 1597 -rwxr-xr-x test_s.sh
+# 1312 -rwxr-xr-x test_z.sh
+# 203 -rw-r--r-- tfasts3.rsp
+# 1144 -rw-r--r-- thr.h
+# 27376 -rw-r--r-- titin_hum.aa
+# 83286 -rw-r--r-- titin_hum.seq
+# 2006 -rw-r--r-- uascii.h
+# 16008 -rw-r--r-- upam.h
+# 3335 -rw-r--r-- url_subs.c
+# 1229 -rw-r--r-- uthr_subs.h
+# 2771 -rw-r--r-- vtml160.mat
+# 2899 -rw-r--r-- w_mw.h
+# 7001 -rw-r--r-- work_thr.c
+# 5262 -rw-r--r-- workacc.c
+# 302 -rw-r--r-- xurt8c.aa
+# 302 -rw-r--r-- xurt8c.lc
+# 281 -rw-r--r-- xurtg.aa
+#
+# ============= COPYRIGHT ==============
+# Extraction stanza for COPYRIGHT.
+# The file is left alone when it already exists, unless the first argument
+# to this script is -c, in which case it is overwritten.
+if test -f 'COPYRIGHT' -a X"$1" != X"-c"; then
+ echo 'x - skipping COPYRIGHT (File already exists)'
+else
+echo 'x - extracting COPYRIGHT (Text)'
+# sed removes the leading X from each payload line. The delimiter is quoted
+# ('SHAR_EOF'), so the shell copies the payload literally, without expansion.
+# chmod runs only when sed succeeds; the 'restore ... failed' message is
+# printed when either of the two commands fails.
+sed 's/^X//' << 'SHAR_EOF' > 'COPYRIGHT' &&
+X
+X Copyright 1988, 1991, 1992, 1993, 1994 1995, by William
+X R. Pearson and the University of Virginia. All rights
+X reserved. The FASTA program and documentation may not be sold or
+X incorporated into a commercial product, in whole or in part,
+X without written consent of William R. Pearson and the University
+X of Virginia. For further information regarding permission for
+X use or reproduction, please contact:
+X
+X David Hudson
+X Assistant Provost for Research
+X University of Virginia
+X P.O. Box 400301
+X Charlottesville, VA 22906-9025
+X
+X (434) 924-3606
+X
+X Code in the smith_waterman_sse2.c and smith_waterman_sse2.h files
+X is copyright (c) 2006 by Michael Farrar.
+X
+X This program may not be sold or incorporated into a commercial
+X product, in whole or in part, without written consent of Michael
+X Farrar. For further information regarding permission for use or
+X reproduction, please contact: Michael Farrar at
+X farrar.michael@gmail.com.
+X
+SHAR_EOF
+chmod 0644 COPYRIGHT ||
+echo 'restore of COPYRIGHT failed'
+# Integrity check: compare the extracted byte count with the expected size
+# (1018) and report any mismatch. Editing the payload above changes this count.
+Wc_c="`wc -c < 'COPYRIGHT'`"
+test 1018 -eq "$Wc_c" ||
+ echo 'COPYRIGHT: original size 1018, current size' "$Wc_c"
+fi
+# ============= FASTA_LIST ==============
+# Extraction stanza for FASTA_LIST: skipped when the file already exists,
+# unless the first argument to this script is -c.
+if test -f 'FASTA_LIST' -a X"$1" != X"-c"; then
+ echo 'x - skipping FASTA_LIST (File already exists)'
+else
+echo 'x - extracting FASTA_LIST (Text)'
+# The sed expression only alters lines that begin with X (the X is removed);
+# all other payload lines are written out unchanged.
+sed 's/^X//' << 'SHAR_EOF' > 'FASTA_LIST' &&
+X
+X
+30 July 2003
+X
+If you regularly install the latest version of the fasta3 package from
+ftp://ftp.virginia.edu/pub/fasta, you may want to join the fasta_list
+majordomo mailing list. I plan to use this list to announce new
+releases and solicit bug reports.
+X
+To join the mailing list, go to the WWW page at:
+X
+X list.mail.virginia.edu/mailman/listinfo/fasta_list
+X
+Bill Pearson
+SHAR_EOF
+chmod 0644 FASTA_LIST ||
+echo 'restore of FASTA_LIST failed'
+# Report a mismatch between the extracted byte count and the expected 373.
+Wc_c="`wc -c < 'FASTA_LIST'`"
+test 373 -eq "$Wc_c" ||
+ echo 'FASTA_LIST: original size 373, current size' "$Wc_c"
+fi
+# ============= FileDlog.c ==============
+if test -f 'FileDlog.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping FileDlog.c (File already exists)'
+else
+echo 'x - extracting FileDlog.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'FileDlog.c' &&
+X
+/* copyright (c) 1997 William R. Pearson */
+X
+/* used only in Mac versions for file selection */
+/* should use navigation services if available */
+X
+X
+#include <Dialogs.h>
+#include <Fonts.h>
+#include <Types.h>
+#include <Gestalt.h>
+#include <Resources.h>
+#include <Controls.h>
+#include <StandardFile.h>
+#include <Files.h>
+#include <Folders.h>
+X
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+X
+#define NIL nil
+#define PauseID 301
+#define ExitID 302
+#define FileDID 204
+#define SFileDID 205
+X
+void HiliteDlog(DialogPtr);
+X
+SFTypeList tlist={'TEXT',0L,0L,0L};
+X
+extern Point wpos;
+X
+FileDlog(prompt,freply)
+X char *prompt;
+X StandardFileReply *freply;
+{
+X Point dpos={-1,-1};
+X if (GetResource('DLOG',SFileDID)==NIL) {
+X fprintf(stderr," cannot load %d DLOG resource\n",SFileDID); exit(1);
+X }
+X CtoPstr(prompt);
+X ParamText((StringPtr)prompt,"\p","\p","\p");
+/* SFPGetFile(wpos, (StringPtr)prompt, 0L,(short)1, tlist, 0L, freply, FileDID, NIL); */
+X CustomGetFile(NIL,
+X -1,
+X nil,
+X freply,
+X SFileDID,
+X dpos,
+X nil,
+X nil,nil,nil,nil);
+X
+X ParamText("\p","\p","\p","\p");
+X PtoCstr((StringPtr)prompt);
+X }
+X
+TFileDlog(prompt,freply,plist,nl)
+X char *prompt;
+X StandardFileReply *freply;
+X SFTypeList plist;
+X int nl;
+{
+X Point dpos={-1,-1};
+X if (GetResource('DLOG',SFileDID)==NIL) {
+X fprintf(stderr," cannot load %d TFile DLOG resource\n",SFileDID); exit(1);
+X }
+X CtoPstr(prompt);
+X ParamText((StringPtr)prompt,"\p","\p","\p");
+/* SFPGetFile(wpos,(StringPtr)prompt,0L,(short)nl,plist,0L,freply,FileDID,NIL); */
+X CustomGetFile(NIL,
+X nl,
+X plist,
+X freply,
+X SFileDID,
+X dpos,
+X nil,
+X nil,nil,nil,nil);
+X ParamText("\p","\p","\p","\p");
+X PtoCstr((StringPtr)prompt);
+X }
+X
+SFileDlog(prompt,freply)
+X char *prompt;
+X StandardFileReply *freply;
+{
+X Point dpos={-1,-1};
+X
+X if (GetResource('DLOG',SFileDID)==NIL) {
+X fprintf(stderr," cannot load %d DLOG resource\n",SFileDID); exit(1);
+X }
+X
+X CtoPstr(prompt);
+X ParamText((StringPtr)prompt,"\p","\p","\p");
+X
+/* StandardGetFile(NIL,(short)1,tlist,freply); */
+X CustomGetFile(NIL,
+X -1,
+X nil,
+X freply,
+X SFileDID,
+X dpos,
+X nil,
+X nil,nil,nil,nil);
+X ParamText("\p","\p","\p","\p");
+X PtoCstr((StringPtr)prompt);
+X }
+X
+STFileDlog(char *prompt, StandardFileReply *freply,
+X SFTypeList plist, int nl)
+{
+X Point dpos={-1,-1};
+X
+X if (GetResource('DLOG',SFileDID)==NIL) {
+X fprintf(stderr," cannot load %d TFile DLOG resource\n",SFileDID); exit(1);
+X }
+X CtoPstr(prompt);
+X ParamText((StringPtr)prompt,"\p","\p","\p");
+X
+X CustomGetFile(NIL,
+X -1,
+X nil,
+X freply,
+X SFileDID,
+X dpos,
+X nil,
+X nil,nil,nil,nil);
+X ParamText("\p","\p","\p","\p");
+X PtoCstr((StringPtr)prompt);
+}
+X
+PauseAlert(unsigned char *prompt)
+{
+X if (GetResource('DLOG',PauseID)==NIL) {
+X fprintf(stderr," cannot load %d TFile DLOG resource\n",PauseID); exit(1);
+X }
+X CtoPstr((char *)prompt);
+X ParamText(prompt,"\p","\p","\p");
+X CautionAlert(PauseID,NULL);
+X ParamText("\p","\p","\p","\p");
+}
+X
+IntroDlog(int DlogID, unsigned char *prompt)
+{
+X short itemHit;
+X DialogPtr DP;
+X
+X CtoPstr((char *)prompt);
+X ParamText(prompt,"\p","\p","\p");
+X
+X if (GetResource('DLOG',DlogID)==NIL) {
+X fprintf(stderr," cannot load %d Intro DLOG resource\n",DlogID); exit(1);
+X }
+X DP = GetNewDialog(DlogID,NULL,(WindowPtr)-1);
+X ShowWindow(DP);
+X SelectWindow(DP);
+X HiliteDlog(DP);
+X
+X ModalDialog(0L,&itemHit);
+X DisposeDialog(DP);
+X ParamText("\p","\p","\p","\p");
+X PtoCstr(prompt);
+}
+X
+NIntroDlog(int DlogID,unsigned char *p0,unsigned char *p1,
+X unsigned char *p2,unsigned char *p3)
+{
+X short itemHit;
+X DialogPtr DP;
+X unsigned char *p;
+X
+X for (p=p0; *p; p++) if (*p=='\n') *p=' ';
+X for (p=p1; *p; p++) if (*p=='\n') *p=' ';
+X for (p=p2; *p; p++) if (*p=='\n') *p=' ';
+X for (p=p3; *p; p++) if (*p=='\n') *p=' ';
+X
+X CtoPstr((char *)p0);
+X CtoPstr((char *)p1);
+X CtoPstr((char *)p2);
+X CtoPstr((char *)p3);
+X ParamText(p0,p1,p2,p3);
+X
+X if (GetResource('DLOG',DlogID)==NIL) {
+X fprintf(stderr," cannot load %d Intro DLOG resource\n",DlogID); exit(1);
+X }
+X DP = GetNewDialog(DlogID,NULL,(WindowPtr)-1);
+X ShowWindow(DP);
+X SelectWindow(DP);
+X HiliteDlog(DP);
+X
+X ModalDialog(0L,&itemHit);
+X DisposeDialog(DP);
+X ParamText("\p","\p","\p","\p");
+X PtoCstr(p0);
+X PtoCstr(p1);
+X PtoCstr(p2);
+X PtoCstr(p3);
+}
+X
+void
+HiliteDlog(DialogPtr DP)
+{
+X Rect tRect;
+X short tType;
+X Handle tItem;
+X
+X SetPort(DP);
+X GetDialogItem(DP,1,&tType,&tItem,&tRect);
+X PenSize(3, 3); /* Change pen to draw thick default outline */
+X InsetRect(&tRect, -4, -4); /* Draw outside the button by 1 pixel */
+X FrameRoundRect(&tRect, 16, 16); /* Draw the outline */
+X PenSize(1, 1); /* Restore the pen size to the default value */
+}
+SHAR_EOF
+chmod 0644 FileDlog.c ||
+echo 'restore of FileDlog.c failed'
+Wc_c="`wc -c < 'FileDlog.c'`"
+test 4638 -eq "$Wc_c" ||
+ echo 'FileDlog.c: original size 4638, current size' "$Wc_c"
+fi
+# ============= Makefile ==============
+if test -f 'Makefile' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile (File already exists)'
+else
+echo 'x - extracting Makefile (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+#
+# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -O3 breaks the alignment code
+#
+X
+CC= gcc -g -O2
+X
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC = gcc -g -DDEBUG
+#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+X
+# this file works for x86 LINUX
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta33) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0644 Makefile ||
+echo 'restore of Makefile failed'
+Wc_c="`wc -c < 'Makefile'`"
+test 1580 -eq "$Wc_c" ||
+ echo 'Makefile: original size 1580, current size' "$Wc_c"
+fi
+# ============= Makefile.NetBSD ==============
+if test -f 'Makefile.NetBSD' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.NetBSD (File already exists)'
+else
+echo 'x - extracting Makefile.NetBSD (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.NetBSD' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+# this file works for NetBSD
+#
+# provided by Marc Baudoin <babafou@babafou.eu.org>
+#
+X
+CC= cc -O
+#CC= cc -g -DDEBUG
+#CC= gcc -g -Wall
+#
+# standard line for normal searching
+CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your.host.here/fasta/cgi"' -DUSE_MMAP
+X
+# special options for SUPERFAMLIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DSFCHAR="'|'" -c -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP
+X
+LIB_M= -lm
+HFLAGS= -o
+NFLAGS= -o
+X
+# for NetBSD
+THR_SUBS = pthr_subs2
+THR_LIBS = -L/usr/pkg/pthreads/lib -lpthread
+THR_CC = -I/usr/pkg/pthreads/include
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+X
+include Makefile34m.common
+SHAR_EOF
+chmod 0644 Makefile.NetBSD ||
+echo 'restore of Makefile.NetBSD failed'
+Wc_c="`wc -c < 'Makefile.NetBSD'`"
+test 831 -eq "$Wc_c" ||
+ echo 'Makefile.NetBSD: original size 831, current size' "$Wc_c"
+fi
+# ============= Makefile.cray_pvp ==============
+# Extraction stanza for Makefile.cray_pvp: skipped when the file already
+# exists, unless the first argument to this script is -c.
+# NOTE(review): in the payload below the CFLAGS value runs over three lines
+# with no trailing backslashes, and the payload includes Makefile33.nommap,
+# which is not among the files listed at the top of this archive. Both look
+# like upstream defects -- confirm before relying on this makefile. Adding
+# the backslashes here would change the payload size that the check at the
+# end of this stanza expects (684).
+if test -f 'Makefile.cray_pvp' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.cray_pvp (File already exists)'
+else
+echo 'x - extracting Makefile.cray_pvp (Text)'
+# sed removes the leading X from payload lines that have one; the quoted
+# delimiter keeps the shell from expanding $(...) or quotes in the payload.
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.cray_pvp' &&
+#
+# makefile for fasta33
+#
+# for more information on FASTA on CRAY's, see:
+#
+# http://home.cray.com/~cpsosa/ChemApps/BioInf/fasta/fasta.html
+# provided by: Carlos P. Sosa, cpsosa@cray.com
+#
+X
+CC= cc -h inline1,scalar3,task0,vector2
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+LIB_M=
+#
+X
+CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4
+-DTHR_EXIT=pthread_exit -DPROGRESS
+-DFASTA_HOST='"crick.med.virginia.edu/fasta/cgi"' -DIS_BIG_ENDIAN
+X
+THR_SUBS = pthr_subs
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+X
+# renamed (fasta33) programs
+include Makefile33.nommap
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.cray_pvp ||
+echo 'restore of Makefile.cray_pvp failed'
+# Report a mismatch between the extracted byte count and the expected 684.
+Wc_c="`wc -c < 'Makefile.cray_pvp'`"
+test 684 -eq "$Wc_c" ||
+ echo 'Makefile.cray_pvp: original size 684, current size' "$Wc_c"
+fi
+# ============= Makefile.fcom ==============
+if test -f 'Makefile.fcom' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.fcom (File already exists)'
+else
+echo 'x - extracting Makefile.fcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.fcom' &&
+X
+#================ common .o files
+X
+doinit.o : doinit.c defs.h param.h upam.h structs.h uascii.h
+X $(CC) $(THR_CC) $(CFLAGS) -c doinit.c
+X
+init_sw.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DSSEARCH initfa.c -o init_sw.o
+X
+init_ssw.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DOSEARCH initfa.c -o init_ssw.o
+X
+init_rss.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS initfa.c -o init_rss.o
+X
+init_rfx.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c -o init_rfx.o
+X
+init_fa.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA initfa.c -o init_fa.o
+X
+init_ff.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF initfa.c -o init_ff.o
+X
+init_tf.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c -o init_tf.o
+X
+init_fs.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS initfa.c -o init_fs.o
+X
+init_fm.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM initfa.c -o init_fm.o
+X
+init_tfs.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -DTFAST initfa.c -o init_tfs.o
+X
+init_tfm.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -DTFAST initfa.c -o init_tfm.o
+X
+init_tfa.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c -o init_tfa.o
+X
+init_fx.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX initfa.c -o init_fx.o
+X
+init_tfx.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c -o init_tfx.o
+X
+init_fy.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY initfa.c -o init_fy.o
+X
+init_tfy.o : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c -o init_tfy.o
+X
+#================ miscellaneous
+X
+htime.o : htime.c
+X $(CC) $(THR_CC) $(CFLAGS) -c htime.c
+X
+compacc.o : compacc.c upam.h uascii.h param.h structs.h $(MWH) defs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c compacc.c
+X
+pssm_asn_subs.o : pssm_asn_subs.c defs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c pssm_asn_subs.c
+X
+#================ display list of best hits / alignments
+X
+showbest.o : $(SHOWBESTC) $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c $(SHOWBESTC) -o showbest.o
+X
+showrss.o : showrss.c $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c showrss.c
+X
+showun.o : mshowbest.c $(MWH) defs.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DSHOWUN mshowbest.c -o showun.o
+X
+showrel.o : $(SHOWBESTC) $(MWH) defs.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DSHOWREL $(SHOWBESTC) -o showrel.o
+X
+showsum.o : showsum.c $(MWH) defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c showsum.c
+X
+$(SHOWALIGN).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c $(SHOWALIGN).c -o $(SHOWALIGN).o
+X
+$(SHOWALIGN)_u.o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -DSHOWUN -c -o $(SHOWALIGN)_u.o $(SHOWALIGN).c
+re_getlib.o : re_getlib.c mw.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c re_getlib.c
+X
+lib_sel.o : lib_sel.c defs.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c lib_sel.c
+X
+c_dispn.o : c_dispn.c defs.h structs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c c_dispn.c
+X
+#================ statistical functions
+X
+karlin.o : karlin.c param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c karlin.c
+X
+scaleswn.o : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(THR_CC) $(CFLAGS) -c scaleswn.c
+X
+scaleswtf.o : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -c scaleswt.c -o scaleswtf.o
+X
+scaleswts.o : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(THR_CC) $(CFLAGS) -c scaleswt.c -o scaleswts.o
+X
+tatstats_fs.o : tatstats.c tatstats.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS tatstats.c -o tatstats_fs.o
+X
+tatstats_ff.o : tatstats.c tatstats.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF tatstats.c -o tatstats_ff.o
+X
+tatstats_fm.o : tatstats.c tatstats.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM tatstats.c -o tatstats_fm.o
+X
+last_tat.o : last_tat.c defs.h mm_file.h structs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c last_tat.c
+X
+#================ drop functions - actual scores/alignments
+X
+drop_nfa.o : dropnfa.c dropnfa.h param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropnfa.c -o drop_nfa.o
+X
+# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
+# that tatstats.h is built appropriately
+X
+drop_ff.o : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -c dropff2.c -o drop_ff.o
+X
+drop_tff.o : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -DTFAST -c dropff2.c -o drop_tff.o
+X
+drop_ff2.o : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF dropff2.c -o drop_ff2.o
+X
+drop_tff2.o : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c -o drop_tff.o
+X
+drop_fs.o : dropfs2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -DFASTS -c dropfs2.c -o drop_fs.o
+X
+drop_tfs.o : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c -o drop_tfs.o
+X
+drop_fm.o : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM dropfs2.c -o drop_fm.o
+X
+drop_tfm.o : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c -o drop_tfm.o
+X
+drop_tfa.o : dropnfa.c dropnfa.h upam.h param.h defs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DTFASTA dropnfa.c -o drop_tfa.o
+X
+drop_fx.o : dropfx.c upam.h param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropfx.c -o drop_fx.o
+X
+drop_tfx.o : dropfx.c upam.h param.h defs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfx.c -o drop_tfx.o
+X
+drop_fz.o : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropfz2.c -o drop_fz.o
+X
+drop_tfz.o : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfz2.c -o drop_tfz.o
+X
+dropnsw.o : dropnsw.c upam.h param.h structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropnsw.c
+X
+dropgsw.o : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropgsw.c
+X
+smith_waterman_altivec.o : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw.h defs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c smith_waterman_altivec.c
+X
+smith_waterman_sse2.o : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw.h defs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c smith_waterman_sse2.c
+X
+dropnw.o : dropnw.c upam.h param.h structs.h drop_func.h
+X $(CC) $(THR_CC) $(CFLAGS) -c dropnw.c
+X
+#================ reading query, libraries
+X
+getseq.o : getseq.c defs.h uascii.h structs.h upam.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c getseq.c
+X
+llgetaa.o : llgetaa.c upam.h uascii.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DNOLIB llgetaa.c
+X
+lgetlib.o : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c $(NGETLIB).c -o lgetlib.o
+X
+lgetaa_m.o : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c mmgetaa.c -o lgetaa_m.o
+X
+ncbl_lib.o : ncbl_lib.c ncbl_head.h
+X $(CC) $(THR_CC) $(CFLAGS) -c ncbl_lib.c
+X
+ncbl2_mlib.o : ncbl2_mlib.c ncbl2_head.h mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c ncbl2_mlib.c
+X
+mysql_lib.o : mysql_lib.c mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c mysql_lib.c
+X
+pgsql_lib.o : pgsql_lib.c mm_file.h
+X $(CC) $(THR_CC) $(CFLAGS) -c pgsql_lib.c
+X
+#================ threading functions
+X
+pthr_subs2.o : pthr_subs2.c thr.h pthr_subs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c pthr_subs2.c
+X
+uthr_subs.o : uthr_subs.c thr.h uthr_subs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c uthr_subs.c
+X
+#================ translation
+X
+faatran.o : faatran.c upam.h uascii.h
+X $(CC) $(THR_CC) $(CFLAGS) -c faatran.c
+X
+url_subs.o : url_subs.c structs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c url_subs.c
+X
+$(NRAND).o : $(NRAND).c
+X $(CC) $(THR_CC) $(CFLAGS) -c $(NRAND).c
+#================ pvm/mpi specific functions
+X
+hostacc.o : hostacc.c upam.h uascii.h
+X $(CC) $(THR_CC) $(CFLAGS) hostacc.c
+X
+workacc.o : workacc.c upam.h uascii.h param.h
+X $(NCC) $(THR_CC) $(CFLAGS) workacc.c -o workacc.o
+SHAR_EOF
+chmod 0644 Makefile.fcom ||
+echo 'restore of Makefile.fcom failed'
+Wc_c="`wc -c < 'Makefile.fcom'`"
+test 8454 -eq "$Wc_c" ||
+ echo 'Makefile.fcom: original size 8454, current size' "$Wc_c"
+fi
+# ============= Makefile.freebsd ==============
+if test -f 'Makefile.freebsd' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.freebsd (File already exists)'
+else
+echo 'x - extracting Makefile.freebsd (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.freebsd' &&
+#
+# Makefile for building fasta3 on FreeBSD
+#
+# Fernan Aguero - <fernan@iib.unsam.edu.ar>
+X
+# we take care of doing variable assignment using the '?=' and '+='
+# operators to preserve the value of variables if they are already
+# defined. In FreeBSD this happens when fasta3 is built from the port or
+# when the user has set these variables -- most notably CC and/or CFLAGS
+# -- in /etc/make.conf
+X
+# Compiler executable, and optional flags
+CC?= gcc
+CFLAGS?= -g -O2
+X
+# your FASTA host
+FASTA_HOST?= "your_fasta_host"
+X
+# common CFLAGS. These are the set of CFLAGS that are always used
+COMMON_CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -c -DMAX_WORKERS=2 \
+X -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -D_REENTRANT \
+X -D_LARGE_FILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO \
+X -DHAS_INTTYPES -DSAMP_STATS
+X
+# standard options, these will be added to the common CFLAGS if
+# selected below
+STANDARD_CFLAGS= -DSFCHAR="':'" -DFASTA_HOST='${FASTA_HOST}' \
+X -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DPGM_DOC
+X
+# options for superfamily validations, these will be added to the common
+# CFLAGS if selected below
+SUPERFAMILY_CFLAGS= -DSFCHAR="'|'" -DSUPERFAMNUM -DBIG_LIB64
+X
+# here we define CFLAGS to be the sum of common flags plus a subset of
+# optional flags that define our intended use.
+# The default standard flags are selected by default, although the user
+# can override this if s/he wants
+CFLAGS+= ${COMMON_CFLAGS} ${STANDARD_CFLAGS}
+X
+XXDIR?= /usr/local/bin
+X
+LIB_M+= -lm
+X
+HFLAGS+= -o
+NFLAGS+= -o
+X
+# FreeBSD users BEWARE! Different threading models ahead!
+X
+# The threading model has changed along the way from FreeBSD-4 to
+# FreeBSD-6. If you're building fasta3 on your own, you will need to
+# adjust this accordingly. The default works in FreeBSD-6x (currently
+# the recommended major version for use in production). Or better yet,
+# use the biology/fasta3 port from the ports collection, which will use
+# the correct threading library for your OSVERSION
+X
+THR_SUBS?= pthr_subs2
+THR_LIBS?= -lpthread
+THR_CC?=
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+include Makefile34m.common
+SHAR_EOF
+chmod 0644 Makefile.freebsd ||
+echo 'restore of Makefile.freebsd failed'
+Wc_c="`wc -c < 'Makefile.freebsd'`"
+test 2135 -eq "$Wc_c" ||
+ echo 'Makefile.freebsd: original size 2135, current size' "$Wc_c"
+fi
+# ============= Makefile.hpux_it ==============
+if test -f 'Makefile.hpux_it' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.hpux_it (File already exists)'
+else
+echo 'x - extracting Makefile.hpux_it (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.hpux_it' &&
+#
+# makefile for fasta3, fasta3_t
+#
+# flags for HP-UX #
+X
+CC= cc -g -O2 +Onolimit -Wl,+pi,1M -Wl,+pd,1M -Wl,+mergeseg
+#CC = gcc -g -DDEBUG
+X
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+X
+# this file is meant for HP-UX.
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for HP-UX
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0644 Makefile.hpux_it ||
+echo 'restore of Makefile.hpux_it failed'
+Wc_c="`wc -c < 'Makefile.hpux_it'`"
+test 1484 -eq "$Wc_c" ||
+ echo 'Makefile.hpux_it: original size 1484, current size' "$Wc_c"
+fi
+# ============= Makefile.ibm ==============
+if test -f 'Makefile.ibm' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.ibm (File already exists)'
+else
+echo 'x - extracting Makefile.ibm (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.ibm' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+X
+CC= xlc_r -O3 -qarch=auto -qtune=auto -qcache=auto
+X
+# for IBM with current pthreads
+CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DIS_BIG_ENDIAN -DUSE_MMAP -DIBM_AIX -D_LARGE_FILES -DHAS_INTTYPES -D_LARGE_FILES -UMAXSEG -DSAMP_STATS -DPGM_DOC
+X
+# consider -D_LARGE_FILE_API -D_LARGE_FILES for files > 2 GB
+X
+LIB_M = -lm
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthreads
+THR_CC =
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0644 Makefile.ibm ||
+echo 'restore of Makefile.ibm failed'
+Wc_c="`wc -c < 'Makefile.ibm'`"
+test 740 -eq "$Wc_c" ||
+ echo 'Makefile.ibm: original size 740, current size' "$Wc_c"
+fi
+# ============= Makefile.linux ==============
+if test -f 'Makefile.linux' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.linux (File already exists)'
+else
+echo 'x - extracting Makefile.linux (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+#
+# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -O3 breaks the alignment code
+#
+X
+CC= gcc -g -O
+#CC = gcc -g -DDEBUG
+X
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+X
+# this file works for x86 LINUX
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
+X
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPTFA_O = drop_tfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0644 Makefile.linux ||
+echo 'restore of Makefile.linux failed'
+Wc_c="`wc -c < 'Makefile.linux'`"
+test 1580 -eq "$Wc_c" ||
+ echo 'Makefile.linux: original size 1580, current size' "$Wc_c"
+fi
+# ============= Makefile.linux_mysql ==============
+if test -f 'Makefile.linux_mysql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.linux_mysql (File already exists)'
+else
+echo 'x - extracting Makefile.linux_mysql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_mysql' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+# On the version of linux that I use, MININT is defined and causes a warning
+# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
+# MININT causes the code to break. Do not change the MININT definition
+# in dropnfa.c (wrp 3/19/1998)
+#
+# for DEC/Compaq Alpha/LINUX, use gcc -mieee -g to avoid buggy compilers
+X
+CC= gcc -g -O2
+X
+#CC= gcc -g -DDEBUG
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# this file works for x86 LINUX
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/include/mysql -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+X
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common_mysql
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.linux_mysql ||
+echo 'restore of Makefile.linux_mysql failed'
+Wc_c="`wc -c < 'Makefile.linux_mysql'`"
+test 1577 -eq "$Wc_c" ||
+ echo 'Makefile.linux_mysql: original size 1577, current size' "$Wc_c"
+fi
+# ============= Makefile.linux_pgsql ==============
+if test -f 'Makefile.linux_pgsql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.linux_pgsql (File already exists)'
+else
+echo 'x - extracting Makefile.linux_pgsql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_pgsql' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+# On the version of linux that I use, MININT is defined and causes a warning
+# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
+# MININT causes the code to break. Do not change the MININT definition
+# in dropnfa.c (wrp 3/19/1998)
+#
+# for DEC/Compaq Alpha/LINUX, use gcc -mieee -g to avoid buggy compilers
+X
+CC= gcc -g -O
+#CC= gcc -g -DDEBUG
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# this file works for x86 LINUX
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -DPGSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+X
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common_pgsql
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.linux_pgsql ||
+echo 'restore of Makefile.linux_pgsql failed'
+Wc_c="`wc -c < 'Makefile.linux_pgsql'`"
+test 1581 -eq "$Wc_c" ||
+ echo 'Makefile.linux_pgsql: original size 1581, current size' "$Wc_c"
+fi
+# ============= Makefile.linux_sql ==============
+if test -f 'Makefile.linux_sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.linux_sql (File already exists)'
+else
+echo 'x - extracting Makefile.linux_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_sql' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+# On the version of linux that I use, MININT is defined and causes a warning
+# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
+# MININT causes the code to break. Do not change the MININT definition
+# in dropnfa.c (wrp 3/19/1998)
+#
+X
+X
+CC= gcc -g -O
+#CC= gcc -g -DDEBUG
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+X
+# this file works for x86 LINUX
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -I/usr/include/mysql -DPGSQL_DB -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DLINUX6 -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
+X
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+#LIB_M = -lm
+#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common_sql
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.linux_sql ||
+echo 'restore of Makefile.linux_sql failed'
+Wc_c="`wc -c < 'Makefile.linux_sql'`"
+test 1548 -eq "$Wc_c" ||
+ echo 'Makefile.linux_sql: original size 1548, current size' "$Wc_c"
+fi
+# ============= Makefile.linux_sse2 ==============
+if test -f 'Makefile.linux_sse2' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.linux_sse2 (File already exists)'
+else
+echo 'x - extracting Makefile.linux_sse2 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_sse2' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+#
+# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -O3 breaks the alignment code
+#
+X
+CC= gcc -g -O -DSW_SSE2 -msse2
+#CC = gcc -g -DDEBUG -DSW_SSE2 -msse2
+X
+#CC=gcc -Wall -pedantic -ansi -g -O
+#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
+X
+# EBI uses the following with pgcc, -O3 does not work:
+# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
+X
+# this file works for x86 LINUX
+X
+# standard options
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
+X
+# use options below for superfamily validations
+#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
+X
+# -I/usr/local/include/mysql -DMYSQL_DB
+#
+#(for mySQL databases) (also requires change to Makefile34.common)
+X
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+# for Linux
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/bin
+#XDIR = ~/bin/LINUX
+X
+DROPNFA_O = drop_nfa.o
+DROPTFA_O = drop_tfa.o
+DROPGSW_O = dropgsw.o smith_waterman_sse2.o
+DROPRSS_O = dropgsw.o smith_waterman_sse2.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0644 Makefile.linux_sse2 ||
+echo 'restore of Makefile.linux_sse2 failed'
+Wc_c="`wc -c < 'Makefile.linux_sse2'`"
+test 1671 -eq "$Wc_c" ||
+ echo 'Makefile.linux_sse2: original size 1671, current size' "$Wc_c"
+fi
+# ============= Makefile.mpcom ==============
+if test -f 'Makefile.mpcom' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.mpcom (File already exists)'
+else
+echo 'x - extracting Makefile.mpcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpcom' &&
+X
+PROGS= mp34compfa mp34compsw mp34compfx mp34comptfx mp34compfy mp34comptfy mp34compfs mp34comptfs
+X
+# ms34compfa, etc provides a summaries of effectiveness, require
+# superfamily annotated database. ms34compss uses dropnsw.c instead of
+# dropgsw.c, thus allowing high gap penalties.
+X
+SPROGS = ms34compfa ms34compsw ms34compss ms34compfx ms34compfy ms34comptfx ms34comptfy
+X
+# report highest unrelated sequences
+UPROGS = mu34compfa mu34compsw mu34compfx mu34comptfx mu34compfy mu34comptfy
+X
+vall : $(PROGS) $(WPROGS)
+X
+uall : $(UPROGS) $(WPROGS)
+X
+sall : $(SPROGS) $(WPROGS)
+X
+all : $(PROGS) $(UPROGS) $(SPROGS) $(WPROGS)
+X
+clean-up:
+X rm -f *.o $(PROGS) $(WPROGS) $(SPROGS) $(UPROGS)
+X
+install : $(PROGS) $(WPROGS)
+X cp $(PROGS) $(WPROGS) $(XDIR)
+X
+sinstall : $(SPROGS) $(WPROGS)
+X cp $(SPROGS) $(WPROGS) $(XDIR)
+X
+uinstall : $(UPROGS) $(WPROGS)
+X cp $(UPROGS) $(WPROGS) $(XDIR)
+X
+mp34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) mp34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+ms34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) ms34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+mu34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o c_dispn.o
+X $(LCC) $(LFLAGS) mu34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
+X
+mr34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o manshowrel.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) mr34compfa p2_complib.o compacc.o lib_sel.o url_subs.o manshowrel.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+mp34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) mp34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+ms34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) ms34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+mu34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o c_dispn.o
+X $(LCC) $(LFLAGS) mu34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
+X
+mp34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) mp34compss p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+ms34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) ms34compss p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+mu34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o c_dispn.o
+X $(LCC) $(LFLAGS) mu34compss p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
+X
+mp34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mp34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+ms34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) ms34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mu34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mu34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mp34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mp34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+ms34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) ms34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mu34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mu34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mp34compfs : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fs.o workacc.o faatran.o $(NRAND).o
+X $(LCC) $(LFLAGS) mp34compfs p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fs.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
+X
+mp34comptfs : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mp34comptfs p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mp34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mp34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+ms34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) ms34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mu34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mu34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mp34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mp34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+ms34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) ms34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+mu34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
+X $(LCC) $(LFLAGS) mu34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
+X
+p2_complib.o : p2_complib.c msg.h defs.h upam.h uascii.h param.h structs.h
+X $(CC) $(CFLAGS) p2_complib.c -o p2_complib.o
+X
+p2_workcomp.o : p2_workcomp.c structs.h msg.h defs.h mw.h upam.h uascii.h param.h
+X $(NCC) $(CFLAGS) p2_workcomp.c
+SHAR_EOF
+chmod 0644 Makefile.mpcom ||
+echo 'restore of Makefile.mpcom failed'
+Wc_c="`wc -c < 'Makefile.mpcom'`"
+test 13073 -eq "$Wc_c" ||
+ echo 'Makefile.mpcom: original size 13073, current size' "$Wc_c"
+fi
+# ============= Makefile.mpi4 ==============
+if test -f 'Makefile.mpi4' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.mpi4 (File already exists)'
+else
+echo 'x - extracting Makefile.mpi4 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4,v 1.26 2006/12/12 16:58:51 wrp Exp $
+#
+# 21 July, 2000
+#
+# Makefile for MPI versions of the parallel library comparison programs.
+# this file is derived from Makefile.pvm, with only a few differences:
+# (1) -DMPI_SRC instead of -DPVM_SRC
+# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
+# (3) MPI does not require/allow a "worker" program, thus no c3.work*
+#
+X
+# setenv MPI_CC gcc-3.3 for best performance
+X
+MPI_ROOT = /m0/xshare/mpich2
+MPICC = ${MPI_ROOT}/bin/mpicc
+X
+CC= ${MPICC} -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
+NCC= ${MPICC} -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
+LCC= ${MPICC}
+X
+#ARCH = ALPHAMP (get from $ARCH)
+X
+PLIB = -L${MPI_ROOT}/lib -lmpich
+XXDIR = /home/slib/mpi/bin/
+SDIR = .
+X
+CFLAGS= -DMPI_SRC -DUNIX -DPCOMPLIB -DBFR=120 -DSHOWSIM -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DSW_ALTIVEC
+# -DMYSQL_DB -I/usr/include/mysql
+# -DSFCHAR="'|'" -DSUPERFAMNUM
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = p_mw.h
+MWHP = p_mw.h w_mw.h
+X
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -lm
+X
+LFLAGS= -o
+X
+DROPGSW_O = dropgsw.o smith_waterman_altivec.o
+DROPNFA_O = drop_nfa.o
+X
+include Makefile.mpcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile.mpi4 ||
+echo 'restore of Makefile.mpi4 failed'
+Wc_c="`wc -c < 'Makefile.mpi4'`"
+test 1600 -eq "$Wc_c" ||
+ echo 'Makefile.mpi4: original size 1600, current size' "$Wc_c"
+fi
+# ============= Makefile.mpi4_bluegene ==============
+if test -f 'Makefile.mpi4_bluegene' -a X"$1" != X"-c"; then # keep an existing file unless the first argument is -c
+ echo 'x - skipping Makefile.mpi4_bluegene (File already exists)'
+else
+echo 'x - extracting Makefile.mpi4_bluegene (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4_bluegene' && # strip one leading X per line; the quoted delimiter keeps the text literal
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4_bluegene,v 1.1 2006/04/01 14:09:11 wrp Exp $
+#
+# 21 July, 2000
+#
+# Makefile for MPI versions of the parallel library comparison programs.
+# this file is derived from Makefile.pvm, with only a few differences:
+# (1) -DMPI_SRC instead of -DPVM_SRC
+# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
+# (3) MPI does not require/allow a "worker" program, thus no c3.work*
+#
+# 1-April-2006 - Makefile for IBM BlueGene - use -DMAXWRKR to set the
+# maximum number of workers.
+#
+CC= blrts_xlc -O3 -qsource -qlist -qarch=440d -qtune=440
+NCC= blrts_xlc -O3 -qsource -qlist -qarch=440d -qtune=440
+LCC= blrts_xlc -O3
+X
+MPI_ROOT = /bgl/BlueLight/ppcfloor/bglsys
+PLIB = -L${MPI_ROOT}/lib -lmpich.rts -lrts.rts -ldevices.rts -lmsglayer.rts -ldevices.440
+XXDIR = /home/slib/mpi/bin/
+SDIR = .
+X
+CFLAGS= -DMPI_SRC -DMAXWRKR=128 -DUNIX -DPCOMPLIB -DBFR=1200 -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS
+# -DMYSQL_DB -I/usr/include/mysql
+# -DSFCHAR="'|'" -DSUPERFAMNUM
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = p_mw.h
+MWHP = p_mw.h w_mw.h
+X
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -lm
+X
+LFLAGS= -o
+X
+DROPGSW_O = dropgsw.o
+DROPNFA_O = drop_nfa.o
+X
+include Makefile.mpcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile.mpi4_bluegene || # runs only when sed succeeded
+echo 'restore of Makefile.mpi4_bluegene failed' # reported when either sed or chmod fails
+Wc_c="`wc -c < 'Makefile.mpi4_bluegene'`" # byte count of the file now on disk
+test 1602 -eq "$Wc_c" || # 1602 is the length recorded for this member in the archive header
+ echo 'Makefile.mpi4_bluegene: original size 1602, current size' "$Wc_c"
+fi
+# ============= Makefile.mpi4_sql ==============
+if test -f 'Makefile.mpi4_sql' -a X"$1" != X"-c"; then # keep an existing file unless the first argument is -c
+ echo 'x - skipping Makefile.mpi4_sql (File already exists)'
+else
+echo 'x - extracting Makefile.mpi4_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4_sql' && # strip one leading X per line; the quoted delimiter keeps the text literal
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4_sql,v 1.19 2004/11/19 15:28:26 wrp Exp $
+#
+# 21 July, 2000
+#
+# Makefile for MPI versions of the parallel library comparison programs.
+# this file is derived from Makefile.pvm, with only a few differences:
+# (1) -DMPI_SRC instead of -DPVM_SRC
+# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
+# (3) MPI does not require/allow a "worker" program, thus no c3.work*
+#
+X
+CC= mpicc -g -O
+NCC= mpicc -O
+LCC= mpicc -O
+X
+#ARCH = ALPHAMP (get from $ARCH)
+X
+#MPI_ROOT = /opt/share/mpi
+#PLIB = -L${MPI_ROOT}/lib -lmpich
+#XDIR = /seqprg/pvm3/bin/LINUX
+XXDIR = ${HOME}/pvm3/bin/LINUX
+SDIR = .
+X
+CFLAGS= -DMPI_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DMYSQL_DB -I/usr/include/mysql -DPGSQL_DB -I/usr/include/pgsql
+# -DMYSQL_DB -I/usr/include/mysql
+# -DSFCHAR="'|'" -DSUPERFAMNUM
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = p_mw.h
+MWHP = p_mw.h w_mw.h
+X
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+#NCBL_LIB=ncbl2_mlib.o
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm -L/usr/lib/pgsql -lpq -lcrypt -lssl
+#LIB_M= -lm
+X
+LFLAGS= -o
+X
+DROPGSW_O = dropgsw.o
+DROPNFA_O = drop_nfa.o
+X
+include Makefile.mpcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile.mpi4_sql || # runs only when sed succeeded
+echo 'restore of Makefile.mpi4_sql failed' # reported when either sed or chmod fails
+Wc_c="`wc -c < 'Makefile.mpi4_sql'`" # byte count of the file now on disk
+test 1509 -eq "$Wc_c" || # 1509 is the length recorded for this member in the archive header
+ echo 'Makefile.mpi4_sql: original size 1509, current size' "$Wc_c"
+fi
+# ============= Makefile.nm_fcom ==============
+if test -f 'Makefile.nm_fcom' -a X"$1" != X"-c"; then # keep an existing file unless the first argument is -c
+ echo 'x - skipping Makefile.nm_fcom (File already exists)'
+else
+echo 'x - extracting Makefile.nm_fcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nm_fcom' && # strip one leading X per line; the quoted delimiter keeps the text literal
+X
+#================ common .obj files
+X
+doinit.obj : doinit.c defs.h param.h upam.h structs.h uascii.h
+X $(CC) $(CFLAGS) -c doinit.c
+X
+init_sw.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DSSEARCH initfa.c /Foinit_sw.obj
+X
+init_ssw.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DOSEARCH initfa.c /Foinit_ssw.obj
+X
+init_rss.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DPRSS initfa.c /Foinit_rss.obj
+X
+init_rfx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c /Foinit_rfx.obj
+X
+init_fa.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTA initfa.c /Foinit_fa.obj
+X
+init_ff.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTF initfa.c /Foinit_ff.obj
+X
+init_tf.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c /Foinit_tf.obj
+X
+init_fs.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTS initfa.c /Foinit_fs.obj
+X
+init_fm.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTM initfa.c /Foinit_fm.obj
+X
+init_tfs.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTS -DTFAST initfa.c /Foinit_tfs.obj
+X
+init_tfm.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTM -DTFAST initfa.c /Foinit_tfm.obj
+X
+init_tfa.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c /Foinit_tfa.obj
+X
+init_fx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTX initfa.c /Foinit_fx.obj
+X
+init_tfx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c /Foinit_tfx.obj
+X
+init_fy.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTY initfa.c /Foinit_fy.obj
+X
+init_tfy.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c /Foinit_tfy.obj
+X
+#================ miscellaneous
+X
+htime.obj : htime.c
+X $(CC) $(CFLAGS) -c htime.c
+X
+compacc.obj : compacc.c upam.h uascii.h param.h structs.h $(MWH) defs.h
+X $(CC) $(CFLAGS) -c compacc.c
+X
+pssm_asn_subs.obj : pssm_asn_subs.c defs.h
+X $(CC) $(CFLAGS) -c pssm_asn_subs.c
+X
+#================ display list of best hits / alignments
+X
+showbest.obj : $(SHOWBESTC) $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -c $(SHOWBESTC) /Foshowbest.obj
+X
+showrss.obj : showrss.c $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -c showrss.c
+X
+showun.obj : mshowbest.c $(MWH) defs.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DSHOWUN mshowbest.c /Foshowun.obj
+X
+showrel.obj : $(SHOWBESTC) $(MWH) defs.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DSHOWREL $(SHOWBESTC) /Foshowrel.obj
+X
+showsum.obj : showsum.c $(MWH) defs.h drop_func.h
+X $(CC) $(CFLAGS) -c showsum.c
+X
+$(SHOWALIGN).obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -c $(SHOWALIGN).c /Fo$(SHOWALIGN).obj
+X
+$(SHOWALIGN)_u.obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
+X $(CC) $(CFLAGS) -DSHOWUN -c /Fo$(SHOWALIGN)_u.obj $(SHOWALIGN).c
+re_getlib.obj : re_getlib.c mw.h mm_file.h
+X $(CC) $(CFLAGS) -c re_getlib.c
+X
+lib_sel.obj : lib_sel.c defs.h structs.h
+X $(CC) $(CFLAGS) -c lib_sel.c
+X
+c_dispn.obj : c_dispn.c defs.h structs.h param.h
+X $(CC) $(CFLAGS) -c c_dispn.c
+X
+#================ statistical functions
+X
+karlin.obj : karlin.c param.h
+X $(CC) $(CFLAGS) -c karlin.c
+X
+scaleswn.obj : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(CFLAGS) -c scaleswn.c
+X
+scaleswtf.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(CFLAGS) -DFASTF -c scaleswt.c /Foscaleswtf.obj
+X
+scaleswts.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
+X $(CC) $(CFLAGS) -c scaleswt.c /Foscaleswts.obj
+X
+tatstats_fs.obj : tatstats.c tatstats.h
+X $(CC) $(CFLAGS) -c -DFASTS tatstats.c /Fotatstats_fs.obj
+X
+tatstats_ff.obj : tatstats.c tatstats.h
+X $(CC) $(CFLAGS) -c -DFASTF tatstats.c /Fotatstats_ff.obj
+X
+tatstats_fm.obj : tatstats.c tatstats.h
+X $(CC) $(CFLAGS) -c -DFASTM tatstats.c /Fotatstats_fm.obj
+X
+last_tat.obj : last_tat.c defs.h mm_file.h structs.h param.h
+X $(CC) $(CFLAGS) -c last_tat.c
+X
+#================ drop functions - actual scores/alignments
+X
+drop_nfa.obj : dropnfa.c dropnfa.h param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c dropnfa.c /Fodrop_nfa.obj
+X
+# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
+# that tatstats.h is built appropriately
+X
+drop_ff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(CFLAGS) -DFASTF -c dropff2.c /Fodrop_ff.obj
+X
+drop_tff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(CFLAGS) -DFASTF -DTFAST -c dropff2.c /Fodrop_tff.obj
+X
+drop_ff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(CFLAGS) -c -DFASTF dropff2.c /Fodrop_ff2.obj
+X
+drop_tff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c /Fodrop_tff2.obj
+X
+drop_fs.obj : dropfs2.c param.h defs.h tatstats.h drop_func.h
+X $(CC) $(CFLAGS) -DFASTS -c dropfs2.c /Fodrop_fs.obj
+X
+drop_tfs.obj : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c /Fodrop_tfs.obj
+X
+drop_fm.obj : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DFASTM dropfs2.c /Fodrop_fm.obj
+X
+drop_tfm.obj : dropfs2.c param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c /Fodrop_tfm.obj
+X
+drop_tfa.obj : dropnfa.c dropnfa.h upam.h param.h defs.h
+X $(CC) $(CFLAGS) -c -DTFASTA dropnfa.c /Fodrop_tfa.obj
+X
+drop_fx.obj : dropfx.c upam.h param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c dropfx.c /Fodrop_fx.obj
+X
+drop_tfx.obj : dropfx.c upam.h param.h defs.h drop_func.h
+X $(CC) $(CFLAGS) -c -DTFAST dropfx.c /Fodrop_tfx.obj
+X
+drop_fz.obj : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
+X $(CC) $(CFLAGS) -c dropfz2.c /Fodrop_fz.obj
+X
+drop_tfz.obj : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
+X $(CC) $(CFLAGS) -c -DTFAST dropfz2.c /Fodrop_tfz.obj
+X
+dropnsw.obj : dropnsw.c upam.h param.h structs.h drop_func.h
+X $(CC) $(CFLAGS) -c dropnsw.c
+X
+dropgsw.obj : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
+X $(CC) $(CFLAGS) -c dropgsw.c
+X
+dropgsw_sse2.obj : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
+X $(CC) $(CFLAGS) -DSW_SSE2 -c dropgsw.c /Fodropgsw_sse2.obj
+X
+smith_waterman_altivec.obj : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw.h defs.h param.h
+X $(CC) $(CFLAGS) -c smith_waterman_altivec.c
+X
+smith_waterman_sse2.obj : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw.h defs.h param.h
+X $(CC) $(CFLAGS) -DSW_SSE2 -c smith_waterman_sse2.c
+X
+dropnw.obj : dropnw.c upam.h param.h structs.h drop_func.h
+X $(CC) $(CFLAGS) -c dropnw.c
+X
+#================ reading query, libraries
+X
+getseq.obj : getseq.c defs.h uascii.h structs.h upam.h
+X $(CC) $(CFLAGS) -c getseq.c
+X
+llgetaa.obj : llgetaa.c upam.h uascii.h
+X $(CC) $(CFLAGS) -c -DNOLIB llgetaa.c
+X
+lgetlib.obj : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
+X $(CC) $(CFLAGS) -c $(NGETLIB).c /Folgetlib.obj
+X
+lgetaa_m.obj : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
+X $(CC) $(CFLAGS) -c mmgetaa.c /Folgetaa_m.obj
+X
+ncbl_lib.obj : ncbl_lib.c ncbl_head.h
+X $(CC) $(CFLAGS) -c ncbl_lib.c
+X
+ncbl2_mlib.obj : ncbl2_mlib.c ncbl2_head.h mm_file.h
+X $(CC) $(CFLAGS) -c ncbl2_mlib.c
+X
+mysql_lib.obj : mysql_lib.c mm_file.h
+X $(CC) $(CFLAGS) -c mysql_lib.c
+X
+pgsql_lib.obj : pgsql_lib.c mm_file.h
+X $(CC) $(CFLAGS) -c pgsql_lib.c
+X
+#================ threading functions
+X
+pthr_subs2.obj : pthr_subs2.c thr.h pthr_subs.h
+X $(CC) $(CFLAGS) -c pthr_subs2.c
+X
+uthr_subs.obj : uthr_subs.c thr.h uthr_subs.h
+X $(CC) $(CFLAGS) -c uthr_subs.c
+X
+#================ translation
+X
+faatran.obj : faatran.c upam.h uascii.h
+X $(CC) $(CFLAGS) -c faatran.c
+X
+url_subs.obj : url_subs.c structs.h param.h
+X $(CC) $(CFLAGS) -c url_subs.c
+X
+$(NRAND).obj : $(NRAND).c
+X $(CC) $(CFLAGS) -c $(NRAND).c
+#================ pvm/mpi specific functions
+X
+hostacc.obj : hostacc.c upam.h uascii.h
+X $(CC) $(CFLAGS) hostacc.c
+X
+workacc.obj : workacc.c upam.h uascii.h param.h
+X $(NCC) $(CFLAGS) workacc.c /Foworkacc.obj
+X
+#================ windows getopt()
+X
+getopt.obj : getopt.c
+X $(CC) $(CFLAGS) -c getopt.c
+SHAR_EOF
+chmod 0755 Makefile.nm_fcom || # runs only when sed succeeded
+echo 'restore of Makefile.nm_fcom failed' # reported when either sed or chmod fails
+Wc_c="`wc -c < 'Makefile.nm_fcom'`" # byte count of the file now on disk
+test 8183 -eq "$Wc_c" || # one byte more than the 8182 in the archive header listing: the drop_tff2.obj rule now writes /Fodrop_tff2.obj
+ echo 'Makefile.nm_fcom: original size 8183, current size' "$Wc_c"
+fi
+# ============= Makefile.nm_pcom ==============
+if test -f 'Makefile.nm_pcom' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.nm_pcom (File already exists)'
+else
+echo 'x - extracting Makefile.nm_pcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nm_pcom' &&
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = mw.h
+MWHP = mw.h
+X
+TPROGS = ssearch34_t.exe ssearch34sse2_t.exe fasta34_t.exe fasts34_t.exe fastx34_t.exe tfastx34_t.exe fasty34_t.exe tfasty34_t.exe tfasts34_t.exe fastm34_t.exe fastf34_t.exe tfastf34_t.exe prss34_t.exe prss34sse2_t.exe prfx34_t.exe
+X
+SPROGS = fasta34.exe ssearch34.exe ssearch34sse2.exe fasts34.exe fastx34.exe tfastx34.exe fasty34.exe tfasty34.exe tfasts34.exe fastm34.exe tfastm34.exe prss34.exe prss34sse2.exe prfx34.exe fastf34.exe tfastf34.exe
+X
+MAPROGS = map_db.exe
+X
+XXTPROGS = fastx34_t.exe tfastx34_t.exe fasty34_t.exe tfasty34_t.exe
+XXPROGS = fastx34.exe tfastx34.exe .exe fasty34 tfasty34.exe
+X
+PROGS = $(SPROGS) $(TPROGS)
+X
+all : $(PROGS)
+X
+tall: $(TPROGS)
+X
+sall: $(SPROGS)
+X
+xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS)
+X
+clean-up:
+X del *.obj $(PROGS)
+X
+install: $(PROGS)
+X copy $(PROGS) $(XDIR)
+X
+sinstall: $(SPROGS)
+X copy $(SPROGS) $(XDIR)
+X
+tinstall: $(TPROGS)
+X cp $(TPROGS) $(XDIR)
+X
+fasta34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefasta34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+fastx34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj scaleswn.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefastx34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+fasty34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj scaleswn.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefasty34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+fastf34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastf34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+fastf34u : $(COMP_LIBO) compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastf34u.exe $(COMP_LIBO) compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+fastf34s : $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastf34s.exe $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+fasts34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_fs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefasts34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+fastm34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj drop_fm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastm34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj drop_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+tfastx34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scaleswn.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfastx34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+tfasty34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scaleswn.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfasty34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+tfastf34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfastf34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+tfastf34s : $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfastf34s.exe $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+tfasts34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj drop_tfs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfasts34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj drop_tfs.obj scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+tfastm34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj drop_tfm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfastm34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj drop_tfm.obj scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X
+ssearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X $(CL) /Fessearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X
+ssearch34sse2.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X $(CL) /Fessearch34sse2.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X
+osearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Feosearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+usearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Feusearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+prss34.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj
+X $(CL) /Feprss34.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X
+prss34sse2.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_SSE2_O) llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj
+X $(CL) /Feprss34sse2.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X
+prfx34.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj faatran.obj
+X $(CL) /Feprfx34.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+prss34o : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
+X $(CL) /Feprss34o.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_O) scaleswn.obj karlin.obj llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X
+prfx34o : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj faatran.obj
+X $(CL) /Feprfx34o.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showrss.obj lib_sel.obj faatran.obj $(NRAND).obj getopt.obj
+X
+ssearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X $(CL) /Fessearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+X
+ssearch34sse2_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
+X $(CL) /Fessearch34sse2_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+X
+osearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Feosearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+usearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Feusearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+fasta34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefasta34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+fasta34s_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefasta34s_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+fasta34u_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefasta34u_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fasta34r_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefasta34r_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fastf34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastf34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fastf34s_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastf34s_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fasts34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_fs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefasts34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fastm34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj drop_fm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fefastm34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+fastx34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fx.obj faatran.obj scaleswn.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefastx34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj faatran.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+fasty34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fy.obj faatran.obj scaleswn.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fefasty34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj faatran.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+tfasta34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj scaleswn.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfasta34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X
+tfasta34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfa.obj scaleswn.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfasta34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+tfastf34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfastf34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+tfasts34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_tfs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
+X $(CL) /Fetfasts34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj drop_tfs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
+X
+tfastx34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scaleswn.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfastx34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+tfasty34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scaleswn.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
+X $(CL) /Fetfasty34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+prss34_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
+X $(CL) /Feprss34_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+X
+prss34sse2_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
+X $(CL) /Feprss34sse2_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
+X
+prfx34_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj faatran.obj
+X $(CL) /Feprfx34_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
+X
+comp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(CFLAGS) -c comp_lib.c
+X
+comp_mlib.obj : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib.c /Focomp_mlib.obj
+X
+rcomp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(CFLAGS) -c -DPRSS comp_lib.c /Forcomp_lib.obj
+X
+comp_thr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(CFLAGS) -DCOMP_THR -c comp_lib.c /Focomp_thr.obj
+X
+comp_mthr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib.c /Focomp_mthr.obj
+X
+rcomp_thr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(CFLAGS) -DPRSS -DCOMP_THR -c comp_lib.c /Forcomp_thr.obj
+X
+work_thr.obj : work_thr.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(CFLAGS) -c work_thr.c
+X
+print_pssm.exe : print_pssm.c getseq.c karlin.c apam.c
+X $(CC) /Feprint_pssm.exe $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c getopt.obj
+X
+map_db.exe : map_db.c uascii.h ncbl2_head.h
+X $(CC) /Femap_db.exe map_db.c
+X
+list_db.exe : list_db.c
+X $(CC) /Felist_db.exe list_db.c
+X
+SHAR_EOF
+chmod 0755 Makefile.nm_pcom ||
+echo 'restore of Makefile.nm_pcom failed'
+Wc_c="`wc -c < 'Makefile.nm_pcom'`"
+test 27480 -eq "$Wc_c" ||
+ echo 'Makefile.nm_pcom: original size 27480, current size' "$Wc_c"
+fi
+# ============= Makefile.nmk_icl ==============
+if test -f 'Makefile.nmk_icl' -a X"$1" != X"-c"; then # file exists and first argument is not -c: leave it alone
+ echo 'x - skipping Makefile.nmk_icl (File already exists)'
+else # file is absent, or -c was given: write it from the here-document, with sed dropping one leading X per line
+echo 'x - extracting Makefile.nmk_icl (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nmk_icl' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+# options for Intel C compiler (v9.1)
+#
+# must be compiled/linked with /MT (or /MTd for debugging) to ensure
+# multi-threaded staticly linked executables. /MD uses dynamic
+# linking to DLL's, which may not be available on the users machine
+X
+CC= icl /O2 /MT /W1
+#CC= icl /Zi /MTd /W1
+CL= icl /O2 /MT
+#CL= icl /Zi /MTd
+X
+# standard options
+CFLAGS= -DSHOWSIM -DWIN32 -DHZ=100 -DPROGRESS -DSAMP_STATS -DPGM_DOC -DTHR_EXIT=pthread_exit -D_CRT_SECURE_NO_WARNINGS=1
+X
+XXDIR = /seqprg/bin
+X
+THR_SUBS = pthr_subs2
+THR_LIBS= pthreadVC2.lib
+X
+DROPNFA_O = drop_nfa.obj
+DROPGSW_O = dropgsw.obj
+DROPGSW_SSE2_O = dropgsw_sse2.obj smith_waterman_sse2.obj
+DROPRSS_O = dropnsw.obj
+DROPRSS_SSE2_O = dropgsw_sse2.obj smith_waterman_sse2.obj
+#
+X
+# renamed (fasta33) programs
+include Makefile34.nmk_com
+# conventional (fasta3) names
+# include Makefile.common
+X
+SHAR_EOF
+chmod 0755 Makefile.nmk_icl ||
+echo 'restore of Makefile.nmk_icl failed' # printed when the sed or the chmod above fails
+Wc_c="`wc -c < 'Makefile.nmk_icl'`" # byte count of the file now on disk
+test 905 -eq "$Wc_c" ||
+ echo 'Makefile.nmk_icl: original size 905, current size' "$Wc_c" # a size mismatch is only reported, extraction continues
+fi
+# ============= Makefile.os_x ==============
+if test -f 'Makefile.os_x' -a X"$1" != X"-c"; then # file exists and first argument is not -c: leave it alone
+ echo 'x - skipping Makefile.os_x (File already exists)'
+else # file is absent, or -c was given: write it from the here-document, with sed dropping one leading X per line
+echo 'x - extracting Makefile.os_x (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.os_x' &&
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+# this file works for DEC Alphas
+#
+# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
+# for mmap()ed BLAST2 format.
+X
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+X
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+X
+# changed to gcc-3.3 for MacOSX Tiger because of problems with Altivec
+#
+X
+# in my hands, gcc-4.0 is about 40% slower than gcc-3.3 on the Altivec code
+#CC= gcc-4.0 -g -falign-loops=32 -O3 -mcpu=7450 -maltivec -mpim-altivec -DSW_ALTIVEC
+X
+CC= gcc-3.3 -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
+#CC= gcc-3.3 -g -DDEBUG -mcpu=7450 -faltivec -DSW_ALTIVEC
+#CC= cc -g -Wall -pedantic -faltivec -DSW_ALTIVEC
+#
+# standard line for normal searching
+CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
+X
+#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
+X
+# add for MySQL support
+# -I/usr/local/mysql/include -DMYSQL_DB
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+#for DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+X
+#for Sun
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+X
+X
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XXDIR = /seqprg/bin
+#XDIR = ./ppc
+X
+DROPNFA_O = drop_nfa.o
+DROPTFA_O = drop_tfa.o
+DROPGSW_O = dropgsw.o smith_waterman_altivec.o
+DROPRSS_O = dropgsw.o smith_waterman_altivec.o
+#DROPGSW_O = dropgsw.o
+#DROPRSS_O = dropgsw.o
+X
+# provide mysql function
+#include Makefile34m.common_sql
+X
+# no mysql
+include Makefile34m.common
+SHAR_EOF
+chmod 0644 Makefile.os_x ||
+echo 'restore of Makefile.os_x failed' # printed when the sed or the chmod above fails
+Wc_c="`wc -c < 'Makefile.os_x'`" # byte count of the file now on disk
+test 2116 -eq "$Wc_c" ||
+ echo 'Makefile.os_x: original size 2116, current size' "$Wc_c" # a size mismatch is only reported, extraction continues
+fi
+# ============= Makefile.os_x86 ==============
+if test -f 'Makefile.os_x86' -a X"$1" != X"-c"; then # file exists and first argument is not -c: leave it alone
+ echo 'x - skipping Makefile.os_x86 (File already exists)'
+else # file is absent, or -c was given: write it from the here-document, with sed dropping one leading X per line
+echo 'x - extracting Makefile.os_x86 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.os_x86' &&
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+# this file works for DEC Alphas
+#
+# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
+# for mmap()ed BLAST2 format.
+X
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+X
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
+X
+# changed to gcc-3.3 for MacOSX Tiger because of problems with Altivec
+#
+X
+CC= gcc -g -O3 -DSW_SSE2 -msse2 -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk
+#CC= gcc -g -DDEBUG
+#CC= cc -g -Wall -pedantic
+#
+# standard line for normal searching
+CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
+X
+#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
+X
+LDFLAGS= -arch i386
+X
+# add for MySQL support
+# -I/usr/local/mysql/include -DMYSQL_DB
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+#for DEC Unix V4.0
+THR_SUBS = pthr_subs2
+THR_LIBS =
+THR_CC =
+X
+#for Sun
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+X
+X
+#XDIR = ${HOME}/bin
+#XDIR = /home/slib/bin/MACOSX/
+#XDIR = /Users/seqprg/bin
+XXDIR = /seqprg/bin
+#XDIR = ./i386
+X
+DROPNFA_O = drop_nfa.o
+DROPTFA_O = drop_tfa.o
+DROPGSW_O = dropgsw.o smith_waterman_sse2.o
+DROPRSS_O = dropgsw.o smith_waterman_sse2.o
+X
+# provide mysql function
+#include Makefile34m.common_sql
+X
+# no mysql
+include Makefile34m.common
+SHAR_EOF
+chmod 0644 Makefile.os_x86 ||
+echo 'restore of Makefile.os_x86 failed' # printed when the sed or the chmod above fails
+Wc_c="`wc -c < 'Makefile.os_x86'`" # byte count of the file now on disk
+test 1917 -eq "$Wc_c" ||
+ echo 'Makefile.os_x86: original size 1917, current size' "$Wc_c" # a size mismatch is only reported, extraction continues
+fi
+# ============= Makefile.pLinux ==============
+if test -f 'Makefile.pLinux' -a X"$1" != X"-c"; then # file exists and first argument is not -c: leave it alone
+ echo 'x - skipping Makefile.pLinux (File already exists)'
+else # file is absent, or -c was given: write it from the here-document, with sed dropping one leading X per line
+echo 'x - extracting Makefile.pLinux (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pLinux' &&
+# $Name: fa_34_26_5 $ - $Id: Makefile.pLinux,v 1.4 2004/11/19 15:28:26 wrp Exp $
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+# this file works for DEC Alphas
+#
+# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
+# for mmap()ed BLAST2 format.
+X
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+X
+# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
+# files
+X
+# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
+X
+CC= xlc_r
+X
+#CC= cc -g3 -O -std1
+#CC= insure -g -DDEBUG
+#CC= cc -g -DDEBUG -std1
+X
+#CC= gcc -g -Wall
+#
+# standard line for normal searching
+CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__
+#
+#(-DMYSQL_DB for mySQL databases) (also requires change to Makefile34.common)
+X
+# special options for SUPERFAMLIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
+X
+LIB_M = -lm
+#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+#for DEC Unix V4.0
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -lpthreads
+#THR_CC =
+X
+#for Sun
+#THR_SUBS = uthr_subs
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+X
+X
+# for IBM Linux with current pthreads
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+#include Makefile34m.common_sql
+include Makefile34m.common
+X
+SHAR_EOF
+chmod 0644 Makefile.pLinux ||
+echo 'restore of Makefile.pLinux failed' # printed when the sed or the chmod above fails
+Wc_c="`wc -c < 'Makefile.pLinux'`" # byte count of the file now on disk
+test 1922 -eq "$Wc_c" ||
+ echo 'Makefile.pLinux: original size 1922, current size' "$Wc_c" # a size mismatch is only reported, extraction continues
+fi
+# ============= Makefile.pLinux_sql ==============
+if test -f 'Makefile.pLinux_sql' -a X"$1" != X"-c"; then # file exists and first argument is not -c: leave it alone
+ echo 'x - skipping Makefile.pLinux_sql (File already exists)'
+else # file is absent, or -c was given: write it from the here-document, with sed dropping one leading X per line
+echo 'x - extracting Makefile.pLinux_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pLinux_sql' &&
+# $Name: fa_34_26_5 $ - $Id: Makefile.pLinux_sql,v 1.4 2004/11/19 15:28:26 wrp Exp $
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+# this file works for DEC Alphas
+#
+# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
+# for mmap()ed BLAST2 format.
+X
+# the -DDEBUG option provides additional debugging information, particularly
+# with -D on the command line.
+X
+# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
+# files
+X
+# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
+X
+CC= xlc_r
+X
+#CC= cc -g3 -O -std1
+#CC= insure -g -DDEBUG
+#CC= cc -g -DDEBUG -std1
+X
+#CC= gcc -g -Wall
+#
+X
+CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__ -DFASTA_HOST='"fasta.bioch.virginia.edu/fasta/cgi"' -I/usr/include/mysql -DMYSQL_DB
+#
+#(-DMYSQL_DB for mySQL databases) (also requires change to Makefile34.common)
+X
+# special options for SUPERFAMLIES
+#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
+X
+#LIB_M = -lm
+LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
+# for mySQL databases
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+#for DEC Unix V4.0
+#THR_SUBS = pthr_subs2
+#THR_LIBS = -threads
+#THR_CC =
+X
+#for Sun
+#THR_SUBS = uthr_subs
+#THR_LIBS = -lthread
+#THR_CC =
+#
+# for SGI with current pthreads
+#THR_SUBS = pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+#
+# for IBM with current pthreads
+#CC= xlc_r -v -g
+#THR_SUBS = ibm_pthr_subs
+#THR_LIBS = -lpthreads
+#THR_CC =
+X
+# for IBM Linux with current pthreads
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common_sql
+X
+SHAR_EOF
+chmod 0644 Makefile.pLinux_sql ||
+echo 'restore of Makefile.pLinux_sql failed' # printed when the sed or the chmod above fails
+Wc_c="`wc -c < 'Makefile.pLinux_sql'`" # byte count of the file now on disk
+test 1946 -eq "$Wc_c" ||
+ echo 'Makefile.pLinux_sql: original size 1946, current size' "$Wc_c" # a size mismatch is only reported, extraction continues
+fi
+# ============= Makefile.pcom ==============
+if test -f 'Makefile.pcom' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.pcom (File already exists)'
+else
+echo 'x - extracting Makefile.pcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pcom' &&
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = mw.h
+MWHP = mw.h
+X
+TPROGS = ssearch34_t fasta34_t fasts34_t tfasta34_t fastx34_t tfastx34_t fasty34_t tfasty34_t tfasts34_t fastm34_t fastf34_t tfastf34_t prss34_t prfx34_t
+X
+SPROGS = fasta34 ssearch34 fasts34 tfasta34 fastx34 tfastx34 fasty34 tfasty34 tfasts34 fastm34 tfastm34 prss34 prfx34 fastf34 tfastf34
+X
+APROGS = map_db
+X
+XXTPROGS = fastx34_t tfastx34_t fasty34_t tfasty34_t
+XXPROGS = fastx34 tfastx34 fasty34 tfasty34
+X
+PROGS = $(SPROGS) $(TPROGS)
+X
+all : $(PROGS)
+X
+tall: $(TPROGS)
+X
+sall: $(SPROGS)
+X
+xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS)
+X
+clean-up:
+X rm -f *.o $(PROGS)
+X
+install: $(PROGS)
+X cp $(PROGS) $(XDIR)
+X
+sinstall: $(SPROGS)
+X cp $(SPROGS) $(XDIR)
+X
+tinstall: $(TPROGS)
+X cp $(TPROGS) $(XDIR)
+X
+fasta34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fasta34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
+X
+fasta34u : $(COMP_LIBO) compacc.o showun.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasta34u $(COMP_LIBO) compacc.o showun.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o$(LIB_M)
+X
+fasta34r : $(COMP_LIBO) compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasta34r $(COMP_LIBO) compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o$(LIB_M)
+X
+fasta34s : $(COMP_LIBO) compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasta34s $(COMP_LIBO) compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
+X
+fastx34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fastx34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o drop_fx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+fastx34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o c_dispn.o htime.o apam.o doinit.o init_fx.o faatran.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fastx34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+fasty34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o scaleswn.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasty34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o drop_fz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+fastf34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswts.o last_tat.o tatstats_ff.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastf34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswts.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
+X
+fastf34u : $(COMP_LIBO) compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastf34u $(COMP_LIBO) compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
+X
+fastf34s : $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastf34s $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
+X
+fasts34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_fs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fasts34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
+X
+fastm34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o drop_fm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastm34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fm.o drop_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
+X
+tfastx34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o scaleswn.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfastx34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+tfasty34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o scaleswn.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfasty34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+tfastf34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfastf34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
+X
+tfastf34s : $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o scaleswtf.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfastf34s $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
+X
+tfasts34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o drop_tfs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfasts34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o drop_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
+X
+tfastm34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o drop_tfm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfastm34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfm.o drop_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
+X
+ssearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) ssearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
+X
+osearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ssw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) osearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ssw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
+X
+usearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) usearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
+X
+prss34 : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) prss34 rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
+X
+prfx34 : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o faatran.o
+X $(CC) $(HFLAGS) prfx34 rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+prss34o : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) prss34o rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
+X
+prfx34o : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showrss.o lib_sel.o $(NRAND).o faatran.o
+X $(CC) $(HFLAGS) prfx34o rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o faatran.o $(NRAND).o $(LIB_M)
+X
+ssearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) ssearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+X
+ssearch34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) ssearch34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+ssearch34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) ssearch34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+osearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) osearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+usearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) usearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+fasta34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasta34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+fasta34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasta34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+fasta34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fasta34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fasta34r_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fasta34r_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fastf34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastf34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fastf34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastf34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fasts34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_fs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fasts34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fastm34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fm.o karlin.o drop_fm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) fastm34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+fastx34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_fx.o faatran.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fastx34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+fasty34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_fy.o faatran.o scaleswn.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) fasty34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o drop_fz.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+tfasta34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o scaleswn.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfasta34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
+X
+tfasta34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tfa.o scaleswn.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfasta34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+tfastf34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfastf34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+tfasts34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_tfs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
+X $(CC) $(HFLAGS) tfasts34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o drop_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
+X
+tfastx34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o scaleswn.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfastx34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+tfasty34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o scaleswn.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
+X $(CC) $(HFLAGS) tfasty34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+prss34_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showbest.o $(SHOWALIGN).o c_dispn.o url_subs.o lib_sel.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) prss34_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showbest.o $(SHOWALIGN).o c_dispn.o url_subs.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+X
+prss34o_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o
+X $(CC) $(HFLAGS) prss34o_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
+X
+prfx34_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o faatran.o
+X $(CC) $(HFLAGS) prfx34_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
+X
+comp_lib.o : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c comp_lib.c
+X
+comp_mlib.o : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib.c -o comp_mlib.o
+X
+rcomp_lib.o : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS comp_lib.c -o rcomp_lib.o
+X
+comp_thr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -c comp_lib.c -o comp_thr.o
+X
+comp_mthr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib.c -o comp_mthr.o
+X
+rcomp_thr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(THR_CC) $(CFLAGS) -DPRSS -DCOMP_THR -c comp_lib.c -o rcomp_thr.o
+X
+work_thr.o : work_thr.c mw.h structs.h defs.h param.h thr.h
+X $(CC) $(THR_CC) $(CFLAGS) -c work_thr.c
+X
+print_pssm : print_pssm.c getseq.c karlin.c apam.c
+X $(CC) -o print_pssm $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c $(LIB_M)
+X
+map_db : map_db.c uascii.h ncbl2_head.h
+X $(CC) -o map_db map_db.c
+X
+list_db : list_db.c
+X $(CC) -o list_db list_db.c
+X
+SHAR_EOF
+chmod 0644 Makefile.pcom ||
+echo 'restore of Makefile.pcom failed'
+Wc_c="`wc -c < 'Makefile.pcom'`"
+test 24893 -eq "$Wc_c" ||
+ echo 'Makefile.pcom: original size 24893, current size' "$Wc_c"
+fi
+# ============= Makefile.pvcom ==============
+# Unpack Makefile.pvcom. Its payload defines the PROGS, WPROGS, SPROGS and
+# UPROGS lists plus the link rules for the pv34comp*, ps34comp*, pu34comp*
+# programs and the c34.work* workers.
+# Extraction is skipped when the file already exists, unless the first
+# argument to this script is -c.
+if test -f 'Makefile.pvcom' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.pvcom (File already exists)'
+else
+# sed removes one leading X from every here-document line before writing it.
+# The SHAR_EOF delimiter is quoted, so the shell does not expand the $(...)
+# and ${...} references in the payload. The file is then set to mode 0644;
+# the restore-failed message is printed if either sed or chmod fails.
+echo 'x - extracting Makefile.pvcom (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvcom' &&
+X
+SHOWBESTC = mshowbest.c
+SHOWALIGN = mshowalign
+MWH = p_mw.h
+MWHP = p_mw.h w_mw.h
+X
+# normal search programs are pv3compfa, etc.
+# each main program requires a worker pv3compfa/c34.workfa
+X
+PROGS= pv34compfa pv34compsw pv34compfx pv34comptfx pv34compfy pv34comptfy pv34compfs pv34comptfs
+X
+WPROGS = c34.workfa c34.worksw c34.workgsw c34.workfx c34.worktfx c34.workfy c34.worktfy c34.workfs c34.worktfs
+# ps4compfa, etc provides a summaries of effectiveness, require superfamily
+# annotated database. ps4compss uses c34.worksw instead of c34.workgsw, thus
+# allowing high gap penalties.
+X
+SPROGS = ps34compfa ps34compsw ps34compss ps34compfx ps34compfy ps34comptfx ps34comptfy
+X
+# report highest unrelated sequences
+UPROGS = pu34compfa pu34compsw pu34compfx pu34comptfx pu34compfy pu34comptfy
+X
+vall : $(PROGS) $(WPROGS)
+X
+uall : $(UPROGS) $(WPROGS)
+X
+sall : $(SPROGS) $(WPROGS)
+X
+all : $(PROGS) $(UPROGS) $(SPROGS) $(WPROGS)
+X
+clean-up:
+X rm -f *.o $(PROGS) $(WPROGS) $(SPROGS) $(UPROGS)
+X
+install : $(PROGS) $(WPROGS)
+X cp $(PROGS) $(WPROGS) $(XDIR)
+X
+sinstall : $(SPROGS) $(WPROGS)
+X cp $(SPROGS) $(WPROGS) $(XDIR)
+X
+uinstall : $(UPROGS) $(WPROGS)
+X cp $(UPROGS) $(WPROGS) $(XDIR)
+X
+pv34compfa : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o ${LGETLIB} $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compfa p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o ${LGETLIB} $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34compfa : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34compfa p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34compfa : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34compfa p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pr4compfa : p2_complib.o compacc.o showrel.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pr4compfa p2_complib.o compacc.o showrel.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34compsw : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compsw p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34compsw : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34compsw p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34compsw : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34compsw p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34compss : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compss p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34compss : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34compss p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34compss : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34compss p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34compfs : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compfs p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34compfx : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compfx p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34compfx : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34compfx p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34compfx : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34compfx p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34compfy : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34compfy p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34compfy : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34compfy p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34compfy : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34compfy p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34comptfx : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34comptfx p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34comptfx : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34comptfx p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34comptfx : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34comptfx p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34comptfy : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34comptfy p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+ps34comptfy : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) ps34comptfy p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pu34comptfy : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pu34comptfy p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+pv34comptfs : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
+X $(CC) $(HFLAGS) pv34comptfs p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
+X
+c34.workfa : p2_workcomp.o $(DROPNFA_O) workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.workfa p2_workcomp.o $(DROPNFA_O) workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
+X
+c34.worksw : p2_workcomp.o dropnsw.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.worksw p2_workcomp.o dropnsw.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
+X
+c34.workgsw : p2_workcomp.o $(DROPGSW_O) workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.workgsw p2_workcomp.o $(DROPGSW_O) workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
+X
+c34.worknw : p2_workcomp.o dropnw.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.worknw p2_workcomp.o dropnw.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
+X
+c34.workfx : p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.workfx p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
+X
+c34.workfs : p2_workcomp.o drop_fs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o
+X $(NCC) $(NFLAGS) c34.workfs p2_workcomp.o drop_fs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o $(PLIB) $(LIB_WM)
+X
+c34.worktfs : p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o
+X $(NCC) $(NFLAGS) c34.worktfs p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o $(PLIB) $(LIB_WM)
+X
+c34.workfy : p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.workfy p2_workcomp.o drop_fz.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
+X
+c34.worktfx : p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.worktfx p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
+X
+c34.worktfy : p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o karlin.o
+X $(NCC) $(NFLAGS) c34.worktfy p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
+X
+p2_complib.o : p2_complib.c msg.h defs.h upam.h uascii.h param.h structs.h
+X $(CC) -DWORKERPGM=\"c34.work\" $(CFLAGS) p2_complib.c
+X
+p2_workcomp.o : p2_workcomp.c structs.h msg.h defs.h p_mw.h w_mw.h upam.h uascii.h param.h
+X $(NCC) $(CFLAGS) p2_workcomp.c
+X
+SHAR_EOF
+chmod 0644 Makefile.pvcom ||
+echo 'restore of Makefile.pvcom failed'
+# Compare the extracted byte count with the recorded 13214 bytes. A mismatch
+# is only reported; it does not stop the script.
+Wc_c="`wc -c < 'Makefile.pvcom'`"
+test 13214 -eq "$Wc_c" ||
+ echo 'Makefile.pvcom: original size 13214, current size' "$Wc_c"
+fi
+# ============= Makefile.pvm4 ==============
+# Unpack Makefile.pvm4. Its payload sets CC, NCC, PLIB, CFLAGS and related
+# variables for a PVM build, then includes Makefile.pvcom and Makefile.fcom.
+# Extraction is skipped when the file already exists, unless the first
+# argument to this script is -c.
+if test -f 'Makefile.pvm4' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.pvm4 (File already exists)'
+else
+# sed removes one leading X from every here-document line before writing it,
+# so the payload line starting XXDIR is written out as XDIR. The SHAR_EOF
+# delimiter is quoted, so the shell does not expand the $(...) and ${...}
+# references in the payload. The file is then set to mode 0644; the
+# restore-failed message is printed if either sed or chmod fails.
+echo 'x - extracting Makefile.pvm4 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvm4' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile.pvm4,v 1.35 2006/12/06 16:53:12 wrp Exp $
+#
+# tested with pvm3.4.beta7 and pvm3.3.11. Tested on DEC Alpha, x86
+# and Alpha LINUX for DEC/Compaq Alpha/LINUX
+#
+X
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+#CC= cc -O -ggdb -DDEBUG
+CC= cc -ggdb -O3 -falign-loops=32 -mcpu=7450 -DMacOSX -faltivec -DSW_ALTIVEC
+X
+X
+#NCC= cc -O3 -ggdb
+NCC= cc -g -falign-loops=32 -O3 -mcpu=7450 -DMacOSX -faltivec -DSW_ALTIVEC
+X
+#ARCH = NETBSDPOWERPC
+X
+PLIB = ${PVM_ROOT}/lib/$(ARCH)/libpvm3.a
+XXDIR = /home/slib/pvm3/bin/$(ARCH)
+#XDIR = /wrpx00.p0/users/wrp/pvm3/bin/$(ARCH)
+SDIR = .
+PVMSRC = ${PVM_ROOT}/src
+X
+CFLAGS= -DPVM_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -I${PVM_ROOT}/include -DSRAND=srandom -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DSHOWSIM
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -L/usr/lib/mysql -lmysqlclient -lm -lz
+LIB_M= -lm
+LIB_WM= -lm
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+DROPGSW_O = dropgsw.o smith_waterman_altivec.o
+DROPNFA_O = drop_nfa.o
+X
+# common pv34comp programs
+include Makefile.pvcom
+X
+# common *.o files for all environments
+include Makefile.fcom
+SHAR_EOF
+chmod 0644 Makefile.pvm4 ||
+echo 'restore of Makefile.pvm4 failed'
+# Compare the extracted byte count with the recorded 1344 bytes. A mismatch
+# is only reported; it does not stop the script.
+Wc_c="`wc -c < 'Makefile.pvm4'`"
+test 1344 -eq "$Wc_c" ||
+ echo 'Makefile.pvm4: original size 1344, current size' "$Wc_c"
+fi
+# ============= Makefile.pvm4_sql ==============
+# Unpack Makefile.pvm4_sql. Its payload sets the PVM build variables with
+# -DMYSQL_DB in CFLAGS, mysql_lib.o in NCBL_LIB and -lmysqlclient in LIB_M,
+# then includes Makefile.pvcom and Makefile.fcom.
+# Extraction is skipped when the file already exists, unless the first
+# argument to this script is -c.
+if test -f 'Makefile.pvm4_sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.pvm4_sql (File already exists)'
+else
+# sed removes one leading X from every here-document line before writing it,
+# so the payload line starting XXDIR is written out as XDIR. The SHAR_EOF
+# delimiter is quoted, so the shell does not expand the $(...) and ${...}
+# references in the payload. The file is then set to mode 0644; the
+# restore-failed message is printed if either sed or chmod fails.
+echo 'x - extracting Makefile.pvm4_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvm4_sql' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile.pvm4_sql,v 1.26 2006/05/19 16:29:45 wrp Exp $
+#
+# tested with pvm3.4.beta7 and pvm3.3.11. Tested on DEC Alpha, x86
+# and Alpha LINUX for DEC/Compaq Alpha/LINUX
+#
+X
+#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
+CC= cc -g
+NCC= cc -O -g
+X
+#ARCH = NETBSDPOWERPC
+X
+PLIB = ${PVM_ROOT}/lib/$(ARCH)/libpvm3.a
+XXDIR = /seqprg/pvm3/bin/$(ARCH)
+#XDIR = /wrpx00.p0/users/wrp/pvm3/bin/$(ARCH)
+SDIR = .
+PVMSRC = ${PVM_ROOT}/src
+X
+CFLAGS= -DPVM_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -DBIGMEM -I${PVM_ROOT}/include -DSRAND=srandom -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DMYSQL_DB -I/usr/include/mysql -DM10_CONS -DSHOWSIM
+# -DSFCHAR="'|'" -DSUPERFAMNUM
+X
+HFLAGS= -o
+NFLAGS= -o
+X
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+# pgsql_lib.o
+#NCBL_LIB=ncbl2_mlib.o
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lm
+#LIB_M= -lm
+LIB_WM= -lm
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# non-ALTIVEC versions
+DROPGSW_O = dropgsw.o
+DROPNFA_O = drop_nfa.o
+X
+# common pv34comp programs
+include Makefile.pvcom
+X
+# common *.o files for all environments
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile.pvm4_sql ||
+echo 'restore of Makefile.pvm4_sql failed'
+# Compare the extracted byte count with the recorded 1264 bytes. A mismatch
+# is only reported; it does not stop the script.
+Wc_c="`wc -c < 'Makefile.pvm4_sql'`"
+test 1264 -eq "$Wc_c" ||
+ echo 'Makefile.pvm4_sql: original size 1264, current size' "$Wc_c"
+fi
+# ============= Makefile.sgi ==============
+# Unpack Makefile.sgi. Its payload sets CC, HFLAGS, NFLAGS, CFLAGS, the
+# THR_* thread settings and the DROP*_O object lists for SGI, then includes
+# Makefile34m.common.
+# Extraction is skipped when the file already exists, unless the first
+# argument to this script is -c.
+if test -f 'Makefile.sgi' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.sgi (File already exists)'
+else
+# sed removes one leading X from every here-document line before writing it,
+# so the payload line starting XXDIR is written out as XDIR. The SHAR_EOF
+# delimiter is quoted, so the shell leaves the payload text unexpanded.
+# The file is then set to mode 0644; the restore-failed message is printed
+# if either sed or chmod fails.
+echo 'x - extracting Makefile.sgi (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sgi' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+#
+# for more information on FASTA on SGI's, see:
+#
+# http://www.sgi.com/chembio/resources/fasta/index.html
+#
+# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files. This
+# only works on SGI's with the -64 option.
+X
+CC= cc -w -64 -mips4 -O2 -TENV:X=3 -DSGI_BUG -Wl,-multigot -DIRIX
+#CC= cc -64 -mips4 -g -DSGI_BUG -DDEBUG -DIRIX
+X
+HFLAGS= -64 -mips4 -o
+NFLAGS= -64 -mips4 -o
+X
+#CC= cc -g
+#HFLAGS= -o
+#NFLAGS= -o
+X
+LIB_M= -lm
+# For R2000/R3000 MIPS Processors, use -mips1
+#
+#CC= cc -mips1 -O2
+#HFLAGS= -mips1 -o
+#NFLAGS= -mips1 -o
+#
+# For R4000 MIPS Processors, use -mips2:
+#
+#CC = cc -mips2 -O2
+#HFLAGS= -mips2 -o
+#NFLAGS= -mips2 -o
+#
+X
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"crick.med.virginia.edu/fasta/cgi"' -DIS_BIG_ENDIAN -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
+X
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+XXDIR = /seqprg/slib/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.sgi ||
+echo 'restore of Makefile.sgi failed'
+# Compare the extracted byte count with the recorded 1238 bytes. A mismatch
+# is only reported; it does not stop the script.
+Wc_c="`wc -c < 'Makefile.sgi'`"
+test 1238 -eq "$Wc_c" ||
+ echo 'Makefile.sgi: original size 1238, current size' "$Wc_c"
+fi
+# ============= Makefile.sun ==============
+if test -f 'Makefile.sun' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.sun (File already exists)'
+else
+echo 'x - extracting Makefile.sun (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sun' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+X
+#CC= cc -g -xarch=v8plusa
+X
+# switches for 64-bit addressing
+CC= cc -fast -xO4 -xarch=v9
+#CC= cc -g -xarch=v9
+X
+# for SUNMP, use -DTHR_EXIT=thr_exit
+# HZ=100 for Solaris x86
+# -DIS_LITTLE_ENDIAN for Solaris x86
+X
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit -DPROGRESS -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
+HFLAGS= -o
+NFLAGS= -o
+X
+# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# for files > 2 GB
+X
+#for Sun pthreads (preferred, pthreads used on all other platforms)
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+#for Sun threads (no longer necessary as Sun supports pthreads)
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+X
+LIB_M= -lmopt
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPGSW_O = dropgsw.o
+DROPRSS_O = dropnsw.o
+DROPTFA_O = drop_tfa.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.sun ||
+echo 'restore of Makefile.sun failed'
+Wc_c="`wc -c < 'Makefile.sun'`"
+test 1150 -eq "$Wc_c" ||
+ echo 'Makefile.sun: original size 1150, current size' "$Wc_c"
+fi
+# ============= Makefile.sun_x86 ==============
+if test -f 'Makefile.sun_x86' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.sun_x86 (File already exists)'
+else
+echo 'x - extracting Makefile.sun_x86 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sun_x86' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+X
+# switches for 64-bit addressing - AMD64
+CC= cc -g -fast -xarch=amd64 -DSW_SSE2
+X
+# debugging options
+#CC= cc -g -DDEBUG -xarch=amd64 -DSW_SSE2
+X
+# for SUNMP, use -DTHR_EXIT=thr_exit
+# HZ=100 for Solaris x86
+# Solaris X86 is little endian - be certain IS_BIG_ENDIAN is not defined
+X
+CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit -DPROGRESS -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
+HFLAGS= -o
+NFLAGS= -o
+X
+# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# for files > 2 GB
+X
+#for Sun pthreads (preferred, pthreads used on all other platforms)
+THR_SUBS = pthr_subs2
+THR_LIBS = -lpthread
+THR_CC =
+X
+#for Sun threads (no longer necessary as Sun supports pthreads)
+#THR_SUBS = uthr_subs2
+#THR_LIBS = -lthread
+#THR_CC =
+X
+LIB_M= -lmopt
+XXDIR = /seqprg/bin
+X
+DROPNFA_O = drop_nfa.o
+DROPTFA_O = drop_tfa.o
+DROPGSW_O = dropgsw.o smith_waterman_sse2.o
+DROPRSS_O = dropnsw.o smith_waterman_sse2.o
+X
+# renamed (fasta34) programs
+include Makefile34m.common
+# conventional (fasta3) names
+# include Makefile.common
+SHAR_EOF
+chmod 0644 Makefile.sun_x86 ||
+echo 'restore of Makefile.sun_x86 failed'
+Wc_c="`wc -c < 'Makefile.sun_x86'`"
+test 1264 -eq "$Wc_c" ||
+ echo 'Makefile.sun_x86: original size 1264, current size' "$Wc_c"
+fi
+# ============= Makefile.tc ==============
+if test -f 'Makefile.tc' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile.tc (File already exists)'
+else
+echo 'x - extracting Makefile.tc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile.tc' &&
+#
+# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
+X
+# MSDOS Borland C commands
+#CC= bcc -mm -w-rvl -w-pro -3 -O
+CC= bcc32 -WC -w-rvl -w-pro -3 -O
+#CFLAGS= -IC:\bc5\include -DFAR_PTR -DALLOCN0 -DMSDOS
+CFLAGS=-IC:\bc5\include -DBIGMEM -DALLOCN0 -DMSDOS
+#CL= bcc -mm
+CL= bcc32 -WC
+LFLAGS= -LC:\bc5\lib
+NRAND= nrand
+HZ=100
+X
+X
+XXDIR = /seqprg/slib/bin
+X
+SPROGS = fasta34.exe ssearch34.exe fastx34.exe tfastx34.exe fasty34.exe tfasty34.exe fasts34.exe tfasts34.exe prss34.exe prfx34.exe
+X
+PROGS = $(SPROGS)
+X
+all : $(PROGS)
+X
+sall: $(SPROGS)
+X
+clean-up:
+X del *.obj $(PROGS)
+X
+install:
+X cp $(PROGS) $(XDIR)
+X
+fasta34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj drop_nfa.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj
+X $(CL) $(LFLAGS) -efasta34.exe comp_lib.obj showalig.obj init_fa.obj drop_nfa.obj getseq.obj @fasta3.rsp -lm
+X
+fastx34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fx.obj scaleswn.obj karlin.obj drop_fx.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj getopt.obj
+X $(CL) $(LFLAGS) -efastx34.exe comp_lib.obj showalig.obj init_fx.obj drop_fx.obj faatran.obj getseq.obj @fasta3.rsp -lm
+X
+fasty34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fy.obj scaleswn.obj karlin.obj drop_fz.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
+X $(CL) $(LFLAGS) -efasty34.exe comp_lib.obj showalig.obj init_fy.obj drop_fz.obj faatran.obj getseq.obj @fasta3.rsp -lm
+X
+tfastx34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfx.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj scaleswn.obj karlin.obj tdropfx.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
+X $(CL) $(LFLAGS) -etfastx34.exe comp_lib.obj showalig.obj init_tfx.obj tdropfx.obj faatran.obj getseq.obj @fasta3.rsp -lm
+X
+tfasty34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfy.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj scaleswn.obj karlin.obj tdropfz.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
+X $(CL) $(LFLAGS) -etfasty34.exe comp_lib.obj showalig.obj init_tfy.obj tdropfz.obj faatran.obj getseq.obj @fasta3.rsp -lm
+X
+ssearch34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropgsw.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj pssm_asn_subs.obj
+X $(CL) $(LFLAGS) -essearch34.exe comp_lib.obj showalig.obj init_sw.obj dropgsw.obj getseq.obj pssm_asn_subs.obj @fasta3.rsp -lm
+X
+fasts34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fs.obj dropfs2.obj scaleswt.obj karlin.obj tatsta_s.obj last_tat.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj
+X $(CL) $(LFLAGS) -efasts34.exe comp_lib.obj showalig.obj init_fs.obj dropfs2.obj getseq.obj @fasts3.rsp -lm
+X
+tfasts34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfs.obj droptfs2.obj scaleswt.obj karlin.obj tatsttfs.obj last_tat.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj faatran.obj getopt.obj
+X $(CL) $(LFLAGS) -etfasts34.exe comp_lib.obj showalig.obj init_tfs.obj droptfs2.obj getseq.obj faatran.obj @tfasts3.rsp -lm
+X
+prss34.exe : rcomplib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj dropgsw.obj llgetaa.obj showrss.obj lib_sel.obj nrand.obj getopt.obj pssm_asn_subs.obj
+X $(CL) $(LFLAGS) -eprss34.exe rcomplib.obj init_rss.obj dropgsw.obj llgetaa.obj nrand.obj @prss3.rsp -lm
+X
+prfx34.exe : rcomplib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj faatran.obj showrss.obj lib_sel.obj nrand.obj getopt.obj
+X $(CL) $(LFLAGS) -eprfx34.exe rcomplib.obj init_rfx.obj drop_fx.obj faatran.obj llgetaa.obj nrand.obj @prss3.rsp -lm
+X
+comp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(CFLAGS) -DPGM_DOC -ocomp_lib.obj -c comp_lib.c
+X
+rcomplib.obj : comp_lib.c mw.h structs.h defs.h param.h
+X $(CC) $(CFLAGS) -DPRSS -orcomplib.obj -c comp_lib.c
+X
+htime.obj : htime.c
+X $(CC) $(CFLAGS) -c htime.c
+X
+hxgetaa.obj : hxgetaa.c altlib.h upam.h uascii.h
+X $(CC) $(CFLAGS) -c hxgetaa.c
+X
+init_sw.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DSSEARCH -oinit_sw.obj initfa.c
+X
+init_ssw.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DOSEARCH -oinit_ssw.obj initfa.c
+X
+init_rss.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS -oinit_rss.obj initfa.c
+X
+init_rfx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DPRFX -oinit_rfx.obj initfa.c
+X
+init_fa.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -oinit_fa.obj initfa.c
+X
+init_ff.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -oinit_ff.obj initfa.c
+X
+init_tf.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST -oinit_tf.obj initfa.c
+X
+init_fs.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -oinit_fs.obj initfa.c
+X
+init_fm.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -oinit_fm.obj initfa.c
+X
+init_tfs.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -DTFAST -oinit_tfs.obj initfa.c
+X
+init_tfm.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -DTFAST -oinit_tfm.obj initfa.c
+X
+init_tfa.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -DTFAST -oinit_tfa.obj initfa.c
+X
+init_fx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -oinit_fx.obj initfa.c
+X
+init_tfx.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -DTFAST -oinit_tfx.obj initfa.c
+X
+init_fy.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -oinit_fy.obj initfa.c
+X
+init_tfy.obj : initfa.c defs.h param.h upam.h structs.h
+X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -DTFAST -oinit_tfy.obj initfa.c
+X
+doinit.obj : doinit.c defs.h param.h upam.h structs.h
+X $(CC) $(CFLAGS) -c doinit.c
+X
+compacc.obj : compacc.c upam.h uascii.h param.h structs.h mw.h defs.h
+X $(CC) $(CFLAGS) -c compacc.c
+X
+showbest.obj : mshowbest.c mw.h defs.h param.h structs.h mm_file.h
+X $(CC) $(CFLAGS) -oshowbest.obj -c mshowbest.c
+X
+showrss.obj : showrss.c mw.h defs.h param.h structs.h
+X $(CC) $(CFLAGS) -c showrss.c
+X
+showalig.obj : mshowalign.c mw.h defs.h structs.h param.h
+X $(CC) $(CFLAGS) -oshowalig.obj -c mshowalign.c
+X
+c_dispn.obj : c_dispn.c defs.h structs.h param.h
+X $(CC) $(CFLAGS) -c c_dispn.c
+X
+lib_sel.obj : lib_sel.c defs.h structs.h
+X $(CC) $(CFLAGS) -c lib_sel.c
+X
+scaleswn.obj : scaleswn.c defs.h mw.h alt_parms.h
+X $(CC) $(CFLAGS) -c scaleswn.c
+X
+scaleswt.obj : scaleswt.c defs.h mw.h alt_parms.h
+X $(CC) $(CFLAGS) -c scaleswt.c
+X
+tatsta_s.obj : tatstats.c defs.h mw.h alt_parms.h tatstats.h
+X $(CC) $(CFLAGS) -DFASTS -otatsta_s.obj -c tatstats.c
+X
+tatsttfs.obj : tatstats.c defs.h mw.h alt_parms.h tatstats.h
+X $(CC) $(CFLAGS) -DTFAST -DFASTS -otatsttfs.obj -c tatstats.c
+X
+karlin.obj : karlin.c param.h
+X $(CC) $(CFLAGS) -c karlin.c
+X
+scaleswg.obj : scaleswg.c defs.h mw.h alt_parms.h
+X $(CC) $(CFLAGS) -c scaleswg.c
+X
+drop_nfa.obj : dropnfa.c param.h defs.h
+X $(CC) $(CFLAGS) -odrop_nfa.obj -c dropnfa.c
+X
+drop_ff.obj : dropffa.c mw.h param.h defs.h
+X $(CC) $(CFLAGS) -odrop_ff.obj -c dropffa.c
+X
+drop_tff.obj : dropffa.c mw.h param.h defs.h
+X $(CC) $(CFLAGS) -DTFAST -odrop_tff.obj -c dropffa.c
+X
+drop_fx.obj : dropfx.c mw.h upam.h param.h defs.h
+X $(CC) $(CFLAGS) -DFASTX -odrop_fx.obj -c dropfx.c
+X
+dropfs2.obj : dropfs2.c mw.h upam.h param.h defs.h tatstats.h
+X $(CC) $(CFLAGS) -DFASTS -c dropfs2.c
+X
+droptfs2.obj : dropfs2.c mw.h upam.h param.h defs.h tatstats.h
+X $(CC) $(CFLAGS) -DTFAST -DFASTS -c -odroptfs2.obj dropfs2.c
+X
+tdropfx.obj : dropfx.c mw.h upam.h param.h defs.h
+X $(CC) $(CFLAGS) -DTFAST -otdropfx.obj -c dropfx.c
+X
+drop_fz.obj : dropfz2.c mw.h upam.h param.h defs.h aamap.h
+X $(CC) $(CFLAGS) -odrop_fz.obj -c dropfz2.c
+X
+tdropfz.obj : dropfz2.c mw.h upam.h param.h defs.h aamap.h
+X $(CC) $(CFLAGS) -DTFAST -otdropfz.obj -c dropfz2.c
+X
+dropnsw.obj : dropnsw.c mw.h upam.h param.h structs.h
+X $(CC) $(CFLAGS) -c dropnsw.c
+X
+dropgsw.obj : dropgsw.c mw.h upam.h param.h structs.h
+X $(CC) $(CFLAGS) -c dropgsw.c
+X
+dropnw.obj : dropnw.c mw.h upam.h param.h structs.h
+X $(CC) $(CFLAGS) -c dropnw.c
+X
+llgetaa.obj : llgetaa.c altlib.h upam.h uascii.h
+X $(CC) $(CFLAGS) -DNOLIB -c llgetaa.c
+X
+lgetlib.obj : nmgetlib.c altlib.h upam.h uascii.h
+X $(CC) $(CFLAGS) -olgetlib.obj -c nmgetlib.c
+X
+regetlib.obj : re_getlib.c mw.h mm_file.h
+X $(CC) $(CFLAGS) -oregetlib.obj -c re_getlib.c
+X
+getseq.obj : getseq.c defs.h uascii.h structs.h upam.h
+X $(CC) $(CFLAGS) -c getseq.c
+X
+ncbl_lib.obj : ncbl_lib.c ncbl_head.h
+X $(CC) $(CFLAGS) -c ncbl_lib.c
+X
+ncbl2_lib.obj : ncbl2_mlib.c ncbl2_head.h
+X $(CC) $(CFLAGS) -c ncbl2_mlib.c
+X
+faatran.obj : faatran.c upam.h uascii.h
+X $(CC) $(CFLAGS) -c faatran.c
+X
+url_subs.obj : url_subs.c structs.h param.h
+X $(CC) $(CFLAGS) -c url_subs.c
+X
+nrand48.obj : nrand48.c
+X $(CC) $(CFLAGS) -c nrand48.c
+X
+nrand.obj : nrand.c
+X $(CC) $(CFLAGS) -c nrand.c
+X
+getopt.obj : getopt.c
+X $(CC) $(CFLAGS) -c getopt.c
+SHAR_EOF
+chmod 0644 Makefile.tc ||
+echo 'restore of Makefile.tc failed'
+Wc_c="`wc -c < 'Makefile.tc'`"
+test 9746 -eq "$Wc_c" ||
+ echo 'Makefile.tc: original size 9746, current size' "$Wc_c"
+fi
+# ============= Makefile34.common ==============
+if test -f 'Makefile34.common' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34.common (File already exists)'
+else
+echo 'x - extracting Makefile34.common (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.common' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34.common,v 1.9 2004/02/19 18:29:43 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+# COMP_LIBO=comp_lib.o
+# COMP_THRO=comp_thr.o
+# GETSEQO = getseq.o
+X
+# use for multiple query sequences
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib.o
+COMP_THRO=comp_mthr.o
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.o lgetlib.o
+NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+SHAR_EOF
+chmod 0644 Makefile34.common ||
+echo 'restore of Makefile34.common failed'
+Wc_c="`wc -c < 'Makefile34.common'`"
+test 1304 -eq "$Wc_c" ||
+ echo 'Makefile34.common: original size 1304, current size' "$Wc_c"
+fi
+# ============= Makefile34.common_sql ==============
+if test -f 'Makefile34.common_sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34.common_sql (File already exists)'
+else
+echo 'x - extracting Makefile34.common_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.common_sql' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34.common_sql,v 1.10 2005/12/07 17:22:02 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+COMP_LIBO=comp_lib.o
+COMP_THRO=comp_thr.o
+GETSEQO = getseq.o
+# use for multiple query sequences, requires "-n" for DNA fasta, does not
+# work with prss34 (yet)
+#COMP_LIB=comp_mlib.o
+#COMP_THRO=comp_mthr.o
+#
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+# LGETLIB=getseq.o lgetlib.o lgetaa_m.o
+# NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+#LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+#NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile34.common_sql ||
+echo 'restore of Makefile34.common_sql failed'
+Wc_c="`wc -c < 'Makefile34.common_sql'`"
+test 1330 -eq "$Wc_c" ||
+ echo 'Makefile34.common_sql: original size 1330, current size' "$Wc_c"
+fi
+# ============= Makefile34.nmk_com ==============
+if test -f 'Makefile34.nmk_com' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34.nmk_com (File already exists)'
+else
+echo 'x - extracting Makefile34.nmk_com (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.nmk_com' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34.nmk_com,v 1.2 2006/10/06 17:26:47 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+# COMP_LIBO=comp_lib.obj
+# COMP_THRO=comp_thr.obj
+# GETSEQO = getseq.obj
+X
+# use for multiple query sequences
+COMP_LIBO=comp_mlib.obj
+COMP_THRO=comp_mthr.obj
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+LGETLIB=getseq.obj lgetlib.obj
+NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+# no memory mapping for Win32
+#LGETLIB= lgetlib.obj lgetaa_m.obj
+X
+NRAND=nrand
+X
+# normally use ncbl2_mlib.c
+NCBL_LIB=ncbl2_mlib.obj
+#LIB_M= -lm
+X
+include Makefile.nm_pcom
+X
+include Makefile.nm_fcom
+SHAR_EOF
+chmod 0755 Makefile34.nmk_com ||
+echo 'restore of Makefile34.nmk_com failed'
+Wc_c="`wc -c < 'Makefile34.nmk_com'`"
+test 765 -eq "$Wc_c" ||
+ echo 'Makefile34.nmk_com: original size 765, current size' "$Wc_c"
+fi
+# ============= Makefile34m.common ==============
+if test -f 'Makefile34m.common' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34m.common (File already exists)'
+else
+echo 'x - extracting Makefile34m.common (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34m.common,v 1.11 2003/02/27 14:26:14 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+# COMP_LIBO=comp_lib.o
+# COMP_THRO=comp_thr.o
+# GETSEQO = getseq.o
+X
+# use for multiple query sequences
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib.o
+COMP_THRO=comp_mthr.o
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+SHAR_EOF
+chmod 0644 Makefile34m.common ||
+echo 'restore of Makefile34m.common failed'
+Wc_c="`wc -c < 'Makefile34m.common'`"
+test 1311 -eq "$Wc_c" ||
+ echo 'Makefile34m.common: original size 1311, current size' "$Wc_c"
+fi
+# ============= Makefile34m.common_mysql ==============
+if test -f 'Makefile34m.common_mysql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34m.common_mysql (File already exists)'
+else
+echo 'x - extracting Makefile34m.common_mysql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_mysql' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_mysql,v 1.3 2005/12/07 17:22:02 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+#COMP_LIBO=comp_lib.o
+#COMP_THRO=comp_thr.o
+#GETSEQO = getseq.o
+X
+# use for multiple query sequences
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib.o
+COMP_THRO=comp_mthr.o
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+#LIB_M= -L/usr/lib/pgsql/ -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+#NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile34m.common_mysql ||
+echo 'restore of Makefile34m.common_mysql failed'
+Wc_c="`wc -c < 'Makefile34m.common_mysql'`"
+test 1395 -eq "$Wc_c" ||
+ echo 'Makefile34m.common_mysql: original size 1395, current size' "$Wc_c"
+fi
+# ============= Makefile34m.common_pgsql ==============
+if test -f 'Makefile34m.common_pgsql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34m.common_pgsql (File already exists)'
+else
+echo 'x - extracting Makefile34m.common_pgsql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_pgsql' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_pgsql,v 1.3 2005/12/07 17:22:02 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+#COMP_LIBO=comp_lib.o
+#COMP_THRO=comp_thr.o
+#GETSEQO = getseq.o
+X
+# use for multiple query sequences
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib.o
+COMP_THRO=comp_mthr.o
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+# LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
+NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile34m.common_pgsql ||
+echo 'restore of Makefile34m.common_pgsql failed'
+Wc_c="`wc -c < 'Makefile34m.common_pgsql'`"
+test 1407 -eq "$Wc_c" ||
+ echo 'Makefile34m.common_pgsql: original size 1407, current size' "$Wc_c"
+fi
+# ============= Makefile34m.common_sql ==============
+if test -f 'Makefile34m.common_sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping Makefile34m.common_sql (File already exists)'
+else
+echo 'x - extracting Makefile34m.common_sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_sql' &&
+#
+# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_sql,v 1.14 2005/12/07 17:22:02 wrp Exp $
+#
+# commands common to all architectures
+# if your architecture does not support "include", append at the end.
+#
+X
+# use for "normal" fasta34(_t) programs - only one query
+#COMP_LIBO=comp_lib.o
+#COMP_THRO=comp_thr.o
+#GETSEQO = getseq.o
+X
+# use for multiple query sequences
+# work with prss34 (yet)
+COMP_LIBO=comp_mlib.o
+COMP_THRO=comp_mthr.o
+GETSEQO =
+X
+# standard nxgetaa, no memory mapping for 0 - 6
+#LGETLIB=getseq.o lgetlib.o
+#NGETLIB=nmgetlib
+X
+# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
+LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
+NGETLIB=nmgetlib
+X
+NRAND=nrandom
+X
+# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
+#NCBL_LIB=ncbl_lib.o
+X
+# this option should support both formats (BLAST1.4 not currently supported):
+#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
+X
+# normally use ncbl2_mlib.c
+#NCBL_LIB=ncbl2_mlib.o
+#LIB_M= -lm
+X
+# this option supports NCBI BLAST2 and mySQL
+# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
+# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
+# some systems may also require a LD_LIBRARY_PATH change
+# LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
+LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
+# LIB_M= -lm
+NCBL_LIB=ncbl2_mlib.o mysql_lib.o pgsql_lib.o
+# NCBL_LIB=ncbl2_mlib.o
+X
+include Makefile.pcom
+X
+include Makefile.fcom
+X
+SHAR_EOF
+chmod 0644 Makefile34m.common_sql ||
+echo 'restore of Makefile34m.common_sql failed'
+Wc_c="`wc -c < 'Makefile34m.common_sql'`"
+test 1406 -eq "$Wc_c" ||
+ echo 'Makefile34m.common_sql: original size 1406, current size' "$Wc_c"
+fi
+# ============= README ==============
+if test -f 'README' -a X"$1" != X"-c"; then
+ echo 'x - skipping README (File already exists)'
+else
+echo 'x - extracting README (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'README' &&
+X
+Up to date release notes are available in the file readme.v34t0
+X
+Documentation on the fasta3 version programs is available in the files:
+X
+X fasta3.1 (unix man page)
+X ssearch3.1 (unix man page)
+X
+X readme.v34t0 (text descriptions of bug fixes and version history)
+X
+X fasta3x.me (unix -me nroff file)
+X fasta3x.doc (text version of fast3x.me)
+X
+The latter two files provide background information on installing the
+fasta programs (in particular, the FASTLIBS file), that new users of
+the fasta3 package may find useful. Note that many non-database
+searching programs are available in the fasta20 package.
+X
+X
+Documentation on the pvm3/mpi versions of the programs is available
+in:
+X readme.pvm_3.4
+X
+X
+Bill Pearson
+wrp@virginia.edu
+SHAR_EOF
+chmod 0644 README ||
+echo 'restore of README failed'
+Wc_c="`wc -c < 'README'`"
+test 722 -eq "$Wc_c" ||
+ echo 'README: original size 722, current size' "$Wc_c"
+fi
+# ============= README.versions ==============
+if test -f 'README.versions' -a X"$1" != X"-c"; then
+ echo 'x - skipping README.versions (File already exists)'
+else
+echo 'x - extracting README.versions (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'README.versions' &&
+X
+August, 2002
+X
+The latest versions of the FASTA search programs are in fasta3.shar.Z.
+This file contains the fasta34 series of programs. fasta34 also runs
+the exact same functions threaded (fasta33, fasta33_t) and in parallel
+using PVM and MPI.
+X
+Here is a list of the programs, and where they can be found:
+X
+program fasta2 fasta34 replaced by
+X
+fasta yes fasta34, fasta34_t
+X
+ssearch yes ssearch34, ssearch34_t
+X
+tfasta yes tfasta34, tfasta34_t (tfastx34 preferred)
+X
+fastx yes fastx34, fastx34_t
+fasty no fasty34, fasty34_t
+X
+tfastx yes tfastx34, tfastx34_t
+tfasty no tfasty34, tfasty34_t
+X
+fasts/tfasts no fasts34(_t), tfasts34(_t)
+X
+fastf/tfastf no fastf34(_t), tfastf34(_t)
+X
+prss yes prss34
+X
+prfx no prfx34
+X
+================
+X
+The following programs are part of the fasta2 program package. The
+latest version of fasta2 is fasta20u66.shar.Z.
+X
+The most useful fasta2 programs are lalign and plalign, which calculate
+multiple non-intersecting local alignments using Webb Miller's "sim"
+implementation of the Waterman-Eggert algorithm.
+X
+You should not use the fasta2 programs for library searching; the
+fasta3 programs are more sensitive and have better statistics.
+X
+lalign yes no
+X
+plalign yes no
+X
+flalign yes no
+X
+align yes no
+X
+align0 yes no
+X
+lfasta yes no
+X
+randseq yes no
+X
+crandseq yes no
+X
+aacomp yes no
+X
+bestscor yes no
+X
+grease yes no
+X
+tgrease yes no
+X
+garnier yes no
+X
+================
+X
+The fasta3.shar.Z and fasta2.shar.Z files a Unix "shell archive" files.
+To unpack them, go into an empty directory and type:
+X
+X zcat fasta3.shar.Z | sh
+X
+You can then make the programs by typing:
+X
+X make all
+X
+Makefile's are available for many platforms, e.g.
+X
+X make -f Makefile.linux
+X make -f Makefile.sun
+X
+etc. You are much better off using the pre-configured Makefile.???
+than trying to edit the Makefile (which is designed for a Compaq/HP
+Alpha).
+X
+Precompiled versions of the programs for Mac and Windows are available
+in the mac_fasta and win32_fasta directories. If you are running
+MacOSX from the command line, use the Unix version (fasta3.shar.Z and
+Makefile.os_x).
+X
+SHAR_EOF
+chmod 0644 README.versions ||
+echo 'restore of README.versions failed'
+Wc_c="`wc -c < 'README.versions'`"
+test 2614 -eq "$Wc_c" ||
+ echo 'README.versions: original size 2614, current size' "$Wc_c"
+fi
+# ============= Readme.Mac ==============
+if test -f 'Readme.Mac' -a X"$1" != X"-c"; then
+ echo 'x - skipping Readme.Mac (File already exists)'
+else
+echo 'x - extracting Readme.Mac (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'Readme.Mac' &&
+X
+X 1-January-2002
+X
+X
+This is the first release of the fasta34t10 distribution for the
+Macintosh. In addition to the traditional "classic" applications that have
+been available in the past for Macintosh FASTA distributions, this release
+also includes "Carbon" versions of the FASTA programs. Carbon
+applications are designed to work under both MacOSX and under MacOS8.6 and
+later.
+X
+This is the first "Carbon" implementation of the FASTA programs. In this
+first implementation, the Mac-like interface for selecting sequence files
+has been disabled, as the older file interface I used is not available
+under Carbon. This should be implemented in the future.
+X
+Today, the main advantage of the Carbon implementation is its ability to
+run under MacOSX without needing the classic environment. However, the
+unix version of the FASTA programs compiles and runs fine under MacOSX,
+simply type:
+X
+X make -f Makefile.os_x all
+X
+However, the Unix version of the FASTA programs expects sequence files and
+libraries to have lines that end with a linefeed character ('\n', \012),
+which is different from the traditional Mac return ('\r', \015) end-of-line
+character. If you work with Mac-like text files under MacOSX, try the "carbon"
+FASTA programs. If you work with Unix-like text files, use the Unix
+version.
+X
+Bill Pearson
+SHAR_EOF
+chmod 0644 Readme.Mac ||
+echo 'restore of Readme.Mac failed'
+Wc_c="`wc -c < 'Readme.Mac'`"
+test 1332 -eq "$Wc_c" ||
+ echo 'Readme.Mac: original size 1332, current size' "$Wc_c"
+fi
+# ============= a_mark.h ==============
+if test -f 'a_mark.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping a_mark.h (File already exists)'
+else
+echo 'x - extracting a_mark.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'a_mark.h' &&
+/* a_mark.h - symbols used to indicate match/mismatch alignment code */
+X
+/* copyright (c) 2003 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: a_mark.h,v 1.1 2003/06/26 19:36:32 wrp Exp $ */
+X
+#define M_BLANK 0
+#define M_NEG 1
+#define M_ZERO 2
+#define M_POS 3
+#define M_IDENT 4
+#define M_DEL 5
+SHAR_EOF
+chmod 0644 a_mark.h ||
+echo 'restore of a_mark.h failed'
+Wc_c="`wc -c < 'a_mark.h'`"
+test 321 -eq "$Wc_c" ||
+ echo 'a_mark.h: original size 321, current size' "$Wc_c"
+fi
+# ============= aamap.h ==============
+if test -f 'aamap.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping aamap.h (File already exists)'
+else
+echo 'x - extracting aamap.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'aamap.h' &&
+X
+/* aamap.gbl character and number translations */
+X
+/* $Name: fa_34_26_5 $ - $Id: aamap.h,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp $ */
+X
+char aacmap[64]={
+X 'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
+X 'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
+X 'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
+X 'X','Y','X','Y','S','S','S','S','X','C','W','C','L','F','L','F'
+X };
+X
+int aamap[64]; /* integer aa values */
+int aamapr[64]; /* reverse sequence map */
+X
+X
+SHAR_EOF
+chmod 0644 aamap.h ||
+echo 'restore of aamap.h failed'
+Wc_c="`wc -c < 'aamap.h'`"
+test 504 -eq "$Wc_c" ||
+ echo 'aamap.h: original size 504, current size' "$Wc_c"
+fi
+# ============= ag_stats.c ==============
+if test -f 'ag_stats.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping ag_stats.c (File already exists)'
+else
+echo 'x - extracting ag_stats.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ag_stats.c' &&
+/* this procedure implements Altschul's pre-calculated values for lambda, K */
+X
+/* $Name: fa_34_26_5 $ - $Id: ag_stats.c,v 1.5 2006/04/12 18:00:01 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+X
+#include "alt_parms.h"
+X
+static double K, Lambda, H;
+X
+/* look_p() is defined further down in this file; declare it ahead of its
+X  first use so the calls in ag_parm() are checked against the prototype
+X  instead of relying on an implicit declaration. */
+int look_p(struct alt_p parm[], int gap, int ext,
+X           double *K, double *Lambda, double *H);
+X
+/* ag_parm() - pick the parameter table that matches the scoring matrix
+X  name and look up K, Lambda and H for the given gap penalties.
+X
+X  pam_type          matrix name; compared exactly (case-sensitive) with
+X                    the names tested below
+X  gdelval, ggapval  gap penalties as supplied by the caller; unless
+X                    OLD_FASTA_GAP is defined the value used for the
+X                    table lookup is gdelval+ggapval
+X
+X  On a successful lookup the file-static K, Lambda and H are filled in.
+X  Returns the look_p() result (1 = entry found, 0 = no entry), or 0
+X  without any lookup when pam_type is not one of the known names. */
+int
+ag_parm(char *pam_type, int gdelval, int ggapval)
+{
+X  int r_v, t_gdelval, t_ggapval;
+X
+#ifdef OLD_FASTA_GAP
+X  t_gdelval = gdelval;
+X  t_ggapval = ggapval;
+#else
+X  /* the tables are keyed by the cost of the first gap residue */
+X  t_gdelval = gdelval+ggapval;
+X  t_ggapval = ggapval;
+#endif
+X
+X  if (strcmp(pam_type,"BL50")==0 || strcmp(pam_type,"BLOSUM50")==0)
+X    r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"BL62")==0 || strcmp(pam_type,"BLOSUM62")==0)
+X    r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"P250")==0)
+X    r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"P120")==0)
+X    r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"MD_10")==0)
+X    r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"MD_20")==0)
+X    r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"MD_40")==0)
+X    r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pam_type,"DNA")==0 || strcmp(pam_type,"+5/-4")==0)
+X    r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else r_v = 0;		/* unknown matrix: no pre-calculated values */
+X
+X  return r_v;
+}
+X
+/* look_p() - find the table entry for a gap-open/gap-extend pair.
+X
+X  parm[]         parameter table: entry 0 holds the values used when the
+X                 requested gap penalty exceeds the largest tabulated one,
+X                 entries 1.. are scanned until an entry with gap <= 0
+X  gap, ext       penalties as (negative) scores; they are negated before
+X                 being compared with the table
+X  K, Lambda, H   receive the values of the selected entry
+X
+X  Returns 1 when an entry was selected, 0 otherwise (outputs untouched). */
+int
+look_p(struct alt_p parm[], int gap, int ext,
+X       double *K, double *Lambda, double *H)
+{
+X  int want_gap = -gap;
+X  int want_ext = -ext;
+X  int pick = -1;		/* index of the chosen entry, -1 = none */
+X  int idx;
+X
+X  if (want_gap > parm[1].gap) {
+X    /* beyond the first tabulated penalty: use entry 0 */
+X    pick = 0;
+X  }
+X  else {
+X    for (idx = 1; parm[idx].gap > 0; idx++) {
+X      if (parm[idx].gap > want_gap) continue;	/* not there yet */
+X      if (parm[idx].gap < want_gap) break;	/* passed it */
+X      /* same gap penalty: now match the extension penalty */
+X      if (parm[idx].ext > want_ext) continue;
+X      if (parm[idx].ext == want_ext) pick = idx;
+X      break;
+X    }
+X  }
+X
+X  if (pick < 0) return 0;
+X
+X  *K = parm[pick].K;
+X  *Lambda = parm[pick].Lambda;
+X  *H = parm[pick].H;
+X  return 1;
+}
+X
+/* ag_eff_lens() - length correction shared by E1_to_s() and s_to_E4().
+X
+X  Subtracts log(n0)/H + log(n1)/H from each of the two lengths (H is the
+X  file-static value set by ag_parm()) and clamps both results to >= 1.0.
+X  *mp receives the corrected n0, *np the corrected n1. */
+static void
+ag_eff_lens(int n0, int n1, double *mp, double *np)
+{
+X  double a_n0, a_n0f, a_n1, a_n1f;
+X
+X  a_n0 = (double)n0;
+X  a_n0f = log(a_n0)/H;
+X
+X  a_n1 = (double)n1;
+X  a_n1f = log(a_n1)/H;
+X
+X  *mp = a_n0 - a_n0f - a_n1f;
+X  *np = a_n1 - a_n0f - a_n1f;
+X
+X  if (*np < 1.0) *np = 1.0;
+X  if (*mp < 1.0) *mp = 1.0;
+}
+X
+/* E1_to_s() - score at which the expectation equals e_val for lengths
+X  n0 and n1, using the file-static K, Lambda and H.
+X
+X  Derivation:
+X    e_val = K * np * mp * exp ( - Lambda * score);
+X    log(e_val) = log(K np mp) - Lambda * score;
+X    (log(K np mp)-log(e_val)) / Lambda = score;
+X
+X  The result is rounded by adding 0.5 before truncation; a negative
+X  result is returned as 0. */
+int E1_to_s(double e_val, int n0, int n1) {
+X  double mp, np;
+X  int score;
+X
+X  ag_eff_lens(n0, n1, &mp, &np);
+X
+X  score = (int)((log( K * mp * np) - log(e_val))/Lambda +0.5);
+X  if (score < 0) score = 0;
+X  return score;
+}
+X
+/* s_to_E4() - convert a score to K*np*mp*exp(-Lambda*score), using the
+X  file-static K, Lambda and H and the corrected lengths.
+X
+X  Values above 0.01 are replaced by 1.0 - exp(-value).  The result is
+X  returned multiplied by 10000.0. */
+double s_to_E4(int score, int n0, int n1)
+{
+X  double p_val;
+X  double mp, np;
+X
+X  ag_eff_lens(n0, n1, &mp, &np);
+X
+X  p_val = K * np * mp * exp ( - Lambda * score);
+X
+X  if (p_val > 0.01) p_val = 1.0 - exp(-p_val);
+X
+X  return p_val * 10000.0;
+}
+X
+SHAR_EOF
+chmod 0644 ag_stats.c ||
+echo 'restore of ag_stats.c failed'
+Wc_c="`wc -c < 'ag_stats.c'`"
+test 3021 -eq "$Wc_c" ||
+ echo 'ag_stats.c: original size 3021, current size' "$Wc_c"
+fi
+# ============= aln_structs.h ==============
+if test -f 'aln_structs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping aln_structs.h (File already exists)'
+else
+echo 'x - extracting aln_structs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'aln_structs.h' &&
+X
+#ifndef A_STRUCT
+#define A_STRUCT
+X
+/* a_struct - display-start and alignment coordinates in seqc0/seqc1,
+X  together with the alignment length and identity/similarity/gap counts
+X  (see the per-field comments). */
+struct a_struct {
+X int smin0; /* coordinate of display start in seqc0 */
+X int smin1; /* coordinate of display start in seqc1 */
+X int amin0, amax0; /* coordinate of alignment start in seqc0 (amax0: presumably the end - TODO confirm) */
+X int amin1, amax1; /* coordinate of alignment start in seqc1 (amax1: presumably the end - TODO confirm) */
+X
+X int llen;
+X int llcntx, llcntx_flg, showall;
+X
+X int qlrev, qlfact;
+X int llrev, llfact, llmult;
+X int frame;
+X
+X int a_len; /* consensus alignment length */
+X int nident, nsim, ngap_q, ngap_l, nfs; /* number of identities, gaps in q, l */
+X long d_start0,d_stop0;
+X long d_start1,d_stop1;
+};
+X
+/* a_res_str - alignment boundaries in the two sequences plus an integer
+X  result buffer. */
+struct a_res_str {
+X int min0, max0; /* boundaries of alignment in aa0 */
+X int min1, max1; /* boundaries of alignment in aa1 */
+X int *res; /* NOTE(review): presumably the encoded alignment - confirm against the code that fills it */
+X int nres; /* NOTE(review): presumably the number of entries used in res[] - confirm */
+};
+#endif
+SHAR_EOF
+chmod 0644 aln_structs.h ||
+echo 'restore of aln_structs.h failed'
+Wc_c="`wc -c < 'aln_structs.h'`"
+test 758 -eq "$Wc_c" ||
+ echo 'aln_structs.h: original size 758, current size' "$Wc_c"
+fi
+# ============= alt_parms.h ==============
+if test -f 'alt_parms.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping alt_parms.h (File already exists)'
+else
+echo 'x - extracting alt_parms.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'alt_parms.h' &&
+/* tables of Altschul-Gish parameters */
+X
+/* $Name: fa_34_26_5 $ - $Id: alt_parms.h,v 1.4 2003/09/08 18:40:04 wrp Exp $ */
+X
+X
+/* first entry must be for (inf,inf) penalty */
+X
+/* alt_p - one row of a parameter table: the values of Lambda, K and H
+X  tabulated for a (gap, ext) penalty pair.  Initializers in the tables
+X  must follow this field order: gap, ext, Lambda, K, H. */
+struct alt_p {
+X int gap; /* gap penalty, stored as a positive number */
+X int ext; /* gap extension penalty, stored as a positive number */
+X float Lambda;
+X float K;
+X float H;
+};
+X
+/* BL80 1/2 bit */
+struct alt_p bl80_p[] = {
+X {0, 0, 0.343, 0.177, 0.66},
+X {14, 2, 0.336, 0.150, 0.62},
+X {12, 2, 0.328, 0.130, 0.54},
+X {12, 1, 0.314, 0.096, 0.41},
+X {11, 2, 0.320, 0.110, 0.51},
+X {11, 1, 0.296, 0.066, 0.36},
+X {10, 2, 0.311, 0.097, 0.46},
+X {10, 1, 0.282, 0.052, 0.29},
+X { 9, 2, 0.292, 0.069, 0.33},
+X { 9, 1, 0.248, 0.026, 0.18},
+X { 8, 2, 0.271, 0.050, 0.27},
+X { 8, 1, 0.189, 0.0071, 0.07}
+};
+X
+/* BL62 1/2 bit */
+struct alt_p bl62_p[] = {
+X {0, 0, 0.318, 0.13, 0.40},
+X {12, 3, 0.305, 0.10, 0.38},
+X {12, 2, 0.300, 0.09, 0.34},
+X {12, 1, 0.275, 0.05, 0.25},
+X {11, 3, 0.301, 0.09, 0.36},
+X {11, 2, 0.286, 0.07, 0.29},
+X {11, 1, 0.255, 0.035, 0.19},
+X {10, 4, 0.293, 0.08, 0.33},
+X {10, 3, 0.281, 0.06, 0.29},
+X {10, 2, 0.266, 0.04, 0.24},
+X {10, 1, 0.216, 0.014, 0.12},
+X {9, 5, 0.286, 0.08, 0.29},
+X {9, 4, 0.273, 0.06, 0.25},
+X {9, 4, 0.273, 0.06, 0.25},
+X {9, 2, 0.244, 0.030, 0.18},
+X {9, 1, 0.176, 0.008, 0.06},
+X {8, 8, 0.270, 0.06, 0.25},
+X {8, 7, 0.270, 0.06, 0.25},
+X {8, 6, 0.262, 0.05, 0.23},
+X {8, 5, 0.262, 0.05, 0.23},
+X {8, 4, 0.262, 0.05, 0.23},
+X {8, 3, 0.243, 0.035, 0.18},
+X {8, 2, 0.215, 0.021, 0.12},
+X {7, 7, 0.247, 0.05, 0.18},
+X {7, 6, 0.247, 0.05, 0.18},
+X {7, 5, 0.230, 0.030, 0.15},
+X {7, 4, 0.230, 0.030, 0.15},
+X {7, 3, 0.208, 0.021, 0.11},
+X {7, 2, 0.164, 0.009, 0.06},
+X {6, 6, 0.200, 0.021, 0.10},
+X {6, 5, 0.200, 0.021, 0.10},
+X {6, 4, 0.179, 0.014, 0.08},
+X {6, 3, 0.153, 0.010, 0.05},
+X {5, 5, 0.131, 0.009, 0.04},
+X {-1, -1, -1.0, -1.0, -1.0},
+};
+X
+/* BL50 1/3 bit */
+X
+struct alt_p bl50_p[] = {
+X {0, 0, 0.232, 0.11, 0.34},
+X {16, 4, 0.222, 0.08, 0.31},
+X {16, 3, 0.213, 0.06, 0.27},
+X {16, 2, 0.207, 0.05, 0.24},
+X {16, 1, 0.180, 0.024, 0.15},
+X {15, 8, 0.222, 0.09, 0.31},
+X {15, 7, 0.219, 0.08, 0.29},
+X {15, 6, 0.219, 0.08, 0.29},
+X {15, 5, 0.216, 0.07, 0.28},
+X {15, 4, 0.216, 0.07, 0.28},
+X {15, 3, 0.210, 0.06, 0.25},
+X {15, 2, 0.202, 0.05, 0.22},
+X {15, 1, 0.166, 0.018, 0.11},
+X {14, 8, 0.218, 0.08, 0.29},
+X {14, 7, 0.214, 0.07, 0.27},
+X {14, 6, 0.214, 0.07, 0.27},
+X {14, 5, 0.214, 0.07, 0.27},
+X {14, 4, 0.205, 0.05, 0.24},
+X {14, 3, 0.201, 0.05, 0.22},
+X {14, 2, 0.188, 0.034, 0.17},
+X {14, 1, 0.140, 0.009, 0.07},
+X {13, 8, 0.211, 0.06, 0.27},
+X {13, 7, 0.205, 0.05, 0.24},
+X {13, 6, 0.205, 0.05, 0.24},
+X {13, 5, 0.205, 0.05, 0.24},
+X {13, 4, 0.202, 0.05, 0.22},
+X {13, 3, 0.188, 0.034, 0.18},
+X {13, 2, 0.174, 0.025, 0.13},
+X {13, 1, 0.114, 0.006, 0.04},
+X {12, 7, 0.205, 0.06, 0.24},
+X {12, 6, 0.197, 0.05, 0.21},
+X {12, 5, 0.197, 0.05, 0.21},
+X {12, 4, 0.192, 0.04, 0.18},
+X {12, 3, 0.178, 0.028, 0.15},
+X {12, 2, 0.158, 0.019, 0.10},
+X {11, 8, 0.197, 0.05, 0.21},
+X {11, 7, 0.190, 0.04, 0.19},
+X {11, 6, 0.190, 0.04, 0.19},
+X {11, 5, 0.184, 0.04, 0.17},
+X {11, 4, 0.177, 0.031, 0.15},
+X {11, 3, 0.167, 0.028, 0.11},
+X {11, 2, 0.130, 0.009, 0.06},
+X {10, 8, 0.183, 0.04, 0.17},
+X {10, 7, 0.178, 0.035, 0.16},
+X {10, 6, 0.178, 0.035, 0.16},
+X {10, 5, 0.168, 0.026, 0.13},
+X {10, 4, 0.156, 0.020, 0.10},
+X {10, 3, 0.139, 0.013, 0.07},
+X {10, 2, 0.099, 0.007, 0.03},
+X {9, 7, 0.164, 0.029, 0.13},
+X {9, 6, 0.152, 0.021, 0.10},
+X {9, 5, 0.152, 0.021, 0.10},
+X {9, 4, 0.134, 0.014, 0.07},
+X {9, 3, 0.107, 0.008, 0.04},
+X {8, 8, 0.139, 0.017, 0.08},
+X {8, 7, 0.134, 0.015, 0.07},
+X {8, 6, 0.127, 0.013, 0.06},
+X {8, 5, 0.117, 0.011, 0.05},
+X {8, 4, 0.101, 0.009, 0.03},
+X {7, 7, 0.100, 0.010, 0.04},
+X {7, 6, 0.094, 0.010, 0.03},
+X {-1, -1, -1.0, -1.0, -1.0},
+};
+X
+struct alt_p p250_p[] = {
+X {0, 0, 0.229, 0.09, 0.23},
+X {16, 4, 0.217, 0.07, 0.21},
+X {16, 3, 0.208, 0.05, 0.18},
+X {16, 2, 0.200, 0.04, 0.16},
+X {16, 1, 0.172, 0.018, 0.09},
+X {15, 5, 0.215, 0.06, 0.20},
+X {15, 4, 0.208, 0.05, 0.18},
+X {15, 3, 0.203, 0.04, 0.16},
+X {15, 2, 0.193, 0.035, 0.14},
+X {15, 1, 0.154, 0.012, 0.07},
+X {14, 6, 0.212, 0.06, 0.19},
+X {14, 5, 0.204, 0.05, 0.17},
+X {14, 4, 0.204, 0.05, 0.17},
+X {14, 3, 0.194, 0.035, 0.14},
+X {14, 2, 0.180, 0.025, 0.11},
+X {14, 1, 0.131, 0.008, 0.04},
+X {13, 6, 0.206, 0.06, 0.17},
+X {13, 5, 0.196, 0.04, 0.14},
+X {13, 4, 0.196, 0.04, 0.14},
+X {13, 3, 0.184, 0.029, 0.12},
+X {13, 2, 0.163, 0.016, 0.08},
+X {13, 1, 0.110, 0.008, 0.03},
+X {12, 7, 0.199, 0.05, 0.15},
+X {12, 6, 0.191, 0.04, 0.13},
+X {12, 5, 0.191, 0.04, 0.13},
+X {12, 4, 0.181, 0.029, 0.12},
+X {12, 3, 0.170, 0.022, 0.10},
+X {12, 2, 0.145, 0.012, 0.06},
+X {11, 7, 0.186, 0.04, 0.13},
+X {11, 6, 0.180, 0.034, 0.11},
+X {11, 5, 0.180, 0.034, 0.11},
+X {11, 4, 0.165, 0.021, 0.09},
+X {11, 3, 0.153, 0.017, 0.07},
+X {11, 2, 0.122, 0.009, 0.04},
+X {10, 8, 0.175, 0.031, 0.11},
+X {10, 7, 0.171, 0.029, 0.10},
+X {10, 6, 0.165, 0.024, 0.09},
+X {10, 5, 0.158, 0.020, 0.08},
+X {10, 4, 0.148, 0.017, 0.07},
+X {10, 3, 0.129, 0.012, 0.05},
+X {9, 7, 0.151, 0.020, 0.07},
+X {9, 6, 0.146, 0.019, 0.06},
+X {9, 5, 0.137, 0.015, 0.05},
+X {9, 4, 0.121, 0.011, 0.04},
+X {9, 3, 0.102, 0.010, 0.03},
+X {8, 8, 0.123, 0.014, 0.05},
+X {8, 7, 0.123, 0.014, 0.05},
+X {8, 6, 0.115, 0.012, 0.04},
+X {8, 5, 0.107, 0.011, 0.03},
+X {7, 7, 0.090, 0.014, 0.02},
+X {-1, -1, -1.0, -1.0, -1.0},
+};
+X
+struct alt_p p120_p[] = {
+X {0, 0, 0.342, 0.19, 0.63},
+X {12, 4, 0.334, 0.14, 0.60},
+X {12, 3, 0.330, 0.13, 0.57},
+X {12, 2, 0.330, 0.13, 0.57},
+X {12, 1, 0.219, 0.11, 0.46},
+X {11, 3, 0.330, 0.13, 0.57},
+X {11, 2, 0.323, 0.12, 0.51},
+X {11, 1, 0.296, 0.06, 0.38},
+X {10, 5, 0.323, 0.12, 0.54},
+X {10, 4, 0.314, 0.09, 0.50},
+X {10, 3, 0.314, 0.09, 0.50},
+X {10, 2, 0.301, 0.07, 0.42},
+X {10, 1, 0.273, 0.04, 0.28},
+X {9, 5, 0.316, 0.11, 0.49},
+X {9, 4, 0.311, 0.10, 0.45},
+X {9, 3, 0.311, 0.10, 0.45},
+X {9, 2, 0.284, 0.05, 0.35},
+X {9, 1, 0.239, 0.023, 0.18},
+X {8, 6, 0.307, 0.10, 0.43},
+X {8, 5, 0.295, 0.08, 0.39},
+X {8, 4, 0.295, 0.08, 0.39},
+X {8, 3, 0.284, 0.06, 0.34},
+X {8, 2, 0.262, 0.04, 0.26},
+X {8, 1, 0.183, 0.009, 0.08},
+X {7, 7, 0.286, 0.08, 0.34},
+X {7, 6, 0.286, 0.08, 0.34},
+X {7, 5, 0.276, 0.06, 0.31},
+X {7, 4, 0.276, 0.06, 0.31},
+X {7, 3, 0.255, 0.04, 0.24},
+X {7, 2, 0.224, 0.023, 0.16},
+X {6, 6, 0.248, 0.04, 0.23},
+X {6, 5, 0.248, 0.04, 0.23},
+X {6, 4, 0.234, 0.033, 0.19},
+X {6, 3, 0.216, 0.025, 0.15},
+X {6, 2, 0.160, 0.009, 0.06},
+X {5, 5, 0.191, 0.019, 0.11},
+X {5, 4, 0.173, 0.013, 0.09},
+X {5, 3, 0.134, 0.006, 0.05},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+struct alt_p bl55_p[] = {
+X {0, 0, 0.224, 0.12, 0.36},
+X {16, 4, 0.213, 0.08, 0.32},
+X {16, 3, 0.205, 0.07, 0.28},
+X {16, 2, 0.198, 0.06, 0.23},
+X {16, 1, 0.164, 0.020, 0.12},
+X {15, 8, 0.212, 0.09, 0.31},
+X {15, 7, 0.209, 0.08, 0.30},
+X {15, 6, 0.209, 0.08, 0.30},
+X {15, 5, 0.205, 0.07, 0.28},
+X {15, 4, 0.205, 0.07, 0.28},
+X {15, 3, 0.199, 0.06, 0.25},
+X {15, 2, 0.190, 0.05, 0.20},
+X {15, 1, 0.146, 0.013, 0.09},
+X {14, 7, 0.207, 0.08, 0.29},
+X {14, 6, 0.203, 0.07, 0.27},
+X {14, 5, 0.203, 0.07, 0.27},
+X {14, 4, 0.195, 0.05, 0.24},
+X {14, 3, 0.189, 0.04, 0.21},
+X {14, 2, 0.175, 0.030, 0.16},
+X {14, 1, 0.119, 0.006, 0.05},
+X {13, 8, 0.201, 0.07, 0.27},
+X {13, 7, 0.196, 0.06, 0.24},
+X {13, 6, 0.196, 0.06, 0.24},
+X {13, 5, 0.196, 0.06, 0.24},
+X {13, 4, 0.191, 0.05, 0.21},
+X {13, 3, 0.176, 0.032, 0.17},
+X {13, 2, 0.158, 0.020, 0.12},
+X {12, 8, 0.195, 0.06, 0.24},
+X {12, 7, 0.188, 0.05, 0.21},
+X {12, 6, 0.188, 0.05, 0.21},
+X {12, 5, 0.188, 0.05, 0.21},
+X {12, 4, 0.180, 0.04, 0.18},
+X {12, 3, 0.165, 0.026, 0.14},
+X {12, 2, 0.140, 0.014, 0.08},
+X {11, 8, 0.185, 0.05, 0.20},
+X {11, 7, 0.179, 0.04, 0.18},
+X {11, 6, 0.179, 0.04, 0.18},
+X {11, 5, 0.171, 0.033, 0.16},
+X {11, 4, 0.163, 0.027, 0.13},
+X {11, 3, 0.151, 0.022, 0.10},
+X {11, 2, 0.110, 0.008, 0.04},
+X {10, 10, 0.173, 0.04, 0.16},
+X {10, 9, 0.173, 0.04, 0.16},
+X {10, 8, 0.167, 0.035, 0.15},
+X {10, 7, 0.167, 0.035, 0.15},
+X {10, 6, 0.167, 0.035, 0.15},
+X {10, 5, 0.155, 0.025, 0.12},
+X {10, 4, 0.142, 0.017, 0.09},
+X {10, 3, 0.121, 0.011, 0.06},
+X {9, 9, 0.152, 0.026, 0.11},
+X {9, 8, 0.152, 0.026, 0.11},
+X {9, 7, 0.152, 0.026, 0.11},
+X {9, 6, 0.137, 0.018, 0.08},
+X {9, 5, 0.137, 0.018, 0.08},
+X {9, 4, 0.117, 0.011, 0.05},
+X {9, 3, 0.090, 0.007, 0.03},
+X {8, 8, 0.125, 0.014, 0.07},
+X {8, 7, 0.119, 0.013, 0.06},
+X {8, 6, 0.113, 0.012, 0.05},
+X {8, 5, 0.102, 0.010, 0.04},
+X {8, 4, 0.085, 0.009, 0.03},
+X {7, 7, 0.087, 0.010, 0.03},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+struct alt_p nt54_p[] =
+{
+X {0, 0, 0.192, 0.173, 0.36},
+X {16, 4, 0.192, 0.177, 0.36},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+struct alt_p rnt54_p[] =
+{
+X {0, 0, 0.192, 0.173, 0.36},
+X {16, 4, 0.192, 0.177, 0.36},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+struct alt_p nt32_p[] = {
+X {0, 0, 0.2712, 0.131, 0.22},
+X {18, 2, 0.2620, 0.100, 0.22},
+X {16, 4, 0.2600, 0.098, 0.22},
+X {16, 2, 0.2540, 0.081, 0.19},
+X {12, 4, 0.2340, 0.054, 0.15},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+struct alt_p nt13_p[] = {
+X {0, 0, 1.374, 0.711, 1.31},
+X {4, 1, 1.36, 0.67, 1.30},
+X {3, 1, 1.34, 0.58, 1.19},
+X {2, 1, 1.21, 0.34, 0.77},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+/* PAM-10 (1/10 Hartley ~ 1/3 bit scale) */
+X
+struct alt_p md10_p[] = {
+X {0, 0, 0.2299, 0.309, 3.45},
+X {20, 4, 0.222, 0.21, 3.1},
+X {20, 2, 0.218, 0.18, 2.9},
+X {18, 4, 0.220, 0.20, 2.9},
+X {18, 2, 0.217, 0.18, 2.7},
+X {16, 4, 0.217, 0.19, 2.8},
+X {16, 2, 0.212, 0.17, 2.3},
+X {14, 4, 0.212, 0.17, 2.5},
+X {14, 2, 0.205, 0.15, 1.9},
+X {12, 4, 0.206, 0.16, 2.1},
+X {12, 2, 0.190, 0.11, 1.3},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+/* PAM-20 (1/10 Hartley ~ 1/3 bit scale) */
+struct alt_p md20_p[] = {
+X {0, 0, 0.230, 0.287, 2.94},
+X {20, 4, 0.221, 0.19, 2.6},
+X {20, 2, 0.219, 0.18, 2.5},
+X {18, 4, 0.220, 0.19, 2.5},
+X {18, 2, 0.218, 0.18, 2.3},
+X {16, 4, 0.218, 0.18, 2.4},
+X {16, 2, 0.213, 0.17, 2.0},
+X {14, 4, 0.213, 0.17, 2.1},
+X {14, 2, 0.204, 0.14, 1.6},
+X {12, 4, 0.207, 0.17, 1.8},
+X {12, 2, 0.187, 0.10, 1.1},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+X
+/* PAM-40 (1/10 Hartley ~ 1/3 bit scale) */
+struct alt_p md40_p[] = {
+X {0, 0, 0.2293, 0.257, 2.22},
+X {20, 4, 0.225, 0.22, 2.1},
+X {20, 2, 0.222, 0.20, 1.9},
+X {18, 4, 0.224, 0.22, 2.0},
+X {18, 2, 0.220, 0.20, 1.8},
+X {16, 4, 0.219, 0.19, 1.8},
+X {16, 2, 0.212, 0.16, 1.5},
+X {14, 4, 0.211, 0.15, 1.6},
+X {14, 2, 0.199, 0.11, 1.2},
+X {12, 4, 0.203, 0.14, 1.3},
+X {12, 2, 0.177, 0.064, 0.7},
+X {-1, -1, -1.0, -1.0, -1.0}
+};
+SHAR_EOF
+chmod 0644 alt_parms.h ||
+echo 'restore of alt_parms.h failed'
+Wc_c="`wc -c < 'alt_parms.h'`"
+test 10311 -eq "$Wc_c" ||
+ echo 'alt_parms.h: original size 10311, current size' "$Wc_c"
+fi
+# ============= altlib.h ==============
+if test -f 'altlib.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping altlib.h (File already exists)'
+else
+echo 'x - extracting altlib.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'altlib.h' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: altlib.h,v 1.9 2006/02/07 17:52:06 wrp Exp $ */
+X
+/* #ifdef UNIX */
+/* ncbi blast 1.3 format */
+/*
+#define NCBIBL13 11
+extern int ncbl_getliba();
+extern void ncbl_ranlib();
+void ncbl_closelib();
+*/
+#define NCBIBL20 12
+/* #endif */
+X
+#ifdef MYSQL_DB
+#define MYSQL_LIB 16
+#define LASTLIB MYSQL_LIB+1
+#endif
+X
+#ifdef PGSQL_DB
+#define PGSQL_LIB 17
+#define LASTLIB PGSQL_LIB+1
+#endif
+X
+#if !defined (LASTLIB) && defined(NCBIBL20)
+#define LASTLIB NCBIBL20+1
+#endif
+#if !defined (LASTLIB)
+#define LASTLIB 10
+#endif
+X
+#define FASTA_F 0
+#define DEFAULT 0
+#define FULLGB 1
+#define UNIXPIR 2
+#define EMBLSWISS 3
+#define INTELLIG 4
+#define VMSPIR 5
+#define GCGBIN 6
+#define LASTTXT 6
+X
+int agetlib(); void aranlib(); /* pearson fasta format */
+int lgetlib(); void lranlib(); /* full uncompressed GB FULLGB*/
+int pgetlib(); void pranlib(); /* PIR UNIX protein UNIXPIR */
+int egetlib(); void eranlib(); /* EMBL/SWISS-PROT EMBLSWISS */
+int igetlib(); void iranlib(); /* Intelligenetics INTELLIG */
+int vgetlib(); void vranlib(); /* PIR VMS format */
+int gcg_getlib(); void gcg_ranlib(); /* GCG 2bit format */
+X
+#ifdef NCBIBL20
+extern int ncbl2_getliba(); /* ncbi blast 2.0 format */
+extern void ncbl2_ranlib();
+void ncbl2_closelib();
+#endif
+X
+#ifdef MYSQL_DB
+extern int mysql_getlib();
+extern void mysql_ranlib();
+int mysql_closelib();
+#endif
+X
+int (*getliba[LASTLIB])()={
+X agetlib,lgetlib,pgetlib,egetlib,
+X igetlib,vgetlib,gcg_getlib,agetlib,
+X agetlib,agetlib
+#ifdef UNIX
+X ,agetlib
+#ifdef NCBIBL13
+X ,ncbl_getliba
+#else
+X ,ncbl2_getliba
+#endif
+#ifdef NCBIBL20
+X ,ncbl2_getliba
+#endif
+#ifdef MYSQL_DB
+X ,agetlib
+X ,agetlib
+X ,agetlib
+X ,mysql_getlib
+#endif
+#endif
+};
+X
+void (*ranliba[LASTLIB])()={
+X aranlib,lranlib,pranlib,eranlib,
+X iranlib,vranlib,gcg_ranlib,aranlib,
+X aranlib,aranlib
+#ifdef UNIX
+X ,aranlib
+#ifdef NCBIBL13
+X ,ncbl_ranlib
+#else
+X ,ncbl2_ranlib
+#endif
+#ifdef NCBIBL20
+X ,ncbl2_ranlib
+#endif
+#ifdef MYSQL_DB
+X ,aranlib
+X ,aranlib
+X ,aranlib
+X ,mysql_ranlib
+#endif
+#endif
+};
+X
+X
+/* mmap()ed functions */
+#ifdef USE_MMAP
+int agetlibm(); void aranlibm();
+int lgetlibm(); void lranlibm();
+void vranlibm();
+int gcg_getlibm();
+X
+int (*getlibam[])()={
+X agetlibm,lgetlibm, NULL, NULL,NULL,agetlibm,gcg_getlibm
+};
+X
+void (*ranlibam[])()={
+X aranlibm,lranlibm,NULL,NULL,NULL,vranlibm,vranlibm
+};
+#endif
+SHAR_EOF
+chmod 0644 altlib.h ||
+echo 'restore of altlib.h failed'
+Wc_c="`wc -c < 'altlib.h'`"
+test 2319 -eq "$Wc_c" ||
+ echo 'altlib.h: original size 2319, current size' "$Wc_c"
+fi
+# ============= apam.c ==============
+if test -f 'apam.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping apam.c (File already exists)'
+else
+echo 'x - extracting apam.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'apam.c' &&
+/* pam.c 19-June-86
+X copyright (c) 1987 William R. Pearson
+X read in the alphabet and pam matrix data
+X designed for universal matcher
+X
+X This version reads BLAST format (square) PAM files
+*/
+X
+/* $Name: fa_34_26_5 $ - $Id: apam.c,v 1.41 2007/03/31 18:47:20 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+X
+#include "defs.h"
+#include "param.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+X
+extern void alloc_pam (int d1, int d2, struct pstruct *ppst);
+X
+/* pam_opts() - split an option suffix off a scoring-matrix name.
+X
+X  smstr is cut (in place) at the first '-' or, when there is no '-', at
+X  the first '+'.  What follows the sign selects the option:
+X    -MS / -ms   sets ppst->pam_ms
+X    -<n>        sets ppst->pamoff to  atoi(<n>)
+X    +<n>        sets ppst->pamoff to -atoi(<n>)
+X  Both fields are reset to 0 first, so a plain name clears them. */
+void
+pam_opts(char *smstr, struct pstruct *ppst) {
+X  char *opt;
+X
+X  ppst->pam_ms = 0;
+X  ppst->pamoff = 0;
+X
+X  /* a '-' anywhere in the name takes precedence over a '+' */
+X  opt = strchr(smstr,'-');
+X  if (opt != NULL) {
+X    if (strncmp(opt+1,"MS",2)==0 || strncmp(opt+1,"ms",2)==0)
+X      ppst->pam_ms = 1;
+X    else
+X      ppst->pamoff = atoi(opt+1);
+X    *opt = '\0';
+X    return;
+X  }
+X
+X  opt = strchr(smstr,'+');
+X  if (opt != NULL) {
+X    ppst->pamoff = -atoi(opt+1);
+X    *opt = '\0';
+X  }
+}
+X
+/* modified 13-Oct-2005 to accommodate asymmetrical matrices */
+X
+/* initpam() - read a BLAST-format (square) scoring matrix file.
+X
+X  mfname  matrix file name; pam_opts() first strips a "-MS", "-<n>" or
+X          "+<n>" suffix from it in place
+X  ppst    receives the alphabet (sq, sqx, hsq), the matrix (pam2[0]),
+X          nsq/nsqx, pam_h/pam_l and pamfile
+X
+X  The global nascii[] or aascii[] map (chosen by ppst->dnaseq) is rebuilt
+X  for the alphabet found in the file.
+X
+X  Returns 1 on success and 0 when the file cannot be opened; exits the
+X  program when the matrix has too few lines or too few values per line. */
+int
+initpam (char *mfname, struct pstruct *ppst)
+{
+X  char line[512], *lp;
+X  int i, j, iaa, pval;
+X  int *hsq, nsq;
+X  int *sascii;
+X  char *sq;
+X  int ess_tmp, max_val, min_val;
+X  int have_es = 0;
+X  FILE *fmat;
+X
+X  pam_opts(mfname, ppst);
+X
+X  if ((fmat = fopen (mfname, "r")) == NULL)
+X    {
+X      printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname);
+X      fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname);
+X      return 0;
+X    }
+X
+/*
+X  the size of the alphabet is determined in advance
+*/
+X  hsq = ppst->hsq;
+X  sq = ppst->sq;
+X
+X  ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+X
+/*
+X  look for alphabet line, skipping the comments
+X  alphabet ends up in line[]
+*/
+X  while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#');
+X
+X  /* decide whether this is a protein or DNA matrix */
+X  if (ppst->nt_align) sascii = &nascii[0];
+X  else sascii = &aascii[0];
+X
+/*
+X  re-initialize sascii[] for matrix alphabet
+*/
+X
+X  /* save ',' value used by FASTS/FASTM/FASTF */
+X  ess_tmp = sascii[','];
+X
+/* clear out sascii */
+X  for (i = 0; i <= AAMASK; i++) sascii[i] = NA;
+X
+/* set end of line stop */
+X  sascii[0] = sascii['\r'] = sascii['\n'] = EL;
+X
+X  sascii[','] = ess_tmp;
+X
+/* read the alphabet - determine alphabet nsq */
+X  sq[0] = '\0';
+X  for (i = 0, nsq = 1; line[i]; i++) {
+X    if (line[i] == '*') have_es = 1;
+X    if (line[i] > ' ') sq[nsq++] = toupper (line[i]);
+X  }
+X  sq[nsq]='\0';
+X  nsq--;
+X
+/* report the alphabet that was read */
+X  fprintf(stderr,"sq[%d]: %s\n",nsq,sq+1);
+X
+/* initialize sascii */
+X  for (iaa = 1; iaa <= nsq; iaa++) {
+X    sascii[sq[iaa]] = iaa;
+X  }
+X  if (ppst->dnaseq==SEQT_DNA) {
+X    sascii['U'] = sascii['T'];
+X    sascii['u'] = sascii['t'];
+X  }
+X  else if (ppst->dnaseq==SEQT_RNA) {
+X    sascii['T'] = sascii['U'];
+X    sascii['t'] = sascii['u'];
+X  }
+X
+/*
+X  finished with sascii[]
+*/
+X
+/*
+X  setup hnt (ambiguous nt hash) values
+*/
+X  hsq[0] = 0;
+X  for (iaa = 1; iaa <= nsq; iaa++) {
+X    hsq[iaa]=iaa;
+X  }
+X  if (ppst->nt_align) {	/* DNA ambiguitities */
+X    hsq[sascii['R']]=hsq[sascii['M']]=hsq[sascii['W']]=hsq[sascii['A']];
+X    hsq[sascii['D']]=hsq[sascii['H']]=hsq[sascii['V']]=hsq[sascii['A']];
+X    hsq[sascii['N']]=hsq[sascii['X']]=hsq[sascii['A']];
+X    hsq[sascii['Y']]=hsq[sascii['S']]=hsq[sascii['B']]=hsq[sascii['C']];
+X    hsq[sascii['K']]=hsq[sascii['G']];
+X  }
+X  else 	/* protein ambiguities */
+X    /* NOTE(review): ppst->nsq is read here before it is updated from the
+X       local nsq at the end of this function - confirm that is intended */
+X    if (ppst->dnaseq == SEQT_UNK || ppst->dnaseq == SEQT_PROT ||
+X	(ppst->nsq >= 20 && ppst->nsq <= 24)) {
+X      hsq[sascii['B']] = hsq[sascii['N']];
+X      hsq[sascii['Z']] = hsq[sascii['E']];
+X      hsq[sascii['X']] = hsq[sascii['A']];
+X    }
+X  /* here if non-DNA, non-protein sequence */
+X    else ppst->dnaseq = SEQT_OTHER;
+X
+/*
+X  check for 2D pam  - if not found, allocate it
+*/
+X
+X  if (!ppst->have_pam2) {
+X    alloc_pam (MAXSQ, MAXSQ, ppst);
+X    ppst->have_pam2 = 1;
+X  }
+X
+/*
+X  read the scoring matrix values
+*/
+X
+X  max_val = -1;
+X  min_val =  1;
+X  /* NOTE(review): clears columns 0..nsq-1 of row 0 only; confirm whether
+X     column nsq (j <= nsq) was meant to be included */
+X  for (j=0; j < nsq; j++) ppst->pam2[0][0][j] = -BIGNUM;
+X  for (iaa = 1; iaa <= nsq; iaa++) {	/* read pam value line */
+X    if (fgets(line,sizeof(line),fmat)==NULL) {
+X      fprintf (stderr," error reading pam line: %s\n",line);
+X      exit (1);
+X    }
+X    /*     fprintf(stderr,"%d/%d %s",iaa,nsq,line); */
+X    strtok(line," \t\n");		/* skip the letter (residue) */
+X    /* column 0 of the row being read: index with iaa, not with i, which
+X       at this point still holds the length of the alphabet line */
+X    ppst->pam2[0][iaa][0] = -BIGNUM;
+X    for (j = 1; j <= nsq; j++) {
+X      lp=strtok(NULL," \t\n");		/* get the number string */
+X      if (lp == NULL) {		/* row has fewer than nsq values */
+X	fprintf (stderr," missing pam value: row %d, column %d\n",iaa,j);
+X	exit (1);
+X      }
+X      pval=ppst->pam2[0][iaa][j]=atoi(lp);	/* convert to integer */
+X      if (pval > max_val) max_val = pval;
+X      if (pval < min_val) min_val = pval;
+X    }
+X  }
+X
+X  /* the file had no '*' column: append one, scoring -1 against every
+X     residue and max_val/2 against itself */
+X  if (have_es==0) {
+X    /* NOTE(review): sascii['*'] gets the pre-increment nsq (the index of
+X       the last residue read from the file) while '*' itself is stored at
+X       the incremented index - confirm that this is intended */
+X    sascii['*']=nsq;
+X    nsq++;
+X    sq[nsq]='*';
+X    sq[nsq+1]='\0';
+X    for (j=1; j<=nsq; j++) ppst->pam2[0][nsq][j]= -1;
+X    ppst->pam2[0][nsq][nsq]= max_val/2;
+X  }
+X
+X  ppst->sqx[0]='\0';	/* initialize sqx[] */
+X  for (i=1; i<= nsq; i++) {
+X    ppst->sqx[i] = sq[i];
+X    ppst->sqx[i+nsq] = tolower(sq[i]);
+X    /* NOTE(review): the lower-case mapping is driven by the global aa[]
+X       alphabet rather than by sq[] - confirm for non-protein matrices */
+X    if (sascii[aa[i]] < NA && sq[i] >= 'A' && sq[i] <= 'Z')
+X      sascii[aa[i] - 'A' + 'a'] = sascii[aa[i]]+nsq;
+X  }
+X
+X  ppst->nsq = nsq;	/* save new nsq */
+X  ppst->nsqx = nsq*2;	/* save new nsqx */
+X
+X  ppst->pam_h = max_val;
+X  ppst->pam_l = min_val;
+X
+X  strncpy (ppst->pamfile, mfname, MAX_FN);
+X  ppst->pamfile[MAX_FN-1]='\0';
+X
+X  if (ppst->pam_ms) {
+X    strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1);
+X  }
+X  ppst->pamfile[MAX_FN-1]='\0';
+X  fclose (fmat);
+X  return 1;
+}
+X
+/* mk_n_pam() - rescale a DNA scoring matrix to new match/mismatch values.
+X
+X  arr       matrix values, walked as consecutive rows of 1, 2, ...
+X            entries for nnt-1 rows; updated in place
+X  siz       not used
+X  mat, mis  new match and mismatch scores
+X
+X  Entries equal to the reference match (+5) become mat, entries equal to
+X  the reference mismatch (-4) become mis, entries equal to -1 are left
+X  alone, and every other entry is rescaled between the two. */
+X
+void mk_n_pam(int *arr,int siz, int mat, int mis)
+{
+X  /* match/mismatch values the incoming matrix is expressed in */
+X  const int ref_match = +5;
+X  const int ref_mismatch = -4;
+X  int row, col;
+X  int *cell = arr;
+X  float scaled, ratio;
+X
+X  ratio = (float)(mat - mis)/(float)(ref_match - ref_mismatch);
+X
+X  for (row = 0; row < nnt-1; row++) {
+X    for (col = 0; col <= row; col++, cell++) {
+X      if (*cell == ref_match) { *cell = mat; continue; }
+X      if (*cell == ref_mismatch) { *cell = mis; continue; }
+X      if (*cell == -1) continue;
+X      /* intermediate value: scale, round, shift to the new range */
+X      scaled = (*cell - ref_mismatch)*ratio + 0.5;
+X      *cell = scaled + mis;
+X    }
+X  }
+}
+X
+/* std_pam_str - one built-in scoring matrix known to standard_pam() */
+struct std_pam_str {
+X char abbrev[6]; /* name matched (strcmp) against the user's matrix string */
+X char name[10]; /* name copied into ppst->pamfile */
+X int *pam; /* matrix values; assigned to the global pam */
+X float scale; /* copied into ppst->pamscale */
+X int gdel, ggap; /* default gap penalties, used when the caller has not set them */
+};
+X
+/* table of built-in matrices; scanning stops at the entry whose abbrev
+X  is empty, so that entry must stay last */
+static
+struct std_pam_str std_pams[] = {
+X {"P120", "PAM120", apam120, 0.346574, -20, -3},
+X {"P250", "PAM250", apam250, 0.231049, -12, -2},
+X {"P10", "MD10", a_md10, 0.346574, -27, -4},
+X {"M10", "MD10", a_md10, 0.346574, -27, -4},
+X {"MD10", "MD10", a_md10, 0.346574, -27, -4},
+X {"P20", "MD20", a_md20, 0.346574, -26, -4},
+X {"M20", "MD20", a_md20, 0.346574, -26, -4},
+X {"MD20", "MD20", a_md20, 0.346574, -26, -4},
+X {"P40", "MD40", a_md40, 0.346574, -25, -4},
+X {"M40", "MD40", a_md40, 0.346574, -25, -4},
+X {"MD40", "MD40", a_md40, 0.346574, -25, -4},
+X {"BL50", "BL50", abl50, 0.231049, -12, -2},
+X {"BL62", "BL62", abl62, 0.346574, -8, -1},
+X {"BP62", "BL62", abl62, 0.346574, -12, -1},
+X {"BL80", "BL80", abl80, 0.346574, -12, -2},
+X {"\0", "\0", NULL, 0.0, 0, 0}
+};
+X
+/* standard_pam() - select one of the built-in scoring matrices by name.
+X
+X  smstr    matrix name; pam_opts() strips its option suffix in place and
+X           sets ppst->pam_ms / ppst->pamoff
+X  ppst     receives pamfile, pamscale and, unless already set by the
+X           caller, the default gap penalties of the matrix
+X  del_set  non-zero: keep the caller's ppst->gdelval
+X  gap_set  non-zero: keep the caller's ppst->ggapval
+X
+X  The global pam is pointed at the selected matrix.
+X  Returns 1 when the name matches a std_pams[] abbreviation, else 0. */
+int
+standard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set) {
+X
+X  struct std_pam_str *entry;
+X
+X  pam_opts(smstr, ppst);
+X
+X  /* advance to the matching entry, or to the empty terminator */
+X  entry = std_pams;
+X  while (entry->abbrev[0] && strcmp(smstr,entry->abbrev)!=0) entry++;
+X
+X  if (!entry->abbrev[0]) return 0;
+X
+X  pam = entry->pam;
+X  strncpy(ppst->pamfile,entry->name,MAX_FN);
+X  ppst->pamfile[MAX_FN-1]='\0';
+X  if (ppst->pam_ms) {
+X    strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1);
+X  }
+X  ppst->pamfile[MAX_FN-1]='\0';
+X
+X  if (!del_set) {
+#ifdef OLD_FASTA_GAP
+X    ppst->gdelval = entry->gdel;
+#else
+X    ppst->gdelval = entry->gdel-entry->ggap;
+#endif
+X  }
+X  if (!gap_set) ppst->ggapval = entry->ggap;
+X  ppst->pamscale = entry->scale;
+X  return 1;
+}
+X
+/* ESS must match uascii.h */
+#define ESS 49
+X
+/* build_xascii() - rebuild qascii[] as an identity map over the
+X  characters of the extended alphabets aax[] and ntx[].
+X
+X  qascii    128-entry character table, rewritten in place
+X  save_str  characters whose current qascii[] values must survive the
+X            rebuild (at most MAX_SSTR are honoured)
+X
+X  The entries for ',' and '*' are preserved as well; 0, '\r' and '\n'
+X  are set to EL; everything else not in aax[]/ntx[] becomes NA. */
+void
+build_xascii(int *qascii, char *save_str) {
+X  int kept[MAX_SSTR];
+X  int n_kept = 0;
+X  int idx;
+X  int saved_comma = qascii[','];
+X  int saved_term = qascii['*'];
+X
+X  /* remember the values of the caller's special characters */
+X  while (n_kept < MAX_SSTR && save_str[n_kept]) {
+X    kept[n_kept] = qascii[save_str[n_kept]];
+X    n_kept++;
+X  }
+X
+X  /* entry 0 is deliberately not reset here */
+X  for (idx=1; idx<128; idx++) qascii[idx]=NA;
+X
+X  /* each alphabet character maps to itself */
+X  for (idx=1; idx<naax; idx++) qascii[aax[idx]]=aax[idx];
+X  for (idx=1; idx<nntx; idx++) qascii[ntx[idx]]=ntx[idx];
+X
+X  qascii[0] = EL;
+X  qascii['\r'] = EL;
+X  qascii['\n'] = EL;
+X
+X  qascii[','] = saved_comma;
+X  qascii['*'] = saved_term;
+X
+X  /* put the saved special characters back, in their original order */
+X  for (idx=0; idx<n_kept; idx++) qascii[save_str[idx]]=kept[idx];
+}
+X
+/*
+X  init_ascii() - fill sascii[] from one of the built-in alphabets.
+X
+X  is_ext   non-zero selects the extended alphabet (ntx/aax) instead of
+X           the plain one (nt/aa)
+X  sascii   character -> residue index table, updated in place
+X  is_dna   SEQT_DNA or SEQT_RNA selects the nucleotide alphabet, any
+X           other value the protein alphabet; SEQT_UNK leaves sascii[]
+X           untouched
+X
+X  Every alphabet character is mapped to its position.  If the alphabet
+X  contains no lower-case letters, lower-case letters are mapped to the
+X  position of their upper-case form.  The entry for '*' is preserved.
+*/
+void
+init_ascii(int is_ext, int *sascii, int is_dna) {
+X
+X  int isq, have_lc;
+X  char *sq;
+X  int term_val;		/* saved sascii['*']; int, to match the table */
+X  int nsq;
+X
+X  if (is_dna==SEQT_UNK) return;
+X
+X  term_val = sascii['*'];
+X
+X  if (is_dna==SEQT_DNA || is_dna == SEQT_RNA) {
+X    if (is_ext) {
+X      sq = &ntx[0];
+X      nsq = nntx;
+X    }
+X    else {sq = &nt[0]; nsq = nnt;}
+X  }
+X  else {
+X    if (is_ext) { sq = &aax[0]; nsq = naax; }
+X    else {sq = &aa[0]; nsq = naa;}
+X  }
+X
+X
+/* initialize sascii from sq[], checking for lower-case letters */
+X  have_lc = 0;
+X  for (isq = 1; isq <= nsq; isq++) {
+X     sascii[sq[isq]] = isq;
+X     if (sq[isq] >= 'a' && sq[isq] <= 'z') have_lc = 1;
+X  }
+X
+X  /* no lower case letters in alphabet, map lower case to upper */
+X  if (have_lc != 1) {
+X    for (isq = 1; isq <= nsq; isq++) {
+X      if (sq[isq] >= 'A' && sq[isq] <= 'Z') sascii[sq[isq]-'A'+'a'] = isq;
+X    }
+X    /* NOTE(review): literal 1 here, presumably SEQT_DNA - confirm */
+X    if (is_dna==1) sascii['u'] = sascii['t'];
+X  }
+X
+X  sascii['*']=term_val;
+}
+X
+/* print_pam() - debugging aid: write the alphabet and the diagonal of
+X  the scoring matrix to stderr.
+X
+X  Prints ppst->ext_sq_set and the alphabet sq[]; when ext_sq_set is
+X  non-zero the extended alphabet sqx[] (nsqx entries) and pam2[1] are
+X  used for the per-residue lines instead of sq[] and pam2[0].
+X
+X  The return type is spelled out as int (what the former implicit
+X  declaration meant); the function always returns 0. */
+int
+print_pam(struct pstruct *ppst) {
+X  int i, nsq, ip;
+X  char *sq;
+X
+X  fprintf(stderr," ext_sq_set: %d\n",ppst->ext_sq_set);
+X
+X  nsq = ppst->nsq;
+X  ip = 0;
+X  sq = ppst->sq;
+X
+X  fprintf(stderr," sq[%d]: %s\n",nsq, sq);
+X
+X  if (ppst->ext_sq_set) {
+X    nsq = ppst->nsqx;
+X    ip = 1;
+X    sq = ppst->sqx;
+X    fprintf(stderr," sq[%d]: %s\n",nsq, sq);
+X  }
+X
+X  /* one line per residue: its self-score on the matrix diagonal */
+X  for (i=1; i<=nsq; i++) {
+X    fprintf(stderr," %c:%c - %3d\n",sq[i], sq[i], ppst->pam2[ip][i][i]);
+X  }
+X
+X  return 0;
+}
+SHAR_EOF
+chmod 0644 apam.c ||
+echo 'restore of apam.c failed'
+Wc_c="`wc -c < 'apam.c'`"
+test 10085 -eq "$Wc_c" ||
+ echo 'apam.c: original size 10085, current size' "$Wc_c"
+fi
+# ============= blosum45.mat ==============
+if test -f 'blosum45.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping blosum45.mat (File already exists)'
+else
+echo 'x - extracting blosum45.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'blosum45.mat' &&
+# Matrix made by matblas from blosum45.iij
+# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 45
+# Entropy = 0.3795, Expected = -0.2789
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 0
+R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 0 -1
+N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 4 0 -1
+D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 5 1 -1
+C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2
+Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 4 -1
+E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 4 -1
+G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
+H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 0 -1
+I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 -3 -1
+L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 -2 -1
+K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 1 -1
+M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 -1 -1
+F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 -3 -1
+P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -1 -1
+S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 0 0
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 0
+W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2
+Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 -2 -1
+V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 -3 -1
+B -1 -1 4 5 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 4 2 -1
+Z -1 0 0 1 -3 4 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 2 4 -1
+XX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -2 -1 -1 -1 -1 -1
+X
+SHAR_EOF
+chmod 0644 blosum45.mat ||
+echo 'restore of blosum45.mat failed'
+Wc_c="`wc -c < 'blosum45.mat'`"
+test 1922 -eq "$Wc_c" ||
+ echo 'blosum45.mat: original size 1922, current size' "$Wc_c"
+fi
+# ============= blosum50.mat ==============
+if test -f 'blosum50.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping blosum50.mat (File already exists)'
+else
+echo 'x - extracting blosum50.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'blosum50.mat' &&
+# Matrix made by matblas from blosum50.iij
+# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 50
+# Entropy = 0.4808, Expected = -0.3573
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1
+R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1
+N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1
+D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1
+C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2
+Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1
+E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1
+G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2
+H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1
+I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1
+L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1
+K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1
+M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1
+F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2
+P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2
+S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0
+W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3
+Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1
+V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1
+B -2 -1 4 5 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1
+Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1
+XX -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 blosum50.mat ||
+echo 'restore of blosum50.mat failed'
+Wc_c="`wc -c < 'blosum50.mat'`"
+test 1921 -eq "$Wc_c" ||
+ echo 'blosum50.mat: original size 1921, current size' "$Wc_c"
+fi
+# ============= blosum62.mat ==============
+if test -f 'blosum62.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping blosum62.mat (File already exists)'
+else
+echo 'x - extracting blosum62.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'blosum62.mat' &&
+# Matrix made by matblas from blosum62.iij
+# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 62
+# Entropy = 0.6979, Expected = -0.5209
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0
+R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1
+N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1
+D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1
+C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2
+Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1
+E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1
+G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
+H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1
+I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1
+L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1
+K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1
+M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1
+F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2
+S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2
+Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1
+V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1
+B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1
+Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1
+XX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1
+X
+SHAR_EOF
+chmod 0644 blosum62.mat ||
+echo 'restore of blosum62.mat failed'
+Wc_c="`wc -c < 'blosum62.mat'`"
+test 1922 -eq "$Wc_c" ||
+ echo 'blosum62.mat: original size 1922, current size' "$Wc_c"
+fi
+# ============= blosum80.mat ==============
+if test -f 'blosum80.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping blosum80.mat (File already exists)'
+else
+echo 'x - extracting blosum80.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'blosum80.mat' &&
+# Matrix made by matblas from blosum80_3.iij
+# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 80
+# Entropy = 0.9868, Expected = -0.7442
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1
+R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2
+N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2
+D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3
+C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4
+Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2
+E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2
+G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3
+H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2
+I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2
+L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2
+K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2
+M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2
+F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3
+S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1
+T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1
+W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5
+Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3
+V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2
+B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3
+Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1
+XX -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2
+X
+SHAR_EOF
+chmod 0644 blosum80.mat ||
+echo 'restore of blosum80.mat failed'
+Wc_c="`wc -c < 'blosum80.mat'`"
+test 1924 -eq "$Wc_c" ||
+ echo 'blosum80.mat: original size 1924, current size' "$Wc_c"
+fi
+# ============= bovgh.seq ==============
+if test -f 'bovgh.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping bovgh.seq (File already exists)'
+else
+echo 'x - extracting bovgh.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'bovgh.seq' &&
+>BOVGH bovine growth hormone (presomatotropin) gene and flanks.
+X AAAACCTATG GGGTGGGCTC TCAAGCTGAG ACCCTGTGTG CACAGCCCTC TGGCTGGTGG
+X CAGTGGAGAC GGGATNNNAT GACAAGCCTG GGGGACATGA CCCCAGAGAA GGAACGGGAA
+X CAGGATGAGT GAGAGGAGGT TCTAAATTAT CCATTAGCAC AGGCTGCCAG TGGTCCTTGC
+X ATAAATGTAT AGAGCACACA GGTGGGGGGA AAGGGAGAGA GAGAAGAAGC CAGGGTATAA
+X AAATGGCCCA GCAGGGACCA ATTCCAGGAT CCCAGGACCC AGTTCACCAG ACGACTCAGG
+X GTCCTGTGGA CAGCTCACCA GCTATGATGG CTGCAGGTAA GCTCGCTAAA ATCCCCTCCA
+X TTCGCGTGTC CTAAAGGGGT AATGCGGGGG GCCCTGCCGA TGGATGTGTT CAGAGCTTTG
+X GGCTTTAGGG CTTCCGAATG TGAACATAGG TATCTACACC CAGACATTTG GCCAAGTTTG
+X AAATGTTCTC AGTCCCTGGA GGGAAGGGTA GGTGGGGGCT GGCAGGAGAT CAGGCGTCTA
+X GCTCCCTGGG GCCCTCCGTC GCGGCCCTCC TGGTCTCTCC CTAGGCCCCC GGACCTCCCT
+X GCTCCTGGCT TTCGCCCTGC TCTGCCTGCC CTGGACTCAG GTGGTGGGCG CCTTCCCAGC
+X CATGTCCTTG TCCGGCCTGT TTGCCAACGC TGTGCTCCGG GCTCAGCACC TGCATCAGCT
+X GGCTGCTGAC ACCTTCAAAG AGTTTGTAAG CTCCCGAGGG ATGCGTCCTA GGGGTGGGGA
+X GGCAGGAAGG GGTGAATCCA CACCCCCTCC ACACAGTGGG AGGAAACTGA GGAGTTCAGC
+X CGTATTTTAT CCAAGTAGGG ATGTGGTTAG GGGAGCAGAA ACGGGGGTGT GTGGGGTGGG
+X GAGGGTTCCG AATAAGGCGG GGAGGGGAAC CGCGCACCAG CTTAGACCTG GGTGGGTGTG
+X TTCTTCCCCC AGGAGCGCAC CTACATCCCG GAGGGACAGA GATACTCCAT CCAGAACACC
+X CAGGTTGCCT TCTGCTTCTC TGAAACCATC CCGGCCCCCA CGGGCAAGAA TGAGGCCCAG
+X CAGAAATCAG TGAGTGGCAA CCTCGGACCG AGGAGCAGGG GACCTCCTTC ATCCTAAGTA
+X GGCTGCCCCA GCTCTCCGCA CCGGGCCTGG GGCGGCCTTC TCCCCGAGGT GGCGGAGGTT
+X GTTGGATGGC AGTGGAGGAT GATGGTGGGC GGTGGTGGCA GGAGGTCCTC GGGCAGAGGC
+X CGACCTTGCA GGGCTGCCCC AAGCCCGCGG CACCCACCGA CCACCCATCT GCCAGCAGGA
+X CTTGGAGCTG CTTCGCATCT CACTGCTCCT CATCCAGTCG TGGCTTGGGC CCCTGCAGTT
+X CCTCAGCAGA GTCTTCACCA ACAGCTTGGT GTTTGGCACC TCGGACCGTG TCTATGAGAA
+X GCTGAAGGAC CTGGAGGAAG GCATCCTGGC CCTGATGCGG GTGGGGATGG CGTTGTGGGT
+X CCCTTCCATG CTGGGGGCCA TGCCCGCCCT CTCCTGGCTT AGCCAGGAGA ATGCACGTGG
+X GCTTGGGGAG ACAGATCCCT GCTCTCTCCC TCTTTCTAGC AGTCCAGCCT TGACCCAGGG
+X GAAACCTTTT CCCCTTTTGA AACCTCCTTC CTCGCCCTTC TCCAAGCCTG TAGGGGAGGG
+X TGGAAAATGG AGCGGGCAGG AGGGAGCTGC TCCTGAGGGC CCTTCGGCCT CTCTGTCTCT
+X CCCTCCCTTG GCAGGAGCTG GAAGATGGCA CCCCCCGGGC TGGGCAGATC CTCAAGCAGA
+X CCTATGACAA ATTTGACACA AACATGCGCA GTGACGACGC GCTGCTCAAG AACTACGGTC
+X TGCTCTCCTG CTTCCGGAAG GACCTGCATA AGACGGAGAC GTACCTGAGG GTCATGAAGT
+X GCCGCCGCTT CGGGGAGGCC AGCTGTGCCT TCTAGTTGCC AGCCATCTGT TGTTTGCCCC
+X TCCCCCGTGC CTTCCTTGAC CCTGGAAGGT GCCACTCCCA CTGTCCTTTC CTAATAAAAT
+X GAGGAAATTG CATCGCATTG TCTGAGTAGG TGTCATTCTA TTCTGGGGGG TGGGGTGGGG
+X CAGGACAGCA AGGGGGAGGA TTGGGAAGAC AATAGCAGGC ATGCTGGGGA TGCGGTGGGC
+X TCTATGGGTA CCCAGGTGCT GAAGAATTGA CCCGGTTCCT CCTGGG
+SHAR_EOF
+chmod 0644 bovgh.seq ||
+echo 'restore of bovgh.seq failed'
+Wc_c="`wc -c < 'bovgh.seq'`"
+test 2528 -eq "$Wc_c" ||
+ echo 'bovgh.seq: original size 2528, current size' "$Wc_c"
+fi
+# ============= bovprl.seq ==============
+if test -f 'bovprl.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping bovprl.seq (File already exists)'
+else
+echo 'x - extracting bovprl.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'bovprl.seq' &&
+>BOVPRL GenBank entry BOVPRL from omam file. 907 nucleotides.
+TGCTTGGCTGAGGAGCCATAGGACGAGAGCTTCCTGGTGAAGTGTGTTTCTTGAAATCAT
+CACCACCATGGACAGCAAAGGTTCGTCGCAGAAAGGGTCCCGCCTGCTCCTGCTGCTGGT
+GGTGTCAAATCTACTCTTGTGCCAGGGTGTGGTCTCCACCCCCGTCTGTCCCAATGGGCC
+TGGCAACTGCCAGGTATCCCTTCGAGACCTGTTTGACCGGGCAGTCATGGTGTCCCACTA
+CATCCATGACCTCTCCTCGGAAATGTTCAACGAATTTGATAAACGGTATGCCCAGGGCAA
+AGGGTTCATTACCATGGCCCTCAACAGCTGCCATACCTCCTCCCTTCCTACCCCGGAAGA
+TAAAGAACAAGCCCAACAGACCCATCATGAAGTCCTTATGAGCTTGATTCTTGGGTTGCT
+GCGCTCCTGGAATGACCCTCTGTATCACCTAGTCACCGAGGTACGGGGTATGAAAGGAGC
+CCCAGATGCTATCCTATCGAGGGCCATAGAGATTGAGGAAGAAAACAAACGACTTCTGGA
+AGGCATGGAGATGATATTTGGCCAGGTTATTCCTGGAGCCAAAGAGACTGAGCCCTACCC
+TGTGTGGTCAGGACTCCCGTCCCTGCAAACTAAGGATGAAGATGCACGTTATTCTGCTTT
+TTATAACCTGCTCCACTGCCTGCGCAGGGATTCAAGCAAGATTGACACTTACCTTAAGCT
+CCTGAATTGCAGAATCATCTACAACAACAACTGCTAAGCCCACATTCCATCCTATCCATT
+TCTGAGATGGTTCTTAATGATCCATTCCCTGGCAAACTTCTCTGAGCTTTATAGCTTTGT
+AATGCATGCTTGGCTCTAATGGGTTTCATCTTAAATAAAAACAGACTCTGTAGCGATGTC
+AAAATCT
+SHAR_EOF
+chmod 0644 bovprl.seq ||
+echo 'restore of bovprl.seq failed'
+Wc_c="`wc -c < 'bovprl.seq'`"
+test 986 -eq "$Wc_c" ||
+ echo 'bovprl.seq: original size 986, current size' "$Wc_c"
+fi
+# ============= c_dispn.c ==============
+if test -f 'c_dispn.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping c_dispn.c (File already exists)'
+else
+echo 'x - extracting c_dispn.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'c_dispn.c' &&
+/* dispn.c associated subroutines for matching sequences */
+X
+/* $Name: fa_34_26_5 $ - $Id: c_dispn.c,v 1.21 2005/10/25 20:22:52 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+X
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+X
+#define XTERNAL
+X
+#define YES 1
+#define NO 0
+X
+#define MAXOUT 201
+X
+/* the seqca[] array has the following codes:
+X 0 - no alignment symbol
+X 1 - align; pam < 0
+X 2 - align; pam == 0
+X 3 - align; pam > 0
+X 4 - align; ident
+X 5 - align; del
+X
+X the map_sym arrays determine the value to be displayed with each
+X type of aligned residue
+*/
+X
+static char *map_sym_0 =" ..: ";
+static char *map_sym_1 =" Xxx ";
+static char *map_sym_2 =" . ";
+#ifdef M10_CONS_L
+static char *map_sym_10=" mzp=-";
+#else
+static char *map_sym_10=" ..:-";
+#endif
+X
+/*
+X  discons() - print the alignment held in seqc0[]/seqc1[] to fd.
+X
+X  The output form is selected by m_msg.markx:
+X    MX_AMAP with (markx & MX_ATYPE)==7 - nothing is printed
+X    MX_ASEP     - each aligned string is written under a ">name .."
+X                  header, 50 columns per line
+X    MX_M10FORM  - each aligned string is written under a ">name .."
+X                  header followed by "; key: value" lines (plus an
+X                  "; al_cons:" section when built with M10_CONS)
+X    otherwise   - blocks of l_llen columns, each made of a coordinate
+X                  line, the name0 line, a match-symbol line, the name1
+X                  line and a second coordinate line
+X
+X  seqc0, seqc1 - the aligned sequences; '-', '\\', '/' and ' ' are
+X                 counted as columns without a residue
+X  seqc0a       - annotation characters; non-blank ones are copied onto
+X                 the first coordinate line when m_msg.ann_flg is set
+X  seqca        - per-column codes used to index the map_sym string
+X  nc           - number of alignment columns
+X  n0, n1       - added to the coordinate origin when aln->qlrev /
+X                 aln->llrev is 1
+X  name0, name1 - labels printed in front of the sequence lines
+X  nml          - label field width; values <= 6 give a width of 6
+X  aln          - alignment boundaries, line length and scaling factors
+X  loffset      - offset added to second-sequence coordinates
+*/
+void
+discons(FILE *fd, struct mngmsg m_msg, struct pstruct pst,
+X        char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
+X        int n0, int n1, char *name0, char *name1, int nml,
+X        struct a_struct *aln, long loffset)
+{
+X  char line[3][MAXOUT], cline[2][MAXOUT+10], *clinep[2];
+X  int il, i, lend, loff, id;
+X  int del0, del1, ic, ll0, ll1, ll01, cl0, cl1, rl0, rl1;
+X  int ic_save;
+X  char *map_sym_p;
+X  int l_llen;
+X  int ioff0, ioff00, ioff1, ioff10;
+X  long qqoff, lloff, qoffset;
+X  int llsgn, llfact, qlsgn, qlfact, qfx0, qfxn, lfx0, lfxn;
+X  int have_res;
+X  char *name01, *sq;
+X  char blank[MAX_UID], afmt[32];
+X
+X  /* blank[] is the left margin printed in front of coordinate lines */
+X  memset(blank,' ',sizeof(blank)-1);
+X  blank[sizeof(blank)-1]='\0';
+X
+X  if (nml > 6) {
+X    /* only shorten the margin when the index lies inside blank[] */
+X    if ((size_t)(nml-6) < sizeof(blank)) blank[nml-6]='\0';
+X    sprintf(afmt,"%%-%ds %%s\n",nml);
+X  }
+X  else {
+X    blank[0]='\0';
+X    strncpy(afmt,"%-6s %s\n",sizeof(afmt));
+X  }
+X  if (pst.ext_sq_set) sq = pst.sqx; else sq = pst.sq;
+X
+X  /* clinep[] starts one byte into cline[] so that index -1 (used when
+X     qfxn/lfxn is 1) is still inside the buffer */
+X  clinep[0]=cline[0]+1;
+X  clinep[1]=cline[1]+1;
+X
+X  if (aln->qlfact == 0) {qlfact = 1;}
+X  else qlfact = aln->qlfact;
+X  if (aln->qlrev == 1) {
+X    qoffset = n0;
+X    qlsgn = -1;
+X    qfx0 = 0;
+X    qfxn = 1;
+X  }
+X  else {
+X    qoffset = 0;
+X    qlsgn = 1;
+X    qfx0 = 1;
+X    qfxn = 0;
+X  }
+X
+X  if (aln->llfact == 0) {llfact = 1;}
+X  else llfact = aln->llfact;
+X
+X  if (aln->llrev == 1) {
+X    loffset += n1;
+X    llsgn = -1;
+X    lfx0 = 0;
+X    lfxn = 1;
+X  }
+X  else {
+X    llsgn = 1;
+X    lfx0 = 1;
+X    lfxn = 0;
+X  }
+X
+X  l_llen = aln->llen;
+X  if ((m_msg.markx & MX_M9SUMM) && m_msg.show_code != 1) { l_llen += 40; }
+X  /* line[][] holds MAXOUT bytes including the terminator, and l_llen
+X     is used as a divisor below */
+X  if (l_llen > MAXOUT-1) l_llen = MAXOUT-1;
+X  if (l_llen < 1) l_llen = 1;
+X
+X  if ((m_msg.markx & MX_ATYPE)==2) name01=name1;
+X  else name01 = "\0";
+X
+X  ioff0=aln->smin0;
+X  ioff00 = ioff0;
+X  ioff1=aln->smin1;
+X  ioff10 = ioff1;
+X
+X  if (m_msg.markx& MX_AMAP && (m_msg.markx & MX_ATYPE)==7) return;
+X
+X  /* set *map_sym_p to correct match symbol */
+X  if ((m_msg.markx&MX_ATYPE)==1) {map_sym_p = map_sym_1;}
+X  else if ((m_msg.markx&MX_ATYPE)==2) {map_sym_p = map_sym_2;}
+X  else if (m_msg.markx&MX_M10FORM) {map_sym_p = map_sym_10;}
+X  else {map_sym_p = map_sym_0;}
+X
+X  /* separate-sequence output: the two aligned strings, 50 per line */
+X  if (m_msg.markx & MX_ASEP) {
+X    fprintf(fd,">%s ..\n",name0);
+X    for (i=0; i<nc && seqc0[i]; i++) {
+X      /* if (seqc0[i]=='-') fputc('.',fd); else */
+X      fputc(seqc0[i],fd);
+X      if (i%50 == 49) fputc('\n',fd);
+X    }
+X    if ((i-1)%50 != 49) fputc('\n',fd);
+X    fprintf(fd,">%s ..\n",name1);
+X    for (i=0; i<nc && seqc1[i]; i++) {
+X      /* if (seqc1[i]=='-') fputc('.',fd); else */
+X      fputc(seqc1[i],fd);
+X      if (i%50 == 49) fputc('\n',fd);
+X    }
+X    if ((i-1)%50 != 49) fputc('\n',fd);
+X    return;
+X  }
+X
+X  /* "; key: value" output */
+X  if (m_msg.markx & MX_M10FORM) {
+X    fprintf(fd,">%s ..\n",name0);
+X    fprintf(fd,"; sq_len: %d\n",n0);
+X    fprintf(fd,"; sq_offset: %ld\n",m_msg.sq0off);
+X    fprintf(fd,"; sq_type: %c\n",m_msg.sqtype[0]);
+X    fprintf(fd,"; al_start: %ld\n",aln->d_start0);
+X    fprintf(fd,"; al_stop: %ld\n",aln->d_stop0);
+X    fprintf(fd,"; al_display_start: %ld\n",
+X            qoffset+qlsgn*ioff0*aln->llmult+qfx0);
+X
+X    /* leading blanks are shown as '-'; a blank after the first
+X       residue ends the sequence */
+X    have_res = 0;
+X    for (i=0; i<nc && seqc0[i]; i++) {
+X      if (!have_res && seqc0[i]==' ') fputc('-',fd);
+X      else if (seqc0[i]==' ') break;
+X      else {
+X        have_res = 1;
+X        fputc(seqc0[i],fd);
+X      }
+X      if (i%50 == 49) fputc('\n',fd);
+X    }
+X    if ((i-1)%50!=49 || seqc0[i-1]==' ') fputc('\n',fd);
+X    fprintf(fd,">%s ..\n",name1);
+X    fprintf(fd,"; sq_len: %d\n",n1);
+X    fprintf(fd,"; sq_type: %c\n",m_msg.sqtype[0]);
+X    fprintf(fd,"; al_start: %ld\n",aln->d_start1);
+X    fprintf(fd,"; al_stop: %ld\n",aln->d_stop1);
+X    fprintf(fd,"; al_display_start: %ld\n",loffset+llsgn*ioff1+lfx0);
+X
+X    have_res = 0;
+X    for (i=0; i<nc && seqc1[i]; i++) {
+X      if (!have_res && seqc1[i]==' ') fputc('-',fd);
+X      else if (seqc1[i]==' ') break;
+X      else {
+X        have_res = 1;
+X        fputc(seqc1[i],fd);
+X      }
+X      if (i%50 == 49) fputc('\n',fd);
+X    }
+X    if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
+#ifdef M10_CONS
+X    fprintf(fd,"; al_cons:\n");
+X    for (i=0,del0=0,id=ioff0; id-del0<aln->amax0 && i < nc; i++,id++) {
+X      if (seqc0[i] == '\0' || seqc1[i] == '\0') break;
+X      if (seqc0[i]=='-' || seqc0[i]==' ' || seqc0[i]=='\\') del0++;
+X      else if (seqc0[i]=='/') del0++;
+X      if (id-del0<aln->amin0) fputc(' ',fd);
+X      else if (seqc0[i]=='-'||seqc1[i]=='-') fputc('-',fd);
+X      else fputc(map_sym_10[seqca[i]],fd);
+X
+X      if (i%50 == 49) fputc('\n',fd);
+X    }
+X    if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
+#endif
+X    return;
+X  }
+X
+X  memset(line[0],' ',MAXOUT);
+X  memset(line[1],' ',MAXOUT);
+X  memset(line[2],' ',MAXOUT);
+X
+X  /* cl0 indicates whether a coordinate should be printed over the first
+X     sequence; cl1 indicates a coordinate for the second;
+X  */
+X
+X  ic = 0; del0=del1=0;
+X  for (il=0; il<(nc+l_llen-1)/l_llen; il++) {
+X    loff=il*l_llen;
+X    lend=min(l_llen,nc-loff);
+X
+X    /* ll0/ll1 become YES once the block holds a non-blank column */
+X    ll0 = NO; ll1 = NO;
+X
+X    memset(cline[0],' ',MAXOUT+1);
+X    memset(cline[1],' ',MAXOUT+1);
+X
+X    ic_save = ic;
+X    for (i=0; i<lend; i++, ic++,ioff0++,ioff1++) {
+X      cl0 = cl1 = rl0 = rl1 = YES;
+X      if ((line[0][i]=seqc0[ic])=='-' || seqc0[ic]=='\\') {
+X        del0++; cl0=rl0=NO;
+X      }
+X      else if (seqc0[ic]=='/') {
+X        del0++; cl0=rl0=NO;
+X      }
+X      if ((line[2][i]=seqc1[ic])=='-' || seqc1[ic]=='\\') {
+X        del1++; cl1=rl1=NO;
+X      }
+X      else if (seqc1[ic]=='/') {
+X        del1++; cl1=rl1=NO;
+X      }
+X
+X      if (seqc0[ic]==' ') {del0++; cl0=rl0=NO;}
+X      else ll0 = YES;
+X      if (seqc1[ic]==' ') {del1++; cl1=rl1=NO;}
+X      else ll1 = YES;
+X
+X      qqoff = m_msg.sq0off - 1 + qoffset + (long)qlsgn*ioff00 +
+X        (long)qlsgn*qlfact*(ioff0-del0-ioff00);
+X      /* each label is written 8 wide; the terminator sprintf leaves
+X         behind is then turned back into a blank */
+X      if (cl0 && qqoff%10 == 9) {
+X        sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1l);
+X        clinep[0][i+8-qfxn]=' ';
+X        rl0 = NO;
+X      }
+X      else if (cl0 && qqoff== -1) {
+X        sprintf(&clinep[0][i-qfxn],"%8ld",0l);
+X        clinep[0][i+8-qfxn]=' ';
+X        rl0 = NO;
+X      }
+X      else if (rl0 && (qqoff+1)%10 == 0) {
+X        sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1);
+X        clinep[0][i+8-qfxn]=' ';
+X      }
+X
+X      /* the lloff coordinate of a residue is the sum of:
+X         m_msg.sq1off-1 - the user defined coordinate
+X         loffset - the offset into the library sequence
+X         llsgn*ioff10 - the offset into the beginning of the alignment
+X                        (given in the "natural" coordinate system,
+X                        except for tfasta3 which provides context)
+X         llsgn*llfact*(ioff1-del1-ioff10)
+X                      - the position in the consensus alignment, -gaps
+X      */
+X
+X      lloff = m_msg.sq1off-1 + loffset + aln->frame +
+X        (long)llsgn*aln->llmult*ioff10 +
+X        (long)llsgn*llfact*(ioff1-del1-ioff10);
+X
+X      if (cl1 && lloff%10 == 9) {
+X        sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1l);
+X        clinep[1][i+8-lfxn]=' ';
+X        rl1 = NO;
+X      }
+X      else if (cl1 && lloff== -1) {
+X        /* written at i-lfxn like the other branches, so that the
+X           blank stored below lands on sprintf's terminator rather
+X           than on the digit */
+X        sprintf(&clinep[1][i-lfxn],"%8ld",0l);
+X        clinep[1][i+8-lfxn]=' ';
+X        rl1 = NO;
+X      }
+X      else if (rl1 && (lloff+1)%10 == 0) {
+X        sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1);
+X        clinep[1][i+8-lfxn]=' ';
+X      }
+X
+X      /* match symbol: only inside [amin0, amax0] */
+X      line[1][i] = ' ';
+X      if (ioff0-del0 >= aln->amin0 && ioff0-del0 <= aln->amax0) {
+X        if (seqca[ic]==4) {line[1][i]=map_sym_p[4];}
+X        else if ((m_msg.markx&MX_ATYPE)==2) line[1][i]=line[2][i];
+X        else line[1][i] = map_sym_p[seqca[ic]];
+X      }
+X      else if ((m_msg.markx&MX_ATYPE)==2) line[1][i]=line[2][i];
+X    }
+X
+X    /* overlay annotation characters on the first coordinate line */
+X    if (m_msg.ann_flg) {
+X      for (ic=ic_save,i=0; i<lend; ic++,i++) {
+X        if (seqc0a[ic]!= ' ') clinep[0][i+7-qfxn] = seqc0a[ic];
+X      }
+X    }
+X
+X    line[0][lend]=line[1][lend]=line[2][lend]=0;
+X    clinep[0][lend+7]=clinep[1][lend+7]=0;
+X
+X    ll01 = ll0&&ll1;
+X    if ((m_msg.markx&MX_ATYPE)==2 && (!aln->showall || ll0)) ll1=0;
+X    fprintf(fd,"\n");
+X    if (ll0) fprintf(fd,"%s%s\n",blank,clinep[0]);
+X    if (ll0) fprintf(fd,afmt,name0,line[0]);
+X    if (ll01) fprintf(fd,afmt,name01,line[1]);
+X    if (ll1) fprintf(fd,afmt,name1,line[2]);
+X    if (ll1) fprintf(fd,"%s%s\n",blank,clinep[1]);
+X  }
+}
+X
+static float gscale= -1.0;
+X
+/*
+X  disgraph() - print a one-line map of where an alignment falls.
+X
+X  A line of (mlen - 10) columns is built in which the span from min0 to
+X  max0, scaled by gscale, is drawn with '-' and one ':' is placed at
+X  the scaled position gstop + (n1 - max1).
+X
+X  gscale is a file-level static: it is computed as llen/n0 the first
+X  time this function runs (while it is still negative) and reused by
+X  every later call.  On that first call, when (markx & MX_ATYPE)==7, a
+X  header line for name0 covering sq0off .. sq0off+n0-1 is printed.
+X
+X  The map line itself is printed only when MX_AMAP is set in markx:
+X  with score and percent when (markx & MX_ATYPE)==7, otherwise in a
+X  ">name" form.  nml is the label width (values <= 6 give 6).
+X
+X  Changes from the previous version: the line length is kept inside
+X  line[], a negative ':' position is clamped to 0, n0 == 0 no longer
+X  divides by zero, and strncat() is given the space remaining in the
+X  format buffer instead of the buffer's total size.
+*/
+void
+disgraph(FILE *fd, int n0,int n1, float percent, int score,
+X         int min0, int min1, int max0, int max1, long sq0off,
+X         char *name0, char *name1, int nml,
+X         int mlen, int markx)
+{
+X  int i, gstart, gstop, gend;
+X  int llen;
+X  char line[MAXOUT+1];
+X  char afmt[16], afmtf[64];
+X
+X  if (nml > 6) {
+X    sprintf(afmt,"%%-%ds",nml);
+X  }
+X  else {
+X    strncpy(afmt,"%-6s",sizeof(afmt));
+X  }
+X  strncpy(afmtf,afmt,sizeof(afmtf));
+X  strncat(afmtf," %4ld-%4ld: %5.1f%%:%s:\n",sizeof(afmtf)-strlen(afmtf)-1);
+X
+X  /* line[] has room for MAXOUT characters plus the terminator */
+X  llen = mlen - 10;
+X  if (llen > MAXOUT) llen = MAXOUT;
+X  if (llen < 1) llen = 1;
+X  memset(line,' ',llen);
+X
+X  line[llen-1]='\0';
+X  if (gscale < 0.0) {
+X    gscale = (n0 > 0) ? (float)llen/(float)n0 : 1.0;
+X    if ((markx&MX_ATYPE) == 7 )
+X      fprintf(fd,afmtf,name0,sq0off,sq0off+n0-1,100.0,line);
+X  }
+X
+X  gstart = (int)(gscale*(float)min0+0.5);
+X  gstop = (int)(gscale*(float)max0+0.5);
+X  gend = gstop+(int)(gscale*(float)(n1-max1));
+X
+X  if (gstop >= llen) gstop = llen-1;
+X  if (gend >= llen) gend = llen-1;
+X  if (gend < 0) gend = 0;       /* gend indexes line[] below */
+X  for (i=0; i<gstart; i++) line[i]=' ';
+X  for (; i<gstop; i++) line[i]='-';
+X  for (; i<llen; i++) line[i]=' ';
+X
+X  line[gend]=':';
+X  line[llen]='\0';
+X
+X  if (markx & MX_AMAP) {
+X    if ((markx & MX_ATYPE)==7) { /* markx==4 - no alignment */
+X      strncpy(afmtf,afmt,sizeof(afmtf));
+X      strncat(afmtf," %4ld-%4ld:%4d %5.1f%%:%s\n",
+X              sizeof(afmtf)-strlen(afmtf)-1);
+X      fprintf(fd,afmtf,name1,min0+sq0off,max0+sq0off-1,score,percent,line);
+X    }
+X    else {
+X      afmtf[0]='>';
+X      strncpy(&afmtf[1],afmt,sizeof(afmtf)-1);
+X      strncat(afmtf," %4ld-%4ld:%s\n",sizeof(afmtf)-strlen(afmtf)-1);
+X      fprintf(fd,afmtf, name1,min0+sq0off,max0+sq0off-1,line);
+X    }
+X  }
+}
+X
+/*
+X  aancpy() - copy an encoded sequence to a printable string.
+X
+X  Up to count bytes are read from from[], stopping early at a zero
+X  byte.  A byte whose value is <= nsq is replaced by the alphabet
+X  character sq[value]; any other byte is copied unchanged.  The result
+X  in to[] is always NUL-terminated, so to[] needs room for count+1
+X  bytes.  The alphabet is pst.sqx/pst.nsqx when pst.ext_sq_set is
+X  non-zero, otherwise pst.sq/pst.nsq.
+X
+X  Each byte is read as unsigned char before it is compared and used as
+X  an index, so a byte >= 0x80 can no longer become a negative index
+X  into sq[] where plain char is signed.
+*/
+void
+aancpy(char *to, char *from, int count, struct pstruct pst)
+{
+X  char *tp, *sq;
+X  int nsq;
+X
+X  if (pst.ext_sq_set) {
+X    nsq = pst.nsqx;
+X    sq = pst.sqx;
+X  }
+X  else {
+X    nsq = pst.nsq;
+X    sq = pst.sq;
+X  }
+X
+X  tp=to;
+X  while (count-- && *from) {
+X    unsigned char code = (unsigned char)*from++;
+X
+X    if (code <= nsq) *tp++ = sq[code];
+X    else *tp++ = (char)code;
+X  }
+X  *tp='\0';
+}
+X
+/*
+X  r_memcpy() - copy cnt bytes from src to dest, lowest address first.
+X
+X  Nothing is copied when cnt is zero or negative (a negative count
+X  previously kept copying until the counter wrapped around).
+*/
+void
+r_memcpy(char *dest, char *src, int cnt)
+{
+X  while (cnt-- > 0) *dest++ = *src++;
+}
+X
+/*
+X  l_memcpy() - copy cnt bytes from src to dest, highest address first.
+X
+X  Both pointers are first advanced past the last byte and then walked
+X  backwards.  Nothing is copied when cnt is zero or negative (a
+X  negative count previously moved the pointers in front of the buffers
+X  and kept copying until the counter wrapped around).
+*/
+void
+l_memcpy(char *dest, char *src, int cnt)
+{
+X  if (cnt <= 0) return;
+X
+X  dest = dest+cnt;
+X  src = src+cnt;
+X  while (cnt-- > 0) *--dest = *--src;
+}
+X
+/* this routine now indexes from 1 (rather than 0) because sq starts
+X with a 0 */
+X
+#define MAXSQ 50 /* must be same as upam.h */
+X
+/*
+X  cal_coord() - fill in the displayed start/stop coordinates of an
+X  alignment (aln->d_start0, d_stop0, d_start1, d_stop1).
+X
+X  For a sequence that is not reversed the coordinate is
+X  origin + position, with 1 added to the start; for a reversed one
+X  (aln->qlrev == 1 / aln->llrev == 1) the sequence length is added to
+X  the origin, the position is subtracted, and 1 is added to the stop
+X  instead.  The first sequence uses origin sq0off - 1 and positions
+X  amin0/amax0; the second uses origin loffset, positions amin1/amax1
+X  multiplied by aln->llmult, and has aln->frame added.
+*/
+void cal_coord(int n0, int n1, long sq0off, long loffset,
+X               struct a_struct *aln)
+{
+X  int q_rev = (aln->qlrev == 1);
+X  int l_rev = (aln->llrev == 1);
+X  int q_dir = q_rev ? -1 : 1;
+X  int l_dir = l_rev ? -1 : 1;
+X  long q_origin = sq0off - 1;
+X
+X  /* reversed sequences are counted down from their far end */
+X  if (q_rev) q_origin += n0;
+X  if (l_rev) loffset += n1;
+X
+X  aln->d_start0 = q_origin+q_dir*aln->amin0+(q_rev ? 0 : 1);
+X  aln->d_stop0 = q_origin+q_dir*aln->amax0+(q_rev ? 1 : 0);
+X  aln->d_start1 = loffset+l_dir*aln->amin1*aln->llmult+(l_rev ? 0 : 1)+aln->frame;
+X  aln->d_stop1 = loffset+l_dir*aln->amax1*aln->llmult+(l_rev ? 1 : 0)+aln->frame;
+}
+SHAR_EOF
+chmod 0644 c_dispn.c ||
+echo 'restore of c_dispn.c failed'
+Wc_c="`wc -c < 'c_dispn.c'`"
+test 11467 -eq "$Wc_c" ||
+ echo 'c_dispn.c: original size 11467, current size' "$Wc_c"
+fi
+# ============= checkevent.c ==============
+if test -f 'checkevent.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping checkevent.c (File already exists)'
+else
+echo 'x - extracting checkevent.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'checkevent.c' &&
+X
+/* Copyright 1995 William R. Pearson */
+X
+/* used only in Mac versions to provide mac multitasking */
+X
+#include <stdlib.h>
+X
+#ifdef __MWERKS__
+#include <sioux.h>
+#endif
+X
+#define SLEEP 2L
+#define NIL_MOUSE_REGION 0L
+X
+#define WNE_TRAP_NUM 0x60
+#define UNIMPL_TRAP_NUM 0x9F
+#define SUSPEND_RESUME_BIT 0x0001
+#define ACTIVATING 1
+#define RESUMING 1
+X
+Boolean gDone, gWNEImplemented=0;
+EventRecord gTheEvent;
+Rect gDragRect, gSizeRect;
+X
+/*
+X  InitEvent() - set gWNEImplemented: 1 when the trap address for
+X  WNE_TRAP_NUM differs from the one for UNIMPL_TRAP_NUM, 0 when the
+X  two are the same.
+*/
+void
+InitEvent()
+{
+X  if (NGetTrapAddress(WNE_TRAP_NUM,ToolTrap) ==
+X      NGetTrapAddress(UNIMPL_TRAP_NUM,ToolTrap)) {
+X    gWNEImplemented = 0;
+X  }
+X  else {
+X    gWNEImplemented = 1;
+X  }
+}
+X
+X
+#define hiword(x) (((short *) &(x))[0])
+#define loword(x) (((short *) &(x))[1])
+static MenuHandle aMenu;
+X
+/*
+ChkEvent()
+{}
+*/
+X
+#ifdef TPLOT
+extern WindowPtr gDrawWindow;
+extern PicHandle aPic;
+#endif
+X
+static long checkTime=0;
+X
+/*
+X  ChkEvent() - poll for one pending event and handle it.
+X
+X  Returns at once unless TickCount() has reached checkTime, which is
+X  then moved 60 ticks ahead.  The event is fetched with WaitNextEvent()
+X  when gWNEImplemented is set, otherwise with SystemTask() followed by
+X  GetNextEvent().  A fetched event is offered to
+X  SIOUXHandleOneEvent() first; if that reports it handled, nothing
+X  more is done.  Mouse-downs in the menu bar, a drag region or a
+X  system window are dispatched; menu 2 calls exit(0).
+X
+X  SIOUXDidEvent starts out as 0: it used to be tested without ever
+X  having been assigned when no event was returned.
+*/
+void
+ChkEvent()
+{
+X  EventRecord event;
+X  WindowPeek wp;
+X  Boolean gotEvent, SIOUXDidEvent;
+X  long choice;
+X  Str255 buf;
+X
+X  if (TickCount() < checkTime) return;
+X  checkTime = TickCount()+60L;
+X
+X  if (gWNEImplemented)
+X    gotEvent=WaitNextEvent(everyEvent-diskMask,&event,SLEEP,NIL_MOUSE_REGION);
+X  else {
+X    SystemTask();
+X    gotEvent=GetNextEvent(everyEvent-diskMask,&event);
+X  }
+X
+X  SIOUXDidEvent = 0;
+X  if (gotEvent) SIOUXDidEvent=SIOUXHandleOneEvent(&event);
+X  if (SIOUXDidEvent) return;
+X
+X  if (event.what == nullEvent) {
+X    if (FrontWindow() == 0) InitCursor();
+X    return;
+X  }
+X
+X  if (SystemEvent(&event)) return;
+X
+X  if (event.what == mouseDown) {
+X    switch (FindWindow(event.where, (WindowPtr *)&wp)) {
+X    case inMenuBar:
+X      InitCursor();
+X      choice = MenuSelect(event.where);
+X      goto doMenu;
+X    case inDrag :
+X      DragWindow((WindowPtr)wp, event.where, &gDragRect);
+X      break;
+X    case inSysWindow:
+X      SystemClick(&event, (WindowPtr)wp);
+X      break;
+X    }
+X  }
+X
+X  return;
+X
+doMenu:
+X  /* hiword(choice) selects the menu, loword(choice) the item */
+X  switch (hiword(choice)) {
+X  case 1:
+X    GetMenuItemText(aMenu, loword(choice), buf);
+X    OpenDeskAcc(buf);
+X    break;
+X  case 2:
+X    exit(0);
+X
+X  case 3:
+X    SystemEdit(loword(choice) - 1);
+X    break;
+X  }
+X  HiliteMenu(0);
+}
+X
+#ifdef TPLOT
+X
+/*
+X  Waitkey() - fetch one event and report whether the caller should stop
+X  waiting.
+X
+X  Returns 1 for a key press, a click in a window's go-away region, or a
+X  selection from menu 2; returns 0 for everything else.  An update
+X  event for gDrawWindow redraws aPic; a click in a window's content
+X  selects that window and redraws aPic in gDrawWindow.
+X
+X  keyval is not used.  The return type and parameter are now declared
+X  explicitly (they were implicit int / K&R style), the unused local
+X  "key" is gone, and the menu item text is read with GetMenuItemText()
+X  as in ChkEvent().
+*/
+int
+Waitkey(int keyval)
+{
+X  EventRecord event;
+X  WindowPeek wp;
+X  long choice;
+X  Str255 buf;
+X
+X  SystemTask();
+X  if (gWNEImplemented)
+X    WaitNextEvent(everyEvent-diskMask,&event,SLEEP,NIL_MOUSE_REGION);
+X  else {
+X    SystemTask();
+X    GetNextEvent(everyEvent-diskMask,&event);
+X  }
+X
+X
+X  InitCursor();
+X  if (event.what == nullEvent) {
+X    return 0;
+X  }
+X
+X  if (SystemEvent(&event)) return 0;
+X
+X  if (event.what == updateEvt) {
+X    if ((WindowPtr)event.message == gDrawWindow) {
+X      BeginUpdate((WindowPtr)event.message);
+X      DrawPicture(aPic,&gDrawWindow->portRect);
+X      EndUpdate((WindowPtr)event.message);
+X    }
+X    else {
+X      /* some other window: just acknowledge the update */
+X      BeginUpdate((WindowPtr)event.message);
+X      EndUpdate((WindowPtr)event.message);
+X    }
+X    return 0;
+X  }
+X
+X  if (event.what == keyDown) return 1;
+X  if (event.what == mouseDown) {
+X    switch (FindWindow(event.where, (WindowPtr *)&wp)) {
+X    case inMenuBar:
+X      InitCursor();
+X      choice = MenuSelect(event.where);
+X      goto doMenu;
+X    case inDrag :
+X      DragWindow((WindowPtr)wp, event.where, &gDragRect);
+X      break;
+X    case inSysWindow:
+X      SystemClick(&event, (WindowPtr)wp);
+X      break;
+X    case inGoAway :
+X      return 1;
+X    case inContent:
+X      SelectWindow((WindowPtr)wp);
+X      SetPort(gDrawWindow);
+X      DrawPicture(aPic,&gDrawWindow->portRect);
+X      break;
+X    }
+X  }
+X
+X  return 0;
+X
+doMenu:
+X  /* hiword(choice) selects the menu, loword(choice) the item */
+X  switch (hiword(choice)) {
+X  case 1:
+X    GetMenuItemText(aMenu, loword(choice), buf);
+X    OpenDeskAcc(buf);
+X    break;
+X  case 2:
+X    return 1;
+X
+X  case 3:
+X    SystemEdit(loword(choice) - 1);
+X    break;
+X  }
+X  HiliteMenu(0);
+X  return 0;
+}
+#endif
+X
+X
+SHAR_EOF
+chmod 0644 checkevent.c ||
+echo 'restore of checkevent.c failed'
+Wc_c="`wc -c < 'checkevent.c'`"
+test 3492 -eq "$Wc_c" ||
+ echo 'checkevent.c: original size 3492, current size' "$Wc_c"
+fi
+# ============= comp_lib.c ==============
+if test -f 'comp_lib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping comp_lib.c (File already exists)'
+else
+echo 'x - extracting comp_lib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'comp_lib.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999, 2002 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: comp_lib.c,v 1.100 2007/04/26 18:36:36 wrp Exp $ */
+X
+/*
+X * Concurrent read version
+X *
+X * Feb 20, 1998 modifications for prss3
+X *
+X * December, 1998 - DNA searches are now done with forward and reverse
+X * strands
+X */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+X
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+X
+#ifdef UNIX
+#include <unistd.h>
+#include <sys/types.h>
+#include <signal.h>
+#endif
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#include "mw.h" /* defines beststr */
+#include "structs.h" /* mngmsg, libstruct */
+#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
+X
+#define XTERNAL
+#include "uascii.h"
+X
+char *mp_verstr="34.26";
+X
+/********************************/
+/* global variable declarations */
+/********************************/
+char gstring2[MAX_STR]; /* string for label */
+char gstring3[MAX_STR];
+char hstring1[MAX_STR];
+X
+extern int max_workers;
+X
+#ifdef SUPERFAMNUM
+int nsfnum;
+int sfnum[10];
+extern int sfn_cmp(int *q, int *s);
+int nsfnum_n;
+int sfnum_n[10];
+#endif
+X
+/********************************/
+/* extern variable declarations */
+/********************************/
+extern char *prog_func; /* function label */
+extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
+X
+/********************************/
+/*extern function declarations */
+/********************************/
+/* open sequence file (getseq.c) */
+extern int getseq(char *filen, int *sascii,
+X unsigned char *seq, int maxs,
+X char *libstr, int n_libstr,
+X long *sq0ff);
+X
+struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
+X
+void set_shuffle(struct mngmsg m_msg);
+void closelib(struct lmf_str *m_fptr);
+X
+void irand(int);
+int nrand(int);
+X
+extern int ann_scan(unsigned char *, int, struct mngmsg *, int );
+extern int scanseq(unsigned char *seq, int n, char *str);
+extern void re_ascii(int *qascii, int *sascii);
+extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
+extern void revcomp(unsigned char *seq, int n, int *c_nt);
+X
+extern void init_ascii(int is_ext, int *sascii, int is_dna);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0);
+extern void free_pam2p(int **);
+X
+/* initialize environment (doinit.c) */
+extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
+X struct pstruct *ppst, unsigned char **aa0);
+X
+/* print timing information */
+extern void ptime (FILE *, time_t);
+X
+#ifdef COMP_MLIB
+#define QGETLIB (q_file_p->getlib) /* read the next query through the query library's getlib pointer; needs a q_file_p variable in scope at the call site */
+#endif
+X
+#define GETLIB (m_file_p->getlib) /* read the next library sequence through the current library's getlib pointer; needs an m_file_p variable in scope at the call site */
+X
+/* calculation functions: init_work() initializes the comparison function
+X and returns its state through f_arg; do_work() compares one query
+X against one library sequence and writes the scores to *rst.
+X do_work() is only declared here for the non-threaded build. */
+extern void
+init_work(unsigned char *aa0, int n0,
+X struct pstruct *ppst, void **f_arg );
+#ifndef COMP_THR
+extern void
+do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame,
+X struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
+#endif
+X
+extern void
+close_work(unsigned char *aa0, int n0, struct pstruct *ppst, void **f_arg);
+extern void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2);
+X
+#ifdef COMP_THR
+#ifndef PRSS
+void
+save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
+X FILE *fdata, int *, struct hist_str *, void **);
+#else /* PRSS: two extra int * arguments; main() passes s_score and &s_n1 */
+void
+save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
+X FILE *fdata, int *, struct hist_str *, void **, int *, int *);
+#endif /* PRSS */
+#endif /* COMP_THR */
+X
+/* statistics functions; main() stores the process_hist() return value in
+X pst.zsflag_f and then sets stats_done */
+extern int
+process_hist(struct stat_str *sptr, int nstat,
+X struct mngmsg m_msg,
+X struct pstruct pst,
+X struct hist_str *hist, void **, int);
+extern void addhistz(double, struct hist_str *); /* scaleswn.c */
+void selectbestz(struct beststr **, int, int ); /* also declared a second time with the misc functions below */
+extern double (*find_zp)(int score, double escore, int length, double comp,void *); /* function pointer; main() uses it to turn a score/escore into a z-score once stats_done is set */
+X
+void last_stats(const unsigned char *, int,
+X struct stat_str *sptr, int nstats,
+X struct beststr **bestp_arr, int nbest,
+X struct mngmsg m_msg, struct pstruct pst,
+X struct hist_str *histp, void *);
+X
+int last_calc( unsigned char **a0, unsigned char *a1, int maxn,
+X struct beststr **bestp_arr, int nbest,
+X struct mngmsg m_msg, struct pstruct *ppst,
+X void **f_str, void *rs_str);
+X
+void scale_scores(struct beststr **bestp_arr, int nbest,
+X struct db_str,struct pstruct pst, void *);
+X
+#ifndef COMP_THR
+extern int shuffle(unsigned char *, unsigned char *, int); /* used when pst.zsflag >= 10 and pst.zs_win <= 0 */
+extern int wshuffle(unsigned char *, unsigned char *, int, int, int *); /* used when pst.zsflag >= 10 and pst.zs_win > 0 */
+#endif
+X
+extern void set_db_size(int, struct db_str *, struct hist_str *);
+X
+/* display functions; note that showbest() takes m_msg by pointer while
+X showalign() takes it by value */
+extern void
+showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1,
+X int maxn, struct beststr **bestp_arr, int nbest,
+X int qlib, struct mngmsg *m_msg,struct pstruct pst,
+X struct db_str db, char *gstring2, void **f_str);
+X
+extern void
+showalign (FILE *fp, unsigned char **aa0, unsigned char *aa1,
+X int maxn, struct beststr **bestp_arr, int nbest,
+X int qlib, struct mngmsg m_msg,struct pstruct pst,
+X char *gstring2, void **f_str);
+X
+/* misc functions */
+void h_init(struct pstruct *, struct mngmsg *, char *); /* doinit.c - first initialization routine, called before the command line is parsed */
+void last_init(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
+void last_params(unsigned char *, int, struct mngmsg *, struct pstruct *); /* "get the last parameters"; called with the query before the library search starts */
+X
+void s_abort(char *, char *); /* compacc.c - NOTE(review): callers in main() continue as if this never returns; confirm */
+X
+/* initfa/sw.c */
+void resetp(struct mngmsg *, struct pstruct *);
+X
+void gettitle(char *, char *, int); /* nxgetaa.c */
+void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
+void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
+void query_parm(struct mngmsg *, struct pstruct *); /* initfa/sw.c - called from main() only when !m_msg.quiet */
+void selectbestz(struct beststr **, int, int); /* duplicate of the declaration under "statistics functions" */
+X
+/* compacc.c */
+void prhist(FILE *, struct mngmsg, struct pstruct,
+X struct hist_str hist, int nstats, struct db_str, char *);
+void printsum(FILE *, struct db_str db);
+int reset_maxn(struct mngmsg *, int); /* set m_msg.maxt, maxn from maxl */
+X
+FILE *outfd; /* Output file; main() sets it to stdout before each search */
+X
+/* this information is global for fsigint() */
+extern time_t s_time(); /* fetches time */
+time_t tstart, tscan, tprev, tdone; /* Timing: tstart and tscan are set from s_time() at startup, tprev just before each search */
+#ifdef COMP_MLIB
+time_t ttscan, ttdisp;
+#endif
+time_t tdstart, tddone; /* tdstart is set from time(NULL) at startup */
+X
+static struct db_str qtt = {0l, 0l, 0}; /* query totals: main() adds each query's m_msg.n0 to qtt.length and increments qtt.entries */
+X
+#ifdef COMP_THR
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+X
+/* functions for getting/sending buffers to threads (thr_sub.c):
+X get_rbuf() hands main() an empty buffer to fill (it may still hold
+X results that must be saved first); put_rbuf() provides a filled
+X buffer to the workers.
+X NOTE(review): the get_rbuf() parameter name "max_wor_buf" looks like a
+X typo for max_work_buf; harmless in a prototype. */
+extern void init_thr(int , struct thr_str *, struct mngmsg, struct pstruct *,
+X unsigned char *, int);
+extern void start_thr(void);
+extern void get_rbuf(struct buf_head **cur_buf, int max_wor_buf);
+extern void put_rbuf(struct buf_head *cur_buf, int max_work_buf);
+extern void put_rbuf_done(int nthreads, struct buf_head *cur_buf,
+X int max_work_buf);
+#undef XTERNAL /* XTERNAL was defined above for uascii.h; NOTE(review): presumably undefined here so thr.h is included in its other mode - confirm against thr.h */
+#include "thr.h"
+struct buf_head buf_list[NUM_WORK_BUF]; /* main() initializes entries 0 .. 2*max_workers-1; NOTE(review): assumes NUM_WORK_BUF >= 2*MAX_WORKERS - confirm */
+#endif
+X
+/* these variables must be global for comp_thr.c so that save_best()
+X can use them */
+X
+static struct beststr
+X *best, /* array of best scores; MAXBEST+1 entries are allocated and the pointer is advanced by one, so best[-1] is a sentinel entry */
+X *bestp, /* the entry currently being filled in */
+X **bestp_arr; /* array of pointers */
+static int nbest; /* number of best scores */
+X
+static struct stat_str *stats, *qstats; /* array of scores for statistics: stats has MAXSTATS entries; qstats has m_msg.shuff_max+1 entries and is only allocated when m_msg.qshuffle is set */
+X
+/* these variables are global so they can be set both by the main()
+X program and save_best() in threaded mode.
+X nstats and nqstats count the entries filled in stats[] and qstats[];
+X kstats starts at nstats once the statistics are first calculated and is
+X incremented for each later score when SAMP_STATS sampling is enabled.
+*/
+static int nstats, nqstats, kstats;
+static double zbestcut; /* cut off for best z-score */
+static int bestfull; /* index for selectbestz() */
+static int stats_done=0; /* flag for z-value processing */
+void fsigint();
+X
+int
+main (int argc, char *argv[])
+{
+X unsigned char *aa0[6], *aa0s, *aa1, *aa1ptr, *aa1s;
+X int n1, n1s; /* n1s needed for PRSS so that when getlib() returns -1 (because no more
+X library sequences, we have a valid n1 for shuffling */
+X
+X int *n1tot_ptr=NULL, *n1tot_cur;
+X int n1tot_cnt=0;
+X int n1tot_v, aa1_loff;
+X
+X long qoffset; /* qoffset is the equivalent of loffset */
+X /* m_msg.sq0off is the l_off equivalent */
+X
+X long loffset, l_off; /* loffset is the coordinate of first residue
+X when lcont > 0; l_off is not used in the
+X main loop, only in showbest and showalign */
+X char lib_label[MAX_FN];
+X char pgm_abbr[MAX_SSTR];
+X char qlabel[MAX_FN];
+#ifdef COMP_MLIB
+X char q_bline[MAX_STR];
+X fseek_t qseek;
+X int qlib;
+X struct lmf_str *q_file_p;
+X int sstart, sstop, is;
+#endif
+X int id;
+X struct lmf_str *m_file_p;
+X
+X int t_best, t_rbest, t_qrbest; /* best score of two/six frames */
+X double t_escore, t_rescore, t_qrescore; /* best evalues of two/six frames */
+X int i_score;
+#ifdef PRSS
+X int s_score[3];
+X int s_n1;
+#endif
+X
+X struct pstruct pst;
+X void *f_str[6], *qf_str; /* different f_str[]'s for different
+X translation frames, or forward,reverse */
+X int have_f_str=0;
+X
+#ifdef COMP_THR
+X long ntbuff;
+X int max_buf_cnt, ave_seq_len, buf_siz;
+X int max_work_buf;
+X struct buf_head *cur_buf;
+X struct buf_str *cur_buf_p;
+X int nseq;
+X struct thr_str *work_info;
+#endif
+X
+X struct mngmsg m_msg; /* Message from host to manager */
+X int iln, itt; /* index into library names */
+X char rline[MAX_FN];
+X char argv_line[MAX_STR];
+X int t_quiet;
+X
+X struct rstruct rst; /* results structure */
+X struct rstruct rrst; /* results structure for shuffle*/
+X int i;
+X
+X FILE *fdata=NULL; /* file for full results */
+X char libstr[MAX_UID]; /* string for labeling full results */
+X char *libstr_p; /* choose between libstr and ltitle */
+X int n_libstr; /* length of libstr */
+X int jstats;
+X int leng; /* leng is length of the descriptive line */
+X int maxn; /* size of the library sequence examined */
+X int maxl; /* size of library buffer */
+X fseek_t lmark; /* seek into library of current sequence */
+X int qlcont; /* continued query sequence */
+X int lcont, ocont, maxt; /* continued sequence */
+X int igncnt=0; /* count for ignoring sequences warning */
+X int ieven=0; /* tmp for wshuffle */
+X double zscore; /* tmp value */
+X char *bp; /* general purpose string ptr */
+X
+X /* Initialization */
+X
+#if defined(UNIX)
+X m_msg.quiet= !isatty(1);
+#else
+X m_msg.quiet = 0;
+#endif
+X
+#ifdef PGM_DOC
+X argv_line[0]='#'; argv_line[1]='\0';
+X for (i=0; i<argc; i++) {
+X strncat(argv_line," ",sizeof(argv_line)-strlen(argv_line)-1);
+X if (strchr(argv[i],' ')) {
+X strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
+X strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
+X strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
+X }
+X else {
+X strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
+X }
+X }
+X argv_line[sizeof(argv_line)-1]='\0';
+#endif
+X
+X /* first initialization routine - nothing is known */
+X h_init(&pst, &m_msg, pgm_abbr);
+X
+X m_msg.db.length = qtt.length = 0l;
+X m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
+X m_msg.pstat_void = NULL;
+X m_msg.hist.entries = 0;
+X
+X for (iln=0; iln<MAX_LF; iln++) m_msg.lb_mfd[iln]=NULL;
+X
+X f_str[0] = f_str[1] = NULL;
+X
+X aa0[0] = NULL;
+X /* second initialization - get command line arguments */
+X initenv (argc, argv, &m_msg, &pst,&aa0[0]);
+X
+#ifdef COMP_THR
+X /* now have max_workers - allocate work_info[] */
+X if (max_workers >= MAX_WORKERS) max_workers = MAX_WORKERS;
+X if ((work_info=
+X (struct thr_str *)calloc(max_workers,sizeof(struct thr_str)))==NULL) {
+X fprintf(stderr, " cannot allocate work_info[%d]\n",max_workers);
+X exit(1);
+X }
+#else
+X max_workers = 1;
+#endif
+X
+#ifndef PRSS
+X /* label library size limits */
+X if (m_msg.n1_low > 0 && m_msg.n1_high < BIGNUM)
+X sprintf(lib_label,"library (range: %d-%d)",m_msg.n1_low,m_msg.n1_high);
+X else if (m_msg.n1_low > 0)
+X sprintf(lib_label,"library (range: >%d)",m_msg.n1_low);
+X else if (m_msg.n1_high < BIGNUM)
+X sprintf(lib_label,"library (range: <%d)",m_msg.n1_high);
+X else
+X strncpy(lib_label,"library",sizeof(lib_label));
+#else
+X sprintf(lib_label,"shuffled sequence");
+#endif
+X lib_label[sizeof(lib_label)-1]='\0';
+X
+X tstart = tscan = s_time();
+X tdstart = time(NULL);
+X
+X /* Allocate space for the query and library sequences */
+X /* pad aa0[] with an extra 32 chars for ALTIVEC padding */
+X if (aa0[0]==NULL) {
+X if ((aa0[0] = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof(unsigned char)))
+X == NULL)
+X s_abort ("Unable to allocate query sequence", "");
+X *aa0[0]=0;
+X aa0[0]++;
+X }
+X aa0[5]=aa0[4]=aa0[3]=aa0[2]=aa0[1]=aa0[0];
+X
+X /* make room for random sequence -
+X also used as storage for COMP_THR library overlaps
+X */
+X if ((aa1s = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof (char))) == NULL) {
+X s_abort ("Unable to allocate shuffled library sequence", "");
+X }
+X *aa1s=0;
+X aa1s++;
+X
+X irand(0);
+X
+X if (m_msg.markx & MX_HTML) {
+#ifdef HTML_HEAD
+X fprintf(stdout,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
+#endif
+X fprintf(stdout,"<pre>\n");
+X }
+X
+#ifdef PGM_DOC
+X fputs(argv_line,stdout);
+X fputc('\n',stdout);
+#endif
+X
+X fprintf(stdout,"%s\n",iprompt0);
+X fprintf(stdout," %s%s\n",verstr,refstr);
+X if (m_msg.markx & MX_HTML) fputs("</pre>\n",stdout);
+X
+X /* Query library */
+X if (m_msg.tname[0] == '\0') {
+X if (m_msg.quiet == 1)
+X s_abort("Query sequence undefined","");
+X l1: fputs (iprompt1, stdout);
+X fflush (stdout);
+X if (fgets (m_msg.tname, MAX_FN, stdin) == NULL)
+X s_abort ("Unable to read query library name","");
+X m_msg.tname[MAX_FN-1]='\0';
+X if ((bp=strchr(m_msg.tname,'\n'))!=NULL) *bp='\0';
+X if (m_msg.tname[0] == '\0') goto l1;
+X }
+X
+X /* Fetch first sequence */
+X qoffset = 0l;
+X qlcont = 0;
+#ifdef COMP_MLIB
+X /* Open query library */
+X if ((q_file_p= openlib(m_msg.tname, m_msg.qdnaseq,qascii,!m_msg.quiet,NULL))==NULL) {
+X s_abort(" cannot open library ",m_msg.tname);
+X }
+X qlib = 0;
+X m_msg.n0 =
+X QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
+X &qseek, &qlcont,q_file_p,&m_msg.sq0off);
+X if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
+X strncpy(qlabel,m_msg.qtitle,sizeof(qlabel));
+X if (bp != NULL) *bp = ' ';
+X qlabel[sizeof(qlabel)-1]='\0';
+X
+X /* if annotations are included in sequence, remove them */
+X if (m_msg.ann_flg) {
+X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
+X }
+X
+X if (m_msg.term_code && !(m_msg.qdnaseq==SEQT_DNA || m_msg.qdnaseq==SEQT_RNA) &&
+X aa0[0][m_msg.n0-1]!='*') {
+X aa0[0][m_msg.n0++]='*';
+X aa0[0][m_msg.n0]=0;
+X }
+X
+X /* check for subset */
+X if (q_file_p->opt_text[0]!='\0') {
+X if (q_file_p->opt_text[0]=='-') {
+X sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
+X }
+X else {
+X sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
+X sstart--;
+X if (sstop <= 0 ) sstop = BIGNUM;
+X }
+X for (id=0,is=sstart; is<min(m_msg.n0,sstop); ) aa0[0][id++]=aa0[0][is++];
+X aa0[0][id]=0;
+X m_msg.n0 = min(m_msg.n0,sstop)-sstart;
+X if (m_msg.sq0off==1) m_msg.sq0off = sstart+1;
+X }
+X
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X /* for ALTIVEC, must pad with 15 NULL's */
+X for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
+#endif
+X
+X if (qlcont) {
+X qoffset += m_msg.n0 - m_msg.sq0off;
+X }
+X else {
+X qoffset = 0l;
+X }
+X
+#else
+X m_msg.n0 = getseq (m_msg.tname, qascii, aa0[0], m_msg.max_tot,
+X m_msg.qtitle, sizeof(m_msg.qtitle),
+X &m_msg.sq0off);
+X strncpy(qlabel,m_msg.tname,sizeof(qlabel));
+X qlabel[sizeof(qlabel)-1]='\0';
+X
+X /* if annotations are included in sequence, remove them */
+X if (m_msg.ann_flg) {
+X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
+X }
+#endif
+X
+X if (m_msg.n0 > MAXTST) {
+X fprintf(stderr," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
+X fprintf(stdout," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
+X aa0[0][MAXTST]='\0';
+X m_msg.n0=MAXTST;
+X }
+X
+X if (m_msg.qdnaseq == SEQT_UNK) {
+X
+X /* do automatic sequence recognition,but only for sequences > 20 residues */
+X if (m_msg.n0 > 20 &&
+X (float)scanseq(aa0[0],m_msg.n0,"ACGTUNacgtun")/(float)m_msg.n0 >0.85) {
+X pascii = nascii;
+X m_msg.qdnaseq = SEQT_DNA;
+X }
+X else { /* its protein */
+X pascii = aascii;
+X m_msg.qdnaseq = SEQT_PROT;
+X }
+X /* modify qascii to use encoded version
+X cannot use memcpy() because it loses annotations
+X */
+X re_ascii(qascii,pascii);
+X init_ascii(pst.ext_sq_set,qascii,m_msg.qdnaseq);
+X m_msg.n0 = recode(aa0[0],m_msg.n0,qascii, pst.nsqx);
+X }
+X
+X if (m_msg.n0 <= 0)
+X s_abort ("Query sequence length <= 0: ", m_msg.tname);
+X
+#ifdef SUPERFAMNUM
+X m_msg.nqsfnum = nsfnum;
+X for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
+X m_msg.nqsfnum_n = nsfnum_n;
+X for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
+#endif
+X
+X resetp (&m_msg, &pst);
+X
+#ifndef COMP_MLIB
+X gettitle(m_msg.tname,m_msg.qtitle,sizeof(m_msg.qtitle));
+X if (m_msg.tname[0]=='-' || m_msg.tname[0]=='@') {
+X strncmp(m_msg.tname,m_msg.qtitle,sizeof(m_msg.tname));
+X if ((bp=strchr(m_msg.tname,' '))!=NULL) *bp='\0';
+X }
+#endif
+X
+X /* get library file names */
+X
+#ifndef PRSS
+X if (strlen (m_msg.lname) == 0) {
+X if (m_msg.quiet == 1) s_abort("Library name undefined","");
+X libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
+X }
+X
+X libselect(m_msg.lname, &m_msg);
+#else
+X if (strlen (m_msg.lname) == 0) {
+X if (m_msg.quiet == 1) s_abort("Shuffle sequence undefined","");
+l2: fputs(iprompt2,stdout);
+X fflush(stdout);
+X if (fgets (m_msg.lname, MAX_FN, stdin) == NULL)
+X s_abort ("Unable to read shuffle file name","");
+X m_msg.lname[MAX_FN-1]='\0';
+X if ((bp=strchr(m_msg.lname,'\n'))!=NULL) *bp='\0';
+X if (m_msg.lname[0] == '\0') goto l2;
+X }
+X m_msg.lbnames[0]= m_msg.lname;
+X m_msg.nln = 1;
+X m_msg.nshow = 0;
+#endif
+X
+X /* Get additional parameters here */
+X if (!m_msg.quiet) query_parm (&m_msg, &pst);
+X
+X last_init(&m_msg, &pst);
+X
+X /* Allocate space for saved scores */
+X if ((best =
+X (struct beststr *)calloc((MAXBEST+1),sizeof(struct beststr)))==NULL)
+X s_abort ("Cannot allocate best struct","");
+X if ((bestp_arr =
+X (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
+X s_abort ("Cannot allocate bestp_arr","");
+X
+X /* Initialize bestp_arr */
+X for (nbest = 0; nbest < MAXBEST+1; nbest++)
+X bestp_arr[nbest] = &best[nbest];
+X best++; bestp_arr++;
+X best[-1].score[0]=best[-1].score[1]=best[-1].score[2]= INT_MAX;
+X best[-1].zscore=FLT_MAX; /* for Z-scores, bigger is best */
+X best[-1].escore=FLT_MIN; /* for E()-values, lower is best */
+X
+X if ((stats =
+X (struct stat_str *)calloc(MAXSTATS,sizeof(struct stat_str)))==NULL)
+X s_abort ("Cannot allocate stats struct","");
+X
+#ifdef UNIX
+X /* set up signals now that input is done */
+X signal(SIGHUP,SIG_IGN);
+#endif
+X
+#ifdef COMP_THR
+X /* Set up buffers for reading the library:
+X
+X We will start by using a 2 Mbyte buffer for each worker. For
+X proteins, that means 5,000 sequences of length 400 (average).
+X For DNA, that means 2,000 sequences of length 1000. At the
+X moment, those are good averages.
+X */
+X
+X if (m_msg.ldnaseq== SEQT_DNA) {
+X max_buf_cnt = MAX_NT_BUF;
+X ave_seq_len = AVE_NT_LEN;
+X }
+X else {
+X max_buf_cnt = MAX_AA_BUF;
+X ave_seq_len = AVE_AA_LEN;
+X }
+X
+X /* however - buffer sizes should be a function of the number of
+X workers so that all the workers are kept busy. Assuming a 10,000
+X entry library is the smallest we want to schedule, then
+X */
+X
+X if (max_buf_cnt > 10000/max_workers)
+X max_buf_cnt = 10000/(2*max_workers);
+X
+X max_buf_cnt /= m_msg.thr_fact;
+X
+X /* finally, max_buf_cnt should be a multiple of 6 for tfasta */
+X max_buf_cnt -= (max_buf_cnt % 6);
+X
+X max_work_buf = 2*max_workers;
+X
+X /* allocate space for library buffers and results */
+X
+X buf_siz=max_buf_cnt*ave_seq_len;
+X if (buf_siz < m_msg.max_tot) buf_siz = m_msg.max_tot;
+X for (i=0; i<max_work_buf; i++) {
+X if ((buf_list[i].buf =(struct buf_str *)calloc((size_t)(max_buf_cnt+1),
+X sizeof(struct buf_str)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate buffer struct %d %d\n",i,max_buf_cnt+1);
+X exit(1);
+X }
+X buf_list[i].buf_cnt=0;
+X buf_list[i].have_results=0;
+X if ((buf_list[i].start =
+X (unsigned char *)calloc((size_t)(buf_siz),sizeof(unsigned char)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate buffer %d\n",i);
+X exit(1);
+X }
+X
+X /* make certain there is a '\0' at the beginning */
+X buf_list[i].start++;
+X
+X reader_buf[i] = &buf_list[i];
+X }
+X
+X /* initialization of global variables for threads/buffers */
+X
+X num_worker_bufs = 0;
+X num_reader_bufs = max_work_buf;
+X reader_done = 0;
+X worker_buf_workp = 0;
+X worker_buf_readp = 0;
+X reader_buf_workp = 0;
+X reader_buf_readp = 0;
+X
+X start_thread = 1; /* keeps threads from starting */
+#endif
+X
+X /* Label the output */
+X if ((bp = (char *) strchr (m_msg.lname, ' ')) != NULL) *bp = '\0';
+X if (m_msg.ltitle[0] == '\0') {
+X strncpy(m_msg.ltitle,m_msg.lname,sizeof(m_msg.ltitle));
+X m_msg.ltitle[sizeof(m_msg.ltitle)-1]='\0';
+X }
+X
+#ifdef COMP_MLIB
+X printf("Query library %s vs %s library\n", m_msg.tname,m_msg.lname);
+X if (m_msg.nln > 0) printf("searching %s library\n\n",m_msg.lbnames[0]);
+#endif
+X
+#ifdef COMP_MLIB
+X while(1) {
+X m_msg.db.length = 0l;
+X m_msg.db.entries = m_msg.db.carry = 0;
+X qlib++;
+X stats_done = 0;
+#endif
+X
+X maxl = m_msg.max_tot - m_msg.n0 -2; /* maxn = max library sequence space */
+X
+X maxn = reset_maxn(&m_msg,maxl);
+X pst.maxlen = maxn;
+X
+X outfd = stdout;
+X nbest = 0;
+X zbestcut = -FLT_MAX;
+X nstats = 0;
+X
+X /* get the last parameters */
+X last_params(aa0[0],m_msg.n0, &m_msg, &pst);
+X
+X /*
+X if our function returns approximate E()-scores, we do not need to
+X work with raw scores and later calculate z-scores. When
+X approx. E()-scores are calculated, we still need various
+X statistics structures, but we can get them immediately. In this
+X case, find_zp() must produce a z_score (large positive is good)
+X from an e_score.
+X */
+X
+X if (m_msg.escore_flg) {
+X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
+X &m_msg.hist,&m_msg.pstat_void,0);
+X stats_done=1;
+X }
+X
+#ifndef COMP_THR
+X if (m_msg.qshuffle) {
+X if ((aa0s=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
+X fprintf(stderr,"cannot allocate aa0s[%d]\n",m_msg.n0+2);
+X exit(1);
+X }
+X *aa0s='\0';
+X aa0s++;
+X memcpy(aa0s,aa0[0],m_msg.n0);
+X qshuffle(aa0s,m_msg.n0,m_msg.nm0);
+X }
+X
+X /* previous versions of FASTA have stored the reverse complement in
+X the same array as the forward query sequence. This version
+X changes that, by allocating separate space for the reverse complement,
+X and thus reducing the demand for a large MAXLIB/MAXTRN for long queries
+X */
+X if (m_msg.qframe == 2) {
+X if ((aa0[1]=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
+X fprintf(stderr,"cannot allocate aa0[1][%d]\n",m_msg.n0+2);
+X exit(1);
+X }
+X *aa0[1] = '\0';
+X aa0[1]++;
+X memcpy(aa0[1],aa0[0],m_msg.n0+1);
+X revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
+X }
+X /* set aa1 for serial - threaded points aa1 to buffer */
+X
+X aa1 = aa0[0] + m_msg.n0+1; /* modified now that aa0[1] is done separately */
+X *aa1++='\0';
+#else
+X init_thr(max_workers, work_info, m_msg, &pst, aa0[0], max_work_buf);
+#endif
+X
+X if (m_msg.qshuffle && qstats==NULL) {
+X if ((qstats =
+X (struct stat_str *)calloc(m_msg.shuff_max+1,sizeof(struct stat_str)))==NULL)
+X s_abort ("Cannot allocate qstats struct","");
+X }
+X nqstats = 0;
+X
+X if (m_msg.markx & MX_HTML) fputs("<pre>\n",stdout);
+#ifndef PRSS
+X /* rline[] is a tmp string */
+X if (m_msg.qdnaseq == SEQT_DNA || m_msg.qdnaseq == SEQT_RNA) {
+X strncpy(rline,(m_msg.qframe==1)? " (forward-only)" : "\0",sizeof(rline));
+X rline[sizeof(rline)-1]='\0';
+X }
+X else rline[0]='\0';
+X
+X leng = (int)strlen(m_msg.qtitle);
+X if (leng > 50) leng -= 10;
+X
+X sprintf (&m_msg.qtitle[leng], " %d %s", m_msg.n0, m_msg.sqnam);
+X m_msg.seqnm = 0;
+X
+X
+#ifdef COMP_MLIB
+X printf("%3d>>>%s - %d %s%s\n vs %.60s %s\n", qlib,
+X m_msg.qtitle, m_msg.n0, m_msg.sqnam,
+X (m_msg.revcomp ? " (reverse complement)" : rline),
+X m_msg.ltitle,lib_label);
+#else
+X printf("%.50s: %d %s%s\n %s\n vs %.60s %s\n",
+X qlabel, m_msg.n0, m_msg.sqnam,
+X (m_msg.revcomp ? " (reverse complement)" : rline),
+X m_msg.qtitle,m_msg.ltitle,lib_label);
+#endif
+X libstr_p = &libstr[0];
+X n_libstr=sizeof(libstr);
+#else /* PRSS */
+X libstr_p = &m_msg.ltitle[0];
+X n_libstr= sizeof(m_msg.ltitle);
+X set_shuffle(m_msg); /* set count/width parameters in llgetaa.c */
+#endif
+X
+X fflush (outfd);
+X
+X tprev = s_time();
+X
+X if (m_msg.dfile[0] && (fdata=fopen(m_msg.dfile,"w"))!=NULL)
+X fprintf(fdata,"%3d\t%-50s\n",m_msg.n0,m_msg.qtitle);
+X
+X qtt.length += m_msg.n0;
+X qtt.entries++;
+X
+#ifdef COMP_THR
+X start_thr();
+X
+X /* now open the library and start reading */
+X /* get a buffer and fill it up */
+X get_rbuf(&cur_buf,max_work_buf);
+X
+X cur_buf->buf_cnt = 0;
+X cur_buf->have_results = 0;
+X cur_buf->buf[0].aa1b = cur_buf->start;
+X ntbuff = 0;
+X nseq = 0;
+#else /* ! COMP_THR */
+X /* initialize the comparison function, returning f_str */
+X init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
+X have_f_str=1;
+X
+X f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
+X if (m_msg.qframe == 2) {
+X init_work ( aa0[1], m_msg.n0, &pst, &f_str[1]);
+X }
+X if (m_msg.qshuffle) {
+X init_work ( aa0s, m_msg.n0, &pst, &qf_str);
+X }
+#endif /* COMP_THR */
+X
+X /* open the library - start the search */
+X
+X for (iln = 0; iln < m_msg.nln; iln++) {
+X if ((m_msg.lb_mfd[iln] = m_file_p=
+X openlib(m_msg.lbnames[iln], m_msg.ldnaseq, lascii, !m_msg.quiet, m_msg.lb_mfd[iln]))
+X ==NULL) {
+X fprintf(stderr," cannot open library %s\n",m_msg.lbnames[iln]);
+X continue;
+X }
+#if !defined(PRSS) && !defined(COMP_MLIB)
+X else
+X printf ("searching %s %s\n",m_msg.lbnames[iln],lib_label);
+#endif
+X
+X loffset = 0l;
+X lcont = 0;
+X ocont = 0;
+X n1tot_v = n1tot_cnt = 0;
+X n1tot_cur = n1tot_ptr = NULL;
+X
+X /* get next buffer to read into */
+X maxt = maxn;
+X
+#ifndef COMP_THR
+X aa1ptr = aa1;
+#else
+X /* read sequence directly into buffer */
+X aa1ptr = aa1 = cur_buf->buf[nseq].aa1b;
+#endif
+X
+X while ((n1=GETLIB(aa1ptr,maxt,libstr_p,n_libstr,&lmark,&lcont,m_file_p,&l_off))>=0) {
+X
+X if (n_libstr <= MAX_UID) {
+X if ((bp=strchr(libstr_p,' '))!=NULL) *bp='\0';
+X }
+X
+X if (m_msg.term_code && !lcont &&
+X m_msg.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg.term_code) {
+X aa1ptr[n1++]=m_msg.term_code;
+X aa1ptr[n1]=0;
+X }
+X
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X /* for ALTIVEC, must pad with 15 NULL's */
+X for (id=0; id<SEQ_PAD; id++) {aa1ptr[n1+id]=0;}
+#endif
+X
+#ifdef DEBUG
+X if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') {
+X fprintf(stderr,"%s: aa1[%d] missing NULL boundaries: %d %d\n",libstr_p,n1,aa1[-1],aa1ptr[n1]);
+X }
+#endif
+X
+X /* check for a continued sequence and provide a pointer to
+X the n1_tot array if lcont || ocont */
+X n1tot_v += n1;
+X if (lcont && !ocont) { /* get a new pointer */
+X if (n1tot_cnt <= 0) {
+X if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
+X fprintf(stderr," cannot allocate n1tot_ptr\n");
+X exit(1);
+X }
+X else {n1tot_cnt=1000;}
+X }
+X n1tot_cnt--;
+X n1tot_cur = n1tot_ptr++;
+X }
+X
+X if (n1tot_v < m_msg.n1_low || n1tot_v > m_msg.n1_high) {
+X goto loop2;
+X }
+X
+X m_msg.db.entries++;
+X m_msg.db.length += n1;
+X if (m_msg.db.length > LONG_MAX) {
+X m_msg.db.length -= LONG_MAX; m_msg.db.carry++;
+X }
+X
+#ifdef DEBUG
+X /* This finds most reasons for core dumps */
+X if (pst.debug_lib)
+X for (i=0; i<n1; i++)
+X if (aa1[i]>=pst.nsqx)
+X {fprintf(stderr,
+X "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
+X libstr,i,n1,aa1[i],pst.nsqx,lcont,ocont,aa1ptr+i);
+X aa1[i]=0;
+X n1=i-1;
+X break;
+X }
+#endif
+X
+X /* don't count long sequences more than once */
+X if (aa1!=aa1ptr) {n1 += m_msg.loff; m_msg.db.entries--;}
+X
+#ifdef PROGRESS
+X if (!m_msg.quiet)
+X if (m_msg.db.entries % 200 == 199) {
+X fputc('.',stderr);
+X if (m_msg.db.entries % 10000 == 9999) fputc('\n',stderr);
+X else if (m_msg.db.entries % 1000 == 999) fputc(' ',stderr);
+X
+X }
+#endif
+X
+X if (n1<=1) {
+X /* if (igncnt++ <10)
+X fprintf(stderr,"Ignoring: %s\n",libstr);
+X */
+X goto loop2;
+X }
+X
+#ifdef PRSS
+X if (lmark==0) {
+X n1s = n1;
+X memcpy(aa1s,aa1,n1s);
+X m_msg.db.entries=0;
+X m_msg.db.length=0;
+X }
+#endif
+X
+X /* if COMP_THR - fill and empty buffers */
+#ifdef COMP_THR
+X ntbuff += n1+1;
+X
+X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
+X
+X cur_buf->buf_cnt++;
+X cur_buf_p = &(cur_buf->buf[nseq++]);
+X cur_buf_p->n1 = n1;
+X cur_buf_p->n1tot_p = n1tot_cur;
+X cur_buf_p->lseek = lmark;
+X cur_buf_p->cont = ocont+1;
+X cur_buf_p->m_file_p = (void *)m_file_p;
+X cur_buf_p->frame = itt;
+X memcpy(cur_buf_p->libstr,libstr,MAX_UID);
+#ifdef SUPERFAMNUM
+X cur_buf_p->nsfnum = nsfnum;
+X if ((cur_buf_p->sfnum[0]=sfnum[0])>0 &&
+X (cur_buf_p->sfnum[1]=sfnum[1])>0 &&
+X (cur_buf_p->sfnum[2]=sfnum[2])>0 &&
+X (cur_buf_p->sfnum[3]=sfnum[3])>0 &&
+X (cur_buf_p->sfnum[4]=sfnum[4])>0 &&
+X (cur_buf_p->sfnum[5]=sfnum[5])>0 &&
+X (cur_buf_p->sfnum[6]=sfnum[6])>0 &&
+X (cur_buf_p->sfnum[7]=sfnum[7])>0 &&
+X (cur_buf_p->sfnum[8]=sfnum[8])>0 &&
+X (cur_buf_p->sfnum[9]=sfnum[9])>0) ;
+#endif
+X
+X /* this assumes that max_buf_cnt is guaranteed %6=0 so that
+X additional pointers to the same buffer can be used
+X nseq now points to next buffer
+X */
+X
+X cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b;
+X } /* for (itt .. */
+X
+X /* make a copy of the overlap (threaded only) */
+X if (lcont) {
+X memcpy(aa1s,&aa1[n1-m_msg.loff],m_msg.loff);
+X }
+X
+X /* if the buffer is filled */
+X if (nseq >= max_buf_cnt || ntbuff >= buf_siz - maxn) {
+X
+X /* provide filled buffer to workers */
+X put_rbuf(cur_buf,max_work_buf);
+X
+X /* get an empty buffer to fill */
+X get_rbuf(&cur_buf,max_work_buf);
+X
+X /* "empty" buffers have results that must be processed */
+X if (cur_buf->buf_cnt && cur_buf->have_results) {
+X save_best(cur_buf,m_msg,pst,fdata,m_msg.qsfnum,&m_msg.hist,
+X &m_msg.pstat_void
+#ifdef PRSS
+X ,s_score,&s_n1
+#endif
+X );
+X
+X }
+X
+X /* now the buffer is truly empty, fill it up */
+X cur_buf->buf_cnt = 0;
+X cur_buf->have_results = 0;
+X /* point the first aa1 ptr to the buffer start */
+X aa1=cur_buf->buf[0].aa1b = cur_buf->start;
+X ntbuff = 0;
+X nseq=0;
+X }
+X else { /* room left in current buffer, increment ptrs */
+X aa1=cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b+n1+1;
+X }
+#else /* if !COMP_THR - do a bunch of searches */
+X
+X /* t_best and t_rbest are used to save the best score or shuffled
+X score from all the frames */
+X
+X t_best = t_rbest = t_qrbest = -1;
+X t_escore = t_rescore = t_qrescore = FLT_MAX;
+X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
+X
+X rst.score[0] = rst.score[1] = rst.score[2] = 0;
+X do_work (aa0[itt], m_msg.n0,aa1,n1,itt,&pst,f_str[itt],0,&rst);
+X
+X if (rst.score[pst.score_ix] > t_best) {
+X t_best = rst.score[pst.score_ix];
+X }
+X
+X if (fdata) {
+X fprintf(fdata,
+X "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8lld\n",
+X libstr,
+#ifdef SUPERFAMNUM
+X sfn_cmp(m_msg.qsfnum,sfnum),
+#else
+X 0,
+#endif
+X n1,itt,
+X rst.comp,rst.H,
+X rst.score[0],rst.score[1],rst.score[2],
+X rst.escore, rst.segnum, rst.seglen, lmark);
+X fflush(fdata);
+X }
+X
+#ifdef PRSS
+X if (lmark==0) {
+X s_score[0] = rst.score[0];
+X s_score[1] = rst.score[1];
+X s_score[2] = rst.score[2];
+X
+X s_n1 = n1;
+X aa1_loff = l_off;
+X }
+X t_best = t_rbest = rst.score[pst.score_ix];
+X t_escore = t_rescore = rst.escore;
+#else
+X if (m_msg.qshuffle) {
+X do_work (aa0s, m_msg.n0,aa1,n1,itt,&pst,qf_str,1,&rrst);
+X
+X if (rrst.score[pst.score_ix] > t_qrbest)
+X t_qrbest = rrst.score[pst.score_ix];
+X if (rrst.escore < t_qrescore)
+X t_qrescore = rrst.escore;
+X
+X if (itt==m_msg.nitt1 && nqstats < m_msg.shuff_max) {
+X qstats[nqstats].n1 = n1; /* save the best score */
+X qstats[nqstats].comp = rst.comp;
+X qstats[nqstats].H = rst.H;
+X qstats[nqstats].escore = t_qrescore;
+X qstats[nqstats++].score = t_qrbest;
+X t_qrbest = -1; /* reset t_qrbest, t_qrescore */
+X t_qrescore = FLT_MAX;
+X }
+X }
+X
+X if (pst.zsflag >= 10) {
+X if (pst.zs_win > 0) wshuffle(aa1,aa1s,n1,pst.zs_win,&ieven);
+X else shuffle(aa1,aa1s,n1);
+X do_work (aa0[itt], m_msg.n0, aa1s, n1,itt,&pst,f_str[itt],0,&rrst);
+X if (rrst.score[pst.score_ix] > t_rbest) {
+X t_rbest = rrst.score[pst.score_ix];
+X t_rescore = rrst.escore;
+X }
+X }
+#endif
+X i_score = rst.score[pst.score_ix];
+X
+/* this section saves scores for statistics calculations. For
+X comparisons that can be from one of 2 or 6 frames, it should only
+X be run once, for the best of the 2 or 6 scores. t_rbest,t_rescore
+X have the best of the 2 or 6 scores from the frames. For proteins,
+X this is run for every score.
+X
+*/
+#ifdef PRSS /* don't save the first score (unshuffled) with PRSS */
+X if (lmark > 0) {
+#endif
+X
+X if (itt == m_msg.nitt1) {
+X if (nstats < MAXSTATS) {
+X stats[nstats].n1 = n1; /* save the best score */
+X stats[nstats].comp = rst.comp;
+X stats[nstats].H = rst.H;
+X if (pst.zsflag >=10) {
+X t_best = t_rbest;
+X t_escore = t_rescore;
+X }
+X stats[nstats].escore = t_escore;
+X stats[nstats++].score = t_best;
+X t_best = t_rbest = -1; /* reset t_rbest, t_best */
+X t_escore = t_rescore = FLT_MAX;
+X }
+X else if (pst.zsflag >= 0) {
+X if (!stats_done) {
+X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
+X &m_msg.hist,&m_msg.pstat_void,0);
+X stats_done = 1;
+X kstats = nstats;
+X for (i=0; i<MAXBEST; i++) {
+X bestp_arr[i]->zscore =
+X (*find_zp)(bestp_arr[i]->score[pst.score_ix],
+X bestp_arr[i]->escore, bestp_arr[i]->n1,
+X bestp_arr[i]->comp, m_msg.pstat_void);
+X }
+X zbestcut = bestp_arr[nbest-1]->zscore;
+X }
+X
+#ifdef SAMP_STATS
+/* older versions saved the first MAXSTATS scores, and ignored the
+X rest in the statistics. With SAMP_STATS, scores after MAXSTATS
+X are sampled at random, and included in the sample set and the
+X statistics parameters are re-derived at the end of the run using
+X the sampled scores.
+X
+X It would be faster not to do the nrand(); if(jstats < MAXSTATS)
+X less often.
+*/
+X if (!m_msg.escore_flg) { /* only for zscores */
+X jstats = nrand(++kstats); /* no mod % 0 */
+X if (jstats < MAXSTATS) {
+X stats[jstats].n1 = n1; /* save the best score */
+X stats[jstats].comp = rst.comp;
+X stats[jstats].H = rst.H;
+X if (pst.zsflag >=10) t_best = t_rbest;
+X stats[jstats].score = t_best;
+X }
+X }
+#endif
+X } /* ( nstats >= MAXSTATS) && zsflag >= 0 */
+X } /* itt1 == nitt1 */
+#ifdef PRSS
+X }
+#endif
+X
+X /* this section completes work on the current score */
+X if (stats_done) { /* stats_done > 0 => nstats >= MAXSTATS */
+X zscore=(*find_zp)(i_score, rst.escore, n1, rst.comp,
+X m_msg.pstat_void);
+X
+X if (itt == m_msg.nitt1) {
+X if (pst.zsflag >= 10) t_best = t_rbest;
+X
+X addhistz((*find_zp)(t_best, t_escore, n1, rst.comp,
+X m_msg.pstat_void),
+X &m_msg.hist);
+X t_best = t_rbest = -1;
+X }
+X }
+X else zscore = (double) i_score;
+X
+#ifndef PRSS
+X if (zscore > zbestcut ) {
+X if (nbest >= MAXBEST) {
+X bestfull = nbest-MAXBEST/4;
+X selectbestz(bestp_arr,bestfull-1,nbest);
+X zbestcut = bestp_arr[bestfull-1]->zscore;
+X nbest = bestfull;
+X }
+X
+X bestp = bestp_arr[nbest++];
+X bestp->score[0] = rst.score[0];
+X bestp->score[1] = rst.score[1];
+X bestp->score[2] = rst.score[2];
+X bestp->comp = rst.comp;
+X bestp->H = rst.H;
+X bestp->zscore = zscore;
+X bestp->escore = rst.escore;
+X bestp->segnum = rst.segnum;
+X bestp->seglen = rst.seglen;
+X bestp->lseek = lmark;
+X bestp->cont = ocont+1;
+X bestp->m_file_p = m_file_p;
+X bestp->n1 = n1;
+X bestp->n1tot_p=n1tot_cur;
+X bestp->frame = itt;
+X memcpy(bestp->libstr,libstr,MAX_UID);
+#ifdef SUPERFAMNUM
+X bestp->nsfnum = nsfnum;
+X if ((bestp->sfnum[0]=sfnum[0])>0 &&
+X (bestp->sfnum[1]=sfnum[1])>0 &&
+X (bestp->sfnum[2]=sfnum[2])>0 &&
+X (bestp->sfnum[3]=sfnum[3])>0 &&
+X (bestp->sfnum[4]=sfnum[4])>0 &&
+X (bestp->sfnum[5]=sfnum[5])>0 &&
+X (bestp->sfnum[6]=sfnum[6])>0 &&
+X (bestp->sfnum[7]=sfnum[7])>0 &&
+X (bestp->sfnum[8]=sfnum[8])>0 &&
+X (bestp->sfnum[9]=sfnum[9])>0) ;
+#endif
+X }
+#else /* PRSS */
+X if (lmark == 0) {
+X bestp = bestp_arr[nbest++];
+X bestp->score[0] = rst.score[0];
+X bestp->score[1] = rst.score[1];
+X bestp->score[2] = rst.score[2];
+X bestp->comp = rst.comp;
+X bestp->H = rst.H;
+X bestp->zscore = zscore;
+X bestp->escore = rst.escore;
+X bestp->segnum = rst.segnum;
+X bestp->seglen = rst.seglen;
+X bestp->lseek = lmark;
+X bestp->cont = 0;
+X bestp->m_file_p = m_file_p;
+X bestp->n1 = n1;
+X bestp->n1tot_p=n1tot_cur;
+X bestp->frame = itt;
+X memcpy(bestp->libstr,libstr,MAX_UID);
+X bestp->nsfnum = 0;
+X }
+#endif
+X }
+#endif
+X
+X loop2:
+X if (lcont) {
+X maxt = m_msg.maxt3;
+#ifndef COMP_THR
+X memcpy(aa1,&aa1[n1-m_msg.loff],m_msg.loff);
+#else
+X memcpy(aa1,aa1s,m_msg.loff);
+#endif
+X aa1ptr= &aa1[m_msg.loff];
+X loffset += n1 - m_msg.loff;
+X ocont = lcont;
+X }
+X else {
+X maxt = maxn;
+X aa1ptr=aa1;
+X if (ocont) *n1tot_cur = n1tot_v;
+X ocont = 0;
+X loffset = 0l;
+X n1tot_v = 0;
+X n1tot_cur = NULL;
+X }
+X } /* end while((n1=getlib())) */
+X } /* end iln=1..nln */
+X
+X /* all done */
+X
+#ifdef COMP_THR
+X /* check last buffers for any results */
+X put_rbuf_done(max_workers,cur_buf,max_work_buf);
+X
+X for (i=0; i < num_reader_bufs; i++) {
+X reader_buf_readp = (reader_buf_readp+1)%(max_work_buf);
+X if (reader_buf[reader_buf_readp]->buf_cnt > 0 &&
+X reader_buf[reader_buf_readp]->have_results) {
+X save_best(reader_buf[reader_buf_readp],m_msg,pst,fdata,m_msg.qsfnum,
+X &m_msg.hist, &m_msg.pstat_void
+#ifdef PRSS
+X ,s_score,&s_n1
+#endif
+X );
+X }
+X }
+#endif
+X
+#ifdef PROGRESS
+X if (!m_msg.quiet)
+X if (m_msg.db.entries >= 200) {fprintf(stderr," Done!\n");}
+#endif
+X
+X m_msg.nbr_seq = m_msg.db.entries;
+X get_param(&pst, gstring2,gstring3);
+X
+/* *************************** */
+/* analyze the last results */
+/* *************************** */
+X
+#ifndef PRSS
+#ifndef SAMP_STATS
+X if (!stats_done && nstats > 0) {
+#endif
+X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,&m_msg.hist,
+X &m_msg.pstat_void,stats_done);
+X if (m_msg.pstat_void != NULL) {
+X stats_done = 1;
+X for (i = 0; i < nbest; i++) {
+X bestp_arr[i]->zscore =
+X (*find_zp)(bestp_arr[i]->score[pst.score_ix],
+X bestp_arr[i]->escore, bestp_arr[i]->n1,
+X bestp_arr[i]->comp, m_msg.pstat_void);
+X }
+#ifndef SAMP_STATS
+X }
+X else pst.zsflag = -1;
+#endif
+X }
+#else /* PRSS */
+X if (pst.zsflag < 10) pst.zsflag += 10;
+X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
+X &m_msg.hist, &m_msg.pstat_void,0);
+X stats_done = 1;
+X for (i = 0; i < nbest; i++) {
+X bestp_arr[i]->zscore = (*find_zp)(bestp_arr[i]->score[pst.score_ix],
+X bestp_arr[i]->escore, bestp_arr[i]->n1,
+X bestp_arr[i]->comp, m_msg.pstat_void);
+X }
+#endif
+X
+X if (pst.zdb_size <= 1) pst.zdb_size = m_msg.db.entries;
+X
+#ifdef COMP_THR
+X /* before I call last_calc/showbest/showalign, I need init_work() to
+X get an f_str. This duplicates some code above, which is used in
+X the non-threaded version
+X */
+X
+X if (!have_f_str) {
+X init_work(aa0[0],m_msg.n0,&pst,&f_str[0]);
+X have_f_str = 1;
+X f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
+X
+X if (m_msg.qframe == 2) {
+X if ((aa0[1]=(unsigned char *)calloc((size_t)m_msg.n0+2,
+X sizeof(unsigned char)))==NULL) {
+X fprintf(stderr," cannot allocate aa0[1][%d] for alignments\n",
+X m_msg.n0+2);
+X }
+X *aa0[1]='\0';
+X aa0[1]++;
+X memcpy(aa0[1],aa0[0],m_msg.n0+1);
+X revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
+X init_work(aa0[1],m_msg.n0,&pst,&f_str[1]);
+X }
+X
+X /* I also need a "real" aa1 */
+X aa1 = buf_list[0].start;
+#ifdef PRSS
+X /* for PRSS - I need the original second (non-shuffled) sequence */
+X memcpy(aa1,aa1s,n1s+1);
+#endif
+X }
+#endif
+X
+/* now we have one set of scaled scores for in bestp_arr -
+X for FASTS/F, we need to do some additional processing */
+X
+X if (!m_msg.qshuffle) {
+X last_stats(aa0[0], m_msg.n0, stats,nstats, bestp_arr,nbest,
+X m_msg, pst, &m_msg.hist, &m_msg.pstat_void);
+X }
+X else {
+X last_stats(aa0[0], m_msg.n0,
+X qstats,nqstats, bestp_arr,nbest, m_msg, pst,
+X &m_msg.hist, &m_msg.pstat_void);
+X }
+X
+X /* here is a contradiction: if pst.zsflag < 0, then m_msg.pstat_void
+X should be NULL; if it is not, then process_hist() has been called */
+X if (pst.zsflag < 0 && m_msg.pstat_void != NULL) pst.zsflag = 1;
+X
+X if (m_msg.last_calc_flg) {
+X /* last_calc may need coefficients from last_stats() */
+X nbest = last_calc(aa0, aa1, maxn, bestp_arr, nbest, m_msg, &pst,
+X f_str, m_msg.pstat_void);
+X }
+X
+X scale_scores(bestp_arr,nbest,m_msg.db,pst,m_msg.pstat_void);
+X
+X get_param(&pst, gstring2,gstring3);
+X
+#ifdef PRSS
+X /* gettitle(m_msg.lname,m_msg.ltitle,sizeof(m_msg.ltitle)); */
+X printf("%.50s - %s %d %s%s\n vs %.60s - %s shuffled sequence\n",
+X m_msg.tname, m_msg.qtitle,m_msg.n0, m_msg.sqnam,
+X (m_msg.revcomp ? " (reverse complement)" : "\0"),
+X m_msg.lname,m_msg.ltitle);
+#endif
+X
+X prhist (stdout, m_msg, pst, m_msg.hist, nstats, m_msg.db, gstring2);
+X
+X tscan = s_time();
+X printf (" Scan time: ");
+X ptime(stdout,tscan-tprev);
+X printf ("\n");
+#ifdef COMP_MLIB
+X ttscan += tscan-tprev;
+#endif
+X
+X l3:
+X if (!m_msg.quiet) {
+X printf("Enter filename for results [%s]: ", m_msg.outfile);
+X fflush(stdout);
+X }
+X
+X rline[0]='\0';
+X if (!m_msg.quiet && fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+X if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0';
+X if (rline[0]!='\0') strncpy(m_msg.outfile,rline,sizeof(m_msg.outfile));
+X if (m_msg.outfile[0]!='\0') {
+X if ((outfd=fopen(m_msg.outfile,"w"))==NULL) {
+X fprintf(stderr," could not open %s\n",m_msg.outfile);
+X if (!m_msg.quiet) goto l3;
+X else goto l4;
+X }
+X
+#ifdef PGM_DOC
+X fputs(argv_line,outfd);
+X fputc('\n',outfd);
+#endif
+X fputs(iprompt0,outfd);
+X fprintf(outfd," %s%s\n",verstr,refstr);
+X
+X fprintf(outfd," %s%s, %d %s\n vs %s %s\n",
+X qlabel, (m_msg.revcomp ? "-" : "\0"), m_msg.n0,
+X m_msg.sqnam, m_msg.ltitle, lib_label);
+X
+X prhist(outfd,m_msg,pst,m_msg.hist, nstats, m_msg.db, gstring2);
+X }
+X
+X l4:
+X if (m_msg.markx & MX_HTML) {
+X fputs("</pre>\n<p>\n<hr>\n<p>\n",outfd);
+X }
+X
+X /* code from p2_complib.c to pre-calculate -m 9 alignment info -
+X requires -q with -m 9 */
+X
+X if (m_msg.quiet || m_msg.markx & MX_M9SUMM) {
+X
+X /* to determine how many sequences to re-align (either for
+X do_opt() or calc_id() we need to modify m_msg.mshow to get
+X the correct number of alignments */
+X
+X if (m_msg.mshow_flg != 1 && pst.zsflag >= 0) {
+X for (i=0; i<nbest && bestp_arr[i]->escore< m_msg.e_cut; i++) {}
+X m_msg.mshow = i;
+X }
+X
+#ifndef PRSS
+X if (m_msg.mshow <= 0) { /* no results to display */
+X fprintf(outfd,"!! No sequences with E() < %f\n",m_msg.e_cut);
+X m_msg.nshow = 0;
+X goto end_l;
+X }
+#endif
+X }
+X
+#ifdef PRSS
+X memcpy(aa1,aa1s,n1s);
+X maxn = n1s;
+X nbest = 1;
+#endif
+X
+X showbest (stdout, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
+X m_msg.db, gstring2, f_str);
+X
+X if (outfd != stdout) {
+X t_quiet = m_msg.quiet;
+X m_msg.quiet = -1; /* should guarantee 1..nbest shown */
+X showbest (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
+X m_msg.db, gstring2, f_str);
+X m_msg.quiet = t_quiet;
+X }
+X
+X if (m_msg.nshow > 0) {
+X rline[0]='N';
+X if (!m_msg.quiet){
+X printf(" Display alignments also? (y/n) [n] "); fflush(stdout);
+X if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+X }
+X else rline[0]='Y';
+X
+X if (toupper((int)rline[0])=='Y') {
+X if (!m_msg.quiet) {
+X printf(" number of alignments [%d]? ",m_msg.nshow);
+X fflush(stdout);
+X if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
+X if (rline[0]!=0) sscanf(rline,"%d",&m_msg.nshow);
+X m_msg.ashow=m_msg.nshow;
+X }
+X
+X if (m_msg.markx & (MX_AMAP+ MX_HTML + MX_M9SUMM)) {
+X fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
+X qlabel,(m_msg.revcomp ? "_rev":"\0"), m_msg.n0,
+X m_msg.sqnam,m_msg.lname);
+X }
+X
+X if (m_msg.markx & MX_M10FORM) {
+X fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
+X qlabel,(m_msg.revcomp ? "-":"\0"), m_msg.n0, m_msg.sqnam,
+X m_msg.lname);
+X fprintf(outfd,"; pg_name: %s\n",argv[0]);
+X fprintf(outfd,"; pg_ver: %s\n",mp_verstr);
+X fprintf(outfd,"; pg_argv:");
+X for (i=0; i<argc; i++)
+X fprintf(outfd," %s",argv[i]);
+X fputc('\n',outfd);
+X fputs(gstring3,outfd);
+X fputs(hstring1,outfd);
+X }
+X
+#ifndef PRSS
+X showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
+X m_msg, pst, gstring2, f_str);
+#else
+X if (pst.sw_flag > 0 || (!m_msg.quiet && m_msg.nshow>0)) {
+X showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
+X m_msg, pst, gstring2, f_str);
+X }
+#endif
+X
+X fflush(outfd);
+X }
+X }
+X
+X end_l:
+#if defined(COMP_THR) && defined(COMP_MLIB)
+X for (i=0; i<max_work_buf; i++) {
+X buf_list[i].buf_cnt=0;
+X buf_list[i].have_results=0;
+X }
+X
+X num_worker_bufs = 0;
+X num_reader_bufs = max_work_buf;
+X reader_done = 0;
+X worker_buf_workp = 0;
+X worker_buf_readp = 0;
+X reader_buf_workp = 0;
+X reader_buf_readp = 0;
+X
+X start_thread = 1; /* stop thread from starting again */
+#endif
+X
+X /* clean up alignment encodings */
+X for (i=0; i < m_msg.nshow; i++) {
+X if (bestp_arr[i]->have_ares) {
+X free(bestp_arr[i]->a_res.res);
+X bestp_arr[i]->a_res.res = NULL;
+X bestp_arr[i]->have_ares = 0;
+X }
+X }
+X
+X if (m_msg.qframe == 2) free(aa0[1]-1);
+X
+X if (have_f_str) {
+X if (f_str[1]!=f_str[0]) {
+X close_work (aa0[1], m_msg.n0, &pst, &f_str[1]);
+X }
+X close_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
+X have_f_str = 0;
+#ifndef COMP_THR
+X if (m_msg.qshuffle) close_work (aa0s, m_msg.n0, &pst, &qf_str);
+#endif
+X if (pst.pam_pssm) {
+X free_pam2p(pst.pam2p[0]);
+X free_pam2p(pst.pam2p[1]);
+X }
+X }
+X
+X for (iln=0; iln < m_msg.nln; iln++) {
+X if (m_msg.lb_mfd[iln]!=NULL) closelib(m_msg.lb_mfd[iln]);
+X }
+X
+X tddone = time(NULL);
+X tdone = s_time();
+X fflush(outfd);
+X
+X if (fdata) {
+X fprintf(fdata,"/** %s **/\n",gstring2);
+X fprintf(fdata,"%3ld%-50s\n",qtt.entries-1,m_msg.qtitle);
+X fflush(fdata);
+X }
+X
+#ifdef COMP_MLIB
+X ttdisp += tdone-tscan;
+X
+X maxn = m_msg.max_tot;
+X m_msg.n0 =
+X QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
+X &qseek, &qlcont,q_file_p,&m_msg.sq0off);
+X if (m_msg.n0 <= 0) break;
+X if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
+X strncpy(qlabel, m_msg.qtitle,sizeof(qlabel));
+X if (bp != NULL) *bp=' ';
+X qlabel[sizeof(qlabel)-1]='\0';
+X
+X if (m_msg.ann_flg) {
+X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
+X }
+X
+X if (m_msg.term_code && m_msg.qdnaseq==SEQT_PROT &&
+X aa0[0][m_msg.n0-1]!=m_msg.term_code) {
+X aa0[0][m_msg.n0++]=m_msg.term_code;
+X aa0[0][m_msg.n0]=0;
+X }
+X
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X /* for ALTIVEC, must pad with 15 NULL's */
+X for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
+#endif
+X
+#ifdef SUPERFAMNUM
+X m_msg.nqsfnum = nsfnum;
+X for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
+X m_msg.nqsfnum_n = nsfnum_n;
+X for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
+#endif
+X }
+#endif
+X if (m_msg.markx & MX_M10FORM)
+X fprintf(outfd,">>><<<\n");
+X
+X tdone = s_time();
+X if ( m_msg.markx & MX_HTML) fputs("<p><pre>\n",outfd);
+X printsum(outfd, m_msg.db);
+X if ( m_msg.markx & MX_HTML) fputs("</pre>\n",outfd);
+#ifdef HTML_HEAD
+X if (m_msg.markx & MX_HTML) fprintf(outfd,"</body>\n</html>\n");
+#endif
+X if (outfd!=stdout) printsum(stdout,m_msg.db);
+X
+X exit(0);
+} /* End of main program */
+X
+/* printsum() - write the end-of-run summary to fd.
+X
+X   fd  - stream that receives the summary
+X   ntt - library totals; ntt.length/ntt.entries are printed directly
+X         unless ntt.carry is non-zero, in which case the residue total
+X         is rebuilt as carry*LONG_MAX + length and printed as a double
+X
+X   Also reports the query totals in qtt, the times held in
+X   tdstart/tddone, and the scan/display timers (tscan/tprev/tdone
+X   without COMP_MLIB, the accumulated ttscan/ttdisp with it).
+*/
+void
+printsum(FILE *fd, struct db_str ntt)
+{
+X  double db_tt;
+X  char tstr1[26], tstr2[26];
+X  char *ts;
+X
+X  /* ctime() can return NULL when the time cannot be converted; copy an
+X     empty string in that case instead of handing NULL to strncpy().
+X     Each copy is bounded by the size of its own destination. */
+X  ts = ctime(&tdstart);
+X  strncpy(tstr1, (ts != NULL) ? ts : "", sizeof(tstr1));
+X  ts = ctime(&tddone);
+X  strncpy(tstr2, (ts != NULL) ? ts : "", sizeof(tstr2));
+X  /* ctime() text is 24 characters followed by '\n'; drop the newline
+X     (this also guarantees termination after strncpy) */
+X  tstr1[24]=tstr2[24]='\0';
+X
+X  /* Print timing to output file as well */
+X  fprintf(fd, "\n\n%ld residues in %ld query sequences\n", qtt.length, qtt.entries);
+X  if (ntt.carry == 0)
+X    fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
+X  else {
+X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+X    fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt.entries);
+X  }
+X
+#ifndef COMP_THR
+X  fprintf(fd," Scomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
+#else
+X  fprintf(fd," Tcomplib [%s] (%d proc)\n start: %s done: %s\n", mp_verstr,
+X          max_workers,tstr1,tstr2);
+#endif
+#ifndef COMP_MLIB
+X  fprintf(fd," Scan time: ");
+X  ptime(fd, tscan - tprev);
+X  fprintf (fd," Display time: ");
+X  ptime (fd, tdone - tscan);
+#else
+X  fprintf(fd," Total Scan time: ");
+X  ptime(fd, ttscan);
+X  fprintf (fd," Total Display time: ");
+X  ptime (fd, ttdisp);
+#endif
+X  fprintf (fd,"\n");
+X  fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
+}
+X
+/* fsigint() - interrupt handler: record the stop times, announce the
+X   interruption on stdout, the results file (when it is not stdout)
+X   and stderr, print the run summary with empty library totals, and
+X   exit with status 1.
+*/
+void fsigint()
+{
+X  struct db_str no_lib;
+X  int have_outfile;
+X
+X  /* the library totals are not available here, so report zeros */
+X  no_lib.carry = 0;
+X  no_lib.length = 0;
+X  no_lib.entries = 0;
+X
+X  tdone = s_time();
+X  tddone = time(NULL);
+X
+X  have_outfile = (outfd != stdout);
+X
+X  fputs(" /*** interrupted ***/\n", stdout);
+X  if (have_outfile) fputs("/*** interrupted ***/\n", outfd);
+X  fputs("/*** interrupted ***/\n", stderr);
+X
+X  printsum(stdout, no_lib);
+X  if (have_outfile) printsum(outfd, no_lib);
+X
+X  exit(1);
+}
+X
+#ifdef COMP_THR
+/* save_best() - fold one buffer of worker results into the run state
+X   (stats[]/nstats, qstats[]/nqstats, bestp_arr[]/nbest, zbestcut,
+X   stats_done and the histogram).
+X
+X   cur_buf     - result buffer; cur_buf->buf_cnt entries of cur_buf->buf
+X                 are consumed, and buf_cnt is decremented as they are
+X   m_msg, pst  - search and scoring parameters, passed by value, so the
+X                 assignment to pst.zsflag_f below stays local
+X   fdata       - when non-NULL, every result is also logged to it
+X   qsfnum      - handed to sfn_cmp() with each result's sfnum
+X                 (SUPERFAMNUM only)
+X   histp       - histogram updated through process_hist()/addhistz()
+X   pstat_voidp - statistics handle passed to process_hist() and find_zp
+X   s_score, s_n1 (PRSS only) - receive the three scores and the length
+X                 of the entry whose lseek is 0
+X
+X   Scores arrive one frame at a time; the t_* variables carry the best
+X   value seen so far in the current set, and the statistics sample is
+X   taken when the frame equals m_msg.nitt1.
+*/
+void save_best(struct buf_head *cur_buf, struct mngmsg m_msg, struct pstruct pst,
+X               FILE *fdata, int *qsfnum, struct hist_str *histp,
+X               void **pstat_voidp
+#ifdef PRSS
+X               , int *s_score, int *s_n1
+X
+#endif
+X               )
+{
+X  double zscore;
+X  int i_score;
+X  struct buf_str *p_rbuf, *cur_buf_p;
+X  int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
+X  double e_score, tm_escore, t_rescore, t_qrescore;
+X  int jstats;
+X
+X  sc_ix = pst.score_ix;
+X
+X  cur_buf_p = cur_buf->buf;
+X
+X  /* tm_best is initialised with the others so that it can never be
+X     stored into stats[] before a score has been assigned to it */
+X  t_best = t_rbest = t_qrbest = tm_best = -1;
+X  tm_escore = t_rescore = t_qrescore = FLT_MAX;
+X
+X  while (cur_buf->buf_cnt--) { /* count down the number of results */
+X    p_rbuf = cur_buf_p++; /* step through the results buffer */
+X
+X    i_score = p_rbuf->rst.score[sc_ix];
+X    e_score = p_rbuf->rst.escore;
+X
+X    /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
+X    if (fdata) {
+X      fprintf(fdata,
+X              "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8ld\n",
+X              p_rbuf->libstr,
+#ifdef SUPERFAMNUM
+X              sfn_cmp(qsfnum,p_rbuf->sfnum),
+#else
+X              0,
+#endif
+X              p_rbuf->n1,p_rbuf->frame,p_rbuf->rst.comp,p_rbuf->rst.H,
+X              p_rbuf->rst.score[0],p_rbuf->rst.score[1],p_rbuf->rst.score[2],
+X              p_rbuf->rst.escore, p_rbuf->rst.segnum, p_rbuf->rst.seglen, p_rbuf->lseek);
+X    }
+X
+#ifdef PRSS
+X    /* lseek==0 is the first (unshuffled) sequence: it is saved as a
+X       best hit and kept out of the statistics */
+X    if (p_rbuf->lseek==0) {
+X      s_score[0] = p_rbuf->rst.score[0];
+X      s_score[1] = p_rbuf->rst.score[1];
+X      s_score[2] = p_rbuf->rst.score[2];
+X      *s_n1 = p_rbuf->n1;
+X
+X      bestp = bestp_arr[nbest++];
+X      bestp->score[0] = s_score[0];
+X      bestp->score[1] = s_score[1];
+X      bestp->score[2] = s_score[2];
+X      bestp->n1 = *s_n1;
+X      /* comp and H are recorded as in the non-threaded PRSS path; comp
+X         is read back when the z-scores are recomputed after the scan */
+X      bestp->comp = (double) p_rbuf->rst.comp;
+X      bestp->H = (double) p_rbuf->rst.H;
+X      bestp->escore = p_rbuf->rst.escore;
+X      bestp->segnum = p_rbuf->rst.segnum;
+X      bestp->seglen = p_rbuf->rst.seglen;
+X      /* no z-score has been computed at this point (the local zscore
+X         is still unset), so store the raw score as a placeholder */
+X      bestp->zscore = (double) i_score;
+X      bestp->lseek = p_rbuf->lseek;
+X      bestp->m_file_p = p_rbuf->m_file_p;
+X      memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
+X      bestp->n1tot_p = p_rbuf->n1tot_p;
+X      bestp->frame = p_rbuf->frame;
+X
+X      continue;
+X    }
+#endif
+X
+X    t_n1 = p_rbuf->n1;
+X    if (i_score > t_best) tm_best = t_best = i_score;
+X    if (e_score < tm_escore) tm_escore = e_score;
+X
+X    if (m_msg.qshuffle) {
+X      if (p_rbuf->qr_score > t_qrbest)
+X        t_qrbest = p_rbuf->qr_score;
+X      if (p_rbuf->qr_escore < t_qrescore)
+X        t_qrescore = p_rbuf->qr_escore;
+X
+X      if (p_rbuf->frame == m_msg.nitt1 && nqstats < m_msg.shuff_max) {
+X        qstats[nqstats].n1 = p_rbuf->n1; /* save the best score */
+X        qstats[nqstats].comp = p_rbuf->rst.comp;
+X        qstats[nqstats].H = p_rbuf->rst.H;
+X        qstats[nqstats].escore = t_qrescore;
+X        qstats[nqstats++].score = t_qrbest;
+X        t_qrbest = -1; /* reset t_qrbest, t_qrescore */
+X        t_qrescore = FLT_MAX;
+X      }
+X    }
+X
+X    if (pst.zsflag >= 10 && p_rbuf->r_score > t_rbest) {
+X      t_rbest = p_rbuf->r_score;
+X      t_rescore = p_rbuf->r_escore;
+X    }
+X
+X    /* statistics done for best score of set */
+X
+X
+X    if (p_rbuf->frame == m_msg.nitt1) {
+X      if (nstats < MAXSTATS ) {
+X        stats[nstats].n1 = t_n1;
+X        stats[nstats].comp = p_rbuf->rst.comp;
+X        stats[nstats].H = p_rbuf->rst.H;
+X        if (pst.zsflag >= 10) {
+X          tm_best = t_rbest;
+X          tm_escore = t_rescore;
+X          t_rbest = -1;
+X          t_rescore = FLT_MAX;
+X        }
+X        stats[nstats].escore = tm_escore;
+X        stats[nstats++].score = tm_best;
+X        t_best = -1;
+X        tm_escore = FLT_MAX;
+X      }
+X      else if (pst.zsflag > 0) {
+X        if (!stats_done) {
+X          /* the sample is full: derive the statistics once, then
+X             convert the scores already saved into z-scores */
+X          pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
+X                                      histp, pstat_voidp,0);
+X          kstats = nstats;
+X          stats_done = 1;
+X          for (i=0; i<MAXBEST; i++) {
+X            bestp_arr[i]->zscore =
+X              (*find_zp)(bestp_arr[i]->score[pst.score_ix],
+X                         bestp_arr[i]->escore, bestp_arr[i]->n1,
+X                         bestp_arr[i]->comp, *pstat_voidp);
+X          }
+X        }
+#ifdef SAMP_STATS
+X        else {
+X          /* later scores replace a random slot of the sample */
+X          if (!m_msg.escore_flg) {
+X            jstats = nrand(++kstats);
+X            if (jstats < MAXSTATS) {
+X              stats[jstats].n1 = t_n1;
+X              stats[jstats].comp = p_rbuf->rst.comp;
+X              stats[jstats].H = p_rbuf->rst.H;
+X              if (pst.zsflag >= 10) {
+X                tm_best = t_rbest;
+X              }
+X              stats[jstats].score = tm_best;
+X            }
+X          }
+X        }
+#endif
+X      }
+X    }
+X
+X    /* best saved for every score */
+X    if (stats_done) {
+X
+X      zscore=(*find_zp)(i_score, e_score, p_rbuf->n1,(double)p_rbuf->rst.comp,
+X                        *pstat_voidp);
+X
+X      if (p_rbuf->frame == m_msg.nitt1) {
+X        addhistz((*find_zp)(t_best, tm_escore, p_rbuf->n1, (double) p_rbuf->rst.comp,
+X                            *pstat_voidp), histp);
+X        t_best = t_rbest = -1;
+X        tm_escore = t_rescore = FLT_MAX;
+X      }
+X    }
+X    else zscore = (double) i_score;
+X
+#ifndef PRSS
+X    if (zscore > zbestcut) {
+X      if (nbest >= MAXBEST) {
+X        /* the array is full: keep the top three quarters and raise
+X           the cutoff to the lowest z-score that was kept */
+X        bestfull = nbest-MAXBEST/4;
+X        selectbestz(bestp_arr,bestfull-1,nbest);
+X        zbestcut = bestp_arr[bestfull-1]->zscore;
+X        nbest = bestfull;
+X      }
+X      bestp = bestp_arr[nbest++];
+X      bestp->score[0] = p_rbuf->rst.score[0];
+X      bestp->score[1] = p_rbuf->rst.score[1];
+X      bestp->score[2] = p_rbuf->rst.score[2];
+X      bestp->comp = (double) p_rbuf->rst.comp;
+X      bestp->H = (double) p_rbuf->rst.H;
+X      bestp->escore = p_rbuf->rst.escore;
+X      bestp->segnum = p_rbuf->rst.segnum;
+X      bestp->seglen = p_rbuf->rst.seglen;
+X      bestp->zscore = zscore;
+X      bestp->lseek = p_rbuf->lseek;
+X      memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
+X      bestp->cont = p_rbuf->cont; /* not cont+1 because incremented already */
+X      bestp->m_file_p = p_rbuf->m_file_p;
+X      bestp->n1 = p_rbuf->n1;
+X      bestp->n1tot_p = p_rbuf->n1tot_p;
+X      bestp->frame = p_rbuf->frame;
+X      bestp->nsfnum = p_rbuf->nsfnum;
+#ifdef SUPERFAMNUM
+X      /* copy sfnum[] up to and including the first entry that is <= 0;
+X         the && chain stops there */
+X      if ((bestp->sfnum[0] = p_rbuf->sfnum[0])>0 &&
+X          (bestp->sfnum[1] = p_rbuf->sfnum[1])>0 &&
+X          (bestp->sfnum[2] = p_rbuf->sfnum[2])>0 &&
+X          (bestp->sfnum[3] = p_rbuf->sfnum[3])>0 &&
+X          (bestp->sfnum[4] = p_rbuf->sfnum[4])>0 &&
+X          (bestp->sfnum[5] = p_rbuf->sfnum[5])>0 &&
+X          (bestp->sfnum[6] = p_rbuf->sfnum[6])>0 &&
+X          (bestp->sfnum[7] = p_rbuf->sfnum[7])>0 &&
+X          (bestp->sfnum[8] = p_rbuf->sfnum[8])>0 &&
+X          (bestp->sfnum[9] = p_rbuf->sfnum[9])>0) ;
+#endif
+X    }
+#endif
+X  }
+}
+#endif
+SHAR_EOF
+chmod 0644 comp_lib.c ||
+echo 'restore of comp_lib.c failed'
+Wc_c="`wc -c < 'comp_lib.c'`"
+test 55202 -eq "$Wc_c" ||
+ echo 'comp_lib.c: original size 55202, current size' "$Wc_c"
+fi
+# ============= compacc.c ==============
+if test -f 'compacc.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping compacc.c (File already exists)'
+else
+echo 'x - extracting compacc.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'compacc.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: compacc.c,v 1.61 2007/04/26 18:37:18 wrp Exp $ */
+X
+/* Concurrent read version */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(UNIX) || defined(WIN32)
+#include <sys/types.h>
+#endif
+X
+#include <limits.h>
+#include <float.h>
+X
+#include <string.h>
+#include <time.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+X
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+X
+#ifdef PCOMPLIB
+#include "msg.h"
+extern int nnodes;
+#ifdef PVM_SRC
+#include "pvm3.h"
+extern int pinums[],hosttid;
+#endif
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+#endif
+X
+extern time_t tdone, tstart; /* Timing */
+extern void abort ();
+extern void ptime ();
+X
+/* because it is used to pre-allocate space, maxn has various
+X constraints. For "simple" comparisons, it is simply the length of
+X the longest library sequence. But for translated comparisons, it
+X must be 3 or 6X the length of the query sequence.
+X
+X In addition, however, it can be reduced to make certain that
+X sequences are read in smaller chunks. Also, maxn affects how large
+X the overlaps must be when sequences are read in chunks.
+*/
+X
+/* reset_maxn() - apply the user-requested limit m_msg->maxn to maxn,
+X   check that the query fits, and set the overlap constants
+X   m_msg->loff and m_msg->maxt3.  Returns the (possibly adjusted)
+X   maxn; exits with status 1 when the query is too long.
+*/
+int
+reset_maxn(struct mngmsg *m_msg, int maxn) {
+X  int same_alphabet;
+X
+X  /* reduce maxn if requested */
+X  if (m_msg->maxn > 0 && maxn > m_msg->maxn) {
+X    maxn = m_msg->maxn;
+X  }
+X
+X  same_alphabet = (m_msg->qdnaseq == m_msg->ldnaseq
+X                   || m_msg->qdnaseq == SEQT_DNA
+X                   || m_msg->qdnaseq == SEQT_RNA);
+X
+X  if (same_alphabet) { /* !TFAST - either FASTA or FASTX*/
+X    if (m_msg->n0 > m_msg->max_tot/3) {
+X      fprintf(stderr," query sequence is too long %d > %d %s\n",
+X              m_msg->n0,
+X              m_msg->max_tot/3,
+X              m_msg->sqnam);
+X      exit(1);
+X    }
+X    m_msg->loff = m_msg->n0;
+X    m_msg->maxt3 = maxn-m_msg->loff;
+X    return maxn;
+X  }
+X
+X  /* is TFAST */
+X  if (m_msg->n0 > MAXTST) {
+X    fprintf(stderr," query sequence is too long %d %s\n",m_msg->n0,m_msg->sqnam);
+X    exit(1);
+X  }
+X
+X  /* n0*3 for the three frames - this will only happen if maxn has
+X     been set low manually */
+X  if (m_msg->n0*3 > maxn ) {
+X    if (!(m_msg->n0*4+2 < m_msg->max_tot)) { /* m_msg0*3 + m_msg0 */
+X      fprintf(stderr," query sequence too long for translated search: %d * 4 > %d %s\n",
+X              m_msg->n0,maxn, m_msg->sqnam);
+X      exit(1);
+X    }
+X    fprintf(stderr,
+X            " query sequence too long for library segment: %d - resetting to %d\n",
+X            maxn,m_msg->n0*3);
+X    m_msg->maxn = m_msg->n0*3;
+X    maxn = m_msg->maxn;
+X  }
+X
+X  /* set up some constants for overlaps: both values are reduced by 3,
+X     rounded down to a multiple of 3, then incremented by one */
+X  m_msg->loff = 3*m_msg->n0;
+X  m_msg->maxt3 = maxn-m_msg->loff-3;
+X  m_msg->maxt3 -= m_msg->maxt3%3;
+X  m_msg->maxt3++;
+X
+X  maxn -= 3;
+X  maxn -= maxn%3;
+X  maxn += 1;
+X
+X  return maxn;
+}
+X
+X
+/* scanseq() - count the residues in seq[0..n-1] whose code matches one
+X   of the characters in str.
+X
+X   seq - encoded sequence (one residue code per byte)
+X   n   - number of residues in seq
+X   str - NUL-terminated characters, mapped to codes through qascii[]
+X
+X   Returns the number of matching positions.
+*/
+int
+scanseq(unsigned char *seq, int n, char *str) {
+X  int tot, i, code;
+X  char aaray[128]; /* this must be set > nsq */
+X  unsigned char *sp;
+X
+X  memset(aaray, 0, sizeof(aaray));
+X
+X  /* str is walked as unsigned char so that a byte with the high bit
+X     set cannot become a negative index where plain char is signed.
+X     NOTE(review): assumes qascii[] has an entry for every byte value
+X     that occurs in str - confirm against its declaration. */
+X  for (sp = (unsigned char *)str; *sp != '\0'; sp++) {
+X    code = qascii[*sp];
+X    /* ignore codes that do not fit the flag table */
+X    if (code >= 0 && code < (int)sizeof(aaray)) aaray[code] = 1;
+X  }
+X
+X  for (i=tot=0; i<n; i++) {
+X    if (seq[i] < sizeof(aaray)) tot += aaray[seq[i]];
+X  }
+X  return tot;
+}
+X
+/* subs_env_append() - append at most len characters of text to the
+X   NUL-terminated string in dest without letting dest grow beyond
+X   dest_size-1 characters.  dest stays NUL-terminated. */
+static void
+subs_env_append(char *dest, int dest_size, const char *text, size_t len) {
+X  size_t used, room;
+X
+X  used = strlen(dest);
+X  if (used >= (size_t)dest_size - 1) return; /* already full */
+X  room = (size_t)dest_size - 1 - used;
+X  strncat(dest, text, (len < room) ? len : room);
+}
+X
+/* subs_env() - copy src to dest, replacing every ${NAME} with the
+X   value of the environment variable NAME.
+X
+X   dest      - output buffer, always NUL-terminated on return
+X   src       - input string; it is modified temporarily while a name is
+X               looked up and restored before returning, so it must be
+X               writable when it contains "${"
+X   dest_size - size of dest in bytes; nothing is done when it is <= 0
+X
+X   A ${NAME} whose variable is not set expands to nothing.  A '$' that
+X   is not followed by '{' is copied literally.  An unterminated "${"
+X   is reported on stderr and the rest of src is copied unchanged.
+X   Output that does not fit in dest is silently truncated.
+*/
+void subs_env(char *dest, char *src, int dest_size) {
+X  char *seg, *dollar, *rbrace, *value;
+X
+X  if (dest_size <= 0) return;
+X  *dest = '\0';
+X
+X  seg = src; /* start of the text not yet copied */
+X  while ((dollar = strchr(seg,'$')) != NULL) {
+X    /* append (not overwrite) the literal text in front of the '$' */
+X    subs_env_append(dest, dest_size, seg, (size_t)(dollar - seg));
+X
+X    if (dollar[1] != '{') { /* plain '$' */
+X      subs_env_append(dest, dest_size, "$", 1);
+X      seg = dollar + 1;
+X      continue;
+X    }
+X
+X    if ((rbrace = strchr(dollar+2,'}')) == NULL) {
+X      fprintf(stderr, "Unterminated ENV: %s\n",src);
+X      seg = dollar; /* copy "${..." as-is below, exactly once */
+X      break;
+X    }
+X
+X    /* terminate the name for getenv(), and always put the '}' back */
+X    *rbrace = '\0';
+X    value = getenv(dollar+2);
+X    *rbrace = '}';
+X    if (value != NULL) {
+X      subs_env_append(dest, dest_size, value, strlen(value));
+X    }
+X    seg = rbrace + 1; /* step past the name even if it was not set */
+X  }
+X
+X  /* now copy the last stuff */
+X  subs_env_append(dest, dest_size, seg, strlen(seg));
+}
+X
+X
+/* selectbest() - partial ordering of bptr[0..n-1] by decreasing
+X   score[0]: on return every entry in front of position k has a score
+X   at least as large as bptr[k], and every entry behind it has a score
+X   no larger.
+X
+X   bptr - array of pointers, rearranged in place
+X   k    - rank (array position) to settle
+X   n    - number of entries in bptr
+*/
+void selectbest(struct beststr **bptr, int k, int n)
+{
+X  int v, i, j, l, r;
+X  struct beststr *tmptr;
+X
+X  l=0; r=n-1;
+X
+X  while ( r > l ) {
+X    v = bptr[r]->score[0]; /* pivot is the rightmost entry */
+X    i = l-1;
+X    j = r;
+X    do {
+X      /* the upward scan is stopped by the pivot itself at bptr[r] */
+X      while (bptr[++i]->score[0] > v) ;
+X      /* the downward scan has no such sentinel: without the j > l
+X         test it runs off the front of the array (bptr[-1]) when the
+X         pivot is the largest entry and l is 0 */
+X      while (--j > l && bptr[j]->score[0] < v) ;
+X      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+X    } while (j > i);
+X    /* undo the final (crossed) swap and drop the pivot into place i */
+X    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
+X    if (i>=k) r = i-1;
+X    if (i<=k) l = i+1;
+X  }
+}
+X
+/* selectbestz() - partial ordering of bptr[0..n-1] by decreasing
+X   zscore: on return every entry in front of position k has a zscore
+X   at least as large as bptr[k], and every entry behind it has a
+X   zscore no larger.
+X
+X   bptr - array of pointers, rearranged in place
+X   k    - rank (array position) to settle
+X   n    - number of entries in bptr
+*/
+void selectbestz(struct beststr **bptr, int k, int n)
+{
+X  int i, j, l, r;
+X  struct beststr *tmptr;
+X  double v;
+X
+X  l=0; r=n-1;
+X
+X  while ( r > l ) {
+X    v = bptr[r]->zscore; /* pivot is the rightmost entry */
+X    i = l-1;
+X    j = r;
+X    do {
+X      /* the upward scan is stopped by the pivot itself at bptr[r] */
+X      while (bptr[++i]->zscore > v) ;
+X      /* the downward scan has no such sentinel: without the j > l
+X         test it runs off the front of the array (bptr[-1]) when the
+X         pivot is the largest entry and l is 0 */
+X      while (--j > l && bptr[j]->zscore < v) ;
+X      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+X    } while (j > i);
+X    /* undo the final (crossed) swap and drop the pivot into place i */
+X    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
+X    if (i>=k) r = i-1;
+X    if (i<=k) l = i+1;
+X  }
+}
+X
+/* improved shellsort with high-performance increments */
+/*
+shellsort(itemType a[], int l, int r)
+{ int i, j, k, h; itemType v;
+X int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
+X 13776, 4592, 1968, 861, 336,
+X 112, 48, 21, 7, 3, 1 };
+X for ( k = 0; k < 16; k++)
+X for (h = incs[k], i = l+h; i <= r; i++)
+X {
+X v = a[i]; j = i;
+X while (j > h && a[j-h] > v)
+X { a[j] = a[j-h]; j -= h; }
+X a[j] = v;
+X }
+}
+*/
+X
+/* ?improved? version of sortbestz using optimal increments and fewer
+X exchanges */
+/* sortbestz() - shell sort of bptr[0..nbest-1] into decreasing zscore
+X   order, using a fixed table of 16 increments. */
+void sortbestz(struct beststr **bptr, int nbest)
+{
+X  static const int steps[16] = { 1391376, 463792, 198768, 86961, 33936,
+X                                 13776, 4592, 1968, 861, 336,
+X                                 112, 48, 21, 7, 3, 1 };
+X  int pass, step, pos, slot;
+X  struct beststr *held;
+X  double key;
+X
+X  for (pass = 0; pass < 16; pass++) {
+X    step = steps[pass];
+X    for (pos = step; pos < nbest; pos++) {
+X      held = bptr[pos];
+X      key = held->zscore;
+X      /* slide smaller-scoring entries up by one step until the slot
+X         for the held entry is found */
+X      for (slot = pos;
+X           slot >= step && bptr[slot-step]->zscore < key;
+X           slot -= step) {
+X        bptr[slot] = bptr[slot-step];
+X      }
+X      bptr[slot] = held;
+X    }
+X  }
+}
+X
+X
+/* sortbeste() - shell sort of bptr[0..nbest-1] into increasing escore
+X   order; the leading run of entries whose escore is <= 2.0*DBL_MIN is
+X   then re-sorted by zscore with sortbestz(). */
+void sortbeste(struct beststr **bptr, int nbest)
+{
+X  static const int steps[16] = { 1391376, 463792, 198768, 86961, 33936,
+X                                 13776, 4592, 1968, 861, 336,
+X                                 112, 48, 21, 7, 3, 1 };
+X  int pass, step, pos, slot, nzero;
+X  struct beststr *held;
+X  double key;
+X
+X  for (pass = 0; pass < 16; pass++) {
+X    step = steps[pass];
+X    for (pos = step; pos < nbest; pos++) {
+X      held = bptr[pos];
+X      key = held->escore;
+X      /* slide larger-escore entries up by one step until the slot for
+X         the held entry is found */
+X      for (slot = pos;
+X           slot >= step && bptr[slot-step]->escore > key;
+X           slot -= step) {
+X        bptr[slot] = bptr[slot-step];
+X      }
+X      bptr[slot] = held;
+X    }
+X  }
+X
+X  /* sometimes there are many high scores with E()==0.0, sort
+X     those by z() score */
+X  for (nzero = 0; nzero < nbest; nzero++) {
+X    if (!(bptr[nzero]->escore <= 2.0*DBL_MIN)) break;
+X  }
+X  if (nzero > 1) sortbestz(bptr,nzero);
+}
+X
+extern double zs_to_Ec(double zs, long entries);
+X
+/*
+extern double ks_dev;
+extern int ks_df; */
+extern char hstring1[];
+X
+/* prhist() - print the score histogram and the statistics summary.
+X
+X   fd       - output stream
+X   m_msg    - run options (nohist and markx are read here)
+X   pst      - scoring parameters; when pst.zsflag_f < 0 only the
+X              residue/sequence totals and gstring2 are printed
+X   hist     - histogram counts (hist_a[0..maxh-1]), bin origin and
+X              width (min_hist, histint), entries and stat_info text
+X   nstats   - number of scores sampled; the histogram is drawn only
+X              when this is greater than 20
+X   ntt      - library totals (length, entries, carry)
+X   gstring2 - parameter summary line printed last
+X
+X   With MX_M10FORM set in m_msg.markx the same figures are also
+X   written into the global hstring1, and newlines in hist.stat_info
+X   are replaced by blanks.
+X   NOTE(review): that sprintf() is unbounded and the size of hstring1
+X   is not visible from here - confirm it is large enough.
+*/
+void
+prhist(FILE *fd, struct mngmsg m_msg,
+X       struct pstruct pst,
+X       struct hist_str hist,
+X       int nstats,
+X       struct db_str ntt,
+X       char *gstring2)
+{
+X  int i,j,hl,hll, el, ell, ev;
+X  char hline[80], pch, *bp;
+X  int mh1, mht;
+X  int maxval, maxvalt, dotsiz, ddotsiz,doinset;
+X  double cur_e, prev_e, f_int;
+X  /* max_dev, n_chi_sq and max_i are reported after the histogram even
+X     when the loop below did not run (nstats <= 20, nohist) or never
+X     updated them, so they need defined starting values */
+X  double max_dev = 0.0, x_tmp;
+X  double db_tt;
+X  int n_chi_sq = 0, cum_hl=0, max_i = 0;
+X
+X
+X  fprintf(fd,"\n");
+X
+X  if (pst.zsflag_f < 0) {
+X    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);
+X    fprintf(fd,"\n%s\n",gstring2);
+X    return;
+X  }
+X
+X  if (nstats > 20) {
+X    max_dev = 0.0;
+X    mh1 = hist.maxh-1;
+X    mht = (3*hist.maxh-3)/4 - 1; /* first bin shown in the inset */
+X
+X    if (!m_msg.nohist && mh1 > 0) {
+X      for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
+X        if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
+X        if (i >= mht && hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
+X      }
+X      n_chi_sq = 0;
+X      cum_hl = -hist.hist_a[0];
+X      /* scale so the tallest bar is at most 60 columns (50 for the
+X         inset, which is drawn only if it has a finer scale) */
+X      dotsiz = (maxval-1)/60+1;
+X      ddotsiz = (maxvalt-1)/50+1;
+X      doinset = (ddotsiz < dotsiz && dotsiz > 2);
+X
+X      if (pst.zsflag_f>=0)
+X        fprintf(fd," opt E()\n");
+X      else
+X        fprintf(fd," opt\n");
+X
+X      prev_e = zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);
+X      for (i=0; i<=mh1; i++) {
+X        /* '<' marks the first bin, '>' the last */
+X        pch = (i==mh1) ? '>' : ' ';
+X        pch = (i==0) ? '<' : pch;
+X        hll = hl = hist.hist_a[i];
+X        if (pst.zsflag_f>=0) {
+X          cum_hl += hl;
+X          f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
+X          cur_e = zs_to_Ec(f_int,hist.entries);
+X          /* expected count for this bin */
+X          ev = el = ell = (int)(cur_e - prev_e + 0.5);
+X          if (hl > 0 && i > 5 && i < (90-hist.min_hist)/hist.histint) {
+X            /* track the largest observed-vs-expected cumulative gap */
+X            x_tmp = fabs(cum_hl - cur_e);
+X            if ( x_tmp > max_dev) {
+X              max_dev = x_tmp;
+X              max_i = i;
+X            }
+X            n_chi_sq++;
+X          }
+X          if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
+X          if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
+X          fprintf(fd,"%c%3d %5d %5d:",
+X                  pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+X                  mh1*hist.histint+hist.min_hist,hl,ev);
+X        }
+X        else fprintf(fd,"%c%3d %5d :",
+X                     pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+X                     mh1*hist.histint+hist.min_hist,hl);
+X
+X        if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
+X        if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
+X        /* observed count as '=', expected count marked with '*' */
+X        for (j=0; j<hl; j++) hline[j]='=';
+X        if (pst.zsflag_f>=0) {
+X          if (el <= hl ) {
+X            if (el > 0) hline[el-1]='*';
+X            hline[hl]='\0';
+X          }
+X          else {
+X            for (j = hl; j < el; j++) hline[j]=' ';
+X            hline[el-1]='*';
+X            hline[hl=el]='\0';
+X          }
+X        }
+X        else hline[hl] = 0;
+X        if (i==1) {
+X          /* the second row carries the scale legend from column 10 */
+X          for (j=hl; j<10; j++) hline[j]=' ';
+X          sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
+X        }
+X        if (doinset && i == mht-2) {
+X          for (j = hl; j < 10; j++) hline[j]=' ';
+X          sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
+X        }
+X        if (i >= mht&& doinset ) {
+X          /* inset: a second bar, on the finer scale, after a ':' in
+X             column 10 */
+X          for (j = hl; j < 10; j++) hline[j]=' ';
+X          hline[10]=':';
+X          for (j = 11; j<11+hll; j++) hline[j]='=';
+X          hline[11+hll]='\0';
+X          if (pst.zsflag_f>=0) {
+X            if (ell <= hll) hline[10+ell]='*';
+X            else {
+X              for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
+X              hline[10+ell] = '*';
+X              hline[11+ell] = '\0';
+X            }
+X          }
+X        }
+X
+X        fprintf(fd,"%s\n",hline);
+X        prev_e = cur_e;
+X      }
+X    }
+X  }
+X
+X  if (ntt.carry==0) {
+X    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length, ntt.entries);
+X  }
+X  else {
+X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+X    fprintf(fd, "%.0f residues in %5ld library sequences\n", db_tt, ntt.entries);
+X  }
+X
+X  if (pst.zsflag_f>=0) {
+X    if (MAXSTATS < hist.entries)
+#ifdef SAMP_STATS
+X      fprintf(fd," statistics sampled from %d to %ld sequences\n",
+X              MAXSTATS,hist.entries);
+#else
+X      fprintf(fd," statistics extrapolated from %d to %ld sequences\n",
+X              MAXSTATS,hist.entries);
+#endif
+X    /* summ_stats(stat_info); */
+X    fprintf(fd," %s\n",hist.stat_info);
+X    if (!m_msg.nohist && cum_hl > 0)
+X      fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%d) at %3d\n",
+X              max_dev/(float)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+X    if (m_msg.markx & MX_M10FORM) {
+X      while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
+X      if (cum_hl <= 0) cum_hl = -1; /* avoid dividing by zero below */
+X      sprintf(hstring1,"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
+X              MAXSTATS,hist.entries,hist.stat_info,max_dev/(float)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+X    }
+X  }
+X  fprintf(fd,"\n%s\n",gstring2);
+X  fflush(fd);
+}
+X
+extern char prog_name[], *verstr;
+X
+void s_abort (char *p, char *p1)
+{
+X int i;
+X
+X fprintf (stderr, "\n***[%s] %s%s***\n", prog_name, p, p1);
+#ifdef PCOMPLIB
+#ifdef PVM_SRC
+X for (i=FIRSTNODE; i< nnodes; i++) pvm_kill(pinums[i]);
+X pvm_exit();
+#endif
+#ifdef MPI_SRC
+X MPI_Abort(MPI_COMM_WORLD,1);
+X MPI_Finalize();
+#endif
+#endif
+X exit (1);
+}
+X
+#ifndef MPI_SRC
+/* w_abort() - write "***[prog_name] <p><p1>***" to stderr, followed by an
+X   empty line, and leave the program with exit status 1. */
+void w_abort (char *p, char *p1)
+{
+X  const char *fmt = "\n***[%s] %s%s***\n\n";
+X
+X  fprintf (stderr, fmt, prog_name, p, p1);
+X  exit (1);
+}
+#endif
+X
+#ifndef PCOMPLIB
+/* shuffle() - copy n residues from from[] into to[] and permute them.
+X
+X   from - sequence to shuffle
+X   to   - destination; needs room for n+1 bytes because a terminating 0
+X          is stored at to[n]
+X   n    - number of residues
+X
+X   The copy is skipped when from and to are the same buffer.  The
+X   permutation walks from the last position down to the first and
+X   exchanges each position with to[nrand(count)], where count is the
+X   number of positions not yet fixed.
+*/
+X
+extern int nrand(int);
+X
+void
+shuffle(unsigned char *from, unsigned char *to, int n)
+{
+X  int remaining, pick;
+X  unsigned char held;
+X
+X  if (from != to) {
+X    memcpy((void *)to,(void *)from,n);
+X  }
+X
+X  remaining = n;
+X  while (remaining > 0) {
+X    pick = nrand(remaining);
+X    held = to[pick];
+X    to[pick] = to[remaining-1];
+X    to[remaining-1] = held;
+X    remaining--;
+X  }
+X
+X  to[n] = 0;
+}
+X
+/* wshuffle_block() - permute the len bytes starting at top in place: one
+X   nrand() call per position, from the last position down to the first. */
+static void
+wshuffle_block(unsigned char *top, int len)
+{
+X  int i, j;
+X  unsigned char tmp;
+X
+X  for (i=len; i>0; i--) {
+X    j = nrand(i);
+X    tmp = top[j];
+X    top[j] = top[i-1];
+X    top[i-1] = tmp;
+X  }
+}
+X
+/* wshuffle() - copy from[] to to[] and shuffle it window by window.
+X
+X   from  - sequence to shuffle
+X   to    - destination; needs room for n+1 bytes because a terminating
+X           0 is stored at to[n]
+X   n     - number of residues
+X   wsiz  - window length; must be positive (it is used as a divisor)
+X   ieven - in/out flag selecting the window layout; it is inverted on
+X           every call (the original note says it is passed by pointer
+X           "for threads")
+X
+X   Residues are only exchanged within a window.  When *ieven is non-zero
+X   the full windows are counted from the start of the sequence and the
+X   n%wsiz leftover residues form a short window at the end; otherwise
+X   the full windows are counted back from the end and the leftover
+X   window sits at the start.
+X
+X   The forward loop runs while k <= n-wsiz so that the last full window
+X   is also shuffled when n is an exact multiple of wsiz.
+X   The copy is skipped when from and to are the same buffer, as in
+X   shuffle().
+*/
+void
+wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven)
+{
+X  int k, mm;
+X
+X  if (from != to) memcpy((void *)to,(void *)from,n);
+X
+X  mm = n%wsiz;		/* residues left over after the full windows */
+X
+X  if (*ieven) {
+X    /* full windows from the front, leftover window at the end */
+X    for (k=0; k<=(n-wsiz); k += wsiz) {
+X      wshuffle_block(&to[k], wsiz);
+X    }
+X    wshuffle_block(&to[n-mm], mm);
+X    *ieven = 0;
+X  }
+X  else {
+X    /* full windows from the back, leftover window at the front */
+X    for (k=n; k>=wsiz; k -= wsiz) {
+X      wshuffle_block(&to[k-wsiz], wsiz);
+X    }
+X    wshuffle_block(&to[0], mm);
+X    *ieven = 1;
+X  }
+X  to[n] = 0;
+}
+X
+#endif
+X
+/* sfn_cmp() - look for a value that occurs in both zero-terminated int
+X   lists q and s.
+X
+X   At each step the list whose current value is smaller is advanced, so
+X   both lists are presumably expected in ascending order - confirm
+X   against the callers.  Returns the first common value found, or 0 when
+X   either list ends first.
+*/
+int
+sfn_cmp(int *q, int *s)
+{
+X  for (;;) {
+X    if (*q == *s) return *q;	/* match (0 if both lists ended together) */
+X    if (*q == 0 || *s == 0) return 0;
+X    if (*q < *s) q++;
+X    else s++;
+X  }
+}
+X
+#ifndef MPI_SRC
+X
+#define ESS 49
+X
+/* revcomp() - reverse-complement seq[0..n-1] in place.
+X
+X   seq  - encoded sequence; needs room for n+1 bytes because a 0 is
+X          stored at seq[n]
+X   n    - sequence length
+X   c_nt - lookup table indexed by residue code that yields the
+X          complementary code
+X
+X   Positions are exchanged pairwise from both ends, each value being
+X   replaced by its c_nt[] entry; for odd n the middle residue is
+X   complemented where it stands.
+*/
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+X  unsigned char held;
+X  int head, tail, mid;
+X
+X  head = 0;
+X  tail = n-1;
+X  while (head < n/2) {
+X    held = c_nt[seq[head]];
+X    seq[head] = c_nt[seq[tail]];
+X    seq[tail] = held;
+X    head++;
+X    tail--;
+X  }
+X
+X  if ((n%2)==1) {
+X    mid = n/2;
+X    seq[mid] = c_nt[seq[mid]];
+X  }
+X
+X  seq[n]=0;
+}
+#endif
+X
+#ifdef PCOMPLIB
+X
+/* init_stage2 sets up the data structures necessary to send a subset
+X of sequences to the nodes, and then collects the results
+*/
+X
+/* wstage2[] FIRSTNODE .. nnodes has the next sequence to be do_opt()/do_walign()ed */
+/* wstage2p[] is a list of sequence numbers/frames, to be sent to workers */
+/* wstage2b[] is a list of bptr's that shares the index with wstage2p[] */
+X
+static int wstage2[MAXWRKR +1]; /* count of second stage scores */
+static struct stage2_str *wstage2p[MAXWRKR+1]; /* list of second stage sequences */
+static int wstage2i[MAXWRKR+1]; /* index into second stage sequences */
+static struct beststr *bbptr,
+X **wstage2b[MAXWRKR+1]; /* reverse pointers to bestr */
+X
+/* do_stage2() - have the workers re-process the best hits and merge
+X   their answers back into the master's result entries.
+X
+X   bptr   - the nbest result entries; each one names the worker (wrkr),
+X            sequence number (seqnm) and frame it came from
+X   nbest  - number of entries in bptr[]
+X   m_msg0 - copy of the master message structure; stages, markx,
+X            show_code and last_calc_flg are consulted here
+X   s_func - operation requested from the workers; compared below with
+X            DO_OPT_FLG and DO_CALC_FLG
+X   qm_msp - query message sent to every worker that has work; its slist
+X            and s_func fields are overwritten here
+X
+X   Steps:
+X   1. count the results held by each worker (wstage2[]);
+X   2. build, per worker, the {seqnm,frame} list to send (wstage2p[]) and
+X      the parallel list of result pointers (wstage2b[]);
+X   3. send qm_msp and the list to every worker that holds results;
+X   4. receive comstr2 chunks - plus, for DO_OPT_FLG with
+X      SHOW_CODE_ALIGN, a buffer of alignment encodings - until every
+X      worker has flagged FINISHED, copying the values into the matching
+X      result entries;
+X   5. free the per-worker lists.
+X
+X   s_abort() is called if the per-worker lists cannot be allocated.
+*/
+void
+do_stage2(struct beststr **bptr, int nbest, struct mngmsg m_msg0,
+X	  int s_func, struct qmng_str *qm_msp) {
+X
+X  int i, is, ib, iw, nres;
+X  int node, snode, node_done;
+X  int bufid, numt, tid;
+X  char errstr[120];
+X  struct comstr2 bestr2[BFR2+1];	/* temporary structure array */
+X  char *seqc_buff, *seqc;
+X  int seqc_buff_len, aln_code_n;
+#ifdef MPI_SRC
+X  MPI_Status mpi_status;
+#endif
+X
+X  /* initialize the counter for each worker to 0 */
+X  for (iw = FIRSTNODE; iw < nnodes; iw++) wstage2[iw] = 0;
+X
+X  /* for each result, bump the counter for the worker that has
+X     the sequence */
+X  for (ib = 0; ib < nbest; ib++ ) { wstage2[bptr[ib]->wrkr]++; }
+X
+X  /* now allocate enough space to send each worker a
+X     list of its sequences stage2_str {seqnm, frame} */
+X  for (iw = FIRSTNODE; iw < nnodes; iw++) {
+X    if (wstage2[iw]>0) {
+X      if ((wstage2p[iw]=
+X	   (struct stage2_str *)
+X	   calloc(wstage2[iw],sizeof(struct stage2_str)))==NULL) {
+X	sprintf(errstr," cannot allocate sequence listp %d %d",
+X		iw,wstage2[iw]);
+X	s_abort(errstr,"");
+X      }
+X
+X      /* allocate space to remember the bptr's for each result */
+X      if ((wstage2b[iw]=(struct beststr **)
+X	   calloc(wstage2[iw],sizeof(struct beststr *)))==NULL) {
+X	sprintf(errstr," cannot allocate sequence listb %d %d",
+X		iw,wstage2[iw]);
+X	s_abort(errstr,"");
+X      }
+X      wstage2i[iw]=0;
+X    }
+X    else {
+X      wstage2p[iw] = NULL;
+X      wstage2b[iw] = NULL;
+X    }
+X  }
+X
+X  /* for each result, set wstage2p[worker][result_index_in_worker] */
+X  for (is = 0; is < nbest; is++) {
+X    iw=bptr[is]->wrkr;
+X    wstage2p[iw][wstage2i[iw]].seqnm = bptr[is]->seqnm;
+X    wstage2p[iw][wstage2i[iw]].frame = bptr[is]->frame;
+X    wstage2b[iw][wstage2i[iw]] = bptr[is];
+X    wstage2i[iw]++;
+X  }
+X
+X
+X  /* at this point, wstage2i[iw] should equal wstage2[iw] */
+X  node_done = 0;
+X  for (node = FIRSTNODE; node < nnodes; node++) {
+X
+X    /*    fprintf(stderr,"node: %d stage2: %d\n",node,wstage2[node]); */
+X
+X    /* if a worker has no results, move on */
+X    if (wstage2[node]<=0) { node_done++; continue;}
+X
+X    qm_msp->slist = wstage2[node];	/* set number of results to return */
+X    qm_msp->s_func = s_func;		/* set s_funct for do_opt/do_walign */
+#ifdef PVM_SRC
+X    pvm_initsend(PvmDataRaw);
+X    pvm_pkbyte((char *)qm_msp,sizeof(struct qmng_str),1);
+X    pvm_send(pinums[node],MSEQTYPE);	/* send qm_msp */
+X    pvm_initsend(PvmDataRaw);	/* send the list of seqnm/frame */
+X    pvm_pkbyte((char *)wstage2p[node],wstage2[node]*sizeof(struct stage2_str),1);
+X    pvm_send(pinums[node],LISTTYPE);
+#endif
+#ifdef MPI_SRC
+X    MPI_Send(qm_msp,sizeof(struct qmng_str),MPI_BYTE,node,MSEQTYPE,
+X	     MPI_COMM_WORLD);
+X    MPI_Send((char *)wstage2p[node],wstage2[node]*
+X	     sizeof(struct stage2_str),MPI_BYTE,node,LISTTYPE,
+X	     MPI_COMM_WORLD);
+#endif
+X  }
+X
+X  /* all the workers have their list of sequences */
+X  /* reset the index of results to obtain */
+X  for (iw = 0; iw < nnodes; iw++) wstage2i[iw]=0;
+X
+X  while (node_done < nnodes-FIRSTNODE) {
+#ifdef PVM_SRC
+X    bufid = pvm_recv(-1,LISTRTYPE);	/* wait for results */
+X    pvm_bufinfo(bufid,NULL,NULL,&tid);
+X    /* get a chunk of comstr2 results */
+X    pvm_upkbyte((char *)&bestr2[0],sizeof(struct comstr2)*(BFR2+1),1);
+X    snode = (iw=tidtonode(tid));
+X    pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X    MPI_Recv((char *)&bestr2[0],sizeof(struct comstr2)*(BFR2+1),
+X	     MPI_BYTE,MPI_ANY_SOURCE,LISTRTYPE,MPI_COMM_WORLD,
+X	     &mpi_status);
+X    snode = mpi_status.MPI_SOURCE;
+X    iw = snode;
+#endif
+X
+X    seqc_buff = NULL;
+X    if (s_func == DO_OPT_FLG && m_msg0.show_code==SHOW_CODE_ALIGN) {
+#ifdef PVM_SRC
+X      bufid = pvm_recv(tid,CODERTYPE);
+X      pvm_upkint(&seqc_buff_len,1,1);	/* get the code string length */
+#endif
+#ifdef MPI_SRC
+X      MPI_Recv((char *)&seqc_buff_len,1,MPI_INT, snode,
+X	       CODERTYPE,MPI_COMM_WORLD, &mpi_status);
+#endif
+X
+X      seqc=seqc_buff = NULL;
+X      if (seqc_buff_len > 0) {		/* allocate space for it */
+X	if ((seqc=seqc_buff=calloc(seqc_buff_len,sizeof(char)))==NULL) {
+X	  /* NOTE(review): in the MPI build the encoding payload is not
+X	     received on this path; if the worker still sends it, a later
+X	     CODERTYPE receive from that worker would pick it up instead
+X	     of a length - confirm against the worker code */
+X	  fprintf(stderr,"Cannot allocate seqc_buff: %d\n",seqc_buff_len);
+X	  seqc_buff_len=0;
+X	}
+X	else {
+#ifdef PVM_SRC
+X	  pvm_upkbyte(seqc_buff,seqc_buff_len*sizeof(char),1);
+#endif
+#ifdef MPI_SRC
+X	  MPI_Recv((char *)seqc_buff,seqc_buff_len*sizeof(char),
+X		   MPI_BYTE,snode,CODERTYPE,MPI_COMM_WORLD, &mpi_status);
+#endif
+X	}
+X      }
+#ifdef PVM_SRC
+X      pvm_freebuf(bufid);
+#endif
+X    }
+X
+X    /* get number of results in this message */
+X    nres = bestr2[BFR2].seqnm & ~FINISHED;
+X    /* check to see if finished */
+X    if (bestr2[BFR2].seqnm&FINISHED) {node_done++;}
+X
+X    seqc = seqc_buff;
+X
+X    /* count through results from a specific worker */
+X    for (i=0,is=wstage2i[iw]; i < nres; i++,is++) {
+X
+X      /* get the (saved) bptr for this result */
+X      bbptr=wstage2b[iw][is];
+X      /* consistency check seqnm's must agree */
+X      if (wstage2p[iw][is].seqnm ==  bbptr->seqnm) {
+X	if (s_func == DO_CALC_FLG && m_msg0.last_calc_flg) {
+X	  bbptr->score[0] = bestr2[i].score[0];
+X	  bbptr->score[1] = bestr2[i].score[1];
+X	  bbptr->score[2] = bestr2[i].score[2];
+X	  bbptr->escore = bestr2[i].escore;
+X	  bbptr->segnum = bestr2[i].segnum;
+X	  bbptr->seglen = bestr2[i].seglen;
+X	}
+X	else if (m_msg0.stages > 1) {
+X	  bbptr->score[0] = bestr2[i].score[0];
+X	  bbptr->score[1] = bestr2[i].score[1];
+X	  bbptr->score[2] = bestr2[i].score[2];
+X	}
+X
+X	if (s_func == DO_OPT_FLG && m_msg0.markx & MX_M9SUMM) {
+X	  /* get score, alignment information, percents */
+X	  bbptr->sw_score = bestr2[i].sw_score;
+X	  memcpy(bbptr->aln_d,&bestr2[i].aln_d,sizeof(struct a_struct));
+X	  bbptr->percent = bestr2[i].percent;
+X	  bbptr->gpercent = bestr2[i].gpercent;
+X
+X	  if (m_msg0.show_code == SHOW_CODE_ALIGN) {	/* if show code */
+X	    /* length of encoding */
+X	    aln_code_n = bbptr->aln_code_n = bestr2[i].aln_code_n;
+X	    if (aln_code_n > 0) {
+X	      if (seqc == NULL) {
+X		/* no encoding buffer is available for this chunk (none
+X		   was sent, or it could not be allocated above), so
+X		   there is nothing to copy from */
+X		fprintf(stderr,"no alignment encoding for [%d:%d]: %d\n",
+X			bbptr->wrkr,bbptr->seqnm,aln_code_n);
+X		bbptr->aln_code_n = 0;
+X	      }
+X	      else if ((bbptr->aln_code =
+X			(char *)calloc(aln_code_n+1,sizeof(char)))==NULL) {
+X		fprintf(stderr,"cannot allocate seq_code[%d:%d]: %d\n",
+X			bbptr->wrkr,bbptr->seqnm,aln_code_n);
+X		/* still step over this entry's encoding */
+X		seqc += aln_code_n+1;
+X		bbptr->aln_code_n = 0;
+X	      }
+X	      else {
+X		strncpy(bbptr->aln_code,seqc,aln_code_n);
+X		bbptr->aln_code[aln_code_n]='\0';
+X		seqc += aln_code_n+1;
+X	      }
+X	    }
+X	    else {
+X	      fprintf(stderr," aln_code_n <=0: %d\n",aln_code_n);
+X	    }
+X	  }
+X	}
+X      }
+X      else fprintf(stderr,"phase error in phase II return %d %d\n", iw,i);
+X    }
+X    if (seqc_buff != NULL) {
+X      free(seqc_buff);
+X      seqc_buff = NULL;
+X    }
+X    wstage2i[iw] += nres;
+X  }
+X
+X  for (iw=FIRSTNODE; iw < nnodes; iw++) {
+X    if ((void *)wstage2p[iw]!=NULL) free((void *)wstage2p[iw]);
+X    if ((void *)wstage2b[iw]!=NULL) free((void *)wstage2b[iw]);
+X  }
+}
+X
+#endif
+SHAR_EOF
+chmod 0644 compacc.c ||
+echo 'restore of compacc.c failed'
+Wc_c="`wc -c < 'compacc.c'`"
+test 21270 -eq "$Wc_c" ||
+ echo 'compacc.c: original size 21270, current size' "$Wc_c"
+fi
+# ============= create_seq_demo.sql ==============
+if test -f 'create_seq_demo.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping create_seq_demo.sql (File already exists)'
+else
+echo 'x - extracting create_seq_demo.sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'create_seq_demo.sql' &&
+X
+-- create_seq_demo.sql: (re)create the seq_demo database and its tables.
+-- An existing seq_demo database is dropped first, together with its
+-- contents.  IF EXISTS keeps the DROP from raising an error on a server
+-- where seq_demo has not been created yet.
+DROP DATABASE IF EXISTS seq_demo;
+CREATE DATABASE seq_demo;
+X
+USE seq_demo;
+X
+-- prot: id is assigned by AUTO_INCREMENT; seq, bin and len are mandatory.
+CREATE TABLE prot (
+id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+seq TEXT NOT NULL,
+bin BLOB NOT NULL,
+len INT UNSIGNED NOT NULL
+);
+X
+-- annot: one row per gi; prot_id and db are indexed.
+-- (prot_id presumably refers to prot.id; no foreign key is declared.)
+CREATE TABLE annot (
+prot_id INT UNSIGNED NOT NULL,
+gi INT UNSIGNED NOT NULL PRIMARY KEY,
+db ENUM("gb","emb","dbj","prf","ref","pdb","pir","sp") NOT NULL,
+descr TEXT NOT NULL,
+X
+INDEX (prot_id),
+INDEX (db)
+);
+X
+-- sp: optional acc and name values, keyed by gi.
+CREATE TABLE sp (
+X gi INT UNSIGNED NOT NULL,
+X acc VARCHAR(10),
+X name VARCHAR(10),
+X
+X PRIMARY KEY (gi)
+);
+SHAR_EOF
+chmod 0644 create_seq_demo.sql ||
+echo 'restore of create_seq_demo.sql failed'
+Wc_c="`wc -c < 'create_seq_demo.sql'`"
+test 536 -eq "$Wc_c" ||
+ echo 'create_seq_demo.sql: original size 536, current size' "$Wc_c"
+fi
+# ============= cvs_id ==============
+if test -f 'cvs_id' -a X"$1" != X"-c"; then
+ echo 'x - skipping cvs_id (File already exists)'
+else
+echo 'x - extracting cvs_id (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'cvs_id' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: cvs_id,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
+SHAR_EOF
+chmod 0644 cvs_id ||
+echo 'restore of cvs_id failed'
+Wc_c="`wc -c < 'cvs_id'`"
+test 81 -eq "$Wc_c" ||
+ echo 'cvs_id: original size 81, current size' "$Wc_c"
+fi
+# ============= dec_pthr_subs.c ==============
+if test -f 'dec_pthr_subs.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dec_pthr_subs.c (File already exists)'
+else
+echo 'x - extracting dec_pthr_subs.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dec_pthr_subs.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: dec_pthr_subs.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* this file isolates the pthreads calls from the main program */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+X
+#include "param.h"
+X
+#include <pthread.h>
+#define XTERNAL
+#include "thr.h"
+#undef XTERNAL
+#include "pthr_subs.h"
+X
+extern void work_thread (struct thr_str *work_info);
+X
+/* init_thr() - set up the synchronisation objects and start the workers.
+X
+X   nthreads  - number of worker threads to start; the program exits with
+X               status 1 when this exceeds MAX_WORKERS
+X   work_info - argument handed to every work_thread(); its worker field
+X               serves as the loop counter while the threads are created
+X
+X   Initialises the reader, worker and start mutexes and condition
+X   variables, creates a thread attribute with a stack size of 1000000,
+X   then creates nthreads threads running work_thread().  The result of
+X   each thread call is passed to the check() macro.
+*/
+X
+void init_thr(int nthreads, struct thr_str *work_info)
+{
+X  pthread_attr_t thread_attr;
+X  int rc;
+X
+X  if (nthreads > MAX_WORKERS) {
+X    fprintf ( stderr," cannot start %d threads, max: %d\n",
+X	      nthreads, MAX_WORKERS);
+X    exit(1);
+X  }
+X
+X  /* locks and condition variables for the reader/worker buffer lists */
+X  rc = pthread_mutex_init(&reader_mutex, pthread_mutexattr_default);
+X  check(rc,"Reader_mutex init bad status\n");
+X
+X  rc = pthread_mutex_init(&worker_mutex, pthread_mutexattr_default);
+X  check(rc,"Worker_mutex init bad status\n");
+X
+X  rc = pthread_cond_init(&reader_cond_var, pthread_condattr_default);
+X  check(rc,"Reader_cond_var init bad status\n");
+X
+X  rc = pthread_cond_init(&worker_cond_var, pthread_condattr_default);
+X  check(rc,"Worker_cond_var init bad status\n");
+X
+X  /* lock and condition variable for the start-up signal */
+X  rc = pthread_mutex_init(&start_mutex, pthread_mutexattr_default);
+X  check(rc,"Start_mutex init bad status\n");
+X
+X  rc = pthread_cond_init(&start_cond_var, pthread_condattr_default);
+X  check(rc,"Start_cond_var init bad status\n");
+X
+X  /* thread attribute carrying the enlarged stack size */
+X  rc = pthread_attr_create( &thread_attr );
+X  check(rc,"attribute create bad status\n");
+X
+X  rc = pthread_attr_setstacksize( &thread_attr, 1000000);
+X  check(rc,"stacksize change bad status\n");
+X
+X  /* launch the workers, one per value of work_info->worker */
+X  work_info->worker = 0;
+X  while (work_info->worker < nthreads) {
+X    rc = pthread_create(&threads[work_info->worker],thread_attr,
+X			(pthread_startroutine_t)&work_thread,
+X			(pthread_addr_t)work_info);
+X    check(rc,"Pthread_create failed\n");
+X    work_info->worker++;
+X  }
+}
+X
+/* start_thr() - release the worker threads: clears start_thread while
+X   holding start_mutex and broadcasts start_cond_var.  The result of the
+X   broadcast itself is not examined. */
+void start_thr()
+{
+X  int rc;
+X
+X  rc = pthread_mutex_lock(&start_mutex);
+X  check(rc,"Start_mutex lock bad status in main\n");
+X
+X  /* lower the predicate, then wake every thread waiting on it */
+X  start_thread = 0;
+X  (void) pthread_cond_broadcast(&start_cond_var);
+X
+X  rc = pthread_mutex_unlock(&start_mutex);
+X  check(rc,"Start_mutex unlock bad status in main\n");
+}
+X
+/* get_rbuf() - take the next buffer off the reader list.
+X
+X   cur_buf      - receives the buffer address
+X   max_work_buf - length of the circular reader_buf[] list
+X
+X   Blocks on reader_cond_var while num_reader_bufs is 0.
+*/
+void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+X  int rc, next_slot;
+X
+X  /* the reader list is only touched with reader_mutex held */
+X  rc = pthread_mutex_lock(&reader_mutex);
+X  check(rc,"Reader_mutex lock in master bad status\n");
+X
+X  /* nothing available yet: sleep until signalled */
+X  while (num_reader_bufs == 0) {
+X    pthread_cond_wait(&reader_cond_var,&reader_mutex);
+X  }
+X
+X  *cur_buf = reader_buf[reader_buf_readp];
+X  next_slot = (reader_buf_readp+1)%(max_work_buf);
+X  reader_buf_readp = next_slot;
+X  num_reader_bufs--;
+X
+X  rc = pthread_mutex_unlock(&reader_mutex);
+X  check(rc,"Reader_mutex unlock in master bad status\n");
+}
+X
+/* put_rbuf() - append cur_buf to the circular worker_buf[] list (length
+X   max_work_buf) and signal worker_cond_var once.  The result of the
+X   signal call is not examined. */
+void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+X  int rc, next_slot;
+X
+X  /* the worker list is only touched with worker_mutex held */
+X  rc = pthread_mutex_lock(&worker_mutex);
+X  check(rc,"Worker_mutex lock in master bad status\n");
+X
+X  /* queue the buffer and count it as available */
+X  worker_buf[worker_buf_readp] = cur_buf;
+X  next_slot = (worker_buf_readp+1)%(max_work_buf);
+X  worker_buf_readp = next_slot;
+X  num_worker_bufs++;
+X
+X  /* wake one waiting worker */
+X  (void) pthread_cond_signal(&worker_cond_var);
+X
+X  rc = pthread_mutex_unlock(&worker_mutex);
+X  check(rc,"Worker_mutex unlock in master bad status\n");
+}
+X
+/* put_rbuf_done() - queue the final buffer and wait for the workers.
+X
+X   nthreads     - number of threads in threads[] to join
+X   cur_buf      - last buffer to append to worker_buf[]
+X   max_work_buf - length of the circular worker_buf[] list
+X
+X   Sets reader_done and broadcasts worker_cond_var (every waiting
+X   worker is woken), then joins and detaches each thread in turn.
+*/
+void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf)
+{
+X  int rc, t, next_slot;
+X  void *exit_value;
+X
+X  /* the worker list is only touched with worker_mutex held */
+X  rc = pthread_mutex_lock(&worker_mutex);
+X  check(rc,"Worker_mutex lock in master bad status\n");
+X
+X  /* queue the buffer and count it as available */
+X  worker_buf[worker_buf_readp] = cur_buf;
+X  next_slot = (worker_buf_readp+1)%(max_work_buf);
+X  worker_buf_readp = next_slot;
+X  num_worker_bufs++;
+X
+X  /* announce that no further buffers will follow and wake all workers */
+X  reader_done = 1;
+X  (void) pthread_cond_broadcast(&worker_cond_var);
+X
+X  rc = pthread_mutex_unlock(&worker_mutex);
+X  check(rc,"Worker_mutex unlock in master bad status\n");
+X
+X  /* wait for every worker thread to end */
+X  t = 0;
+X  while (t < nthreads) {
+X    rc = pthread_join( threads[t], &exit_value);
+X    check(rc,"Pthread_join bad status\n");
+X
+X    rc = pthread_detach( &threads[t]);
+X    check(rc,"Pthread_detach bad status\n");
+X    t++;
+X  }
+}
+X
+/* wait_thr() - block until start_thread becomes 0, sleeping on
+X   start_cond_var with start_mutex held in between. */
+void wait_thr()
+{
+X  int rc;
+X
+X  rc = pthread_mutex_lock(&start_mutex);
+X  check(rc,"Start_mutex lock bad status in worker\n");
+X
+X  /* re-test the predicate after every wake-up */
+X  for (; start_thread; ) {
+X    rc = pthread_cond_wait(&start_cond_var, &start_mutex);
+X    check(rc,"Start_cond_wait bad status in worker\n");
+X  }
+X
+X  rc = pthread_mutex_unlock(&start_mutex);
+X  check(rc,"Start_mutex unlock bad status in worker\n");
+}
+X
+/* get_wbuf() - take the next buffer off the worker list.
+X
+X   cur_buf      - receives the buffer address
+X   max_work_buf - length of the circular worker_buf[] list
+X
+X   Returns 1 with *cur_buf set, or 0 (leaving *cur_buf alone) when the
+X   list is empty and reader_done is set.  Otherwise it sleeps on
+X   worker_cond_var until a buffer appears.
+*/
+int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+X  int rc, next_slot;
+X
+X  rc = pthread_mutex_lock(&worker_mutex);
+X  check(rc,"First worker_mutex lock in worker bad status\n");
+X
+X  for (;;) {
+X    if (num_worker_bufs != 0) break;	/* something to work on */
+X
+X    /* list empty: give up once the reader has finished ... */
+X    if (reader_done) {
+X      pthread_mutex_unlock(&worker_mutex);
+X      return 0;
+X    }
+X    /* ... otherwise wait for the reader to queue more */
+X    pthread_cond_wait(&worker_cond_var,&worker_mutex);
+X  }
+X
+X  *cur_buf = worker_buf[worker_buf_workp];
+X  next_slot = (worker_buf_workp+1)%(max_work_buf);
+X  worker_buf_workp = next_slot;
+X  num_worker_bufs--;
+X
+X  rc = pthread_mutex_unlock(&worker_mutex);
+X  check(rc,"First worker_mutex unlock in worker bad status\n");
+X  return 1;
+}
+X
+/* put_wbuf() - hand cur_buf back by appending it to the circular
+X   reader_buf[] list (length max_work_buf).  reader_cond_var is
+X   signalled only when the count goes to exactly 1, i.e. when the list
+X   was empty before this call. */
+void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+X  int rc, next_slot;
+X
+X  rc = pthread_mutex_lock(&reader_mutex);
+X  check(rc,"Reader_mutex lock in worker bad status\n");
+X
+X  reader_buf[reader_buf_workp] = cur_buf;
+X  next_slot = (reader_buf_workp+1)%(max_work_buf);
+X  reader_buf_workp = next_slot;
+X  num_reader_bufs++;
+X
+X  /* the list was empty until now: wake the reader */
+X  if (num_reader_bufs == 1) {
+X    pthread_cond_signal(&reader_cond_var);
+X  }
+X
+X  rc = pthread_mutex_unlock(&reader_mutex);
+X  check(rc,"Reader_mutex unlock in worker bad status\n");
+}
+SHAR_EOF
+chmod 0644 dec_pthr_subs.c ||
+echo 'restore of dec_pthr_subs.c failed'
+Wc_c="`wc -c < 'dec_pthr_subs.c'`"
+test 6955 -eq "$Wc_c" ||
+ echo 'dec_pthr_subs.c: original size 6955, current size' "$Wc_c"
+fi
+# ============= dec_pthr_subs.h ==============
+if test -f 'dec_pthr_subs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping dec_pthr_subs.h (File already exists)'
+else
+echo 'x - extracting dec_pthr_subs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dec_pthr_subs.h' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: dec_pthr_subs.h,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp $ */
+X
+#include <pthread.h>
+X
+/* check(status,string): when status is -1, print string followed by the
+X   system error text through perror().  The macro expands to a bare if
+X   statement, so use it only as a complete statement of its own - an
+X   else written directly after it would attach to the hidden if. */
+#define check(status,string) \
+X if (status == -1) perror(string) /* error macro for thread calls */
+X
+/* Thread handles and synchronisation objects shared by the reader and
+X   the workers.  Everything below is compiled only when XTERNAL is not
+X   defined: the objects are defined first and then declared extern as
+X   well.
+X   NOTE(review): there is no #else between the definitions and the
+X   extern declarations, so a file that includes this header with
+X   XTERNAL defined gets neither - confirm whether an #else was
+X   intended.  start_thread is only declared here, not defined. */
+#ifndef XTERNAL
+pthread_t threads[MAX_WORKERS];
+X
+/* mutex stuff */
+X
+pthread_mutex_t reader_mutex; /* empty buffer pointer structure lock */
+pthread_mutex_t worker_mutex; /* full buffer pointer structure lock */
+X
+/* condition variable stuff */
+X
+pthread_cond_t reader_cond_var; /* condition variable for reader */
+pthread_cond_t worker_cond_var; /* condition variable for workers */
+X
+pthread_mutex_t start_mutex; /* start-up synchronisation lock */
+pthread_cond_t start_cond_var; /* start-up synchronisation condition variable */
+X
+extern pthread_t threads[];
+X
+/* mutex stuff */
+X
+extern pthread_mutex_t reader_mutex;
+extern pthread_mutex_t worker_mutex;
+X
+/* condition variable stuff */
+X
+extern pthread_cond_t reader_cond_var;
+extern pthread_cond_t worker_cond_var;
+X
+extern pthread_mutex_t start_mutex;
+extern pthread_cond_t start_cond_var;
+extern int start_thread;
+X
+#endif
+SHAR_EOF
+chmod 0644 dec_pthr_subs.h ||
+echo 'restore of dec_pthr_subs.h failed'
+Wc_c="`wc -c < 'dec_pthr_subs.h'`"
+test 1116 -eq "$Wc_c" ||
+ echo 'dec_pthr_subs.h: original size 1116, current size' "$Wc_c"
+fi
+# ============= defs.h ==============
+if test -f 'defs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping defs.h (File already exists)'
+else
+echo 'x - extracting defs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'defs.h' &&
+/* Concurrent read version */
+X
+/* $Name: fa_34_26_5 $ - $Id: defs.h,v 1.26 2006/06/22 02:35:05 wrp Exp $ */
+X
+#ifdef SUNOS
+#include <sys/stdtypes.h>
+#endif
+X
+#ifndef IS_BIG_ENDIAN
+#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+#define IS_BIG_ENDIAN
+#else
+#undef IS_BIG_ENDIAN
+#endif
+#endif
+X
+#if !defined(MAX_WORKERS) && !defined(PCOMPLIB)
+#define MAX_WORKERS 1
+#endif
+X
+/* 3-Oct-2003 - we can now have 2 nucleotide query types, DNA
+X and RNA. pst.dnaseq can also be SEQT_RNA.
+X ldnaseq can only be DNA */
+X
+#define SEQT_DNA 1
+#define SEQT_RNA 3 /* DNA and RNA seqtypes must be odd */
+X
+#define SEQT_PROT 0
+#define SEQT_UNK -1
+#define SEQT_OTHER 2
+X
+#ifndef DEF_NMLEN
+#define DEF_NMLEN 6
+#endif
+X
+/* unfortunately, there is an important relationship between MAXTRN and
+X MAXTST+MAXLIB embedded here. MAXTRN must be >= (MAXTST+MAXLIB)/3
+X or it will be possible for a translated DNA sequence to be longer
+X than the translation space available */
+X
+#define MAX_STR 512 /* standard label/message buffer */
+#define MAX_SSTR 32 /* short string */
+#define MAX_FN 120 /* maximum size of a file name */
+#define MAX_CH 40 /* maximum number of library choices */
+#ifndef SMALLMEM
+#define MAX_LF 500 /* maximum number of library files */
+#else
+#define MAX_LF 80 /* maximum number of library files */
+#endif
+X
+/* padding at the end of sequences for ALTIVEC, other vector
+X processors */
+#define SEQ_PAD 16
+X
+#define MAX_UID 20 /* length of libstr, used for character keys with SQL */
+X
+#define AVE_AA_LEN 400
+#define AVE_NT_LEN 5000
+#define MAX_AA_BUF 5000 /* 5000 later */
+#define MAX_NT_BUF 1000 /* 2000 later */
+X
+#ifndef SMALLMEM
+#define MAXTST 40000 /* longest query */
+#define MAXLIB 120000 /* longest library */
+#define MAXPLIB 600000 /* longest library with p_comp* */
+#define MIN_RES 2000 /* minimum amount allocated for alignment */
+#ifndef TFAST
+#define MAXTRN 80000 /* buffer for fastx translation */
+#else
+#define MAXTRN 180000 /* buffer for tfastx translation */
+#endif
+#define SEQDUP 1200 /* future - overlap */
+#ifndef PCOMPLIB
+#ifndef MAXBEST
+#define MAXBEST 60000 /* max number of best scores */
+#endif
+#define MAXSTATS 60000
+#else
+#ifndef MAXBEST
+#define MAXBEST 60000 /* max number of best scores */
+#endif
+#define MAXSTATS 60000
+#endif
+#define BIGNUM 1000000000
+#ifndef MAXINT
+#define MAXINT 2147483647
+#endif
+#define MAXLN 120 /* size of a library name */
+#else
+#define MAXTST 1500
+#define MAXLIB 10000
+#define MAXPLIB 100000 /* longest library with p_comp* */
+#define MIN_RES 1000
+#ifndef TFAST
+#define MAXTRN 6000
+#else
+#define MAXTRN 11500
+#endif
+#define SEQDUP 300
+#define MAXBEST 2000
+#define MAXSTATS 20000
+#define BIGNUM 32767
+#define MAXINT 32767
+#define MAXLN 40 /* size of a library name */
+#endif
+#if !defined(TFAST)
+#define MAXTOT (MAXTST+MAXLIB)
+#define MAXDIAG (MAXTST+MAXLIB)
+#else
+#define MAXTOT (MAXTST+MAXTRN)
+#define MAXDIAG (MAXTST+MAXTRN)
+#endif
+X
+#define MAXPAM 600 /* maximum allowable size of the pam matrix */
+#define PROF_MAX 500
+#define ALF_MAX 30
+X
+#ifdef SUPERFAMNUM
+#define NSFCHAR '!'
+#endif
+X
+#define max(a,b) (((a) > (b)) ? (a) : (b))
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+X
+#define MX_ATYPE 7 /* markx==0,1,2 7=> no alignment */
+#define MX_ASEP 8 /* markx==3 - separate lines */
+#define MX_AMAP 16 /* markx==4,5 - graphic map */
+#define MX_HTML 32 /* markx==6 - HTML */
+#define MX_M9SUMM 64 /* markx==9(c) */
+#define MX_M10FORM 128 /* markx==10 */
+X
+/* codes for -m 9 */
+#define SHOW_CODE_ID 1 /* identity only */
+#define SHOW_CODE_ALIGN 2 /* encoded alignment */
+SHAR_EOF
+chmod 0644 defs.h ||
+echo 'restore of defs.h failed'
+Wc_c="`wc -c < 'defs.h'`"
+test 3530 -eq "$Wc_c" ||
+ echo 'defs.h: original size 3530, current size' "$Wc_c"
+fi
+# ============= dna.mat ==============
+if test -f 'dna.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping dna.mat (File already exists)'
+else
+echo 'x - extracting dna.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dna.mat' &&
+# Sample dna matrix
+X A C G T U R Y M W S K D H V B N X
+A 5 -4 -4 -4 -4 2 -1 2 2 -1 -1 1 1 1 -2 -1 -1
+C -4 5 -4 -4 -4 -1 2 2 -1 2 -1 -2 1 1 1 -1 -1
+G -4 -4 5 -4 -4 2 -1 -1 -1 2 2 1 -2 1 1 -1 -1
+T -4 -4 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
+U -4 -4 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
+R 2 -1 2 -1 -1 2 -2 -1 1 1 1 1 -1 1 -1 -1 -1
+Y -1 2 -1 2 2 -2 2 -1 1 1 1 -1 1 -1 1 -1 -1
+M 2 2 -1 -1 -1 -1 -1 2 1 1 -1 -1 1 1 -1 -1 -1
+W 2 -1 -1 2 2 1 1 1 2 -1 1 1 1 -1 -1 -1 -1
+S -1 2 2 -1 -1 1 1 1 -1 2 1 -1 -1 1 1 -1 -1
+K -1 -1 2 2 2 1 1 -1 1 1 2 1 -1 -1 1 -1 -1
+D 1 -2 1 1 1 1 -1 -1 1 -1 1 1 -1 -1 -1 -1 -1
+H 1 1 -2 1 1 -1 1 1 1 -1 -1 -1 1 -1 -1 -1 -1
+V 1 1 1 -2 -2 1 -1 1 -1 1 -1 -1 -1 1 -1 -1 -1
+B -2 1 1 1 1 -1 1 -1 -1 1 1 -1 -1 -1 1 -1 -1
+N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 dna.mat ||
+echo 'restore of dna.mat failed'
+Wc_c="`wc -c < 'dna.mat'`"
+test 976 -eq "$Wc_c" ||
+ echo 'dna.mat: original size 976, current size' "$Wc_c"
+fi
+# ============= doinit.c ==============
+if test -f 'doinit.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping doinit.c (File already exists)'
+else
+echo 'x - extracting doinit.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'doinit.c' &&
+/* doinit.c general and function-specific initializations */
+X
+/* copyright (c) 1996, 1997, 1998 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: doinit.c,v 1.62 2007/01/08 15:38:46 wrp Exp $ */
+X
+/* this file performs general initializations of search parameters
+X
+X In addition, it calls several functions in init??.c that provide
+X program-specific initializations:
+X
+X f_initenv() - called from initenv()
+X f_getopt() - called from initenv() during a getopt() scan
+X f_getarg() - called from initenv() after the getopt() scan
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "upam.h" /* required for 'U' option change of nascii */
+X
+#include "structs.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#undef XTERNAL
+X
+extern char *s_optstr;
+extern int optind; /* used by getopt() */
+X
+#ifdef PCOMPLIB
+#define PARALLEL
+#include "p_mw.h"
+extern char pgmdir[];
+extern char managepgm[];
+extern char workerpgm[];
+extern int max_buf_cnt;
+#define MAX_WORKERS MAXWRKR
+#endif
+X
+char prog_name[MAX_FN];
+X
+extern void f_initenv(struct mngmsg *, struct pstruct *, unsigned char **);
+extern void f_lastenv(struct mngmsg *, struct pstruct *);
+extern void f_getopt(char, char *, struct mngmsg *, struct pstruct *);
+extern void f_getarg(int, char **, int, struct mngmsg *, struct pstruct *);
+void ann_ascii(int *qascii, char *ann_arr);
+int set_markx(int markx, int val);
+X
+int optcnt;
+int max_workers=MAX_WORKERS;
+#ifdef PCOMPLIB
+int worker_1=0;
+int worker_n=0;
+#endif
+extern char *optarg;
+X
+/* initenv () initializes the environment */
+void initenv (int argc, char **argv, struct mngmsg *m_msg,
+X struct pstruct *ppst, unsigned char **aa0)
+{
+X char *cptr, ctmp;
+X int copt, itmp;
+X
+X /* options for all search functions */
+X char *g_optstr = "ab:BC:d:DE:F:HiIJ:K:l:Lm:M:N:O:QqR:T:v:V:w:W:X:z:Z:";
+X char optstring[MAX_STR];
+X
+/* these initializations will be used by all functions */
+X
+X /* prog_name[] is only used for error messages */
+X strncpy(prog_name,argv[0],sizeof(prog_name));
+X prog_name[sizeof(prog_name)-1]='\0';
+X
+#ifdef PARALLEL
+X if ((cptr = getenv ("MANAGEPGM")) != NULL) strncpy (managepgm, cptr, 120);
+X if ((cptr = getenv ("WORKERPGM")) != NULL) strncpy (workerpgm, cptr, 120);
+X if ((cptr = getenv ("PGMDIR")) != NULL) strncpy (pgmdir, cptr, 120);
+#endif
+X
+X m_msg->ltitle[0] = '\0';
+X
+X if ((cptr=getenv("FASTLIBS"))!=NULL) {
+X strncpy(m_msg->flstr,cptr,MAX_FN);
+X m_msg->flstr[MAX_FN-1] = '\0';
+X }
+X else m_msg->flstr[0]='\0';
+X
+X m_msg->hist.hist_a = NULL;
+X m_msg->outfile[0] = '\0';
+X m_msg->ldnaseq = SEQT_PROT; /* library is protein */
+X m_msg->n1_low = 0;
+X m_msg->n1_high = BIGNUM;
+X m_msg->ql_start = 1; /* start with first query sequence */
+X m_msg->ql_stop = BIGNUM; /* end with the last query sequence */
+X
+X m_msg->pamd1 = MAXSQ;
+X m_msg->pamd2 = MAXSQ;
+X
+X m_msg->term_code = 0;
+X ppst->tr_type = 0;
+X ppst->debug_lib = 0;
+X m_msg->nshow = 20;
+#if defined(PCOMPLIB)
+X m_msg->nohist = 1;
+X m_msg->mshow = 20;
+#else
+X m_msg->nohist = 0;
+X m_msg->mshow = 50;
+#endif
+X m_msg->ashow = -1;
+X m_msg->nmlen = DEF_NMLEN;
+X m_msg->z_bits = 1;
+X m_msg->mshow_flg = 0;
+X m_msg->aln.llen = 0;
+X m_msg->aln.llcntx = 30;
+X m_msg->aln.llcntx_flg = 0;
+X m_msg->e_cut = 10.0;
+X m_msg->e_low = 0.0;
+X m_msg->e_cut_set = 0;
+X m_msg->revcomp = 0;
+X m_msg->self = 0;
+X m_msg->long_info = 0;
+X m_msg->maxn = 0;
+X m_msg->dupn = SEQDUP;
+X m_msg->dfile[0] = '\0';
+X m_msg->tname[0] = '\0';
+X m_msg->lname[0] = '\0';
+X m_msg->show_code = 0;
+X m_msg->aln.showall = 0;
+X m_msg->markx = 0;
+X m_msg->sq0off = m_msg->sq1off = 1;
+X strncpy(m_msg->sqnam,"aa",4);
+X strncpy(m_msg->sqtype,"protein",10);
+X m_msg->ann_flg = 0;
+X m_msg->ann_arr[0] = '\0';
+X m_msg->aa0a = NULL;
+X
+X ppst->zsflag = ppst->zsflag_f = 1;
+X ppst->zs_win = 0;
+X
+X ppst->zdb_size = -1;
+X ppst->dnaseq = SEQT_PROT; /* default is protein */
+X ppst->nt_align = 0;
+X
+X f_initenv (m_msg, ppst, aa0);
+X
+X strncpy (optstring, g_optstr, sizeof (optstring));
+X strncat (optstring, s_optstr, sizeof (optstring));
+X
+X while ((copt = getopt (argc, argv, optstring)) != EOF)
+X {
+X if (strchr (g_optstr, copt) != NULL)
+X {
+X switch (copt) { /* switches for all options */
+X case 'a': m_msg->aln.showall = 1; break;
+X case 'b':
+X if (optarg[0] == '$') {
+X m_msg->mshow = -1;
+X m_msg->e_cut = 10000000.0;
+X break;
+X }
+X else sscanf (optarg, "%d", &m_msg->mshow);
+X m_msg->e_cut = 10000000.0;
+X m_msg->e_cut_set = 1;
+X m_msg->mshow_flg = 1;
+X break;
+X case 'B': m_msg->z_bits = 0; break;
+X case 'C': sscanf(optarg,"%d",&m_msg->nmlen);
+X if (m_msg->nmlen > MAX_UID-1) m_msg->nmlen = MAX_UID-1;
+X break;
+X case 'd': sscanf(optarg,"%d",&m_msg->ashow);
+X if (m_msg->ashow > m_msg->mshow) m_msg->mshow=m_msg->ashow;
+X /* m_msg->ashow_flg = 1; (ashow_flg not in structs.h, not used)*/
+X break;
+X case 'D': ppst->debug_lib = 1;
+X break;
+X case 'E':
+X sscanf(optarg,"%lf",&m_msg->e_cut);
+X m_msg->e_cut_set = 1;
+X break;
+X case 'F':
+X sscanf(optarg,"%lg",&m_msg->e_low);
+X m_msg->e_cut_set = 1;
+X break;
+X case 'H':
+#if defined(PCOMPLIB)
+X m_msg->nohist = 0; break;
+#else
+X m_msg->nohist = 1; break;
+#endif
+X case 'i':
+X m_msg->revcomp = 1; break;
+#ifdef PARALLEL
+X case 'I':
+X m_msg->self = 1; break;
+X case 'J':
+X if (optarg[0]==':') {
+X m_msg->ql_start = 0;
+X sscanf(optarg,":%d",&m_msg->ql_stop);
+X m_msg->ql_stop++;
+X }
+X else if (!strchr(optarg,':')) {
+X m_msg->ql_stop = BIGNUM;
+X sscanf(optarg,"%d",&m_msg->ql_start);
+X }
+X else {
+X sscanf(optarg,"%d:%d",&m_msg->ql_start,&m_msg->ql_stop);
+X m_msg->ql_stop++;
+X }
+X break;
+X case 'K':
+X sscanf(optarg,"%d",&max_buf_cnt);
+X break;
+#endif
+X case 'l':
+X strncpy(m_msg->flstr,optarg,MAX_FN);
+X m_msg->flstr[MAX_FN-1]='\0';
+X break;
+X case 'L':
+X m_msg->long_info = 1; break;
+X case 'm':
+X sscanf(optarg,"%d%c",&itmp,&ctmp);
+X if (itmp==9 && ctmp=='c') {
+X m_msg->show_code = SHOW_CODE_ALIGN;
+X }
+X else if (itmp==9 && ctmp=='i') {
+X m_msg->show_code = SHOW_CODE_ID;
+X }
+X if (itmp > 6 && itmp != 10 && itmp != 9) itmp = 0;
+X m_msg->markx = set_markx(m_msg->markx,itmp);
+X break;
+X case 'M':
+X sscanf(optarg,"%d-%d",&m_msg->n1_low,&m_msg->n1_high);
+X if (m_msg->n1_low < 0) {
+X m_msg->n1_high = -m_msg->n1_low;
+X m_msg->n1_low = 0;
+X }
+X if (m_msg->n1_high == 0) m_msg->n1_high = BIGNUM;
+X if (m_msg->n1_low > m_msg->n1_high) {
+X fprintf(stderr," low cutoff %d greater than high %d\n",
+X m_msg->n1_low, m_msg->n1_high);
+X m_msg->n1_low = 0;
+X m_msg->n1_high = BIGNUM;
+X }
+X break;
+X case 'N':
+X sscanf(optarg,"%d",&m_msg->maxn);
+X break;
+X case 'p':
+X m_msg->qdnaseq = SEQT_PROT;
+X ppst->dnaseq = SEQT_PROT;
+X strncpy(m_msg->sqnam,"aa",4);
+X break;
+X case 'O':
+X strncpy(m_msg->outfile,optarg,MAX_FN);
+X m_msg->outfile[MAX_FN-1]='\0';
+X break;
+X case 'q':
+X case 'Q':
+X m_msg->quiet = 1;
+X break;
+X case 'R':
+X strncpy (m_msg->dfile, optarg, MAX_FN);
+X m_msg->dfile[MAX_FN-1]='\0';
+X break;
+X case 'T':
+#ifdef PCOMPLIB
+X if (strchr(optarg,'-') != NULL) {
+X sscanf(optarg,"%d-%d",&worker_1,&worker_n);
+X if (worker_1 > worker_n) {
+X worker_1 = worker_n = 0;
+X }
+X }
+X else
+#endif
+X sscanf (optarg, "%d", &max_workers);
+X if (max_workers < 0) max_workers=1;
+X break;
+X case 'v':
+X sscanf (optarg,"%d",&ppst->zs_win);
+X break;
+X case 'V':
+X strncpy(m_msg->ann_arr+1,optarg,MAX_FN-2);
+X m_msg->ann_arr[0]='\0';
+X m_msg->ann_arr[MAX_FN-2]='\0';
+X m_msg->ann_flg = 1;
+X ann_ascii(qascii, m_msg->ann_arr);
+X break;
+/*
+X case 'V':
+X fprintf(stderr," -V option not currently supported in parallel\n");
+X break;
+*/
+X case 'w':
+X sscanf (optarg,"%d",&m_msg->aln.llen);
+X if (m_msg->aln.llen < 10) m_msg->aln.llen = 10;
+X if (m_msg->aln.llen > 200) m_msg->aln.llen = 200;
+X if (!m_msg->aln.llcntx_flg) m_msg->aln.llcntx = m_msg->aln.llen/2;
+X break;
+X case 'W':
+X sscanf (optarg,"%d",&m_msg->aln.llcntx);
+X m_msg->aln.llcntx_flg = 1;
+X break;
+X case 'X':
+X sscanf (optarg,"%ld %ld",&m_msg->sq0off,&m_msg->sq1off); break;
+X case 'z':
+X sscanf(optarg,"%d",&ppst->zsflag);
+X break;
+X case 'Z':
+X sscanf(optarg,"%ld",&ppst->zdb_size);
+X break;
+X }
+X }
+X else if (strchr (s_optstr, copt))
+X f_getopt (copt, optarg, m_msg, ppst);
+X }
+X optind--;
+X
+X f_lastenv (m_msg, ppst);
+X
+X if (argc - optind < 3) return;
+X m_msg->tnamesize = sizeof (m_msg->tname);
+X if (argc - optind > 1) strncpy (m_msg->tname, argv[optind + 1],MAX_FN);
+X if (argc - optind > 2) { strncpy(m_msg->lname, argv[optind + 2],MAX_FN); }
+X f_getarg (argc, argv, optind, m_msg, ppst);
+}
+X
+/* ann_scan - separate annotation marks from the residues of aa0[0..n0-1].
+X *
+X * Residues are compacted in place at the front of aa0 and the string is
+X * NUL-terminated.  When the annotation buffer m_msg->aa0a can be
+X * allocated, each annotation byte found after at least one residue is
+X * stored (minus NANN) in the slot of the residue that precedes it.
+X * When the allocation fails, m_msg->ann_flg is cleared and only the
+X * compaction is done.
+X *
+X * A byte counts as a residue when
+X *   seqtype == SEQT_UNK :  byte > '@' or byte == ESS
+X *   otherwise           :  byte < NANN
+X *
+X * Returns the number of residues.
+X */
+int
+ann_scan(unsigned char *aa0, int n0, struct mngmsg *m_msg, int seqtype)
+{
+X  const unsigned char *src;
+X  const unsigned char *end = aa0 + n0;
+X  unsigned char *dst = aa0;      /* write cursor for compacted residues */
+X  unsigned char *ann;            /* write cursor in m_msg->aa0a */
+X  int raw = (seqtype==SEQT_UNK); /* selects the residue test */
+X  int is_res;
+X  int n_res = 0;
+X
+X  /* first pass: count the residues so the annotation buffer can be sized */
+X  for (src = aa0; src < end; src++) {
+X    is_res = raw ? (*src > '@' || *src == ESS) : (*src < NANN);
+X    if (is_res) n_res++;
+X  }
+X
+X  m_msg->aa0a = calloc(n_res+2, sizeof(char));
+X  if (m_msg->aa0a == NULL) {
+X    fprintf(stderr," cannot allocate annotation sequence: %d\n",n_res);
+X    m_msg->ann_flg = 0;
+X
+X    /* no room for annotations: just squeeze them out of aa0 */
+X    for (src = aa0; src < end; src++) {
+X      is_res = raw ? (*src > '@' || *src == ESS) : (*src < NANN);
+X      if (is_res) *dst++ = *src;
+X    }
+X    *dst = '\0';
+X    return n_res;
+X  }
+X
+X  /* second pass: compact residues and record annotations beside them */
+X  ann = m_msg->aa0a;
+X  for (src = aa0; src < end; src++) {
+X    is_res = raw ? (*src > '@' || *src == ESS) : (*src < NANN);
+X    if (is_res) {
+X      *dst++ = *src;
+X      *ann++ = '\0';
+X    }
+X    else if (ann > m_msg->aa0a) {
+X      /* annotation belongs to the residue just copied */
+X      ann[-1] = *src - NANN;
+X    }
+X  }
+X  *ann = *dst = '\0';
+X  return n_res;
+}
+X
+/* ann_ascii - register the annotation characters in the query ascii map.
+X *
+X * ann_arr[0] is set to ' '.  Every character of ann_arr[1..] whose
+X * qascii[] entry is currently NA is given the next free code, starting
+X * at NANN+1.  If '*' is one of the annotation characters its entry is
+X * first reset to NA so that it is assigned a code as well.
+X *
+X * NOTE(review): qascii is subscripted with a plain char, exactly as in
+X * the original code; the size of qascii is not visible here - confirm
+X * that annotation characters are restricted to 7-bit ASCII.
+X */
+void
+ann_ascii(int *qascii, char *ann_arr)
+{
+X  int next_code = NANN+1;
+X  int i;
+X
+X  ann_arr[0] = ' ';
+X  if (strchr(&ann_arr[1],'*') != NULL) qascii['*'] = NA;
+X
+X  for (i = 1; ann_arr[i] != '\0'; i++) {
+X    int *slot = &qascii[ann_arr[i]];
+X
+X    if (*slot == NA) *slot = next_code++;
+X  }
+}
+X
+/* set_markx - fold an output-format selector (val) into the markx flags.
+X *
+X * val < 3      : returns markx with (MX_ATYPE & val) or'ed in
+X * val 3,4,5,6  : or's in MX_ATYPE+MX_ASEP, MX_ATYPE+MX_AMAP, MX_AMAP,
+X *                MX_HTML respectively
+X * val 9, 10    : or's in MX_M9SUMM, MX_M10FORM
+X * anything else: markx is returned unchanged
+X */
+int
+set_markx(int markx, int val) {
+X
+X  if (val < 3) return markx | (MX_ATYPE & val);
+X
+X  switch (val) {
+X  case 3:
+X    markx |= (MX_ATYPE + MX_ASEP);
+X    break;
+X  case 4:
+X    markx |= (MX_ATYPE + MX_AMAP);
+X    break;
+X  case 5:
+X    markx |= MX_AMAP;
+X    break;
+X  case 6:
+X    markx |= (MX_HTML) ;
+X    break;
+X  case 9:
+X    markx |= MX_M9SUMM;
+X    break;
+X  case 10:
+X    markx |= MX_M10FORM;
+X    break;
+X  default:
+X    break;
+X  }
+X
+X  return markx;
+}
+SHAR_EOF
+chmod 0644 doinit.c ||
+echo 'restore of doinit.c failed'
+Wc_c="`wc -c < 'doinit.c'`"
+test 10740 -eq "$Wc_c" ||
+ echo 'doinit.c: original size 10740, current size' "$Wc_c"
+fi
+# ============= drop_func.h ==============
+# Extract drop_func.h.  An existing copy is left alone unless the first
+# argument to the archive script is -c.
+if test -f 'drop_func.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping drop_func.h (File already exists)'
+else
+echo 'x - extracting drop_func.h (Text)'
+# The here-document delimiter is quoted, so the text up to SHAR_EOF is
+# taken literally; sed removes one leading X from each line that has it.
+sed 's/^X//' << 'SHAR_EOF' > 'drop_func.h' &&
+/* drop_func.h */
+X
+/* $Name: fa_34_26_5 $ - $Id: drop_func.h,v 1.7 2006/05/31 15:31:45 wrp Exp $ */
+X
+/* functions provided by each of the drop files */
+X
+/* Copyright (c) 2005 William R. Pearson and the University of Virginia */
+X
+X
+void /* initializes f_struct **f_arg */
+init_work (unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+#ifndef DROP_INTERN
+X void **f_arg
+#else
+X struct f_struct **f_arg
+#endif
+);
+X
+X
+void /* frees memory allocated in f_struct */
+close_work (const unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+#ifndef DROP_INTERN
+X void **f_arg
+#else
+X struct f_struct **f_arg
+#endif
+);
+X
+void /* documents search function, parameters */
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2);
+X
+void /* calculates alignment score(s), returns them in rst */
+do_work (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+#ifndef DROP_INTERN
+X void *f_arg,
+#else
+X struct f_struct *f_arg,
+#endif
+X int qr_flg, struct rstruct *rst);
+X
+void /* calculates optimal alignment score */
+do_opt (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+#ifndef DROP_INTERN
+X void *f_arg,
+#else
+X struct f_struct *f_arg,
+#endif
+X struct rstruct *rst
+X );
+X
+int /* produces encoding of alignment */
+do_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+#ifndef DROP_INTERN
+X void *f_arg,
+#else
+X struct f_struct *f_arg,
+#endif
+X struct a_res_str *a_res,
+X int *have_ares);
+X
+void
+pre_cons(const unsigned char *aa, int n, int frame,
+#ifndef DROP_INTERN
+X void *f_arg
+#else
+X struct f_struct *f_arg
+#endif
+X );
+X
+void
+aln_func_vals(int frame, struct a_struct *aln);
+X
+int /* takes aa0, aa1, a_res, and produces seqc0, seqc1 */
+calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln, /* seqc0/seqc1 coordinates */
+X struct a_res_str a_res, /* aa0,aa1, coord, inc. res, nres */
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+#ifndef DROP_INTERN
+X void *f_arg
+#else
+X struct f_struct *f_arg
+#endif
+X );
+X
+/* calcons_a - takes aa0, aa1, a_res, and produces seqc0, seqc1,
+X * and seqc0a, seqc1a - the annotated sequences
+X */
+int
+calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr,
+#ifndef DROP_INTERN
+X void *f_arg
+#else
+X struct f_struct *f_arg
+#endif
+X );
+X
+int /* returns lenc - length of aligment */
+calc_code(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n,
+#ifndef DROP_INTERN
+X void *f_arg
+#else
+X struct f_struct *f_arg
+#endif
+X );
+X
+int /* returns lenc - length of alignment */
+calc_id(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+#ifndef DROP_INTERN
+X void *f_arg
+#else
+X struct f_struct *f_arg
+#endif
+X );
+SHAR_EOF
+chmod 0644 drop_func.h ||
+echo 'restore of drop_func.h failed'
+# Compare the size of the extracted file with the size recorded when the
+# archive was made (3226 bytes); a mismatch only prints a notice.
+Wc_c="`wc -c < 'drop_func.h'`"
+test 3226 -eq "$Wc_c" ||
+ echo 'drop_func.h: original size 3226, current size' "$Wc_c"
+fi
+# ============= dropff2.c ==============
+if test -f 'dropff2.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropff2.c (File already exists)'
+else
+echo 'x - extracting dropff2.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropff2.c' &&
+X
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+X
+/* - dropffa.c,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp */
+X
+/* this code implements the "fastf" algorithm, which is designed to
+X deconvolve mixtures of protein sequences derived from mixed-peptide
+X Edman sequencing. The expected input is:
+X
+X >test | 40001 90043 | mgstm1
+X MGCEN,
+X MIDYP,
+X MLLAY,
+X MLLGY
+X
+X Where the ','s indicate the length/end of the sequencing cycle
+X data. Thus, in this example, the sequence is from a mixture of 4
+X peptides, M was found in the first position, G,I, and L(2) at the second,
+X C,D, L(2) at the third, etc.
+X
+X Because the sequences are derived from mixtures, there need not be
+X any partial sequence "MGCEN", the actual deconvolved sequence might be
+X "MLDGN".
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#include "tatstats.h"
+X
+#define EOSEQ 0
+#define ESS 49
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+#define NMAP_X 23 /* re-code NMAP for 'X' */
+#define NMAP_Z 24 /* re-code NMAP for '*' */
+X
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+static char *verstr="4.21 May 2006 (ajm/wrp)";
+X
+int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
+void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
+extern void aancpy(char *to, char *from, int count, struct pstruct pst);
+X
+#ifdef TFAST
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq,
+X const int maxs, const int frame);
+#endif
+X
+struct hlstr { int next, pos;};
+X
+void savemax(struct dstruct *, struct f_struct *);
+X
+static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
+X int **, struct f_struct *);
+static int m1_spam(unsigned char *, int,
+X const unsigned char *, int,
+X struct savestr *, int **, int, struct f_struct *);
+X
+int sconn(struct savestr **v, int nsave, int cgap,
+X struct f_struct *, struct rstruct *, struct pstruct *,
+X const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int opt_prob);
+X
+void kpsort(struct savestr **, int);
+void kssort(struct savestr **, int);
+void kpsort(struct savestr **, int);
+X
+int
+sconn_a(unsigned char *, int, int, struct f_struct *,
+X struct a_res_str *);
+X
+/* initialize for fasta */
+X
+/* init_work - allocate and fill the per-query work area for fastf.
+X *
+X * aa0/n0  encoded query; fragments of the mixed-peptide query are
+X *         separated by ESS (or 0) entries
+X * ppst    search parameters; sw_flag, hsq[NMAP_X], hsq[NMAP_Z] and
+X *         param_u.fa.cgap are modified here
+X * f_arg   receives the newly allocated struct f_struct
+X *
+X * The query is copied into f_str->aa0 (separators rewritten to 0) and
+X * hashed one residue at a time into the harr/link lists.  nm0 counts the
+X * fragments and nmoff is the index at which the second fragment starts
+X * (n0 when there is only one).  Any allocation failure is fatal:
+X * a message is written to stderr and exit(1) is called.
+X */
+void
+init_work (unsigned char *aa0, int n0,
+X           struct pstruct *ppst,
+X           struct f_struct **f_arg)
+{
+X  int mhv, phv;
+X  int hmax;
+X  int i0, ii0, hv;
+X  struct f_struct *f_str;
+X
+X  int maxn0;
+X  int i;
+X  struct savestr *vmptr;
+X  int *res;
+X
+X  f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
+X  if(f_str == NULL) {
+X    fprintf(stderr, "Couldn't calloc f_str\n");
+X    exit(1);
+X  }
+X
+X  ppst->sw_flag = 0;
+X
+X  /* fastf3 cannot work with lowercase symbols as low complexity;
+X     thus, NMAP must be disabled; this depends on aascii['X'] */
+X  if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
+X  if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
+X
+X  /* this does not work for share ppst structs, as in threads */
+X  /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
+X
+X  /* largest hash value below NMAP fixes the number of hash bits */
+X  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+X    if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
+X
+X  if (mhv <= 0) {
+X    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+X    exit (1);
+X  }
+X
+X  for (f_str->kshft = 0; mhv > 0; mhv /= 2)
+X    f_str->kshft++;
+X
+/*      kshft = 2; */
+X  hmax = hv = (1 << f_str->kshft);
+X  /* as written this evaluates to (1 - 1) == 0 */
+X  f_str->hmask = (hmax >> f_str->kshft) - 1;
+X
+X  /* private, writable copy of the query; from here on aa0 is the copy */
+X  if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+X    fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
+X    exit (1);
+X  }
+X  for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
+X  aa0 = f_str->aa0;
+X
+X  if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+X    fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
+X    exit (1);
+X  }
+X  f_str->aa0ix = 0;
+X
+X  if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
+X    fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
+X             hmax,f_str->hmask);
+X    exit (1);
+X  }
+X  if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh1 array\n");
+X    exit (1);
+X  }
+X  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh2 array\n");
+X    exit (1);
+X  }
+X  if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
+X    fprintf (stderr, " cannot allocate hash link array");
+X    exit (1);
+X  }
+X
+X  /* -1 marks "no entry" in both the hash heads and the link chain */
+X  for (i0 = 0; i0 < hmax; i0++) {
+X    f_str->harr[i0].next = -1;
+X    f_str->harr[i0].pos = -1;
+X  }
+X
+X  for (i0 = 0; i0 < n0; i0++) {
+X    f_str->link[i0].next = -1;
+X    f_str->link[i0].pos = -1;
+X  }
+X
+X  /* encode the aa0 array */
+X  /*
+X    this code has been modified to allow for mixed peptide sequences
+X     aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
+X    the 'NULL' character resets the hash position counter, to indicate that
+X    any of several residues can be in the same position.
+X    We also need to keep track of the number of times this has happened, so that
+X    we can redivide the sequence later
+X
+X    i0 counts through the sequence
+X    ii0 counts through the hashed sequence
+X
+X    */
+X
+X  f_str->nm0 = 1;
+X  f_str->nmoff = -1;
+X  phv = hv = 0;
+X  for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
+X    /* reset the counter and start hashing again */
+X    if (aa0[i0] == ESS || aa0[i0] == 0) {
+X      aa0[i0] = 0;     /* set ESS to 0 */
+X      /*       fprintf(stderr," converted ',' to 0\n");*/
+X      i0++;            /* skip over the blank */
+X      f_str->nm0++;
+X      if (f_str->nmoff < 0) f_str->nmoff = i0;
+X      phv = hv = 0;
+X      ii0 = 0;
+X      /* a separator in the last position leaves nothing to hash;
+X         without this check link[n0] (one past the array) is written */
+X      if (i0 >= n0) break;
+X    }
+X    hv = ppst->hsq[aa0[i0]];
+X    /* push position i0 on the front of the chain for hash value hv */
+X    f_str->link[i0].next = f_str->harr[hv].next;
+X    f_str->link[i0].pos = f_str->harr[hv].pos;
+X    f_str->harr[hv].next = i0;
+X    f_str->harr[hv].pos = ii0;
+X    f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
+X  }
+X  if (f_str-> nmoff < 0) f_str->nmoff = n0;
+X
+X
+#ifdef DEBUG
+X  /*
+X  fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
+X  */
+#endif
+X
+/*
+#ifdef DEBUG
+X  fprintf(stderr," hmax: %d\n",hmax);
+X  for ( hv=0; hv<hmax; hv++)
+X    fprintf(stderr,"%2d %c %3d %3d\n",hv,
+X            (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
+X            f_str->harr[hv].pos,f_str->harr[hv].next);
+X  fprintf(stderr,"----\n");
+X  for ( hv=0; hv<n0; hv++)
+X    fprintf(stderr,"%2d: %3d %3d\n",hv,
+X            f_str->link[hv].pos,f_str->link[hv].next);
+#endif
+*/
+X
+X  /* slots for the MAXSAV best diagonal runs, each with a per-query-
+X     position "used" map */
+X  f_str->maxsav = MAXSAV;
+X  if ((f_str->vmax = (struct savestr *)
+X       calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
+X    fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
+X    exit(1);
+X  }
+X
+X  if ((f_str->vptr = (struct savestr **)
+X       calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
+X    fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
+X    exit(1);
+X  }
+X
+X  for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+X    vmptr->used = (int *) calloc(n0, sizeof(int));
+X    if(vmptr->used == NULL) {
+X      fprintf(stderr, "Couldn't alloc vmptr->used\n");
+X      exit(1);
+X    }
+X  }
+X
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+X   pam2[0][0] is now undefined for consistency with blast
+*/
+X
+X  for (i0 = 1; i0 <= ppst->nsq; i0++)
+X    f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
+X
+X  /* joining penalty: a third of the self-score of the first fragment,
+X     but never more than bestmax/4 */
+X  ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
+X  if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
+X    ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
+X
+X  f_str->ndo = 0;
+X  f_str->noff = n0-1;
+X  if (f_str->diag==NULL)
+X    f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+X                                             sizeof (struct dstruct));
+X
+X  if (f_str->diag == NULL)
+X    {
+X      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
+X               (long) MAXDIAG * (long) (sizeof (struct dstruct)));
+X      exit (1);
+X    }
+X
+#ifdef TFAST
+X  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+X                                            sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+X    exit (1);
+X  }
+X  /* aa1x points one past the start of the allocation; close_work()
+X     frees aa1x - 1 */
+X  f_str->aa1x++;
+#endif
+X
+X  /* allocate space for the scoring arrays */
+X  maxn0 = max(3*n0/2,MIN_RES);
+X  if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X    fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X    exit(1);
+X  }
+X  f_str->res = res;
+X  f_str->max_res = maxn0;
+X
+X  /* Tatusov Statistics Setup */
+X
+X  /* initialize priors array. */
+X  if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate priors array.\n");
+X    exit(1);
+X  }
+X  calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
+X
+X  f_str->dotat = 0;
+X  f_str->shuff_cnt = ppst->shuff_node;
+X
+X  /* End of Tatusov Statistics Setup */
+X
+X  *f_arg = f_str;
+}
+X
+X
+/* pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format */
+/* get_param - describe the search function and its parameters.
+X *
+X * pstring1 receives a one-line summary (program name, version, scoring
+X *          matrix, join penalty, and " init1" when iniflag is set).
+X * pstring2, when not NULL, receives the same information as
+X *          "; pg_xxx: value" lines.
+X * The program name is TFASTF when TFAST is defined, FASTF otherwise.
+X */
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifdef TFAST
+X  char *prog_name="TFASTF";
+#else
+X  char *prog_name="FASTF";
+#endif
+X
+X  sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)] join: %d",
+X           prog_name, verstr,
+X           pstr->pamfile, pstr->pam_h, pstr->pam_l,
+X           pstr->param_u.fa.cgap);
+X
+X  if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
+X
+X  /* the second form is optional */
+X  if (pstring2 == NULL) return;
+X
+X  sprintf (pstring2,
+X           "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n"
+X           "; pg_join: %d\n",
+X           prog_name, verstr,
+X           pstr->pamfile, pstr->pam_h, pstr->pam_l,
+X           pstr->param_u.fa.cgap);
+}
+X
+/* close_work - release everything init_work() attached to *f_arg.
+X *
+X * Does nothing when *f_arg is NULL; otherwise frees the work arrays and
+X * the f_struct itself and resets *f_arg to NULL.  aa0, n0 and ppst are
+X * not used.
+X */
+void
+close_work (const unsigned char *aa0, const int n0,
+X            struct pstruct *ppst,
+X            struct f_struct **f_arg)
+{
+X  struct f_struct *fs = *f_arg;
+X  int k;
+X
+X  if (fs == NULL) return;
+X
+X  /* per-slot "used" maps first, while vmax is still reachable */
+X  for (k = 0; k < MAXSAV; k++)
+X    free(fs->vmax[k].used);
+X
+X  free(fs->res);
+#ifdef TFAST
+X  free(fs->aa1x - 1);  /* allocated, then aa1x++'ed */
+#endif
+X  free(fs->diag);
+X  free(fs->link);
+X  free(fs->pamh2);
+X  free(fs->pamh1);
+X  free(fs->harr);
+X  free(fs->aa0t);
+X  free(fs->aa0);
+X  free(fs->priors);
+X  free(fs);
+X
+X  *f_arg = NULL;
+}
+X
+/* do_fastf - score library sequence aa1[0..n1-1] against the query aa0.
+X *
+X * Stages, in the order they appear below:
+X *  1. reset the diagonal array and the MAXSAV saved-run slots;
+X *  2. scan aa1 one residue at a time, following the harr/link chains
+X *     to extend or restart a run on each matching diagonal, and hand
+X *     runs to savemax();
+X *  3. rescore every saved run with m0_spam(), order them with
+X *     kssort(), rescore with m1_spam(), undo the +32 "used" marks that
+X *     m1_spam() leaves in aa0, and order them again;
+X *  4. drop trailing runs whose score is not positive, join the rest
+X *     with sconn() and fill rst->score[].
+X *
+X * Returns -1 (scores -1, escore 2.0) when n0+n1+1 >= MAXDIAG, otherwise
+X * 1; when n1 < 1 or no run survives, the scores are 0 and escore 1.0.
+X * hoff is not referenced in this function.
+X */
+int do_fastf (unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst, struct f_struct *f_str,
+X struct rstruct *rst, int *hoff, int opt_prob)
+{
+X int nd; /* diagonal array size */
+X int lhval;
+X int kfact;
+X register struct dstruct *dptr;
+X register int tscor;
+X register struct dstruct *diagp;
+X struct dstruct *dpmax;
+X register int lpos;
+X int tpos, npos;
+X struct savestr *vmptr;
+X int scor, tmp;
+X int im, ib, nsave;
+X int cmps (); /* comparison routine for ksort */
+X int *hsq;
+X
+X hsq = ppst->hsq;
+X
+X /* empty library sequence: nothing to score */
+X if (n1 < 1) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X rst->escore = 1.0;
+X rst->segnum = 0;
+X rst->seglen = 0;
+X return 1;
+X }
+X
+X /* one diagonal per (query, library) offset must fit in diag[] */
+X if (n0+n1+1 >= MAXDIAG) {
+X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+X rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X rst->escore = 2.0;
+X rst->segnum = 0;
+X rst->seglen = 0;
+X return -1;
+X }
+X
+X nd = n0 + n1;
+X
+X /* entries below ndo were already reset at the end of the previous call */
+X dpmax = &f_str->diag[nd];
+X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X
+X /* initialize the saved segment structures */
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+X vmptr->score = 0;
+X memset(vmptr->used, 0, n0 * sizeof(int));
+X }
+X
+X f_str->lowmax = f_str->vmax;
+X f_str->lowscor = 0;
+X
+X /* start hashing */
+X
+X /* diagp advances with lpos, so diagp[-tpos] is the diagonal shared by
+X library position lpos and hashed query position tpos */
+X diagp = &f_str->diag[f_str->noff];
+X for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
+X /* skip library residues whose hash value is >= NMAP */
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lpos++ ; diagp++;
+X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+X if (lpos >= n1) break;
+X lhval = 0;
+X }
+X lhval = hsq[aa1[lpos]];
+X /* walk every query position that hashes to lhval */
+X for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
+X tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {
+X /* tscor gets position of end of current lpos diag run */
+X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+X tscor++; /* move forward one */
+X if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
+X /* includes implicit -1 mismatch penalty */
+X scor = dptr->score; /* current score of this run */
+X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
+X f_str->lowscor < scor) /* if updating tscor makes run worse, */
+X savemax (dptr, f_str); /* save it */
+X
+X if ((tscor += scor) >= kfact) { /* add to current run if continuing */
+X /* is better than restart (kfact) */
+X dptr->score = tscor;
+X dptr->stop = lpos;
+X }
+X else {
+X dptr->score = kfact; /* starting over is better */
+X dptr->start = (dptr->stop = lpos);
+X }
+X }
+X else { /* continue current run */
+X dptr->score += f_str->pamh1[aa0[tpos]];
+X dptr->stop = lpos;
+X }
+X }
+X else { /* no diagonal run yet */
+X dptr->score = f_str->pamh2[lhval];
+X dptr->start = (dptr->stop = lpos);
+X }
+X } /* end tpos */
+X } /* end lpos */
+X
+X /* save any run still better than the current threshold, and leave the
+X diagonals reset for the next call */
+X for (dptr = f_str->diag; dptr < dpmax;) {
+X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X f_str->ndo = nd;
+X
+/*
+X at this point all of the elements of aa1[lpos]
+X have been searched for elements of aa0[tpos]
+X with the results in diag[dpos]
+*/
+X
+X /* set up pointers for sorting */
+X
+X /* only slots that hold a positive score are rescored and kept */
+X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+X if (vmptr->score > 0) {
+X vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
+X f_str->vptr[nsave++] = vmptr;
+X }
+X }
+X
+X /* sort them */
+X kssort (f_str->vptr, nsave);
+X
+X
+#ifdef DEBUG
+X /*
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
+X fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
+X ppst->sq[aa1[im]]);
+X fputc('\n',stderr);
+X }
+X fprintf(stderr,"---\n");
+X */
+X /* now use m_spam to re-evaluate */
+X /*
+X for (tpos = 0; tpos < n0; tpos++) {
+X fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
+X if (tpos %10 == 9) fputc('\n',stderr);
+X }
+X fputc('\n',stderr);
+X */
+#endif
+X
+X /* second rescoring, in sorted order; m1_spam() marks the query
+X residues it consumes by adding 32 to them */
+X f_str->aa0ix = 0;
+X for (ib=0; ib < nsave; ib++) {
+X if ((vmptr=f_str->vptr[ib])->score > 0) {
+X vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
+X ppst->pam2[0], ppst->pam_l, f_str);
+X }
+X }
+X /* reset aa0 - modified by m1_spam */
+X for (tpos = 0; tpos < n0; tpos++) {
+X if (aa0[tpos] >= 32) aa0[tpos] -= 32;
+X }
+X
+X kssort(f_str->vptr,nsave);
+X
+X /* trim entries at the end of vptr[] whose score is not positive */
+X for ( ; nsave > 0; nsave--)
+X if (f_str->vptr[nsave-1]->score >0) break;
+X
+X if (nsave <= 0) {
+X f_str->nsave = 0;
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X rst->escore = 1.0;
+X
+X return 1;
+X }
+X else f_str->nsave = nsave;
+X
+X
+#ifdef DEBUG
+X /*
+X fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
+X fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
+X ppst->sq[aa1[im]]);
+X fputc('\n',stderr);
+X }
+X
+X fprintf(stderr,"---\n");
+X */
+#endif
+X
+X /* join the surviving runs; cgap is the joining penalty set in init_work() */
+X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
+X rst, ppst, aa0, n0, aa1, n1, opt_prob);
+X
+X /* vmptr ends up at the best single run */
+X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+X
+X /* score[1]: best single run; score[0] and score[2]: the better of the
+X joined score and the best single run */
+X rst->score[1] = vmptr->score;
+X rst->score[0] = rst->score[2] = max (scor, vmptr->score);
+X
+X return 1;
+}
+X
+/* do_work - score one library sequence (or one shuffled sequence when
+X * qr_flg is set) and return the result in rst.
+X *
+X * The aa0 argument is not used; the query copy in f_str->aa0 is passed
+X * to do_fastf().  With TFAST defined the library sequence is first
+X * translated with aatran() for the given frame.  rst->comp and rst->H
+X * are set to -1.0 after scoring.
+X */
+void do_work (const unsigned char *aa0, int n0,
+X              const unsigned char *aa1, int n1,
+X              int frame,
+X              struct pstruct *ppst, struct f_struct *f_str,
+X              int qr_flg, struct rstruct *rst)
+{
+X  int opt_prob;
+X  int hoff, n10, i;
+X  int zs;
+X
+X  /* shuffle budget exhausted: report "no score" */
+X  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
+X    rst->score[0]=rst->score[1]=rst->score[2]= -1;
+X    rst->escore = 2.0;
+X    return;
+X  }
+X
+X  /* opt_prob is handed through to do_fastf()/sconn() */
+X  zs = ppst->zsflag;
+X  opt_prob = (f_str->dotat || zs == 4 || zs == 14) ? 1 : 0;
+X  if (zs == 2 || zs == 12) opt_prob = 0;
+X  if (qr_flg) {
+X    opt_prob = 1;
+X    /*    if (frame==1) */
+X    f_str->shuff_cnt--;
+X  }
+X
+X  if (n1 < 1) {
+X    rst->escore = 2.0;
+X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X    return;
+X  }
+X
+#ifdef TFAST
+X  n10=aatran(aa1,f_str->aa1x,n1,frame);
+X  if (ppst->debug_lib) {
+X    /* report translated residues that are out of range */
+X    for (i=0; i<n10; i++) {
+X      if (f_str->aa1x[i]>ppst->nsq) {
+X        fprintf(stderr,
+X                "residue[%d/%d] %d range (%d)\n",i,n1,
+X                f_str->aa1x[i],ppst->nsq);
+X        f_str->aa1x[i]=0;
+X        n10=i-1;
+X      }
+X    }
+X  }
+X
+X  do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
+#else   /* FASTF */
+X  do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
+#endif
+X
+X  rst->comp = rst->H = -1.0;
+X
+}
+X
+/* do_opt - rescore one library sequence with do_fastf(), with
+X * ppst->param_u.fa.optflag forced to 1 for the duration of the call and
+X * opt_prob passed as 1.  The previous optflag value is restored before
+X * returning.  The aa0 argument is not used; f_str->aa0 is.
+X */
+void do_opt (const unsigned char *aa0, int n0,
+X             const unsigned char *aa1, int n1,
+X             int frame,
+X             struct pstruct *ppst,
+X             struct f_struct *f_str,
+X             struct rstruct *rst)
+{
+X  int saved_optflag;
+X  int hoff;
+X  const unsigned char *lib = aa1;   /* sequence actually scored */
+X  int lib_len = n1;
+X
+X  saved_optflag = ppst->param_u.fa.optflag;
+X  ppst->param_u.fa.optflag = 1;
+X
+#ifdef TFAST
+X  /* score the translation of aa1 instead of aa1 itself */
+X  lib_len = aatran(aa1,f_str->aa1x,n1,frame);
+X  lib = f_str->aa1x;
+#endif
+X  do_fastf (f_str->aa0, n0, lib, lib_len, ppst, f_str, rst, &hoff, 1);
+X
+X  ppst->param_u.fa.optflag = saved_optflag;
+}
+X
+/* savemax - record the run on diagonal dptr among the MAXSAV saved runs.
+X *
+X * If dptr->dmax already points at a slot holding this diagonal with the
+X * same start, that slot's stop (and its score, if the run improved) is
+X * updated.  Otherwise the run overwrites the current lowest slot
+X * (f_str->lowmax).  Whenever the lowest slot may have changed,
+X * f_str->lowmax and f_str->lowscor are recomputed.
+X */
+void
+savemax (struct dstruct *dptr, struct f_struct *f_str)
+{
+X  struct savestr *slot;
+X  struct savestr *cur = dptr->dmax;
+X  int dpos = (int) (dptr - f_str->diag);   /* index of this diagonal */
+X  int low;
+X  int k;
+X
+X  if (cur != NULL && cur->dp == dpos && cur->start == dptr->start) {
+X    /* continuation of a run that is already saved */
+X    cur->stop = dptr->stop;
+X    low = dptr->score;
+X    if (low <= cur->score) return;
+X    cur->score = low;
+X    /* the minimum can only have moved if this was the lowest slot */
+X    if (cur != f_str->lowmax) return;
+X  }
+X  else {
+X    /* new run: replace the lowest-scoring slot */
+X    slot = f_str->lowmax;
+X    low = slot->score = dptr->score;
+X    slot->dp = dpos;
+X    slot->start = dptr->start;
+X    slot->stop = dptr->stop;
+X    dptr->dmax = slot;
+X  }
+X
+X  /* find the new lowest slot, starting from the score just stored */
+X  for (k = 0; k < MAXSAV; k++) {
+X    slot = &f_str->vmax[k];
+X    if (slot->score < low) {
+X      low = slot->score;
+X      f_str->lowmax = slot;
+X    }
+X  }
+X  f_str->lowscor = low;
+}
+X
+/* this version of spam() is designed to work with a collection of
+X subfragments, selecting the best amino acid at each position so
+X that, from each subfragment, each position is only used once.
+X
+X As a result, m_spam needs to know the number of fragments.
+X
+X In addition, it now requires a global alignment to the fragment
+X and resets the start and stop positions
+X
+X */
+X
+/* m1_spam - rescore the saved run dmax against all nm0 query fragments.
+X *
+X * dmax->start and dmax->stop are first rewritten so that the run covers
+X * the fragment from its chosen start through position nmoff-2 of the
+X * query.  Then, for each library position in [start, stop], the
+X * fragments (nmoff apart in aa0) are searched for the residue with the
+X * highest pam2 score above pam_l that has not been consumed yet (value
+X * < 32).  That score - or pam_l when there is none - is added to the
+X * total.  A chosen residue is marked consumed by adding 32 to it in aa0
+X * and by setting the matching dmax->used[] entry.
+X *
+X * n0 is not used; curv and maxv are only zero-initialised.
+X * Returns the total score.
+X */
+static int
+m1_spam (unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct savestr *dmax, int **pam2, int pam_l,
+X struct f_struct *f_str)
+{
+X int tpos, lpos, im, ii, nm, ci;
+X int tot, ctot, pv;
+X
+X struct {
+X int start, stop, score;
+X } curv, maxv;
+X unsigned char *aa0p;
+X const unsigned char *aa1p;
+X
+X lpos = dmax->start; /* position in library sequence */
+X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+X /* force global alignment, reset start*/
+X if (tpos < lpos) {
+X lpos = dmax->start -= tpos;
+X tpos = 0;
+X }
+X else {
+X tpos -= lpos;
+X lpos = dmax->start = 0;
+X }
+X
+X /* NOTE(review): stop is clamped to n1, not n1-1, and the loop below
+X runs while lpos <= stop, so aa1[n1] can be read - confirm that the
+X caller's buffer extends that far */
+X dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
+X if (dmax->stop > n1) dmax->stop = n1;
+X
+X /*
+X if (dmax->start < 0) {
+X tpos = -dmax->start;
+X lpos = dmax->start=0;
+X }
+X else tpos = 0;
+X */
+X
+X aa1p = &aa1[lpos];
+X aa0p = &aa0[tpos];
+X
+X nm = f_str->nm0;
+X
+X tot = curv.score = maxv.score = 0;
+X for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
+X /* ctot: best score at this position, never below pam_l;
+X ci: offset from aa0p of the residue that gave it, -1 if none */
+X ctot = pam_l;
+X ci = -1;
+X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+X if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+X ctot = pv;
+X ci = ii;
+/* fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
+X }
+X }
+X tot += ctot;
+X if (ci >= 0 && aa0p[ci] < 32) {
+#ifdef DEBUG
+/* fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
+#endif
+X /* mark the residue consumed so later positions cannot reuse it */
+X aa0p[ci] += 32;
+X dmax->used[&aa0p[ci] - aa0] = 1;
+X }
+X }
+X return tot;
+}
+X
+/* ma_spam - rescore the saved run dmax as m1_spam() does, and also write
+X * the chosen query residues, in library order, into f_str->aa0t starting
+X * at offset f_str->aa0ix.
+X *
+X * For each library position in [dmax->start, dmax->stop] the best
+X * not-yet-consumed residue (value < 32) across the nm0 fragments is
+X * selected when its pam2 score exceeds ppst->pam_l; it is copied to
+X * aa0t and marked consumed by adding 32 to it in aa0.  When at least
+X * one residue was copied, dmax->dp is reduced by aa0ix, a 0 terminator
+X * is appended and aa0ix advances by nmoff.
+X *
+X * curv.score and maxv.score are only zero-initialised.
+X * Returns the total score.
+X */
+int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
+X struct savestr *dmax, struct pstruct *ppst,
+X struct f_struct *f_str)
+{
+X int **pam2;
+X int tpos, lpos, im, ii, nm, ci, lp0;
+X int tot, ctot, pv;
+X struct {
+X int start, stop, score;
+X } curv, maxv;
+X const unsigned char *aa1p;
+X unsigned char *aa0p, *aa0pt;
+X int aa0t_flg;
+X
+X pam2 = ppst->pam2[0];
+X aa0t_flg = 0;
+X
+X lpos = dmax->start; /* position in library sequence */
+X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+X lp0 = lpos = dmax->start;
+X aa1p = &aa1[lpos];
+X aa0p = &aa0[tpos]; /* real aa0 sequence */
+X
+X /* the destination aa0 sequence (without nulls) */
+X aa0pt = &f_str->aa0t[f_str->aa0ix];
+X
+X curv.start = lpos;
+X nm = f_str->nm0;
+X
+X /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
+X if (lpos == 0 && tpos > 0)
+X for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31; /* filler character */
+X
+X tot = curv.score = maxv.score = 0;
+X for (; lpos <= dmax->stop; lpos++) {
+X /* ctot: best score at this position, never below pam_l;
+X ci: offset from aa0p of the residue that gave it, -1 if none */
+X ctot = ppst->pam_l;
+X ci = -1;
+X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+X if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+X ctot = pv;
+X ci = ii;
+X }
+X }
+X tot += ctot;
+X if (ci >= 0) {
+X if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
+X else {
+X /* copy the chosen residue, then mark it consumed in aa0 */
+X *aa0pt++ = aa0p[ci];
+X aa0p[ci] += 32;
+X aa0t_flg=1;
+X }
+X }
+X aa0p++; aa1p++;
+X }
+X
+X if (aa0t_flg) {
+X dmax->dp -= f_str->aa0ix; /* shift ->dp for aa0t */
+X if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
+X fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
+X }
+X else
+X *aa0pt++ = 0; /* skip over NULL */
+X
+X aa0pt = &f_str->aa0t[f_str->aa0ix];
+X aa1p = &aa1[lp0];
+X
+X /*
+X for (im = 0; im < f_str->nmoff; im++)
+X fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
+X fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
+X */
+X
+X f_str->aa0ix += f_str->nmoff; /* update offset into aa0t */
+X }
+X /*
+X fprintf(stderr," ma_spam returning: %d\n",tot);
+X */
+X return tot;
+}
+X /*
+X  * m0_spam - score the saved region *dmax against aa1 without marking
+X  * any query residue (aa0 is only read here).
+X  *
+X  * The region is first adjusted: when the query position tpos is > 0
+X  * the start is moved left by tpos (or to library position 0 if that
+X  * would go negative), and when n1 - (lpos + f_str->nmoff - 2) < 0 the
+X  * stop is recomputed.  Both adjustments are written back to
+X  * dmax->start / dmax->stop.
+X  *
+X  * For each library position in the adjusted range the best pam2 score
+X  * over f_str->nm0 query residues spaced f_str->nmoff apart is added to
+X  * the total, which is returned.
+X  */
+static int
+m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
+X struct savestr *dmax, int **pam2,
+X struct f_struct *f_str)
+{
+X int tpos, lpos, lend, im, ii, nm;
+X int tot, ctot, pv;
+X struct {
+X int start, stop, score;
+X } curv, maxv; /* assigned below but never read in this function */
+X const unsigned char *aa0p, *aa1p;
+X
+X lpos = dmax->start; /* position in library sequence */
+X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
+X if (tpos > 0) {
+X if (lpos-tpos >= 0) {
+X lpos = dmax->start -= tpos; /* force global alignment, reset start*/
+X tpos = 0;
+X }
+X else {
+X tpos -= lpos;
+X lpos = dmax->start = 0;
+X }
+X }
+X
+X nm = f_str->nm0;
+X lend = dmax->stop;
+X if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
+X lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
+X if (lend >= n1) lend = n1-1; /* only lend is clamped; dmax->stop keeps the value set above */
+X }
+X
+X aa1p = &aa1[lpos];
+X aa0p = &aa0[tpos];
+X
+X curv.start = lpos;
+X
+X tot = curv.score = maxv.score = 0;
+X for (; lpos <= lend; lpos++) {
+X ctot = -10000; /* lower than any score expected from pam2, so the first residue always replaces it - presumably; confirm against the matrix range */
+X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
+X if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
+X ctot = pv;
+X }
+X }
+X tot += ctot;
+X aa0p++; aa1p++;
+X }
+X
+X /* reset dmax if necessary */
+X /* (no code follows the comment above: nothing is reset here) */
+X
+X return tot;
+}
+X
+/* sconn links up non-overlapping alignments and calculates the score
+X
+X    v[0..n-1] is sorted in place by increasing ->start (kpsort) and each
+X    run is then considered in that order.  A list of the runs seen so
+X    far is kept best-first through ->next; a new run is joined (->prev)
+X    to the first list entry whose ->stop lies left of the run's ->start.
+X
+X    opt_prob == 0: runs scoring less than cgap are skipped, the joined
+X    score is the sum of the two scores, and the list is ordered by
+X    decreasing score.
+X
+X    opt_prob != 0: calc_tatusov() supplies a probability for the run
+X    alone and for each candidate join; a join is kept only if it does
+X    not make that probability larger, and the list is ordered by
+X    increasing probability (ties broken by larger score).
+X
+X    On return rst->escore is the best probability (opt_prob) or 2.0,
+X    rst->segnum / rst->seglen describe the best chain, and the value
+X    returned is the best chain score.  When no run was accepted the
+X    function returns 0 with rst->escore 1.0 (opt_prob) or 2.0 and
+X    segnum / seglen set to 0.
+X */
+X
+int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
+X struct rstruct *rst, struct pstruct *ppst,
+X const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int opt_prob)
+{
+X int i, si, cmpp (); /* cmpp is declared but never called here */
+X struct slink *start, *sl, *sj, *so, sarr[MAXSAV]; /* NOTE(review): si is never checked against MAXSAV; assumes n <= MAXSAV - confirm */
+X int lstart, plstop;
+X double tatprob;
+X
+X /* sarr[] saves each alignment score/position, and provides a link
+X back to the previous alignment that maximizes the score */
+X
+X /* sort the score left to right in lib pos */
+X kpsort (v, n);
+X
+X start = NULL;
+X
+X /* for the remaining runs, see if they fit */
+X for (i = 0, si = 0; i < n; i++) {
+X
+X /* if the score is less than the gap penalty, it never helps */
+X if (!opt_prob && (v[i]->score < cgap) ){ continue; }
+X
+X lstart = v[i]->start;
+X
+X /* put the run in the group */
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X sarr[si].prev = NULL;
+X sarr[si].tat = NULL;
+X
+X if(opt_prob) {
+X sarr[si].tatprob =
+X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0],ppst->nsq, f_str,
+X ppst->pseudocts, opt_prob,ppst->zsflag);
+X sarr[si].tat = sarr[si].newtat; /* presumably newtat was just allocated by calc_tatusov - confirm */
+X }
+X
+X /* if it fits, then increase the score */
+X for (sl = start; sl != NULL; sl = sl->next) {
+X plstop = sl->vp->stop;
+X /* if end < start or start > end, add score */
+X if (plstop < lstart ) {
+X if(!opt_prob) {
+X sarr[si].score = sl->score + v[i]->score;
+X sarr[si].prev = sl;
+X /*
+X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
+X i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
+X */
+X break;
+X } else {
+X tatprob =
+X calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0], ppst->nsq, f_str,
+X ppst->pseudocts, opt_prob, ppst->zsflag);
+X /* if our tatprob gets worse when we add this, forget it */
+X if(tatprob > sarr[si].tatprob) {
+X free(sarr[si].newtat->probs); /* get rid of new tat struct */
+X free(sarr[si].newtat);
+X continue;
+X } else {
+X sarr[si].tatprob = tatprob;
+X free(sarr[si].tat->probs); /* get rid of old tat struct */
+X free(sarr[si].tat);
+X sarr[si].tat = sarr[si].newtat;
+X sarr[si].prev = sl;
+X sarr[si].score = sl->score + v[i]->score;
+X /*
+X fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
+X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+X */
+X break;
+X }
+X }
+X }
+X }
+X
+X /* now recalculate where the score fits - resort the scores
+X    (the new entry is inserted in front of the first list entry it
+X    beats; if it beats none, neither loop below links it, so it stays
+X    in sarr[] but is not reachable from start) */
+X if (start == NULL) {
+X start = &sarr[si];
+X } else {
+X if(!opt_prob) { /* sort by scores */
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X if (sarr[si].score > sj->score) { /* if new score > best score */
+X sarr[si].next = sj; /* previous best linked to best */
+X if (so != NULL)
+X so->next = &sarr[si]; /* old best points to new best */
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj; /* old-best saved in so */
+X }
+X } else { /* sort by tatprobs */
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X if ( sarr[si].tatprob < sj->tatprob ||
+X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+X sarr[si].next = sj;
+X if (so != NULL)
+X so->next = &sarr[si];
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X }
+X }
+X si++;
+X }
+X
+X if(opt_prob) { /* release the tat struct still held by every sarr[] entry */
+X for (i = 0 ; i < si ; i++) {
+X free(sarr[i].tat->probs);
+X free(sarr[i].tat);
+X }
+X }
+X
+X if (start != NULL) {
+X
+X if(opt_prob)
+X rst->escore = start->tatprob;
+X else
+X rst->escore = 2.0;
+X
+X rst->segnum = rst->seglen = 0;
+X for(sj = start ; sj != NULL; sj = sj->prev) { /* follow the best chain back through its joined runs */
+X rst->segnum++;
+X rst->seglen += sj->vp->stop - sj->vp->start + 1;
+X }
+X return (start->score);
+X } else {
+X
+X if(opt_prob)
+X rst->escore = 1.0;
+X else
+X rst->escore = 2.0;
+X
+X rst->segnum = rst->seglen = 0;
+X return (0);
+X }
+}
+X /*
+X  * kssort - Shell sort of the pointer array v[0..n-1] into order of
+X  * non-increasing ->score.  Only the pointers are moved; entries with
+X  * equal scores are not exchanged.
+X  */
+void
+kssort (struct savestr **v, int n)
+{
+X int gap, i, j;
+X struct savestr *tmp;
+X
+X for (gap = n / 2; gap > 0; gap /= 2)
+X for (i = gap; i < n; i++)
+X for (j = i - gap; j >= 0; j -= gap)
+X {
+X if (v[j]->score >= v[j + gap]->score)
+X break; /* this pair is already in order */
+X tmp = v[j];
+X v[j] = v[j + gap];
+X v[j + gap] = tmp;
+X }
+}
+void
+kpsort (v, n)
+struct savestr *v[];
+int n;
+{ /* Shell sort of the pointer array v[0..n-1] into order of
+X    non-decreasing ->start; only the pointers are moved and entries
+X    with equal ->start are not exchanged. */
+X int gap, i, j;
+X struct savestr *tmp;
+X
+X for (gap = n / 2; gap > 0; gap /= 2)
+X for (i = gap; i < n; i++)
+X for (j = i - gap; j >= 0; j -= gap)
+X {
+X if (v[j]->start <= v[j + gap]->start)
+X break; /* this pair is already in order */
+X tmp = v[j];
+X v[j] = v[j + gap];
+X v[j + gap] = tmp;
+X }
+}
+X
+/* sorts alignments from right to left (back to front) based on stop
+X
+X    Shell sort of the pointer array v[0..n-1]; afterwards ->stop is
+X    non-increasing along v[].  Only the pointers are moved.  Unlike
+X    kssort/kpsort the test is strict, so a pair with equal ->stop is
+X    exchanged. */
+X
+void
+krsort (v, n)
+struct savestr *v[];
+int n;
+{
+X int gap, i, j;
+X struct savestr *tmp;
+X
+X for (gap = n / 2; gap > 0; gap /= 2)
+X for (i = gap; i < n; i++)
+X for (j = i - gap; j >= 0; j -= gap)
+X {
+X if (v[j]->stop > v[j + gap]->stop)
+X break; /* this pair is already in order */
+X tmp = v[j];
+X v[j] = v[j + gap];
+X v[j + gap] = tmp;
+X }
+}
+X /*
+X  * do_walign - produce the alignment of the query held in f_str->aa0
+X  * against one library sequence and describe it in *a_res.
+X  *
+X  * Steps: (with TFAST) translate aa1 into f_str->aa1x with aatran();
+X  * run do_fastf(); re-score the saved regions with ma_spam(), which
+X  * also fills f_str->aa0t; undo the +32 marks ma_spam() left in
+X  * f_str->aa0; drop trailing regions whose score is not positive; let
+X  * sconn_a() write the alignment operations into f_str->res.
+X  *
+X  * Sets a_res->nres (value returned by sconn_a), a_res->res
+X  * (= f_str->res) and *have_ares (= 0).  Returns rst.score[0], which is
+X  * presumably filled in by do_fastf() - nothing else here writes rst.
+X  * Calls exit(1) if the scratch buffer cannot be allocated.
+X  *
+X  * The aa0 argument is not referenced; f_str->aa0 is used instead.
+X  */
+int do_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X int *have_ares)
+{
+X int hoff, n10;
+X struct rstruct rst;
+X int ib;
+X unsigned char *aa0t; /* scratch copy of the query, n0+1 bytes, freed before returning */
+X const unsigned char *aa1p;
+X
+#ifdef TFAST
+X f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
+X aa1p = f_str->aa1x;
+#else
+X n10 = n1;
+X aa1p = aa1;
+#endif
+X
+X do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff, 1);
+X
+X /* the alignment portion takes advantage of the information left
+X over in f_str after do_fastf is done. in particular, it is
+X easy to run a modified sconn() to produce the alignments.
+X
+X unfortunately, the alignment display routine wants to have
+X things encoded as with bd_align and sw_align, so we need to do that.
+X */
+X
+X if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
+X fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
+X exit(1);
+X }
+X
+X kssort (f_str->vptr, f_str->nsave); /* best-scoring regions first */
+X f_str->aa0ix = 0;
+X if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
+X for (ib=0; ib < f_str->nm0; ib++) { /* NOTE(review): bound is nm0, not nsave; assumes vptr[] holds at least nm0 valid entries - confirm */
+X if (f_str->vptr[ib]->score > 0) {
+X f_str->vptr[ib]->score =
+X ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
+X }
+X }
+X
+X /* after ma_spam is over, we need to reset aa0 */
+X for (ib = 0; ib < n0; ib++) {
+X if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
+X }
+X
+X kssort(f_str->vptr,f_str->nsave); /* scores were replaced above, so order again */
+X
+X for ( ; f_str->nsave > 0; f_str->nsave--) /* trim entries with score <= 0 from the end */
+X if (f_str->vptr[f_str->nsave-1]->score >0) break;
+X
+X a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
+X free(aa0t);
+X
+X a_res->res = f_str->res;
+X *have_ares = 0;
+X return rst.score[0];
+}
+X
+/* this version of sconn is modified to provide alignment information
+X
+X    The alignments in f_str->vptr[0..f_str->nsave-1] that score at least
+X    cgap are chained as in sconn(), but working from the right end of
+X    the library: v[] is put in decreasing ->stop order and each
+X    alignment is joined (->aprev) to the best earlier chain that starts
+X    to the right of its own ->stop.
+X
+X    The best chain is then walked from its left-most block rightwards.
+X    For every block the aligned residues are copied from f_str->aa0t
+X    into aa0 and a 0 is written to f_str->res for each position; between
+X    two blocks a positive count of skipped library residues is written.
+X    ->dp of each block used is adjusted, a_res->min0/min1/max0/max1 are
+X    set, and finally aa0[0..n0-1] is copied back over f_str->aa0t.
+X
+X    Returns the alignment length (aligned positions plus skipped
+X    library residues), or 0 if no alignment scores at least cgap. */
+X
+int sconn_a (unsigned char *aa0, int n0, int cgap,
+X struct f_struct *f_str,
+X struct a_res_str *a_res)
+{
+X int i, si, cmpp (), n; /* cmpp is declared but never called here */
+X unsigned char *aa0p;
+X int sx, dx, doff;
+X
+X struct savestr **v;
+X struct slink {
+X int score;
+X struct savestr *vp;
+X struct slink *snext;
+X struct slink *aprev;
+X } *start, *sl, *sj, *so, sarr[MAXSAV]; /* NOTE(review): si is never checked against MAXSAV; assumes f_str->nsave <= MAXSAV - confirm */
+X int lstop, plstart;
+X int *res, nres, tres;
+X
+/* sort the alignments right to left in library position (by ->stop) */
+X
+X v = f_str->vptr;
+X n = f_str->nsave;
+X
+X krsort (v, n); /* sort from right to left in library (decreasing ->stop) */
+X
+X start = NULL;
+X
+/* for each alignment, see if it fits */
+X
+X for (i = 0, si = 0; i < n; i++) {
+X
+/* if the score is less than the join threshold, skip it */
+X if (v[i]->score < cgap) continue;
+X
+X lstop = v[i]->stop; /* v[] is in decreasing ->stop order, so this is the right-most stop not yet handled */
+X
+/* put the alignment in the group */
+X
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].snext = NULL;
+X sarr[si].aprev = NULL;
+X
+/* if it fits, then increase the score */
+/* start points to a sorted (by total score) list of candidate
+X overlaps */
+X
+X for (sl = start; sl != NULL; sl = sl->snext) {
+X plstart = sl->vp->start;
+X if (plstart > lstop ) {
+X sarr[si].score = sl->score + v[i]->score;
+X sarr[si].aprev = sl;
+X break; /* quit as soon as the alignment has been added */
+X }
+X }
+X
+/* now recalculate the list of best scores */
+X if (start == NULL)
+X start = &sarr[si]; /* put the first one in the list */
+X else
+X for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
+X if (sarr[si].score > sj->score) { /* new score better than old */
+X sarr[si].snext = sj; /* snext best after new score */
+X if (so != NULL)
+X so->snext = &sarr[si]; /* prev_best->snext points to best */
+X else start = &sarr[si]; /* start points to best */
+X break; /* stop looking */
+X }
+X so = sj; /* previous candidate best */
+X }
+X si++; /* increment to snext alignment */
+X }
+X
+X /* we have the best set of alignments, write them to *res */
+X if (start != NULL) {
+X res = f_str->res; /* set a destination for the alignment ops */
+X tres = nres = 0; /* alignment op length = 0 */
+X aa0p = aa0; /* point into query (needed for calcons later) */
+X a_res->min1 = start->vp->start; /* start in library */
+X a_res->min0 = 0; /* start in query */
+X for (sj = start; sj != NULL; sj = sj->aprev ) {
+X doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
+X /*
+X fprintf(stderr,"doff: %3d\n",doff);
+X */
+X for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
+X dx <= sj->vp->stop; dx++) {
+X *aa0p++ = f_str->aa0t[sx++]; /* copy residue into aa0 */ /* NOTE(review): nothing here bounds aa0p by n0 - confirm the chain cannot be longer than the buffer */
+X tres++; /* bump alignment counter */
+X res[nres++] = 0; /* put 0-op in res */
+X }
+X sj->vp->dp -= doff;
+X if (sj->aprev != NULL) {
+X if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
+X /* put an insert op into res to get to next aligned block */
+X tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
+X }
+X /*
+X fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
+X sj->vp->start - sj->vp->dp + f_str->noff,
+X sj->vp->stop - sj->vp->dp + f_str->noff,
+X sj->vp->start,sj->vp->stop,sj->vp->dp,
+X f_str->noff,sj->vp->score);
+X fprintf(stderr,"%3d - %3d: %3d\n",
+X sj->vp->start,sj->vp->stop,sj->vp->score);
+X */
+X a_res->max1 = sj->vp->stop; /* overwritten on every pass; the last block in the chain wins */
+X a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
+X }
+X
+X /*
+X fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
+X a_res->min0,a_res->max0,a_res->min1,a_res->max1);
+X */
+X
+X /* now replace f_str->aa0t with aa0 */
+X for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
+X
+X return tres;
+X }
+X else return (0);
+}
+X
+/* calculate the 100% identical score
+X
+X    Returns the sum of pam2[r][r] over the n0 residue codes r in aa0,
+X    leaving out codes that are 0 or greater than nsq.  Returns 0 when
+X    n0 <= 0. */
+int
+shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
+{
+X int i, sum;
+X for (i=0,sum=0; i<n0; i++)
+X if (aa0[i]!=0 && aa0[i]<=nsq) sum += pam2[aa0[i]][aa0[i]];
+X return sum;
+}
+X /*
+X  * pre_cons - with TFAST defined, translate aa1 into f_str->aa1x with
+X  * aatran() and store its return value in f_str->n10; without TFAST
+X  * the function body is empty.
+X  */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+X
+#ifdef TFAST
+X f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
+#endif
+}
+X
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code
+X
+X    TFAST build: qlrev 0, qlfact 1, llfact and llmult 3, frame 0, and
+X    llrev set to 1 when frame > 3.
+X    Otherwise:   llfact, qlfact and llmult 1, llrev and qlrev 0, frame 0.
+X    Note that aln->frame is set to 0 in both builds; the frame argument
+X    is only read for the llrev test. */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+X
+#ifdef TFAST
+X aln->qlrev = 0;
+X aln->qlfact = 1;
+X aln->llfact = aln->llmult = 3;
+X aln->frame = 0;
+X if (frame > 3) aln->llrev = 1; /* llrev is left untouched when frame <= 3 */
+#else /* FASTF */
+X aln->llfact = aln->qlfact = aln->llmult = 1;
+X aln->llrev = aln->qlrev = 0;
+X aln->frame = 0;
+#endif
+}
+X
+#include "a_mark.h"
+X /*
+X  * calcons - build the display strings for one alignment.
+X  *
+X  *   seqc0  query residues, taken from f_str->aa0t
+X  *   seqc1  library residues (aa1, or f_str->aa1x with TFAST)
+X  *   seqca  one mark per column: M_IDENT, M_POS, M_ZERO, M_NEG or
+X  *          M_DEL, with M_BLANK under the leading context
+X  *
+X  * Each string is laid out as `mins' columns of leading context, the
+X  * aligned region decoded from a_res.res, then `nd' columns of trailing
+X  * context.  In a_res.res a 0 is one aligned pair, a positive value is
+X  * that many library residues shown against '-' in the query, and a
+X  * negative value is that many query residues shown against '-' in the
+X  * library.
+X  *
+X  * Also sets aln->amin0/amin1/amax0/amax1, aln->smin0/smin1,
+X  * aln->nident and aln->nsim (aln->ngap_q and aln->ngap_l are zeroed
+X  * and not counted here) and stores the number of non-gap columns in
+X  * *nc.  Returns the total number of columns, mins+lenc+nd.
+X  *
+X  * The aa0 argument is not referenced.
+X  */
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1, nn1, n0t;
+X int op, lenc, len_gap, nd, ns, itmp;
+X const unsigned char *aa1p;
+X char *sp0, *sp1, *sq, *spa;
+X int *rp;
+X int mins, smins; /* smins is assigned but never read */
+X
+X /* do not allow low complexity */
+X sq = pst.sq;
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X /* first fill in the ends */
+X /* a_res.min0--; a_res.min1--; */
+X n0 -= (f_str->nm0-1); /* n0 is not read again after this adjustment */
+X
+X aln->amin0 = a_res.min0;
+X aln->amin1 = a_res.min1;
+X aln->amax0 = a_res.max0;
+X aln->amax1 = a_res.max1;
+X
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
+X /* will we show all the start ?*/
+X smins=0;
+X mins = min(a_res.min1,aln->llen/2);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) { /* query has fewer leading residues than mins: pad on the left */
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else {
+X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X
+X memset(seqca,M_BLANK,mins);
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) { /* aligned pair */
+X op = *rp++;
+X
+X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X *sp0 = sq[f_str->aa0t[i0++]];
+X *sp1 = sq[aa1p[i1++]];
+X n0t++;
+X lenc++;
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) { op = *rp++;}
+X if (op>0) { /* library residue opposite a gap in the query */
+X *sp0++ = '-';
+X *sp1++ = sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X len_gap++;
+X lenc++;
+X }
+X else { /* query residue opposite a gap in the library */
+X *sp0++ = sq[f_str->aa0t[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X n0t++;
+X len_gap++;
+X lenc++;
+X }
+X }
+X }
+X
+X *spa = '\0';
+X *nc = lenc-len_gap;
+X
+X /* now we have the middle, get the right end */
+X /* ns is amount to be shown */
+X /* nd is amount remaining to be shown */
+X ns = mins + lenc + aln->llen;
+X ns -= (itmp = ns %aln->llen);
+X if (itmp>aln->llen/2) ns += aln->llen;
+X nd = ns - (mins+lenc);
+X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X /* there isn't any aa0 to get */
+X memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X memset(seqc0+mins+lenc,' ',nd);
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X
+X return mins+lenc+nd;
+}
+X /*
+X  * calcons_a - same layout and bookkeeping as calcons(), with one more
+X  * output string, seqc0a, which receives a blank for every leading and
+X  * aligned column and is NUL-terminated after the aligned region.
+X  *
+X  *   seqc0  query residues, taken from f_str->aa0t
+X  *   seqc1  library residues (aa1, or f_str->aa1x with TFAST)
+X  *   seqca  one mark per column: M_IDENT, M_POS, M_ZERO, M_NEG or
+X  *          M_DEL, with M_BLANK under the leading context
+X  *
+X  * In a_res.res a 0 is one aligned pair, a positive value is that many
+X  * library residues shown against '-' in the query, and a negative
+X  * value is that many query residues shown against '-' in the library.
+X  *
+X  * Sets aln->amin0/amin1/amax0/amax1, aln->smin0/smin1, aln->nident and
+X  * aln->nsim, stores the number of non-gap columns in *nc and returns
+X  * the total number of columns, mins+lenc+nd.
+X  *
+X  * The aa0, aa0a and ann_arr arguments are not referenced.
+X  */
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr, struct f_struct *f_str)
+{
+X int i0, i1, nn1, n0t;
+X int op, lenc, len_gap, nd, ns, itmp;
+X const unsigned char *aa1p;
+X char *sp0, *sp0a, *sp1, *sq, *spa;
+X int *rp;
+X int mins, smins; /* smins is assigned but never read */
+X
+X /* do not allow low complexity */
+X sq = pst.sq;
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amin1 = a_res.min1;
+X aln->amax0 = a_res.max0;
+X aln->amax1 = a_res.max1;
+X
+X /* first fill in the ends */
+X n0 -= (f_str->nm0-1); /* n0 is not read again after this adjustment */
+X
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
+X /* will we show all the start ?*/
+X smins=0;
+X mins = min(a_res.min1,aln->llen/2);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) { /* query has fewer leading residues than mins: pad on the left */
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else {
+X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X
+X memset(seqca,M_BLANK,mins);
+X memset(seqc0a,' ',mins);
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp0a = seqc0a+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) { /* aligned pair */
+X op = *rp++;
+X
+X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X *sp0a++ = ' ';
+X *sp0 = sq[f_str->aa0t[i0++]];
+X *sp1 = sq[aa1p[i1++]];
+X n0t++;
+X lenc++;
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) { op = *rp++;}
+X if (op>0) { /* library residue opposite a gap in the query */
+X *sp0++ = '-';
+X *sp0a++ = ' ';
+X *sp1++ = sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X len_gap++;
+X lenc++;
+X }
+X else { /* query residue opposite a gap in the library */
+X *sp0++ = sq[f_str->aa0t[i0++]];
+X *sp0a++ = ' ';
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X n0t++;
+X len_gap++;
+X lenc++;
+X }
+X }
+X }
+X
+X *sp0a = *spa = '\0';
+X *nc = lenc-len_gap;
+X
+X /* now we have the middle, get the right end */
+X /* ns is amount to be shown */
+X /* nd is amount remaining to be shown */
+X ns = mins + lenc + aln->llen;
+X ns -= (itmp = ns %aln->llen);
+X if (itmp>aln->llen/2) ns += aln->llen;
+X nd = ns - (mins+lenc);
+X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X /* there isn't any aa0 to get */
+X memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X memset(seqc0+mins+lenc,' ',nd);
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X
+X return mins+lenc+nd;
+}
+X /*
+X  * aa0shuffle - reverse the column order of aa0 in place.
+X  *
+X  * aa0 is addressed as f_str->nm0 rows of f_str->nmoff entries each.
+X  * Columns 0 .. nmoff-2 are reversed (column i is exchanged with column
+X  * nmoff-2-i in every row); the last column, nmoff-1, is not touched.
+X  * The n0 argument is not referenced.
+X  */
+void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
+X
+X int i, j, k;
+X unsigned char tmp;
+X
+X for (i = f_str->nmoff-1 ; --i ; ) { /* the pre-decrement is the loop test: first pass has i == nmoff-2, and the loop ends when i reaches 0 */
+X
+X /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */
+X j = (f_str->nmoff - 2) - i; if (i <= j) break; /* reverse columns */
+X
+X /* swap all i'th column residues for all j'th column residues */
+X for(k = 0 ; k < f_str->nm0 ; k++) {
+X tmp = aa0[(k * (f_str->nmoff)) + i];
+X aa0[(k * (f_str->nmoff)) + i] = aa0[(k * (f_str->nmoff)) + j];
+X aa0[(k * (f_str->nmoff)) + j] = tmp;
+X }
+X }
+}
+X
+/* build an array of match/ins/del - length strings
+X
+X    Walks the alignment described by a_res (a_res.res holds 0 for an
+X    aligned pair, a positive count for library residues opposite a gap
+X    and a negative count for query residues opposite a gap) and, each
+X    time the kind of operation changes, appends one code for the run
+X    just finished to al_str through update_code().  The number passed
+X    as fnum is f_str->aa0ti[i0] + 1, sampled when the run starts.
+X
+X    al_str must already hold a NUL-terminated string (strlen() is
+X    applied to it); al_str_n is the size limit handed on to
+X    update_code().
+X
+X    Sets aln->nident and aln->nsim; aln->ngap_q, aln->ngap_l and
+X    aln->nfs are zeroed and not counted here.  Returns the number of
+X    aligned (non-gap) columns.
+X
+X    The aa0 and n0 arguments are not referenced. */
+int calc_code(const unsigned char *aa0, const int n0,
+X const unsigned char *aa1, const int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc, len_gap;
+X int p_op, op_cnt; /* kind (0 aligned, 1 library only, 2 query only) and length of the current run */
+X const unsigned char *aa1p;
+X char tmp_cnt[20]; /* only tmp_cnt[0] is written; otherwise unused */
+X char sp0, sp1, *sq;
+X int *rp;
+X int mins, smins; /* neither is used in this function */
+X int fnum = 0;
+X
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X } /* NOTE(review): sq is chosen here but the loop below reads pst.sq directly - confirm which alphabet is intended */
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X rp = a_res.res;
+X lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
+X op_cnt = 0;
+X
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X tmp_cnt[0]='\0';
+X
+X fnum = f_str->aa0ti[i0] + 1;
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) { /* aligned pair */
+X if (p_op == 0) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt,fnum);
+X op_cnt = 1; p_op = 0;
+X fnum = f_str->aa0ti[i0] + 1;
+X }
+X op = *rp++;
+X lenc++;
+X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
+X sp0 = pst.sq[f_str->aa0t[i0++]];
+X sp1 = pst.sq[aa1p[i1++]];
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X }
+X else {
+X if (op==0) op = *rp++;
+X if (op>0) { /* library residue opposite a gap in the query */
+X if (p_op == 1) { op_cnt++;}
+X else { /* p_op and op_cnt both start at 0, so an alignment that begins with a gap emits a zero-length "=0[...]" code here */
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
+X op_cnt = 1; p_op = 1; fnum = f_str->aa0ti[i0] + 1;
+X }
+X op--; lenc++; i1++; len_gap++;
+X }
+X else { /* query residue opposite a gap in the library */
+X if (p_op == 2) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
+X op_cnt = 1; p_op = 2; fnum = f_str->aa0ti[i0] + 1;
+X }
+X op++; lenc++; i0++; len_gap++;
+X }
+X }
+X }
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum); /* flush the final run */
+X
+X return lenc - len_gap;
+}
+X
+/* update_code - append one run-length encoded alignment operation to
+X    al_str.
+X
+X    al_str      NUL-terminated string being built; extended in place
+X    al_str_max  space still free in al_str as computed by the caller
+X                (buffer size minus strlen(al_str)); one byte of it is
+X                kept back for the terminating NUL
+X    op          0, 1 or 2; selects the code character '=', '-' or '+'
+X    op_cnt      length of the run
+X    fnum        number written in brackets after an op == 0 run
+X
+X    The code is "=<op_cnt>[<fnum>]" for op 0 and "-<op_cnt>" or
+X    "+<op_cnt>" otherwise.  Text that does not fit is truncated; nothing
+X    is appended when no room is left. */
+void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
+X
+X  char op_char[4]={"=-+"};
+X  /* one code character, two ints of up to 64 bits each (20 characters
+X     including a sign), two brackets and the NUL need 44 bytes, so
+X     sprintf() below cannot overrun this buffer */
+X  char tmp_cnt[64];
+X
+X  if (op == 0)
+X    sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
+X  else
+X    sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X
+X  /* strncat() copies up to n characters and then adds a NUL, i.e. it
+X     can write n+1 bytes, so hand it one less than the space left.  The
+X     test also keeps a zero or negative count from being converted to a
+X     huge size_t. */
+X  if (al_str_max > 1)
+X    strncat(al_str,tmp_cnt,(size_t)(al_str_max - 1));
+}
+X /*
+X  * calc_id - count identities and similarities over one alignment
+X  * without building any display strings.
+X  *
+X  * Walks a_res.res from (a_res.min0, a_res.min1) to (a_res.max0,
+X  * a_res.max1): a 0 is an aligned pair, a positive value is that many
+X  * library residues opposite a gap, a negative value that many query
+X  * residues opposite a gap.  Query residues are read from f_str->aa0t.
+X  * For each aligned pair aln->nsim is incremented when the pam2 score
+X  * is >= 0 and aln->nident when the two characters match ignoring case.
+X  * aln->ngap_q and aln->ngap_l are zeroed and not counted here.
+X  *
+X  * Returns the number of aligned (non-gap) columns.  The aa0 argument
+X  * is not referenced.
+X  */
+int calc_id(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X struct f_struct *f_str)
+{
+X int i0, i1, nn1, n0t;
+X int op, lenc, len_gap;
+X const unsigned char *aa1p;
+X int sp0, sp1;
+X int *rp;
+X int mins, smins; /* neither is used in this function */
+X char *sq;
+X
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X }
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X /* first fill in the ends */
+X /* a_res.min0--; a_res.min1--; */
+X n0 -= (f_str->nm0-1); /* n0 is not read again after this adjustment */
+X
+X /* now get the middle */
+X rp = a_res.res;
+X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) { /* aligned pair */
+X op = *rp++;
+X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
+X sp0 = sq[f_str->aa0t[i0++]];
+X sp1 = sq[aa1p[i1++]];
+X n0t++;
+X lenc++;
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X }
+X else {
+X if (op==0) { op = *rp++;}
+X if (op>0) { /* library residue opposite a gap in the query */
+X i1++;
+X op--;
+X len_gap++;
+X lenc++;
+X }
+X else { /* query residue opposite a gap in the library */
+X i0++;
+X op++;
+X n0t++;
+X len_gap++;
+X lenc++;
+X }
+X }
+X }
+X return lenc-len_gap;
+}
+X
+#ifdef PCOMPLIB
+X
+#include "structs.h"
+#include "p_mw.h"
+X /*
+X  * update_params - copy the per-query values n0, nm0, escore_flg and
+X  * qshuffle from *qm_msg into *m_msg; n0 is also stored in ppst->n0.
+X  * Compiled only when PCOMPLIB is defined.
+X  */
+void
+update_params(struct qmng_str *qm_msg,
+X struct mngmsg *m_msg, struct pstruct *ppst)
+{
+X m_msg->n0 = ppst->n0 = qm_msg->n0;
+X m_msg->nm0 = qm_msg->nm0;
+X m_msg->escore_flg = qm_msg->escore_flg;
+X m_msg->qshuffle = qm_msg->qshuffle;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropff2.c ||
+echo 'restore of dropff2.c failed'
+Wc_c="`wc -c < 'dropff2.c'`"
+test 48853 -eq "$Wc_c" ||
+ echo 'dropff2.c: original size 48853, current size' "$Wc_c"
+fi
+# ============= dropfs2.c ==============
+if test -f 'dropfs2.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropfs2.c (File already exists)'
+else
+echo 'x - extracting dropfs2.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropfs2.c' &&
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropfs2.c,v 1.40 2007/02/26 21:56:59 wrp Exp $ */
+X
+/* changed to return 2.0, rather than -1.0, for failure */
+X
+/* Feb 4, 2005 - modifications to allow searches with ktup=2 for very
+X long queries. This is a temporary solution to savemax(), spam()
+X which do not preserve exact matches
+X
+X do_fasts() has been modified to allow higher maxsav for do_walign
+X than for do_work (2*nsegs, 6*nsegs)
+X */
+X
+/* this code implements the "fasts" algorithm, which compares a set of
+X protein fragments to a protein sequence. Commas are used to separate
+X the sequence fragments, which need not be the same length.
+X
+X The expected input is:
+X
+X >mgstm1
+X MGDAPDFD,
+X MILGYW,
+X MLLEYTDS
+X
+X The fragments do not need to be in the correct order (which is
+X presumably unknown from the peptide sequencing).
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "tatstats.h"
+X
+#define EOSEQ 0
+#define ESS 49
+#define NMAP_X 23 /* for 'X' */
+#define NMAP_Z 24 /* for '*' */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+X
+static char *verstr="4.32 Feb 2007";
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+int shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq);
+static void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
+extern void aancpy(char *to, char *from, int count, struct pstruct pst);
+X
+#ifdef TFAST
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);
+#endif
+X
+void savemax(struct dstruct *, struct f_struct *, int maxsav, int exact,int t_end);
+X
+int spam(const unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);
+int sconn(struct savestr **v,
+X int nsave,
+X struct f_struct *,
+X struct rstruct *,
+X struct pstruct *,
+X const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int opt_prob);
+X
+void kpsort(struct savestr **, int);
+void kssort(struct savestr **, int); /* sort by score */
+int sconn_a(unsigned char *, int,
+X const unsigned char *, int,
+X struct f_struct *,
+X struct a_res_str *,
+X struct pstruct *);
+void kpsort(struct savestr **, int);
+X
+/* initialize for fasta */
+X
+void
+init_work (unsigned char *aa0, const int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg
+X )
+{
+X int mhv, phv;
+X int hmax, nsegs;
+X int i0, ib, hv, old_hv;
+X int pamfact;
+X struct f_struct *f_str;
+X /* these used to be globals, but do not need to be */
+X int ktup, fact, kt1;
+X
+X int maxn0;
+X int stmp; /* temporary score */
+X int i, j, q;
+X int tat_size;
+X int *res;
+X
+X unsigned char *query;
+X int k, l, m, n, N, length, index;
+X
+X double *tatprobptr;
+X
+X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+X
+X ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+X ktup = ppst->param_u.fa.ktup;
+X if ( ktup > 2 ) {
+X ktup = ppst->param_u.fa.ktup = 2;
+X }
+X fact = ppst->param_u.fa.scfact;
+X
+X /* fasts3 cannot work with lowercase symbols as low complexity;
+X thus, NMAP must be disabled; this depends on aascii['X'] */
+X if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
+X if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
+X /* this does not work in a threaded environment */
+X /* else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
+X
+X for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+X if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
+X
+X if (mhv <= 0) {
+X fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+X exit (1);
+X }
+X
+X for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+X
+/* kshft = 2; */
+X kt1 = ktup-1;
+X hv = 1;
+X for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
+X hmax = hv;
+X f_str->hmask = (hmax >> f_str->kshft) - 1;
+X
+X if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0ti = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0ti array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0b = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0b array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0e = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0e array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0i = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0i array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0s = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0s array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->aa0l = (int *) calloc(n0+1, sizeof(int))) == NULL) {
+X fprintf (stderr, " cannot allocate f_str0->aa0l array; %d\n",n0+1);
+X exit (1);
+X }
+X
+X if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
+X hmax, f_str->hmask);
+X exit (1);
+X }
+X if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate pamh1 array\n");
+X exit (1);
+X }
+X if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate pamh2 array\n");
+X exit (1);
+X }
+X
+X if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate hash link array");
+X exit (1);
+X }
+X
+X /* for FASTS/FASTM, we want to know when we get to the end of a peptide,
+X so we can ensure that we set the end and restart */
+X
+X if ((f_str->l_end = (int *) calloc (n0, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate link end array");
+X exit (1);
+X }
+X
+X for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
+X for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
+X for (i0 = 0; i0 < n0; i0++) f_str->l_end[i0] = 0;
+X
+X /* count the number of peptides */
+X nsegs = 1;
+X for (i0 = 0; i0 < n0; i0++) {
+X if (aa0[i0] == ESS || aa0[i0] == 0) nsegs++;
+X }
+X
+X /* allocate space for peptides offsets, nm_u */
+X if ((f_str->nmoff = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
+X fprintf(stderr, " cannot allocat nmoff array: %d\n", nsegs);
+X exit(1);
+X }
+X
+X if ((f_str->nm_u = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
+X fprintf(stderr, " cannot allocat nm_u array: %d\n", nsegs);
+X exit(1);
+X }
+X
+X phv = hv = 0;
+X f_str->nmoff[0] = 0;
+X f_str->nm0 = 1;
+X
+X /* encode the aa0 array */
+X if (kt1 > 0) {
+X hv = ppst->hsq[aa0[0]];
+X phv = ppst->pam2[0][aa0[0]][aa0[0]];
+X }
+X
+X for (i0=kt1 ; i0 < n0; i0++) {
+X if (aa0[i0] == ESS || aa0[i0] == 0) {
+X /* fprintf(stderr," converted %d to 0\n",aa0[i0]); */
+X aa0[i0] = EOSEQ; /* set ESS to 0 */
+X f_str->nmoff[f_str->nm0++] = i0+1;
+X f_str->l_end[i0-1] = 1;
+X phv = hv = 0;
+X if (kt1 > 0) {
+X i0++;
+X hv = ppst->hsq[aa0[i0]];
+X phv = ppst->pam2[0][aa0[i0]][aa0[i0]];
+X }
+X continue;
+X }
+X
+X hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+X f_str->link[i0] = f_str->harr[hv];
+X f_str->harr[hv] = i0;
+X f_str->pamh2[hv] = (phv += ppst->pam2[0][aa0[i0]][aa0[i0]]);
+X phv -= ppst->pam2[0][aa0[i0 - kt1]][aa0[i0 - kt1]];
+X }
+X f_str->l_end[n0-1] = 1;
+X
+X f_str->nmoff[f_str->nm0] = n0+1;
+X
+X /*
+#ifdef DEBUG
+X fprintf(stderr, ">>%s\n",qtitle);
+X for (j=0; j<f_str->nm0; j++) {
+X for (i=f_str->nmoff[j]; i < f_str->nmoff[j+1]-1; i++) {
+X fprintf(stderr,"%c",ppst->sq[aa0[i]]);
+X }
+X fprintf(stderr," %d\n",aa0[i]);
+X }
+X
+X for (j=1; j<=ppst->nsq; j++) {
+X fprintf(stderr, "%c %d\n", ppst->sq[j], f_str->harr[j]);
+X }
+X
+X for (j=0; j<=n0; j++) {
+X fprintf(stderr, "%c %d\n", ppst->sq[aa0[j]], f_str->link[j]);
+X }
+X
+#endif
+X */
+X
+X /* build an integer array of the max score that can be achieved
+X from that position - use in savemax to mark some segments as
+X fixed */
+X
+X /* setup aa0b[], aa0e[], which specify the begining and end of each
+X segment */
+X
+X stmp = 0;
+X q = -1;
+X for (ib = i0 = 0; i0 < n0; i0++) {
+X f_str->aa0l[i0] = i0 - q;
+X if (aa0[i0]==EOSEQ) {
+X f_str->aa0b[i0] = -1;
+X f_str->aa0e[i0] = -1;
+X f_str->aa0i[i0] = -1;
+X f_str->aa0l[i0] = -1;
+X q = i0;
+X if (i0 > 0)f_str->aa0s[i0-1] = stmp;
+X stmp = 0;
+X ib++;
+X }
+X else {
+X stmp += ppst->pam2[0][aa0[i0]][aa0[i0]];
+X }
+X
+X f_str->aa0b[i0] = f_str->nmoff[ib];
+X f_str->aa0e[i0] = f_str->nmoff[ib+1]-2;
+X f_str->aa0i[i0] = ib;
+X
+X /*
+X fprintf(stderr,"%2d %c: %2d %2d %2d\n",i0,ppst->sq[aa0[i0]],
+X f_str->aa0b[i0],f_str->aa0e[i0],f_str->aa0i[i0]);
+X */
+X }
+X f_str->aa0s[n0-1]=stmp; /* save last best possible score */
+X
+X /* maxsav - maximum number of peptide alignments saved in search */
+X /* maxsav_w - maximum number of peptide alignments saved in
+X alignment */
+X
+X f_str->maxsav = max(MAXSAV,2*f_str->nm0);
+X f_str->maxsav_w = max(MAXSAV,6*f_str->nm0);
+X
+X if ((f_str->vmax = (struct savestr *)
+X calloc(f_str->maxsav_w,sizeof(struct savestr)))==NULL) {
+X fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav_w);
+X exit(1);
+X }
+X
+X if ((f_str->vptr = (struct savestr **)
+X calloc(f_str->maxsav_w,sizeof(struct savestr *)))==NULL) {
+X fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav_w);
+X exit(1);
+X }
+X
+X if ((f_str->sarr = (struct slink *)
+X calloc(f_str->maxsav_w,sizeof(struct slink)))==NULL) {
+X fprintf(stderr, "Couldn't allocate sarr[%d].\n",f_str->maxsav_w);
+X exit(1);
+X }
+X
+X /* Tatusov Statistics Setup */
+X
+X /* initialize priors array. */
+X if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
+X fprintf(stderr, "Couldn't allocate priors array.\n");
+X exit(1);
+X }
+X
+X calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
+X
+X /* pre-calculate the Tatusov probability array for each full segment */
+X
+X if(ppst->zsflag >= 1 && ppst->zsflag <= 3 && f_str->nm0 <= 10) {
+X
+X tat_size = (1<<f_str->nm0) -1;
+X f_str->dotat = 1;
+X f_str->tatprobs = (struct tat_str **) malloc((size_t)tat_size*sizeof(struct tat_str *));
+X if (f_str->tatprobs == NULL) {
+X fprintf (stderr, " cannot allocate tatprobs array: %ld\n",
+X tat_size * sizeof(struct tat_str *));
+X exit (1);
+X }
+X
+X f_str->intprobs = (double **) malloc((size_t)tat_size * sizeof(double *));
+X if(f_str->intprobs == NULL) {
+X fprintf(stderr, "Couldn't allocate intprobs array.\n");
+X exit(1);
+X }
+X
+X for(k = 0, l = f_str->nm0 ; k < l ; k++) {
+X query = &(aa0[f_str->nmoff[k]]);
+X length = f_str->nmoff[k+1] - f_str->nmoff[k] - 1;
+X
+X /* this segment alone */
+X index = (1 << k) - 1;
+X generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), NULL);
+X
+X /* integrate the probabilities */
+X N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
+X tatprobptr = (double *) calloc(N+1, sizeof(double));
+X if(tatprobptr == NULL) {
+X fprintf(stderr, "Couldn't calloc tatprobptr.\n");
+X exit(1);
+X }
+X f_str->intprobs[index] = tatprobptr;
+X
+X for (i = 0; i <= N ; i++ ) {
+X tatprobptr[i] = f_str->tatprobs[index]->probs[i];
+X for (j = i + 1 ; j <= N ; j++ ) {
+X tatprobptr[i] += f_str->tatprobs[index]->probs[j];
+X }
+X }
+X
+X /* this segment built on top of all other subcombinations */
+X for(i = 0, j = (1 << k) - 1 ; i < j ; i++) {
+X index = (1 << k) + i;
+X generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), f_str->tatprobs[i]);
+X
+X /* integrate the probabilities */
+X N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
+X tatprobptr = (double *) calloc(N+1, sizeof(double));
+X if(tatprobptr == NULL) {
+X fprintf(stderr, "Couldn't calloc tatprobptr.\n");
+X exit(1);
+X }
+X f_str->intprobs[index] = tatprobptr;
+X
+X for (m = 0; m <= N ; m++ ) {
+X tatprobptr[m] = f_str->tatprobs[index]->probs[m];
+X for (n = m + 1 ; n <= N ; n++ ) {
+X tatprobptr[m] += f_str->tatprobs[index]->probs[n];
+X }
+X }
+X }
+X }
+X } else {
+X f_str->dotat = 0;
+X f_str->shuff_cnt = ppst->shuff_node;
+X }
+X
+X /* End of Tatusov Statistics Setup */
+X
+X /*
+X for (i0=1; i0<=ppst->nsq; i0++) {
+X fprintf(stderr," %c: %2d ",ppst->sq[i0],f_str->harr[i0]);
+X hv = f_str->harr[i0];
+X while (hv >= 0) {
+X fprintf(stderr," %2d",f_str->link[hv]);
+X hv = f_str->link[hv];
+X }
+X fprintf(stderr,"\n");
+X }
+X */
+X
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+X pam2[0][0] is now undefined for consistency with blast
+*/
+X for (i0 = 1; i0 <= ppst->nsq; i0++)
+X f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
+X
+X f_str->ndo = 0;
+X f_str->noff = n0-1;
+X if (f_str->diag==NULL)
+X f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+X sizeof (struct dstruct));
+X if (f_str->diag == NULL) {
+X fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
+X (long) MAXDIAG * (long) (sizeof (struct dstruct)));
+X exit (1);
+X }
+X
+#ifdef TFAST
+X if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+X sizeof(unsigned char)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+X exit (1);
+X }
+X f_str->aa1x++;
+#endif
+X
+X maxn0 = max(3*n0/2,MIN_RES);
+X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X exit(1);
+X }
+X f_str->res = res;
+X f_str->max_res = maxn0;
+X
+X *f_arg = f_str;
+}
+X
+X
+/* pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format */
+/*
+X  get_param() - write a description of the search parameters.
+X
+X  pstr      parameter structure that is reported
+X  pstring1  receives the one-line summary (always written)
+X  pstring2  when not NULL, receives the same information as
+X            "; pg_xxx: value" lines
+X
+X  The program name depends on the FASTS/FASTM and TFAST build macros.
+*/
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifdef FASTS
+#ifdef TFAST
+X  char *pg_str="TFASTS";
+#else
+X  char *pg_str="FASTS";
+#endif
+#endif
+X
+#ifdef FASTM
+#ifdef TFAST
+X  char *pg_str="TFASTM";
+#else
+X  char *pg_str="FASTM";
+#endif
+#endif
+X
+X  /* one-line form */
+X  sprintf (pstring1,
+X           "%s (%s) function [%s matrix (%d:%d)] ktup=%d",
+X           pg_str, verstr, pstr->pamfile,
+X           pstr->pam_h, pstr->pam_l,
+X           pstr->param_u.fa.ktup);
+X
+X  if (pstr->param_u.fa.iniflag) {
+X    strcat(pstring1," init1");
+X  }
+X
+X  /* (disabled in the original)
+X     if (pstr->zsflag==0) strcat(pstring1," not-scaled");
+X     else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+X  */
+X
+X  /* the keyword/value form is optional */
+X  if (pstring2 == NULL) {
+X    return;
+X  }
+X
+X  sprintf (pstring2,
+X           "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n"
+X           "; pg_gap-pen: %d %d\n; pg_ktup: %d\n",
+X           pg_str, verstr, pstr->pamfile,
+X           pstr->pam_h, pstr->pam_l,
+X           pstr->gdelval, pstr->ggapval,
+X           pstr->param_u.fa.ktup);
+}
+X
+/*
+X  close_work() - release everything init_work() attached to *f_arg.
+X
+X  aa0, n0, ppst  not used here; kept for the common interface
+X  f_arg          address of the f_struct pointer; set to NULL on return
+X
+X  Does nothing when *f_arg is already NULL.
+*/
+void
+close_work (const unsigned char *aa0, const int n0,
+X            struct pstruct *ppst,
+X            struct f_struct **f_arg)
+{
+X  struct f_struct *f_str;
+X  int i, j;
+X
+X  f_str = *f_arg;
+X
+X  if (f_str != NULL) {
+X
+X    free(f_str->res);
+#ifdef TFAST
+X    free(f_str->aa1x - 1); /* because f_str->aa1x got ++'ed when allocated! */
+#endif
+X    free(f_str->diag);
+X    free(f_str->l_end);
+X    free(f_str->link);
+X    free(f_str->pamh2);
+X    free(f_str->pamh1);
+X    free(f_str->harr);
+X    free(f_str->vmax);
+X    free(f_str->vptr);
+X    free(f_str->sarr);
+X    free(f_str->aa0i);
+X    /* aa0s[] and aa0l[] are calloc'ed by init_work() together with the
+X       other aa0x[] arrays; they were previously never released */
+X    free(f_str->aa0s);
+X    free(f_str->aa0l);
+X    free(f_str->aa0e);
+X    free(f_str->aa0b);
+X    free(f_str->aa0ti);
+X    free(f_str->aa0t);
+X    free(f_str->nmoff);
+X    free(f_str->nm_u);
+X
+X    if(f_str->dotat) {
+X      /* tatprobs[]/intprobs[] hold (1 << nm0) - 1 entries each */
+X      for(i = 0, j = (1 << f_str->nm0) - 1 ; i < j ; i++) {
+X        free(f_str->tatprobs[i]->probs);
+X        free(f_str->tatprobs[i]);
+X        free(f_str->intprobs[i]);
+X      }
+X      free(f_str->tatprobs);
+X      free(f_str->intprobs);
+X    }
+X
+X    free(f_str->priors);
+X    free(f_str);
+X    *f_arg = NULL;
+X  }
+}
+X
+/*
+X  do_fasts() - score one library sequence against the query segments.
+X
+X  aa0, n0   encoded query and its length
+X  aa1, n1   encoded library sequence and its length
+X  ppst      search parameters (ktup, hsq[], pam2[0])
+X  f_str     work area: hash tables, diagonal array, saved runs
+X  rst       receives score[0..2], escore, segnum, seglen
+X  hoff      not used in this function
+X  opt_prob  passed through to sconn()
+X  maxsav    number of vmax[] entries to use
+X
+X  Steps: scan aa1 through the query hash table, accumulating scores
+X  on the diagonals and handing runs to savemax(); rescore every saved
+X  run with spam(); keep one run per query segment; join the runs with
+X  sconn().  On return f_str->vptr[0 .. f_str->nsave-1] lists the
+X  surviving runs, best score first.
+X
+X  f_str->nsave is set to 0 on every early return so that a caller
+X  that inspects vptr[] afterwards (do_walign/sconn_a) never sees runs
+X  left over from a previous library sequence.
+*/
+void do_fasts (const unsigned char *aa0, const int n0,
+X               const unsigned char *aa1, const int n1,
+X               struct pstruct *ppst, struct f_struct *f_str,
+X               struct rstruct *rst, int *hoff, int opt_prob,
+X               int maxsav)
+{
+X  int nd;                       /* diagonal array size */
+X  int lhval;
+X  int kfact;
+X  register struct dstruct *dptr;
+X  register int tscor;
+X  register struct dstruct *diagp;
+X  struct dstruct *dpmax;
+X  register int lpos;
+X  int tpos;
+X  struct savestr *vmptr, *vmaxmax;
+X  int scor;
+X  int ib, nsave;
+X  int ktup;
+X  int doffset;
+X
+X
+X  vmaxmax = &f_str->vmax[maxsav];
+X
+X  ktup = ppst->param_u.fa.ktup;
+X
+X  if (n1 < ktup) {
+X    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X    rst->escore = 1.0;
+X    rst->segnum = 0;
+X    rst->seglen = 0;
+X    f_str->nsave = 0;           /* no runs belong to this sequence */
+X    return;
+X  }
+X
+X  if (n0+n1+1 >= MAXDIAG) {
+X    fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X    rst->escore = 2.0;
+X    rst->segnum = 0;
+X    rst->seglen = 0;
+X    f_str->nsave = 0;           /* no runs belong to this sequence */
+X    return;
+X  }
+X
+X  nd = n0 + n1;
+X
+X  /* clear the diagonals beyond those already reset by the last call */
+X  dpmax = &f_str->diag[nd];
+X  for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+X    {
+X      dptr->stop = -1;
+X      dptr->dmax = NULL;
+X      dptr++->score = 0;
+X    }
+X
+X  for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) {
+X    vmptr->score = 0;
+X    vmptr->exact = 0;
+X  }
+X  f_str->lowmax = f_str->vmax;
+X  f_str->lowscor = 0;
+X
+X  /* start hashing */
+X  diagp = &f_str->diag[f_str->noff];
+X  for (lhval=lpos=0; lpos < n1; lpos++, diagp++) {
+X    if (ppst->hsq[aa1[lpos]]>=NMAP) {   /* skip residue */
+X      lpos++ ; diagp++;
+X      while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+X      if (lpos >= n1) break;
+X      lhval = 0;
+X    }
+X
+X    lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+X
+X    for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X
+X      dptr = &diagp[-tpos];
+X
+X      if (f_str->l_end[tpos]) {
+X        /* tpos is the last residue of a query segment */
+X        if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) {
+X          dptr->stop = lpos;
+X          dptr->score = f_str->aa0s[tpos];
+X          savemax(dptr, f_str, maxsav, 1, tpos);
+X          dptr->dmax = NULL;
+X        }
+X
+X        else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) {
+X          /*
+X          fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n",
+X                  tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]],
+X                  dptr->start, dptr->stop,
+X                  dptr->stop - dptr->start, f_str->aa0l[tpos]);
+X          */
+X          dptr->stop = lpos;
+X          dptr->start = lpos - f_str->aa0l[tpos];
+X          dptr->score = f_str->aa0s[tpos];
+X          savemax(dptr, f_str, maxsav, 1, tpos);
+X          dptr->dmax = NULL;
+X        }
+X      }
+X      else if ((tscor = dptr->stop) >= 0) {
+X        tscor++;                /* tscor is stop of current, increment it */
+X        if ((tscor -= lpos) <= 0) { /* tscor, the end of the current
+X                                       match, is before lpos, so there
+X                                       is a mismatch - this is also the
+X                                       mismatch cost */
+X          tscor *= 2;
+X          scor = dptr->score;   /* save the run score on the diag */
+X          if ((tscor += (kfact = f_str->pamh2[lhval])) < 0
+X              && f_str->lowscor < scor) {
+X            /* if what we will get (tscor + kfact) is < 0 and the
+X               score is better than the worst savemax() score, save
+X               it */
+X            savemax (dptr, f_str, maxsav,0,-1);
+X          }
+X
+X          /* if extending is better than starting over, extend */
+X          if ((tscor += scor) >= kfact) {
+X            dptr->score = tscor;
+X            dptr->stop = lpos;
+X            /* NOTE: l_end[tpos] is 0 on this path (the non-zero case
+X               is handled by the first branch above), so this test
+X               cannot succeed; kept as in the original */
+X            if (f_str->l_end[tpos]) {
+X              if (dptr->score == f_str->aa0s[tpos]) {
+X                savemax(dptr, f_str, maxsav,1,tpos);
+X                dptr->dmax = NULL;
+X              }
+X              else if (dptr->score > f_str->lowscor)
+X                savemax(dptr, f_str, maxsav,0,tpos);
+X            }
+X          }
+X          else {                /* otherwise, start new */
+X            dptr->score = kfact;
+X            dptr->start = dptr->stop = lpos;
+X          }
+X        }
+X        else {          /* tscor is after lpos, so extend one residue */
+X          dptr->score += f_str->pamh1[aa0[tpos]];
+X          dptr->stop = lpos;
+X          /* NOTE: same as above - l_end[tpos] is 0 on this path */
+X          if (f_str->l_end[tpos]) {
+X            if (dptr->score == f_str->aa0s[tpos]) {
+X              savemax(dptr, f_str, maxsav,1,tpos);
+X              dptr->dmax = NULL;
+X            }
+X            else if (dptr->score > f_str->lowscor)
+X              savemax(dptr, f_str, maxsav,0,tpos);
+X          }
+X        }
+X      }
+X      else {                    /* start new */
+X        dptr->score = f_str->pamh2[lhval];
+X        dptr->start = dptr->stop = lpos;
+X      }
+X    }                           /* end tpos */
+X  }                             /* end lpos */
+X
+X  /* save any run still open on a diagonal, and reset the diagonals */
+X  for (dptr = f_str->diag; dptr < dpmax;) {
+X    if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1);
+X    dptr->stop = -1;
+X    dptr->dmax = NULL;
+X    dptr++->score = 0;
+X  }
+X  f_str->ndo = nd;
+X
+/*
+X        at this point all of the elements of aa1[lpos]
+X        have been searched for elements of aa0[tpos]
+X        with the results in diag[dpos]
+*/
+X
+X  for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) {
+X    if (vmptr->score > 0) {
+X      /*
+X
+X      fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d",
+X              (vmptr->exact ? 'x' : ' '),
+X              f_str->noff+vmptr->start-vmptr->dp,
+X              f_str->noff+vmptr->stop-vmptr->dp,
+X              vmptr->start,vmptr->stop,
+X              vmptr->dp,vmptr->score);
+X      */
+X      vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
+X      /*
+X      fprintf(stderr,"  sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop);
+X      */
+X      if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr;
+X    }
+X  }
+X
+X  if (nsave <= 0) {
+X    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X    rst->escore = 1.0;
+X    rst->segnum = 0;
+X    rst->seglen = 0;
+X    f_str->nsave = 0;
+X    return;
+X  }
+X
+X  /*
+X  fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
+X  for (ib=0; ib<nsave; ib++) {
+X    fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
+X            f_str->vptr[ib]->exact ? 'x' : ' ',
+X            f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X            f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X            f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X            f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X  }
+X
+X  fprintf(stderr,"---\n");
+X  */
+X  kssort(f_str->vptr,nsave);
+X
+X  /* make certain each seg is used only once */
+X
+X  for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0;
+X  for (ib=0; ib < nsave; ib++) {
+X    doffset = f_str->vptr[ib]->dp - f_str->noff;
+X    tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset];
+X    if (f_str->nm_u[tpos] == 0) {
+X      f_str->nm_u[tpos]=1;
+X    } else {
+X      f_str->vptr[ib]->score = -1;
+X    }
+X  }
+X
+X  /* the rejected runs (score -1) sort to the end; drop them */
+X  kssort(f_str->vptr,nsave);
+X  for (ib = nsave-1; ib >= 0; ib--)
+X    if (f_str->vptr[ib]->score > -1) break;
+X  nsave = ib+1;
+X
+X  scor = sconn (f_str->vptr, nsave,
+X                f_str, rst, ppst, aa0, n0, aa1, n1,
+X                opt_prob);
+X
+X  if (rst->escore < 0.0) rst->escore = 2.0;
+X  /* sconn() re-ordered vptr[] by position; restore score order */
+X  kssort(f_str->vptr,nsave);
+X
+X  /* here we should use an nsave that is consistent with sconn and nm0 */
+X
+X  f_str->nsave = nsave;
+X  if (nsave > f_str->nm0) f_str->nsave = f_str->nm0;
+X
+X  rst->score[1] = f_str->vptr[0]->score;
+X  rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);
+X
+}
+X
+/*
+X  do_work() - search-time entry point: score aa1 against the query.
+X
+X  frame    translation frame handed to aatran() (TFAST builds only)
+X  qr_flg   non-zero when this call is counted against
+X           f_str->shuff_cnt
+X  rst      receives the scores; comp and H are set to -1.0
+X
+X  When qr_flg is 1 and shuff_cnt has run out, or when the library
+X  sequence is shorter than ktup, the scores are set to -1 and escore
+X  to 2.0 without calling do_fasts().
+*/
+void do_work (const unsigned char *aa0, const int n0,
+X              const unsigned char *aa1, const int n1,
+X              int frame,
+X              struct pstruct *ppst, struct f_struct *f_str,
+X              int qr_flg, struct rstruct *rst)
+{
+X  int opt_prob;
+X  int hoff;
+#ifdef TFAST
+X  int n10, i;
+#endif
+X
+X  /* budget exhausted - nothing more to do */
+X  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
+X    rst->escore = 2.0;
+X    rst->score[0] = -1;
+X    rst->score[1] = -1;
+X    rst->score[2] = -1;
+X    return;
+X  }
+X
+X  /* qr_flg forces opt_prob on; otherwise zsflag 2/12 forces it off;
+X     otherwise it follows dotat or zsflag 4/14 */
+X  if (qr_flg) {
+X    opt_prob = 1;
+X    /* if (frame==1) */
+X    f_str->shuff_cnt--;
+X  }
+X  else if (ppst->zsflag == 2 || ppst->zsflag == 12) {
+X    opt_prob = 0;
+X  }
+X  else if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14) {
+X    opt_prob = 1;
+X  }
+X  else {
+X    opt_prob = 0;
+X  }
+X
+X  if (n1 < ppst->param_u.fa.ktup) {
+X    rst->score[0] = -1;
+X    rst->score[1] = -1;
+X    rst->score[2] = -1;
+X    rst->escore = 2.0;
+X    return;
+X  }
+X
+#ifdef TFAST
+X  n10 = aatran(aa1,f_str->aa1x,n1,frame);
+X  if (ppst->debug_lib) {
+X    for (i=0; i<n10; i++) {
+X      if (f_str->aa1x[i]>ppst->nsq) {
+X        fprintf(stderr,
+X                "residue[%d/%d] %d range (%d)\n",i,n1,
+X                f_str->aa1x[i],ppst->nsq);
+X        f_str->aa1x[i]=0;
+X        n10=i-1;        /* also ends the loop */
+X      }
+X    }
+X  }
+X
+X  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff,
+X            opt_prob, f_str->maxsav);
+#else   /* FASTA */
+X  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff,
+X            opt_prob, f_str->maxsav);
+#endif
+X
+X  rst->comp = -1.0;
+X  rst->H = -1.0;
+}
+X
+/*
+X  do_opt() - rescore aa1 against the query with opt_prob forced to 1.
+X
+X  Calls do_fasts() with f_str->maxsav saved runs; in TFAST builds the
+X  library sequence is first translated with aatran() in the given
+X  frame.  Results are returned in rst.
+*/
+void do_opt (const unsigned char *aa0, const int n0,
+X             const unsigned char *aa1, const int n1,
+X             int frame,
+X             struct pstruct *ppst, struct f_struct *f_str,
+X             struct rstruct *rst)
+{
+X  int hoff;             /* required by do_fasts(), value not used here */
+#ifdef TFAST
+X  int n10;              /* length of the translated sequence */
+X
+X  n10=aatran(aa1,f_str->aa1x,n1,frame);
+X  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1, f_str->maxsav);
+#else   /* FASTA */
+X  do_fasts(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff, 1, f_str->maxsav);
+#endif
+}
+X
+X
+/* modify savemax() so that full length 100% matches are marked
+X so that they cannot be removed - if we have a 100% match, mark "exact"
+X
+X modify savemax() to split alignments that include a comma
+*/
+X
+/* savemax(dptr, f_str, maxsav) takes a current diagonal run (saved in dptr),
+X and places it in the set of runs to be saved (in f_str->vmax[])
+*/
+X
+void
+savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav,
+X         int exact, int tpos)
+{
+X  /* dptr    diagonal entry holding the current run
+X     maxsav  number of f_str->vmax[] slots in use
+X     exact   stored in the slot's "exact" field; slots with a non-zero
+X             "exact" are skipped when the worst slot is looked for
+X     tpos    not used in this function */
+X  struct savestr *slot;         /* slot that receives the run */
+X  struct savestr *scan;         /* cursor for the rescan */
+X  struct savestr *vend;         /* one past the last slot */
+X  int diag_idx;                 /* index of dptr in f_str->diag[] */
+X  int worst;                    /* running lower bound on the score */
+X
+X  vend = &f_str->vmax[maxsav];
+X  diag_idx = (int) (dptr - f_str->diag);
+X
+X  slot = dptr->dmax;
+X  if (slot == NULL || slot->dp != diag_idx || slot->start != dptr->start) {
+X    /* not a continuation of a saved run: overwrite the worst slot */
+X    slot = f_str->lowmax;
+X    slot->exact = exact;
+X    slot->score = dptr->score;
+X    slot->dp = diag_idx;
+X    slot->start = dptr->start;
+X    slot->stop = dptr->stop;
+X    dptr->dmax = slot;
+X    worst = dptr->score;
+X  }
+X  else {
+X    /* same diagonal, same start: the saved run has been extended */
+X    slot->stop = dptr->stop;
+X
+X    /* a score that did not improve is not recorded; if it gets bad
+X       enough the run restarts in the diagonal scan */
+X    worst = dptr->score;
+X    if (worst <= slot->score) {
+X      return;
+X    }
+X
+X    slot->score = worst;
+X    slot->exact = exact;
+X
+X    /* the worst slot can only have changed if this was the worst */
+X    if (slot != f_str->lowmax) {
+X      return;
+X    }
+X  }
+X
+X  /* look for the new worst slot among the non-exact ones */
+X  for (scan = f_str->vmax; scan < vend; scan++) {
+X    if (scan->exact) continue;
+X    if (scan->score < worst) {
+X      worst = scan->score;
+X      f_str->lowmax = scan;
+X    }
+X  }
+X
+X  f_str->lowscor = worst;
+}
+X
+/* this version of spam scans the diagonal to find the best local score,
+X then resets the boundaries for a global alignment and re-scans */
+X
+/* NOOVERHANG allows one to score any overhanging alignment as zero.
+X Useful for SAGE alignments. Normally, one allows overhangs because
+X of the possibility of partial sequences.
+*/
+X
+#undef NOOVERHANG
+X
+/*
+X May, 2005 - spam() has an interesting bug that occurs when two
+X peptides match in order, separated by one position (the comma). In
+X this case, spam() splits the match, and only returns the better of
+X the two matches. So, if spam splits an alignment at a comma, it
+X needs the ability to insert the missing match.
+X
+*/
+X
+/*
+X  spam() - rescore one saved run and reset it to its query segment.
+X
+X  aa0, aa1  encoded query / library sequence
+X  n1        length of aa1
+X  dmax      saved run; its start/stop (library coordinates) are
+X            replaced by the segment boundaries when a score is returned
+X  pam2      scoring matrix, indexed [query residue][library residue]
+X  f_str     supplies noff, aa0b[] and aa0e[]
+X
+X  Pass 1 finds the best-scoring local stretch on the diagonal.  Pass 2
+X  takes the query segment containing the end of that stretch
+X  (aa0b[]/aa0e[]), clips it to the library sequence, and sums pam2
+X  over the whole clipped segment.
+X
+X  Returns the pass-2 score, or 0 when pass 1 finds nothing positive
+X  (dmax is then left unchanged).
+*/
+int spam (const unsigned char *aa0, const unsigned char *aa1,int n1,
+X          struct savestr *dmax, int **pam2,
+X          struct f_struct *f_str)
+{
+X  int     lpos, doffset;
+X  int     tot, mtot;
+X  struct {
+X    int     start, stop, score;
+X  } curv, maxv;
+X  register const unsigned char *aa0p, *aa1p;
+X
+X  /* library position - doffset = query position on this diagonal */
+X  doffset = dmax->dp - f_str->noff;
+X  curv.start = dmax->start;
+X  aa1p = &aa1[dmax->start];
+X  aa0p = &aa0[dmax->start - doffset];
+X
+X  tot = curv.score = maxv.score = 0;
+X  for (lpos = dmax->start; lpos <= dmax->stop; lpos++) {
+X    tot += pam2[*aa0p++][*aa1p++];
+X    if (tot > curv.score) {
+X      curv.stop = lpos; /* here, curv.stop is actually curv.max */
+X      curv.score = tot;
+X    }
+X    else if (tot < 0) {
+X      if (curv.score > maxv.score) {
+X        maxv.start = curv.start;
+X        maxv.stop = curv.stop;
+X        maxv.score = curv.score;
+X      }
+X      tot = curv.score = 0;
+X      curv.start = lpos+1;
+X    }
+X  }
+X
+X  if (curv.score > maxv.score) {
+X    maxv.start = curv.start;
+X    maxv.stop = curv.stop;
+X    maxv.score = curv.score;
+X  }
+X
+X  if (maxv.score <= 0) return 0;
+X
+X  /* now, reset the boundaries of the alignment using aa0b[]
+X     and aa0e[], which specify the residues that start and end
+X     the segment */
+X
+X  maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset;
+X  if (maxv.start < 0) {
+X    maxv.start = 0;
+#ifdef NOOVERHANG
+X    return 0;
+#endif
+X  }
+X
+X  maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset;
+X  /* the last valid library index is n1-1, so a stop equal to n1 is
+X     already an overhang (the test used to be "> n1", which let the
+X     loop below read aa1[n1]) */
+X  if (maxv.stop >= n1) {
+X    maxv.stop = n1-1;
+#ifdef NOOVERHANG
+X    return 0;
+#endif
+X  }
+X  aa1p = &aa1[lpos = maxv.start];
+X  aa0p = &aa0[lpos - doffset];
+X
+X  for (tot=0; lpos <= maxv.stop; lpos++) {
+X    tot += pam2[*aa0p++][*aa1p++];
+X  }
+X
+X  maxv.score = tot;
+X
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+X                printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+X                        dmax->start,maxv.stop,dmax->stop);
+*/
+X  dmax->start = maxv.start;
+X  dmax->stop = maxv.stop;
+X
+X  return maxv.score;
+}
+X
+int sconn (struct savestr **v, int n,
+X struct f_struct *f_str,
+X struct rstruct *rst, struct pstruct *ppst,
+X const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1, int opt_prob)
+{ /* sconn: join the n saved runs in v[] into chains and score the best one.
+X
+X    v[] is first re-ordered by library start (kpsort).  Each run gets an
+X    entry in f_str->sarr[]; it is attached (through ->prev) to the first
+X    entry of the sorted list "start" that ends before it in the library
+X    and satisfies the query-position test below.  The new entry is then
+X    inserted into the list (through ->next).
+X
+X    With opt_prob == 0 chains are ranked by summed score; otherwise by
+X    the value returned by calc_tatusov() (smaller first).
+X
+X    On return rst->escore, rst->segnum and rst->seglen describe the
+X    chain at the head of the list, and the function returns that
+X    chain's score.  With n == 0 it returns 0 and sets escore to 1.0.
+X */
+X int i, si, cmpp (); /* cmpp is declared but not called in this function */
+X struct slink *start, *sl, *sj, *so, *sarr;
+X int lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop;
+X double tatprob;
+X int dotat; /* not referenced below */
+X
+X sarr = f_str->sarr;
+X
+X /* sort the score left to right in lib pos */
+X kpsort (v, n);
+X
+X start = NULL;
+X rst->score[0] = 0;
+X rst->escore = 2.0;
+X
+/* for the remaining runs, see if they fit */
+/* lstart/lstop -> start/stop in library sequence
+X (there is no lstop variable in this function; the stop is held in ltmp)
+X tstart/tstop -> start/stop in query sequence
+X plstart/plstop -> start/stop in library sequence of a run that is
+X already in the list (the start is held in ltmp inside the loop)
+X ptstart/ptstop -> start/stop in query sequence of that run
+*/
+X
+X for (i = 0, si = 0; i < n; i++) {
+X
+X /* the segment is worth adding; find out where? */
+X lstart = v[i]->start;
+X ltmp = v[i]->stop;
+X tstart = lstart - v[i]->dp + f_str->noff;
+X tstop = ltmp - v[i]->dp + f_str->noff;
+X
+X /* put the run in the group */
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X sarr[si].prev = NULL;
+X sarr[si].tat = NULL;
+X
+/*
+X opt_prob for FASTS only has to do with using aa1 for priors,
+X i.e. we always calculate tatprobs for segments in FASTS (unlike
+X FASTF)
+*/
+X if(opt_prob) {
+X sarr[si].tatprob =
+X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0], ppst->nsq, f_str,
+X ppst->pseudocts, opt_prob, ppst->zsflag);
+X if (sarr[si].tatprob < 0.0) {
+X fprintf(stderr," negative tatprob: %lg\n",sarr[si].tatprob);
+X sarr[si].tatprob = 1.0;
+X }
+X sarr[si].tat = sarr[si].newtat; /* keep the struct left in newtat (presumably by calc_tatusov) */
+X }
+X
+/* if it fits, then increase the score
+X
+X start points to the highest scoring run
+X -> next is the second highest, etc.
+X put the segment into the highest scoring run that it fits into
+*/
+X for (sl = start; sl != NULL; sl = sl->next) {
+X ltmp = sl->vp->start;
+X /* plstop -> previous lstop */
+X plstop = sl->vp->stop;
+X /* ptstart -> previous t(query) start */
+X ptstart = ltmp - sl->vp->dp + f_str->noff;
+X /* ptstop -> previous t(query) stop */
+X ptstop = plstop - sl->vp->dp + f_str->noff;
+#ifndef FASTM
+X /* if the previous library stop is before the current library start */
+X /* ... and the two query ranges do not overlap (either order) */
+X if (plstop < lstart && ( ptstop < tstart || ptstart > tstop))
+#else
+X /* if the previous library stop is before the current library start */
+X /* ... and the previous query stop is before the current query start */
+X if (plstop < lstart && ptstop < tstart)
+#endif
+X {
+X if(!opt_prob) {
+X sarr[si].score = sl->score + v[i]->score;
+X sarr[si].prev = sl;
+X break;
+X } else {
+X tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0], ppst->nsq, f_str,
+X ppst->pseudocts, opt_prob, ppst->zsflag);
+X /* if our tatprob gets worse when we add this, forget it */
+X if(tatprob > sarr[si].tatprob) {
+X free(sarr[si].newtat->probs); /* get rid of new tat struct */
+X free(sarr[si].newtat);
+X continue; /* reuse this sarr[si] */
+X } else {
+X sarr[si].tatprob = tatprob;
+X free(sarr[si].tat->probs); /* get rid of old tat struct */
+X free(sarr[si].tat);
+X sarr[si].tat = sarr[si].newtat;
+X sarr[si].prev = sl;
+X sarr[si].score = sl->score + v[i]->score;
+X /*
+X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
+X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+X */
+X break;
+X }
+X }
+X }
+X }
+X
+X /* now recalculate where the score fits */
+X /* the new entry is inserted in front of the first list entry it
+X beats; an entry that beats none is not linked into the list */
+X if (start == NULL) start = &sarr[si];
+X else {
+X if(!opt_prob) {
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X if (sarr[si].score > sj->score) {
+X sarr[si].next = sj;
+X if (so != NULL)
+X so->next = &sarr[si];
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X } else {
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X if ( sarr[si].tatprob < sj->tatprob ||
+X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+X sarr[si].next = sj;
+X if (so != NULL)
+X so->next = &sarr[si];
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X }
+X }
+X
+X si++;
+X }
+X
+X if(opt_prob) { /* every sarr[] entry still owns one tat struct */
+X for (i = 0 ; i < si ; i++) {
+X free(sarr[i].tat->probs);
+X free(sarr[i].tat);
+X }
+X }
+X
+X if (start != NULL) {
+X if(opt_prob) {
+X rst->escore = start->tatprob;
+X } else {
+X rst->escore = 2.0;
+X }
+X
+X rst->segnum = rst->seglen = 0;
+X for(sj = start ; sj != NULL; sj = sj->prev) { /* walk the best chain */
+X rst->segnum++;
+X rst->seglen += sj->vp->stop - sj->vp->start + 1;
+X }
+X return (start->score);
+X } else {
+X rst->escore = 1.0;
+X }
+X
+X rst->segnum = rst->seglen = 0;
+X return (0);
+}
+X
+/* kssort() - Shell sort of v[0..n-1] into descending order of ->score */
+void
+kssort (struct savestr *v[], int n)
+{
+X  int gap, i, j;
+X  struct savestr *tmp;
+X
+X  for (gap = n / 2; gap > 0; gap /= 2)
+X    for (i = gap; i < n; i++)
+X      for (j = i - gap; j >= 0; j -= gap)
+X        {
+X          /* already in order: stop sifting this element */
+X          if (v[j]->score >= v[j + gap]->score)
+X            break;
+X          tmp = v[j];
+X          v[j] = v[j + gap];
+X          v[j + gap] = tmp;
+X        }
+}
+X
+/* kpsort() - Shell sort of v[0..n-1] into ascending order of ->start */
+void
+kpsort (struct savestr *v[], int n)
+{
+X  int gap, i, j;
+X  struct savestr *tmp;
+X
+X  for (gap = n / 2; gap > 0; gap /= 2)
+X    for (i = gap; i < n; i++)
+X      for (j = i - gap; j >= 0; j -= gap)
+X        {
+X          /* already in order: stop sifting this element */
+X          if (v[j]->start <= v[j + gap]->start)
+X            break;
+X          tmp = v[j];
+X          v[j] = v[j + gap];
+X          v[j + gap] = tmp;
+X        }
+}
+X
+/* calculate the 100% identical score */
+int
+shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq)
+{
+X  /* add up the diagonal (self-match) entry of pam2 for every residue
+X     of aa0[0..n0-1], leaving out EOSEQ and any code above nsq */
+X  int pos;
+X  int total = 0;
+X
+X  for (pos = 0; pos < n0; pos++) {
+X    if (aa0[pos] == EOSEQ) continue;
+X    if (aa0[pos] > nsq) continue;
+X    total += pam2[aa0[pos]][aa0[pos]];
+X  }
+X
+X  return total;
+}
+X
+/* sorts alignments from right to left (back to front) based on stop */
+X
+/* krsort() - Shell sort of v[0..n-1] into descending order of ->stop;
+X  entries with equal ->stop are exchanged as well (strict test) */
+void
+krsort (struct savestr *v[], int n)
+{
+X  int gap, i, j;
+X  struct savestr *tmp;
+X
+X  for (gap = n / 2; gap > 0; gap /= 2)
+X    for (i = gap; i < n; i++)
+X      for (j = i - gap; j >= 0; j -= gap)
+X        {
+X          /* already in order: stop sifting this element */
+X          if (v[j]->stop > v[j + gap]->stop)
+X            break;
+X          tmp = v[j];
+X          v[j] = v[j + gap];
+X          v[j + gap] = tmp;
+X        }
+}
+X
+/*
+X  do_walign() - produce the alignment of aa1 against the query.
+X
+X  Runs do_fasts() with f_str->maxsav_w saved runs, then lets sconn_a()
+X  build the alignment from what do_fasts() left in f_str.
+X
+X  a_res      receives nres (from sconn_a) and res (f_str->res)
+X  have_ares  set to 0
+X  returns    score[0] as computed by do_fasts()
+*/
+int do_walign (const unsigned char *aa0, int n0,
+X               const unsigned char *aa1, int n1,
+X               int frame,
+X               struct pstruct *ppst,
+X               struct f_struct *f_str,
+X               struct a_res_str *a_res,
+X               int *have_ares)
+{
+X  struct rstruct rst;
+X  const unsigned char *lib_seq;         /* sequence that is aligned */
+X  unsigned char *query_copy;            /* scratch copy for sconn_a() */
+X  int lib_len;
+X  int hoff;
+X  int k;
+X
+#ifdef TFAST
+X  f_str->n10 = lib_len = aatran(aa1,f_str->aa1x,n1,frame);
+X  lib_seq = f_str->aa1x;
+#else
+X  lib_len = n1;
+X  lib_seq = aa1;
+#endif
+X
+X  do_fasts(aa0, n0, lib_seq, lib_len, ppst, f_str, &rst, &hoff, 1,
+X           f_str->maxsav_w);
+X
+X  /* the alignment portion takes advantage of the information left
+X     over in f_str after do_fasts is done.  in particular, it is
+X     easy to run a modified sconn() to produce the alignments.
+X
+X     unfortunately, the alignment display routine wants to have
+X     things encoded as with bd_align and sw_align, so we need to do that.
+X  */
+X
+X  /* vptr[] is already sorted by do_fasts(); no kssort() needed */
+X
+X  /* at some point, we want one best score for each of the segments */
+X
+X  /* drop trailing runs whose score is not positive */
+X  while (f_str->nsave > 0 && f_str->vptr[f_str->nsave-1]->score <= 0) {
+X    f_str->nsave--;
+X  }
+X
+X  query_copy = (unsigned char *)calloc(n0+1,sizeof(unsigned char));
+X  if (query_copy == NULL) {
+X    fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
+X    exit(1);
+X  }
+X
+X  /* copy aa0[] into both f_str->aa0t[] and the scratch buffer */
+X  for (k = 0; k < n0; k++) {
+X    query_copy[k] = aa0[k];
+X    f_str->aa0t[k] = query_copy[k];
+X  }
+X  query_copy[k] = '\0';
+X  f_str->aa0t[k] = query_copy[k];
+X
+X  a_res->nres = sconn_a (query_copy,n0,lib_seq,lib_len,f_str, a_res, ppst);
+X
+X  free(query_copy);
+X
+X  a_res->res = f_str->res;
+X  *have_ares = 0;
+X  return rst.score[0];
+}
+X
+/* this version of sconn is modified to provide alignment information */
+/* in addition, it needs to know whether a segment has been used before */
+X
+/* sconn_a fills in the res[nres] array, but this is passed implicitly
+X through f_str->res[f_str->nres]
+X
+X Arguments:
+X   aa0, n0 - work copy of the query; it is overwritten below (through
+X             aa0p) with the residues of the segments that were chosen
+X   aa1, n1 - library sequence; only handed on to calc_tatusov()
+X   f_str   - supplies vptr[]/nsave (saved segments), sarr[] (link
+X             scratch), noff, aa0t[], aa0i[], aa0ti[] and res[]
+X   a_res   - receives min0/min1/max0/max1 of the joined alignment
+X   ppst    - pam2[0], nsq, pseudocts and zsflag are passed to
+X             calc_tatusov()
+X
+X Steps:
+X   1. return 0 when there is no saved segment with a positive score
+X   2. krsort() the segments, then for every one with score >= 0 fill in
+X      sarr[si] and try to chain it (through ->prev) onto an entry that
+X      is already in the "start" list; the chain is kept only when
+X      calc_tatusov() does not report a larger tatprob for it
+X   3. keep the "start" list ordered by smallest tatprob (ties: larger
+X      score); an entry that beats nothing in the list is not linked in
+X   4. free the tat structures, then walk start->prev->... writing the
+X      selected residues into aa0/aa0ti and the codes into res[]
+X      (0 for an aligned position, a positive count for a gap between
+X      two consecutive segments)
+X
+X Returns tres: one for each aligned position plus the length of every
+X gap written between consecutive segments. */
+X
+int sconn_a (unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X struct pstruct *ppst)
+{
+X int i, si, cmpp (), n;
+X unsigned char *aa0p;
+X int sx, dx, doff, *aa0tip;
+X
+X struct savestr **v;
+X struct slink *start, *sl, *sj, *so, *sarr;
+X int lstart, lstop, ltmp, plstart, tstart, plstop, ptstop, ptstart, tstop;
+X
+X int *res, nres, tres;
+X
+X double tatprob;
+X
+/* sort the score left to right in lib pos */
+X
+X v = f_str->vptr;
+X n = f_str->nsave;
+X sarr = f_str->sarr;
+X
+X /* set things up in case nothing fits */
+X if (n <=0 || v[0]->score <= 0) return 0;
+X
+X if (v[0]->score < 0) { /* NOTE(review): cannot be true here, scores <= 0 already returned above; si is never set on this path */
+X sarr[0].vp = v[0];
+X sarr[0].score = v[0]->score;
+X sarr[0].next = NULL;
+X sarr[0].prev = NULL;
+X start = &sarr[0];
+X }
+X else {
+X
+X krsort (v, n); /* sort from left to right in library */
+X
+X start = NULL;
+X
+X /* for each alignment, see if it fits */
+X
+X
+X for (i = 0, si = 0; i < n; i++) {
+X /* if the score is less than the join threshold, skip it */
+X
+X if (v[i]->score < 0) continue;
+X
+X lstart = v[i]->start;
+X lstop = v[i]->stop;
+X tstart = lstart - v[i]->dp + f_str->noff; /* library start shifted by the diagonal: same expression indexes f_str->aa0t below */
+X tstop = lstop - v[i]->dp + f_str->noff;
+X
+X /* put the alignment in the group */
+X
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X sarr[si].prev = NULL;
+X sarr[si].tat = NULL;
+X
+X sarr[si].tatprob =
+X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0], ppst->nsq, f_str,
+X ppst->pseudocts, 1, ppst->zsflag);
+X sarr[si].tat = sarr[si].newtat; /* keep the stand-alone result as the current tat */
+X
+X
+X /* if it fits, then increase the score */
+X /* start points to a sorted (by total score) list of candidate
+X overlaps */
+X
+X for (sl = start; sl != NULL; sl = sl->next) {
+X plstart = sl->vp->start;
+X plstop = sl->vp->stop;
+X ptstart = plstart - sl->vp->dp + f_str->noff;
+X ptstop = plstop - sl->vp->dp + f_str->noff;
+#ifndef FASTM
+X if (plstart > lstop && (ptstop < tstart || ptstart > tstop)) {
+#else
+X if (plstop > lstart && ptstart > tstop) {
+#endif
+X /* alignment always uses probabilistic scoring ... */
+X /* sarr[si].score = sl->score + v[i]->score;
+X sarr[si].prev = sl;
+X break; */ /* quit as soon as the alignment has been added */
+X
+X tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
+X ppst->pam2[0], ppst->nsq, f_str,
+X ppst->pseudocts, 1, ppst->zsflag);
+X /* if our tatprob gets worse when we add this, forget it */
+X if(tatprob > sarr[si].tatprob) {
+X free(sarr[si].newtat->probs); /* get rid of new tat struct */
+X free(sarr[si].newtat);
+X continue; /* reuse this sarr[si] */
+X } else {
+X sarr[si].tatprob = tatprob;
+X free(sarr[si].tat->probs); /* get rid of old tat struct */
+X free(sarr[si].tat);
+X sarr[si].tat = sarr[si].newtat;
+X sarr[si].prev = sl;
+X sarr[si].score = sl->score + v[i]->score;
+X /*
+X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
+X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
+X */
+X break;
+X }
+X }
+X }
+X
+X /* now recalculate the list of best scores */
+X if (start == NULL)
+X start = &sarr[si]; /* put the first one in the list */
+X else
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X /* if (sarr[si].score > sj->score) { */ /* new score better than old */
+X if ( sarr[si].tatprob < sj->tatprob ||
+X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
+X sarr[si].next = sj; /* next best after new score */
+X if (so != NULL)
+X so->next = &sarr[si]; /* prev_best->next points to best */
+X else start = &sarr[si]; /* start points to best */
+X break; /* stop looking */
+X }
+X so = sj; /* previous candidate best */
+X }
+X si++; /* increment to next alignment */
+X }
+X }
+X
+X for (i = 0 ; i < si ; i++) { /* every filled sarr[] slot still owns one tat */
+X free(sarr[i].tat->probs);
+X free(sarr[i].tat);
+X }
+X
+X res = f_str->res;
+X tres = nres = 0;
+X aa0p = aa0;
+X aa0tip = f_str->aa0ti; /* point to temporary index */
+X a_res->min1 = start->vp->start;
+X a_res->min0 = 0;
+X
+X for (sj = start; sj != NULL; sj = sj->prev ) {
+X doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff); /* packed position in aa0 minus the segment's position in aa0t */
+X
+X /* fprintf(stderr,"doff: %3d\n",doff); */
+X
+X for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
+X dx <= sj->vp->stop; dx++) {
+X *aa0tip++ = f_str->aa0i[sx]; /* save index */
+X *aa0p++ = f_str->aa0t[sx++]; /* save sequence at index */
+X tres++;
+X res[nres++] = 0;
+X }
+X sj->vp->dp -= doff; /* NOTE: modifies the saved segment itself */
+X if (sj->prev != NULL) {
+X if (sj->prev->vp->start - sj->vp->stop - 1 > 0 )
+X tres += res[nres++] = (sj->prev->vp->start - sj->vp->stop - 1);
+X }
+X
+X /*
+X fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
+X sj->vp->start - sj->vp->dp + f_str->noff,
+X sj->vp->stop - sj->vp->dp + f_str->noff,
+X sj->vp->start,sj->vp->stop,sj->vp->dp,
+X f_str->noff,sj->vp->score);
+X
+X fprintf(stderr,"%3d - %3d: %3d\n",
+X sj->vp->start,sj->vp->stop,sj->vp->score);
+X */
+X a_res->max1 = sj->vp->stop+1;
+X a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
+X }
+X
+X /*
+X fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
+X a_res->min0,a_res->max0,a_res->min1,a_res->max1);
+X */
+X
+X /* now replace f_str->aa0t with aa0
+X (f_str->aa0t is permanent, aa0 is not)*/
+X for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
+X
+X return tres;
+}
+X
+/* pre_cons - preparation hook called before the consensus/alignment
+X   display routines.
+X
+X   When compiled with TFAST the library sequence aa1[0..n1-1] is passed
+X   through aatran() for the requested frame; the output goes to
+X   f_str->aa1x and the returned length is stored in f_str->n10.
+X   (aatran() is defined elsewhere - presumably the DNA->protein
+X   translation.)  Without TFAST nothing is done. */
+X
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
+{
+#if defined(TFAST)
+X  int out_len;
+X
+X  out_len = aatran(aa1, f_str->aa1x, n1, frame);
+X  f_str->n10 = out_len;
+#endif
+}
+X
+/* aln_func_vals - fill in the coordinate-mapping fields of *aln:
+X   qlfact, qlrev, llfact, llmult, llrev and frame.
+X   Called from calcons, calc_id, calc_code.
+X
+X   The query is never reversed, qlfact is always 1 and frame is always
+X   reported as 0.  With TFAST the library factors are 3 and the library
+X   counts as reversed for frame > 3; otherwise the factors are 1 and
+X   the library is not reversed. */
+void
+aln_func_vals(int frame, struct a_struct *aln)
+{
+X  /* settings shared by both builds */
+X  aln->qlrev = 0;
+X  aln->qlfact = 1;
+X  aln->frame = 0;
+X
+#if defined(TFAST)
+X  aln->llmult = 3;
+X  aln->llfact = 3;
+X  aln->llrev = (frame > 3) ? 1 : 0;
+#else /* FASTS */
+X  aln->llmult = 1;
+X  aln->llfact = 1;
+X  aln->llrev = 0;
+#endif
+}
+X
+#include "a_mark.h"
+X /* calcons - build the three parallel display strings for one alignment.
+X
+X seqc0 receives the query line (taken from f_str->aa0t, not from the
+X aa0 argument), seqc1 the library line (aa1, or f_str->aa1x/n10 when
+X compiled with TFAST) and seqca the per-column M_* match symbols.
+X
+X a_res.res[] is read as a code stream: 0 means one aligned pair, a
+X non-zero value n means n library residues shown against '-' in the
+X query line.
+X
+X Three parts are written: a left flank of mins columns, the encoded
+X middle (lenc columns, len_gap of them gaps) and a right flank of nd
+X columns.  aln->amin0/amin1/amax0/amax1, smin0/smin1, nident and nsim
+X are set; ngap_q and ngap_l are only reset to 0 here.
+X
+X *nc is set to lenc-len_gap; the return value is mins+lenc+nd.
+X */
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1, nn1, n0t;
+X int op, lenc, len_gap, nd, ns, itmp;
+X const unsigned char *aa1p;
+X char *sp0, *sp1, *spa;
+X int *rp;
+X int mins, smins;
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amin1 = a_res.min1;
+X aln->amax0 = a_res.max0;
+X aln->amax1 = a_res.max1;
+X
+X /* first fill in the ends */
+X n0 -= (f_str->nm0-1); /* NOTE(review): n0 is not read again in this function */
+X
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
+X /* will we show all the start ?*/
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins=a_res.min0;
+X else mins = min(a_res.min0,aln->llen/2);
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* library (aa1p) extends more to the left: mirror of the case above */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llen/2);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* pairs with the outer if: both starts are >= llen and showall is off */
+X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
+X smins=mins; /* NOTE(review): smins is assigned but never read in this function */
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X
+X memset(seqca,M_BLANK,mins); /* no match symbols under the left flank */
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* op is the previous "match/insert" operator; *rp is the current
+X operator or repeat count */
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
+X op = *rp++; /* get the next match/insert operator */
+X
+X /* get the alignment symbol */
+X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
+X *sp1 = pst.sq[aa1p[i1++]];
+X n0t++;
+X lenc++;
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X }
+X else { /* either op != 0 (previous was insert) or *rp != 0
+X (current is insert) */
+X if (op==0) { op = *rp++;} /* previous was match, start insert */
+X /* previous was insert - count through gap */
+X *sp0++ = '-';
+X *sp1++ = pst.sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X len_gap++;
+X lenc++;
+X }
+X }
+X
+X *spa = '\0';
+X *nc = lenc-len_gap; /* aligned columns, gaps excluded */
+/* now we have the middle, get the right end */
+X
+X ns = mins + lenc + aln->llen;
+X ns -= (itmp = ns %aln->llen);
+X if (itmp>aln->llen/2) ns += aln->llen;
+X nd = ns - (mins+lenc);
+X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1); /* NOTE(review): n0t (matched columns counted above) is used as the query length here */
+X
+X if (aln->showall==1) {
+X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X if ((nd-(n0t-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
+X n0t-a_res.max0,pst);
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X
+X return mins+lenc+nd;
+}
+X /* calcons_a - same job as calcons(), plus a query annotation line.
+X
+X seqc0 (query, from f_str->aa0t), seqc1 (library: aa1, or
+X f_str->aa1x/n10 with TFAST) and seqca (M_* match symbols) are built
+X from the code stream in a_res.res[]: 0 is one aligned pair, a
+X non-zero value n is n library residues shown against '-'.
+X
+X seqc0a receives one annotation character per column: blanks under
+X the left flank and under gap columns, ann_arr[*aa0ap] under aligned
+X columns, where aa0ap walks aa0a[] and is re-seated at
+X aa0a[f_str->nmoff[fnum]] whenever the fragment number
+X f_str->aa0ti[i0] changes or a gap has just ended.
+X
+X *nc is set to lenc-len_gap; the return value is mins+lenc+nd.
+X */
+int
+calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr, struct f_struct *f_str)
+{
+X int i0, i1, nn1, n0t;
+X int op, lenc, len_gap, nd, ns, itmp, p_ac, fnum, o_fnum;
+X const unsigned char *aa1p;
+X unsigned char *aa0ap;
+X char *sp0, *sp0a, *sp1, *spa;
+X int *rp;
+X int mins, smins;
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amin1 = a_res.min1;
+X aln->amax0 = a_res.max0;
+X aln->amax1 = a_res.max1;
+X
+X /* first fill in the ends */
+X n0 -= (f_str->nm0-1); /* NOTE(review): n0 is not read again in this function */
+X
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
+X /* will we show all the start ?*/
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins=a_res.min0;
+X else mins = min(a_res.min0,aln->llen/2);
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* library (aa1p) extends more to the left: mirror of the case above */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llen/2);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* pairs with the outer if: both starts are >= llen and showall is off */
+X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
+X smins=mins; /* NOTE(review): smins is assigned but never read in this function */
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X
+X memset(seqca,M_BLANK,mins);
+X memset(seqc0a,' ', mins); /* annotation line is blank under the left flank */
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp0a = seqc0a+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X n0t=lenc=len_gap=aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=op=p_ac= 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* op is the previous "match/insert" operator; *rp is the current
+X operator or repeat count */
+X
+X o_fnum = f_str->aa0ti[i0]; /* fragment number of the first aligned query position */
+X aa0ap = &aa0a[f_str->nmoff[o_fnum]+i0];
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X fnum = f_str->aa0ti[i0];
+X if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
+X if (p_ac == 0) { /* previous code was a match */
+X if (fnum != o_fnum) { /* continuing a match, but with a different fragment */
+X aa0ap = &aa0a[f_str->nmoff[fnum]];
+X o_fnum = fnum;
+X }
+X }
+X else { /* first match column after a gap: re-seat the annotation pointer */
+X p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0];
+X aa0ap = &aa0a[f_str->nmoff[fnum]];
+X }
+X op = *rp++; /* get the next match/insert operator */
+X
+X /* get the alignment symbol */
+X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
+X *sp0a++ = ann_arr[*aa0ap++];
+X *sp1 = pst.sq[aa1p[i1++]];
+X n0t++;
+X lenc++;
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X }
+X else { /* either op != 0 (previous was insert) or *rp != 0
+X (current is insert) */
+X if (op==0) { op = *rp++;} /* previous was match, start insert */
+X /* previous was insert - count through gap */
+X if (p_ac != 1) {
+X p_ac = 1; fnum = f_str->aa0ti[i0];
+X }
+X
+X *sp0++ = '-';
+X *sp1++ = pst.sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X *sp0a++ = ' ';
+X op--;
+X len_gap++;
+X lenc++;
+X }
+X }
+X
+X *sp0a = *spa = '\0';
+X *nc = lenc-len_gap; /* aligned columns, gaps excluded */
+/* now we have the middle, get the right end */
+X
+X ns = mins + lenc + aln->llen;
+X ns -= (itmp = ns %aln->llen);
+X if (itmp>aln->llen/2) ns += aln->llen;
+X nd = ns - (mins+lenc);
+X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1); /* NOTE(review): n0t (matched columns counted above) is used as the query length here */
+X
+X if (aln->showall==1) {
+X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X if ((nd-(n0t-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
+X n0t-a_res.max0,pst);
+X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X return mins+lenc+nd;
+}
+X
+/* aaptrshuffle - despite the name, reverses res[0..n-1] in place.
+X   (The random-shuffle variant survives only as a comment in the
+X   original code.)  Nothing is touched when n is 0 or 1. */
+void aaptrshuffle(unsigned char *res, int n) {
+X
+X  int left, right;
+X  unsigned char held;
+X
+X  /* swap the outermost pair and walk inwards until the indices meet */
+X  left = 0;
+X  right = n - 1;
+X  while (left < right) {
+X    held = res[right];
+X    res[right] = res[left];
+X    res[left] = held;
+X    left++;
+X    right--;
+X  }
+}
+X
+/* aa0shuffle - apply aaptrshuffle() to every query fragment separately.
+X
+X   Fragment i occupies aa0[f_str->nmoff[i]] up to, but not including,
+X   the last position before f_str->nmoff[i+1]; that final position is
+X   left where it is (presumably the fragment separator - confirm
+X   against the code that fills nmoff[]).  f_str->nm0 is the number of
+X   fragments.  n0 is accepted for interface compatibility and is not
+X   used. */
+void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
+X
+X  int i;
+X
+X  for (i = 0; i < f_str->nm0; i++) {	/* for each fragment */
+X    aaptrshuffle(&(aa0[f_str->nmoff[i]]),
+X		 f_str->nmoff[i+1] - f_str->nmoff[i] - 1);
+X  }
+}
+X
+/* build an array of match/ins/del - length strings
+X
+X calc_code walks the code stream in a_res.res[] (0 = one aligned pair,
+X non-zero n = n library residues against a gap) and appends one
+X run-length token to al_str, through update_code(), every time the
+X state changes: match <-> gap, or a match that continues in a different
+X query fragment (fragment numbers are f_str->aa0ti[i0]+1).
+X
+X al_str must already hold a NUL-terminated string on entry, because
+X strlen(al_str) is used to work out the space left of al_str_n.
+X
+X aln->amin0/amin1/amax0/amax1, nident and nsim are set; ngap_q,
+X ngap_l and nfs are only reset to 0.  Returns lenc - len_gap, the
+X number of aligned (non-gap) columns.
+X
+X NOTE(review): tmp_cnt, mins and smins are never read, and nn1 is
+X assigned but not used in this function. */
+int
+calc_code(const unsigned char *aa0, const int n0,
+X const unsigned char *aa1, const int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc, len_gap;
+X int p_ac, op_cnt;
+X const unsigned char *aa1p;
+X char tmp_cnt[20];
+X char sp0, sp1, *sq;
+X int *rp;
+X int mins, smins;
+X int o_fnum,fnum = 0;
+X
+X if (pst.ext_sq_set) {sq = pst.sqx;}
+X else {sq = pst.sq;}
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amin1 = a_res.min1;
+X aln->amax0 = a_res.max0;
+X aln->amax1 = a_res.max1;
+X
+X rp = a_res.res;
+X lenc = len_gap =aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=aln->nfs=op=p_ac = 0;
+X op_cnt = 0;
+X
+X i0 = a_res.min0; /* start in aa0 (f_str->aa0t) */
+X i1 = a_res.min1; /* start in aa1 */
+X tmp_cnt[0]='\0';
+X
+X o_fnum = f_str->aa0ti[i0] + 1; /* fragment number */
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X fnum = f_str->aa0ti[i0]+1;
+X if (op == 0 && *rp == 0) { /* previous was match, this is match */
+X if (p_ac == 0) { /* previous code was a match */
+X if (fnum == o_fnum) { op_cnt++;}
+X else { /* continuing a match, but with a different fragment */
+X update_code(al_str,al_str_n-strlen(al_str), p_ac, op_cnt, o_fnum);
+X o_fnum = fnum;
+X op_cnt=1;
+X }
+X }
+X else { /* a gap run has just ended: flush it and start a new match run */
+X update_code(al_str,al_str_n-strlen(al_str),p_ac,op_cnt,o_fnum);
+X op_cnt = 1; p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0] + 1;
+X }
+X op = *rp++;
+X lenc++;
+X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
+X sp0 = pst.sq[f_str->aa0t[i0++]];
+X sp1 = pst.sq[aa1p[i1++]];
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X }
+X else { /* gap column: only the library position advances */
+X if (op==0) op = *rp++;
+X if (p_ac == 1) { op_cnt++;}
+X else { /* a match run has just ended: flush it and start a new gap run */
+X update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum);
+X op_cnt = 1; p_ac = 1; fnum = f_str->aa0ti[i0] + 1;
+X }
+X op--; lenc++; i1++; len_gap++;
+X }
+X }
+X update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum); /* flush the final run */
+X
+X return lenc - len_gap;
+}
+X
+/* update_code(): if "op" == 0, this is the end of a match of length
+X   "op_cnt" involving fragment "fnum"
+X   otherwise, this is an insertion (op==1) or deletion (op==2)
+X
+X   The token appended to al_str is "=<op_cnt>[<fnum>]" for a match and
+X   "-<op_cnt>" / "+<op_cnt>" for op 1 / 2.
+X
+X   al_str_max is the space still free in al_str, counting the byte
+X   needed for the terminating NUL (callers pass
+X   al_str_n - strlen(al_str)).  strncat() writes up to n characters
+X   AND a NUL, so at most al_str_max-1 characters are appended; a token
+X   that does not fit is truncated, and nothing is written when there
+X   is no room for even one character.
+*/
+X
+void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
+X
+X  char op_char[4]={"=-+"};
+X  /* worst case "%c%d[%d]": 1 + 11 + 1 + 11 + 1 characters + NUL = 26 */
+X  char tmp_cnt[32];
+X
+X  /* no room for a character plus the terminator (also covers a
+X     negative value, which strncat() would treat as a huge size_t) */
+X  if (al_str_max <= 1) return;
+X
+X  if (op == 0)
+X    sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
+X  else
+X    sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X
+X  strncat(al_str,tmp_cnt,(size_t)(al_str_max - 1));
+}
+X
+/* calc_id - count identities and similarities along an encoded
+X   alignment without building any display strings.
+X
+X   a_res.res[] is read as a code stream: 0 is one aligned pair, a
+X   non-zero value n stands for n library residues opposite a gap.  The
+X   query residues come from f_str->aa0t, the library residues from aa1
+X   (or from f_str->aa1x when compiled with TFAST).
+X
+X   Sets aln->amin0/amin1/amax0/amax1, nident and nsim; ngap_q, ngap_l
+X   and nfs are reset to 0.  Returns the number of aligned (non-gap)
+X   columns. */
+int
+calc_id(const unsigned char *aa0, int n0,
+X	const unsigned char *aa1, int n1,
+X	struct a_struct *aln,
+X	struct a_res_str a_res,
+X	struct pstruct pst,
+X	struct f_struct *f_str)
+{
+X  const unsigned char *lib_seq;	/* library residues being aligned */
+X  int *code;			/* next entry of a_res.res[] */
+X  int q_pos, l_pos;		/* current query / library positions */
+X  int run_left;			/* gap columns still owed by the current code */
+X  int n_cols, n_gap_cols;
+X  int q_chr, l_chr;
+X
+#ifdef TFAST
+X  lib_seq = f_str->aa1x;
+#else
+X  lib_seq = aa1;
+#endif
+X
+X  aln->amin0 = a_res.min0;
+X  aln->amin1 = a_res.min1;
+X  aln->amax0 = a_res.max0;
+X  aln->amax1 = a_res.max1;
+X
+X  aln->nfs = 0;
+X  aln->ngap_l = 0;
+X  aln->ngap_q = 0;
+X  aln->nsim = 0;
+X  aln->nident = 0;
+X
+X  code = a_res.res;
+X  q_pos = a_res.min0;
+X  l_pos = a_res.min1;
+X  run_left = 0;
+X  n_cols = 0;
+X  n_gap_cols = 0;
+X
+X  while (q_pos < a_res.max0 || l_pos < a_res.max1) {
+X    /* a new code is fetched only when the previous run is used up */
+X    if (run_left == 0) run_left = *code++;
+X
+X    n_cols++;
+X
+X    if (run_left != 0) {	/* gap column: only the library advances */
+X      l_pos++;
+X      run_left--;
+X      n_gap_cols++;
+X      continue;
+X    }
+X
+X    /* aligned pair */
+X    if (pst.pam2[0][f_str->aa0t[q_pos]][lib_seq[l_pos]] >= 0) {
+X      aln->nsim++;
+X    }
+X    q_chr = pst.sq[f_str->aa0t[q_pos++]];
+X    l_chr = pst.sq[lib_seq[l_pos++]];
+X    if (toupper(q_chr) == toupper(l_chr)) aln->nident++;
+X  }
+X
+X  return n_cols - n_gap_cols;
+}
+X
+#ifdef PCOMPLIB
+X
+#include "structs.h"
+#include "p_mw.h"
+X
+/* update_params - copy the per-query values carried in *qm_msg into
+X   the manager message *m_msg (n0, nm0, escore_flg, qshuffle) and into
+X   the parameter block *ppst (n0 only). */
+void
+update_params(struct qmng_str *qm_msg,
+X	      struct mngmsg *m_msg, struct pstruct *ppst)
+{
+X  /* query length goes to both destinations; m_msg takes the value as
+X     stored in ppst, exactly as the chained assignment would */
+X  ppst->n0 = qm_msg->n0;
+X  m_msg->n0 = ppst->n0;
+X
+X  m_msg->qshuffle = qm_msg->qshuffle;
+X  m_msg->escore_flg = qm_msg->escore_flg;
+X  m_msg->nm0 = qm_msg->nm0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropfs2.c ||
+echo 'restore of dropfs2.c failed'
+Wc_c="`wc -c < 'dropfs2.c'`"
+test 59078 -eq "$Wc_c" ||
+ echo 'dropfs2.c: original size 59078, current size' "$Wc_c"
+fi
+# ============= dropfx.c ==============
+if test -f 'dropfx.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropfx.c (File already exists)'
+else
+echo 'x - extracting dropfx.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropfx.c' &&
+X
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropfx.c,v 1.68 2007/04/26 18:37:18 wrp Exp $ */
+X
+/* implements the fastx algorithm, see:
+X
+X W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+X DNA sequences with protein sequences" Genomics 46:24-36
+X
+X see dropnfa.c for better variable descriptions and comments
+*/
+X
+/* 18-Sept-2006 - remove global variables used for alignment */
+X
+/* 22-June-2006 - correct incorrect alignment coordinates generated
+X after pro_dna() on projected DNA region.
+*/
+X
+/* 9-May-2003 -> 3.46 changed lx_band to use projected protein
+X boundary end. this fixes some addressing issues on MacOSX, and
+X speeds up alignment on very long proteins
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+X
+/* this must be consistent with upam.h */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+X
+/* globals for fasta */
+#define MAXWINDOW 64
+X
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+X
+#ifndef ALLOCN0
+static char *verstr="3.5 Sept 2006";
+#else
+static char *verstr="3.5an0 May 2006";
+#endif
+X
+struct dstruct /* diagonal structure for saving current run */
+{
+X int score; /* hash score of current match */
+X int start; /* start of current match */
+X int stop; /* end of current match */
+X struct savestr *dmax; /* location in vmax[] where best score data saved */
+};
+X
+struct savestr
+{
+X int score; /* pam score with segment optimization */
+X int score0; /* pam score of best single segment */
+X int gscore; /* score from global match */
+X int dp; /* diagonal of match */
+X int start; /* start of match in lib seq */
+X int stop; /* end of match in lib seq */
+};
+X
+struct swstr { int H, E;};
+X
+struct bdstr { int CC, DD, CP, DP;};
+X
+void savemax();
+void kpsort();
+X
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+X /* f_struct - per-query work area allocated and filled in by init_work()
+X and handed back to the caller through its f_arg argument.
+X Notes marked "init_work:" describe only what init_work() stores;
+X how the other routines use a field should be confirmed there. */
+struct f_struct {
+X struct dstruct *diag; /* init_work: calloc'ed, MAXDIAG entries (n0 entries with ALLOCN0) */
+X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
+X struct savestr *vptr[MAXSAV];
+X struct savestr *lowmax;
+X int ndo; /* init_work: set to 0 */
+X int noff;
+X int hmask; /* hash constants */
+X int *pamh1; /* pam based array */
+X int *pamh2; /* pam based kfact array */
+X int *link, *harr; /* hash arrays */
+X int kshft; /* shift width */
+X int nsav, lowscor; /* number of saved runs, worst saved run */
+#ifndef TFAST
+X unsigned char *aa0x; /* contains translated codons 111222333*/
+X unsigned char *aa0y; /* contains translated codons 123123123*/
+#else
+X unsigned char *aa1x; /* contains translated codons 111222333 */
+X unsigned char *aa1y; /* contains translated codons 123123123 */
+#endif
+X struct sx_s *cur;
+X int *waa0; /* init_work: (nsq+1)*n0 table of pam2[ip][i][aa0[j]] */
+X int *waa1; /* init_work: (nsq+1)*n0 table of pam2[0][i][aa0[j]] */
+X int *res; /* init_work: calloc'ed alignment results array */
+X int max_res; /* init_work: number of entries allocated for res */
+};
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+static int dmatchx(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int hoff, int window,
+X int **pam2, int gdelval, int ggapval, int gshift,
+X struct f_struct *f_str);
+X
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+X struct savestr *dmax, int **pam2,
+X struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+X const unsigned char *dna_prot_seq, int len_dna_prot,
+X int **pam_matrix, int gopen, int gext,
+X int gshift, int start_diag, int width, struct f_struct *f_str);
+X
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
+X
+extern void w_abort (char *p, char *p1);
+X
+/* initialize for fasta
+X
+X   init_work - allocate and fill the per-query work area.
+X
+X   aa0, n0 - query sequence and its length
+X   ppst    - search parameters; param_u.fa.cgap, .optcut (unless
+X             optcut_set == 1) and .pgap are computed and stored here
+X   f_arg   - receives the newly allocated struct f_struct
+X
+X   Work done, in order:
+X     1. derive the join/optimisation thresholds from the query length
+X     2. work out the hash shift width (kshft) and mask from the largest
+X        hsq[] value below NMAP, and allocate harr/pamh1/pamh2/link
+X     3. FASTX build only: translate the query in three frames with
+X        saatran() into aa0x, and interleave the frames into aa0y
+X     4. hash every ktup word of the (translated) query into harr/link
+X        and fill pamh2/pamh1
+X     5. allocate the diagonal array, the two (nsq+1)*n0 score tables
+X        waa0/waa1 and the alignment results array
+X
+X   Any allocation failure prints a message on stderr and calls exit(1).
+*/
+X
+void
+init_work (unsigned char *aa0, int n0,
+X	   struct pstruct *ppst,
+X	   struct f_struct **f_arg)
+{
+X  int mhv, phv;
+X  int hmax;
+X  int i0, hv;
+X  int pamfact;
+X  int btemp;
+X  struct f_struct *f_str;
+X  int ktup;	/* word size examined */
+X  int fact;	/* factor used to scale ktup match value */
+X  int kt1;	/* ktup-1 */
+X  int lkt;	/* last ktup - initially kt1, but can be increased
+X		   for hsq >= NMAP */
+X
+X  int maxn0;
+X  int *pwaa;
+X  int i, j;
+X  int *waa;
+X  int *res;
+X  int nsq, ip, *hsq;
+#ifndef TFAST
+X  int last_n0, itemp;
+X  unsigned char *fd, *fs, *aa0x, *aa0y;
+X  int n0x;
+#endif
+X
+X  if (ppst->ext_sq_set) {
+X    nsq = ppst->nsqx; ip = 1;
+X    hsq = ppst->hsqx;
+X  }
+X  else {
+X    nsq = ppst->nsq; ip = 0;
+X    hsq = ppst->hsq;
+X  }
+X
+X  /* the result was previously used without a check */
+X  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))==NULL) {
+X    fprintf (stderr, " cannot allocate f_struct\n");
+X    exit (1);
+X  }
+X
+X  btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+X    n0 / ppst->param_u.fa.bestscale +
+X    ppst->param_u.fa.bkfact *
+X    (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+X  btemp = min (btemp, ppst->param_u.fa.bestmax);
+X  if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+X
+X  ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+X  if (ppst->param_u.fa.optcut_set != 1)
+#ifndef TFAST
+X    ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+X    ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+X
+#ifdef OLD_FASTA_GAP
+X  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+X  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+X  pamfact = ppst->param_u.fa.pamfact;
+X  ktup = ppst->param_u.fa.ktup;
+X  fact = ppst->param_u.fa.scfact * ktup;
+X
+X  if (pamfact == -1)
+X    pamfact = 0;
+X  else if (pamfact == -2)
+X    pamfact = 1;
+X
+X  /* largest hash code that is still below NMAP */
+X  for (i0 = 1, mhv = -1; i0 <=nsq; i0++)
+X    if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
+X
+X  if (mhv <= 0) {
+X    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+X    exit (1);
+X  }
+X
+X  /* number of bits needed to hold mhv */
+X  for (f_str->kshft = 0; mhv > 0; mhv /= 2)
+X    f_str->kshft++;
+X
+/* kshft = 2; */
+X  kt1 = ktup - 1;
+X  hv = 1;
+X  for (i0 = 0; i0 < ktup; i0++) {
+X    hv = hv << f_str->kshft;
+X  }
+X  hmax = hv;
+X  f_str->hmask = (hmax >> f_str->kshft) - 1;
+X
+X
+X  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate hash array\n");
+X    exit (1);
+X  }
+X  if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh1 array\n");
+X    exit (1);
+X  }
+X  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh2 array\n");
+X    exit (1);
+X  }
+X  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate hash link array");
+X    exit (1);
+X  }
+X
+#ifdef TFAST
+X  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+X					    sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+X    exit (1);
+X  }
+X  f_str->aa1x++;	/* leaves one zeroed byte in front of the data */
+X
+X  if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+X					    sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);
+X    exit (1);
+X  }
+X  f_str->aa1y++;
+#else /* FASTX */
+X  maxn0 = n0 + 2;
+X  if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+X    exit (1);
+X  }
+X  aa0x++;	/* leaves one zeroed byte in front of the data */
+X  f_str->aa0x = aa0x;
+X
+X  if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);
+X    exit (1);
+X  }
+X  aa0y++;
+X  f_str->aa0y = aa0y;
+X
+X  /* the three frames are stored one after another in aa0x */
+X  last_n0 = 0;
+X  for (itemp=0; itemp<3; itemp++) {
+X    n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);
+X    /*
+X      for (i=0; i<n0x; i++) {
+X	fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+X	if ((i%60)==59) fprintf(stderr,"\n");
+X      }
+X      fprintf(stderr,"\n");
+X    */
+X    last_n0 += n0x+1;
+X  }
+X
+X  /* fprintf(stderr,"\n"); */
+X
+X  /* interleave: frame k of aa0x goes to aa0y[k], aa0y[k+3], ... */
+X  for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {
+X    for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+X    *fd=EOSEQ;
+X  }
+X
+X  /* now switch aa0 and aa0x for hashing functions */
+X  /* this seems dangerous in threaded code, but only the pointer is changed,
+X     not the data itself */
+X
+X  fs = aa0;
+X  aa0 = aa0x;
+X  aa0x = fs;
+X
+#endif
+X
+X  for (i0 = 0; i0 < hmax; i0++)
+X    f_str->harr[i0] = -1;
+X  for (i0 = 0; i0 < n0; i0++)
+X    f_str->link[i0] = -1;
+X
+X  /* encode the aa0 array */
+X
+X  phv = hv = 0;
+X  lkt = kt1;
+X  /* prime hv/phv with the first ktup-1 residues */
+X  for (i0 = 0; i0 < min(lkt,n0); i0++) {
+X    if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}
+X    hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+X    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+X  }
+X
+X  for (; i0 < n0; i0++) {
+X    if (hsq[aa0[i0]] >= NMAP) {
+X      hv=phv=0;
+X      lkt = i0+ktup;
+X      /* restart hv, phv calculation */
+X      for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+X	if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}
+X	hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+X	phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+X      }
+X    }
+X    if (i0 >= n0) break;
+X    hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
+X    /* chain this position in front of the previous one with the same word */
+X    f_str->link[i0] = f_str->harr[hv];
+X    f_str->harr[hv] = i0;
+X    if (pamfact) {
+X      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+X      /* this check should always be true, but just in case */
+X      if (hsq[aa0[i0-kt1]]<NMAP)
+X	phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+X    }
+X    else f_str->pamh2[hv] = fact * ktup;
+X  }
+X
+#ifndef TFAST
+X  /* done hashing, now switch aa0, aa0x back */
+X  fs = aa0;
+X  aa0 = aa0x;
+X  aa0x = fs;
+#endif
+X
+/* this has been modified from 0..<nsq to 1..<=nsq because the
+X   pam2[0][0] is now undefined for consistency with blast
+*/
+X
+X  if (pamfact)
+X    for (i0 = 1; i0 <= nsq; i0++)
+X      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+X  else
+X    for (i0 = 1; i0 <= nsq; i0++)
+X      f_str->pamh1[i0] = fact;
+X
+X  f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+X
+X  /* the byte count is converted as a whole so that it matches %ld */
+#ifndef ALLOCN0
+X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+X						sizeof (struct dstruct)))==NULL) {
+X    fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+X	     (long)(MAXDIAG *sizeof (struct dstruct)));
+X    exit (1);
+X  };
+#else
+X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+X						sizeof (struct dstruct)))==NULL) {
+X    fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+X	     (long)(n0*sizeof (struct dstruct)));
+X    exit (1);
+X  };
+#endif
+X
+X
+X  /* waa0: score of every residue code against every query position,
+X     using pam2[ip] */
+X  if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+X    fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
+X    exit(1);
+X  }
+X
+X  pwaa = waa;
+X  for (i=0; i<=nsq; i++) {
+X    for (j=0;j<n0; j++) {
+X      *pwaa = ppst->pam2[ip][i][aa0[j]];
+X      pwaa++;
+X    }
+X  }
+X  f_str->waa0 = waa;
+X
+X  /* waa1: the same table built from pam2[0] */
+X  if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
+X    fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
+X    exit(1);
+X  }
+X
+X  pwaa = waa;
+X  for (i=0; i<=nsq; i++) {
+X    for (j=0;j<n0; j++) {
+X      *pwaa = ppst->pam2[0][i][aa0[j]];
+X      pwaa++;
+X    }
+X  }
+X  f_str->waa1 = waa;
+X
+#ifndef TFAST
+X  maxn0 = max(2*n0,MIN_RES);
+#else
+X  /* maxn0 needs to be large enough to accommodate introns
+X     for TFASTX.  For all other functions, it will be
+X     more reasonable. */
+X  maxn0 = max(4*n0,MIN_RES);
+#endif
+X  if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X    fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X    exit(1);
+X  }
+X  f_str->res = res;
+X  f_str->max_res = maxn0;
+X
+X  *f_arg = f_str;
+}
+X
+X
+/*
+X get_param - describe the current search parameters as text.
+X
+X pstring1 is a message to the manager (the original note gives its size
+X as "currently 512"); pstring2, when not NULL, receives the same
+X information in a markx==10 format. Both are written with sprintf(), so
+X the caller must supply buffers large enough for the formatted text.
+X " init1" is appended to pstring1 when param_u.fa.iniflag is set.
+X
+X The OLD_FASTA_GAP formats for pstring1 used to contain three %d
+X conversions for the four matrix values that are passed (pam_h, pam_l,
+X pam_xx, pam_xm), which shifted every later argument by one and handed
+X an int to %s. They now consume all four values.
+*/
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifndef TFAST
+X char *pg_str="FASTX";
+#else
+X char *pg_str="TFASTX";
+#endif
+X
+X if (!pstr->param_u.fa.optflag)
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d:%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d width: %3d",pg_str,verstr,
+#else
+X sprintf (pstring1, "%s (%s) function [%s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d width: %3d",pg_str,verstr,
+#endif
+X pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx,pstr->pam_xm,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->gdelval, pstr->ggapval, pstr->gshift,
+X pstr->param_u.fa.optwid);
+X else
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d shift: %3d, width: %3d",pg_str,verstr,
+#else
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d shift: %3d, width: %3d",pg_str,verstr,
+#endif
+X pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx, pstr->pam_xm,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
+X pstr->gshift,pstr->param_u.fa.optwid);
+X
+X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
+X /*
+X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
+X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+X */
+X
+X /* the markx==10 form lists only pam_h and pam_l, matching its two %d */
+X if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#else
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open_ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#endif
+X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
+X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
+X pstr->param_u.fa.cgap);
+X }
+}
+X
+/*
+X close_work - release every buffer owned by *f_arg, then the structure
+X itself, and reset *f_arg to NULL. Nothing happens when *f_arg is
+X already NULL. aa0, n0 and ppst are accepted but not used here.
+*/
+void
+close_work (const unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg)
+{
+X struct f_struct *fs = *f_arg;
+X
+X if (fs == NULL) return;
+X
+X free(fs->cur);
+#ifndef TFAST
+X /* the stored pointers are stepped back by one before being freed */
+X fs->aa0y -= 1;
+X free(fs->aa0y);
+X fs->aa0x -= 1;
+X free(fs->aa0x);
+#else
+X /* the stored pointers are stepped back by one before being freed */
+X fs->aa1y -= 1;
+X free(fs->aa1y);
+X fs->aa1x -= 1;
+X free(fs->aa1x);
+#endif
+X free(fs->res);
+X free(fs->waa1);
+X free(fs->waa0);
+X free(fs->diag);
+X free(fs->link);
+X free(fs->pamh2);
+X free(fs->pamh1);
+X free(fs->harr);
+X free(fs);
+X
+X *f_arg = NULL;
+}
+X
+/*
+X do_fastx - scan aa1 against the lookup table (harr/link) held in f_str
+X and score the best matching regions.
+X
+X Results are returned through rst->score[]: score[1] is the best single
+X region after spam() rescoring, score[0] is the larger of that and the
+X sconn() joined score, and score[2] equals score[0] unless
+X param_u.fa.optflag is set and score[0] exceeds param_u.fa.optcut, in
+X which case it is the dmatchx() score.
+X All three are 0 when n1 < ktup or when no region is saved, and -1 when
+X n0+n1+1 >= MAXDIAG. *hoff is written only when the end of the function
+X is reached; the early returns leave it untouched.
+*/
+void do_fastx (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst, struct f_struct *f_str,
+X struct rstruct *rst, int *hoff)
+{
+X int nd; /* diagonal array size */
+X int lhval;
+X int kfact;
+X int i;
+X int my_hoff;
+X register struct dstruct *dptr;
+X register int tscor;
+X
+#ifndef ALLOCN0
+X register struct dstruct *diagp;
+#else
+X register int dpos;
+X int lposn0;
+#endif
+X struct dstruct *dpmax;
+X register int lpos;
+X int tpos;
+X struct savestr *vmptr;
+X int scor, tmp;
+X int im, ib, nsave;
+X int ktup, kt1, *hsq, ip, lkt;
+#ifndef TFAST
+X int n0x31, n0x32;
+X n0x31 = (n0-2)/3;
+X n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+X const unsigned char *fs;
+X unsigned char *fd;
+X int n1x31, n1x32, last_n1, itemp;
+X n1x31 = (n1-2)/3;
+X n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+X
+X /* choose the hash alphabet and scoring matrix index together */
+X if (ppst->ext_sq_set) {
+X ip = 1;
+X hsq = ppst->hsqx;
+X }
+X else {
+X ip = 0;
+X hsq = ppst->hsq;
+X }
+X
+X ktup = ppst->param_u.fa.ktup;
+X kt1 = ktup-1;
+X
+X if (n1 < ktup) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+X if (n0+n1+1 >= MAXDIAG) {
+X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+X rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X return;
+X }
+X
+X f_str->noff = n0 - 1;
+X
+#ifdef ALLOCN0
+X nd = n0;
+#endif
+X
+#ifndef ALLOCN0
+X nd = n0 + n1;
+#endif
+X
+X /* only entries from f_str->ndo upwards are reset here; without ALLOCN0
+X the entries below nd are cleared again after the scan (see below) */
+X dpmax = &f_str->diag[nd];
+X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+X {
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X vmptr->score = 0;
+X f_str->lowmax = f_str->vmax;
+X f_str->lowscor = 0;
+X
+X /* start hashing */
+X lhval = 0;
+X lkt = kt1;
+X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lhval = 0; lkt=lpos+ktup; continue;
+#ifdef ALLOCN0 /* reinitialize dptr */
+X /* NOTE(review): these statements follow the continue above and so
+X cannot execute */
+X dptr = &f_str->diag[lpos % nd];
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr->score = 0;
+#endif
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X }
+X
+#ifndef ALLOCN0
+X diagp = &f_str->diag[f_str->noff + lkt];
+X for (; lpos < n1; lpos++, diagp++) {
+X /* if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; continue;} */
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lpos++ ; diagp++;
+X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+X if (lpos >= n1) break;
+X lhval = 0;
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+X lposn0 = f_str->noff + lpos;
+X for (; lpos < n1; lpos++, lposn0++) {
+X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X dpos = lposn0 - tpos;
+X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+X tscor += ktup;
+X if ((tscor -= lpos) <= 0) { /* better to start over */
+X scor = dptr->score;
+X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
+#ifdef ALLOCN0
+X savemax (dptr, dpos, f_str);
+#else
+X savemax (dptr, f_str);
+#endif
+X if ((tscor += scor) >= kfact) {
+X dptr->score = tscor;
+X dptr->stop = lpos;
+X }
+X else {
+X dptr->score = kfact;
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X } /* continue current run in diagonal */
+X else {
+X dptr->score += f_str->pamh1[aa0[tpos]];
+X dptr->stop = lpos;
+X }
+X }
+X else {
+X dptr->score = f_str->pamh2[lhval];
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X } /* end tpos */
+X
+#ifdef ALLOCN0
+X /* reinitialize diag structure */
+X loopl:
+X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor) {
+X savemax (dptr, lpos, f_str);
+X }
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr->score = 0;
+#endif
+X } /* end lpos */
+X
+#ifdef ALLOCN0
+X for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
+X savemax (dptr, dpos, f_str);
+X }
+#else
+X /* save what is still pending on each diagonal, then clear the
+X diagonals that were used */
+X for (dptr = f_str->diag; dptr < dpmax;) {
+X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X f_str->ndo = nd;
+#endif
+X
+/*
+X at this point all of the elements of aa1[lpos]
+X have been searched for elements of aa0[tpos]
+X with the results in diag[dpos]
+*/
+X
+X /* rescore every saved region with spam() and collect them in vptr[] */
+X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X {
+X /*
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+vmptr->start-vmptr->dp,
+X f_str->noff+vmptr->stop-vmptr->dp,
+X vmptr->start,vmptr->stop,
+X vmptr->dp,vmptr->score);
+X */
+X if (vmptr->score > 0) {
+X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
+X f_str->vptr[nsave++] = vmptr;
+X }
+X }
+X
+X if (nsave <= 0) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+#ifndef TFAST
+X /* FASTX code here to modify the start, stop points for
+X the three phases of the translated protein sequence
+X */
+X /*
+X fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X
+X fprintf(stderr,"---\n");
+X */
+X for (ib=0; ib<nsave; ib++) {
+X if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
+X f_str->vptr[ib]->dp += n0x32;
+X if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
+X f_str->vptr[ib]->dp += n0x31;
+X }
+X
+X /*
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X */
+#else
+X
+X /* TFASTX code here to modify the start, stop points for
+X the three phases of the translated protein sequence
+X TFASTX modifies library start points, rather than
+X query start points
+X */
+X
+X /*
+X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",n0, f_str->noff,n1,n1x31,n1x32);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X
+X fprintf(stderr,"---\n");
+X */
+X
+X for (ib=0; ib<nsave; ib++) {
+X if (f_str->vptr[ib]->start >= n1x32) {
+X f_str->vptr[ib]->start -= n1x32;
+X f_str->vptr[ib]->stop -= n1x32;
+X f_str->vptr[ib]->dp -= n1x32;
+X }
+X if (f_str->vptr[ib]->start >= n1x31) {
+X f_str->vptr[ib]->start -= n1x31;
+X f_str->vptr[ib]->stop -= n1x31;
+X f_str->vptr[ib]->dp -= n1x31;
+X }
+X }
+X
+X /*
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X */
+X
+#endif /* TFASTX */
+X
+X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
+X ppst->param_u.fa.pgap, f_str);
+X
+X /* linear scan for the highest-scoring single region */
+X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+X
+/* kssort (f_str->vptr, nsave); */
+X
+X rst->score[1] = vmptr->score; /* best single score - init1*/
+X rst->score[0] = max (scor, vmptr->score); /* initn */
+X rst->score[2] = rst->score[0]; /* initn */
+X
+X my_hoff=f_str->noff - vmptr->dp;
+X
+X /*
+X if (n1 > 5000) {
+X fprintf(stderr," Long n1: %d\n",n1);
+X }
+X */
+X
+X if (ppst->param_u.fa.optflag) {
+X if (rst->score[0] > ppst->param_u.fa.optcut) {
+#ifndef TFAST
+X rst->score[2] = dmatchx(aa0, n0,aa1,n1,my_hoff,
+X ppst->param_u.fa.optwid, ppst->pam2[ip],
+X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#else /* TFASTX */
+X /* generate f_str->aa1y */
+/*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[aa1[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+X fprintf(stderr,"\n-----\n");
+*/
+X /* copy the three EOSEQ-terminated runs in aa1 into aa1y, writing run
+X itemp to positions itemp, itemp+3, itemp+6, ... */
+X for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
+X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+X *fd=EOSEQ;
+X }
+X
+/*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[f_str->aa1y[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+*/
+X /* note: my_hoff is reassigned here with the opposite sign convention */
+X rst->score[2] = dmatchx(aa0, n0, aa1, n1, my_hoff=vmptr->dp-f_str->noff,
+X ppst->param_u.fa.optwid, ppst->pam2[ip],
+X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#endif /* TFASTX */
+X }
+X }
+X *hoff = my_hoff;
+}
+X
+/* returns rst.score[0] - initn
+X rst.score[1] - init1
+X rst.score[2] - opt
+*/
+X
+/*
+X do_work - compute the initial scores for one query/library pair.
+X rst->score[0..2] are cleared, escore is set to 1.0 and segnum/seglen to
+X 1, and do_fastx() then fills in the scores.
+X Without TFAST the comparison uses f_str->aa0x against aa1 as given.
+X With TFAST, aa1 is first run through saatran() three times (arguments
+X frame*3, frame*3+1, frame*3+2), the results being laid end to end in
+X f_str->aa1x with one extra position after each.
+X qr_flg is not used here.
+*/
+void do_work (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst, struct f_struct *f_str,
+X int qr_flg, struct rstruct *rst)
+{
+X int hoff;
+#ifdef TFAST
+X /* aa0 has a protein sequence */
+X /* aa1 has a raw DNA sequence */
+X unsigned char *xlated = f_str->aa1x;
+X int filled = 0; /* offset in aa1x where the next translation starts */
+X int n10, phase;
+X
+X for (phase = 0; phase < 3; phase++) {
+X n10 = saatran(aa1, &xlated[filled], n1, frame*3 + phase);
+X filled += n10+1;
+X }
+X n10 = filled-1;
+#endif
+X
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X rst->escore = 1.0;
+X rst->segnum = rst->seglen = 1;
+X
+#ifdef TFAST
+X do_fastx (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
+#else
+X do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#endif
+}
+X
+/*
+X do_opt - rerun do_fastx() with param_u.fa.optflag forced to 1, then put
+X the caller's optflag value back. Scores are returned in rst; the offset
+X reported by do_fastx() is discarded. frame is not used here.
+*/
+void do_opt (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct rstruct *rst)
+{
+X int hoff;
+X int saved_optflag = ppst->param_u.fa.optflag;
+X
+X ppst->param_u.fa.optflag = 1;
+X
+#ifdef TFAST
+X do_fastx (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#else
+X do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#endif
+X
+X ppst->param_u.fa.optflag = saved_optflag;
+}
+X
+/*
+X savemax - record the run held in dptr in the table of saved regions.
+X With ALLOCN0 the diagonal number is passed in as dpos; otherwise it is
+X computed as dptr's index within f_str->diag.
+X If dptr->dmax already points at an entry with the same diagonal and
+X start, that entry's stop (and its score, if improved) is updated;
+X otherwise the current lowest entry, f_str->lowmax, is overwritten.
+X Unless one of the early returns is taken, f_str->lowmax and
+X f_str->lowscor are then recomputed by scanning all MAXSAV entries.
+*/
+#ifdef ALLOCN0
+void
+savemax (dptr, dpos, f_str)
+X register struct dstruct *dptr;
+X int dpos;
+X struct f_struct *f_str;
+{
+X register struct savestr *vmptr;
+X register int i;
+X
+#else
+void
+savemax (dptr, f_str)
+X register struct dstruct *dptr;
+X struct f_struct *f_str;
+{
+X register int dpos;
+X register struct savestr *vmptr;
+X register int i;
+X
+X dpos = (int) (dptr - f_str->diag);
+X
+#endif
+X
+/* check to see if this is the continuation of a run that is already saved */
+X
+X if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
+X vmptr->start == dptr->start)
+X {
+X vmptr->stop = dptr->stop;
+X if ((i = dptr->score) <= vmptr->score)
+X return;
+X vmptr->score = i;
+X /* the minimum can only have moved if the entry just raised was it */
+X if (vmptr != f_str->lowmax)
+X return;
+X }
+X else
+X {
+X /* new run: it replaces the lowest-scoring saved entry */
+X i = f_str->lowmax->score = dptr->score;
+X f_str->lowmax->dp = dpos;
+X f_str->lowmax->start = dptr->start;
+X f_str->lowmax->stop = dptr->stop;
+X dptr->dmax = f_str->lowmax;
+X }
+X
+X /* find the new lowest entry, starting from the score just stored */
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X if (vmptr->score < i)
+X {
+X i = vmptr->score;
+X f_str->lowmax = vmptr;
+X }
+X f_str->lowscor = i;
+}
+X
+/*
+X spam - rescore a saved region with the full matrix and trim it to its
+X best-scoring sub-run.
+X
+X The region covers aa1[dmax->start..dmax->stop] and the matching part of
+X aa0, which starts at start - dmax->dp + f_str->noff. A running total
+X of pam2[aa0][aa1] is kept; whenever it drops below zero the current run
+X is closed and a new one starts at the next position.
+X On return dmax->start and dmax->stop delimit the best run found.
+X Returns that run's score, or 0 when no prefix of any run scores above
+X zero.
+X
+X The bounds are now initialised from dmax, so a region in which nothing
+X scores above zero keeps its original start and stop. Previously
+X indeterminate values were copied back in that case.
+*/
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+X struct savestr *dmax, int **pam2,
+X struct f_struct *f_str)
+{
+X int lpos;
+X int tot;
+X struct {
+X int start, stop, score;
+X } curv, maxv;
+X const unsigned char *aa0p, *aa1p;
+X
+X aa1p = &aa1[lpos = dmax->start];
+X aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+X curv.start = curv.stop = lpos;
+X
+X /* fall back to the incoming bounds if no run ever scores above zero */
+X maxv.start = dmax->start;
+X maxv.stop = dmax->stop;
+X
+X tot = curv.score = maxv.score = 0;
+X for (; lpos <= dmax->stop; lpos++) {
+X tot += pam2[*aa0p++][*aa1p++];
+X if (tot > curv.score) {
+X curv.stop = lpos;
+X curv.score = tot;
+X }
+X else if (tot < 0) {
+X /* the run has gone negative: keep it if it is the best so far */
+X if (curv.score > maxv.score) {
+X maxv.start = curv.start;
+X maxv.stop = curv.stop;
+X maxv.score = curv.score;
+X }
+X tot = curv.score = 0;
+X curv.start = lpos+1;
+X }
+X }
+X
+X /* the run still open at the end of the region is a candidate too */
+X if (curv.score > maxv.score) {
+X maxv.start = curv.start;
+X maxv.stop = curv.stop;
+X maxv.score = curv.score;
+X }
+X
+/* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+X printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+X dmax->start,maxv.stop,dmax->stop);
+*/
+X dmax->start = maxv.start;
+X dmax->stop = maxv.stop;
+X
+X return maxv.score;
+}
+X
+#define XFACT 10
+X
+/*
+X sconn - combine the saved regions into a joined score.
+X v[0..n-1] is sorted in place by start position (kpsort). Regions that
+X score below cgap are skipped. Each remaining region takes the score of
+X the first list entry that stops before it starts in both sequences
+X (with XFACT positions of slack), plus its own score, plus pgap. It is
+X then inserted into a list kept in decreasing score order.
+X Returns the score at the head of the list, or 0 if no region qualified.
+X NOTE(review): a region that does not outscore any existing list entry
+X is never linked in (the insertion loop ends without appending), so
+X later regions cannot join to it - confirm this is intended.
+*/
+int sconn (struct savestr **v, int n,
+X int cgap, int pgap, struct f_struct *f_str)
+{
+X int i, si;
+X struct slink {
+X int score;
+X struct savestr *vp;
+X struct slink *next;
+X } *start, *sl, *sj, *so, sarr[MAXSAV];
+X int lstart, tstart, plstop, ptstop;
+X
+/* sort the score left to right in lib pos */
+X
+X kpsort (v, n);
+X
+X start = NULL;
+X
+/* for the remaining runs, see if they fit */
+X
+X for (i = 0, si = 0; i < n; i++)
+X {
+X
+/* if the score is less than the gap penalty, it never helps */
+X if (v[i]->score < cgap)
+X continue;
+X lstart = v[i]->start;
+X tstart = lstart - v[i]->dp + f_str->noff;
+X
+/* put the run in the group */
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X
+/* if it fits, then increase the score */
+X for (sl = start; sl != NULL; sl = sl->next)
+X {
+X plstop = sl->vp->stop;
+X ptstop = plstop - sl->vp->dp + f_str->noff;
+X if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
+X sarr[si].score = sl->score + v[i]->score + pgap;
+X break;
+X }
+X }
+X
+/* now recalculate where the score fits */
+X if (start == NULL)
+X start = &sarr[si];
+X else
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next)
+X {
+X if (sarr[si].score > sj->score)
+X {
+X sarr[si].next = sj;
+X if (so != NULL)
+X so->next = &sarr[si];
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X si++;
+X }
+X
+X if (start != NULL)
+X return (start->score);
+X else
+X return (0);
+}
+X
+/*
+X kssort - shell sort of v[0..n-1] into decreasing order of ->score.
+X Only the pointers in v[] are rearranged.
+*/
+void
+kssort (struct savestr *v[], int n)
+{
+X int step, hi, lo;
+X struct savestr *held;
+X
+X for (step = n / 2; step > 0; step /= 2) {
+X for (hi = step; hi < n; hi++) {
+X /* sift v[hi] back through its stride until the order holds */
+X lo = hi - step;
+X while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+X held = v[lo];
+X v[lo] = v[lo + step];
+X v[lo + step] = held;
+X lo -= step;
+X }
+X }
+X }
+}
+X
+/*
+X kpsort - shell sort of v[0..n-1] into increasing order of ->start.
+X Only the pointers in v[] are rearranged.
+*/
+void
+kpsort (struct savestr *v[], int n)
+{
+X int step, hi, lo;
+X struct savestr *held;
+X
+X for (step = n / 2; step > 0; step /= 2) {
+X for (hi = step; hi < n; hi++) {
+X /* sift v[hi] back through its stride until the order holds */
+X lo = hi - step;
+X while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
+X held = v[lo];
+X v[lo] = v[lo + step];
+X v[lo + step] = held;
+X lo -= step;
+X }
+X }
+X }
+}
+X
+/*
+X dmatchx - wrapper that calls lx_band() and returns its score.
+X hoff is lowered by window/2 before the call. The penalties are negated
+X on the way through; with OLD_FASTA_GAP the first one passed is
+X -(gdelval-ggapval) instead of -gdelval.
+X Without TFAST the call is lx_band(aa1,n1,f_str->aa0y,n0,...), so aa0 is
+X not used; with TFAST it is lx_band(aa0,n0,f_str->aa1y,n1,...), so aa1 is
+X not used.
+*/
+static int
+dmatchx(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int hoff, int window,
+X int **pam2, int gdelval, int ggapval, int gshift,
+X struct f_struct *f_str)
+{
+X
+X hoff -= window/2;
+X
+#ifndef TFAST
+X return lx_band(aa1,n1,f_str->aa0y,n0,
+X pam2,
+#ifdef OLD_FASTA_GAP
+X -(gdelval-ggapval),
+#else
+X -gdelval,
+#endif
+X -ggapval,-gshift,
+X hoff,window,f_str);
+#else
+X return lx_band(aa0,n0,f_str->aa1y,n1,
+X pam2,
+#ifdef OLD_FASTA_GAP
+X -(gdelval-ggapval),
+#else
+X -gdelval,
+#endif
+X -ggapval,-gshift,
+X hoff,window,f_str);
+#endif
+}
+X
+/* init_row - zero the C1..C3, I1..I3 and flag members of row[0..sp-1] */
+static void
+init_row(struct sx_s *row, int sp) {
+X int left;
+X struct sx_s *cell;
+X
+X for (left = sp; --left >= 0; ) {
+X cell = &row[left];
+X cell->C1 = cell->I1 = 0;
+X cell->C2 = cell->I2 = 0;
+X cell->C3 = cell->I3 = 0;
+X cell->flag = 0;
+X }
+}
+X
+/*
+X lx_band - score-only alignment of prot_seq against dna_prot_seq,
+X restricted to a band of the given width starting at start_diag.
+X
+X One row of struct sx_s cells (f_str->cur) is reused for every protein
+X position. Each cell carries three score/insert pairs (C1/I1, C2/I2,
+X C3/I3) and consumes three consecutive entries of dna_prot_seq -
+X presumably one per reading frame; confirm against the code that builds
+X aa0y/aa1y.
+X Returns best+gopen+gext, where best is the largest value of
+X (cell score - gopen - gext) seen, or 0 if none was positive.
+X
+X NOTE(review): f_str->cur is allocated on the first call with width+7
+X entries and never resized; a later call with a larger width would need
+X a longer row - confirm callers keep width constant.
+X NOTE(review): when an incoming insert score is positive and the new
+X score minus gopen+gext is not, I1 is set to ci-gext but I2 and I3 are
+X left unchanged - confirm the asymmetry is intended.
+*/
+int
+lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
+X int len_prot, /* length of prot. seq */
+X const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+X int len_dna_prot, /* length trans. seq. */
+X int **pam_matrix, /* scoring matrix */
+X int gopen, int gext, /* gap open, gap extend penalties */
+X int gshift, /* frame-shift penalty */
+X int start_diag, /* start diagonal of band */
+X int width, /* width for band alignment */
+X struct f_struct *f_str)
+{
+X void *ckalloc();
+X int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;
+X int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+X register int *wt;
+X const unsigned char *dp;
+X register struct sx_s *ap, *aq;
+X
+X sp = width+7;
+X gg = gopen+gext;
+X /* sp = sp/3; */
+X if (f_str->cur == NULL)
+X f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+X
+X init_row(f_str->cur, sp);
+X
+X /*
+X if (start_diag %3 !=0) start_diag = start_diag/3-1;
+X else start_diag = start_diag/3;
+X */
+X
+X /*
+X if (width % 3 != 0) width = width/3+1;
+X else width = width /3;
+X */
+X
+X /* currently, this code assumes that the DNA sequence is longer than the
+X protein sequence. This is not always true. len_prot in the loop below
+X should be decreased to the projection of the DNA on the protein */
+X
+X x1 = start_diag; /* x1 = lower bound of DNA */
+X
+X
+X end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+X end_prot = min(end_prot,len_prot);
+X
+X /* i counts through protein sequence, x1 through DNAp */
+X
+X for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {
+X bd = min(x1+width, len_dna_prot/3); /* upper bound of band */
+X bd1 = max(0,x1); /* lower bound of band */
+X wt = pam_matrix[prot_seq[i]];
+X del = 1-x1; /*adjustment*/
+X bd += del;
+X bd1 +=del;
+X
+X ap = &f_str->cur[bd1];
+X aq = ap+1;
+X e1 = f_str->cur[bd1-1].C3;
+X e2 = ap->C1;
+X cd1 = cd2= cd3= 0;
+X
+X for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
+X /* first of the three entries handled per cell */
+X sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];
+X if (cd1 > sc) sc = cd1;
+X cd1 -= gext;
+X if ((ci = aq->I1) > 0) {
+X if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+X else {
+X ap->C1 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd1 < sc) cd1 = sc;
+X ap->I1 = max(ci-gext, sc);
+X } else ap->I1 = ci-gext;
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I1 = ap->C1 = 0;
+X } else {
+X ap->C1 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd1 < sc) cd1 = sc;
+X ap->I1 = sc;
+X } else ap->I1 = 0;
+X }
+X }
+X /* second entry */
+X sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];
+X if (cd2 > sc) sc = cd2;
+X cd2 -= gext;
+X if ((ci = aq->I2) > 0) {
+X if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+X else {
+X ap->C2 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd2 < sc) cd2 = sc;
+X ap->I2 = max(ci-gext, sc);
+X }
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I2 = ap->C2 = 0;
+X } else {
+X ap->C2 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd2 < sc) cd2 = sc;
+X ap->I2 = sc;
+X } else ap->I2 = 0;
+X }
+X }
+X /* third entry; aq is advanced here for the next cell */
+X sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];
+X if (cd3 > sc) sc = cd3;
+X cd3 -= gext;
+X if ((ci = aq++->I3) > 0) {
+X if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+X else {
+X ap->C3 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd3 < sc) cd3 = sc;
+X ap->I3 = max(ci-gext, sc);
+X }
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I3 = ap->C3 = 0;
+X } else {
+X ap->C3 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd3 < sc) cd3 = sc;
+X ap->I3 = sc;
+X } else ap->I3 = 0;
+X }
+X }
+X }
+X }
+X /* printf("The best score is %d\n", best); */
+X return best+gopen+gext;
+}
+X
+/*
+X ckalloc - malloc() wrapper: w_abort() is called with an out-of-memory
+X message when the request cannot be met. Returns the malloc() result.
+*/
+void *ckalloc(size_t amount)
+{
+X void *mem = (void *)malloc( (size_t)amount);
+X
+X if (mem == NULL) {
+X w_abort("Ran out of memory.","");
+X }
+X return mem;
+}
+X
+/*
+X shscore - score of aa0[0..n0-1] aligned with itself (the 100% identical
+X score): the sum of pam2[r][r] over its residues. Returns 0 for n0 <= 0.
+*/
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+X const unsigned char *res = aa0;
+X int left = n0;
+X int total = 0;
+X
+X while (left-- > 0) {
+X total += pam2[*res][*res];
+X res++;
+X }
+X return total;
+}
+X
+#define SGW1 100
+#define SGW2 300
+#define WIDTH 60
+X
+/* code above is to convert sequence into numbers */
+X
+/* singly linked list of alignment steps; pro_dna() copies each node's l
+X member into the result array and frees the node */
+typedef struct mat *match_ptr;
+X
+typedef struct mat {
+X int i, j, l;
+X match_ptr next;
+} match_node;
+X
+/* pair of coordinates; local_align() uses it to carry the start position
+X of the alignment ending at each cell */
+typedef struct {
+X int i,j;
+} state;
+X
+typedef state *state_ptr;
+X
+/* one dynamic-programming cell: C, I and D scores */
+typedef struct st_s { int C, I, D;} *st_ptr;
+X
+/* static st_ptr up=NULL, down, tp; */
+/* static int *st_up; */
+/* static int gop, gext, shift; */
+X
+/* forward declarations; the old-style ones give no parameter information */
+void *ckalloc(size_t);
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(), init_ROW();
+X
+/*
+X pro_dna - align a protein against a translated DNA sequence and return
+X both the score and the alignment.
+X
+X local_align() finds the best local score and its end points, which are
+X stored in a_res->min0/max0 (protein) and a_res->min1/max1 (DNA-protein).
+X global() then builds the alignment between those end points as a list
+X of steps; up to max_res step codes are copied into a_res->res and the
+X list is freed.
+X Returns the local_align() score.
+X
+X Two fixes to the truncation handling:
+X - the warning is printed only when steps were really dropped
+X (i > max_res); it used to fire when the alignment fitted exactly;
+X - a_res->nres is limited to the number of entries actually stored, so
+X it can no longer exceed max_res.
+*/
+int
+pro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/
+X int len_prot, /* length of prot. seq */
+X const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+X int len_dna_prot, /* length trans. seq. */
+X int **pam_matrix, /* scoring matrix */
+X int gopen, int gex, /* gap open, gap extend penalties */
+X int gshift, /* frame-shift penalty */
+X int max_res,
+X struct a_res_str *a_res) /* alignment info */
+{
+X match_ptr align, ap, aq;
+X int x, y, ex, ey, i, score;
+X int *alignment;
+X st_ptr up, down, tp;
+X
+X /* these globals removed */
+X /* gext = gex; gop = gopen; shift = gshift; */
+X
+X /* for fastx (but not tfastx), these could be moved into init_work(),
+X and done only once */
+X
+X up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X
+X /*local alignment find the best local alignment x (prot) and y (DNA)
+X is the starting position of the best local alignment
+X and ex (prot) ey (DNA) is the ending position */
+X score= local_align(&x, &y, &ex, &ey, pam_matrix,
+X gopen, gex, gshift,
+X dna_prot_seq, len_dna_prot,
+X prot_seq, len_prot, up, down);
+X
+X /* this is very strange, since local_align initialized up, down */
+X /* the rows are handed to global() three entries in; the offset is
+X removed again before the buffers are freed */
+X up += 3; down += 3; tp += 3;
+X
+X /* x, y - start in prot, dna_prot */
+X a_res->min0 = x; /* prot */
+X a_res->max0 = ex; /* prot */
+X
+X a_res->min1 = y; /* DNA-prot */
+X a_res->max1 = ey; /* DNA-prot */
+X
+X align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift,
+X dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp);
+X
+X alignment = a_res->res;
+X
+X /* from earlier version */
+X /* alignment[0] = x; */ /* start of alignment in prot */
+X /* alignment[1] = y; */ /* start of alignment in DNA */
+X
+X /* walk the whole list so every node is freed, storing what fits */
+X for (ap = align, i= 0; ap; i++) {
+X if (i < max_res) {alignment[i] = ap->l;}
+X aq = ap->next; free(ap); ap = aq;
+X }
+X
+X if (i > max_res) {
+X fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);
+X i = max_res; /* report only what was stored */
+X }
+X
+X /* global() may have exchanged the three row pointers, but each still
+X sits three entries into one of the buffers allocated above */
+X up = &up[-3]; down = &down[-3]; tp = &tp[-3];
+X free(up); free(tp); free(down);
+X /* free(st_up); */ /* moved into local align */
+X
+X a_res->nres = i; /* i has the length of the alignment */
+X return score;
+}
+X
+/* swap - exchange the two pointers stored at *a and *b */
+static void
+swap(void **a, void **b) {
+X void *was_a = *a;
+X void *was_b = *b;
+X
+X *a = was_b;
+X *b = was_a;
+}
+X
+/*
+X local_align - find the best local alignment of pro[0..lp-1] against
+X dnap[0..ld-1].
+X
+X On return *x and *y hold the starting position of the best local
+X alignment and *ex and *ey its ending position (x/ex index pro, y/ey
+X index the DNA-protein sequence). Returns the best score, or 0 when no
+X cell scores above zero.
+X
+X up and down are caller-supplied rows used as the two working rows of
+X the recurrence; both are cleared here before use. The start-position
+X arrays are allocated and freed inside this function.
+X
+X The four position variables are now initialised to 0, so all four
+X outputs are 0 when no positive score is found. They used to be
+X indeterminate in that case.
+*/
+static int
+local_align(int *x, int *y, int *ex, int *ey,
+X int **wgts, int gop, int gext, int shift,
+X unsigned char *dnap, int ld,
+X unsigned char *pro, int lp,
+X st_ptr up, st_ptr down) {
+X
+X int i, j, score, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1, e2 = 0, e3,
+X sc, del, e, best = 0, *wt, cd, ci;
+X state_ptr cur_st, last_st, cur_i_st;
+X st_ptr cur, last;
+X unsigned char *dp;
+X int *st_up, *cur_d_st;
+X
+/*
+X Array rowiC store the best scores of alignment ending at a position
+X Arrays rowiD, and rowiI store the best scores of alignment ending
+X at a position with a deletion or insrtion
+X Arrays sti stores the starting position of the best alignment whose
+X score stored in the corresponding row array.
+X The program stores two rows to complete the computation, same is
+X for the global alignment routine.
+*/
+X
+X /* for fastx (but not tfastx), this could be moved into init_work(),
+X and done only once */
+X st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+X init_row2(st_up, ld+5);
+X
+X /* from here on ld is two larger than the length passed in; column j
+X corresponds to dnap[j-2] (see dp below) */
+X ld += 2;
+X init_ROW(up, ld+1); /* set to zero */
+X init_ROW(down, ld+1); /* set to zero */
+X
+X
+X cur = up+1;
+X last = down+1;
+X
+X /* for fastx (but not tfastx), these could be moved into init_work(),
+X and done only once */
+X cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X
+X cur_d_st = st_up;
+X
+X dp = dnap-2;
+X for (i = 0; i < lp; i++) {
+X wt = &wgts[pro[i]][0];
+X for (j = 0; j < 2; j++) {
+X cur_st[j].i = i+1;
+X cur_st[j].j = j+1;
+X }
+X for (j = 2; j < ld; j++) {
+X score = wt[dp[j]];
+X /* del records which predecessor gave sc: 2, 3 or 4 columns back in
+X the previous row, 0 for the I score, 5 for the D score, and -1
+X for a fresh start */
+X del = -1;
+X if (j >= 3) {
+X sc = -score;
+X e3 = e2-shift; e2 = last[j-3].C;
+X e1 = last[j-2].C-shift;
+X if (e1 > sc) {sc = e1; del = 2;}
+X if (e2 > sc) {sc = e2; del = 3;}
+X if (e3 > sc) {sc = e3; del = 4;}
+X } else {
+X sc = e2 = 0;
+X if (sc < -score) sc=-score;
+X else del = 3;
+X }
+X sc += score;
+X if (sc < (ci=last[j].I)) {
+X sc = ci; del = 0;
+X }
+X if (sc < (cd=cur[j].D)) {
+X sc = cd; del = 5;
+X }
+X cur[j].C = sc;
+X e = sc - gop;
+X /* push the D score three columns on, with the distance back to the
+X column where it was opened */
+X if (e > cd) {
+X cur[j+3].D = e-gext;
+X cur_d_st[j+3] = 3;
+X } else {
+X cur[j+3].D = cd-gext;
+X cur_d_st[j+3] = cur_d_st[j]+3;
+X }
+X /* carry the start position over from whichever cell supplied sc */
+X switch(del) {
+X case 5:
+X e1 = cur_d_st[j];
+X cur_st[j].i = cur_st[j-e1].i;
+X cur_st[j].j = cur_st[j-e1].j;
+X break;
+X case 0:
+X cur_st[j].i = cur_i_st[j].i;
+X cur_st[j].j = cur_i_st[j].j;
+X break;
+X case 2:
+X case 3:
+X case 4:
+X if (i) {
+X if (j-del >= 0) {
+X cur_st[j].i = last_st[j-del].i;
+X cur_st[j].j = last_st[j-del].j;
+X } else {
+X cur_st[j].i = i;
+X cur_st[j].j = 0;
+X }
+X } else {
+X cur_st[j].i = 0;
+X cur_st[j].j = max(0, j-del+1);
+X }
+X break;
+X case -1:
+X cur_st[j].i = i+1;
+X cur_st[j].j = j+1;
+X break;
+X }
+X if (e > ci) {
+X cur[j].I = e -gext;
+X cur_i_st[j].i = cur_st[j].i;
+X cur_i_st[j].j = cur_st[j].j;
+X } else {
+X cur[j].I = ci- gext;
+X }
+X if (sc > best) {
+X x1 = cur_st[j].i;
+X x2 = cur_st[j].j;
+X best =sc;
+X x3 = i;
+X x4 = j;
+X }
+X }
+X swap((void **)&last, (void **)&cur);
+X swap((void **)&cur_st, (void **)&last_st);
+X }
+X /* printf("The best score is %d\n", best); */
+X *x = x1; *y = x2; *ex = x3; *ey = x4;
+X free(cur_st); free(last_st); free(cur_i_st);
+X free(st_up);
+X return best;
+}
+X
+/*
+X Both global_up and global_down compute linear-space, score-only global
+X alignments of the subsequences pro[x]...pro[ex] and dna[y]...dna[ey].
+X global_up runs the algorithm upwards, from row x towards row ex.
+X global_down runs the algorithm downwards, from row ex towards row x.
+*/
+X
+/*
+X global_up - forward, score-only pass over pro[x..ex] and the DNA-protein
+X columns y..ey, using *row1 and *row2 as the two working rows.
+X N selects the starting I score of the first row: -gext when non-zero,
+X -gop-gext otherwise.
+X On return *row1 and *row2 have been exchanged if necessary so that *row1
+X is the row holding the final scores; its I members are overwritten with
+X those of the other row.
+X -10000 is used as the "impossible" score.
+*/
+static void
+global_up(st_ptr *row1, st_ptr *row2,
+X int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext, int shift,
+X unsigned char *dnap,
+X unsigned char *pro,
+X int N) {
+X int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score, *wt;
+X st_ptr cur, last;
+X
+X cur = *row1; last = *row2;
+X sc = -gop-gext;
+X /* boundary row: only columns that are a multiple of 3 get a gap score */
+X for (j = 1; j <= ey-y+1; j++) {
+X if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+X else { last[j].I = last[j].C = -10000;}
+X cur[j].I = -10000;
+X }
+X last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+X last[0].D = last[1].D = last[2].D = -10000;
+X if (N) last[0].I = -gext; else last[0].I = -gop-gext;
+X for (i = 1; i <= ex-x+1; i++) {
+X wt = &wgts[pro[i+x-1]][0]; e2 = last[0].C; e1 = -10000;
+X for (j = 0; j <= ey-y+1; j++) {
+X t = j+y;
+X sc = -10000;
+X if (t < 3) score = -10000;
+X else score = wt[dnap[t-3]];
+X if (j < 4) {
+X if (j == 3) sc = e2;
+X else if (j == 2) sc = e2-shift;
+X } else {
+X e3 = e2; e2 = e1;
+X e1 = last[j-2].C;
+X sc = max(max(e1, e3)-shift, e2);
+X }
+X sc += score;
+X sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+X cur[j].C = sc;
+X /* writes up to three entries past column ey-y+1 */
+X cur[j+3].D = max(cd, sc-gop)-gext;
+X cur[j].I = max(ci, sc-gop)-gext;
+X }
+X swap((void **)&last, (void **)&cur);
+X }
+X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
+X if (*row1 != last) swap((void **)row1, (void **)row2);
+}
+X
+/*
+X global_down - backward, score-only pass over pro[ex..x] and the
+X DNA-protein columns ey..y, using *row1 and *row2 as the working rows.
+X N selects the starting I score of the first row: -gext when non-zero,
+X -gop-gext otherwise.
+X On return *row1 and *row2 have been exchanged if necessary so that *row1
+X is the row holding the final scores; its I members are overwritten with
+X those of the other row.
+X -10000 is used as the "impossible" score.
+X NOTE(review): cur[j-3].D is written for j down to 0, i.e. up to three
+X entries before the start of the row - presumably the reason pro_dna()
+X advances its row pointers by 3; confirm.
+X NOTE(review): dnap[t-1] is read with t = j+y, which is index -1 when
+X j == 0 and y == 0 - confirm the caller guarantees that byte exists.
+*/
+static void
+global_down(st_ptr *row1, st_ptr *row2,
+X int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext, int shift,
+X unsigned char *dnap, unsigned char *pro,
+X int N) {
+X int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, s1, s2, s3, *wt;
+X st_ptr cur, last;
+X
+X cur = (*row1); last = *row2;
+X sc = -gop-gext;
+X /* NOTE(review): this test gives gap scores to columns whose distance
+X from the end is NOT a multiple of 3, the opposite of the j % 3 == 0
+X test in global_up - confirm */
+X for (j = ey-y; j >= 0; j--) {
+X if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
+X else last[j].I = last[j].C = -10000;
+X }
+X last[ey-y+1].C = 0;
+X cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+X last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+X if (N) last[ey-y+1].I = -gext; else last[ey-y+1].I = -gop-gext;
+X for (i = ex-x; i >= 0; i--) {
+X wt = &wgts[pro[i+x]][0]; e2 = last[ey-y+1].C;
+X e1 = s2 = s3 = -10000;
+X for (j = ey-y+1; j >= 0; j--) {
+X t = j+y;
+X s1 = wt[dnap[t-1]];
+X sc = -10000;
+X if (t+3 > ey) {
+X if (t+2==ey) sc = e2+s2;
+X else if (t+1==ey) sc = e2-shift+s1;
+X } else {
+X e3 = e2; e2 = e1;
+X e1 = last[j+2].C;
+X sc = max(max(e1+s1, e3+s3)-shift, e2+s2);
+X }
+X if (sc < (cd= cur[j].D)) {
+X sc = cd;
+X cur[j-3].D = cd-gext;
+X } else cur[j-3].D =max(cd, sc-gop)-gext;
+X if (sc < (ci= last[j].I)) {
+X sc = ci; del = 0;
+X cur[j].I = ci - gext;
+X } else cur[j].I = max(sc-gop,ci)-gext;
+X cur[j].C = sc;
+X s3 = s2; s2 = s1;
+X }
+X swap((void **)&last, (void **)&cur);
+X }
+X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
+X if (*row1 != last) swap((void **)row1, (void **)row2);
+}
+X
+/* init_row2 - set the first ld ints of row to 0; a non-positive ld
+X   leaves row untouched */
+static void
+init_row2(int *row, int ld) {
+X  int *stop;
+X
+X  if (ld <= 0) return;
+X  for (stop = row + ld; row < stop; row++) *row = 0;
+}
+X
+/* init_ROW - zero the C, D and I scores of the first ld cells of row;
+X   a non-positive ld leaves row untouched */
+static void
+init_ROW(st_ptr row, int ld) {
+X  st_ptr cell, stop;
+X
+X  if (ld <= 0) return;
+X  stop = row + ld;
+X  for (cell = row; cell < stop; cell++) {
+X    cell->C = 0;
+X    cell->D = 0;
+X    cell->I = 0;
+X  }
+}
+X
+/* combine - append the edge list x2 to the end of x1 and return the head
+X   of the joined list (x2 itself when x1 is NULL).
+X   When st is non-zero the front of x2 is adjusted: j is incremented on
+X   every node up to and including the first node of type 3 or 4, and
+X   that node's type is then decremented by one.  If x2 is empty or holds
+X   no node of type 3 or 4 only the j values are adjusted. */
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+X  match_ptr x;
+X
+X  if (x1 == NULL) return x2;
+X  for (x = x1; x->next; x = x->next);
+X  x->next = x2;
+X  if (st) {
+X    for (x = x2; x; x = x->next) {
+X      x->j++;
+X      if (x->l == 3 || x->l == 4) break;
+X    }
+X    /* x is NULL here when the loop ran off the end of x2; the
+X       unguarded x->l-- would then dereference a null pointer */
+X    if (x != NULL) x->l--;
+X  }
+X  return x1;
+}
+X
+/*
+X global - build the alignment of protein rows x..ex against DNA
+X columns y..ey recursively, using the score-only upward and downward
+X passes (global_up, global_down) so that only linear space is needed.
+X Degenerate rectangles (ex <= x or ey <= y) are turned into node
+X lists directly and small ones are passed to small_global();
+X otherwise the rectangle is split at the middle row m, find_best()
+X picks the crossing columns m1/m2, both halves are aligned
+X recursively and the lists are joined with combine().
+X N1 is passed on to global_up and N2 to global_down; up_stp, dn_stp
+X and tp_stp are the row buffers handed to those passes and to every
+X recursive call.  Returns the head of a match_node list.
+*/
+X
+match_ptr
+global(int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext, int shift,
+X unsigned char *dnap,
+X unsigned char *pro,
+X int N1, int N2,
+X st_ptr *up_stp, st_ptr *dn_stp, st_ptr *tp_stp
+X )
+{
+X int m;
+X int m1, m2;
+X match_ptr x1, x2, mm1, mm2;
+X /*printf("%d %d %d %d\n", x,y, ex, ey);*/
+X /*
+X if the space required is limited, we can do a quadratic space
+X algorithm to find the alignment (see the small_global() call below).
+X */
+X if (ex <= x) { /* degenerate in the protein direction: one type-5 node per m = y+3, y+6, ... <= ey */
+X mm1 = NULL; mm2= NULL;
+X for (m = y+3; m <= ey; m+=3) {
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = 5; x1->next = mm1; /* NOTE(review): the i and j fields of these nodes are never set */
+X if (mm1== NULL) mm2 = x1; /* nodes are prepended, so the first one created stays the tail */
+X mm1 = x1;
+X }
+X if (ex == x) {
+X if ((ey-y) % 3 != 0) { /* 1 or 2 columns left over: append a node of type 2 or 3 */
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+X if (mm2) mm2->next = x1;
+X else mm1 = x1;
+X } else {
+X if (mm2) mm2->l = 4; /* nothing left over: the tail node becomes type 4 */
+X }
+X }
+X return mm1;
+X }
+X if (ey <= y) { /* degenerate in the DNA direction: one type-0 node per row x..ex */
+X mm1 = NULL;
+X for (m = x; m <= ex; m++) {
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = 0; x1->next = mm1; mm1 = x1; /* NOTE(review): i and j are not set here either */
+X }
+X return mm1;
+X }
+X if (ex -x < SGW1-1 && ey-y < SGW2-1) /* rectangle fits the fixed-size tables of small_global() */
+X return small_global(x,y,ex,ey,
+X wgts, gop, gext, shift,
+X dnap, pro, N1, N2);
+X m = (x+ex)/2;
+X /*
+X Do the score only global alignment from row x to row m, m is
+X the middle row of x and ex. Store the information of row m in
+X *up_stp.
+X */
+X global_up(up_stp, tp_stp, x, y, m, ey,
+X wgts, gop, gext, shift,
+X dnap, pro, N1);
+X
+X /*
+X Do the score only global alignment downwards from row ex
+X to row m+1, store information of row m+1 in *dn_stp.
+X */
+X global_down(dn_stp, tp_stp, m+1, y, ex, ey,
+X wgts, gop, gext, shift,
+X dnap, pro, N2);
+X
+X /*
+X Use these information of row m and m+1, to find the crossing
+X point of the best alignment with the middle row. The crossing
+X point is given by m1 and m2. Then we recursively call global
+X itself to compute alignments in two smaller regions found by
+X the crossing point and combine the two alignments to form a
+X whole alignment. Return that alignment.
+X */
+X if (find_best(*up_stp, *dn_stp, &m1, &m2, ey-y+1, y, gop)) { /* 1: the best crossing joins the two C scores */
+X x1 = global(x, y, m, m1, wgts, gop, gext, shift, dnap, pro, N1, 0,
+X up_stp, dn_stp, tp_stp);
+X x2 = global(m+1, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 0, N2,
+X up_stp, dn_stp, tp_stp);
+X if (m1 == m2) x1 = combine(x1,x2,1); /* NOTE(review): find_best() always sets m2 = m1+1, so this branch looks unreachable */
+X else x1 = combine(x1, x2,0);
+X } else { /* 0: the best crossing joins the two I scores; rows m and m+1 become explicit type-0 nodes */
+X x1 = global(x, y, m-1, m1, wgts, gop, gext, shift, dnap, pro, N1, 1,
+X up_stp, dn_stp, tp_stp);
+X x2 = global(m+2, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 1, N2,
+X up_stp, dn_stp, tp_stp);
+X mm1 = (match_ptr) ckalloc(sizeof(match_node));
+X mm1->i = m; mm1->l = 0; mm1->j = m1;
+X mm2 = (match_ptr) ckalloc(sizeof(match_node));
+X mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+X mm1->next = mm2; mm2->next = x2; /* chain: x1 ... mm1 mm2 x2 ... */
+X x1 = combine(x1, mm1, 0);
+X }
+X return x1;
+}
+X
+/* find_best - choose the column at which the best alignment crosses from
+X   the row computed by the upward pass (up) to the row computed by the
+X   downward pass (down).
+X   For every i in 1..ld-1 two candidates are scored: the sum of the C
+X   scores, and the sum of the I scores plus gop.  Comparisons are strict,
+X   so the first column reaching the maximum wins, and at equal score the
+X   C candidate wins over the I candidate of the same column.
+X   Outputs: *m1 = j-1+y and *m2 = j+y for the winning column j (j stays 0
+X   when no candidate exceeds the starting value of -100000).
+X   Returns 1 when the winner is a C candidate and 0 when it is an I
+X   candidate; 1 is also returned when no candidate was accepted (st was
+X   previously left uninitialised in that case). */
+static int
+find_best(st_ptr up, st_ptr down,
+X          int *m1, int *m2,
+X          int ld, int y, int gop) {
+X  int i, best = -100000, j = 0, s2, s4;
+X  int st = 1;   /* defined result even if the loop never assigns it */
+X
+X  up++;         /* after this, up[i-1] below is cell i of the caller's row */
+X  for (i = 1; i < ld; i++) {
+X    s2 = up[i-1].C + down[i].C;
+X    s4 = up[i-1].I + down[i].I + gop;
+X    if (best < s2) {
+X      best = s2; j = i; st = 1;
+X    }
+X    if (best < s4) {
+X      best = s4; j = i; st = 0;
+X    }
+X  }
+X  *m1 = j-1+y;
+X  *m2 = j+y;
+X  /*printf("find best score =%d\n", best);*/
+X  return st;
+}
+X
+/*
+X An alignment is represented as a linked list whose element
+X is of type match_node. Each element represent an edge in the
+X path of the alignment graph. The fields of match_node are
+X l --- gives the type of the edge.
+X i, j --- give the end position.
+X
+X small_global - full-table alignment of protein rows x..ex against
+X DNA columns y..ey, followed by a traceback that returns the edge
+X list.  The score and traceback tables are static arrays sized by
+X SGW1/SGW2, so the function keeps state between calls and the
+X rectangle must fit those bounds (the caller in global() checks
+X this).  N1 selects the starting I score (-gext instead of
+X -gop-gext); N2 allows the final cell to be taken from the I score.
+*/
+X
+static match_ptr
+small_global(int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext, int shift,
+X unsigned char *dnap, unsigned char *pro,
+X int N1, int N2) {
+X static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; /* C: scores, st: traceback codes, D/I: one running row each */
+X int i, j, e, sc, score, del, k, t, *wt, ci, cd;
+X int *cI, *cD, *cC, *lC, *cst, e2, e3, e4; /* e3 is never used */
+X match_ptr mp, first;
+X
+X /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+X sc = -gop-gext; C[0][0] = 0;
+X if (N1) I[0] = -gext; else I[0] = sc;
+X for (j = 1; j <= ey-y+1; j++) { /* row 0: only whole-codon offsets get a finite score */
+X if (j % 3== 0) {
+X C[0][j] = sc; sc -= gext; I[j] = sc-gop;
+X } else I[j] = C[0][j] = -10000;
+X st[0][j] = 5; /* every row-0 cell traces back as a type-5 edge */
+X }
+X lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
+X cI = I;
+X for (i = 1; i <= ex-x+1; i++) { /* fill one table row per protein residue */
+X cC = &C[i][0];
+X wt = &wgts[pro[i+x-1]][0]; cst = &st[i][0];
+X for (j = 0; j <=ey-y+1; j++) {
+X sc = -10000; del = 0;
+X ci = cI[j];
+X cd= cD[j];
+X t = j+y;
+X if (t < 3) score = -10000; /* no complete codon ends at this column */
+X else score = wt[dnap[t-3]];
+X if (j >= 4) { /* choose among the 2-, 3- and 4-column predecessors; 2 and 4 pay the frameshift cost */
+X e2 = lC[j-2]-shift; sc = lC[j-3]; e4 = lC[j-4]-shift;
+X del = 3;
+X if (e2 > sc) { sc = e2; del = 2;}
+X if (e4 >= sc) { sc = e4; del = 4;}
+X } else {
+X if (j ==3) {sc= lC[0]; del = 3;}
+X else if (j == 2) {sc = lC[0]-shift; del = 2;}
+X }
+X sc = sc+score;
+X if (sc < ci) { /* I score wins: edge type 0 */
+X sc = ci; del = 0;
+X }
+X if (sc <= cd) { /* D score wins (ties included): edge type 5 */
+X sc = cd;
+X del = 5;
+X }
+X cC[j] = sc;
+X sc -= gop;
+X if (sc < cd) { /* extending D beats opening it here: flag with +10 */
+X del += 10;
+X cD[j+3] = cd - gext; /* D is indexed up to j+3, hence the extra room in D[] */
+X } else cD[j+3] = sc -gext;
+X if (sc < ci) { /* extending I beats opening it here: flag with +20 */
+X del += 20;
+X cI[j] = ci-gext;
+X } else cI[j] = sc-gext;
+X *(cst++) = del;
+X }
+X lC = cC;
+X }
+X if (N2 && ci +gop > cC[ey-y+1]) { /* ci and cC still hold the values from the last cell computed above */
+X st[ex-x+1][ey-y+1] = 0;
+X /*printf("small score = %d\n", ci+gop);*/
+X } /*else printf("small score =%d\n", cC[ey-y+1]);*/
+X first = NULL; e = 1;
+X for (i = ex+1, j = ey+1; i > x || j > y; i--) { /* traceback from the bottom-right corner; nodes are prepended */
+X mp = (match_ptr) ckalloc(sizeof(match_node));
+X mp->i = i-1;
+X k = (t=st[i-x][j-y])%10; /* last digit is the edge type, the tens carry the +10/+20 flags */
+X mp->j = j-1;
+X if (e == 5 && (t/10)%2 == 1) k = 5; /* previous step was type 5 and this cell extends D: stay in D */
+X if (e == 0 && (t/20)== 1) k = 0; /* previous step was type 0 and this cell extends I: stay in I */
+X if (k == 5) { j -= 3; i++; e=5;} /* i++ cancels the loop's i--, so only the column moves */
+X else {j -= k;if (k==0) e= 0; else e = 1;}
+X mp->l = k;
+X mp->next = first;
+X first = mp;
+X }
+X
+X /* for (i = 0; i <= ex-x; i++) {
+X for (j = 0; j <= ey-y; j++)
+X printf("%d ", C[i][j]);
+X printf("\n");
+X }
+X */
+X return first;
+}
+X
+X
+#define XTERNAL
+#include "upam.h"
+X
+/* display_alig - print a three-line text rendering of an alignment to
+X   stdout, WIDTH columns per group.
+X   a      - alignment array: a[0] and a[1] give the starting positions,
+X            a[2..length-1] are the edge codes (0, 2, 3, 4 or 5)
+X   dna    - encoded DNA sequence, ld symbols; converted through aa[]
+X   pro    - encoded protein sequence, converted through aa[]
+X   The three static line buffers hold 100 characters each, so WIDTH plus
+X   the longest single step must stay below that.
+X   NOTE(review): the copy obtained from ckalloc() is never released here;
+X   free it once the matching deallocator for ckalloc() is confirmed. */
+extern void display_alig(a, dna, pro,length, ld)
+int *a;
+unsigned char *dna, *pro;
+int length, ld;
+{
+X  int len = 0, i, j, x, y, lines, k;
+X  static char line1[100], line2[100], line3[100],
+X    tmp[10] = " ";
+X  unsigned char *dna1, c1, c2, c3, *st;
+X
+X  dna1 = ckalloc((size_t)ld);
+X  for (st = dna, i = 0; i < ld; i++, st++) dna1[i] = aa[*st];
+X  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-1;
+X
+X  for (len = 0, j = 2, lines = 0; j < length; j++) {
+X    i = a[j];
+X    /*printf("%d %d %d\n", i, len, b->j);*/
+X    if (i > 0 && i < 5) tmp[i-2] = aa[pro[x++]];
+X    if (i == 5) {
+X      i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
+X      /* only look at the following code when there is one; the
+X         unguarded a[j+1] read past the end of the array on the
+X         last element */
+X      if (j+1 < length && a[j+1] == 2) tmp[2] = ' ';
+X    }
+X    if (i > 0) {
+X      strncpy(&line1[len], (const char *)&dna1[y], i); y+=i;
+X    } else {line1[len] = '-'; i = 1; tmp[0] = aa[pro[x++]];}
+X    strncpy(&line2[len], tmp, i);
+X    for (k = 0; k < i; k++) {
+X      if (tmp[k] != ' ' && tmp[k] != '-') {
+X        if (k == 2) tmp[k] = '\\';
+X        else if (k == 1) tmp[k] = '|';
+X        else tmp[k] = '/';
+X      } else tmp[k] = ' ';
+X    }
+X    if (i == 1) tmp[0] = ' ';
+X    strncpy(&line3[len], tmp, i);
+X    tmp[0] = tmp[1] = tmp[2] = ' ';
+X    len += i;
+X    line1[len] = line2[len] =line3[len] = '\0';
+X    if (len >= WIDTH) {
+X      printf("\n%5d", WIDTH*lines++);
+X      for (k = 10; k <= WIDTH; k+=10)
+X        printf(" . :");
+X      if (k-5 < WIDTH) printf(" .");
+X      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+X      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+X      printf("\n %s\n %s\n %s\n", line1, line3, line2);
+X      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+X      /* Slide the unprinted tail, including its terminating NUL, to
+X         the front of each buffer.  Source and destination lie in the
+X         same array, so this must be memmove(): strncpy() between
+X         overlapping objects is undefined. */
+X      len = len - WIDTH;
+X      memmove(line1, &line1[WIDTH], (size_t)len+1);
+X      memmove(line2, &line2[WIDTH], (size_t)len+1);
+X      memmove(line3, &line3[WIDTH], (size_t)len+1);
+X    }
+X  }
+X  printf("\n%5d", WIDTH*lines);
+X  for (k = 10; k < len; k+=10)
+X    printf(" . :");
+X  if (k-5 < len) printf(" .");
+X  printf("\n %s\n %s\n %s\n", line1, line3, line2);
+}
+X
+X
+/* The alignment array stores the operations that align the protein and
+X DNA sequences.  The codes stored in the array are as follows:
+X 0: delete of an amino acid.
+X 2: frame shift, 2 nucleotides match with an amino acid
+X 3: match an amino acid with a codon
+X 4: the other type of frame shift
+X 5: delete of a codon
+X
+X
+X Also, the first two elements of the array store the starting points
+X in the protein and DNA sequences of the local alignment.
+X
+X The display looks like the following, where WIDTH is assumed to be divisible by 10.
+X
+X 0 . : . : . : . : . : . :
+X CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG
+X P M I L G Y W N V R G L T H P I R M L L
+X
+X 60 . : . : . : . : . : . :
+X GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT
+X E Y T D S S Y D E K R Y T M G D A P D F
+*/
+X
+X
+/* fatal - print msg followed by a newline on stderr, then end the
+X   process with exit status 1; does not return to the caller */
+void fatal(msg)
+char *msg;
+{
+X fprintf(stderr, "%s\n", msg);
+X exit(1);
+}
+X/* do_walign - compute the final protein:DNA alignment for one pair of
+X   sequences with pro_dna() and return its score.
+X   The body is compiled in one of two forms.  Without TFAST (FASTX) aa0
+X   is the DNA (n_nt = n0) and aa1 the protein (n_aa = n1).  With TFAST
+X   (TFASTX) aa0 is the protein and aa1 the DNA, which is first run
+X   through saatran() into f_str->aa1x and then interleaved into
+X   f_str->aa1y.
+X   When the DNA is more than six times as long as the protein,
+X   do_fastx() is called first and its hoff result is used to restrict
+X   pro_dna() to the window nt_min..nt_max of the DNA.
+X   Side effects: a_res->res is pointed at f_str->res, a_res->min1 and
+X   a_res->max1 are shifted by nt_min, and *have_ares is set to 1. */
+int do_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X int *have_ares)
+{
+X int score;
+X int i, last_n1, itemp, n10; /* i is unused; the others are used only in the TFAST build */
+X int n_aa, n_nt, hoff, nt_min, nt_max, w_fact;
+X unsigned char *fs, *fd; /* used only in the TFAST build */
+X struct rstruct rst;
+X int itx;
+X
+#ifndef TFAST /* FASTX */
+X n_aa = n1;
+X n_nt = n0;
+X
+X /* check for large differences in sequence length */
+X nt_min = 0; nt_max = n_nt;
+X if (n_nt > 6 * n_aa) {
+X /* find out where the diagonal is - get hoff
+X hoff < 0 => seq0 is in the middle of seq1
+X */
+X do_fastx(f_str->aa0x, n0, aa1, n1, ppst, f_str, &rst, &hoff);
+X if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;} /* widen the window when score[0] is more than twice score[2] */
+X else w_fact = 2;
+X
+X if (hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
+X nt_min = max(0,(hoff-w_fact*n_aa)*3); /* window of w_fact*n_aa on either side of hoff, times 3, clipped to 0..n_nt */
+X nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
+X }
+X else {
+X nt_max = min(3*w_fact*n_aa,n_nt); /* keep the start at 0, cap only the end */
+X }
+X }
+X
+X a_res->res = f_str->res;
+X
+X score = pro_dna(aa1, n1, f_str->aa0y+nt_min, nt_max-nt_min, ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval - ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X -ppst->gshift,
+X f_str->max_res, a_res);
+X
+X /* correct for nt_min missing residues in alignment
+X (done after the #endif below, for both builds) */
+X
+#else /* TFASTX */
+X
+X /*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[f_str->aa1x[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+X fprintf(stderr,"\n-----\n");
+X */
+X
+X last_n1 = 0;
+X for (itx=3*frame; itx<3+3*frame; itx++) { /* three saatran() calls, itx = 3*frame .. 3*frame+2 */
+X n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);
+/*
+X for (i=0; i<n10; i++) {
+X fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
+X if ((i%60)==59) fprintf(stderr,"\n");
+X }
+X fprintf(stderr,"\n");
+*/
+X last_n1 += n10+1; /* each result is followed by one extra slot in aa1x */
+X }
+X n10 = last_n1-1;
+X
+X /* create aa1y from aa1x: symbol k of segment itemp goes to
+X aa1y[itemp + 3*k], and each segment ends with its own EOSEQ */
+X for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {
+X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+X *fd=EOSEQ;
+X }
+X /*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[f_str->aa1y[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+X fprintf(stderr,"\n-----\n");
+X */
+X
+X n_aa = n0;
+X n_nt = n1;
+X
+X /* check for large differences in sequence length */
+X nt_min = 0; nt_max = n_nt;
+X if (n_nt > 6 * n_aa) {
+X /* find out where the diagonal is - get hoff
+X hoff < 0 => seq0 is in the middle of seq1
+X */
+X do_fastx(aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff);
+X if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}
+X else w_fact = 2;
+X
+X if ( hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
+X nt_min = max(0,(hoff-w_fact*n_aa)*3);
+X nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
+X }
+X else {
+X nt_max = min(3*w_fact*n_aa,n_nt);
+X }
+X }
+X
+X a_res->res = f_str->res;
+X
+X score = pro_dna(aa0, n0, f_str->aa1y+nt_min, nt_max-nt_min, ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval - ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X -ppst->gshift,
+X f_str->max_res, a_res);
+X
+#endif /* TFASTX */
+X
+X /* pro_dna always compares protein to DNA, and returns protein
+X coordinates in a_res->min0,max0 */
+X
+X a_res->min1 += nt_min; /* pro_dna() was given the DNA starting at nt_min, so shift min1/max1 back */
+X a_res->max1 += nt_min;
+X
+X /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0); */
+X
+X *have_ares = 1;
+X return score;
+}
+X
+/* aln_func_vals - fill in aln->qlfact, qlrev, llfact, llmult, frame and
+X   llrev.  llmult is always 1 and frame always 0.  Without TFAST the
+X   q-side factor is 3 and qlrev follows the sign of frame; with TFAST
+X   (TFASTX) the l-side factor is 3 and llrev follows the sign of frame. */
+/* call from calcons, calc_id, calc_code */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+X  const int reversed = (frame > 0) ? 1 : 0;
+X
+X  /* identical in both builds */
+X  aln->llmult = 1;
+X  aln->frame = 0;
+X
+#ifdef TFAST	/* TFASTX */
+X  aln->qlfact = 1;
+X  aln->qlrev = 0;
+X  aln->llfact = 3;
+X  aln->llrev = reversed;
+#else		/* FASTX */
+X  aln->qlfact = 3;
+X  aln->qlrev = reversed;
+X  aln->llfact = 1;
+X  aln->llrev = 0;
+#endif
+}
+X
+/* this function is required for programs like tfastx/y/s that do
+X translations on DNA sequences and save them in f_str->aa1??
+X
+X pre_cons - with TFAST defined, fill f_str->aa1x with the three
+X saatran() results for itx = 3*frame .. 3*frame+2 and interleave them
+X into f_str->aa1y; without TFAST the body is empty and the function
+X does nothing.
+*/
+X
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+#ifdef TFAST
+X int i, last_n1, itemp, n10; /* i is unused */
+X unsigned char *fs, *fd;
+X int itx;
+X
+X last_n1 = 0;
+X for (itx=3*frame; itx<3+3*frame; itx++) {
+X n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx); /* presumably returns the number of symbols written - confirm against saatran() */
+/*
+X for (i=0; i<n10; i++) {
+X fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
+X if ((i%60)==59) fprintf(stderr,"\n");
+X }
+X fprintf(stderr,"\n");
+*/
+X last_n1 += n10+1; /* skip the result plus one extra slot */
+X }
+X n10 = last_n1-1; /* computed but not used afterwards in this function */
+X
+X /* create aa1y from aa1x: symbol k of segment itemp is stored at
+X aa1y[itemp + 3*k]; fs steps over the EOSEQ between segments */
+X for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {
+X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+X *fd=EOSEQ;
+X }
+#endif
+}
+X
+X
+/*
+X Alignment: stores the operations that align the protein and DNA sequences.
+X The codes stored in the array are as follows:
+X 0: delete of an amino acid.
+X 2: frame shift, 2 nucleotides match with an amino acid
+X 3: match an amino acid with a codon
+X 4: the other type of frame shift
+X 5: delete of a codon
+X
+X The first two elements of the array store the starting points
+X in the protein and DNA sequences of the local alignment.
+*/
+X
+#include "a_mark.h"
+X/* calcons - expand the edge codes in a_res.res[0..nres-1] into the three
+X   display strings and collect alignment statistics in *aln.
+X   ap0/i0 always refer to the translated DNA and ap1/i1 to the protein;
+X   which of seqc0/seqc1 receives which depends on TFAST.  seqca receives
+X   one mark per column (M_DEL, M_NEG, M_ZERO, M_POS or M_IDENT) and is
+X   the only string that is NUL-terminated here.
+X   Sets aln->amin0/1, smin0/1, amax0/1, nident, nsim, ngap_q, ngap_l and
+X   nfs, stores the number of residue columns in *nc, and returns that
+X   number plus the number of extra frameshift columns. */
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1, i, j; /* i and j are unused */
+X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+X char *sp0, *sp1, *spa, *sq;
+X const unsigned char *ap0, *ap1;
+X int *rp, *rpmax;
+X
+X if (pst.ext_sq_set) {sq = pst.sqx;}
+X else {sq = pst.sq;}
+X
+X
+X
+#ifndef TFAST /* FASTX */
+X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
+X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
+X
+X ap0 = f_str->aa0y; /* translated DNA */
+X ap1 = aa1; /* protein */
+X
+X sp0 = seqc0;
+X sp1 = seqc1;
+#else /* TFASTX */
+X aln->amin0 = aln->smin0 = a_res.min0; /* prot: min0 indexes ap1 = aa0 below */
+X aln->amin1 = aln->smin1 = a_res.min1; /* DNA: min1 indexes ap0 = aa1y below */
+X
+X ap1 = aa0; /* protein */
+X ap0 = f_str->aa1y; /* translated DNA */
+X
+X sp1 = seqc0;
+X sp0 = seqc1;
+#endif
+X
+X rp = a_res.res;
+X rpmax = rp+a_res.nres;
+X
+X spa = seqca;
+X
+X lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
+X i0 = a_res.min1; /* DNA position */
+X i1 = a_res.min0; /* protein position */
+X
+X while (rp < rpmax) {
+X /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+X ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+X */
+X switch (*rp++) { /* codes other than 0, 2, 3, 4, 5 are silently skipped */
+X case 0: /* aa insertion */
+X *sp0++ = '-';
+X *sp1++ = sq[ap1[i1++]];
+X *spa++ = M_DEL;
+X lenc++;
+X ngap_d++;
+X break;
+X case 2: /* -1 frameshift */
+X nfs++;
+X *sp0++ = '/'; /* extra column marking the frameshift */
+X i0 -= 1;
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X not_c++;
+X
+X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;} /* scores >= 0 count as similar */
+X
+X *sp0 = sq[ap0[i0]];
+X i0 += 3;
+X *sp1 = sq[ap1[i1++]];
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X lenc++;
+X break;
+X case 3: /* codon/aa match */
+X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
+X
+X *sp0 = sq[ap0[i0]];
+X i0 += 3;
+X *sp1 = sq[ap1[i1++]];
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X lenc++;
+X break;
+X case 4: /* +1 frameshift */
+X nfs++;
+X *sp0++ = '\\'; /* extra column marking the frameshift */
+X i0 += 1;
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X not_c++;
+X
+X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
+X
+X *sp0 = sq[ap0[i0]];
+X i0 += 3;
+X *sp1 = sq[ap1[i1++]];
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X sp0++; sp1++; spa++;
+X lenc++;
+X break;
+X case 5: /* codon insertion */
+X *sp0++ = sq[ap0[i0]];
+X i0 += 3;
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X lenc++;
+X ngap_p++;
+X break;
+X }
+X }
+X *spa = '\0'; /* NOTE(review): sp0 and sp1 are not terminated here - presumably the caller handles that; confirm */
+X
+#ifndef TFAST /* FASTX */
+X aln->amax0 = i0;
+X aln->amax1 = i1;
+X aln->ngap_q = ngap_d;
+X aln->ngap_l = ngap_p;
+#else
+X aln->amax1 = i0;
+X aln->amax0 = i1;
+X aln->amin1 = aln->smin1;
+X aln->amin0 = aln->smin0;
+X aln->ngap_q = ngap_p;
+X aln->ngap_l = ngap_d;
+#endif
+X aln->nfs = nfs;
+X
+X if (lenc < 0) lenc = 1; /* lenc is only ever incremented from 0, so this never fires */
+X *nc = lenc;
+/* now we have the middle, get the right end */
+X return lenc+not_c;
+}
+X
+/* calcons_a - same expansion as calcons(), with an additional annotation
+X   string seqc0a that receives exactly one character per output column.
+X   a_res.res[0..nres-1] holds the edge codes (0, 2, 3, 4, 5).  ap0/i0
+X   refer to the translated DNA, ap1/i1 to the protein; which of
+X   seqc0/seqc1 receives which depends on TFAST.  With TFAST the
+X   annotation of a residue column is ann_arr[aa0a[i1]]; without it every
+X   annotation is a blank.  seqca and seqc0a are NUL-terminated here,
+X   seqc0 and seqc1 are not.
+X   Sets aln->amin0/1, smin0/1, amax0/1, nident, nsim, ngap_q, ngap_l and
+X   nfs, stores the number of residue columns in *nc, and returns that
+X   number plus the number of extra frameshift columns.
+X
+X   A frameshift (code 2 or 4) produces two columns, so seqc0a must be
+X   advanced twice; code 4 used to advance it only once, which shifted
+X   every later annotation one column to the left. */
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X              const unsigned char *aa1, int n1,
+X              int *nc,
+X              struct a_struct *aln,
+X              struct a_res_str a_res,
+X              struct pstruct pst,
+X              char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X              char *ann_arr, struct f_struct *f_str)
+{
+X  int i0, i1;
+X  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+X  char *sp0, *sp0a, *sp1, *spa, *sq;
+X  const unsigned char *ap0, *ap1;
+X  int *rp, *rpmax;
+X
+X  if (pst.ext_sq_set) {sq = pst.sqx;}
+X  else {sq = pst.sq;}
+X
+#ifndef TFAST	/* FASTX */
+X  aln->amin1 = aln->smin1 = a_res.min0;	/* prot */
+X  aln->amin0 = aln->smin0 = a_res.min1;	/* DNA */
+X
+X  ap0 = f_str->aa0y;	/* translated DNA */
+X  ap1 = aa1;		/* protein */
+#else	/* TFASTX */
+X  aln->amin0 = aln->smin0 = a_res.min0;	/* prot: min0 indexes ap1 = aa0 */
+X  aln->amin1 = aln->smin1 = a_res.min1;	/* DNA: min1 indexes ap0 = aa1y */
+X
+X  ap1 = aa0;
+X  ap0 = f_str->aa1y;
+#endif
+X
+X  rp = a_res.res;
+X  rpmax = &a_res.res[a_res.nres];
+X
+#ifndef TFAST
+X  sp0 = seqc0;
+X  sp1 = seqc1;
+#else
+X  sp1 = seqc0;
+X  sp0 = seqc1;
+#endif
+X  spa = seqca;
+X  sp0a = seqc0a;
+X
+X  lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
+X  i0 = a_res.min1;	/* DNA position */
+X  i1 = a_res.min0;	/* protein position */
+X
+X  while (rp < rpmax) {
+X    /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+X       ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+X    */
+X    switch (*rp++) {
+X    case 0:	/* aa insertion */
+X      *sp0++ = '-';
+X      *sp1++ = sq[ap1[i1++]];
+X      *spa++ = M_DEL;
+X      *sp0a++ = ' ';
+X      lenc++;
+X      ngap_d++;
+X      break;
+X    case 2:	/* -1 frameshift */
+X      nfs++;
+X      *sp0++ = '/';
+X      i0 -= 1;
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      *sp0a++ = ' ';	/* annotation for the frameshift column */
+X      not_c++;
+X
+X      if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
+X
+#ifndef TFAST
+X      *sp0a++ = ' ';	/* annotation for the residue column */
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      *sp1 = sq[ap1[i1++]];
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 3:	/* codon/aa match */
+X      if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
+X
+#ifndef TFAST
+X      *sp0a++ = ' ';
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      *sp1 = sq[ap1[i1++]];
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 4:	/* +1 frameshift */
+X      nfs++;
+X      *sp0a++ = ' ';	/* annotation for the frameshift column */
+X      *sp0++ = '\\';
+X      i0 += 1;
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      not_c++;
+X
+X      if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
+X
+X      /* annotation for the residue column, as in case 2; without it
+X         seqc0a falls one column behind the other strings */
+#ifndef TFAST
+X      *sp0a++ = ' ';
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      *sp1 = sq[ap1[i1++]];
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 5:	/* codon insertion */
+X      *sp0a++ = ' ';
+X      *sp0++ = sq[ap0[i0]];
+X      i0 += 3;
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      lenc++;
+X      ngap_p++;
+X      break;
+X    }
+X  }
+X  *sp0a = *spa = '\0';
+X
+#ifndef TFAST
+X  aln->amax0 = i0;
+X  aln->amax1 = i1;
+X  aln->ngap_q = ngap_d;
+X  aln->ngap_l = ngap_p;
+#else
+X  aln->amax1 = i0;
+X  aln->amax0 = i1;
+X  aln->ngap_q = ngap_p;
+X  aln->ngap_l = ngap_d;
+#endif
+X  aln->nfs = nfs;
+X
+X  if (lenc < 0) lenc = 1;
+X  *nc = lenc;
+/* now we have the middle, get the right end */
+X  return lenc+not_c;
+}
+X
+/* build an array of match/ins/del - length strings */
+/* calc_code - walk the edge codes in a_res.res[0..nres-1], append a
+X   run-length encoding of the operations to al_str (capacity al_str_n,
+X   already holding a NUL-terminated string on entry) through
+X   update_code(), and collect the same statistics as calc_id().
+X   op_char maps an operation code to its output character; code 6 ('*')
+X   is used for a match column in which either residue prints as '*'.
+X   ap0/i0 refer to the translated DNA, ap1/i1 to the protein.
+X   Sets aln->amin0/1, smin0/1, amax0/1, nident, nsim, ngap_q, ngap_l and
+X   nfs; returns the number of residue columns.
+X
+X   For a -1 frameshift (code 2) the similarity test is made after the
+X   DNA position has been stepped back, on the same codon that is used
+X   for the identity test and that calcons()/calc_id() use; it used to be
+X   made one position too far along. */
+int calc_code(const unsigned char *aa0, int n0,
+X              const unsigned char *aa1, int n1,
+X              struct a_struct *aln,
+X              struct a_res_str a_res,
+X              struct pstruct pst,
+X              char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X  int i0, i1;
+X  int lenc, not_c, ngap_p, ngap_d, nfs;
+X  char op_char[10];
+X  int op, op_cnt;
+X  char sp0, sp1, *sq;
+X  const unsigned char *ap0, *ap1;
+X  int *rp, *rpmax;
+X
+X  if (pst.ext_sq_set) {sq = pst.sqx;}
+X  else {sq = pst.sq;}
+X
+X
+#ifndef TFAST	/* FASTX */
+X  strncpy(op_char,"- /=\\+*",sizeof(op_char));
+X  aln->amin1 = aln->smin1 = a_res.min0;	/* prot */
+X  aln->amin0 = aln->smin0 = a_res.min1;	/* DNA */
+X
+X  ap0 = f_str->aa0y;
+X  ap1 = aa1;
+#else	/* TFASTX */
+X  strncpy(op_char,"+ /=\\-*",sizeof(op_char));
+X  aln->amin0 = aln->smin0 = a_res.min0;	/* prot: min0 indexes ap1 = aa0 */
+X  aln->amin1 = aln->smin1 = a_res.min1;	/* DNA: min1 indexes ap0 = aa1y */
+X
+X  ap1 = aa0;
+X  ap0 = f_str->aa1y;
+#endif
+X
+X  rp = a_res.res;
+X  rpmax = &a_res.res[a_res.nres];
+X
+X  op_cnt = lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
+X  op = 3; /* code for a match - all alignments start with a match */
+X
+X  i0 = a_res.min1;	/* DNA position */
+X  i1 = a_res.min0;	/* protein position */
+X
+X  while (rp < rpmax) {
+X    switch (*rp++) {
+X    case 0:	/* aa insertion */
+X      if (op == 0) op_cnt++;
+X      else {
+X        update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X        op = 0; op_cnt = 1;
+X      }
+X      i1++;
+X      lenc++;
+X      ngap_d++;
+X      break;
+X    case 2:	/* -1 frameshift */
+X      /* flush the pending run, emit the frameshift itself, then start
+X         a new match run of length 1 */
+X      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X      op = 2; op_cnt = 1;
+X      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X      op = 3; op_cnt = 1;
+X      nfs++;
+X      i0 -= 1;
+X      not_c++;
+X      /* score the codon at the shifted position */
+X      if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
+X      sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      sp1 = sq[ap1[i1++]];
+X      if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X      lenc++;
+X      break;
+X    case 3:	/* codon/aa match */
+X      if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
+X      sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      sp1 = sq[ap1[i1++]];
+X      if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X
+X      if (op == 3 || op == 6) {
+X        if (sp0 != '*' && sp1 != '*') {
+X          if (op == 6 ) {	/* leaving a '*' run */
+X            update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X            op_cnt = 1; op = 3;
+X          }
+X          else {op_cnt++;}
+X        }
+X        else {	/* a '*' column always closes the current run */
+X          update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X          op_cnt = 1; op = 6;
+X        }
+X      }
+X      else {
+X        update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X        if (op == 2 || op == 4) op_cnt = 2;
+X        else op_cnt = 1;
+X        op = 3;
+X      }
+X      lenc++;
+X      break;
+X    case 4:	/* +1 frameshift */
+X      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X      op = 4; op_cnt = 1;
+X      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X      op = 3; op_cnt = 1;
+X
+X      nfs++;
+X      i0 += 1;
+X      not_c++;
+X      if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
+X      sp0 = sq[ap0[i0]];
+X      i0 += 3;
+X      sp1 = sq[ap1[i1++]];
+X      if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X      lenc++;
+X      break;
+X    case 5:	/* codon insertion */
+X      if (op == 5) op_cnt++;
+X      else {
+X        update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X        op = 5; op_cnt = 1;
+X      }
+X      i0 += 3;
+X      lenc++;
+X      ngap_p++;
+X      break;
+X    }
+X  }
+X  /* flush the last run */
+X  update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X
+#ifndef TFAST
+X  aln->amax0 = i0;
+X  aln->amax1 = i1;
+X  aln->ngap_q = ngap_d;
+X  aln->ngap_l = ngap_p;
+#else
+X  aln->amax1 = i0;
+X  aln->amax0 = i1;
+X  aln->ngap_q = ngap_p;
+X  aln->ngap_l = ngap_d;
+#endif
+X  aln->nfs = nfs;
+X
+X  if (lenc < 0) lenc = 1;
+X
+X  return lenc;
+}
+X
+/* update_code - append one "<op character><count>" item to al_str.
+X   al_str      NUL-terminated string being built
+X   al_str_max  space still available behind the current end of al_str,
+X               counting the terminating NUL (callers pass
+X               capacity - strlen(al_str))
+X   op, op_cnt  operation code and run length; op_char[op] is the
+X               character written in front of the count
+X   At most al_str_max-1 characters are appended and the result stays
+X   NUL-terminated; nothing is appended when no room is left.
+X   al_str_max is relative to the end of the string, so it must not be
+X   used as an index from the start of al_str: doing so cut off text
+X   that was already in the buffer. */
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
+
+X  char tmp_cnt[20];	/* one character plus a decimal int always fits */
+
+X  if (al_str_max <= 1) return;	/* room for the NUL only, or none at all */
+
+X  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X  /* strncat() writes its own terminating NUL after the appended text */
+X  strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
+}
+X/* calc_id - walk the edge codes in a_res.res[0..nres-1] and collect the
+X   alignment statistics in *aln without building any display strings.
+X   ap0/i0 refer to the translated DNA, ap1/i1 to the protein.
+X   Sets aln->amin0/1, smin0/1, amax0/1, nident, nsim, ngap_q, ngap_l and
+X   nfs; returns the number of residue columns (frameshift marker
+X   columns are counted in not_c, which is not returned). */
+int calc_id(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X struct f_struct *f_str)
+{
+X int i0, i1, i, j; /* i and j are unused */
+X int lenc, not_c, itmp, ngap_p, ngap_d, nfs; /* itmp is unused */
+X char sp0, sp1, *sq;
+X const unsigned char *ap0, *ap1;
+X int *rp, *rpmax;
+X
+X if (pst.ext_sq_set) {sq = pst.sqx;}
+X else {sq = pst.sq;}
+X
+X
+#ifndef TFAST /* FASTX */
+X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
+X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
+X
+X ap0 = f_str->aa0y;
+X ap1 = aa1;
+#else /* TFASTX */
+X aln->amin0 = aln->smin0 = a_res.min0; /* prot: min0 indexes ap1 = aa0 below */
+X aln->amin1 = aln->smin1 = a_res.min1; /* DNA: min1 indexes ap0 = aa1y below */
+X
+X ap1 = aa0;
+X ap0 = f_str->aa1y;
+#endif
+X
+X rp = a_res.res;
+X rpmax = &a_res.res[a_res.nres];
+X
+X lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
+X i0 = a_res.min1; /* DNA position */
+X i1 = a_res.min0; /* protein position */
+X
+X while (rp < rpmax) {
+X /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
+X ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
+X */
+X switch (*rp++) { /* codes other than 0, 2, 3, 4, 5 are silently skipped */
+X case 0: /* aa insertion */
+X i1++;
+X lenc++;
+X ngap_d++;
+X break;
+X case 2: /* -1 frameshift */
+X nfs++;
+X i0 -= 1; /* step back one position before scoring the codon */
+X not_c++;
+X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;} /* scores >= 0 count as similar */
+X sp0 = sq[ap0[i0]];
+X i0 += 3;
+X sp1 = sq[ap1[i1++]];
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X lenc++;
+X break;
+X case 3: /* codon/aa match */
+X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
+X sp0 = sq[ap0[i0]];
+X i0 += 3;
+X sp1 = sq[ap1[i1++]];
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X lenc++;
+X break;
+X case 4: /* +1 frameshift */
+X nfs++;
+X i0 += 1; /* skip one position before scoring the codon */
+X not_c++;
+X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
+X sp0 = sq[ap0[i0]];
+X i0 += 3;
+X sp1 = sq[ap1[i1++]];
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X lenc++;
+X break;
+X case 5: /* codon insertion */
+X i0 += 3;
+X lenc++;
+X ngap_p++;
+X break;
+X }
+X }
+X
+#ifndef TFAST
+X aln->amax0 = i0;
+X aln->amax1 = i1;
+X aln->ngap_q = ngap_d;
+X aln->ngap_l = ngap_p;
+#else
+X aln->amax1 = i0;
+X aln->amax0 = i1;
+X aln->ngap_q = ngap_p;
+X aln->ngap_l = ngap_d;
+#endif
+X aln->nfs = nfs;
+X
+X if (lenc < 0) lenc = 1; /* lenc is only ever incremented from 0, so this never fires */
+/* now we have the middle, get the right end */
+X return lenc;
+}
+X
+#ifdef PCOMPLIB
+#include "p_mw.h"
+void /* update_params - copy qm_msg->n0 into ppst->n0; only compiled when PCOMPLIB is defined */
+update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+X ppst->n0 = qm_msg->n0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropfx.c ||
+echo 'restore of dropfx.c failed'
+Wc_c="`wc -c < 'dropfx.c'`"
+test 73324 -eq "$Wc_c" ||
+ echo 'dropfx.c: original size 73324, current size' "$Wc_c"
+fi
+# ============= dropfz2.c ==============
+if test -f 'dropfz2.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropfz2.c (File already exists)'
+else
+echo 'x - extracting dropfz2.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropfz2.c' &&
+X
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropfz2.c,v 1.57 2007/04/26 18:37:19 wrp Exp $ */
+X
+/* 18-Sept-2006 - removed static global variables for alignment */
+X
+/* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
+X
+/* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
+X
+/* implements an improved version of the fasty algorithm, see:
+X
+X W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+X DNA sequences with protein sequences" Genomics 46:24-36
+X
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+X
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+X
+#define NT_N 16
+X
+/* globals for fasta */
+#define MAXWINDOW 64
+X
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+X
+#ifndef ALLOCN0
+static char *verstr="3.5 Sept 2006";
+#else
+static char *verstr="3.5an0 Sept 2006";
+#endif
+X
+struct dstruct /* diagonal structure for saving current run;
+X struct f_struct reaches these through its diag pointer */
+{
+X int score; /* hash score of current match */
+X int start; /* start of current match */
+X int stop; /* end of current match */
+X struct savestr *dmax; /* location in vmax[] where best score data saved */
+};
+X
+struct savestr /* one saved match; struct f_struct keeps MAXSAV of these in vmax[] */
+{
+X int score; /* pam score with segment optimization */
+X int score0; /* pam score of best single segment */
+X int gscore; /* score from global match */
+X int dp; /* diagonal of match */
+X int start; /* start of match in lib seq */
+X int stop; /* end of match in lib seq */
+};
+X
+void savemax();
+void kpsort();
+X
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+X
+struct wgt { int iii, ii, iv;};
+struct wgtc {char c2, c3, c4, c5;};
+X
+typedef struct st_s { int C, I, D;} *st_ptr;
+X
+struct f_struct { /* per-comparison working state passed as f_str to the functions in this file */
+X struct dstruct *diag; /* presumably one entry per diagonal - confirm where it is allocated */
+X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
+X struct savestr *vptr[MAXSAV]; /* presumably pointers into vmax[] used for sorting - confirm */
+X struct savestr *lowmax; /* presumably the lowest-scoring saved entry - confirm */
+X int ndo;
+X int noff;
+X int hmask; /* hash constants */
+X int *pamh1; /* pam based array */
+X int *pamh2; /* pam based kfact array */
+X int *link, *harr; /* hash arrays */
+X int kshft; /* shift width */
+X int nsav, lowscor; /* number of saved runs, worst saved run */
+#ifndef TFAST
+X unsigned char *aa0x, *aa0v; /* aa0x - 111122223333 */
+#else
+X unsigned char *aa1x, *aa1v; /* aa1x - 111122223333 */
+#endif /* aa1v - computed codons */
+X struct sx_s *cur;
+X struct wgt **weight0; /* weight tables; see init_weights() for how a struct wgt table is allocated */
+X struct wgt **weight1;
+X struct wgtc **weight_c;
+X int *waa;
+X int *res; /* presumably the buffer for alignment edge codes - confirm */
+X int max_res; /* presumably the capacity of res - confirm */
+X st_ptr up, down, tp; /* three st_s rows; presumably the up/down/temporary rows of the linear-space alignment - confirm */
+};
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+static int dmatchx(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int hoff, int window,
+X int **pam2, int gdelval, int ggapval, int gshift,
+X struct f_struct *f_str);
+X
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+X struct savestr *dmax, int **pam2,
+X struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+X const unsigned char *dna_prot_seq, int len_dna_prot,
+X int **pam_matrix, int gopen, int gext,
+X int gshift, int start_diag, int width, struct f_struct *f_str);
+static void update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
+extern void w_abort (char *p, char *p1);
+extern void aagetmap(char *to, int n);
+X
+/* initialize for fasta */
+/* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
+/* Assume naa=47, and wgts[47][23] matches both upper and lower case
+amino acids with another amino acid.  Also assume the DNA letters
+do not have an upper/lower case difference.  If you also allow DNA
+sequences to be upper/lower case letters, more needs to be changed: not
+only here, but also in the alignment code, the way a codon is packed
+into a number between 0-63 needs to be changed. */
+X
+/* modified so that if **weightci==NULL, do not fiddle with characters */
+X/* init_weights() - build the per-residue codon scoring tables.
+X *
+X * Allocates (naa+1) rows of 256 struct wgt in *weighti and, when weightci
+X * is not NULL, (naa+1) rows of 256 struct wgtc in *weightci.  The row index
+X * is a residue code; the column index is an 8-bit code holding four
+X * nucleotides at 2 bits each (the form pre_com()/pre_com_r() produce).
+X *
+X *   wgts   - score rows, read as wgts[aa][aascii[residue letter]]
+X *   gshift - subtracted from the 2- and 4-nucleotide scores (ii, iv)
+X *   gsubs  - subtracted once for every codon position that differs
+X *   naa    - highest residue code; rows 0..naa are built
+X *
+X * Calls exit(1) when an allocation fails.
+X * NOTE(review): temp[] and le[] below have 49 rows, so naa must not exceed
+X * 48 - confirm against the MAXUC/MAXLC values the callers pass.
+X */
+void
+init_weights(struct wgt ***weighti, struct wgtc ***weightci,
+X int **wgts, int gshift, int gsubs, int naa)
+{
+X int i, j, do_wgtc=0;
+X int aa, b, a, x, y, z;
+X int *wwt, e;
+X struct wgt **weight;
+X struct wgtc **weightc;
+X char aacmap[64]; /* residue letter per codon; handed to aagetmap() below */
+X int temp[49][64]; /*change*/ /* temp[aa][codon] = best score found for that codon */
+X char le[49][64]; /* le[aa][codon] = the codon that gave temp[aa][codon] */
+X
+X
+X if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate weights array: %d\n",naa);
+X exit(1);
+X }
+X
+X weight = *weighti;
+X for (aa=0; aa <= naa; aa++) {
+X if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
+X exit(1);
+X }
+X }
+X
+X if (weightci !=NULL) { /* the letter table is optional */
+X if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
+X sizeof(struct wgtc *)))==NULL) {
+X fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
+X exit(1);
+X }
+X weightc = *weightci;
+X
+X for (aa=0; aa <= naa; aa++) {
+X if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
+X exit(1);
+X }
+X }
+X do_wgtc = 1;
+X }
+X else do_wgtc = 0; /* weightc stays unset; every use below is guarded by do_wgtc */
+X
+X aagetmap(aacmap,64);
+X
+X for (aa = 0; aa <= naa; aa++) { /* change*/
+X wwt = wgts[aa];
+X for (i = 0; i < 64; i++) { /* i iterates through the codons */
+X x = -1000;
+X y = i;
+X for (j = 0; j < 64; j++) { /* j iterates through the codons */
+X z = ((~i & j) | (i & ~j)); /* i XOR j: the bits where the two codons differ */
+X b = 0; /* score = 0 */
+X if (z % 4) b-= gsubs; /* low 2-bit field differs */
+X if (z /16) b-= gsubs; /* high 2-bit field differs */
+X if ((z /4) % 4) b -= gsubs; /* middle 2-bit field differs */
+X b += wwt[aascii[aacmap[j]]]; /* add the match score for char j*/
+X if (b > x) {
+X x = b; /* x has the score */
+X y = j; /* y has the character */
+X }
+X }
+X /* if (y < 0 || y > 63) printf("%d %d %d %d ",aa, i, x, y); */
+X temp[aa][i] = x;
+X le[aa][i] = y;
+X }
+X /* printf("\n"); */
+X }
+X
+X for (aa= 0; aa <= naa; aa++) {
+X wwt = temp[aa];
+X for (i = 0; i < 256; i++) { /* i is a 4-nucleotide code */
+X for (x=-100,b = 0; b < 4; b++) { /* best 3-nucleotide code left after dropping one field */
+X z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2))); /* i with the 2-bit field at position b removed */
+X if (x < (e=wwt[z])) {
+X x = e;
+X if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
+X }
+X }
+X weight[aa][i].iv=x-gshift;
+X weight[aa][i].iii = wwt[i%64]; /* low three fields taken as a codon */
+X
+X if (do_wgtc) {
+X weightc[aa][i].c5 = aacmap[le[aa][i%64]];
+X weightc[aa][i].c3 = aacmap[i%64];
+X }
+X x = i %16; /* low two fields only */
+X for (y = -100, b = 0; b < 3; b++) { /* b = position where a field is inserted */
+X z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2))); /* x with an empty 2-bit field opened at position b */
+X for (a = 0; a < 4; a++) { /* try each value in the opened field */
+X if ((e =wwt[z+(a<<(b*2))]) > y) {
+X y = e;
+X if (do_wgtc)
+X weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
+X }
+X }
+X }
+X weight[aa][i].ii = y-gshift;
+X }
+X }
+X /*106=CGGG*/
+X for (aa = 0; aa <= naa; aa++) { /* 106 is the code pre_com()/pre_com_r() store when a codon contains NT_N */
+X weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
+X weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
+X if (do_wgtc) {
+X weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
+X = weightc[aa][106].c2 = 'X';
+X }
+X }
+}
+X
+/* free_weights() - release the tables built by init_weights().
+X *
+X * Each argument points at the caller's table pointer; every table has
+X * naa+1 rows.  A NULL argument, or one that points at a NULL table, is
+X * skipped.  After a table is freed the caller's pointer is reset to NULL
+X * so that it cannot be freed or dereferenced a second time.
+X */
+void
+free_weights(struct wgt ***weighti0, struct wgt ***weighti1,
+X             struct wgtc ***weightci, int naa)
+{
+X  int aa;
+X
+X  if (weighti0 != NULL && *weighti0 != NULL) {
+X    for (aa=0; aa <= naa; aa++) {free((*weighti0)[aa]);}
+X    free(*weighti0);
+X    *weighti0 = NULL;
+X  }
+X
+X  if (weighti1 != NULL && *weighti1 != NULL) {
+X    for (aa=0; aa <= naa; aa++) {free((*weighti1)[aa]);}
+X    free(*weighti1);
+X    *weighti1 = NULL;
+X  }
+X
+X  if (weightci != NULL && *weightci != NULL) {
+X    for (aa=0; aa <= naa; aa++) {free((*weightci)[aa]);}
+X    free(*weightci);
+X    *weightci = NULL;
+X  }
+}
+X
+/* pre_com() - fill aa0v[0..n0-3] with a rolling 8-bit code of the
+X * nucleotides ending at aa0[2..n0-1], shifting in hnt[] two bits at a
+X * time.  Any position whose last three nucleotides include NT_N gets the
+X * reserved code 106; a genuine 106 (CGGG) is stored as 42 (AGGG) instead.
+X */
+static void
+pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v)
+{
+X  int code, pos;
+X
+X  code = (hnt[aa0[0]]<<2) + hnt[aa0[1]];
+X  for (pos=2; pos<n0; pos++) {
+X    code = ((code<<2)+hnt[aa0[pos]])&255;
+X    if (aa0[pos] == NT_N || aa0[pos-1]==NT_N || aa0[pos-2] == NT_N) {
+X      aa0v[pos-2] = 106;
+X      continue;
+X    }
+X    if (code == 106/*CGGG*/) code = 42/*AGGG*/;
+X    aa0v[pos-2]=code;
+X  }
+}
+X
+/* pre_com_r() - reverse-strand counterpart of pre_com(): walks aa0 from
+X * its last nucleotide to its first, shifting in 3-hnt[] for each one, and
+X * writes the rolling 8-bit codes to aa0v[0..n0-3].  Positions whose three
+X * most recent nucleotides include NT_N get the reserved code 106; a
+X * genuine 106 is stored as 42.
+X */
+static void
+pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v)
+{
+X  int code, out, src;
+X
+X  code = ((3-hnt[aa0[n0-1]])<<2) + 3-hnt[aa0[n0-2]];
+X  src = n0-3;
+X  for (out=0; out<n0-2; out++) {
+X    code = ((code<<2)+3-hnt[aa0[src]])&255;
+X    if (aa0[src] == NT_N || aa0[src+1]==NT_N || aa0[src+2] == NT_N) {
+X      aa0v[out] = 106;
+X    }
+X    else {
+X      if (code == 106) code = 42;
+X      aa0v[out]=code;
+X    }
+X    src--;
+X  }
+}
+X
+/* init_work() - one-time setup of the search state for a query.
+X *
+X *   aa0, n0 - encoded query sequence and its length
+X *   ppst    - search parameters; param_u.fa.cgap, .pgap and (unless
+X *             optcut_set == 1) .optcut are written here
+X *   f_arg   - receives the newly allocated struct f_struct
+X *
+X * Without TFAST the query is translated in three frames with saatran()
+X * into f_str->aa0x and pre-coded with pre_com() into f_str->aa0v; the hash
+X * tables are then built from the translated copy.  With TFAST the buffers
+X * for the translated library sequence (aa1x, aa1v) are allocated instead.
+X *
+X * Every allocation failure is reported on stderr and ends in exit(1).
+X */
+void
+init_work (unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg)
+{
+X int mhv, phv;
+X int hmax;
+X int i0, hv;
+X int pamfact;
+X int btemp;
+X struct f_struct *f_str;
+X struct bdstr *bss;
+X /* these used to be globals, but do not need to be */
+X int ktup, fact, kt1, lkt;
+X
+X int maxn0;
+X int *pwaa;
+X int i, j, q;
+X struct swstr *ss, *r_ss;
+X int *waa;
+X int *res;
+X int nsq, ip, *hsq, naat;
+#ifndef TFAST
+X int last_n0, itemp, dnav;
+X unsigned char *fd, *fs, *aa0x, *aa0v;
+X int n0x, n0x3;
+#endif
+X
+X /* the test is against 'N', so the message must say 'N' as well */
+X if (nt[NT_N] != 'N') {
+X fprintf(stderr," nt[NT_N] (%d) != 'N' (%c) - recompile\n",NT_N,nt[NT_N]);
+X exit(1);
+X }
+X
+X if (ppst->ext_sq_set) {
+X nsq = ppst->nsqx; ip = 1;
+X hsq = ppst->hsqx;
+X }
+X else {
+X nsq = ppst->nsq; ip = 0;
+X hsq = ppst->hsq;
+X }
+X
+X /* f_str is dereferenced immediately below, so a failed calloc must stop here */
+X if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct))) == NULL) {
+X fprintf (stderr, " cannot allocate f_struct\n");
+X exit (1);
+X }
+X
+X btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+X n0 / ppst->param_u.fa.bestscale +
+X ppst->param_u.fa.bkfact *
+X (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+X btemp = min (btemp, ppst->param_u.fa.bestmax);
+X if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+X
+X ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+X if (ppst->param_u.fa.optcut_set != 1)
+#ifndef TFAST
+X ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+X ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+X
+#ifdef OLD_FASTA_GAP
+X ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+X ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+X pamfact = ppst->param_u.fa.pamfact;
+X ktup = ppst->param_u.fa.ktup;
+X fact = ppst->param_u.fa.scfact * ktup;
+X
+#ifndef TFAST
+X /* before hashing, we must set up some space and translate the sequence */
+X
+X maxn0 = n0 + 2;
+X if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
+X sizeof(unsigned char)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+X exit (1);
+X }
+X aa0x++; /* stored pointer is one past the allocation; close_work() steps back before free() */
+X f_str->aa0x = aa0x;
+X
+X
+X if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
+X sizeof(unsigned char)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
+X exit (1);
+X }
+X aa0v++; /* same one-past convention as aa0x */
+X f_str->aa0v = aa0v;
+X
+X /* make a precomputed codon number series */
+X pre_com(aa0, n0, aa0v);
+X
+X last_n0 = 0;
+X for (itemp=0; itemp<3; itemp++) {
+X n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
+X /* for (i=0; i<n0x; i++) {
+X fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
+X if ((i%60)==59) fprintf(stderr,"\n");
+X }
+X fprintf(stderr,"\n");
+X */
+X last_n0 += n0x+1;
+X }
+X
+X /* fprintf(stderr,"\n"); */
+X n0x = n0;
+X n0x3 = n0x/3;
+X
+X /* now switch aa0 and aa0x for hashing functions */
+X fs = aa0;
+X aa0 = aa0x;
+X aa0x = fs;
+#endif
+X
+X if (ppst->ext_sq_set) naat = MAXLC;
+X else naat = MAXUC;
+X
+X init_weights(&f_str->weight0, NULL,
+X ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
+X init_weights(&f_str->weight1, &f_str->weight_c,
+X ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
+X
+X if (pamfact == -1)
+X pamfact = 0;
+X else if (pamfact == -2)
+X pamfact = 1;
+X
+X for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+X if (hsq[i0] < NMAP && hsq[i0] > mhv)
+X mhv = ppst->hsq[i0];
+X
+X if (mhv <= 0)
+X {
+X fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+X exit (1);
+X }
+X
+X /* kshft = number of bits needed to hold the largest hash value */
+X for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+X
+/* kshft = 2; */
+X kt1 = ktup - 1;
+X hv = 1;
+X for (i0 = 0; i0 < ktup; i0++)
+X hv = hv << f_str->kshft;
+X hmax = hv;
+X f_str->hmask = (hmax >> f_str->kshft) - 1;
+X
+X if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate hash array\n");
+X exit (1);
+X }
+X if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate pamh1 array\n");
+X exit (1);
+X }
+X if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate pamh2 array\n");
+X exit (1);
+X }
+X if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+X fprintf (stderr, " cannot allocate hash link array");
+X exit (1);
+X }
+X
+X for (i0 = 0; i0 < hmax; i0++)
+X f_str->harr[i0] = -1;
+X for (i0 = 0; i0 < n0; i0++)
+X f_str->link[i0] = -1;
+X
+X /* encode the aa0 array */
+X phv = hv = 0;
+X lkt = kt1;
+X for (i0 = 0; i0 < min(n0,lkt); i0++) {
+X if (hsq[aa0[i0]] >= NMAP) {
+X hv=phv=0; lkt = i0+ktup; continue;
+X }
+X hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
+X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+X }
+X
+X for (; i0 < n0; i0++) {
+X if (hsq[aa0[i0]] >= NMAP) {
+X hv=phv=0;
+X /* restart hv, phv calculation */
+X for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+X if (hsq[aa0[i0]] >= NMAP) {
+X hv=phv=0;
+X lkt = i0+ktup;
+X continue;
+X }
+X hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+X }
+X }
+X if (i0 >= n0) break;
+X hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+X f_str->link[i0] = f_str->harr[hv]; /* chain earlier positions with the same hash */
+X f_str->harr[hv] = i0;
+X if (pamfact) {
+X f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+X if (hsq[aa0[i0-kt1]] < NMAP)
+X phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+X }
+X else f_str->pamh2[hv] = fact * ktup;
+X }
+X
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+X pam2[0][0] is now undefined for consistency with blast
+*/
+X
+X if (pamfact)
+X for (i0 = 1; i0 <= ppst->nsq; i0++)
+X f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+X else
+X for (i0 = 1; i0 <= ppst->nsq; i0++)
+X f_str->pamh1[i0] = fact;
+X
+X f_str->ndo = 0; /* used to save time on diagonals with long queries */
+X
+X
+#ifndef ALLOCN0
+X if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+X sizeof (struct dstruct)))==NULL) {
+X fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
+X MAXDIAG *sizeof (struct dstruct));
+X exit (1);
+X };
+#else
+X if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+X sizeof (struct dstruct)))==NULL) {
+X fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+X (long)n0*sizeof (struct dstruct));
+X exit (1);
+X };
+#endif
+X
+#ifndef TFAST
+X /* done hashing, now switch aa0, aa0x back */
+X fs = aa0;
+X aa0 = aa0x;
+X aa0x = fs;
+#else
+X if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+X sizeof(unsigned char)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
+X exit (1);
+X }
+X f_str->aa1x++;
+X
+X if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+X sizeof(unsigned char))) == NULL) {
+X fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
+X exit (1);
+X }
+X f_str->aa1v++;
+X
+#endif
+X
+X if ((waa= (int *)malloc (sizeof(int)*(ppst->nsq+1)*n0)) == NULL) {
+X fprintf(stderr,"cannot allocate waa struct %3d\n",ppst->nsq*n0);
+X exit(1);
+X }
+X
+X /* waa: one row of n0 scores per residue code, pam2[ip][i][aa0[j]] */
+X pwaa = waa;
+X for (i=0; i<=ppst->nsq; i++) {
+X for (j=0;j<n0; j++) {
+X *pwaa = ppst->pam2[ip][i][aa0[j]];
+X pwaa++;
+X }
+X }
+X f_str->waa = waa;
+X
+#ifndef TFAST
+X maxn0 = max(2*n0,MIN_RES);
+#else
+X maxn0 = max(4*n0,MIN_RES);
+#endif
+X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X exit(1);
+X }
+X f_str->res = res;
+X f_str->max_res = maxn0;
+X
+X *f_arg = f_str;
+}
+X
+/* get_param() - write a text description of the search parameters.
+X   pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format;
+X   it is only written when pstring2 is not NULL */
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifndef TFAST
+X char *pg_str="FASTY";
+#else
+X char *pg_str="TFASTY";
+#endif
+X
+X if (!pstr->param_u.fa.optflag) /* non-optimized: the opt: cutoff is not reported */
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
+#else
+X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
+#endif
+X pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->gdelval, pstr->ggapval, pstr->gshift, pstr->gsubs,
+X pstr->param_u.fa.optwid);
+X else /* optimized: same fields plus opt: (param_u.fa.optcut) */
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
+#else
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
+#endif
+X pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
+X pstr->gshift,pstr->gsubs,pstr->param_u.fa.optwid);
+X
+X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
+X /*
+X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
+X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+X */
+X
+X if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#else
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#endif
+X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
+X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
+X pstr->param_u.fa.cgap);
+X }
+} /* NOTE(review): both sprintf() calls are unbounded - the caller's buffers must be large enough */
+X
+/* close_work() - release everything hanging off *f_arg and reset *f_arg
+X * to NULL.  Nothing happens when *f_arg is already NULL.  aa0 and n0 are
+X * accepted for interface symmetry and are not used.
+X */
+void
+close_work (const unsigned char *aa0, int n0,
+X            struct pstruct *ppst,
+X            struct f_struct **f_arg)
+{
+X  struct f_struct *state = *f_arg;
+X  int n_rows;
+X
+X  if (state == NULL) return;
+X
+X  n_rows = (ppst->ext_sq_set) ? MAXLC : MAXUC;
+X  free_weights(&state->weight0,&state->weight1,&state->weight_c,n_rows);
+X  free(state->cur);
+X
+X  /* the sequence buffers are stored one byte past their allocation start */
+#ifndef TFAST
+X  state->aa0v -= 1;
+X  free(state->aa0v);
+X  state->aa0x -= 1;
+X  free(state->aa0x);
+#else /* TFAST */
+X  state->aa1x -= 1;
+X  free(state->aa1x);
+X  state->aa1v -= 1;
+X  free(state->aa1v);
+#endif
+X
+X  free(state->res);
+X  free(state->waa);
+X  free(state->diag);
+X  free(state->link);
+X  free(state->pamh2);
+X  free(state->pamh1);
+X  free(state->harr);
+X  free(state);
+X  *f_arg = NULL;
+}
+X
+/* do_fasta() - hash-based diagonal scan of aa1 against the query tables,
+X * followed by rescoring of the saved runs (spam), joining (sconn) and,
+X * when optflag is set and the initial score exceeds optcut, a banded
+X * alignment (dmatchx).
+X *
+X *   rst->score[0] - larger of the joined score and the best single run
+X *   rst->score[1] - best single-run score
+X *   rst->score[2] - banded score when computed, otherwise score[0]
+X *   *hoff         - diagonal offset passed to dmatchx(); written only when
+X *                   dmatchx() is called
+X *
+X * All three scores are 0 when n1 < ktup or when no run was saved, and -1
+X * when n0+n1+1 >= MAXDIAG.
+X */
+void do_fasta (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst, struct f_struct *f_str,
+X struct rstruct *rst, int *hoff)
+{
+X int nd; /* diagonal array size */
+X int lhval;
+X int kfact;
+X int i;
+X register struct dstruct *dptr;
+X register int tscor;
+X int xdebug = 0;
+X
+#ifndef ALLOCN0
+X register struct dstruct *diagp;
+#else
+X register int dpos;
+X int lposn0;
+#endif
+X struct dstruct *dpmax;
+X register int lpos;
+X int tpos;
+X struct savestr *vmptr;
+X int scor, tmp;
+X int im, ib, nsave;
+X int ktup, kt1, *hsq, ip, lkt;
+#ifndef TFAST
+X int n0x31, n0x32;
+X n0x31 = (n0-2)/3;
+X n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+X unsigned char *fs, *fd;
+X int n1x31, n1x32, last_n1, itemp;
+X n1x31 = (n1-2)/3;
+X n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+X
+X if (ppst->ext_sq_set) {
+X ip = 1;
+X hsq = ppst->hsqx;
+X }
+X else {
+X ip = 0;
+X hsq = ppst->hsq;
+X }
+X
+X ktup = ppst->param_u.fa.ktup;
+X kt1 = ktup-1;
+X
+X if (n1 < ktup) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+X if (n0+n1+1 >= MAXDIAG) {
+X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+X rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X return;
+X }
+X
+X f_str->noff = n0 - 1;
+X
+#ifdef ALLOCN0
+X nd = n0;
+#endif
+X
+#ifndef ALLOCN0
+X nd = n0 + n1;
+#endif
+X
+X /* clear the diagonals that were not reset at the end of the last call */
+X dpmax = &f_str->diag[nd];
+X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+X {
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X vmptr->score = 0;
+X f_str->lowmax = f_str->vmax;
+X f_str->lowscor = 0;
+X
+X /* debugging probe for one particular sequence; the length test must
+X cover the highest index read (1401), not just 1000 */
+X if (n1 > 1401 && aa1[0]==23 && aa1[100]==23 &&
+X aa1[1400]==23 && aa1[1401]!=23) {
+X xdebug = 1;
+X }
+X else xdebug = 0;
+X
+X /* start hashing */
+X lhval = 0;
+X lkt = kt1;
+X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
+X /* restart lhval calculation */
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lhval = 0; lkt=lpos+ktup;
+X continue;
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+X }
+X
+#ifndef ALLOCN0
+X diagp = &f_str->diag[f_str->noff + lkt];
+X for (; lpos < n1; lpos++, diagp++) {
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lpos++ ; diagp++;
+X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+X if (lpos >= n1) break;
+X lhval = 0;
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+X lposn0 = f_str->noff + lpos;
+X for (; lpos < n1; lpos++, lposn0++) {
+X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X dpos = lposn0 - tpos;
+X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+X tscor += ktup;
+X if ((tscor -= lpos) <= 0) {
+X scor = dptr->score;
+X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
+#ifdef ALLOCN0
+X savemax (dptr, dpos, f_str);
+#else
+X savemax (dptr, f_str);
+#endif
+X if ((tscor += scor) >= kfact) {
+X dptr->score = tscor;
+X dptr->stop = lpos;
+X }
+X else {
+X dptr->score = kfact;
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X }
+X else {
+X dptr->score += f_str->pamh1[aa0[tpos]];
+X dptr->stop = lpos;
+X }
+X }
+X else {
+X dptr->score = f_str->pamh2[lhval];
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X } /* end tpos */
+X
+#ifdef ALLOCN0
+X /* reinitialize diag structure */
+X loopl:
+X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor)
+X savemax (dptr, lpos, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr->score = 0;
+#endif
+X } /* end lpos */
+X
+#ifdef ALLOCN0
+X for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
+X savemax (dptr, dpos, f_str);
+X }
+#else
+X for (dptr = f_str->diag; dptr < dpmax;) {
+X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X f_str->ndo = nd;
+#endif
+X
+/*
+X at this point all of the elements of aa1[lpos]
+X have been searched for elements of aa0[tpos]
+X with the results in diag[dpos]
+*/
+X /*
+X if (xdebug)
+X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",
+X n0, f_str->noff,n1,n1x31,n1x32);
+X */
+X
+X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X {
+X /*
+X if (xdebug)
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+vmptr->start-vmptr->dp,
+X f_str->noff+vmptr->stop-vmptr->dp,
+X vmptr->start,vmptr->stop,
+X vmptr->dp,vmptr->score);
+X */
+X if (vmptr->score > 0) {
+X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[0], f_str);
+X f_str->vptr[nsave++] = vmptr;
+X }
+X }
+X
+X if (nsave <= 0) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+#ifndef TFAST
+X /* FASTX code here to modify the start, stop points for
+X the three phases of the translated protein sequence
+X */
+X
+X /*
+X fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X
+X fprintf(stderr,"---\n");
+X */
+X
+X for (ib=0; ib<nsave; ib++) {
+X if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
+X f_str->vptr[ib]->dp += n0x32;
+X if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
+X f_str->vptr[ib]->dp += n0x31;
+X }
+X
+X /*
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X */
+#else
+X /* TFAST code here to modify the start, stop points for
+X the three phases of the translated protein sequence
+X TFAST modifies library start points, rather than
+X query start points
+X */
+X
+X /*
+X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",n0, f_str->noff,n1,n1x31,n1x32);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X
+X fprintf(stderr,"---\n");
+X */
+X
+X for (ib=0; ib<nsave; ib++) {
+X if (f_str->vptr[ib]->start >= n1x32) {
+X f_str->vptr[ib]->start -= n1x32;
+X f_str->vptr[ib]->stop -= n1x32;
+X f_str->vptr[ib]->dp -= n1x32;
+X }
+X if (f_str->vptr[ib]->start >= n1x31) {
+X f_str->vptr[ib]->start -= n1x31;
+X f_str->vptr[ib]->stop -= n1x31;
+X f_str->vptr[ib]->dp -= n1x31;
+X }
+X }
+X
+X /*
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X */
+X
+#endif /* TFAST */
+X
+X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
+X ppst->param_u.fa.pgap, f_str);
+X
+X /* pick the best single run after sconn() has reordered vptr[] */
+X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+X
+/* kssort (f_str->vptr, nsave); */
+X
+X rst->score[1] = vmptr->score;
+X rst->score[0] = max (scor, vmptr->score);
+X rst->score[2] = rst->score[0]; /* initn */
+X
+X if (ppst->param_u.fa.optflag) {
+X if (rst->score[0] > ppst->param_u.fa.optcut) {
+#ifndef TFAST
+X rst->score[2] = dmatchx(aa0, n0,aa1,n1,*hoff=f_str->noff - vmptr->dp,
+X ppst->param_u.fa.optwid, ppst->pam2[0],
+X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#else /* TFAST */
+X /* generate f_str->aa1x */
+/*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[aa1[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+X fprintf(stderr,"\n-----\n");
+*/
+/*
+X fprintf(stderr,"n1: %d, aa1x[n1]: %d; EOSEQ: %d\n",
+X n1,f_str->aa1x[n1],EOSEQ);
+X for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
+X for (fd= &f_str->aa1x[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
+X fprintf(stderr,"fs stopped at: %d\n",(int)(fs-f_str->aa1x));
+X *fd=EOSEQ;
+X }
+*/
+/*
+X for (i=0; i<n1; i++) {
+X fputc(ppst->sq[f_str->aa1x[i]],stderr);
+X if (i%60==59) fputc('\n',stderr);
+X }
+*/
+X rst->score[2] = dmatchx(aa0, n0, aa1, n1, *hoff=vmptr->dp-f_str->noff,
+X ppst->param_u.fa.optwid, ppst->pam2[0],
+X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#endif /* TFAST */
+X }
+X }
+}
+X
+/* do_work() - score one library sequence against the prepared query.
+X *
+X * Sets rst->escore to 1.0 and rst->segnum/seglen to 1.  When n1 is below
+X * ktup all three scores are set to 0 and nothing else is done.  Without
+X * TFAST the search runs on the translated query (f_str->aa0x); with TFAST
+X * the library sequence is pre-coded (pre_com or pre_com_r, chosen by
+X * frame), translated in three frames into f_str->aa1x, and searched with
+X * the untranslated query.  qr_flg is not used.
+X */
+void do_work (const unsigned char *aa0, int n0,
+X              const unsigned char *aa1, int n1,
+X              int frame,
+X              struct pstruct *ppst,
+X              struct f_struct *f_str,
+X              int qr_flg, struct rstruct *rst)
+{
+X  int hoff;
+#ifdef TFAST
+X  int filled, fr, fr_end, fr_len;
+X  unsigned char *xlat;
+#endif
+X
+X  rst->escore = 1.0;
+X  rst->segnum = rst->seglen = 1;
+X
+X  if (n1 < ppst->param_u.fa.ktup) {
+X    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X    return;
+X  }
+X
+#ifdef TFAST
+X  /* make a precomputed codon number series */
+X  if (frame != 0) pre_com_r(aa1, n1, f_str->aa1v);
+X  else pre_com(aa1, n1, f_str->aa1v);
+X
+X  /* make translated sequence: three frames, one separator after each */
+X  xlat = f_str->aa1x;
+X  filled = 0;
+X  fr_end = frame*3+3;
+X  for (fr = frame*3; fr < fr_end; fr++) {
+X    fr_len = saatran(aa1,&xlat[filled],n1,fr);
+X    filled += fr_len+1;
+X  }
+X
+X  do_fasta (aa0, n0, f_str->aa1x, filled-1, ppst, f_str, rst, &hoff);
+#else
+X  do_fasta (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#endif
+}
+X
+/* do_opt() - run do_fasta() with param_u.fa.optflag forced to 1, then
+X * put the caller's optflag value back.  Without TFAST the translated
+X * query f_str->aa0x is searched; with TFAST aa0 is used as given.
+X * frame is not used.
+X */
+void do_opt (const unsigned char *aa0, int n0,
+X             const unsigned char *aa1, int n1,
+X             int frame,
+X             struct pstruct *ppst,
+X             struct f_struct *f_str,
+X             struct rstruct *rst)
+{
+X  int hoff;
+X  int saved_optflag;
+X  const unsigned char *query;
+X
+#ifndef TFAST
+X  query = f_str->aa0x;
+#else
+X  query = aa0;
+#endif
+X
+X  saved_optflag = ppst->param_u.fa.optflag;
+X  ppst->param_u.fa.optflag = 1;
+X
+X  do_fasta (query, n0, aa1, n1, ppst, f_str, rst, &hoff);
+X
+X  ppst->param_u.fa.optflag = saved_optflag;
+}
+X/* savemax() - record the run held in *dptr in the f_str->vmax[] table.
+X *
+X * If dptr->dmax already points at a saved entry for the same diagonal and
+X * start, that entry's stop (and score, when higher) is updated in place.
+X * Otherwise the run overwrites the current lowest-scoring entry
+X * (f_str->lowmax).  Whenever lowmax may have changed, vmax[] is rescanned
+X * to find the new lowest entry and f_str->lowscor is refreshed.
+X *
+X * With ALLOCN0 the caller supplies the diagonal number dpos; without it,
+X * dpos is derived from the position of dptr within f_str->diag.
+X */
+#ifdef ALLOCN0
+void
+savemax (dptr, dpos, f_str)
+X register struct dstruct *dptr;
+X int dpos;
+X struct f_struct *f_str;
+{
+X register struct savestr *vmptr;
+X register int i;
+X
+#else
+void
+savemax (dptr, f_str)
+X register struct dstruct *dptr;
+X struct f_struct *f_str;
+{
+X register int dpos;
+X register struct savestr *vmptr;
+X register int i;
+X
+X dpos = (int) (dptr - f_str->diag); /* diagonal number = index into diag[] */
+X
+#endif
+X
+/* check to see if this is the continuation of a run that is already saved */
+X
+X if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
+X vmptr->start == dptr->start)
+X {
+X vmptr->stop = dptr->stop;
+X if ((i = dptr->score) <= vmptr->score)
+X return; /* score did not improve: only the stop moved */
+X vmptr->score = i;
+X if (vmptr != f_str->lowmax)
+X return; /* the lowest entry is untouched, so lowscor is still valid */
+X }
+X else
+X {
+X i = f_str->lowmax->score = dptr->score; /* new run replaces the lowest entry */
+X f_str->lowmax->dp = dpos;
+X f_str->lowmax->start = dptr->start;
+X f_str->lowmax->stop = dptr->stop;
+X dptr->dmax = f_str->lowmax;
+X }
+X
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) /* find the new lowest entry */
+X if (vmptr->score < i)
+X {
+X i = vmptr->score;
+X f_str->lowmax = vmptr;
+X }
+X f_str->lowscor = i;
+}
+X
+/* spam() - rescore one saved run with the scoring matrix and trim it to
+X * its best-scoring sub-segment.
+X *
+X * Walks the run from dmax->start to dmax->stop along its diagonal, adding
+X * pam2[query residue][library residue], and keeps the contiguous stretch
+X * with the highest running total (the total restarts whenever it drops
+X * below 0).  dmax->start and dmax->stop are replaced by the limits of
+X * that stretch and its score is returned.
+X *
+X * When no stretch scores above 0 the function returns 0 and leaves
+X * dmax->start/stop as they were; previously they were overwritten with
+X * uninitialised values in that case.
+X */
+int spam (const unsigned char *aa0,
+X          const unsigned char *aa1,
+X          struct savestr *dmax, int **pam2,
+X          struct f_struct *f_str)
+{
+X  int lpos;
+X  int tot;
+X  struct {
+X    int start, stop, score;
+X  } curv, maxv;
+X  const unsigned char *aa0p, *aa1p;
+X
+X  aa1p = &aa1[lpos = dmax->start];
+X  aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+X
+X  /* default to the incoming run so every field is defined */
+X  curv.start = maxv.start = dmax->start;
+X  curv.stop = maxv.stop = dmax->stop;
+X
+X  tot = curv.score = maxv.score = 0;
+X  for (; lpos <= dmax->stop; lpos++) {
+X    tot += pam2[*aa0p++][*aa1p++];
+X    if (tot > curv.score) {
+X      curv.stop = lpos;
+X      curv.score = tot;
+X    }
+X    else if (tot < 0) {
+X      /* close the current stretch and start again after this position */
+X      if (curv.score > maxv.score) {
+X        maxv.start = curv.start;
+X        maxv.stop = curv.stop;
+X        maxv.score = curv.score;
+X      }
+X      tot = curv.score = 0;
+X      curv.start = lpos+1;
+X    }
+X  }
+X
+X  if (curv.score > maxv.score) {
+X    maxv.start = curv.start;
+X    maxv.stop = curv.stop;
+X    maxv.score = curv.score;
+X  }
+X
+/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+X		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+X			dmax->start,maxv.stop,dmax->stop);
+*/
+X  dmax->start = maxv.start;
+X  dmax->stop = maxv.stop;
+X
+X  return maxv.score;
+}
+X
+#define XFACT 10
+X/* sconn() - estimate the best score obtainable by joining saved runs.
+X *
+X *   v, n  - the saved runs; v[] is reordered by kpsort() (ascending start)
+X *   cgap  - runs scoring below cgap are skipped
+X *   pgap  - added to the combined score whenever a run is joined to a chain
+X *   f_str - supplies noff, used to turn a diagonal into a query position
+X *
+X * Returns the score at the head of the score-ordered chain, or 0 when no
+X * run reached cgap.
+X */
+int sconn (struct savestr **v, int n,
+X int cgap, int pgap, struct f_struct *f_str)
+{
+X int i, si;
+X struct slink {
+X int score;
+X struct savestr *vp;
+X struct slink *next;
+X } *start, *sl, *sj, *so, sarr[MAXSAV]; /* chain kept in descending score order, head in start */
+X int lstart, tstart, plstop, ptstop;
+X
+/* sort the score left to right in lib pos */
+X
+X kpsort (v, n);
+X
+X start = NULL;
+X
+/* for the remaining runs, see if they fit */
+X
+X for (i = 0, si = 0; i < n; i++)
+X {
+X
+/* if the score is less than the gap penalty, it never helps */
+X if (v[i]->score < cgap)
+X continue;
+X lstart = v[i]->start; /* library start of this run */
+X tstart = lstart - v[i]->dp + f_str->noff; /* matching query start */
+X
+/* put the run in the group */
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X
+/* if it fits, then increase the score */
+X for (sl = start; sl != NULL; sl = sl->next) /* highest-scoring chain entries are tried first */
+X {
+X plstop = sl->vp->stop;
+X ptstop = plstop - sl->vp->dp + f_str->noff;
+X if (plstop < lstart+XFACT && ptstop < tstart+XFACT) { /* earlier run ends before this one starts, with XFACT of slack */
+X sarr[si].score = sl->score + v[i]->score + pgap;
+X break;
+X }
+X }
+X
+/* now recalculate where the score fits */
+X if (start == NULL)
+X start = &sarr[si];
+X else
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) /* an entry that outscores no chain member is left unlinked */
+X {
+X if (sarr[si].score > sj->score)
+X {
+X sarr[si].next = sj;
+X if (so != NULL)
+X so->next = &sarr[si];
+X else
+X start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X si++;
+X }
+X
+X if (start != NULL)
+X return (start->score);
+X else
+X return (0);
+}
+X
+/* kssort() - Shell sort of v[0..n-1] into descending order of ->score. */
+void
+kssort (v, n)
+struct savestr *v[];
+int n;
+{
+X  int step, hi, lo;
+X  struct savestr *held;
+X
+X  step = n / 2;
+X  while (step > 0) {
+X    for (hi = step; hi < n; hi++) {
+X      lo = hi - step;
+X      /* bubble the higher score toward the front, one step at a time */
+X      while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+X        held = v[lo];
+X        v[lo] = v[lo + step];
+X        v[lo + step] = held;
+X        lo -= step;
+X      }
+X    }
+X    step /= 2;
+X  }
+}
+X
+/* kpsort() - Shell sort of v[0..n-1] into ascending order of ->start. */
+void
+kpsort (v, n)
+struct savestr *v[];
+int n;
+{
+X  int step, hi, lo;
+X  struct savestr *held;
+X
+X  step = n / 2;
+X  while (step > 0) {
+X    for (hi = step; hi < n; hi++) {
+X      lo = hi - step;
+X      /* bubble the earlier start toward the front, one step at a time */
+X      while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
+X        held = v[lo];
+X        v[lo] = v[lo + step];
+X        v[lo + step] = held;
+X        lo -= step;
+X      }
+X    }
+X    step /= 2;
+X  }
+}
+X
+/* dmatchx() - banded protein/translated-DNA score via lx_band().
+X *
+X * The band starts window/2 diagonals before hoff.  Penalties are negated
+X * before being handed to lx_band(); with OLD_FASTA_GAP the gap-open value
+X * is gdelval - ggapval instead of gdelval.  Without TFAST the protein is
+X * aa1 and the codon series is f_str->aa0v (length n0-2); with TFAST the
+X * protein is aa0 and the codon series is f_str->aa1v (length n1-2).
+X */
+static int
+dmatchx(const unsigned char *aa0, int n0,
+X        const unsigned char *aa1, int n1,
+X        int hoff, int window,
+X        int **pam2, int gdelval, int ggapval, int gshift,
+X        struct f_struct *f_str)
+{
+X  int band_start, open_pen;
+X
+#ifdef OLD_FASTA_GAP
+X  open_pen = -(gdelval - ggapval);
+#else
+X  open_pen = -gdelval;
+#endif
+X  band_start = hoff - window/2;
+X
+#ifndef TFAST
+X  return lx_band(aa1,n1,f_str->aa0v,n0-2,
+X                 pam2,open_pen,-ggapval,-gshift,
+X                 band_start,window,f_str);
+#else
+X  return lx_band(aa0,n0,f_str->aa1v,n1-2,
+X                 pam2,open_pen,-ggapval,-gshift,
+X                 band_start,window,f_str);
+#endif
+}
+X
+/* init_row() - zero every field of the first sp cells of row. */
+static void
+init_row(struct sx_s *row, int sp) {
+X  struct sx_s *cell = row;
+X  int left;
+X
+X  for (left = sp; left > 0; left--, cell++) {
+X    cell->C1 = cell->C2 = cell->C3 = 0;
+X    cell->I1 = cell->I2 = cell->I3 = 0;
+X    cell->flag = 0;
+X  }
+}
+X/* lx_band - score-only banded alignment of a protein against a
+X * translated DNA (codon number) sequence.
+X *
+X * A single row of struct sx_s cells, f_str->cur, is swept once per
+X * protein residue.  Each cell holds three scores C1..C3 and three gap
+X * scores I1..I3, one pair for each of three consecutive positions of
+X * dna_prot_seq.  Substitution weights are read from
+X * f_str->weight0[residue][codon] (fields iii, ii and iv); pam_matrix
+X * and gshift are not referenced in this body.
+X *
+X * Returns best+gg, where gg = gopen+gext and best is the largest value
+X * of (cell score - gg) recorded while filling the band.
+X *
+X * NOTE(review): f_str->cur is allocated only when it is NULL, using the
+X * width of that call; a later call with a larger width would run past
+X * it - confirm that callers always pass the same width.
+X */
+int lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
+X int len_prot, /* length of prot. seq */
+X const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+X int len_dna_prot, /* length trans. seq. */
+X int **pam_matrix, /* scoring matrix */
+X int gopen, int gext, /* gap open, gap extend penalties */
+X int gshift, /* frame-shift penalty */
+X int start_diag, /* start diagonal of band */
+X int width, /* width for band alignment */
+X struct f_struct *f_str)
+{
+X void *ckalloc();
+X int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
+X struct sx_s *last, *tmp;
+X int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+X const unsigned char *dp;
+X register struct sx_s *ap, *aq;
+X struct wgt *wt, *ww;
+X int aa, b, a,x,y,z;
+X
+X sp = width+7; /* number of cells in the band row */
+X gg = gopen+gext; /* amount subtracted from a score when a gap is opened from it */
+X /* sp = sp/3+1; */
+X
+X if (f_str->cur == NULL) {
+X f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+X }
+X
+X init_row(f_str->cur, sp); /* the row is reused between calls, so clear it every time */
+X
+X /*
+X if (start_diag %3 !=0) start_diag = start_diag/3-1;
+X else start_diag = start_diag/3;
+X if (width % 3 != 0) width = width/3+1;
+X else width = width /3;
+X */
+X
+X x1 = start_diag; /* x1 = lower bound of DNA */
+X x2 = 1; /* the amount of position shift from last row*/
+X
+X end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
+X end_prot = min(end_prot,len_prot); /* NOTE(review): end_prot is never read; the loop below runs to len_prot */
+X
+X /* i counts through protein sequence, x1 through DNAp */
+X
+X for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
+X bd = min(x1+width, (len_dna_prot+2)/3); /* upper bound of band */
+X bd1 = max(0,x1); /* lower bound of band */
+X wt = f_str->weight0[prot_seq[i]];
+X del = 1-x1; /*adjustment*/
+X bd += del;
+X bd1 +=del;
+X
+X ap = &f_str->cur[bd1]; aq = ap+1; /* ap: cell being rewritten; aq: the next cell, not yet rewritten in this row */
+X e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0;
+X for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
+X ww = &wt[(unsigned char) *dp++]; /* first of the three DNA positions covered by this cell */
+X sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
+X if (cd1 > sc) sc = cd1;
+X cd1 -= gext;
+X if ((ci = aq->I1) > 0) {
+X if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+X else {
+X ap->C1 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd1 < sc) cd1 = sc;
+X ap->I1 = max(ci-gext, sc);
+X } else ap->I1 = ci-gext;
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I1 = ap->C1 = 0;
+X } else {
+X ap->C1 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd1 < sc) cd1 = sc;
+X ap->I1 = sc;
+X } else ap->I1 = 0;
+X }
+X }
+X ww = &wt[(unsigned char) *dp++]; /* second DNA position */
+X sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
+X if (cd2 > sc) sc = cd2;
+X cd2 -= gext;
+X if ((ci = aq->I2) > 0) {
+X if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+X else {
+X ap->C2 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd2 < sc) cd2 = sc;
+X ap->I2 = max(ci-gext, sc);
+X } /* NOTE(review): the I1 case has "else ap->I1 = ci-gext" here; I2 is left unchanged instead - confirm intended */
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I2 = ap->C2 = 0;
+X } else {
+X ap->C2 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd2 < sc) cd2 = sc;
+X ap->I2 = sc;
+X } else ap->I2 = 0;
+X }
+X }
+X ww = &wt[(unsigned char)*dp++]; /* third DNA position */
+X sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
+X if (cd3 > sc) sc = cd3;
+X cd3 -= gext;
+X if ((ci = aq++->I3) > 0) {
+X if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+X else {
+X ap->C3 = sc;
+X sc -= gg;
+X if (sc > 0) {
+X if (sc > best) best =sc;
+X if (cd3 < sc) cd3 = sc;
+X ap->I3 = max(ci-gext, sc);
+X } /* NOTE(review): as for I2, no else branch updates I3 when sc-gg <= 0 - confirm intended */
+X }
+X } else {
+X if (sc <= 0) {
+X ap->I3 = ap->C3 = 0;
+X } else {
+X ap->C3 = sc; sc-=gg;
+X if (sc >0) {
+X if (sc > best) best =sc;
+X if (cd3 < sc) cd3 = sc;
+X ap->I3 = sc;
+X } else ap->I3 = 0;
+X }
+X }
+X }
+X }
+X /* printf("The best score is %d\n", best); */
+X return best+gg; /* best was recorded after subtracting gg, so add it back */
+}
+X
+/* ckalloc - malloc() wrapper that reports failure through w_abort().
+X * Returns the new block; if w_abort() returns after a failed
+X * allocation the result is NULL. */
+void *ckalloc(size_t amount)
+{
+X  void *mem = (void *)malloc(amount);
+X
+X  if (mem == NULL) {
+X    w_abort("Ran out of memory.","");
+X  }
+X  return mem;
+}
+X
+/* shscore - the 100% identical score: the sum of pam2[r][r] over every
+X * residue r in aa0[0..n0). */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+X  const unsigned char *res = aa0;
+X  int left, total = 0;
+X
+X  for (left = n0; left > 0; left--, res++) {
+X    total += pam2[*res][*res];
+X  }
+X  return total;
+}
+X
+#define SGW1 100 /* global() uses small_global() only when ex-x < SGW1; also sizes its static tables */
+#define SGW2 300 /* same limit for ey-y */
+#define WIDTH 60 /* columns per output block in display_alig() */
+X
+typedef struct mat *match_ptr; /* link in an alignment edit list */
+X
+typedef struct mat {
+X int i, j, l; /* i, j: end position of the edge; l: edge type (0, 2, 3, 4 or 5) */
+X match_ptr next;
+} match_node;
+X
+typedef struct { int i,j;} state; /* start coordinates tracked per column by local_align() */
+typedef state *state_ptr;
+X
+X
+void *ckalloc();
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(), init_ROW();
+X
+/* pro_dna - align a protein to a translated DNA sequence and store the
+X * edit script in a_res.
+X *
+X * local_align() finds the best-scoring local alignment and its end
+X * points; global() then rebuilds the path between those end points as a
+X * list of match_node edges.  The edge codes (0, 2, 3, 4, 5) are copied
+X * into a_res->res, which must have room for max_res ints, and the end
+X * points go into a_res->min0/max0 (protein) and min1/max1 (DNA).
+X *
+X * If the path is longer than max_res, the extra codes are dropped, a
+X * warning is written to stderr and a_res->nres is set to max_res, so
+X * that readers of res[0..nres) never run past the buffer.  Otherwise
+X * nres is the path length.
+X *
+X * f_str->up, ->down and ->tp are allocated here as scratch rows and
+X * freed before returning; they are left dangling.
+X *
+X * Returns the score reported by local_align().
+X */
+int
+pro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/
+X int len_prot, /* length of prot. seq */
+X const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+X int len_dna_prot, /* length trans. seq. */
+X int **pam_matrix, /* scoring matrix */
+X int gopen, int gext, /* gap open, gap extend penalties */
+X int gshift, /* frame-shift penalty */
+X struct f_struct *f_str,
+X int max_res,
+X struct a_res_str *a_res) /* alignment info */
+{
+X  match_ptr align, ap, aq;
+X  int x, y, ex, ey, i, score;
+X  int *alignment;
+X
+X  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+X
+X  /* local alignment: (x, y) is the starting position of the best
+X     local alignment and (ex, ey) is its ending position */
+X  score= local_align(&x, &y, &ex, &ey,
+X                     pam_matrix, gopen, gext,
+X                     dna_prot_seq, len_dna_prot,
+X                     prot_seq, len_prot, f_str);
+X
+X  /* global_down() writes up to three cells below index 0 of a row,
+X     so hand the global routines rows that start three cells in */
+X  f_str->up += 3; f_str->down += 3; f_str->tp += 3;
+X
+X  /* x, y - start in prot, dna_prot */
+X  a_res->min0 = x;	/* prot */
+X  a_res->min1 = y;	/* DNA */
+X  a_res->max0 = ex;	/* prot */
+X  a_res->max1 = ey;	/* DNA */
+X
+X  align = global(x, y, ex, ey,
+X                 pam_matrix, gopen, gext,
+X                 dna_prot_seq, prot_seq,
+X                 0, 0, f_str);
+X
+X  alignment = a_res->res;
+X
+X  /* copy the codes that fit; every node is freed either way */
+X  for (ap = align, i= 0; ap; i++) {
+X    if (i < max_res) alignment[i] = ap->l;
+X    aq = ap->next; free(ap); ap = aq;
+X  }
+X  if (i > max_res) {
+X    /* only a path longer than the buffer is truncated; report the
+X       real length, then clamp the count that is stored */
+X    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
+X    i = max_res;
+X  }
+X
+X  /* undo the +3 offset before handing the rows back to free() */
+X  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
+X
+X  a_res->nres = i;
+X  return score;
+}
+X
+/* swap - exchange the two pointers that a and b point at. */
+static void
+swap(void **a, void **b)
+{
+X  void *held;
+X
+X  held = *b;
+X  *b = *a;
+X  *a = held;
+}
+X
+/*
+X local_align - score-only search for the best local alignment.
+X On return (*x, *y) is the starting position of the best local
+X alignment and (*ex, *ey) is its ending position; the first value of
+X each pair is on the protein side, the second on the DNA side.
+X Returns the best score found, or 0 when no cell scores above 0.
+X Uses f_str->up and f_str->down as the two score rows and reads
+X substitution weights from f_str->weight1.
+*/
+static int
+local_align(int *x, int *y, int *ex, int *ey,
+X int **wgts, int gop, int gext,
+X const unsigned char *dnap, int ld,
+X const unsigned char *pro, int lp,
+X struct f_struct *f_str)
+{
+X int i, j, score, x1,x2,x3,x4, e1 = 0, e2 = 0, e3,
+X sc, del, e, best = 0, cd, ci, c;
+X struct wgt *wt, *ww;
+X state_ptr cur_st, last_st, cur_i_st;
+X st_ptr cur, last;
+X const unsigned char *dp;
+X int *cur_d_st, *st_up;
+X
+X /*
+X Array rowiC stores the best scores of alignment ending at a position
+X Arrays rowiD and rowiI store the best scores of alignment ending
+X at a position with a deletion or insertion
+X Arrays sti stores the starting position of the best alignment whose
+X score stored in the corresponding row array.
+X The program stores two rows to complete the computation, same is
+X for the global alignment routine.
+X */
+X
+X
+X st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+X init_row2(st_up, ld+5);
+X
+X ld += 2; /* columns 0 and 1 are padding: column j reads dnap[j-2] (see dp below) */
+X
+X init_ROW(f_str->up, ld+1);
+X init_ROW(f_str->down, ld+1);
+X cur = f_str->up+1;
+X last = f_str->down+1;
+X
+X cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+X cur_d_st = st_up; /* per column: how far back the current D gap was opened */
+X dp = dnap-2;
+X for (i = 0; i < lp; i++) {
+X wt = f_str->weight1[pro[i]]; e2 =0; e1 = last[0].C;
+X for (j = 0; j < 2; j++) {
+X cur_st[j].i = i+1;
+X cur_st[j].j = j+1;
+X }
+X for (j = 2; j < ld; j++) {
+X ww = &wt[(unsigned char) dp[j]];
+X del = -1; /* -1: no predecessor chosen yet (handled by case -1 below) */
+X if (j >= 3) {
+X sc = 0;
+X e3 = e2; e2 = e1;
+X e1 = last[j-2].C;
+X if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}
+X if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}
+X if ((e = e3+ww->iv) > sc) {sc = e; del = 4;}
+X } else {
+X sc = e2 = 0;
+X if (ww->iii > 0) {sc = ww->iii; del = 3;}
+X }
+X if (sc < (ci=last[j].I)) {
+X sc = ci; del = 0; /* 0: best score comes from the I state of the previous row */
+X }
+X if (sc < (cd=cur[j].D)) {
+X sc = cd; del = 5; /* 5: best score comes from the D state of this row */
+X }
+X cur[j].C = sc;
+X e = sc - gop;
+X if (e > cd) {
+X cur[j+3].D = e-gext; /* open a new D gap three columns ahead */
+X cur_d_st[j+3] = 3;
+X } else {
+X cur[j+3].D = cd-gext; /* extend the existing D gap */
+X cur_d_st[j+3] = cur_d_st[j]+3;
+X }
+X switch(del) { /* carry the start coordinates along with the chosen predecessor */
+X case 5:
+X c = cur_d_st[j];
+X cur_st[j].i = cur_st[j-c].i;
+X cur_st[j].j = cur_st[j-c].j;
+X break;
+X case 0:
+X cur_st[j].i = cur_i_st[j].i;
+X cur_st[j].j = cur_i_st[j].j;
+X break;
+X case 2:
+X case 3:
+X case 4:
+X if (i) {
+X if (j-del >= 0) {
+X cur_st[j].i = last_st[j-del].i;
+X cur_st[j].j = last_st[j-del].j;
+X } else {
+X cur_st[j].i = i;
+X cur_st[j].j = 0;
+X }
+X } else {
+X cur_st[j].i = 0;
+X cur_st[j].j = max(0, j-del+1);
+X }
+X break;
+X case -1:
+X cur_st[j].i = i+1;
+X cur_st[j].j = j+1;
+X break;
+X }
+X if (e > ci) {
+X cur[j].I = e -gext;
+X cur_i_st[j].i = cur_st[j].i;
+X cur_i_st[j].j = cur_st[j].j;
+X } else {
+X cur[j].I = ci- gext;
+X }
+X if (sc > best) { /* remember the start and end of the best cell so far */
+X x1 = cur_st[j].i;
+X x2 = cur_st[j].j;
+X best =sc;
+X x3 = i;
+X x4 = j;
+X }
+X }
+X swap((void *)&last, (void *)&cur);
+X swap((void *)&cur_st, (void *)&last_st);
+X }
+X /* printf("The best score is %d\n", best);*/
+X *x = x1; *y = x2; *ex = x3; *ey = x4; /* NOTE(review): x1..x4 are uninitialized if no cell ever scored above 0 */
+X free(cur_st); free(last_st); free(cur_i_st);
+X free(st_up);
+X return best;
+}
+X
+/*
+X Both global_up and global_down do linear-space, score-only global
+X alignments on the subsequences pro[x]...pro[ex] and dna[y]...dna[ey].
+X global_up runs the recurrence upwards, from row x towards row ex.
+X global_down runs it downwards, from row ex towards row x.
+X
+X global_up: *row1 and *row2 are the two scratch rows; on return *row1
+X points at the row computed last.  Weights come from f_str->weight1;
+X wgts is not referenced in this body.
+*/
+X
+static void
+global_up(st_ptr *row1, st_ptr *row2,
+X int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext,
+X unsigned char *dnap, unsigned char *pro,
+X int N, struct f_struct *f_str)
+{
+X int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
+X struct wgt *wt, *ww;
+X st_ptr cur, last;
+X
+X cur = *row1; last = *row2;
+X sc = -gop;
+X for (j = 0; j <= ey-y+1; j++) { /* boundary row: only columns that are a multiple of 3 get a gap score */
+X if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+X else { last[j].I = last[j].C = -10000;}
+X }
+X last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+X last[0].D = last[1].D = last[2].D = -10000;
+X if (N) last[0].I = -gext; /* presumably N marks a gap already open at this boundary - confirm with global() */
+X for (i = 1; i <= ex-x+1; i++) {
+X wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
+X for (j = 0; j <= ey-y+1; j++) {
+X t = j+y;
+X sc = -10000;
+X ww = &wt[(unsigned char) dnap[t-3]]; /* index is y+j-3, i.e. below y while j < 3 */
+X if (j < 4) {
+X if (j == 3) {
+X sc = e2+ww->iii;
+X } else if (j == 2) {
+X sc = e2 + ww->ii;
+X }
+X } else {
+X e3 = e2; e2 = e1;
+X e1 = last[j-2].C;
+X sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
+X }
+X sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+X cur[j].C = sc;
+X cur[j+3].D = max(cd, sc-gop)-gext; /* D is propagated three columns ahead in the same row */
+X cur[j].I = max(ci, sc-gop)-gext;
+X }
+X swap((void *)&last, (void *)&cur);
+X }
+X /*printf("global up score =%d\n", last[ey-y+1].C);*/
+X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; /* after the final swap, last is the newest row and cur the one before it */
+X if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 the newest row */
+}
+X/* global_down - linear-space, score-only global alignment of
+X * pro[x]...pro[ex] against dna[y]...dna[ey], computed from row ex
+X * back towards row x and from column ey-y+1 back towards column 0.
+X * *row1 and *row2 are the two scratch rows; on return *row1 points at
+X * the row computed last.  Weights come from f_str->weight1; wgts is
+X * not referenced in this body.
+X */
+static void
+global_down(st_ptr *row1, st_ptr *row2,
+X int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext,
+X unsigned char *dnap, unsigned char *pro,
+X int N, struct f_struct *f_str)
+{
+X int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, score;
+X struct wgt *wt, *w1, *w2, *w3;
+X st_ptr cur, last;
+X
+X cur = (*row1); last = *row2;
+X sc = -gop;
+X for (j = ey-y+1; j >= 0; j--) {
+X if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;} /* NOTE(review): global_up scores columns where the offset IS a multiple of 3; the test here is the opposite - confirm intended */
+X else last[j].I = last[j].C = -10000;
+X cur[j].I = -10000;
+X }
+X last[ey-y+1].C = 0;
+X if (N) last[ey-y+1].I = -gext;
+X cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+X last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+X for (i = ex-x; i >= 0; i--) {
+X wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C;
+X e1 = -10000;
+X w3 = &wt[(unsigned char) dnap[ey]];
+X w2 = &wt[(unsigned char) dnap[ey-1]];
+X for (j = ey-y+1; j >= 0; j--) {
+X t = j+y;
+X w1 = &wt[(unsigned char) dnap[t-1]];
+X sc = -10000;
+X if (t+3 > ey) {
+X if (t+2 == ey) {
+X sc = e2+w2->iii;
+X } else if (t+1 == ey) {
+X sc = e2+w1->ii;
+X }
+X } else {
+X e3 = e2; e2 = e1;
+X e1 = last[j+2].C;
+X sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
+X }
+X if (sc < (cd= cur[j].D)) {
+X sc = cd;
+X cur[j-3].D = cd-gext; /* j-3 is negative while j < 3; pro_dna() advances the row pointers by 3 to leave room */
+X } else cur[j-3].D =max(cd, sc-gop)-gext;
+X if (sc < (ci= last[j].I)) {
+X sc = ci;
+X cur[j].I = ci - gext;
+X } else cur[j].I = max(sc-gop,ci)-gext;
+X cur[j].C = sc;
+X w3 = w2; w2 = w1; /* slide the three-position weight window one column down */
+X }
+X swap((void *)&last, (void *)&cur);
+X }
+X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; /* after the final swap, last is the newest row and cur the one before it */
+X if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 the newest row */
+}
+X
+/* init_row2 - set the first ld ints of row to zero. */
+static void
+init_row2(int *row, int ld) {
+X  int *cell = row;
+X  int left = ld;
+X
+X  while (left-- > 0) {
+X    *cell++ = 0;
+X  }
+}
+X
+/* init_ROW - zero the C, D and I scores of the first ld cells of row. */
+static void init_ROW(st_ptr row, int ld) {
+X  st_ptr cell = row;
+X  int left;
+X
+X  for (left = ld; left > 0; left--, cell++) {
+X    cell->C = 0;
+X    cell->D = 0;
+X    cell->I = 0;
+X  }
+}
+X
+/* combine - append edge list x2 to edge list x1 and return the head.
+X *
+X * If x1 is NULL, x2 is returned untouched.  Otherwise x2 is linked
+X * after the last node of x1.  When st is non-zero the nodes of x2 are
+X * then walked from the front: each node visited gets its j incremented,
+X * and the walk stops at the first node whose l is 3 or 4; that node's l
+X * is decremented.  If x2 contains no such node there is nothing to
+X * decrement and the walk simply ends (this used to dereference NULL).
+X */
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+X  match_ptr x;
+X
+X  if (x1 == NULL) return x2;
+X  for (x = x1; x->next; x = x->next);
+X  x->next = x2;
+X  if (st) {
+X    for (x = x2; x; x = x->next) {
+X      x->j++;
+X      if (x->l == 3 || x->l == 4) break;
+X    }
+X    /* x is NULL here when the loop ran off the end of x2 */
+X    if (x != NULL) x->l--;
+X  }
+X  return x1;
+}
+X
+/*
+X global uses the upwards and downwards score-only linear-space
+X global alignment subroutines to build the alignment recursively.
+X It returns a list of match_node edges for the path from (x, y) to
+X (ex, ey); the caller owns the nodes.  N1 and N2 are passed on as the
+X N argument of global_up and global_down respectively.
+*/
+X
+match_ptr
+global(int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext,
+X unsigned char *dnap, unsigned char *pro, int N1, int N2,
+X struct f_struct *f_str)
+{
+X int m;
+X int m1, m2;
+X match_ptr x1, x2, mm1, mm2;
+X
+X /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
+X /*
+X if the space required is limited, we can do a quadratic space
+X algorithm to find the alignment.
+X */
+X
+X if (ex <= x) { /* at most one protein row left: emit codon deletions (code 5) */
+X mm1 = NULL;
+X for (m = y+3; m <= ey; m+=3) {
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = 5; x1->next = mm1;
+X if (mm1== NULL) mm2 = x1; /* mm2 remembers the first node created, which ends up last in the list */
+X mm1 = x1;
+X }
+X if (ex == x) {
+X if ((ey-y) % 3 != 0) {
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+X if (mm1) mm2->next = x1; else mm1 = x1;
+X } else mm2->l = 4; /* NOTE(review): mm2 is only assigned inside the loop above; if that loop did not run it is uninitialized here */
+X }
+X return mm1;
+X }
+X if (ey <= y) { /* no DNA left: every remaining residue is code 0 */
+X mm1 = NULL;
+X for (m = x; m <= ex; m++) {
+X x1 = (match_ptr) ckalloc(sizeof(match_node));
+X x1->l = 0; x1->next = mm1; mm1 = x1;
+X }
+X return mm1;
+X }
+X if (ex -x < SGW1 && ey-y < SGW2) /* small enough for small_global's static tables */
+X return small_global(x,y,ex,ey,wgts, gop, gext, dnap, pro, N1, N2,f_str);
+X m = (x+ex)/2;
+X /*
+X Do the score only global alignment from row x to row m, m is
+X the middle row of x and ex. Store the information of row m in
+X upC, upD, and upI.
+X */
+X global_up(&f_str->up, &f_str->tp, x, y, m, ey,
+X wgts, gop, gext,
+X dnap, pro, N1, f_str);
+X /*
+X Do the score only global alignment downwards from row ex
+X to row m+1, store information of row m+1 in downC downI and downD
+X */
+X global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
+X wgts, gop, gext,
+X dnap, pro, N2, f_str);
+X
+X /*
+X Use this information for row m and m+1 to find the crossing
+X point of the best alignment with the middle row. The crossing
+X point is given by m1 and m2. Then we recursively call global
+X itself to compute alignments in two smaller regions found by
+X the crossing point and combine the two alignments to form a
+X whole alignment. Return that alignment.
+X */
+X if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) { /* non-zero: the best crossing uses the C scores */
+X x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
+X x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
+X if (m1 == m2) x1 = combine(x1,x2,1);
+X else x1 = combine(x1, x2,0);
+X } else { /* zero: the best crossing uses the I scores; rows m and m+1 become two code-0 edges */
+X x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
+X x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
+X mm1 = (match_ptr) ckalloc(sizeof(match_node));
+X mm1->i = m; mm1->l = 0; mm1->j = m1;
+X mm2 = (match_ptr) ckalloc(sizeof(match_node));
+X mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+X mm1->next = mm2; mm2->next = x2;
+X x1 = combine(x1, mm1, 0);
+X }
+X return x1;
+}
+X
+/* find_best - choose the column where the best path crosses between
+X * the row scored by global_up (up) and the row scored by global_down
+X * (down).
+X *
+X * Columns 1..ld-1 are scanned.  For each, up.C+down.C and
+X * up.I+down.I+gop are compared with the best total so far, which
+X * starts at -1000.  The winning column j is reported as *m1 = j-1+y
+X * and *m2 = j+y.
+X *
+X * Returns 1 when the best total came from the C scores and 0 when it
+X * came from the I scores.  If no column beats the starting threshold
+X * (or ld <= 1), j stays 0 and 1 is returned; the return value used to
+X * be uninitialized in that case.
+X */
+static int
+find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
+
+X  int i, best = -1000, j = 0, s2, s4, st = 1;
+X
+X  for (i = 1; i < ld; i++) {
+X    s2 = up[i].C + down[i].C;
+X    s4 = up[i].I + down[i].I + gop;
+X    if (best < s2) {
+X      best = s2; j = i; st = 1;
+X    }
+X    if (best < s4) {
+X      best = s4; j = i; st = 0;
+X    }
+X  }
+X  *m1 = j-1+y;
+X  *m2 = j+y;
+X  /*printf("score=%d\n", best);*/
+X  return st;
+}
+X
+/*
+X small_global - quadratic-space global alignment with traceback, used
+X by global() when ex-x < SGW1 and ey-y < SGW2.
+X
+X An alignment is represented as a linked list whose elements are of
+X type match_node. Each element represents an edge in the path of the
+X alignment graph. The fields of match_node are
+X l --- gives the type of the edge.
+X i, j --- give the end position.
+X
+X The score and traceback tables are static arrays, so the routine keeps
+X state between calls and must not be entered concurrently.
+*/
+X
+static match_ptr
+small_global(int x, int y, int ex, int ey,
+X int **wgts, int gop, int gext,
+X unsigned char *dnap, unsigned char *pro,
+X int N1, int N2, struct f_struct *f_str) {
+X
+X static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1];
+X int i, j, e, sc, score, del, k, t, ci, cd;
+X int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+X match_ptr mp, first;
+X struct wgt *wt, *ww;
+X
+X /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+X sc = -gop-gext; C[0][0] = 0;
+X if (N1) I[0] = -gext; else I[0] = sc;
+X
+X for (j = 1; j <= ey-y+1; j++) { /* boundary row: only multiples of 3 are reachable by codon deletions */
+X if (j % 3== 0) {
+X C[0][j] = sc; sc -= gext; I[j] = sc-gop;
+X } else I[j] = C[0][j] = -10000;
+X st[0][j] = 5;
+X }
+X lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
+X cI = I;
+X for (i = 1; i <= ex-x+1; i++) {
+X cC = &C[i][0];
+X wt = f_str->weight1[pro[i+x-1]]; cst = &st[i][0];
+X for (j = 0; j <=ey-y+1; j++) {
+X ci = cI[j];
+X cd= cD[j];
+X t = j+y;
+X ww = &wt[(unsigned char) dnap[t-3]];
+X if (j >= 4) {
+X sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;
+X e4 = lC[j-4]+ww->iv; del = 3;
+X if (e2 > sc) { sc = e2; del = 2;}
+X if (e4 >= sc) { sc = e4; del = 4;}
+X } else {
+X if (j == 3) {
+X sc = lC[0]+ww->iii; del =3;
+X } else if (j == 2) {
+X sc = lC[0]+ww->ii; del = 2;
+X } else {sc = -10000; del = 0;}
+X }
+X if (sc < ci) {
+X sc = ci; del = 0;
+X }
+X if (sc <= cd) {
+X sc = cd;
+X del = 5;
+X }
+X cC[j] = sc;
+X sc -= gop;
+X if (sc <= cd) {
+X del += 10; /* +10: the D score at j+3 extends the existing gap */
+X cD[j+3] = cd - gext;
+X } else cD[j+3] = sc -gext;
+X if (sc < ci) {
+X del += 20; /* +20: the I score at j extends the existing gap */
+X cI[j] = ci-gext;
+X } else cI[j] = sc-gext;
+X *(cst++) = del; /* units digit: edge type; the +10/+20 flags are decoded in the traceback */
+X }
+X lC = cC;
+X }
+X /*printf("small global score =%d\n", C[ex-x+1][ey-y+1]);*/
+X if (N2 && cC[ey-y+1] < ci+gop) st[ex-x+1][ey-y+1] =0;
+X first = NULL; e = 1;
+X for (i = ex+1, j = ey+1; i > x || j > y; i--) { /* trace back from the end, prepending one edge per step */
+X mp = (match_ptr) ckalloc(sizeof(match_node));
+X mp->i = i-1;
+X k = (t=st[i-x][j-y])%10;
+X mp->j = j-1;
+X if (e == 5 && (t/10)%2 == 1) k = 5; /* already in a D gap and this cell extended it */
+X if (e == 0 && (t/20)== 1) k = 0; /* already in an I gap and this cell extended it */
+X if (k == 5) { j -= 3; i++; e=5;} /* i++ cancels the loop's i--: a codon deletion stays on the same row */
+X else {j -= k;if (k==0) e= 0; else e = 1;}
+X mp->l = k;
+X mp->next = first;
+X first = mp;
+X }
+X
+X /* for (i = 0; i <= ex-x; i++) {
+X for (j = 0; j <= ey-y; j++)
+X printf("%d ", C[i][j]);
+X printf("\n");
+X }
+X */
+X return first;
+}
+X
+#define XTERNAL
+#include "upam.h"
+X
+/* display_alig - print an alignment to stdout in blocks of at most
+X * WIDTH columns.
+X *
+X *   a       a[0] and a[1] are the start positions in pro and dna;
+X *           a[2..length-1] are the edit codes (0, 2, 3, 4, 5)
+X *   dna     codon-number sequence, indexed by DNA position
+X *   pro     protein residue numbers
+X *   length  number of entries in a
+X *   ld      not used
+X *   f_str   supplies weight_c[][], the display characters for a
+X *           residue/codon pair
+X *
+X * Three text rows are built in parallel and printed in the order line1,
+X * line3, line2.  Codes 2 and 4 take two columns (a frame-shift marker
+X * and the residue), so a block can reach WIDTH+1 columns before it is
+X * flushed; the extra column is carried over to the next block for all
+X * three rows.
+X */
+void
+display_alig(int *a, unsigned char *dna, unsigned char *pro,
+X int length, int ld, struct f_struct *f_str)
+{
+X  int len = 0, i, j, x, y, k, iaa;
+X  static char line1[100], line2[100], line3[100];
+X  char c1, c2, c3;
+X
+X  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
+X
+X  printf("\n%5d\n%5d", y+3, x);
+X  for (len = 0, j = 2; j < length; j++) {
+X    i = a[j];
+X    line3[len] = ' ';
+X    switch (i) {
+X    case 3:
+X      y += 3;
+X      line2[len] = aa[iaa=pro[x++]];
+X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
+X      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
+X        line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+X      break;
+X    case 2:
+X      y += 2;
+X      line1[len] = '\\';
+X      line2[len++] = ' ';
+X      line2[len] = aa[iaa=pro[x++]];
+X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
+X      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+X      break;
+X    case 4:
+X      y += 4;
+X      line1[len] = '/';
+X      line2[len++] = ' ';
+X      line2[len] = aa[iaa=pro[x++]];
+X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
+X      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+X      break;
+X    case 5:
+X      y += 3;
+X      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
+X      line2[len] = '-';
+X      break;
+X    case 0:
+X      line1[len] = '-';
+X      line2[len] = aa[pro[x++]];
+X      break;
+X    }
+X    len++;
+X    line1[len] = line2[len] = line3[len] = '\0';
+X    if (len >= WIDTH) {
+X      for (k = 10; k <= WIDTH; k+=10)
+X        printf(" . :");
+X      if (k-5 < WIDTH) printf(" .");
+X      /* cut each row at WIDTH for printing, then put the cut
+X         character back so that it is carried to the next block */
+X      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+X      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+X      printf("\n %s\n %s\n %s\n", line1, line3, line2);
+X      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+X      /* source and destination overlap, so memmove, not strncpy */
+X      memmove(line1, &line1[WIDTH], sizeof(line1)-WIDTH);
+X      memmove(line2, &line2[WIDTH], sizeof(line2)-WIDTH);
+X      memmove(line3, &line3[WIDTH], sizeof(line3)-WIDTH);
+X      len = len - WIDTH;
+X      printf("\n%5d\n%5d", y+3, x);
+X    }
+X  }
+X  for (k = 10; k < len; k+=10)
+X    printf(" . :");
+X  if (k-5 < len) printf(" .");
+X  printf("\n %s\n %s\n %s\n", line1, line3, line2);
+}
+X
+X
+/* The alignment array stores the operations that align the protein and DNA
+X sequences. The codes in the array are as follows:
+X 0: deletion of an amino acid.
+X 2: frame shift, 2 nucleotides match an amino acid
+X 3: match of an amino acid with a codon
+X 4: the other type of frame shift (4 nucleotides match an amino acid)
+X 5: deletion of a codon
+X
+X
+X Also, the first two elements of the array store the starting points
+X in the protein and DNA sequences of the local alignment.
+X
+X Display looks like where WIDTH is assumed to be divisible by 10.
+X
+X 0 . : . : . : . : . : . :
+X AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
+X I S G S V F N R Q L A G S V F N R Q L A
+X AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
+X
+X 60 . : . : . : . : . : . :
+X /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
+X G S V F N R Q L A G S V F N R Q L A G S V F
+X G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
+X
+For a frame shift, the middle row shows the letter in the original sequence,
+and the letter in the top row is the amino acid that is chosen by the
+alignment (translated codon chosen from 4 nucleotides, or 2+1).
+*/
+X
+/* fatal - write msg and a newline to stderr, then exit with status 1 */
+void
+fatal(msg)
+X char *msg;
+{
+X  fputs(msg, stderr);
+X  fputc('\n', stderr);
+X  exit(1);
+}
+X/* do_walign - produce the full alignment for one query/library pair.
+X *
+X * Points a_res->res at f_str->res, calls pro_dna() with the gap and
+X * frame-shift penalties from ppst negated, sets *have_ares to 1 and
+X * returns the score from pro_dna().
+X *
+X * Without TFAST, aa1 is passed as the protein and f_str->aa0v as the
+X * codon series.  With TFAST, the codon series f_str->aa1v is first
+X * rebuilt from aa1 (pre_com or pre_com_r, depending on frame), three
+X * translations are written into f_str->aa1x, and aa0 is passed as the
+X * protein.
+X */
+int do_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X int *have_ares)
+{
+X int score;
+X int i, ir, last_n1, itemp, n10, itx, dnav;
+X unsigned char *aa1x;
+X
+X a_res->res = f_str->res; /* pro_dna() writes the edit codes through a_res->res */
+X
+#ifndef TFAST
+X score = pro_dna(aa1, n1, f_str->aa0v, n0-2, ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval - ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X -ppst->gshift,
+X f_str, f_str->max_res, a_res);
+X /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */
+X
+#else
+X /* make a precomputed codon number series */
+X if (frame==0) {
+X pre_com(aa1, n1, f_str->aa1v);
+X }
+X else { /* must do things backwards */
+X pre_com_r(aa1, n1, f_str->aa1v);
+X }
+X
+X /* make translated sequence */
+X last_n1 = 0;
+X aa1x = f_str->aa1x;
+X for (itx= frame*3; itx< frame*3+3; itx++) { /* three consecutive translations, stored one after another */
+X n10 = saatran(aa1,&aa1x[last_n1],n1,itx);
+X /*
+X fprintf(stderr," itt %d itx: %d\n",itt,itx);
+X for (i=0; i<n10; i++) {
+X fprintf(stderr,"%c",aa[f_str->aa1x[last_n1+i]]);
+X if ((i%60)==59) fprintf(stderr,"\n");
+X }
+X fprintf(stderr,"\n");
+X */
+X last_n1 += n10+1; /* leave one slot between translations */
+X }
+X n10 = last_n1-1; /* not read again in this function */
+X
+X score = pro_dna(aa0, n0, f_str->aa1v, n1-2, ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval - ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X -ppst->gshift,
+X f_str, f_str->max_res, a_res);
+X /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0,f_str); */
+#endif
+X a_res->res = f_str->res;
+X *have_ares = 1;
+X
+X return score;
+}
+X
+/* pre_cons - rebuild the codon-number series f_str->aa1v from aa1
+X * before consensus strings are built.  Does nothing unless TFAST is
+X * defined. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+
+#ifdef TFAST
+X  /* make a precomputed codon number series; any frame other than 0
+X     must be done backwards */
+X  if (frame != 0) {
+X    pre_com_r(aa1, n1, f_str->aa1v);
+X  } else {
+X    pre_com(aa1, n1, f_str->aa1v);
+X  }
+#endif
+}
+X
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* Without TFAST the query gets factor 3 and the reverse flag; with */
+/* TFAST the library does.  The reverse flag is 1 when frame > 0. */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+X  int is_rev = (frame > 0) ? 1 : 0;
+X
+X  aln->llmult = 1;
+X  aln->frame = 0;
+#ifndef TFAST
+X  aln->qlfact = 3;
+X  aln->llfact = 1;
+X  aln->qlrev = is_rev;
+X  aln->llrev = 0;
+#else /* TFASTX */
+X  aln->qlfact = 1;
+X  aln->llfact = 3;
+X  aln->qlrev = 0;
+X  aln->llrev = is_rev;
+#endif /* TFASTX */
+}
+X
+#include "structs.h"
+#include "a_mark.h"
+X
+/* calcons - build the display strings for one alignment.
+X *
+X * Walks the edit codes a_res.res[0..a_res.nres) and writes one
+X * character per alignment column into the codon row, the protein row
+X * and the match-symbol row seqca.  Without TFAST the codon row is seqc0
+X * and the protein row is seqc1; with TFAST they are swapped.  Only
+X * seqca is NUL terminated here.
+X *
+X * Also fills in *aln: nident, nsim, nfs, ngap_q, ngap_l, smin0/smin1,
+X * amin0/amin1 and amax0/amax1.
+X *
+X * A residue matched right after a frame shift (codes 2 and 4) counts
+X * toward aln->nsim under the same rule as a plain match (code 3); this
+X * is what calcons_a() already does.
+X *
+X * Stores in *nc the number of columns not counting frame-shift marker
+X * columns, and returns that number plus the marker columns.
+X */
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X  int i0, i1;
+X  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+X  char *sp0, *sp1, *spa, *sq;
+X  unsigned char aap;
+X  const unsigned char *ap0, *ap1;
+X  int *rp, *rpmax;
+X  int *res;
+X
+X  /* don't fill in the ends */
+X
+X  res = a_res.res;
+X  rpmax = &res[a_res.nres];	/* end of alignment info */
+X
+X  if (pst.ext_sq_set) {sq = pst.sqx;}
+X  else {sq = pst.sq;}
+X
+X  /* res[0] has start of protein sequence */
+X  /* res[1] has start of translated DNA sequence */
+X
+#ifndef TFAST	/* FASTX */
+X  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+X  ap1 = aa1;			/* protein sequence -> ap1 */
+X  aln->smin1 = a_res.min0;	/* start in protein sequence */
+X  aln->smin0= a_res.min1;	/* start in DNA/codon sequence */
+#else		/* TFASTYZ */
+X  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+X  ap1 = aa0;			/* protein sequence */
+X  aln->smin0 = a_res.min0;	/* start in protein sequence */
+X  aln->smin1 = a_res.min1;	/* start in codon sequence */
+#endif
+X
+X  rp = a_res.res;		/* start of alignment info */
+X
+/* now get the middle */
+X  spa = seqca;
+#ifndef TFAST
+X  sp0 = seqc0;	/* sp0/seqc0 is codon sequence */
+X  sp1 = seqc1;	/* sp1/seqc1 is protein sequence */
+#else
+X  sp1 = seqc0;	/* sp1/seqc0 is protein sequence */
+X  sp0 = seqc1;	/* sp0/seqc1 is codon sequence */
+#endif
+X
+X  lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+X  i0 = a_res.min1-3;	/* start of codon sequence */
+X  i1 = a_res.min0;	/* start of protein sequence */
+X
+X  while (rp < rpmax ) {
+X    switch (*rp++) {
+X    case 3:	/* match */
+X      i0 += 3;
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+X
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 2:	/* frame shift +2, then match */
+X      nfs++;
+X      i0 += 2;
+X      *sp0++ = '/';
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      not_c++;
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      /* count similarity exactly as for a plain match */
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 4:	/* frame shift, -1, then match */
+X      nfs++;
+X      i0 += 4;
+X      *sp0++ = '\\';
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      not_c++;
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      /* count similarity exactly as for a plain match */
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 5:	/* insertion in 1 */
+X      i0 += 3;
+X      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      lenc++;
+X      ngap_p++;
+X      break;
+X    case 0:	/* insertion in 0 */
+X      *sp0++ = '-';
+X      *sp1++ = sq[ap1[i1++]];
+X      *spa++ = M_DEL;
+X      lenc++;
+X      ngap_d++;
+X      break;
+X    }
+X  }
+X
+X  *spa = '\0';
+X
+#ifndef TFAST
+X  aln->amax0 = i0+3;	/* end of codon sequence */
+X  aln->amax1 = i1;	/* end of protein sequence */
+X  aln->ngap_q = ngap_d;
+X  aln->ngap_l = ngap_p;
+#else
+X  aln->amax1 = i0+3;	/* end of codon sequence */
+X  aln->amax0 = i1;	/* end of protein sequence */
+X  aln->ngap_q = ngap_p;
+X  aln->ngap_l = ngap_d;
+#endif
+X  aln->nfs = nfs;
+X  aln->amin0 = aln->smin0;
+X  aln->amin1 = aln->smin1;
+X
+X  if (lenc < 0) lenc = 1;
+X
+X  *nc = lenc;
+/* now we have the middle, get the right end */
+X
+X  return lenc+not_c;
+}
+X
+/* calcons_a - build the display strings for one alignment, plus an
+X * annotation row.
+X *
+X * Walks the edit codes a_res.res[0..a_res.nres) and writes one
+X * character per alignment column into the codon row, the protein row,
+X * the match-symbol row seqca and the annotation row seqc0a.  Without
+X * TFAST the codon row is seqc0 and the protein row is seqc1; with TFAST
+X * they are swapped.  seqca and seqc0a are NUL terminated here; seqc0
+X * and seqc1 are not.
+X *
+X * seqc0a holds ' ' everywhere except, with TFAST, at residues that
+X * follow a frame shift (codes 2 and 4) and at code 0 columns, where it
+X * holds ann_arr[aa0a[i]] for the protein position i.
+X * NOTE(review): plain matches (code 3) always get ' ', even with
+X * TFAST - confirm that is intended.
+X *
+X * Codes 2 and 4 each produce two columns (the frame-shift marker and
+X * the residue), so each writes two annotation characters; code 4 used
+X * to write only one, which shifted the rest of seqc0a left by a column.
+X *
+X * Also fills in *aln: nident, nsim, nfs, ngap_q, ngap_l, smin0/smin1,
+X * amin0/amin1 and amax0/amax1.
+X *
+X * Stores in *nc the number of columns not counting frame-shift marker
+X * columns, and returns that number plus the marker columns.
+X */
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr, struct f_struct *f_str)
+{
+X  int i0, i1;
+X  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+X  char *sp0, *sp0a, *sp1, *spa, *sq;
+X  unsigned char aap;
+X  const unsigned char *ap0, *ap1;
+X  int *rp, *rpmax;
+X
+X  /* don't fill in the ends */
+X
+X  rpmax = &a_res.res[a_res.nres];	/* end of alignment info */
+X
+X  if (pst.ext_sq_set) {sq = pst.sqx;}
+X  else {sq = pst.sq;}
+X
+X  /* res[0] has start of protein sequence */
+X  /* res[1] has start of translated DNA sequence */
+X
+#ifndef TFAST
+X  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+X  ap1 = aa1;			/* protein sequence -> ap1 */
+X  aln->smin1 = a_res.min0;	/* start in protein sequence */
+X  aln->smin0= a_res.min1;	/* start in DNA/codon sequence */
+#else		/* TFASTYZ */
+X  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+X  ap1 = aa0;			/* protein sequence */
+X  aln->smin0 = a_res.min0;	/* start in protein sequence */
+X  aln->smin1 = a_res.min1;	/* start in codon sequence */
+#endif
+X
+X  rp = a_res.res;		/* start of alignment info */
+X
+X
+/* now get the middle */
+X  spa = seqca;
+X  sp0a = seqc0a;
+#ifndef TFAST
+X  sp0 = seqc0;	/* sp0/seqc0 is codon sequence */
+X  sp1 = seqc1;	/* sp1/seqc1 is protein sequence */
+#else
+X  sp1 = seqc0;	/* sp1/seqc0 is protein sequence */
+X  sp0 = seqc1;	/* sp0/seqc1 is codon sequence */
+#endif
+X
+X  lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+X  i0 = a_res.min1-3;	/* start of codon sequence */
+X  i1 = a_res.min0;	/* start of protein sequence */
+X
+X  while (rp < rpmax ) {
+X    switch (*rp++) {
+X    case 3:	/* match */
+X      i0 += 3;
+X      *sp0a++ = ' ';
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+X
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 2:	/* frame shift +2, then match */
+X      nfs++;
+X      i0 += 2;
+X      *sp0a++ = ' ';
+X      *sp0++ = '/';
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      not_c++;
+X
+#ifndef TFAST
+X      *sp0a++ = ' ';
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 4:	/* frame shift, -1, then match */
+X      nfs++;
+X      i0 += 4;
+X      /* marker column: no annotation, as in case 2 */
+X      *sp0a++ = ' ';
+X      *sp0++ = '\\';
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      not_c++;
+X
+X      /* residue column: this is where the annotation belongs */
+#ifndef TFAST
+X      *sp0a++ = ' ';
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp1 = sq[aap=ap1[i1++]];
+X      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+X      if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+X
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      sp0++; sp1++; spa++;
+X      lenc++;
+X      break;
+X    case 5:	/* insertion in 1 */
+X      i0 += 3;
+X      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
+X      *sp1++ = '-';
+X      *spa++ = M_DEL;
+X      *sp0a++ = ' ';
+X      lenc++;
+X      ngap_p++;
+X      break;
+X    case 0:	/* insertion in 0 */
+X      *sp0++ = '-';
+#ifndef TFAST
+X      *sp0a++ = ' ';
+#else
+X      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+X      *sp1++ = sq[ap1[i1++]];
+X      *spa++ = M_DEL;
+X      lenc++;
+X      ngap_d++;
+X      break;
+X    }
+X  }
+X
+X  *sp0a = *spa = '\0';
+X
+#ifndef TFAST
+X  aln->amax0 = i0+3;	/* end of codon sequence */
+X  aln->amax1 = i1;	/* end of protein sequence */
+X  aln->ngap_q = ngap_d;
+X  aln->ngap_l = ngap_p;
+#else
+X  aln->amax1 = i0+3;	/* end of codon sequence */
+X  aln->amax0 = i1;	/* end of protein sequence */
+X  aln->ngap_q = ngap_p;
+X  aln->ngap_l = ngap_d;
+#endif
+X  aln->nfs = nfs;
+X  aln->amin0 = aln->smin0;
+X  aln->amin1 = aln->smin1;
+X
+X  if (lenc < 0) lenc = 1;
+X
+X  *nc = lenc;
+/* now we have the middle, get the right end */
+X
+X  return lenc+not_c;
+}
+X
+/* update_code() - append one run-length item to an alignment encoding.
+X
+X The item is the operation symbol op_char[op] followed by the decimal
+X count op_cnt (e.g. "=25").
+X
+X al_str      NUL-terminated encoding built so far (appended to in place)
+X al_str_max  space still available in al_str, counting the terminating
+X             NUL; callers in this file pass al_str_n - strlen(al_str)
+X op          index into op_char[] selecting the operation symbol
+X op_cnt      run length written after the symbol
+X op_char     table of operation symbols
+X
+X strncat() copies up to n characters and THEN writes a NUL, so only
+X al_str_max-1 characters may be appended without overrunning al_str.
+X When no room is left (al_str_max <= 1, which also covers a negative
+X remainder) al_str is left unchanged.
+*/
+void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
+X
+X  char tmp_cnt[20];	/* one symbol + a decimal int always fits */
+X
+X  if (al_str_max <= 1) return;	/* room for the NUL only - nothing to add */
+X
+X  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X  strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
+}
+X
+/* calc_code() - build an array of match/ins/del - length strings
+X
+X Walks the edit script a_res.res[0..a_res.nres-1] and appends a
+X run-length encoding of it to al_str through update_code(), while
+X filling in the identity/similarity/gap/frameshift counts and the
+X alignment boundaries in *aln.
+X
+X Edit script values handled by the switch below:
+X 3 - match, 2 and 4 - frame shift followed by a match,
+X 5 - insertion in sequence 1, 0 - insertion in sequence 0.
+X The internal op value 6 marks a matched position where either residue
+X is '*'.
+X
+X al_str must hold a NUL-terminated string on entry (strlen(al_str) is
+X taken before every append); al_str_n is passed on as its capacity.
+X
+X Returns lenc, the number of edit script entries processed.
+X n0 and n1 are not referenced in this function.
+*/
+int calc_code(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X int i0, i1;
+X int lenc, not_c, itmp, ngap_d, ngap_p, nfs; /* itmp is never used here; not_c is counted but not returned */
+X int op, op_cnt;
+X char sp0, sp1, op_char[10];
+X unsigned char aap;
+X const unsigned char *ap0, *ap1;
+X int *rp, *rpmax;
+X
+X /* don't fill in the ends */
+X
+X /* op_char[] is indexed by op: 0, 2, 3, 4, 5, 6 select the symbols;
+X the symbols for op 0 and op 5 are swapped between the two builds */
+#ifndef TFAST
+X strncpy(op_char,"- /=\\+*",sizeof(op_char));
+X ap0 = f_str->aa0v; /* computed codons -> ap0*/
+X ap1 = aa1; /* protein sequence -> ap1 */
+X aln->smin1 = a_res.min0; /* start in protein sequence */
+X aln->smin0= a_res.min1; /* start in DNA/codon sequence */
+#else /* TFASTYZ */
+X strncpy(op_char,"+ /=\\-*",sizeof(op_char));
+X ap0 = f_str->aa1v; /* computed codons -> ap0*/
+X ap1 = aa0; /* protein sequence */
+X aln->smin0 = a_res.min0; /* start in protein sequence */
+X aln->smin1 = a_res.min1; /* start in codon sequence */
+#endif
+X
+X rp = a_res.res; /* start of alignment info */
+X rpmax = &a_res.res[a_res.nres]; /* end of alignment info */
+X
+/* now get the middle */
+X
+X lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+X /* the pending run starts as an empty match run (op 3, count 0); if the
+X first edit is not a match this empty run is still passed to update_code() */
+X op_cnt = 0;
+X op = 3;
+X
+X i0 = a_res.min1-3; /* start of codon sequence */
+X i1 = a_res.min0; /* start of protein sequence */
+X
+X while (rp < rpmax ) {
+X switch (*rp++) {
+X case 3: /* match */
+X sp1 = pst.sq[aap=ap1[i1++]];
+X i0 += 3;
+X sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+X
+X /* extend the current match ('=', op 3) or '*' (op 6) run, or flush it
+X and start a new run when the kind of position changes */
+X if (op == 3 || op == 6) {
+X if (sp0 != '*' && sp1 != '*') {
+X if (op == 6 ) {
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X op_cnt = 1; op = 3;
+X }
+X else {op_cnt++;}
+X }
+X else {
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+X op_cnt = 1; op = 6;
+X }
+X }
+X else {
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op_cnt = 1; op = 3;
+X }
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 2: /* -1 frame shift */
+X /* flush the pending run, emit the frame shift as its own item of
+X length 1, then start a match run of length 1 for the residue below */
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 2; op_cnt = 1;
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 3; op_cnt = 1;
+X
+X nfs++;
+X i0 += 2;
+X not_c++;
+X sp1 = pst.sq[aap=ap1[i1++]];
+X sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 4: /* +1 frame shift */
+X /* same sequence as case 2, with op 4 and a codon step of 4 */
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 4; op_cnt = 1;
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 3; op_cnt = 1;
+X
+X nfs++;
+X i0 += 4;
+X not_c++;
+X sp1 = pst.sq[aap=ap1[i1++]];
+X sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 5: /* insert in 1 */
+X if (op == 5) op_cnt++;
+X else {
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 5; op_cnt = 1;
+X }
+X
+X i0 += 3; /* only the codon index advances */
+X lenc++;
+X ngap_p++;
+X break;
+X case 0: /* insert in 0 */
+X if (op == 0) op_cnt++;
+X else {
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X op = 0; op_cnt = 1;
+X }
+X
+X i1++; /* only the protein index advances */
+X lenc++;
+X ngap_d++;
+X break;
+X }
+X }
+X
+X /* flush the run that was still pending when the edit script ended */
+X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+X
+#ifndef TFAST
+X aln->amax0 = i0+3; /* end of codon sequence */
+X aln->amax1 = i1; /* end of protein sequence */
+X aln->ngap_q = ngap_d;
+X aln->ngap_l = ngap_p;
+#else
+X aln->amax1 = i0+3; /* end of codon sequence */
+X aln->amax0 = i1; /* end of protein sequence */
+X aln->ngap_q = ngap_p;
+X aln->ngap_l = ngap_d;
+#endif
+X aln->nfs = nfs;
+X aln->amin0 = aln->smin0;
+X aln->amin1 = aln->smin1;
+X
+X if (lenc < 0) lenc = 1;
+X
+/* now we have the middle, get the right end */
+X
+X return lenc;
+}
+X
+/* calc_id() - count identities, similarities, gaps and frame shifts.
+X
+X Walks the edit script a_res.res[0..a_res.nres-1] with the same case
+X values as calc_code() (3 match; 2 and 4 frame shift then match;
+X 5 insertion in sequence 1; 0 insertion in sequence 0) but builds no
+X string: only the counters and the alignment boundaries in *aln are
+X filled in.
+X
+X Returns lenc, the number of edit script entries processed.
+X n0 and n1 are not referenced in this function.
+*/
+int calc_id(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X struct f_struct *f_str)
+{
+X int i0, i1;
+X int lenc, not_c, itmp, ngap_d, ngap_p, nfs; /* itmp is never used here; not_c is counted but not returned */
+X char sp0, sp1;
+X unsigned char aap;
+X const unsigned char *ap0, *ap1;
+X int *rp, *rpmax;
+X
+X /* don't fill in the ends */
+X
+#ifndef TFAST /* FASTYZ */
+X ap0 = f_str->aa0v; /* computed codons -> ap0*/
+X ap1 = aa1; /* protein sequence -> ap1 */
+X aln->smin1 = a_res.min0; /* start in protein sequence */
+X aln->smin0 = a_res.min1; /* start in DNA/codon sequence */
+#else /* TFASTYZ */
+X ap0 = f_str->aa1v; /* computed codons -> ap0*/
+X ap1 = aa0; /* protein sequence */
+X aln->smin0 = a_res.min0; /* start in protein sequence */
+X aln->smin1 = a_res.min1; /* start in codon sequence */
+#endif
+X
+X rp = a_res.res; /* start of alignment info */
+X rpmax = &a_res.res[a_res.nres]; /* end of alignment info */
+X
+/* now get the middle */
+X
+X lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+X i0 = a_res.min1-3; /* start of codon sequence */
+X i1 = a_res.min0; /* start of protein sequence */
+X
+X while (rp < rpmax ) {
+X switch (*rp++) {
+X case 3: /* match: codon index advances by 3 */
+X i0 += 3;
+X sp1 = pst.sq[aap=ap1[i1++]];
+X sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; } /* score >= 0 counts as similar */
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 2: /* frame shift: codon index advances by 2, then match */
+X nfs++;
+X i0 += 2;
+X not_c++;
+X sp1 = pst.sq[aap=ap1[i1++]];
+X sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 4: /* frame shift: codon index advances by 4, then match */
+X nfs++;
+X i0 += 4;
+X not_c++;
+X sp1 = pst.sq[aap=ap1[i1++]];
+X sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+X if (sp0 == sp1) aln->nident++;
+X lenc++;
+X break;
+X case 5: /* insertion: only the codon index advances */
+X i0 += 3;
+X lenc++;
+X ngap_p++;
+X break;
+X case 0: /* insertion: only the protein index advances */
+X i1++;
+X lenc++;
+X ngap_d++;
+X break;
+X }
+X }
+X
+#ifndef TFAST
+X aln->amax0 = i0+3; /* end of codon sequence */
+X aln->amax1 = i1; /* end of protein sequence */
+X aln->ngap_q = ngap_d;
+X aln->ngap_l = ngap_p;
+#else
+X aln->amax1 = i0+3; /* end of codon sequence */
+X aln->amax0 = i1; /* end of protein sequence */
+X aln->ngap_q = ngap_p;
+X aln->ngap_l = ngap_d;
+#endif
+X aln->nfs = nfs;
+X aln->amin0 = aln->smin0;
+X aln->amin1 = aln->smin1;
+X
+X if (lenc < 0) lenc = 1;
+X
+/* now we have the middle, get the right end */
+X
+X return lenc;
+}
+X
+/* update_params() is compiled only when PCOMPLIB is defined */
+#ifdef PCOMPLIB
+#include "p_mw.h"
+/* update_params() - copy the query length n0 from the message *qm_msg
+X into the parameter structure *ppst; nothing else is modified */
+void
+update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+X ppst->n0 = qm_msg->n0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropfz2.c ||
+echo 'restore of dropfz2.c failed'
+Wc_c="`wc -c < 'dropfz2.c'`"
+test 77360 -eq "$Wc_c" ||
+ echo 'dropfz2.c: original size 77360, current size' "$Wc_c"
+fi
+# ============= dropgsw.c ==============
+if test -f 'dropgsw.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropgsw.c (File already exists)'
+else
+echo 'x - extracting dropgsw.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropgsw.c' &&
+/* copyright (c) 1996 William R. Pearson */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropgsw.c,v 1.80 2006/10/19 15:12:11 wrp Exp $ */
+X
+/* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe */
+X
+/* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates */
+X
+/* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included */
+X
+/* 14-May-2003 - modified to return alignment start at 0, rather than
+X 1, for begin:end alignments
+X
+X 25-Feb-2003 - modified to support Altivec parallel Smith-Waterman
+X
+X 22-Sep-2003 - removed Altivec support at request of Sencel lawyers
+*/
+X
+/* the do_walign() code in this file is not thread_safe */
+/* init_work(), do_work(), are thread safe */
+X
+/* this code uses an implementation of the Smith-Waterman algorithm
+X designed by Phil Green, U. of Washington, that is 1.5 - 2X faster
+X than my Miller and Myers implementation. */
+X
+/* the shortcuts used in this program prevent it from calculating scores
+X that are less than the gap penalty for the first residue in a gap. As
+X a result this code cannot be used with very large gap penalties, or
+X with very short sequences, and probably should not be used with prss3.
+*/
+X
+/* version 3.2 fixes a subtle bug that was encountered while running
+X do_walign() interspersed with do_work(). This happens only with -m
+X 9 and pvcomplib. The fix was to more explicitly zero-out ss[] at
+X the beginning of do_work.
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+X
+static char *verstr="5.5 Sept 2006";
+X
+#include "dropgsw.h"
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+#ifdef SW_ALTIVEC
+#include "smith_waterman_altivec.h"
+#endif
+#ifdef SW_SSE2
+#include "smith_waterman_sse2.h"
+#endif
+X
+/* one element of the score row f_str->ss used by FLOCAL_ALIGN():
+X H is read there as the diagonal value, E as the up-gap value */
+struct swstr {int H, E;};
+X
+/* defined outside this file; called from init_work()/do_work() below
+X only when ppst->zsflag is 6 or 16 */
+extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+X double *aa0_f, double **kp);
+extern int do_karlin(const unsigned char *aa1, int n1,
+X int **pam2, struct pstruct *ppst,
+X double *aa0_f, double *kar_p, double *lambda, double *H);
+X
+/* file-local routines, declared here ahead of their definitions */
+static int
+ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int **W, int IW, int G, int H, int *res, int *nres,
+X struct f_struct *f_str);
+X
+/* score-only local alignment; do_work() calls it when neither
+X SW_ALTIVEC nor SW_SSE2 is defined */
+static int
+FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
+X int n0, int n1, int low, int up,
+X int **W, int GG,int HH, int MW,
+X struct f_struct *f_str);
+X
+static
+void DISPLAY(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int *S, int AP, int BP, char *sq);
+X
+/* defined outside this file */
+extern void aancpy(char *to, char *from, int count, struct pstruct pst);
+X
+/* init_work() - initialize for Smith-Waterman optimal score
+X
+X Allocates a struct f_struct for the query aa0[0..n0-1], fills in its
+X scoring profiles and work arrays, and returns it through *f_arg:
+X
+X ss          n0+2 swstr cells; the stored pointer is advanced by one
+X             and ss[n0] is set up as the end-of-row sentinel
+X waa_s       scanning profile, built from pam2[ip]/pam2p[ip]
+X waa_a       alignment profile, built from pam2[0]/pam2p[0]
+X pam2p[0/1]  per-position row pointers into (nsq+1)*(n0+1) int arrays
+X res         alignment result array of max(3*n0/2,MIN_RES) ints
+X
+X ip/nsq select the extended alphabet (ip=1, nsqx) when
+X ppst->ext_sq_set is set, otherwise ip=0 and nsq.
+X
+X With SW_ALTIVEC or SW_SSE2 defined, aligned byte (8-bit) and word
+X (16-bit) score profiles and a workspace are built as well.
+X
+X Every allocation is checked; on failure a message is written to
+X stderr and the program exits with status 1.
+*/
+X
+void
+init_work (unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg)
+{
+X int maxn0, ip;
+X int *pwaa_s, *pwaa_a;
+X int e, f, i, j, l;
+X int *res;
+X struct f_struct *f_str;
+X int **pam2p;
+X struct swstr *ss;
+X int nsq;
+X
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X int data,bias;
+X unsigned char * pc;
+X unsigned short * ps;
+X int overflow;
+X
+X int n_count;
+X int col_len;
+#endif
+X
+X if (ppst->ext_sq_set) {
+X nsq = ppst->nsqx; ip = 1;
+X }
+X else {
+X nsq = ppst->nsq; ip = 0;
+X }
+X
+X /* allocate space for function globals */
+X
+X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
+X if (f_str == NULL) { /* f_str is dereferenced immediately below */
+X fprintf(stderr,"cannot allocate f_struct\n");
+X exit(1);
+X }
+X
+X if(ppst->zsflag == 6 || ppst->zsflag == 16) {
+X f_str->kar_p = NULL;
+X init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
+X }
+X
+X /* allocate space for the scoring arrays */
+X if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+X exit (1);
+X }
+X ss++; /* close_work() steps back by one before free() */
+X
+X ss[n0].H = -1; /* this is used as a sentinel - normally H >= 0 */
+X ss[n0].E = 1;
+X f_str->ss = ss;
+X
+X /* initialize variable (-S) pam matrix */
+X if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
+X exit(1);
+X }
+X
+X /* initialize pam2p[1] pointers */
+X if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+X fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
+X exit(1);
+X }
+X
+X pam2p = f_str->pam2p[1];
+X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
+X exit(1);
+X }
+X
+X /* row i of the profile starts (nsq+1) ints after row i-1 */
+X for (i=1; i<n0; i++) {
+X pam2p[i]= pam2p[0] + (i*(nsq+1));
+X }
+X
+X /* initialize universal (alignment) matrix */
+X if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
+X exit(1);
+X }
+X
+X /* initialize pam2p[0] pointers */
+X if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+X fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
+X exit(1);
+X }
+X
+X pam2p = f_str->pam2p[0];
+X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
+X exit(1);
+X }
+X
+X for (i=1; i<n0; i++) {
+X pam2p[i]= pam2p[0] + (i*(nsq+1));
+X }
+X
+X /*
+X pwaa effectively has a sequence profile --
+X pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+X pwaa[n0..2n0-1] has pam scores for residue 1 (A)
+X pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
+X
+X thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+X *pwaa++ rapidly moves though the scores of the aa1p[] position
+X without further indexing
+X
+X For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+X a different score in each position.
+X */
+X
+X if (ppst->pam_pssm) {
+X pwaa_s = f_str->waa_s;
+X pwaa_a = f_str->waa_a;
+X for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */
+X for (f = 0; f < n0; f++) { /* for each position in aa0 */
+X *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e];
+X }
+X }
+X }
+X else { /* initialize scanning matrix */
+X pwaa_s = f_str->waa_s;
+X pwaa_a = f_str->waa_a;
+X for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */
+X for (f = 0; f < n0; f++) { /* for each position in aa0 */
+X *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
+X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
+X }
+X }
+X
+#if defined(SW_ALTIVEC)
+X
+X /* First we allocate memory for the workspace - i.e. the single row
+X * of storage for H/F. Since this might be run on Linux or AIX too,
+X * we don't assume anything about the memory allocation but align
+X * it ourselves. We need two vectors (16 bytes each) per element,
+X * and some padding space to make it cache-line aligned.
+X
+X * MAXTST+MAXLIB is longest allowed database sequence length...
+X * this should be m_msg.max_tot, but m_msg is not available, but
+X * ppst->maxlen has maxn, which is appropriate.
+X */
+X
+X f_str->workspace_memory = (void *)malloc(2*16*(ppst->maxlen+SEQ_PAD)+256);
+X if (f_str->workspace_memory == NULL) {
+X fprintf(stderr,"cannot allocate workspace memory\n");
+X exit(1);
+X }
+X f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
+X
+X
+X
+X /* We always use a scoring profile in altivec, but the layout is a bit strange
+X * in order to optimize memory access order and thus cache efficiency.
+X * Normally we first try 8-bit scoring in altivec, and if this leads to overflow
+X * we recompute the score with 16-bit accuracy. Because of this we need to construct
+X * two score profiles.
+X * Since altivec always loads 16 bytes from aligned memory, corresponding to 8 or 16
+X * elements (for 16 and 8 bit scoring, respectively), we organize the scoring
+X * profile like this for 8-bit accuracy:
+X *
+X * 1. The profile starts on 256-byte aligned memory (cache line on G5 is 128 bytes).
+X * 2. First we have the score for the full alphabet for the first 16 residues of
+X * the query, i.e. positions 0-15 are the scores for the first 16 query letters
+X * vs. the first in the alphabet, positions 16-31 the scores for the same 16
+X * query positions against alphabet letter two, etc.
+X * 3. After alphabet_size*16bytes we start with the scores for residues 16-31 in
+X * the query, organized in the same way.
+X * 4. At the end of the query sequence, we pad the scoring to the next 16-tuple
+X * with neutral scores.
+X * 5. The total size of the profile is thus alphabet_size*N, where N is the
+X * size of the query rounded up to the next 16-tuple.
+X *
+X * The word (16-bit) profile is identical, but scores are stored as 8-tuples.
+X */
+X
+X f_str->word_score_memory = (void *)malloc(10*2*(nsq+2)*(n0+1+16)+256);
+X f_str->byte_score_memory = (void *)malloc(10*(nsq+2)*(n0+1+16)+256);
+X if (f_str->word_score_memory == NULL || f_str->byte_score_memory == NULL) {
+X fprintf(stderr,"cannot allocate score profile memory\n");
+X exit(1);
+X }
+X
+X f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
+X f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
+X
+X overflow = 0;
+X
+X if (ppst->pam_pssm) {
+X /* Use a position-specific scoring profile.
+X * This is essentially what we are going to construct anyway, but we'll
+X * reorder it to suit altivec.
+X */
+X bias = 127;
+X for(i = 1; i <= nsq ; i++) {
+X for(j = 0; j < n0 ; j++) {
+X data = ppst->pam2p[ip][j][i];
+X if(data<bias) bias = data;
+X }
+X }
+X
+X /* Fill our specially organized byte- and word-size scoring arrays. */
+X ps = f_str->word_score;
+X for(f = 0; f<n0 ; f+=8) {
+X /* e=0 */
+X for(i=0 ; i<8 ; i++) {
+X *ps++ = (unsigned short) 0;
+X }
+X /* for each chunk of 8 residues in our query */
+X for(e = 1; e<=nsq; e++) {
+X for(i=0 ; i<8 ; i++) {
+X l = f + i;
+X if(l<n0) {
+X data = ppst->pam2p[ip][l][e] - bias;
+X }
+X else {
+X data = 0;
+X }
+X *ps++ = (unsigned short)data;
+X }
+X }
+X }
+X pc = f_str->byte_score;
+X for(f = 0; f<n0 ; f+=16) {
+X /* e=0 */
+X for(i=0 ; i<16 ; i++) {
+X *pc++ = (unsigned char)0;
+X }
+X
+X for(e = 1; e<=nsq; e++) {
+X for(i=0 ; i<16 ; i++) {
+X l = f + i;
+X if(l<n0) {
+X data = ppst->pam2p[ip][l][e] - bias;
+X }
+X else {
+X data = 0;
+X }
+X if(data>255) {
+X /*
+X printf("Fatal error. data: %d bias: %d, position: %d/%d, Score out of range for 8-bit Altivec/VMX datatype.\n",data,bias,l,e);
+X exit(1);
+X */
+X overflow = 1;
+X }
+X *pc++ = (unsigned char)data;
+X }
+X }
+X }
+X }
+X else {
+X /* Classical simple substitution matrix */
+X /* Find the bias to use in the substitution matrix */
+X bias = 127;
+X for(i = 1; i <= nsq ; i++) {
+X for(j = 1; j <= nsq ; j++) {
+X data = ppst->pam2[ip][i][j];
+X if(data<bias) bias = data;
+X }
+X }
+X /* Fill our specially organized byte- and word-size scoring arrays. */
+X ps = f_str->word_score;
+X for(f = 0; f<n0 ; f+=8) {
+X /* e=0 */
+X for(i=0 ; i<8 ; i++) {
+X *ps++ = (unsigned short) 0;
+X }
+X /* for each chunk of 8 residues in our query */
+X for(e = 1; e<=nsq; e++) {
+X for(i=0 ; i<8 ; i++) {
+X l = f + i;
+X if(l<n0) {
+X data = ppst->pam2[ip][aa0[l]][e] - bias;
+X }
+X else {
+X data = 0;
+X }
+X *ps++ = (unsigned short)data;
+X }
+X }
+X }
+X pc = f_str->byte_score;
+X for(f = 0; f<n0 ; f+=16) {
+X /* e=0 */
+X for(i=0 ; i<16 ; i++) {
+X *pc++ = (unsigned char)0;
+X }
+X
+X for(e = 1; e<=nsq; e++) {
+X for(i=0 ; i<16 ; i++) {
+X l = f + i;
+X if (l<n0) {
+X data = ppst->pam2[ip][aa0[l]][e] - bias;
+X }
+X else {
+X data = 0;
+X }
+X if(data>255) {
+X /*
+X printf("Fatal error. Score out of range for 8-bit Altivec/VMX datatype.\n");
+X exit(1);
+X */
+X overflow = 1;
+X }
+X *pc++ = (unsigned char)data;
+X }
+X }
+X }
+X }
+X
+X f_str->bias = (unsigned char) (-bias);
+X f_str->alphabet_size = nsq+1;
+X
+X /* Some variable to keep track of how many 8-bit runs we need to rerun
+X * in 16-bit accuracy. If there are too many reruns it can be faster
+X * to use 16-bit alignments directly.
+X */
+X
+X /* We can only do 8-bit alignments if the scores were small enough. */
+X if(overflow==0) f_str->try_8bit = 1;
+X else f_str->try_8bit = 0;
+X
+X f_str->done_8bit = 0;
+X f_str->done_16bit = 0;
+X
+#endif /* SW_ALTIVEC */
+X
+#if defined(SW_SSE2)
+X /* First we allocate memory for the workspace - i.e. two rows for H and
+X * one row for F. We also need enough space to hold a temporary
+X * scoring profile which will be query_length * 16 (sse2 word length).
+X * Since this might be run on Linux or AIX too, we don't assume
+X * anything about the memory allocation but align it ourselves.
+X */
+X f_str->workspace_memory = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256);
+X if (f_str->workspace_memory == NULL) {
+X fprintf(stderr,"cannot allocate workspace memory\n");
+X exit(1);
+X }
+X f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
+X
+X /* We always use a scoring profile for the SSE2 implementation, but the layout
+X * is a bit strange. The scoring profile is parallel to the query, but is
+X * accessed in a striped pattern. The query is divided into equal length
+X * segments. The number of segments is equal to the number of elements
+X * processed in the SSE2 register. For 8-bit calculations, the query will
+X * be divided into 16 equal length parts. If the query is not long enough
+X * to fill the last segment, it will be filled with neutral weights. The
+X * first element in the SSE register will hold a value from the first segment,
+X * the second element of the SSE register will hold a value from the
+X * second segment and so on. So if the query length is 288, then each
+X * segment will have a length of 18. So the first 16 bytes will have
+X * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will
+X * have the following weights: Q2, Q20, Q38, ... Q272; and so on until
+X * all parts of all segments have been written. The last segment will
+X * have the following weights: Q18, Q36, Q54, ... Q288. This will be
+X * done for the entire alphabet.
+X */
+X
+X f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof (short) * (nsq + 1) + 256);
+X f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof (char) * (nsq + 1) + 256);
+X if (f_str->word_score_memory == NULL || f_str->byte_score_memory == NULL) {
+X fprintf(stderr,"cannot allocate score profile memory\n");
+X exit(1);
+X }
+X
+X f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
+X f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
+X
+X overflow = 0;
+X
+X if (ppst->pam_pssm) {
+X /* Use a position-specific scoring profile.
+X * This is essentially what we are going to construct anyway, but we'll
+X * reorder it to suit sse2.
+X */
+X bias = 127;
+X for (i = 1; i <= nsq ; i++) {
+X for (j = 0; j < n0 ; j++) {
+X data = ppst->pam2p[ip][j][i];
+X if (data < bias) {
+X bias = data;
+X }
+X }
+X }
+X
+X /* Fill our specially organized byte- and word-size scoring arrays. */
+X ps = f_str->word_score;
+X col_len = (n0 + 7) / 8;
+X n_count = (n0 + 7) & 0xfffffff8;
+X for (f = 0; f < n_count; ++f) {
+X *ps++ = 0;
+X }
+X for (f = 1; f <= nsq ; f++) {
+X for (e = 0; e < col_len; e++) {
+X for (i = e; i < n_count; i += col_len) {
+X if ( i < n0) { data = ppst->pam2p[ip][i][f];}
+X else {data = 0;}
+X *ps++ = (unsigned short)data;
+X }
+X }
+X }
+X pc = f_str->byte_score;
+X col_len = (n0 + 15) / 16;
+X n_count = (n0 + 15) & 0xfffffff0;
+X for (f = 0; f < n_count; ++f) {
+X *pc++ = 0;
+X }
+X for (f = 1; f <= nsq ; f++) {
+X for (e = 0; e < col_len; e++) {
+X for (i = e; i < n_count; i += col_len) {
+X if ( i < n0 ) { data = ppst->pam2p[ip][i][f] - bias;}
+X else {data = 0 - bias;}
+X if (data > 255) {
+X printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+X "Score out of range for 8-bit SSE2 datatype.\n",
+X data, bias, f, e);
+X exit(1);
+X }
+X *pc++ = (unsigned char)data;
+X }
+X }
+X }
+X }
+X else
+X {
+X /* Classical simple substitution matrix */
+X /* Find the bias to use in the substitution matrix */
+X bias = 127;
+X for (i = 1; i <= nsq ; i++) {
+X for (j = 1; j <= nsq ; j++) {
+X data = ppst->pam2[ip][i][j];
+X if (data < bias) {
+X bias = data;
+X }
+X }
+X }
+X
+X /* Fill our specially organized byte- and word-size scoring arrays. */
+X ps = f_str->word_score;
+X col_len = (n0 + 7) / 8;
+X n_count = (n0 + 7) & 0xfffffff8;
+X for (f = 0; f < n_count; ++f) {
+X *ps++ = 0;
+X }
+X for (f = 1; f <= nsq ; f++) {
+X for (e = 0; e < col_len; e++) {
+X for (i = e; i < n_count; i += col_len) {
+X if (i >= n0) {
+X data = 0;
+X } else {
+X data = ppst->pam2[ip][aa0[i]][f];
+X }
+X *ps++ = (unsigned short)data;
+X }
+X }
+X }
+X
+X pc = f_str->byte_score;
+X col_len = (n0 + 15) / 16;
+X n_count = (n0 + 15) & 0xfffffff0;
+X for (f = 0; f < n_count; ++f) {
+X *pc++ = 0;
+X }
+X for (f = 1; f <= nsq ; f++) {
+X for (e = 0; e < col_len; e++) {
+X for (i = e; i < n_count; i += col_len) {
+X if (i >= n0) {
+X data = -bias;
+X } else {
+X data = ppst->pam2[ip][aa0[i]][f] - bias;
+X }
+X if (data > 255) {
+X printf("Fatal error. data: %d bias: %d, position: %d/%d, "
+X "Score out of range for 8-bit SSE2 datatype.\n",
+X data, bias, f, e);
+X exit(1);
+X }
+X *pc++ = (unsigned char)data;
+X }
+X }
+X }
+X }
+X
+X f_str->bias = (unsigned char) (-bias);
+X f_str->alphabet_size = nsq+1;
+X
+X /* Some variable to keep track of how many 8-bit runs we need to rerun
+X * in 16-bit accuracy. If there are too many reruns it can be faster
+X * to use 16-bit alignments directly.
+X */
+X
+X /* We can only do 8-bit alignments if the scores were small enough. */
+X f_str->try_8bit = (overflow == 0) ? 1 : 0;
+X
+X f_str->done_8bit = 0;
+X f_str->done_16bit = 0;
+#endif /* SW_SSE2 */
+X
+X /* these structures are used for producing alignments */
+X
+X maxn0 = max(3*n0/2,MIN_RES); /* minimum allocation for alignment */
+X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X exit(1);
+X }
+X f_str->res = res;
+X
+X
+X *f_arg = f_str;
+}
+X
+/* close_work() - release everything init_work() attached to *f_arg.
+X
+X Does nothing when *f_arg is already NULL; otherwise frees each
+X array, then the f_struct itself, and resets *f_arg to NULL.
+X aa0, n0 and ppst are not used.
+*/
+void close_work (const unsigned char *aa0, int n0,
+X		 struct pstruct *ppst,
+X		 struct f_struct **f_arg)
+{
+X  struct f_struct *fsp = *f_arg;
+X
+X  if (fsp == NULL) return;
+X
+X  if (fsp->kar_p != NULL) { free(fsp->kar_p); }
+X
+X  /* init_work() stored ss one element past the start of its allocation */
+X  free(fsp->ss - 1);
+X  free(fsp->res);
+X
+X  /* alignment profile, then scanning profile */
+X  free(fsp->waa_a);
+X  free(fsp->pam2p[0][0]);
+X  free(fsp->pam2p[0]);
+X  free(fsp->waa_s);
+X  free(fsp->pam2p[1][0]);
+X  free(fsp->pam2p[1]);
+X
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X  /* unaligned base pointers of the SIMD workspace and score profiles */
+X  free(fsp->workspace_memory);
+X  free(fsp->word_score_memory);
+X  free(fsp->byte_score_memory);
+#endif
+X
+X  free(fsp);
+X  *f_arg = NULL;
+}
+X
+X
+/* get_param() - describe the search function and its parameters.
+X
+X pstr      scoring parameters (matrix file, pam_h/pam_l, gap penalties,
+X           pam_pssm and ext_sq_set flags)
+X pstring1  receives a one-line summary; it is a message to the
+X           manager, currently 512
+X pstring2  when not NULL, receives the same information as
+X           "; pg_xxx: value" lines
+X
+X Both strings name the scoring matrix (pstr->pamfile), followed by
+X "-PSI" when pstr->pam_pssm is set.
+*/
+/*void get_param(struct pstruct *pstr,char *pstring1)*/
+void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+X  char pg_str[120];
+X  char psi_str[120];
+X
+X  /* algorithm name depends on which implementation was compiled in */
+#if defined(SW_ALTIVEC)
+X  strncpy(pg_str,"Smith-Waterman (Altivec/VMX, Erik Lindahl 2004)",sizeof(pg_str));
+#endif
+#if defined(SW_SSE2)
+X  strncpy(pg_str,"Smith-Waterman (SSE2, Michael Farrar 2006)",sizeof(pg_str));
+#endif
+#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
+X  strncpy(pg_str,"Smith-Waterman (PGopt)",sizeof(pg_str));
+#endif
+X
+X  if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
+X  else { psi_str[0]='\0';}
+X
+#ifdef OLD_FASTA_GAP
+X  sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
+#else
+X  sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
+#endif
+X	   pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
+X	   (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
+X  /*
+X  if (pstr->zsflag==0) strcat(pstring1," not-scaled\n");
+X  else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+X  */
+X  if (pstring2 != NULL) {
+X    /* pg_matrix carries the matrix name itself, as pstring1 does;
+X       the "-PSI" marker is kept as a suffix */
+#ifdef OLD_FASTA_GAP
+X    sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
+#else
+X    sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_open-ext: %d %d\n",
+#endif
+X	    pg_str,verstr,pstr->pamfile,psi_str,pstr->pam_h,pstr->pam_l,
+X	    (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
+X  }
+}
+X
+/* do_work() - compute the Smith-Waterman score of aa0[0..n0-1] against
+X aa1[0..n1-1] and store it in rst->score[0].
+X
+X The implementation is chosen at compile time: the Altivec or SSE2
+X routines (8-bit first, redone in 16-bit when the 8-bit score reaches
+X 255), or FLOCAL_ALIGN() when neither SW_ALTIVEC nor SW_SSE2 is defined.
+X
+X When ppst->zsflag is 6 or 16 and do_karlin() returns > 0, rst->comp
+X is set to 1.0/lambda and rst->H to H; otherwise both are set to -1.0.
+X
+X frame and qr_flg are not referenced in this function.
+*/
+void do_work (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst, struct f_struct *f_str,
+X int qr_flg, struct rstruct *rst)
+{
+X int score;
+X double lambda, H;
+X int i; /* not used in this function */
+X
+#ifdef SW_ALTIVEC
+X if(f_str->try_8bit)
+X {
+X score = smith_waterman_altivec_byte(aa0,
+X f_str->byte_score,
+X n0,
+X aa1,
+X n1,
+X f_str->bias,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X
+X f_str->done_8bit++;
+X
+X if(score>=255)
+X {
+X /* Overflow, so we have to redo it in 16 bits. */
+X score = smith_waterman_altivec_word(aa0,
+X f_str->word_score,
+X n0,
+X aa1,
+X n1,
+X f_str->bias,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X
+X /* The 8 bit version is roughly 50% faster than the 16 bit version,
+X * so we are fine if less than about 1/3 of the runs have to
+X * be rerun with 16 bits. If it is more, and we have tried at least
+X * 500 sequences, we switch off the 8-bit mode.
+X */
+X f_str->done_16bit++;
+X if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
+X f_str->try_8bit = 0;
+X }
+X }
+X else
+X {
+X /* Just use the 16-bit altivec version directly */
+X score = smith_waterman_altivec_word(aa0,
+X f_str->word_score,
+X n0,
+X aa1,
+X n1,
+X f_str->bias,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X }
+X
+#endif /* SW_ALTIVEC */
+X
+#if defined(SW_SSE2)
+X
+X if(f_str->try_8bit)
+X {
+X score = smith_waterman_sse2_byte(aa0,
+X f_str->byte_score,
+X n0,
+X aa1,
+X n1,
+X f_str->bias,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X
+X f_str->done_8bit++;
+X
+X if(score>=255)
+X {
+X /* Overflow, so we have to redo it in 16 bits. */
+X /* unlike the byte version, the word version is not passed f_str->bias */
+X score = smith_waterman_sse2_word(aa0,
+X f_str->word_score,
+X n0,
+X aa1,
+X n1,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X
+X /* The 8 bit version is roughly 50% faster than the 16 bit version,
+X * so we are fine if less than about 1/3 of the runs have to
+X * be rerun with 16 bits. If it is more, and we have tried at least
+X * 500 sequences, we switch off the 8-bit mode.
+X */
+X f_str->done_16bit++;
+X if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
+X f_str->try_8bit = 0;
+X }
+X }
+X else
+X {
+X /* Just use the 16-bit SSE2 version directly */
+X score = smith_waterman_sse2_word(aa0,
+X f_str->word_score,
+X n0,
+X aa1,
+X n1,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X f_str);
+X }
+#endif /* SW_SSE2 */
+X
+#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
+X
+X /* the first gap argument is negated, ggapval is passed unchanged:
+X FLOCAL_ALIGN() adds it to the running gap score as gap_ext */
+X score = FLOCAL_ALIGN(aa0,aa1,n0,n1,0,0,
+X NULL,
+#ifndef OLD_FASTA_GAP
+X -(ppst->gdelval + ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X ppst->ggapval,0,f_str);
+#endif
+X
+X rst->score[0] = score;
+X
+X if(( ppst->zsflag == 6 || ppst->zsflag == 16) &&
+X (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
+X f_str->kar_p, &lambda, &H)>0)) {
+X rst->comp = 1.0/lambda;
+X rst->H = H;
+X }
+X else {rst->comp = rst->H = -1.0;}
+X
+}
+X
+/* FLOCAL_ALIGN - score-only local alignment against the residues in aa1. */
+/* f_str->ss is used as one row of n0+1 (H,E) cells; f_str->waa_s is the */
+/* score profile, read as n0 consecutive ints per aa1 residue code. */
+/* GG is kept as n_gap_init: h must exceed it before a new gap is started */
+/* (a new gap starts at h - n_gap_init). HH is kept as gap_ext and is */
+/* added to e/f each time a gap is extended. */
+/* aa1 must be terminated by a 0 residue; that is what ends the outer loop. */
+/* aa0, n1, low, up, W and MW are not referenced in the body. */
+/* Returns the largest h seen at the transition label (0 if there is none). */
+static int
+FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
+X int n0, int n1, int low, int up,
+X int **W, int GG,int HH, int MW,
+X struct f_struct *f_str) {
+X
+X register int *pwaa;
+X register struct swstr *ssj;
+X struct swstr *ss;
+X register int h, e, f, p;
+X int temp, score;
+X int gap_ext, n_gap_init;
+X
+X const unsigned char *aa1p;
+X ss = f_str->ss;
+X /* sentinel cell after the row: E > 0 routes into the branch that tests */
+X /* p == -1, which is how the inner loops detect the end of a row */
+X ss[n0].H = -1;
+X ss[n0].E = 1;
+X
+X n_gap_init = GG;
+X gap_ext = HH;
+X
+X score = 0;
+X for (h=0; h<n0; h++) { /* initialize 0th row */
+X ss[h].H = ss[h].E = 0;
+X }
+X
+X aa1p=aa1;
+X while (*aa1p) { /* relies on aa1[n1]==0 for EOS flag */
+X /* waa_s has the offsets for each residue in aa0 into pam2 */
+X /* waa_s has complexity (-S) dependent scores */
+X pwaa = f_str->waa_s + (*aa1p++)*n0;
+X ssj = ss;
+X
+X e = f = h = p = 0;
+X zero_f: /* in this section left-gap f==0, and is never examined */
+X
+X while (1) { /* build until h > n_gap_init (f < 0 until h > n_gap_init) */
+X /* bump through the pam[][]'s for each of the aa1[] matches to
+X aa0[], because of the way *pwaa is set up */
+X
+X h = p + *pwaa++; /* increment diag value */
+X p = ssj->H; /* get next diag value */
+X if ((e = ssj->E) > 0 ) { /* >0 from up-gap */
+X if (p == -1) goto next_row; /* done, -1=ss[n0].H sentinel */
+X if (h < e) h = e; /* up-gap better than diag */
+X else
+X if (h > n_gap_init) { /* we won't be starting a new up-gap */
+X e += gap_ext; /* but we might be extending one */
+X goto transition; /* good h > n_gap_init; scan f */
+X }
+X e += gap_ext; /* up-gap decreased */
+X ssj->E = (e > 0) ? e : 0; /* set to 0 if < 0 */
+X ssj++->H = h; /* diag match updated */
+X }
+X else { /* up-gap (->E) is 0 */
+X if ( h > 0) { /* diag > 0 */
+X if (h > n_gap_init) { /* we won't be starting a new up-gap */
+X e = 0; /* and we won't be extending one */
+X goto transition; /* good h > n_gap_init; scan f */
+X }
+X ssj++->H = h; /* update diag */
+X }
+X else ssj++->H = 0; /* update diag to 0 */
+X }
+X }
+X
+X /* here h > n_gap_init and h > e, => the next f will be > 0 */
+X transition:
+#ifdef DEBUG
+X if ( h > 10000)
+X fprintf(stderr,"h: %d ssj: %d\n",h, (int)(ssj-ss));
+#endif
+X if ( score < h ) score = h; /* save best score, only when h > n_gap_init */
+X
+X temp = h - n_gap_init; /* best score for starting a new gap */
+X if ( f < temp ) f = temp; /* start a left-gap? */
+X if ( e < temp ) e = temp; /* start an up-gap? */
+X ssj->E = ( e > 0 ) ? e : 0; /* update up-gap */
+X ssj++->H = h; /* update diag */
+X e = 0;
+X
+X do { /* stay here until f <= 0 */
+X h = p + *pwaa++; /* diag + match/mismatch */
+X p = ssj->H; /* save next (right) diag */
+X
+X if ( h < f ) h = f; /* update diag using left gap */
+X f += gap_ext; /* update next left-gap */
+X
+X if ((e = ssj->E) > 0) { /* good up gap */
+X if (p == -1) goto next_row; /* at the end of the row */
+X if ( h < e ) h = e; /* update diag using up-gap */
+X else
+X if ( h > n_gap_init ) {
+X e += gap_ext; /* update up gap */
+X goto transition; /* good diag > n_gap_init, restart */
+X }
+X e += gap_ext; /* update up-gap */
+X ssj->E = (e > 0) ? e : 0; /* e must be >= 0 */
+X ssj++->H = h; /* update diag */
+X }
+X else { /* up-gap <= 0 */
+X if ( h > n_gap_init ) {
+X e = 0;
+X goto transition; /* good diag > n_gap_init; restart */
+X }
+X ssj++->H = h; /* update diag */
+X }
+X } while ( f > 0 ); /* while left gap f > 0 */
+X goto zero_f; /* otherwise, go to f==0 section */
+X next_row:
+X ;
+X } /* end while(*aa1p) {} */
+X
+X return score;
+X
+} /* here we should be all done */
+X
+/* do_opt - placeholder with an empty body; every argument is ignored */
+/* and nothing is written to rst. */
+void
+do_opt (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1,
+X        int frame, struct pstruct *ppst, struct f_struct *f_str,
+X        struct rstruct *rst)
+{
+X  /* intentionally empty */
+}
+X
+/* do_walign - produce a local alignment of aa0 against aa1. */
+/* 1. A forward pass over the row f_str->ss, scored with the f_str->waa_a */
+/*    profile, finds the best score and the cell where it ends */
+/*    (I = index into aa1, J = index into aa0). */
+/* 2. A backward pass from (I,J), scored with f_str->pam2p[0], stops at the */
+/*    first cell (K,L) whose value reaches that score; this is the start. */
+/* 3. ALIGN() writes the edit script for the enclosed region to f_str->res. */
+/* On success: a_res->res/nres/min0/max0/min1/max1 are filled in, */
+/* *have_ares is set to 1 and the score is returned. */
+/* When no positive score exists the function returns 0 and leaves a_res */
+/* and *have_ares untouched. */
+/* NOTE(review): callers presumably initialise *have_ares before the call; */
+/* confirm, since the early return does not write it. */
+/* n1 and frame are not referenced; aa1 must end with a 0 residue. */
+int do_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X int *have_ares)
+{
+X const unsigned char *aa0p, *aa1p;
+X register int *pwaa;
+X register int i, j;
+X register struct swstr *ssj;
+X struct swstr *ss;
+X int *res, *waa;
+X int e, f, h, p;
+X int q, r, m;
+X int score;
+X int cost, I, J, K, L;
+X
+X ss = f_str->ss;
+X
+X res = f_str->res;
+X waa = f_str->waa_a; /* this time use universal pam2[0] */
+X
+X
+X /* q = cost of opening a gap, r = cost per gap residue, m = q + r */
+#ifdef OLD_FASTA_GAP
+X q = -(ppst->gdelval - ppst->ggapval);
+#else
+X q = -ppst->gdelval;
+#endif
+X
+X r = -ppst->ggapval;
+X m = q + r;
+X
+X /* initialize 0th row */
+X for (ssj=ss; ssj<ss+n0; ssj++) {
+X ssj->H = 0;
+X ssj->E = -q;
+X }
+X
+X score = 0;
+X aa1p = aa1;
+X i = 0;
+X while (*aa1p) {
+X h = p = 0;
+X f = -q;
+X pwaa = waa + (*aa1p++ * n0);
+X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
+X if ((h = h - m) > /* gap open from left best */
+X /* gap extend from left gapped */
+X (f = f - r)) f = h; /* if better, use new gap opened */
+X if ((h = ssj->H - m) > /* gap open from up best */
+X /* gap extend from up gap */
+X (e = ssj->E - r)) e = h; /* if better, use new gap opened */
+X h = p + *pwaa++; /* diagonal match */
+X if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
+X if (h < f ) h = f; /* left gap better, reset */
+X if (h < e ) h = e; /* up gap better, reset */
+X p = ssj->H; /* save previous best score */
+X ssj->H = h; /* save (new) up diag-matched */
+X ssj->E = e; /* save upper gap opened */
+X if (h > score) { /* ? new best score */
+X score = h; /* save best */
+X I = i; /* row */
+X J = (int)(ssj-ss); /* column */
+X }
+X }
+X i++;
+X } /* done with forward pass */
+X /* I and J are only assigned when score became positive, so they must */
+X /* not be read past this point on the score <= 0 path */
+X if (score <= 0) return 0;
+X
+X /* to get the start point, go backwards */
+X
+X /* 18-June-2003 fix bug in backtracking code to identify start of
+X alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
+X pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
+X */
+X
+X cost = K = L = 0;
+X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
+X
+X for (i=I; i>=0; i--) {
+X h = f = -1;
+X p = (i == I) ? 0 : -1; /* only the end cell may start the reverse path */
+X for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
+X f = max (f,h-q)-r;
+X ssj->E=max(ssj->E,ssj->H-q)-r;
+X h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
+X p = ssj->H;
+X ssj->H=h;
+X if (h > cost) {
+X cost = h;
+X K = i;
+X L = (int)(ssj-ss);
+X if (cost >= score) goto found;
+X }
+X }
+X }
+X
+found:
+X
+/* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
+X
+/* in the f_str version, the *res array is already allocated at 4*n0/3 */
+X
+X a_res->res = f_str->res;
+X *have_ares = 1;
+X /* min0/min1 are the first aligned indices, max0/max1 one past the last */
+X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
+X
+/* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
+X
+X
+/* this code no longer refers to aa0[], it uses pam2p[0][L] instead */
+/* The sequence bases are passed one element early because align() and */
+/* CHECK_SCORE() index the second sequence from 1 (B[j], j = 1..N). */
+/* NOTE(review): with K == 0 or L == 0 these pointers lie before the start */
+/* of the arrays - confirm the buffers have a leading pad element. */
+X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r,
+X a_res->res,&a_res->nres,f_str);
+X
+/* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
+X
+/* return *res and nres */
+X
+X return score;
+}
+X
+/* forward declaration; the definition follows DISPLAY() below */
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int *S, int **W, int IW, int G, int H, int *nres);
+X
+/* gap() reads g and h from the scope in which it is expanded */
+#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
+X
+/* DEL() and INS() read and update two names from the expanding scope: */
+/* sapp (int **, cursor into the edit script) and last (int *, the most */
+/* recently written op). A run that continues the previous op of the same */
+/* sign is merged into it instead of starting a new script entry. */
+/* Both expand to a plain { } block, and call sites below chain them with */
+/* no separating semicolons, so do not convert them to do { } while (0). */
+X
+/* Append "Delete k" op */
+#define DEL(k) \
+{ if (*last < 0) \
+X *last = (*sapp)[-1] -= (k); \
+X else { \
+X *last = (*sapp)[0] = -(k); \
+X (*sapp)++; \
+X } \
+}
+X
+/* Append "Insert k" op */
+#define INS(k) \
+{ if (*last > 0) \
+X *last = (*sapp)[-1] += (k); \
+X else { \
+X *last = (*sapp)[0] = (k); \
+X (*sapp)++; \
+X } \
+}
+X
+/*
+#define XTERNAL
+#include "upam.h"
+X
+void
+print_seq_prof(unsigned char *A, int M,
+X unsigned char *B, int N,
+X int **w, int iw, int dir) {
+X char c_max;
+X int i_max, j_max, i,j;
+X
+X char *c_dir="LRlr";
+X
+X for (i=1; i<=min(60,M); i++) {
+X fprintf(stderr,"%c",aa[A[i]]);
+X }
+X fprintf(stderr," - %d\n",M);
+X
+X for (i=0; i<min(60,M); i++) {
+X i_max = -1;
+X for (j=1; j<21; j++) {
+X if (w[iw+i][j]> i_max) {
+X i_max = w[iw+i][j];
+X j_max = j;
+X }
+X }
+X fprintf(stderr,"%c",aa[j_max]);
+X }
+X fputc(':',stderr);
+X
+X for (i=1; i<=min(60,N); i++) {
+X fprintf(stderr,"%c",aa[B[i]]);
+X }
+X
+X fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
+}
+*/
+X
+/* align(A,B,M,N,tb,te,last) returns the cost of an optimum conversion between
+X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+X and appends such a conversion to the current script. */
+/* The work is done by divide and conquer: a forward pass over the first */
+/* M/2 rows and a reverse pass over the rest pick a midpoint column, then */
+/* the two halves are aligned recursively. */
+/* A      never dereferenced here; only offset and passed to the recursion */
+/* B      read 1-based: B[1]..B[N] */
+/* w, iw  score rows; the row for the i-th symbol of A (1-based) is */
+/*        w[iw+i-1], indexed by the residue code from B */
+/* g, h   gap costs as used by gap(): a k-symbol gap costs g+h*k */
+/* f_str  supplies the f_ss / r_ss work rows, used at indices 0..N */
+/* dir    only consumed by the commented-out print_seq_prof() call */
+/* sapp   cursor into the edit script, advanced by DEL()/INS() */
+/* last   most recent op written to the script */
+X
+static int
+align(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int tb, int te, int **w, int iw, int g, int h,
+X struct f_struct *f_str, int dir,
+X int **sapp, int *last)
+{
+X
+X int midi, midj, type; /* Midpoint, type, and cost */
+X int midc;
+X int c1, c2; /* results of the recursive calls; assigned but never read */
+X
+X register int i, j;
+X register int c, e, d, s;
+X int m, t, *wa;
+X struct swstr *f_ss, *r_ss;
+X
+/* print_seq_prof(A,M,B,N,w,iw,dir); */
+X
+X m = g + h;
+X
+X f_ss = f_str->f_ss;
+X r_ss = f_str->r_ss;
+X
+/* Boundary cases: M <= 1 or N == 0 */
+X
+X if (N <= 0) {
+X if (M > 0) {DEL(M)}
+X return -gap(M);
+X }
+X
+X if (M <= 1) {
+X if (M <= 0) {
+X INS(N)
+X return -gap(N);
+X }
+X
+X /* M == 1: either delete the single A symbol and insert all of B */
+X /* (midj == 0) or match it against the best-scoring B[midj] */
+X if (tb < te) tb = te;
+X midc = (tb-h) - gap(N);
+X midj = 0;
+/* wa = w[A[1]]; */
+X wa = w[iw];
+X for (j = 1; j <= N; j++) {
+X c = -gap(j-1) + wa[B[j]] - gap(N-j);
+X if (c > midc) { midc = c; midj = j;}
+X }
+X if (midj == 0) { DEL(1) INS(N) }
+X else {
+X if (midj > 1) { INS(midj-1)}
+X *last = (*sapp)[0] = 0;
+X (*sapp)++;
+X if (midj < N) { INS(N-midj)}
+X }
+X return midc;
+X }
+X
+/* Divide: Find optimum midpoint (midi,midj) of cost midc */
+X
+X midi = M/2; /* Forward phase: */
+X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
+X t = -g;
+X for (j = 1; j <= N; j++) {
+X f_ss[j].H = t = t-h;
+X f_ss[j].E = t-g;
+X }
+X t = tb;
+X for (i = 1; i <= midi; i++) {
+X s = f_ss[0].H;
+X f_ss[0].H = c = t = t-h;
+X e = t-g;
+/* wa = w[A[i]]; */
+X wa = w[iw+i-1];
+X for (j = 1; j <= N; j++) {
+X if ((c = c - m) > (e = e - h)) e = c;
+X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
+X c = s + wa[B[j]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = f_ss[j].H;
+X f_ss[j].H = c;
+X f_ss[j].E = d;
+X }
+X }
+X f_ss[0].E = f_ss[0].H;
+X
+X r_ss[N].H = 0; /* Reverse phase: */
+X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
+X
+X for (j = N-1; j >= 0; j--) {
+X r_ss[j].H = t = t-h;
+X r_ss[j].E = t-g;
+X }
+X
+X t = te;
+X for (i = M-1; i >= midi; i--) {
+X s = r_ss[N].H;
+X r_ss[N].H = c = t = t-h;
+X e = t-g;
+/* wa = w[A[i+1]]; */
+X wa = w[iw+i];
+X for (j = N-1; j >= 0; j--) {
+X if ((c = c - m) > (e = e - h)) { e = c; }
+X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) { d = c; }
+X c = s + wa[B[j+1]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = r_ss[j].H;
+X r_ss[j].H = c;
+X r_ss[j].E = d;
+X }
+X }
+X r_ss[N].E = r_ss[N].H;
+X
+X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
+X midj = 0;
+X type = 1;
+X
+X /* type 1: the two halves simply meet at column midj */
+X for (j = 0; j <= N; j++) {
+X if ((c = f_ss[j].H + r_ss[j].H) >= midc) {
+X if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)) {
+X midc = c;
+X midj = j;
+X }
+X }
+X }
+X
+X /* type 2: a deletion runs across the midpoint row; g is added back */
+X /* because both E values already include a gap-open charge */
+X for (j = N; j >= 0; j--) {
+X if ((c = f_ss[j].E + r_ss[j].E + g) > midc) {
+X midc = c;
+X midj = j;
+X type = 2;
+X }
+X }
+X
+/* Conquer: recursively around midpoint */
+X
+X if (type == 1)
+X { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0,sapp,last);
+X c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
+X }
+X else
+X { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp,last);
+X DEL(2); /* the two A symbols on either side of the midpoint */
+X align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
+X }
+X return midc;
+}
+X
+/* Interface and top level of comparator */
+X
+/* ALIGN - top-level driver for the recursive aligner align(). */
+/* A, B    sequence bases handed straight to align() and CHECK_SCORE() */
+/* M, N    lengths of the two regions to align */
+/* W, IW   score rows and the index of the row for the first A symbol */
+/* G, H    gap costs (a k-symbol gap costs G+H*k) */
+/* S       receives the edit script */
+/* NC      receives the number of alignment columns (set by CHECK_SCORE) */
+/* f_str   its f_ss / r_ss members are pointed at two temporary rows for */
+/*         the duration of the call and reset to NULL before returning */
+/* Returns the score computed by align(). A mismatch against the score */
+/* recomputed from the script is reported on stdout and stderr but is */
+/* not treated as fatal. Exits the process if a row cannot be allocated. */
+static int
+ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int **W, int IW, int G, int H, int *S, int *NC,
+X struct f_struct *f_str)
+{
+X struct swstr *f_ss, *r_ss;
+X int *sapp, last;
+X int c, ck;
+X
+X sapp = S;
+X last = 0;
+X
+X /* N+2 cells, then step past the first one: indices -1..N are valid */
+X if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
+X exit (1);
+X }
+X f_ss++;
+X f_str->f_ss = f_ss;
+X
+X if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
+X exit (1);
+X }
+X r_ss++;
+X f_str->r_ss = r_ss;
+X
+X /* print_seq_prof(A,M,W,IW); */
+X c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp,&last); /* OK, do it */
+X
+X /* re-score the script that was just written as a consistency check */
+X ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
+X if (c != ck) {
+X fprintf(stdout,"*** Check_score error. %d != %d ***\n",c,ck);
+X fprintf(stderr,"*** Check_score error. %d != %d ***\n",c,ck);
+X }
+X
+X /* undo the +1 offset so free() gets the pointers calloc() returned */
+X f_ss--; r_ss--;
+X free(r_ss); free(f_ss);
+X
+X /* the rows are gone: do not leave dangling pointers behind in f_str */
+X f_str->f_ss = NULL;
+X f_str->r_ss = NULL;
+X
+X return c;
+}
+X
+/* Alignment display routine */
+X
+/* DISPLAY - print the alignment described by edit script S on stdout, */
+/* at most 50 columns per output block. */
+/* A, B    sequences, read 1-based (A[++i], B[++j]) */
+/* M, N    number of symbols to consume from A and from B */
+/* S       script: 0 = aligned pair, k > 0 = k columns with B only, */
+/*         k < 0 = -k columns with A only */
+/* AP, BP  coordinates printed in front of the first A and B lines */
+/* sq      maps residue codes to printable characters */
+/* In this file the only call is inside a comment in do_walign(). */
+static void
+DISPLAY(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int *S, int AP, int BP, char *sq)
+{ register char *a, *b, *c;
+X register int i, j, op;
+X int lines, ap, bp;
+X
+X /* 50 display columns plus the terminating NUL */
+X char ALINE[51], BLINE[51], CLINE[51];
+X
+X i = j = op = lines = 0;
+X ap = AP;
+X bp = BP;
+X a = ALINE;
+X b = BLINE;
+X c = CLINE;
+X while (i < M || j < N)
+X { if (op == 0 && *S == 0)
+X { op = *S++;
+X *a = sq[A[++i]];
+X *b = sq[B[++j]];
+X *c++ = (*a++ == *b++) ? '|' : ' '; /* mark identical characters */
+X }
+X else
+X { if (op == 0)
+X op = *S++;
+X if (op > 0)
+X { *a++ = ' ';
+X *b++ = sq[B[++j]];
+X op--;
+X }
+X else
+X { *a++ = sq[A[++i]];
+X *b++ = ' ';
+X op++;
+X }
+X *c++ = '-';
+X }
+X /* flush when the line buffers are full or the script is exhausted */
+X if (a >= ALINE+50 || (i >= M && j >= N))
+X { *a = *b = *c = '\0';
+X printf("\n%5d ",50*lines++);
+X for (b = ALINE+10; b <= a; b += 10)
+X printf(" . :");
+X if (b <= a+5)
+X printf(" .");
+X printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
+X ap = AP + i;
+X bp = BP + j;
+X a = ALINE;
+X b = BLINE;
+X c = CLINE;
+X }
+X }
+}
+X
+/* CHECK_SCORE - return the score of the alignment stored in S */
+X
+/* Re-score an edit script. */
+/* Walks S until M symbols of the first sequence and N of the second have */
+/* been consumed. A 0 entry scores one aligned pair from the profile row */
+/* w[iw+row] (B is read 1-based); a positive entry k is a gap that */
+/* consumes k symbols of B and costs g+k*h; a negative entry -k consumes */
+/* k profile rows and costs g+k*h. A is not referenced. */
+/* *NC receives the number of alignment columns; the score is returned. */
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X                       int M, int N,
+X                       int *S, int **w, int iw,
+X                       int g, int h, int *NC)
+{
+X  int row = 0;      /* symbols consumed from the profile side */
+X  int col = 0;      /* symbols consumed from B */
+X  int n_cols = 0;   /* alignment columns so far */
+X  int total = 0;    /* running score */
+X  int step;
+X
+X  /* print_seq_prof(A,M,w,iw); */
+X
+X  while (row < M || col < N) {
+X    step = *S++;
+X    if (step > 0) {           /* gap consuming B */
+X      total = total - (g + step*h);
+X      col += step;
+X      n_cols += step;
+X    }
+X    else if (step < 0) {      /* gap consuming profile rows */
+X      total = total - (g - step*h);
+X      row -= step;
+X      n_cols -= step;
+X    }
+X    else {                    /* aligned pair */
+X      col++;
+X      total = w[iw+row][B[col]] + total;
+X      row++;
+X      n_cols++;
+X    }
+X  }
+X
+X  *NC = n_cols;
+X  return total;
+}
+X
+/* pre_cons - hook run before the display strings are built. */
+/* Without TFAST the body is empty and all arguments are ignored. */
+/* With TFAST it stores the value returned by aatran() in f_str->n10 */
+/* (presumably the translated length - confirm against aatran). */
+/* NOTE(review): the struct f_struct in dropgsw.h declares neither aa1x */
+/* nor n10, so the TFAST branch looks uncompilable with that header. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
+{
+#ifdef TFAST
+X  f_str->n10 = aatran(aa1, f_str->aa1x, n1, frame);
+#endif
+}
+X
+/* aln_func_vals - give the coordinate-mapping fields of *aln their */
+/* values for this comparison function: qlfact, llfact and llmult are 1, */
+/* qlrev and llrev are 0, and frame is stored as 0 whatever the frame */
+/* argument is. */
+/* call from calcons, calc_id, calc_code */
+void
+aln_func_vals(int frame, struct a_struct *aln)
+{
+X  /* scale factors */
+X  aln->qlfact = 1;
+X  aln->llmult = 1;
+X  aln->llfact = 1;
+X
+X  /* neither coordinate system is reversed */
+X  aln->llrev = 0;
+X  aln->qlrev = 0;
+X
+X  aln->frame = 0;
+}
+X
+/* 29-June-2003 this version has been modified to use pst.pam2p
+X instead of pam2 to indicate similarity */
+X
+#include "a_mark.h"
+X
+/* calcons - build the three display strings for one alignment. */
+/* seqc0 receives the aa0 residues, seqc1 the aa1 residues and seqca one */
+/* M_* marker per column. Each string has three parts: */
+/*   1. mins columns of unaligned context before the alignment, */
+/*   2. lenc columns produced by replaying the edit script a_res.res, */
+/*   3. nd columns of context after the alignment (0 when LFASTA is set). */
+/* Also fills aln: amin0/amax0/amin1/amax1, smin0/smin1, nident, nsim, */
+/* ngap_q, ngap_l, and sets nfs to 0. */
+/* *nc receives lenc; the return value is mins+lenc+nd. */
+/* Only seqca is NUL-terminated here (after the aligned part); seqc0 and */
+/* seqc1 are not terminated by this function. */
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc, struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1;
+X int op, lenc, nd, ns, itmp;
+X char *sp0, *sp1, *spa, *sq;
+X int mins, smins; /* smins is assigned below but never read */
+X int *rp;
+X
+X if (pst.ext_sq_set) { sq = pst.sqx; }
+X else { sq = pst.sq; }
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X /* first fill in the ends */
+X
+X /* the braceless outer if holds one inner if/else; the last else of */
+X /* this statement ("we are not showing the start") pairs with the outer */
+X /* test */
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins=a_res.min0;
+X else mins = min(a_res.min0,aln->llcntx);
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X /* aa1 has fewer leading residues: left-pad it with blanks */
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* aa1 extends more to left: mirror image of the branch above */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llcntx);
+X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* we are not showing the start */
+X /* mins has the amount of unaligned context to be shown */
+X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X smins=mins;
+X
+X aln->smin0=a_res.min0 - mins;
+X aln->smin1=a_res.min1 - mins;
+X
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X }
+X
+/* now get the middle */
+X
+X memset(seqca,M_BLANK,mins);
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* replay the script: 0 = aligned pair, >0 = run of aa1-only columns, */
+X /* <0 = run of aa0-only columns; op holds what is left of the current run */
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X /* the marker comes from the position-specific score for column i0 */
+X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X *sp0 = sq[aa0[i0++]];
+X *sp1 = sq[aa1[i1++]];
+X
+X /* identity ignores case; for nucleotide alignments T and U also match */
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
+X (*sp0=='U' && *sp1=='T'))) {
+X aln->nident++; *spa=M_IDENT;
+X }
+X
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) op = *rp++;
+X if (op>0) {
+X *sp0++ = '-';
+X *sp1++ = sq[aa1[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X lenc++;
+X aln->ngap_q++;
+X }
+X else {
+X *sp0++ = sq[aa0[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X lenc++;
+X aln->ngap_l++;
+X }
+X }
+X }
+X
+X *nc = lenc;
+X *spa = '\0';
+/* now we have the middle, get the right end */
+X
+#ifndef LFASTA
+X /* how much extra to show at end ? */
+X if (!aln->llcntx_flg) {
+X ns = mins + lenc + aln->llen; /* show an extra line? */
+X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
+X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
+X nd = ns - (mins+lenc); /* this much extra */
+X }
+X else nd = aln->llcntx;
+X
+X /* never show more than the longer of the two unaligned tails */
+X if (nd > max(n0-a_res.max0,n1-a_res.max1))
+X nd = max(n0-a_res.max0,n1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else {
+X /* copy what is left of each sequence and blank-pad up to nd, or copy */
+X /* exactly nd residues when the tail is at least that long */
+X if ((nd-(n0-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X if ((nd-(n1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
+X }
+X
+#else /* LFASTA */
+X nd = 0;
+#endif
+X /* #undef LFASTA */
+X return mins+lenc+nd;
+}
+X
+/* calcons_a - build the display strings for one alignment, plus an */
+/* annotation line for aa0. */
+/* seqc0, seqc1 and seqca are filled as three parts: mins columns of */
+/* leading context, lenc columns replayed from the edit script a_res.res, */
+/* and nd columns of trailing context. seqc0a receives blanks for the */
+/* leading context and, for every column that consumes an aa0 residue, */
+/* ann_arr[aa0a[i0]]; columns where aa0 has a gap get a blank. */
+/* Also fills aln: amin0/amax0/amin1/amax1, smin0/smin1, nident, nsim, */
+/* ngap_q, ngap_l, and sets nfs to 0. */
+/* *nc receives lenc; the return value is mins+lenc+nd. */
+/* seqca and seqc0a are NUL-terminated after the aligned part; seqc0 and */
+/* seqc1 are not terminated by this function. */
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr, struct f_struct *f_str)
+{
+X int i0, i1;
+X int op, lenc, nd, ns, itmp;
+X char *sp0, *sp0a, *sp1, *spa, *sq;
+X int *rp;
+X int mins, smins;
+X
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X }
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X /* first fill in the ends */
+X
+X /* the braceless outer if holds one inner if/else; the last else of */
+X /* this statement pairs with the outer test */
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins=a_res.min0;
+X else mins = min(a_res.min0,aln->llcntx);
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X /* aa1 has fewer leading residues: left-pad it with blanks */
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* aa1 extends more to left: mirror image of the branch above */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llcntx);
+X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* the start is not shown in full: mins columns of context only */
+X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0 - smins;
+X aln->smin1=a_res.min1 - smins;
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X }
+X
+/* now get the middle */
+X
+X memset(seqca,M_BLANK,mins);
+X memset(seqc0a,' ',mins);
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp0a = seqc0a+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* replay the script: 0 = aligned pair, >0 = run of aa1-only columns, */
+X /* <0 = run of aa0-only columns; op holds what is left of the current run */
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X /* the marker comes from the position-specific score for column i0 */
+X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X /* annotation is read before i0 is advanced on the next line */
+X *sp0a++ = ann_arr[aa0a[i0]];
+X *sp0 = sq[aa0[i0++]];
+X *sp1 = sq[aa1[i1++]];
+X
+X /* identity ignores case; for nucleotide alignments T and U also match */
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
+X (*sp0=='U' && *sp1=='T'))) {
+X aln->nident++; *spa=M_IDENT;
+X }
+X
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) op = *rp++;
+X if (op>0) {
+X *sp0++ = '-';
+X *sp1++ = sq[aa1[i1++]];
+X *spa++ = M_DEL;
+X *sp0a++ = ' ';
+X op--;
+X lenc++;
+X aln->ngap_q++;
+X }
+X else {
+X *sp0a++ = ann_arr[aa0a[i0]];
+X *sp0++ = sq[aa0[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X lenc++;
+X aln->ngap_l++;
+X }
+X }
+X }
+X
+X *nc = lenc;
+X *sp0a = *spa = '\0';
+/* now we have the middle, get the right end */
+X
+X /* how much extra to show at end ? */
+X if (!aln->llcntx_flg) {
+X ns = mins + lenc + aln->llen; /* show an extra line? */
+X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
+X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
+X nd = ns - (mins+lenc); /* this much extra */
+X }
+X else nd = aln->llcntx;
+X
+X /* never show more than the longer of the two unaligned tails */
+X if (nd > max(n0-a_res.max0,n1-a_res.max1))
+X nd = max(n0-a_res.max0,n1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else {
+X /* copy what is left of each sequence and blank-pad up to nd, or copy */
+X /* exactly nd residues when the tail is at least that long */
+X if ((nd-(n0-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X if ((nd-(n1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
+X }
+X
+X return mins+lenc+nd;
+}
+X
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt);
+X
+/* build an array of match/ins/del - length strings */
+/* calc_code - replay the edit script a_res.res and append a run-length */
+/* encoding of it to al_str through update_code(). */
+/* Run kinds (p_op), and the character update_code() prints for each: */
+/*   0 '='  aligned pairs in which neither displayed residue is '*' */
+/*   1 '-'  script entries > 0 (only the aa1 index advances) */
+/*   2 '+'  script entries < 0 (only the aa0 index advances) */
+/*   3 '*'  aligned pairs in which at least one residue displays as '*' */
+/* al_str must already hold a NUL-terminated string (strlen is applied to */
+/* it); al_str_n minus that length is passed on as the remaining room. */
+/* The run state starts as p_op 0 with a count of 0, so a script that */
+/* begins with a gap or a '*' pair makes the first token "=0". */
+/* Similarity is counted with pst.pam2[0] indexed by the two residue codes. */
+/* Also fills aln: amin0/amax0/amin1/amax1, nident, nsim, ngap_q, ngap_l */
+/* and sets nfs to 0. Returns the number of alignment columns. */
+/* n0 is not referenced; tmp_cnt and nn1 are written but never read. */
+int calc_code(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc;
+X int p_op, op_cnt;
+X const unsigned char *aa1p;
+X char tmp_cnt[20];
+X char sp0, sp1, *sq;
+X int *rp;
+X
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X }
+X
+#ifndef TFAST
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
+X op_cnt = 0;
+X
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X tmp_cnt[0]='\0';
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X
+X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
+X
+X sp0 = sq[aa0[i0++]];
+X sp1 = sq[aa1p[i1++]];
+X
+X /* extend the current run, or flush it and start a new one when the */
+X /* kind of column changes */
+X if (p_op == 0 || p_op==3) {
+X if (sp0 != '*' && sp1 != '*') {
+X if (p_op == 3) {
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 0;
+X }
+X else {op_cnt++;}
+X }
+X else {
+X /* a '*' pair always flushes, so each one gets its own token */
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 3;
+X }
+X }
+X else {
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 0;
+X }
+X
+X op = *rp++;
+X lenc++;
+X
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X else if (pst.nt_align) {
+X if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
+X (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
+X /* for nucleotide alignments an 'N' paired with a different residue */
+X /* is added to the gap counters */
+X else if (toupper(sp0) == 'N') aln->ngap_q++;
+X else if (toupper(sp1) == 'N') aln->ngap_l++;
+X }
+X }
+X else {
+X if (op==0) op = *rp++;
+X if (op>0) {
+X if (p_op == 1) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 1;
+X }
+X op--; lenc++; i1++; aln->ngap_q++;
+X }
+X else {
+X if (p_op == 2) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 2;
+X }
+X op++; lenc++; i0++; aln->ngap_l++;
+X }
+X }
+X }
+X /* flush the run that was still open when the script ended */
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X
+X return lenc;
+}
+X
+/* update_code - append one "<op-char><count>" token to al_str. */
+/* al_str      NUL-terminated string being built */
+/* al_str_max  room left in al_str; calc_code() passes al_str_n minus the */
+/*             current string length, and it is treated here as a byte */
+/*             count that includes the terminating NUL */
+/* op          0..3, index into "=-+*" */
+/* op_cnt      run length printed after the op character */
+/* strncat() appends up to n characters and then adds a NUL, so n has to */
+/* be one less than the room that is left; a token that does not fit is */
+/* truncated rather than written past the end of the buffer. */
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt) {
+X
+X  char op_char[5]={"=-+*"};
+X  char tmp_cnt[20];
+X
+X  /* no room for even one character plus the NUL; this also rejects the */
+X  /* negative value that results when the caller's subtraction wraps */
+X  if (al_str_max <= 1) return;
+X
+X  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X  strncat(al_str,tmp_cnt,al_str_max-1);
+}
+X
+/* calc_id - replay the edit script a_res.res and count identities, */
+/* similarities and gaps without building any display strings. */
+/* Fills aln: amin0/amax0/amin1/amax1, nsim, ngap_q, ngap_l, nident, and */
+/* sets nfs to 0. Similarity is counted with pst.pam2[0] indexed by the */
+/* two residue codes. Identity ignores case; for nucleotide alignments */
+/* an upper-case T/U pair also counts. */
+/* Returns the number of alignment columns. n0 is not referenced. */
+int calc_id(const unsigned char *aa0, int n0,
+X            const unsigned char *aa1, int n1,
+X            struct a_struct *aln,
+X            struct a_res_str a_res,
+X            struct pstruct pst,
+X            struct f_struct *f_str)
+{
+X  const unsigned char *seq1;    /* second sequence as actually compared */
+X  char *alphabet;               /* residue code -> display character */
+X  int *script;                  /* cursor into the edit script */
+X  int pos0, pos1, len1;
+X  int run;                      /* what is left of the current gap run */
+X  int n_cols, n_same;
+X  int c0, c1;
+X
+X  if (pst.ext_sq_set) {
+X    alphabet = pst.sqx;
+X  }
+X  else {
+X    alphabet = pst.sq;
+X  }
+X
+#ifndef TFAST
+X  seq1 = aa1;
+X  len1 = n1;
+#else
+X  seq1 = f_str->aa1x;
+X  len1 = f_str->n10;
+#endif
+X
+X  aln->amin0 = a_res.min0;
+X  aln->amax0 = a_res.max0;
+X  aln->amin1 = a_res.min1;
+X  aln->amax1 = a_res.max1;
+X
+X  script = a_res.res;
+X  run = 0;
+X  aln->nfs = 0;
+X  aln->ngap_l = 0;
+X  aln->ngap_q = 0;
+X  aln->nsim = 0;
+X  n_same = 0;
+X  n_cols = 0;
+X  pos0 = a_res.min0;
+X  pos1 = a_res.min1;
+X
+X  while (pos0 < a_res.max0 || pos1 < a_res.max1) {
+X
+X    /* fetch the next script entry only when no gap run is pending */
+X    if (run == 0) run = *script++;
+X
+X    n_cols++;
+X
+X    if (run > 0) {              /* column consumes the second sequence only */
+X      run--;
+X      pos1++;
+X      aln->ngap_q++;
+X      continue;
+X    }
+X    if (run < 0) {              /* column consumes aa0 only */
+X      run++;
+X      pos0++;
+X      aln->ngap_l++;
+X      continue;
+X    }
+X
+X    /* run == 0: an aligned pair */
+X    if (pst.pam2[0][aa0[pos0]][seq1[pos1]]>=0) { aln->nsim++;}
+X
+X    c0 = alphabet[aa0[pos0]];
+X    c1 = alphabet[seq1[pos1]];
+X    pos0++;
+X    pos1++;
+X
+X    if (toupper(c0) == toupper(c1) ||
+X        (pst.nt_align &&
+X         ((c0=='T' && c1== 'U')||(c0=='U' && c1=='T')))) {
+X      n_same++;
+X    }
+X  }
+X
+X  aln->nident = n_same;
+X  return n_cols;
+}
+X
+#ifdef PCOMPLIB
+#include "p_mw.h"
+/* update_params - copy the n0 field of the message *qm_msg into *ppst; */
+/* no other field of either structure is touched. */
+void update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+X  ppst->n0 = qm_msg->n0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropgsw.c ||
+echo 'restore of dropgsw.c failed'
+Wc_c="`wc -c < 'dropgsw.c'`"
+test 55870 -eq "$Wc_c" ||
+ echo 'dropgsw.c: original size 55870, current size' "$Wc_c"
+fi
+# ============= dropgsw.h ==============
+if test -f 'dropgsw.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropgsw.h (File already exists)'
+else
+echo 'x - extracting dropgsw.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropgsw.h' &&
+X
+/* global definitions shared by dropgsw.c and altivec.c */
+X
+/* definitions for SW */
+X
+/* per-query work area handed to every function in dropgsw.c */
+struct f_struct {
+X /* row of (H,E) cells used by the score and alignment passes */
+X struct swstr *ss;
+X /* forward / reverse rows; ALIGN() points these at temporary storage */
+X /* for the duration of one align() recursion */
+X struct swstr *f_ss, *r_ss;
+X /* score profiles read as n0 ints per library residue code: waa_s is */
+X /* used for scoring, waa_a for building the alignment */
+X int *waa_s, *waa_a;
+X /* position-specific scores, read as pam2p[0][query position][residue]; */
+X /* NOTE(review): the use of pam2p[1] is not visible here - confirm */
+X int **pam2p[2];
+X /* buffer that receives the alignment edit script */
+X int *res;
+X /* aa0_f and kar_p are passed to do_karlin() when scoring */
+X double aa0_f[MAXSQ];
+X double *kar_p;
+#if defined(SW_ALTIVEC) || defined(SW_SSE2)
+X /* bias is passed to the 8-bit scoring routine together with byte_score */
+X unsigned char bias;
+X /* profiles for the 16-bit and 8-bit scoring routines */
+X unsigned short * word_score;
+X unsigned char * byte_score;
+X /* NOTE(review): workspace, alphabet_size and the *_memory members are */
+X /* not referenced in the code visible here; presumably *_memory hold the */
+X /* raw allocations behind the pointers above - confirm */
+X void * workspace;
+X int alphabet_size;
+X void * word_score_memory;
+X void * byte_score_memory;
+X void * workspace_memory;
+X /* non-zero while the 8-bit routine is tried first; cleared once too */
+X /* many searches had to be repeated in 16 bits */
+X int try_8bit;
+X /* number of 8-bit runs, and how many of them were redone in 16 bits */
+X int done_8bit;
+X int done_16bit;
+#endif
+};
+X
+SHAR_EOF
+chmod 0644 dropgsw.h ||
+echo 'restore of dropgsw.h failed'
+Wc_c="`wc -c < 'dropgsw.h'`"
+test 677 -eq "$Wc_c" ||
+ echo 'dropgsw.h: original size 677, current size' "$Wc_c"
+fi
+# ============= dropnfa.c ==============
+if test -f 'dropnfa.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropnfa.c (File already exists)'
+else
+echo 'x - extracting dropnfa.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropnfa.c' &&
+X
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropnfa.c,v 1.81 2007/04/26 18:37:19 wrp Exp $ */
+X
+/* 18-Sep-2006 - removed global variables for alignment from nw_align
+X and bg_align */
+X
+/* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates */
+X
+/* 14-May-2003 - modified to return alignment start at 0, rather than
+X 1, for begin:end alignments
+*/
+X
+/*
+X implements the fasta algorithm, see:
+X
+X W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological
+X sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448
+X
+X This version uses Smith-Waterman for final protein alignments
+X
+X W. R. Pearson (1996) "Effective protein sequence comparison"
+X Methods Enzymol. 266:227-258
+X
+X
+X 26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty
+X
+X 4-Nov-2001 - modify spam() while(1).
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+X
+/* this must be consistent with upam.h */
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+X
+/* globals for fasta */
+#define MAXWINDOW 64
+X
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+X
+#ifndef ALLOCN0
+static char *verstr="3.5 Sept 2006";
+#else
+static char *verstr="3.5an0 Sept 2006";
+#endif
+X
+extern void w_abort(char *, char *);
+int shscore(const unsigned char *aa0, int n0, int **pam2);
+extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+X double *aa0_f, double **kp);
+extern void init_karlin_a(struct pstruct *, double *, double **);
+extern int do_karlin(const unsigned char *, int n1, int **,
+X struct pstruct *, double *, double *,
+X double *, double *);
+extern void aancpy(char *to, char *from, int count, struct pstruct pst);
+char *ckalloc(size_t);
+X
+#ifdef TFASTA
+extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+#endif
+X
+#include "dropnfa.h"
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+struct swstr { int H, E;};
+X
+static int
+dmatch (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int hoff, int window,
+X int **pam2, int gdelval, int ggapval,
+X struct f_struct *f_str);
+X
+/* initialize for fasta */
+X
+/* init_work - build the per-query work structure returned in *f_arg.
+X
+X   aa0, n0 : encoded query sequence and its length
+X   ppst    : search parameters; param_u.fa.cgap, param_u.fa.pgap and
+X             (unless optcut_set == 1) param_u.fa.optcut are written here
+X   f_arg   : receives the newly allocated struct f_struct
+X
+X   The function derives the score thresholds, builds the ktup hash
+X   (harr/link) and word scores (pamh1/pamh2) for aa0, and allocates the
+X   diagonal, band (bss), row (ss), profile (waa0/waa1) and result (res)
+X   arrays.  Every allocation failure prints a message on stderr and
+X   calls exit(1).
+*/
+void
+init_work (unsigned char *aa0, int n0,
+X           struct pstruct *ppst,
+X           struct f_struct **f_arg)
+{
+X  int mhv, phv;
+X  int hmax;
+X  int i0, hv;
+X  int pamfact;
+X  int btemp;
+X  struct f_struct *f_str;
+X  /* these used to be globals, but do not need to be */
+X  int ktup;     /* word size examined */
+X  int fact;     /* factor used to scale ktup match value */
+X  int kt1;      /* ktup-1 */
+X  int lkt;      /* last ktup - initially kt1, but can be increased
+X                   for hsq >= NMAP */
+X
+X  int maxn0;    /* used in band alignment */
+X  int *pwaa;    /* pam[aa0[]] profile */
+X  int i, j;
+X  struct swstr *ss;
+X  int *waa;
+X  int nsq, ip, *hsq;
+X
+X  /* choose the extended or the standard alphabet and matching pam2 index */
+X  if (ppst->ext_sq_set) {
+X    nsq = ppst->nsqx; ip = 1;
+X    hsq = ppst->hsqx;
+X  }
+X  else {
+X    nsq = ppst->nsq; ip = 0;
+X    hsq = ppst->hsq;
+X  }
+X
+X  /* calloc() so that every pointer member (e.g. kar_p) starts out NULL;
+X     the result was previously used without a check */
+X  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct))) == NULL) {
+X    fprintf (stderr, " cannot allocate f_str\n");
+X    exit (1);
+X  }
+X
+#ifndef TFASTA
+X  if((ppst->zsflag%10) == 6) {
+X    f_str->kar_p = NULL;
+X    init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
+X  }
+#endif
+X
+X  /* btemp is the base threshold from which cgap and optcut are derived */
+X  btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+X    n0 / ppst->param_u.fa.bestscale +
+X    ppst->param_u.fa.bkfact *
+X    (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+X
+X  if (ppst->nt_align)
+X    btemp = (btemp*ppst->pam_h)/5;      /* normalize to standard +5/-4 */
+X
+X  btemp = min (btemp, ppst->param_u.fa.bestmax);
+X  if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+X
+X  ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+X
+X  if (ppst->param_u.fa.optcut_set != 1)
+#ifndef TFASTA
+X    ppst->param_u.fa.optcut = btemp;
+#else
+X    ppst->param_u.fa.optcut = (btemp*3)/2;
+#endif
+X
+#ifndef OLD_FASTA_GAP
+X  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#else
+X  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#endif
+X  pamfact = ppst->param_u.fa.pamfact;
+X  ktup = ppst->param_u.fa.ktup;
+X  fact = ppst->param_u.fa.scfact * ktup;
+X
+X  if (pamfact == -1) pamfact = 0;
+X  else if (pamfact == -2) pamfact = 1;
+X
+X  /* largest hashable residue code decides the bits per residue (kshft) */
+X  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+X    if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
+X
+X  if (mhv <= 0) {
+X    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+X    exit (1);
+X  }
+X
+X  for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+X
+/*      kshft = 2;      */
+X  kt1 = ktup - 1;
+X  hv = 1;
+X  for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
+X  hmax = hv;
+X  f_str->hmask = (hmax >> f_str->kshft) - 1;
+X
+X  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
+X             hmax, f_str->hmask);
+X    exit (1);
+X  }
+X
+X  if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh1 array nsq=%d\n",nsq);
+X    exit (1);
+X  }
+X
+X  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+X    fprintf (stderr, " cannot allocate pamh2 array hmax=%d\n",hmax);
+X    exit (1);
+X  }
+X
+X  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+X    /* message now ends with a newline like the other diagnostics */
+X    fprintf (stderr, " cannot allocate hash link array n0=%d\n",n0);
+X    exit (1);
+X  }
+X
+X  /* -1 marks "no query position" in both the table and the chains */
+X  for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
+X  for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
+X
+X  /* encode the aa0 array */
+X  phv = hv = 0;
+X  lkt = kt1;
+X  /* restart hv, phv calculation */
+X  for (i0 = 0; i0 < min(lkt,n0); i0++) {
+X    if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;}
+X    hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+X    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+X  }
+X
+X  for (; i0 < n0; i0++) {
+X    if (hsq[aa0[i0]] >= NMAP) {
+X      hv=phv=0;
+X      /* restart hv, phv calculation */
+X      for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
+X        if (hsq[aa0[i0]] >= NMAP) {
+X          hv=phv=0;
+X          lkt = i0+ktup;
+X          continue;
+X        }
+X        hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+X        phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+X      }
+X    }
+X    if (i0 >= n0) break;
+X    hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
+X    /* push position i0 on the front of the chain for word hv */
+X    f_str->link[i0] = f_str->harr[hv];
+X    f_str->harr[hv] = i0;
+X    if (pamfact) {
+X      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+X      /* this check should always be true, but just in case */
+X      if (hsq[aa0[i0-kt1]]<NMAP)
+X        phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+X    }
+X    else f_str->pamh2[hv] = fact * ktup;
+X  }
+X
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+X   pam2[0][0] is now undefined for consistency with blast
+*/
+X
+X  if (pamfact)
+X    for (i0 = 1; i0 <= nsq; i0++)
+X      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+X  else
+X    for (i0 = 1; i0 <= nsq; i0++)
+X      f_str->pamh1[i0] = fact;
+X
+X  f_str->ndo = 0;
+X  f_str->noff = n0-1;
+#ifndef ALLOCN0
+X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+X                                              sizeof (struct dstruct)))==NULL) {
+X    fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
+X             MAXDIAG *sizeof (struct dstruct));
+X    exit (1);
+X  };
+#else
+X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+X                                              sizeof (struct dstruct)))==NULL) {
+X    fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+X             (long)n0*sizeof (struct dstruct));
+X    exit (1);
+X  };
+#endif
+X
+X
+#ifdef TFASTA
+X  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
+X                                           sizeof(unsigned char)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
+X    exit (1);
+X  }
+X  f_str->aa1x++;
+#endif
+X
+X  /* band scoring array; checked before the pointer is advanced */
+X  if ((f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4,
+X                                           sizeof(struct bdstr))) == NULL) {
+X    fprintf (stderr, "cannot allocate bss array %d\n",
+X             ppst->param_u.fa.optwid*2+4);
+X    exit (1);
+X  }
+X  f_str->bss++;
+X
+X  /* allocate space for the scoring arrays */
+X  maxn0 = n0 + 4;
+X  if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+X    exit (1);
+X  }
+X  ss++;
+X  f_str->ss = ss;
+X
+X  /* initialize the "variable" pam array */
+X
+X  if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  /* row i of the profile holds pam2[ip][aa0[j]][i] for j = 0..n0-1 */
+X  pwaa = waa;
+X  for (i=0; i<=nsq; i++) {
+X    for (j=0;j<n0; j++) {
+X      *pwaa = ppst->pam2[ip][aa0[j]][i];
+X      pwaa++;
+X    }
+X  }
+X  f_str->waa0 = waa;
+X
+X  /* initialize the "conventional" pam array used for alignments */
+X
+X  if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  pwaa = waa;
+X  for (i=0; i<=nsq; i++) {
+X    for (j=0;j<n0; j++) {
+X      *pwaa = ppst->pam2[0][aa0[j]][i];
+X      pwaa++;
+X    }
+X  }
+X  f_str->waa1 = waa;
+X
+X  f_str->max_res = max(3*n0/2,MIN_RES);
+X
+X  /* now we need alignment storage - get it */
+X  if ((f_str->res = (int *)calloc((size_t)f_str->max_res,sizeof(int)))==NULL) {
+X    fprintf(stderr,"cannot allocate alignment results array %d\n",f_str->max_res);
+X    exit(1);
+X  }
+X
+X  *f_arg = f_str;
+}
+X
+X
+/* pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format */
+/* get_param - format a description of the search parameters.
+X pstr : parameters to report
+X pstring1 : always written; program name, verstr, matrix, ktup, join
+X (cgap), gap penalties and width, plus " init1" when iniflag is set
+X pstring2 : if not NULL, receives the same values as "; pg_..." lines
+X Both buffers are filled with sprintf()/strcat(), so no length check is
+X made here; the caller must supply buffers that are large enough. */
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifndef TFASTA
+X char *pg_str="FASTA";
+#else
+X char *pg_str="TFASTA";
+#endif
+X
+X /* the optimized form additionally reports optcut ("opt:");
+X OLD_FASTA_GAP only changes the label of the gap penalties */
+X if (!pstr->param_u.fa.optflag)
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, width: %3d",
+#else
+X sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, width: %3d",
+#endif
+X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->gdelval, pstr->ggapval, pstr->param_u.fa.optwid);
+X else
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d, width: %3d",
+#else
+X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d, width: %3d",
+#endif
+X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set) ? "xS":"\0",
+X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
+X pstr->param_u.fa.optwid);
+X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
+X /*
+X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
+X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+X */
+X
+X /* second, line-per-value form; skipped when the caller passes NULL */
+X if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#else
+X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
+; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#endif
+X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, pstr->gdelval,
+X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
+X pstr->param_u.fa.cgap);
+X }
+}
+X
+void
+close_work (const unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg)
+{
+X struct f_struct *f_str;
+X
+X
+X f_str = *f_arg;
+X
+X
+X if (f_str != NULL) {
+X if (f_str->kar_p!=NULL) free(f_str->kar_p);
+X f_str->ss--;
+X f_str->bss--;
+X
+X free(f_str->res);
+X free(f_str->waa1);
+X free(f_str->waa0);
+X free(f_str->ss);
+X free(f_str->bss);
+X free(f_str->diag);
+X free(f_str->link);
+X free(f_str->pamh2);
+X free(f_str->pamh1);
+X free(f_str->harr);
+X
+X free(f_str);
+X *f_arg = NULL;
+X }
+}
+X
+#ifdef ALLOCN0
+void savemax (struct dstruct *, int, struct f_struct *);
+#else
+void savemax (struct dstruct *, struct f_struct *);
+#endif
+X
+int spam (const unsigned char *, const unsigned char *, struct savestr *,
+X int **, int, int, int);
+int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff);
+void kpsort(struct savestr **, int);
+X
+static int
+ALIGN(const unsigned char *, const unsigned char *, int, int,
+X int **, int, int, int *, int *, struct f_struct *);
+X
+static int
+LOCAL_ALIGN(const unsigned char *, const unsigned char *,
+X int, int, int, int,
+X int **, int, int, int *, int *, int *, int *, int,
+X struct f_struct *);
+X
+static int
+B_ALIGN(const unsigned char *A, const unsigned char *B, int M,
+X int N, int low, int up, int **W, int G, int H, int *S,
+X int *nS, int MW, int MX, struct bdstr *bss);
+X
+/* do_fasta - score library sequence aa1[0..n1-1] against the query
+X aa0[0..n0-1] whose hash tables are in f_str.
+X Results are stored in rst->score[]:
+X score[1] best single rescored region
+X score[0] max of that and the joined score from sconn()
+X score[2] same as score[0], or the dmatch() band score when
+X optflag is set and score[0] > optcut
+X All three are 0 when n1 < ktup or nothing was saved, and -1 when
+X n0+n1+1 >= MAXDIAG.
+X *hoff is written only as part of the dmatch() call. */
+static void
+do_fasta (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst, struct f_struct *f_str,
+X struct rstruct *rst, int *hoff)
+{
+X int nd; /* diagonal array size */
+X int lhval;
+X int kfact;
+X register struct dstruct *dptr;
+X register int tscor;
+X
+#ifndef ALLOCN0
+X register struct dstruct *diagp;
+#else
+X register int dpos;
+X int lposn0;
+#endif
+X int noff;
+X struct dstruct *dpmax;
+X register int lpos;
+X int tpos;
+X struct savestr *vmptr;
+X int scor, ib, nsave;
+X int xdrop, do_extend;
+X int ktup, kt1, lkt, *hsq, ip;
+X
+X /* same alphabet / pam2 selection as init_work() */
+X if (ppst->ext_sq_set) {
+X ip = 1;
+X hsq = ppst->hsqx;
+X }
+X else {
+X ip = 0;
+X hsq = ppst->hsq;
+X }
+X
+X xdrop = -ppst->pam_l;
+X /* do extended alignment in spam iff protein or short sequences */
+X do_extend = !ppst->nt_align || (n0 < 50) || (n1 < 50);
+X
+X ktup = ppst->param_u.fa.ktup;
+X kt1 = ktup-1;
+X
+X /* library sequence shorter than one word: nothing can match */
+X if (n1 < ktup) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+X if (n0+n1+1 >= MAXDIAG) {
+X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+X rst->score[0] = rst->score[1] = rst->score[2] = -1;
+X return;
+X }
+X
+#ifdef ALLOCN0
+X nd = n0;
+#else
+X nd = n0 + n1;
+#endif
+X
+X /* clear the diagonal entries from f_str->ndo up to nd */
+X dpmax = &f_str->diag[nd];
+X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+X {
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X
+X /* empty the table of saved regions */
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X vmptr->score = 0;
+X f_str->lowmax = f_str->vmax;
+X f_str->lowscor = 0;
+X
+X /* start hashing */
+X lhval = 0;
+X lkt = kt1;
+X /* NOTE: aa1[lpos] is read before the lpos < n1 test, so aa1[n1] must
+X be readable */
+X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos <n1; lpos++) {
+X /* restart lhval calculation */
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lhval = 0; lkt = lpos + ktup;
+X continue;
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X }
+X
+X noff = f_str->noff;
+#ifndef ALLOCN0
+X diagp = &f_str->diag[noff + lkt];
+X for (; lpos < n1; lpos++, diagp++) {
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lpos++ ; diagp++;
+X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+X if (lpos >= n1) break;
+X lhval = 0;
+X }
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X /* walk every query position tpos that has the word lhval */
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+X lposn0 = noff + lpos;
+X for (; lpos < n1; lpos++, lposn0++) {
+X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+X /*
+X if (hsq[aa1[lpos]]>=NMAP) {
+X lpos++; lposn0++;
+X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; lposn0++;}
+X }
+X */
+X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
+X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+X dpos = lposn0 - tpos;
+X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+X /* the diagonal already has a run: tscor becomes
+X (previous stop + ktup - lpos); > 0 means this word overlaps it */
+X tscor += ktup;
+X if ((tscor -= lpos) <= 0) {
+X scor = dptr->score;
+X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
+#ifdef ALLOCN0
+X savemax (dptr, dpos, f_str);
+#else
+X savemax (dptr, f_str);
+#endif
+X /* either continue the old run across the gap or start a new one */
+X if ((tscor += scor) >= kfact) {
+X dptr->score = tscor;
+X dptr->stop = lpos;
+X }
+X else {
+X dptr->score = kfact;
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X }
+X else {
+X /* overlapping word: add the score of one more residue */
+X dptr->score += f_str->pamh1[aa0[tpos]];
+X dptr->stop = lpos;
+X }
+X }
+X else {
+X /* first word seen on this diagonal */
+X dptr->score = f_str->pamh2[lhval];
+X dptr->start = (dptr->stop = lpos) - kt1;
+X }
+X } /* end tpos */
+X
+#ifdef ALLOCN0
+X /* reinitialize diag structure */
+X loopl:
+X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor)
+X savemax (dptr, lpos, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr->score = 0;
+#endif
+X } /* end lpos */
+X
+X /* save any run still pending on a diagonal */
+#ifdef ALLOCN0
+X for (tpos = 0, dpos = noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
+X savemax (dptr, dpos, f_str);
+X }
+#else
+X for (dptr = f_str->diag; dptr < dpmax;) {
+X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+X dptr->stop = -1;
+X dptr->dmax = NULL;
+X dptr++->score = 0;
+X }
+X f_str->ndo = nd;
+#endif
+X
+/*
+X at this point all of the elements of aa1[lpos]
+X have been searched for elements of aa0[tpos]
+X with the results in diag[dpos]
+*/
+X /* rescore each saved region with spam() and collect pointers in vptr[] */
+X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
+X /*
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X noff+vmptr->start-vmptr->dp,
+X noff+vmptr->stop-vmptr->dp,
+X vmptr->start,vmptr->stop,
+X vmptr->dp,vmptr->score);
+X
+X */
+X if (vmptr->score > 0) {
+X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], xdrop,
+X noff,do_extend);
+X f_str->vptr[nsave++] = vmptr;
+X }
+X }
+X
+X if (nsave <= 0) {
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X return;
+X }
+X
+X /*
+X fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,noff);
+X for (ib=0; ib<nsave; ib++) {
+X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
+X noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
+X noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
+X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
+X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
+X }
+X fprintf(stderr,"---\n");
+X */
+X
+X /* joined score of compatible regions */
+X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
+X ppst->param_u.fa.pgap, noff);
+X
+X /* vmptr = highest scoring single region */
+X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+X
+/* kssort (f_str->vptr, nsave); */
+X
+X rst->score[1] = vmptr->score;
+X rst->score[0] = max (scor, vmptr->score);
+X rst->score[2] = rst->score[0]; /* initn */
+X
+X /* optional band alignment centred on the best region's diagonal */
+X if (ppst->param_u.fa.optflag) {
+X if (rst->score[0] > ppst->param_u.fa.optcut)
+X rst->score[2] = dmatch (aa0, n0, aa1, n1, *hoff=noff - vmptr->dp,
+X ppst->param_u.fa.optwid, ppst->pam2[ip],
+X ppst->gdelval,ppst->ggapval,f_str);
+X }
+}
+X
+/* do_work - score one library sequence and fill in *rst.
+X rst->score[] is zeroed, escore set to 1.0 and segnum/seglen to 1,
+X then do_fasta() is run (on the aatran() translation of aa1 for the
+X given frame when built with TFASTA).
+X rst->comp and rst->H come from do_karlin() when zsflag%10 == 6 and it
+X returns > 0 (non-TFASTA builds only); otherwise both are -1.0.
+X NOTE: when n1 < ktup the function returns before comp and H are set.
+X qr_flg is not used here. */
+void do_work (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst, struct f_struct *f_str,
+X int qr_flg, struct rstruct *rst)
+{
+X int hoff, n10;
+X
+X double lambda, H;
+X
+X rst->score[0] = rst->score[1] = rst->score[2] = 0;
+X rst->escore = 1.0;
+X rst->segnum = rst->seglen = 1;
+X
+X if (n1 < ppst->param_u.fa.ktup) return;
+X
+#ifdef TFASTA
+X n10=aatran(aa1,f_str->aa1x,n1,frame);
+X do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
+#else /* FASTA */
+X do_fasta (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#endif
+X
+#ifndef TFASTA
+X if((ppst->zsflag%10) == 6 &&
+X (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
+X f_str->kar_p, &lambda, &H)>0)) {
+X rst->comp = 1.0/lambda;
+X rst->H = H;
+X }
+X else {rst->comp = rst->H = -1.0;}
+#else
+X rst->comp = rst->H = -1.0;
+#endif
+}
+X
+/* do_opt - rerun do_fasta() with the optimization step forced on.
+X   ppst->param_u.fa.optflag is set to 1 for the duration of the call and
+X   restored afterwards; the scores are returned in *rst.  With TFASTA
+X   the library sequence is first translated in the given frame.
+*/
+void do_opt (const unsigned char *aa0, int n0,
+X             const unsigned char *aa1, int n1,
+X             int frame,
+X             struct pstruct *ppst,
+X             struct f_struct *f_str,
+X             struct rstruct *rst)
+{
+X  int optflag, hoff;
+#ifdef TFASTA
+X  int n10;      /* length of the translated sequence */
+#endif
+X
+X  optflag = ppst->param_u.fa.optflag;
+X  ppst->param_u.fa.optflag = 1;
+X
+#ifdef TFASTA
+X  n10=aatran(aa1,f_str->aa1x,n1,frame);
+X  do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
+#else   /* FASTA */
+X  do_fasta(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff);
+#endif
+X  ppst->param_u.fa.optflag = optflag;
+}
+X
+/* savemax - record the run held in *dptr in the table of best regions
+X f_str->vmax[0..MAXSAV-1].
+X With ALLOCN0 the diagonal number dpos is passed in; otherwise it is
+X computed as the index of dptr within f_str->diag.
+X On return f_str->lowmax points at the lowest scoring table entry and
+X f_str->lowscor holds its score. */
+#ifdef ALLOCN0
+void
+savemax (dptr, dpos, f_str)
+X register struct dstruct *dptr;
+X int dpos;
+X struct f_struct *f_str;
+{
+X register struct savestr *vmptr;
+X register int i;
+X
+#else
+void
+savemax (dptr, f_str)
+X register struct dstruct *dptr;
+X struct f_struct *f_str;
+{
+X register int dpos;
+X register struct savestr *vmptr;
+X register int i;
+X
+X dpos = (int) (dptr - f_str->diag);
+X
+#endif
+X
+/* check to see if this is the continuation of a run that is already saved */
+X
+X if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
+X vmptr->start == dptr->start)
+X {
+X /* same run: extend it; nothing else changes unless the score
+X improved and this entry was the current minimum */
+X vmptr->stop = dptr->stop;
+X if ((i = dptr->score) <= vmptr->score)
+X return;
+X vmptr->score = i;
+X if (vmptr != f_str->lowmax)
+X return;
+X }
+X else
+X {
+X /* new run: overwrite the current lowest entry */
+X i = f_str->lowmax->score = dptr->score;
+X f_str->lowmax->dp = dpos;
+X f_str->lowmax->start = dptr->start;
+X f_str->lowmax->stop = dptr->stop;
+X dptr->dmax = f_str->lowmax;
+X }
+X
+X /* find the new lowest scoring entry */
+X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+X if (vmptr->score < i)
+X {
+X i = vmptr->score;
+X f_str->lowmax = vmptr;
+X }
+X f_str->lowscor = i;
+}
+X
+/* spam - rescore the region *dmax with the full scoring matrix.
+X
+X   aa0, aa1  : query and library sequences
+X   dmax      : region to rescan; dmax->start..dmax->stop are library
+X               positions, dmax->dp is the diagonal.  start and stop are
+X               replaced by the bounds of the best scoring sub-region.
+X   pam2      : scoring matrix, indexed pam2[query residue][library residue]
+X   xdrop     : allowed score drop while extending
+X   noff      : offset used to turn (library position - dp) into a query
+X               position
+X   do_extend : non-zero to try extending the region at both ends
+X
+X   Returns the best sub-region score (0 if no positive score is found, in
+X   which case start/stop are left unchanged).
+X
+X   NOTE: the extension loops have no explicit length limit; they stop
+X   only when the running score drops by xdrop, so they rely on the
+X   sequences being terminated by a residue that scores badly.
+*/
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+X          struct savestr *dmax, int **pam2, int xdrop,
+X          int noff, int do_extend)
+{
+X  register int lpos, tot;
+X  register const unsigned char *aa0p, *aa1p;
+X
+X  int drop_thresh;
+X
+X  struct {
+X    int start, stop, score;
+X  } curv, maxv;
+X
+X  aa1p = &aa1[lpos= dmax->start];       /* get the start of lib seq */
+X  aa0p = &aa0[lpos - dmax->dp + noff];  /* start of query */
+#ifdef DEBUG
+X  /* also add check in calling routine */
+X  if (aa0p < aa0) { return -99; }
+#endif
+X  curv.start = lpos;                    /* start index in lib seq */
+X
+X  /* default to the incoming bounds so that a region with no positive
+X     score does not copy indeterminate values back into *dmax */
+X  maxv.start = dmax->start;
+X  maxv.stop = dmax->stop;
+X
+X  tot = curv.score = maxv.score = 0;
+X
+X  for (; lpos <= dmax->stop; lpos++) {
+X    tot += pam2[*aa0p++][*aa1p++];
+X    if (tot > curv.score) {             /* update current score */
+X      curv.stop = lpos;
+X      curv.score = tot;
+X    }
+X    else if (tot < 0) {
+X      if (curv.score > maxv.score) {    /* save score, start, stop */
+X        maxv.start = curv.start;
+X        maxv.stop = curv.stop;
+X        maxv.score = curv.score;
+X      }
+X      tot = curv.score = 0;             /* reset running score */
+X      curv.start = lpos+1;              /* reset start */
+X      if(lpos >= dmax->stop) break;     /* if the zero is beyond stop, quit */
+X    }
+X  }
+X
+X  if (curv.score > maxv.score) {
+X    maxv.start = curv.start;
+X    maxv.stop = curv.stop;
+X    maxv.score = curv.score;
+X  }
+X
+#ifndef NOSPAM_EXT
+X
+X  /* now check to see if the score gets better by extending */
+X  if (do_extend && maxv.score > xdrop) {
+X
+X    if (maxv.stop == dmax->stop) {
+X      tot = maxv.score;
+X      drop_thresh = maxv.score - xdrop;
+X      aa1p = &aa1[lpos= dmax->stop];
+X      aa0p = &aa0[lpos - dmax->dp + noff];
+X      while (tot > drop_thresh ) {
+X        ++lpos;
+X        tot += pam2[*(++aa0p)][*(++aa1p)];
+X        if (tot > maxv.score) {
+X          /* extending to the right moves the END of the region; this
+X             previously overwrote maxv.start, leaving start > stop */
+X          maxv.stop = lpos;
+X          maxv.score = tot;
+X          drop_thresh = tot - xdrop;
+X        }
+X      }
+X    }
+X
+X    /* scan backwards now */
+X
+X    if (maxv.start == dmax->start) {
+X      tot = maxv.score;
+X      drop_thresh = maxv.score - xdrop;
+X      aa1p = &aa1[lpos= dmax->start];
+X      aa0p = &aa0[lpos - dmax->dp + noff];
+X      while (tot > drop_thresh) {
+X        --lpos;
+X        tot += pam2[*(--aa0p)][*(--aa1p)];
+X        if (tot > maxv.score) {
+X          maxv.start = lpos;
+X          maxv.score = tot;
+X          drop_thresh = tot - xdrop;
+X        }
+X      }
+X    }
+X  }
+#endif
+X
+/*      if (maxv.start != dmax->start || maxv.stop != dmax->stop)
+X                printf(" new region: %3d %3d %3d %3d\n",maxv.start,
+X                        dmax->start,maxv.stop,dmax->stop);
+*/
+X  dmax->start = maxv.start;
+X  dmax->stop = maxv.stop;
+X
+X  return maxv.score;
+}
+X
+/* sconn - combine compatible saved regions into one joined score.
+X v, n : array of n region pointers; reordered in place by kpsort()
+X cgap : regions scoring below cgap are ignored
+X pgap : value added to the sum each time two regions are joined
+X noff : offset used to turn (library position - dp) into a query
+X position
+X Returns the score at the head of the score-ordered list, or 0 if no
+X region reached cgap.
+X NOTE: sarr[] has MAXSAV entries, so n must not exceed MAXSAV. */
+int sconn (struct savestr **v, int n, int cgap, int pgap, int noff)
+{
+X int i, si;
+X struct slink
+X {
+X int score;
+X struct savestr *vp;
+X struct slink *next;
+X } *start, *sl, *sj, *so, sarr[MAXSAV];
+X int lstart, tstart, plstop, ptstop;
+X
+/* sort the score left to right in lib pos */
+X
+X kpsort (v, n);
+X
+X start = NULL;
+X
+/* for the remaining runs, see if they fit */
+X
+X for (i = 0, si = 0; i < n; i++)
+X {
+X
+/* if the score is less than the gap penalty, it never helps */
+X if (v[i]->score < cgap)
+X continue;
+X lstart = v[i]->start;
+X tstart = lstart - v[i]->dp + noff;
+X
+/* put the run in the group */
+X sarr[si].vp = v[i];
+X sarr[si].score = v[i]->score;
+X sarr[si].next = NULL;
+X
+/* if it fits, then increase the score */
+X /* the list is kept in decreasing score order, so the first entry that
+X ends before this run starts (in both sequences) is the best one */
+X for (sl = start; sl != NULL; sl = sl->next)
+X {
+X plstop = sl->vp->stop;
+X ptstop = plstop - sl->vp->dp + noff;
+X if (plstop < lstart && ptstop < tstart)
+X {
+X sarr[si].score = sl->score + v[i]->score + pgap;
+X break;
+X }
+X }
+X
+/* now recalculate where the score fits */
+X /* NOTE(review): an entry is linked in only in front of a lower scoring
+X one; if it does not beat any list member the loop ends without
+X linking it, so it is never a candidate for later joins - confirm
+X this is intended */
+X if (start == NULL)
+X start = &sarr[si];
+X else
+X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
+X if (sarr[si].score > sj->score) {
+X sarr[si].next = sj;
+X if (so != NULL) so->next = &sarr[si];
+X else start = &sarr[si];
+X break;
+X }
+X so = sj;
+X }
+X si++;
+X }
+X
+X if (start != NULL)
+X return (start->score);
+X else
+X return (0);
+}
+X
+/* kssort - shell sort of v[0..n-1] into decreasing ->score order */
+void
+kssort (struct savestr *v[], int n)
+{
+X  int stride, hi, lo;
+X  struct savestr *held;
+X
+X  stride = n / 2;
+X  while (stride > 0) {
+X    for (hi = stride; hi < n; hi++) {
+X      lo = hi - stride;
+X      /* move the larger score toward the front, one stride at a time */
+X      while (lo >= 0 && v[lo]->score < v[lo + stride]->score) {
+X        held = v[lo + stride];
+X        v[lo + stride] = v[lo];
+X        v[lo] = held;
+X        lo -= stride;
+X      }
+X    }
+X    stride /= 2;
+X  }
+}
+X
+/* kpsort - shell sort of v[0..n-1] into increasing ->start order */
+void
+kpsort (struct savestr *v[], int n)
+{
+X  int stride, hi, lo;
+X  struct savestr *held;
+X
+X  stride = n / 2;
+X  while (stride > 0) {
+X    for (hi = stride; hi < n; hi++) {
+X      lo = hi - stride;
+X      /* move the smaller start toward the front, one stride at a time */
+X      while (lo >= 0 && v[lo]->start > v[lo + stride]->start) {
+X        held = v[lo + stride];
+X        v[lo + stride] = v[lo];
+X        v[lo] = held;
+X        lo -= stride;
+X      }
+X    }
+X    stride /= 2;
+X  }
+}
+X
+/* dmatch - band-limited local alignment score around one diagonal.
+X hoff selects the centre of the band and window its width (limited
+X to n1). The score is computed by FLOCAL_ALIGN(), which is handed
+X aa0-1 and aa1-1 because it indexes both sequences from 1, and the
+X gap penalties with their sign reversed.
+X NOTE(review): FLOCAL_ALIGN is defined later in this file; its
+X declaration is presumably supplied by an included header - confirm. */
+static int dmatch (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int hoff, int window,
+X int **pam2, int gdelval, int ggapval,
+X struct f_struct *f_str)
+{
+X int low, up;
+X
+X window = min (n1, window);
+X /* hoff is the offset found from aa1 to seq 2 by hmatch */
+X
+X /* band of "window" diagonals, low..up, centred on -hoff */
+X low = -window/2-hoff;
+X up = low+window;
+X
+X return FLOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up,
+X pam2,
+#ifdef OLD_FASTA_GAP
+X -(gdelval-ggapval),
+#else
+X -gdelval,
+#endif
+X -ggapval,window,f_str);
+X }
+X
+X
+/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+X
+X To invoke, call FLOCAL_ALIGN(A,B,M,N,L,U,W,G,H,MW,f_str).
+X The parameters are explained as follows:
+X A, B : two sequences to be aligned
+X M : the length of sequence A
+X N : the length of sequence B
+X L : lower bound of the band
+X U : upper bound of the band
+X W : scoring table for matches and mismatches
+X G : gap-opening penalty
+X H : gap-extension penalty
+X MW : maximum window size
+X f_str : work structure whose bss array holds the band scores
+*/
+X
+#include <stdio.h>
+X
+#define MININT -9999999
+X
+/* FLOCAL_ALIGN - best local alignment score of A[1..M] against B[1..N]
+X restricted to the diagonals low..up (clipped to -M..N).
+X W is the scoring table, G and H the gap open and extend penalties.
+X Only the score is returned; no alignment is recorded. 0 is returned
+X when M <= 0 or N <= 0, or (with a message) when the band is empty.
+X The band is scored in f_str->bss, indexed here from leftd-1 to
+X rightd+1. MW is not referenced in the body. */
+static int
+FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N, int low, int up,
+X int **W, int G,int H, int MW,
+X struct f_struct *f_str)
+{
+X int band;
+X register struct bdstr *bssp;
+X int i, j, si, ei;
+X int c, d, e, m;
+X int leftd, rightd;
+X int best_score;
+X int *wa, curd;
+X int ib;
+X
+X bssp = f_str->bss;
+X
+X m = G+H; /* cost of opening a gap of length one */
+X low = max(-M, low);
+X up = min(N, up);
+X
+X if (N <= 0) return 0;
+X
+X if (M <= 0) return 0;
+X
+X band = up-low+1;
+X if (band < 1) {
+X fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
+X M, N, low, up);
+X return 0;
+X }
+X
+X /* leftd..rightd is the part of the band in use for the current row */
+X if (low > 0) leftd = 1;
+X else if (up < 0) leftd = band;
+X else leftd = 1-low;
+X rightd = band;
+X si = max(0,-up); /* start index -1 */
+X ei = min(M,N-low); /* end index */
+X bssp[leftd].CC = 0;
+X for (j = leftd+1; j <= rightd; j++) {
+X bssp[j].CC = 0;
+X bssp[j].DD = -G;
+X }
+X
+X /* MININT sentinels on both sides of the band */
+X bssp[rightd+1].CC = MININT;
+X bssp[rightd+1].DD = MININT;
+X
+X best_score = 0;
+X bssp[leftd-1].CC = MININT;
+X bssp[leftd].DD = -G;
+X
+X for (i = si+1; i <= ei; i++) {
+X if (i > N-up) rightd--;
+X if (leftd > 1) leftd--;
+X wa = W[A[i]];
+X /* leftmost cell of the row is handled separately from the loop below */
+X if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
+X /* the diagonal term is used only when B's index ib is > 0 */
+X if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
+X
+X if (d > c) c = d;
+X if (c < 0) c = 0;
+X e = c-G;
+X bssp[leftd].DD = d;
+X bssp[leftd].CC = c;
+X if (c > best_score) best_score = c;
+X
+X for (curd=leftd+1; curd <= rightd; curd++) {
+X if ((c = c-m) > (e = e-H)) e = c;
+X if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
+X c = bssp[curd].CC + wa[B[curd+low-1+i]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X if (c < 0) c = 0; /* local alignment: scores never go negative */
+X bssp[curd].CC = c;
+X bssp[curd].DD = d;
+X if (c > best_score) best_score = c;
+X }
+X }
+X
+X return best_score;
+}
+X
+/* ckalloc - allocate space; check for success.
+X   amount : number of bytes wanted
+X   Returns the pointer from malloc(); w_abort() is called with an
+X   out-of-memory message when malloc() returns NULL.
+*/
+char *ckalloc(size_t amount)
+{
+X  char *p;
+X
+X  /* pass the size_t through unchanged: the former (unsigned) cast
+X     truncated the request where size_t is wider than unsigned */
+X  if ((p = (char *)malloc(amount)) == NULL)
+X    w_abort("Ran out of memory.","");
+X  return(p);
+}
+X
+/* shscore - score of aa0[0..n0-1] aligned with itself, i.e. the sum of
+X   the diagonal entries pam2[r][r] over every residue r of the sequence */
+int
+shscore(const unsigned char *aa0, int n0, int **pam2)
+{
+X  const unsigned char *resp = aa0;
+X  int total = 0;
+X
+X  while (n0-- > 0) {
+X    total += pam2[*resp][*resp];
+X    resp++;
+X  }
+X  return total;
+}
+X
+/* sw_walign - local alignment of aa0[0..n0-1] with aa1.
+X A forward pass over the f_str->waa1 profile finds the best score and
+X its end point (I in aa1, J in aa0); a backward pass from that point
+X finds the start (K, L); ALIGN() then writes the alignment into
+X a_res->res / a_res->nres. a_res->min0/max0/min1/max1 receive the
+X bounds. Returns the score, or 0 (without touching a_res) when no
+X positive score exists.
+X NOTE: the forward pass runs until a zero residue is found in aa1;
+X n1 is not consulted. */
+int sw_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res
+X )
+{
+X register const unsigned char *aa0p, *aa1p;
+X register int *pwaa;
+X register int i, j;
+X register struct swstr *ssj;
+X struct swstr *ss;
+X int *waa;
+X int e, f, h, p;
+X int q, r, m;
+X int score;
+X int cost, I, J, K, L;
+X
+X ss = f_str->ss;
+X waa = f_str->waa1;
+X
+X /* q, r: gap open and extend penalties with the sign reversed */
+#ifdef OLD_FASTA_GAP
+X q = -(ppst->gdelval - ppst->ggapval);
+#else
+X q = -ppst->gdelval;
+#endif
+X r = -ppst->ggapval;
+X m = q + r;
+X
+X /* initialize 0th row */
+X for (ssj=ss; ssj<ss+n0; ssj++) {
+X ssj->H = 0;
+X ssj->E = -q;
+X }
+X
+X score = I = J = 0;
+X aa1p = aa1;
+X i = 0;
+X while (*aa1p) {
+X h = p = 0;
+X f = -q;
+X /* profile row for this library residue: n0 scores, one per query position */
+X pwaa = waa + (*aa1p++ * n0);
+X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
+X if ((h = h - m) > (f = f - r)) f = h;
+X if ((h = ssj->H - m) > (e = ssj->E - r)) e = h;
+X h = p + *pwaa++;
+X if (h < 0 ) h = 0;
+X if (h < f ) h = f;
+X if (h < e ) h = e;
+X p = ssj->H;
+X ssj->H = h;
+X ssj->E = e;
+X if (h > score) {
+X score = h;
+X I = i;
+X J = (int)(ssj-ss);
+X }
+X }
+X i++;
+X } /* done with forward pass */
+X if (score <= 0) return 0;
+X
+X /* to get the start point, go backwards */
+X
+X cost = K = L = 0;
+X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
+X
+X /* rescan from (I,J) toward the origin until the same score is reached */
+X for (i=I; i>=0; i--) {
+X h = f = -1;
+X p = (i == I) ? 0 : -1;
+X ssj = ss+J; /* bug in compiler */
+X for (aa0p = &aa0[J]; ssj>=ss; ssj--,aa0p--) {
+X f = max (f,h-q)-r;
+X ssj->E=max(ssj->E,ssj->H-q)-r;
+X h = max(max(ssj->E,f),p+ppst->pam2[0][*aa0p][aa1[i]]);
+X p = ssj->H;
+X ssj->H=h;
+X if (h > cost) {
+X cost = h;
+X K = i;
+X L = (int)(ssj-ss);
+X if (cost >= score) goto found;
+X }
+X }
+X }
+X
+found:
+X
+X /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
+X
+/* in the f_str version, the *res array is already allocated at 4*n0/3 */
+/* NOTE(review): init_work() sizes f_str->res as max(3*n0/2,MIN_RES) and
+X the array written below is a_res->res - confirm the size stated above */
+X
+X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
+X
+X /* the seq array arguments in this call have been reversed to allow
+X asymmetric scoring matrices - this affects the score decoding,
+X and allocation of the score row matrix */
+X /* &aa0[L-1] and &aa1[K-1] are passed because ALIGN() indexes from 1 */
+X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,ppst->pam2[0],q,r,a_res->res,&a_res->nres,f_str);
+X
+X /* DISPLAY(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,res,L,K,ppst->sq); */
+X
+X return score;
+}
+X
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int *S, int **W, int G, int H, int *nres);
+X
+/* The macros below are written for use inside nw_align(): gap() reads the
+X local penalties g and h, and DEL/INS/REP write through the local
+X pointers sapp (int **, next free script slot) and last (int *, the
+X most recent op). Script encoding: 0 = replace, k > 0 = insert k,
+X -k = delete k; consecutive ops of the same kind are merged. */
+#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
+X
+/* static int *sapp; */ /* Current script append ptr */
+/* static int last; */ /* Last script op appended */
+X
+X /* Append "Delete k" op */
+#define DEL(k) \
+{ if (*last < 0) \
+X *last = (*sapp)[-1] -= (k); \
+X else { \
+X *last = (*sapp)[0] = -(k); \
+X (*sapp)++; \
+X } \
+}
+X /* Append "Insert k" op */
+#define INS(k) \
+{ if (*last > 0) \
+X *last = (*sapp)[-1] += (k); \
+X else { \
+X *last = (*sapp)[0] = (k); \
+X (*sapp)++; \
+X } \
+}
+X
+#define REP { *last = (*sapp)[0] = 0; (*sapp)++;} /* Append "Replace" op */
+X
+/* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
+X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+X and appends such a conversion to the current script. */
+X
+/* nw_align - recursive divide-and-conquer global alignment.
+X A, B : sequences, indexed from 1
+X M, N : their lengths
+X tb, te : begin / end gap terms supplied by the caller
+X w : scoring table; g, h : gap open / extend penalties
+X f_str : supplies the two work rows f_ss and r_ss (indexed 0..N)
+X sapp, last : edit-script state updated through DEL/INS/REP
+X Returns the score of the conversion appended to the script. */
+static int
+nw_align(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int tb, int te, int **w, int g, int h,
+X struct f_struct *f_str,
+X int **sapp, int *last)
+{
+X int midi, midj, type; /* Midpoint, type, and cost */
+X int midc;
+X
+X register int i, j;
+X register int c, e, d, s;
+X int m, t, *wa;
+X struct swstr *f_ss, *r_ss;
+X
+X m = g + h;
+X
+X f_ss = f_str->f_ss;
+X r_ss = f_str->r_ss;
+X
+/* Boundary cases: M <= 1 or N == 0 */
+X
+X if (N <= 0) {
+X if (M > 0) {DEL(M)}
+X return -gap(M);
+X }
+X
+X if (M <= 1) {
+X if (M <= 0) {
+X INS(N);
+X return -gap(N);
+X }
+X /* M == 1: either delete A[1] and insert all of B, or match A[1]
+X against the best B[j] */
+X if (tb < te) tb = te;
+X midc = (tb-h) - gap(N);
+X midj = 0;
+X wa = w[A[1]]; /* in the original version of this code, A[]
+X is the second sequence */
+X for (j = 1; j <= N; j++) {
+X c = -gap(j-1) + wa[B[j]] - gap(N-j);
+X if (c > midc) {
+X midc = c;
+X midj = j;
+X }
+X }
+X if (midj == 0) { DEL(1) INS(N) }
+X else {
+X if (midj > 1) { INS(midj-1) }
+X REP
+X if (midj < N) { INS(N-midj) }
+X }
+X return midc;
+X }
+X
+/* Divide: Find optimum midpoint (midi,midj) of cost midc */
+X
+X midi = M/2; /* Forward phase: */
+X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
+X t = -g;
+X for (j = 1; j <= N; j++) {
+X f_ss[j].H = t = t-h;
+X f_ss[j].E = t-g;
+X }
+X t = tb;
+X for (i = 1; i <= midi; i++) {
+X s = f_ss[0].H;
+X f_ss[0].H = c = t = t-h;
+X e = t-g;
+X wa = w[A[i]];
+X for (j = 1; j <= N; j++) {
+X if ((c = c - m) > (e = e - h)) e = c;
+X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
+X c = s + wa[B[j]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = f_ss[j].H;
+X f_ss[j].H = c;
+X f_ss[j].E = d;
+X }
+X }
+X f_ss[0].E = f_ss[0].H;
+X
+X r_ss[N].H = 0; /* Reverse phase: */
+X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
+X for (j = N-1; j >= 0; j--)
+X { r_ss[j].H = t = t-h;
+X r_ss[j].E = t-g;
+X }
+X t = te;
+X for (i = M-1; i >= midi; i--)
+X { s = r_ss[N].H;
+X r_ss[N].H = c = t = t-h;
+X e = t-g;
+X wa = w[A[i+1]];
+X for (j = N-1; j >= 0; j--)
+X { if ((c = c - m) > (e = e - h)) e = c;
+X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c;
+X c = s + wa[B[j+1]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = r_ss[j].H;
+X r_ss[j].H = c;
+X r_ss[j].E = d;
+X }
+X }
+X r_ss[N].E = r_ss[N].H;
+X
+X /* type 1: the best split is found from the H rows;
+X type 2: it is found from the E rows, i.e. inside a deletion that
+X spans the middle row (the gap-open g is added back once) */
+X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
+X midj = 0;
+X type = 1;
+X for (j = 0; j <= N; j++)
+X if ((c = f_ss[j].H + r_ss[j].H) >= midc)
+X if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E))
+X { midc = c;
+X midj = j;
+X }
+X for (j = N; j >= 0; j--)
+X if ((c = f_ss[j].E + r_ss[j].E + g) > midc)
+X { midc = c;
+X midj = j;
+X type = 2;
+X }
+X
+X
+/* Conquer: recursively around midpoint */
+X
+X /* the recursive calls reuse the same f_ss / r_ss rows */
+X if (type == 1) {
+X nw_align(A,B,midi,midj,tb,-g,w,g,h,f_str, sapp, last);
+X nw_align(A+midi,B+midj,M-midi,N-midj,-g,te,w,g,h,f_str,sapp, last);
+X }
+X else {
+X /* rows midi and midi+1 of A are deleted together */
+X nw_align(A,B,midi-1,midj,tb,0,w,g,h,f_str, sapp, last);
+X DEL(2);
+X nw_align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,g,h,f_str, sapp, last);
+X }
+X return midc;
+}
+X
+/* Interface and top level of comparator
+X
+X ALIGN aligns A[1..M] against B[1..N] by calling nw_align() with the
+X scoring table W, gap-open penalty G and gap-extension penalty H (both end
+X conditions are passed as -G).  The edit script is written to S and the
+X alignment length counted by CHECK_SCORE() is stored in *nS.
+X
+X Two work arrays of N+2 swstr elements are allocated for the duration of
+X the call and published to nw_align() through f_str->f_ss / f_str->r_ss.
+X They are freed before returning, and the f_str pointers are reset to NULL
+X so that no dangling pointers are left behind in the shared structure.
+X
+X Returns the score reported by nw_align().  A disagreement with the score
+X recomputed from the script is reported on stderr.  The program exits if a
+X work array cannot be allocated.
+*/
+X
+static int
+ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int **W, int G, int H, int *S, int *nS,
+X struct f_struct *f_str)
+{
+X int c, ck;
+X struct swstr *f_ss, *r_ss;
+X int *sapp, last;
+X
+X sapp = S;
+X last = 0;
+X
+X if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
+X exit (1);
+X }
+X f_ss++; /* the array is used from its second allocated element */
+X f_str->f_ss = f_ss;
+X
+X if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X == NULL) {
+X fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
+X exit (1);
+X }
+X r_ss++; /* same one-element offset as f_ss */
+X f_str->r_ss = r_ss;
+X
+X c = nw_align(A,B,M,N,-G,-G,W,G,H,f_str,&sapp, &last); /* OK, do it */
+X
+X /* recompute the score from the script; this also sets *nS */
+X ck = CHECK_SCORE(A,B,M,N,S,W,G,H,nS);
+X if (c != ck) fprintf(stderr,"Check_score error %d != %d\n",c,ck);
+X
+X f_ss--; r_ss--; /* back to the addresses returned by calloc() */
+X free(r_ss); free(f_ss);
+X
+X /* the work arrays are gone: do not leave dangling pointers in f_str */
+X f_str->f_ss = NULL;
+X f_str->r_ss = NULL;
+X
+X return c;
+}
+X
+/* Alignment display routine
+X
+X DISPLAY prints the alignment of A[1..M] and B[1..N] described by the edit
+X script S in blocks of at most 50 columns.  In the script, 0 is an aligned
+X pair, a positive value k is k residues of B opposite blanks, and a
+X negative value -k is k residues of A opposite blanks.  AP and BP are the
+X coordinates printed for the first column; sq maps residue codes to
+X characters.
+*/
+X
+static char ALINE[51], BLINE[51], CLINE[51];
+X
+void DISPLAY(unsigned char *A, unsigned char *B, int M, int N,
+X             int *S, int AP, int BP, char *sq)
+{
+X  int used_a = 0, used_b = 0;   /* residues of A / B already emitted */
+X  int pending = 0;              /* what is left of the current gap op */
+X  int block_no = 0;             /* number of blocks printed so far */
+X  int col = 0;                  /* next free column in the line buffers */
+X  int label_a = AP, label_b = BP;
+X  int tick;
+X
+X  while (used_a < M || used_b < N) {
+X    if (pending == 0 && *S == 0) {
+X      /* aligned pair */
+X      pending = *S++;
+X      ALINE[col] = sq[A[++used_a]];
+X      BLINE[col] = sq[B[++used_b]];
+X      CLINE[col] = (ALINE[col] == BLINE[col]) ? '|' : ' ';
+X    }
+X    else {
+X      if (pending == 0) {
+X        pending = *S++;
+X      }
+X      if (pending > 0) {
+X        /* residue of B opposite a blank */
+X        ALINE[col] = ' ';
+X        BLINE[col] = sq[B[++used_b]];
+X        pending--;
+X      }
+X      else {
+X        /* residue of A opposite a blank */
+X        ALINE[col] = sq[A[++used_a]];
+X        BLINE[col] = ' ';
+X        pending++;
+X      }
+X      CLINE[col] = '-';
+X    }
+X    col++;
+X
+X    /* flush when the block is full or the alignment is finished */
+X    if (col >= 50 || (used_a >= M && used_b >= N)) {
+X      ALINE[col] = '\0';
+X      BLINE[col] = '\0';
+X      CLINE[col] = '\0';
+X      printf("\n%5d ",50*block_no++);
+X      for (tick = 10; tick <= col; tick += 10) {
+X        printf(" . :");
+X      }
+X      if (tick <= col+5) {
+X        printf(" .");
+X      }
+X      printf("\n%5d %s\n %s\n%5d %s\n",label_a,ALINE,CLINE,label_b,BLINE);
+X      label_a = AP + used_a;
+X      label_b = BP + used_b;
+X      col = 0;
+X    }
+X  }
+}
+X
+/* CHECK_SCORE - return the score of the alignment stored in S
+X
+X Walks the edit script S until A[1..M] and B[1..N] are both consumed:
+X   op == 0 : aligned pair, adds w[A[i]][B[j]]
+X   op >  0 : op residues of B are skipped, costs g + op*h
+X   op <  0 : -op residues of A are skipped, costs g + (-op)*h
+X The number of alignment columns is stored in *nres.
+*/
+X
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X                       int M, int N, int *S, int **w, int g, int h,
+X                       int *nres)
+{
+X  int pos_a = 0, pos_b = 0;   /* residues consumed from A and B */
+X  int columns = 0;
+X  int total = 0;
+X  int step;
+X
+X  while (pos_a < M || pos_b < N) {
+X    step = *S++;
+X    if (step > 0) {
+X      total -= g + step*h;
+X      pos_b += step;
+X      columns += step;
+X    }
+X    else if (step < 0) {
+X      total -= g - step*h;
+X      pos_a -= step;
+X      columns -= step;
+X    }
+X    else {
+X      pos_a++;
+X      pos_b++;
+X      total += w[A[pos_a]][B[pos_b]];
+X      columns++;
+X    }
+X  }
+X
+X  *nres = columns;
+X  return total;
+}
+X
+X
+/* BCHECK_SCORE - score the edit script S produced by the banded aligner.
+X
+X Script entries: 0 scores the next pair with w[][], k > 0 consumes k
+X residues of B and -k < 0 consumes k residues of A, each gap run costing
+X g plus h per residue.  Scanning stops once M residues of A and N residues
+X of B have been consumed.  *nres receives the number of alignment columns;
+X the accumulated score is returned.
+*/
+static int
+BCHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X             int M, int N, int *S, int **w, int g, int h,
+X             int *nres)
+{
+X  int *sp;
+X  int ai = 0, bj = 0;
+X  int ncol = 0;
+X  int sum = 0;
+X  int op;
+X
+X  for (sp = S; ai < M || bj < N; sp++) {
+X    op = *sp;
+X    if (op == 0) {
+X      ai += 1;
+X      bj += 1;
+X      sum = w[A[ai]][B[bj]] + sum;
+X      ncol += 1;
+X      continue;
+X    }
+X    if (op > 0) {
+X      /* run of op residues taken from B only */
+X      sum = sum - (g + op*h);
+X      bj += op;
+X      ncol += op;
+X    }
+X    else {
+X      /* run of -op residues taken from A only */
+X      sum = sum - (g - op*h);
+X      ai -= op;
+X      ncol -= op;
+X    }
+X  }
+X
+X  *nres = ncol;
+X  return sum;
+}
+X
+X
+/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+X
+X To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,&SI,&SJ,&EI,&EJ,MW,f_str).
+X (The S and dflag arguments of the original package - dflag : 0 meant no
+X display or backward pass - are not part of this version's interface.)
+X The parameters are explained as follows:
+X A, B : two sequences to be aligned
+X M : the length of sequence A
+X N : the length of sequence B
+X L : lower bound of the band
+X U : upper bound of the band
+X W : scoring table for matches and mismatches
+X G : gap-opening penalty
+X H : gap-extension penalty
+X *SI : starting position of sequence A in the optimal local alignment
+X *SJ : starting position of sequence B in the optimal local alignment
+X *EI : ending position of sequence A in the optimal local alignment
+X *EJ : ending position of sequence B in the optimal local alignment
+X MW : maximum window size
+X f_str : work structure; f_str->bss supplies the band work array
+*/
+X
+/* bd_walign - banded alignment of aa0 (length n0) with aa1 (length n1).
+X
+X A band of width window = min(n1, optwid) is placed from diagonal
+X -window/2-hoff upwards, where hoff is the offset found by hmatch.
+X LOCAL_ALIGN() finds the best local alignment inside that band and its
+X end points; B_ALIGN() then produces the edit script for that region.
+X
+X On success a_res->min0/min1 (zero-based start), a_res->max0/max1,
+X a_res->res and a_res->nres describe the alignment and the LOCAL_ALIGN
+X score is returned.  If no positive score is found, a diagnostic is
+X written to stderr, a_res is set to describe an empty alignment (zero
+X bounds, nres 0) and 0 is returned.
+*/
+int bd_walign (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct pstruct *ppst,
+X struct f_struct *f_str, int hoff,
+X struct a_res_str *a_res)
+{
+X int low, up, score;
+X int min0, min1, max0, max1;
+X int window;
+X
+X /* LOCAL_ALIGN() can return without writing its outputs; keep them defined */
+X min0 = min1 = max0 = max1 = 0;
+X
+X window = min (n1, ppst->param_u.fa.optwid);
+X /* hoff is the offset found from aa1 to seq 2 by hmatch */
+X
+X low = -window/2-hoff;
+X up = low+window;
+X
+X /* the aligners index their sequences from 1, hence aa0-1 / aa1-1 */
+X score=LOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up,
+X ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval-ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X &min0,&min1,&max0,&max1,ppst->param_u.fa.optwid,f_str);
+X
+X if (score <=0) {
+X fprintf(stderr,"n0/n1: %d/%d hoff: %d window: %d\n",
+X n0, n1, hoff, window);
+X /* do not leave results of an earlier alignment in a_res */
+X a_res->min0 = a_res->min1 = 0;
+X a_res->max0 = a_res->max1 = 0;
+X a_res->nres = 0;
+X return 0;
+X }
+X
+/*
+X fprintf(stderr," ALIGN: start0: %d start1: %d stop0: %d stop1: %d, bot: %d top: %d, win: %d MX %d\n",
+X min0-1,min1-1,max0-min0+1,max1-min1+1,low-(min1-min0),up-(min1-min0),
+X ppst->param_u.fa.optwid,n0);
+*/
+X
+X a_res->min0 = min0-1; a_res->min1 = min1-1;
+X a_res->max0 = max0; a_res->max1 = max1;
+X
+X /* align only the region found above; the band is shifted by the
+X difference between the two start positions */
+X B_ALIGN(aa0-1+min0-1,aa1-1+min1-1,max0-min0+1,max1-min1+1,
+X low-(min1-min0),up-(min1-min0),
+X ppst->pam2[0],
+#ifdef OLD_FASTA_GAP
+X -(ppst->gdelval-ppst->ggapval),
+#else
+X -ppst->gdelval,
+#endif
+X -ppst->ggapval,
+X a_res->res,&a_res->nres,ppst->param_u.fa.optwid,n0,f_str->bss);
+X
+X return score;
+}
+X
+/* LOCAL_ALIGN - best local alignment of A[1..M] and B[1..N] inside the
+X band of diagonals low..up (clipped to -M..N).
+X
+X A forward pass over the band records the best score and where it ends
+X (endi, endj).  A backward pass from that end point stops at the first
+X cell whose accumulated score equals the best score, which gives the start
+X (starti, startj).  W is the scoring table, G the gap-open and H the
+X gap-extension penalty; the band work array is f_str->bss.  MW is not used
+X here (the work array has already been sized by the caller's set-up code).
+X
+X Outputs: *psi, *psj, *pei, *pej receive start and end positions.  They
+X are set to 0 whenever no alignment is reported.
+X Returns the best score (> 0), 0 if either sequence is empty or no
+X positive score exists, or -1 if the clipped band is empty.  The program
+X exits if the start point found is outside the sequences.
+*/
+static int
+LOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int low, int up, int **W, int G,int H,
+X int *psi, int *psj, int *pei, int *pej, int MW,
+X struct f_struct *f_str)
+{
+X int band;
+X register struct bdstr *bssp;
+X int i, j, si, ei;
+X int c, d, e, t, m;
+X int leftd, rightd;
+X int best_score, starti, startj, endi, endj;
+X int *wa, curd;
+X int ib;
+X char flag;
+X
+X bssp = f_str->bss;
+X
+X m = G+H;
+X low = max(-M, low);
+X up = min(N, up);
+X
+X /* the original code copied the caller's (uninitialized) *pej into the
+X other outputs here; report a well-defined empty result instead */
+X if (N <= 0) {
+X *psi = *psj = *pei = *pej = 0;
+X return 0;
+X }
+X if (M <= 0) {
+X *psi = *psj = *pei = *pej = 0;
+X return 0;
+X }
+X band = up-low+1;
+X if (band < 1) {
+X fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
+X M, N, low, up);
+X *psi = *psj = *pei = *pej = 0;
+X return -1;
+X }
+X
+X /* already done by init_work();
+X if (f_str->bss==NULL) f_str->bss = (struct bdstr *) ckalloc(j);
+X */
+X
+X /* forward pass: CC is the best score ending in a cell, DD the best
+X score ending in a gap coming from the neighbouring band column */
+X if (low > 0) leftd = 1;
+X else if (up < 0) leftd = band;
+X else leftd = 1-low;
+X rightd = band;
+X si = max(0,-up);
+X ei = min(M,N-low);
+X bssp[leftd].CC = 0;
+X for (j = leftd+1; j <= rightd; j++) {
+X bssp[j].CC = 0;
+X bssp[j].DD = -G;
+X }
+X bssp[rightd+1].CC = MININT;
+X bssp[rightd+1].DD = MININT;
+X best_score = 0;
+X endi = si;
+X endj = si+low;
+X bssp[leftd-1].CC = MININT;
+X bssp[leftd].DD = -G;
+X for (i = si+1; i <= ei; i++) {
+X if (i > N-up) rightd--;
+X if (leftd > 1) leftd--;
+X wa = W[A[i]];
+X if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
+X if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
+/*
+X if (ib > N) fprintf(stderr,"B[%d] out of range %d\n",ib,N);
+*/
+X if (d > c) c = d;
+X if (c < 0) c = 0;
+X e = c-G;
+X bssp[leftd].DD = d;
+X bssp[leftd].CC = c;
+X if (c > best_score) {
+X best_score = c;
+X endi = i;
+X endj = ib;
+X }
+X for (curd=leftd+1; curd <= rightd; curd++) {
+X if ((c = c-m) > (e = e-H)) e = c;
+X if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
+/*
+X if ((ib=curd+low-1+i) <= 0 || ib > N)
+X fprintf(stderr,"B[%d]:%d\n",ib,B[ib]);
+*/
+X c = bssp[curd].CC + wa[B[curd+low-1+i]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X if (c < 0) c = 0;
+X bssp[curd].CC = c;
+X bssp[curd].DD = d;
+X if (c > best_score) {
+X best_score = c;
+X endi = i;
+X endj = curd+low-1+i;
+X }
+X }
+X }
+X
+X /* nothing scored above zero: there is no alignment to trace back, and
+X the backward pass below could leave starti/startj unset */
+X if (best_score <= 0) {
+X *psi = *psj = *pei = *pej = 0;
+X return 0;
+X }
+X
+X /* backward pass from (endi,endj) to locate the start of the alignment */
+X starti = startj = -1; /* "not found"; rejected by the range check below */
+X leftd = max(1,-endi-low+1);
+X rightd = band-(up-(endj-endi));
+X bssp[rightd].CC = 0;
+X t = -G;
+X for (j = rightd-1; j >= leftd; j--) {
+X bssp[j].CC = t = t-H;
+X bssp[j].DD = t-G;
+X }
+X for (j = rightd+1; j <= band; ++j) bssp[j].CC = MININT;
+X bssp[leftd-1].CC = bssp[leftd-1].DD = MININT;
+X bssp[rightd].DD = -G;
+X flag = 0;
+X for (i = endi; i >= 1; i--) {
+X if (i+low <= 0) leftd++;
+X if (rightd < band) rightd++;
+X wa = W[A[i]];
+X if ((c = bssp[rightd-1].CC-m) > (d = bssp[rightd-1].DD-H)) d = c;
+X if ((ib = rightd+low-1+i) <= N) c = bssp[rightd].CC+wa[B[ib]];
+X
+/*
+X if (ib <= 0) fprintf(stderr,"rB[%d] <1\n",ib);
+*/
+X if (d > c) c = d;
+X e = c-G;
+X bssp[rightd].DD = d;
+X bssp[rightd].CC = c;
+X if (c == best_score) {
+X starti = i;
+X startj = ib;
+X flag = 1;
+X break;
+X }
+X for (curd=rightd-1; curd >= leftd; curd--) {
+X if ((c = c-m) > (e = e-H)) e = c;
+X if ((c = bssp[curd-1].CC-m) > (d = bssp[curd-1].DD-H)) d = c;
+X
+/*
+X if ((ib=curd+low-1+i) <= 0 || ib > N)
+X fprintf(stderr,"i: %d, B[%d]:%d\n",i,ib,B[ib]);
+*/
+X c = bssp[curd].CC + wa[B[curd+low-1+i]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X bssp[curd].CC = c;
+X bssp[curd].DD = d;
+X if (c == best_score) {
+X starti = i;
+X startj = curd+low-1+i;
+X flag = 1;
+X break;
+X }
+X }
+X if (flag == 1) break;
+X }
+X
+X if (starti < 0 || starti > M || startj < 0 || startj > N) {
+X printf("starti=%d, startj=%d\n",starti,startj);
+X *psi = *psj = *pei = *pej = 0;
+X exit(1);
+X }
+X *psi = starti;
+X *psj = startj;
+X *pei = endi;
+X *pej = endj;
+X return best_score;
+}
+X
+/* A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
+X
+X To invoke, call B_ALIGN(A,B,M,N,L,U,W,G,H,S,&nS,MW,MX,bss).
+X The parameters are explained as follows:
+X A, B : two sequences to be aligned
+X M : the length of sequence A
+X N : the length of sequence B
+X L : lower bound of the band
+X U : upper bound of the band
+X W : scoring table for matches and mismatches
+X G : gap-opening penalty
+X H : gap-extension penalty
+X S : script for DISPLAY routine
+X nS : receives the number of alignment columns described by S
+X MW : maximum window size
+X MX : maximum length sequence M to be aligned
+X bss : band work array used by bg_align
+*/
+X
+static int IP; /* crossing point that goes with the running insert score (e) in bg_align */
+static int *MP[3]; /* save crossing points */
+static int *FP; /* forward dividing points */
+static char *MT[3]; /* 0: rep, 1: del, 2: ins */
+static char *FT; /* state (0,1,2 as for MT) stored next to each FP entry */
+X
+/* bg_align(A,B,M,N,low,up,tb,te,w,g,h,bss,sapp,last) returns the cost of an
+X optimum conversion between
+X A[1..M] and B[1..N] and appends such a conversion to the current script.
+X tb(te)= 1 no gap-open penalty if the conversion begins(ends) with a delete.
+X tb(te)= 2 no gap-open penalty if the conversion begins(ends) with an insert.
+X
+X low and up bound the band of diagonals; w is the scoring table, g the
+X gap-open and h the gap-extension penalty; bss is the band work array;
+X sapp and last are the script cursor and last operation used by the
+X DEL/INS/REP macros.
+X
+X The forward pass records, for every row, where the best path crosses the
+X middle diagonal of the band (file-static tables MP/MT, with IP and the
+X CP/DP fields of bss as bookkeeping).  The crossings are then turned into
+X a forward chain (FP/FT) and the pieces between them are solved by
+X recursive calls on narrower bands.
+X
+X Note that the boundary cases (M <= 0, N <= 0, band width <= 1) return 0;
+X otherwise the value returned is the score at the end of the forward pass.
+*/
+static int
+bg_align(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int low, int up, int tb, int te,
+X int **w, int g, int h,
+X struct bdstr *bss, int **sapp, int *last)
+{
+X int rmid, k, l, r, v, kt;
+X int t1, t2, t3;
+X
+X {
+X int band, midd;
+X int leftd, rightd; /* for CC, DD, CP and DP */
+X register int curd; /* current index for CC, DD CP and DP */
+X register int i, j;
+X register int c, d, e;
+X int t, fr, *wa, ib, m;
+X
+X /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
+X if (N <= 0) {
+X if (M > 0) { DEL(M) }
+X return 0;
+X }
+X if (M <= 0) {
+X INS(N)
+X return 0;
+X }
+X if ((band = up-low+1) <= 1) {
+X for (i = 1; i <= M; i++) { REP }
+X return 0;
+X }
+X
+X /* Divide: Find all crossing points */
+X
+X /* Initialization */
+X m = g + h; /* cost of opening a gap and its first residue */
+X
+X midd = band/2 + 1; /* band index of the middle diagonal */
+X rmid = low + midd - 1; /* the same diagonal as an offset relative to the main diagonal */
+X leftd = 1-low;
+X rightd = up-low+1;
+X /* fr is the first crossing row; CP/DP start at -1 (no crossing yet)
+X on one side of the middle diagonal and at the start row on the other */
+X if (leftd < midd) {
+X fr = -1;
+X for (j = 0; j < midd; j++)
+X bss[j].CP = bss[j].DP = -1;
+X for (j = midd; j <= rightd; j++) {
+X bss[j].CP = bss[j].DP = 0;
+X }
+X MP[0][0] = -1;
+X MP[1][0] = -1;
+X MP[2][0] = -1;
+X MT[0][0] = MT[1][0] = MT[2][0] = 0;
+X } else if (leftd > midd) {
+X fr = leftd-midd;
+X for (j = 0; j <= midd; j++) {
+X bss[j].CP = bss[j].DP = fr;
+X }
+X for (j = midd+1; j <= rightd; j++)
+X bss[j].CP = bss[j].DP = -1;
+X MP[0][fr] = -1;
+X MP[1][fr] = -1;
+X MP[2][fr] = -1;
+X MT[0][fr] = MT[1][fr] = MT[2][fr] = 0;
+X } else {
+X fr = 0;
+X for (j = 0; j < midd; j++) {
+X bss[j].CP = bss[j].DP = 0;
+X }
+X for (j = midd; j <= rightd; j++) {
+X bss[j].CP = bss[j].DP = 0;
+X }
+X MP[0][0] = -1;
+X MP[1][0] = -1;
+X MP[2][0] = -1;
+X MT[0][0] = MT[1][0] = MT[2][0] = 0;
+X }
+X
+X /* row 0 of the score arrays; tb decides whether a leading insert or
+X delete has to pay the gap-open penalty */
+X bss[leftd].CC = 0;
+X if (tb == 2) t = 0;
+X else t = -g;
+X for (j = leftd+1; j <= rightd; j++) {
+X bss[j].CC = t = t-h;
+X bss[j].DD = t-g;
+X }
+X bss[rightd+1].CC = MININT;
+X bss[rightd+1].DD = MININT;
+X if (tb == 1) bss[leftd].DD = 0;
+X else bss[leftd].DD = -g;
+X bss[leftd-1].CC = MININT;
+X for (i = 1; i <= M; i++) {
+X if (i > N-up) rightd--;
+X if (leftd > 1) leftd--;
+X wa = w[A[i]];
+X /* leftmost cell of the row: only a delete or a replace can reach it */
+X if ((c = bss[leftd+1].CC-m) > (d = bss[leftd+1].DD-h)) {
+X d = c;
+X bss[leftd].DP = bss[leftd+1].CP;
+X } else bss[leftd].DP = bss[leftd+1].DP;
+X if ((ib = leftd+low-1+i) > 0) c = bss[leftd].CC+wa[B[ib]];
+X if (d > c || ib <= 0) {
+X c = d;
+X bss[leftd].CP = bss[leftd].DP;
+X }
+X e = c-g;
+X bss[leftd].DD = d;
+X bss[leftd].CC = c;
+X IP = bss[leftd].CP;
+X if (leftd == midd) bss[leftd].CP = bss[leftd].DP = IP = i;
+X for (curd=leftd+1; curd <= rightd; curd++) {
+X if (curd != midd) {
+X /* ordinary cell: propagate the crossing point of the winning move */
+X if ((c = c-m) > (e = e-h)) {
+X e = c;
+X IP = bss[curd-1].CP;
+X } /* otherwise, IP is unchanged */
+X if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
+X d = c;
+X bss[curd].DP = bss[curd+1].CP;
+X } else {
+X bss[curd].DP = bss[curd+1].DP;
+X }
+X c = bss[curd].CC + wa[B[curd+low-1+i]];
+X if (c < d || c < e) {
+X if (e > d) {
+X c = e;
+X bss[curd].CP = IP;
+X } else {
+X c = d;
+X bss[curd].CP = bss[curd].DP;
+X }
+X } /* otherwise, CP is unchanged */
+X bss[curd].CC = c;
+X bss[curd].DD = d;
+X } else {
+X /* cell on the middle diagonal: record in MP/MT, for each of the
+X three states, the previous crossing row and its state */
+X if ((c = c-m) > (e = e-h)) {
+X e = c;
+X MP[1][i] = bss[curd-1].CP;
+X MT[1][i] = 2;
+X } else {
+X MP[1][i] = IP;
+X MT[1][i] = 2;
+X }
+X if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
+X d = c;
+X MP[2][i] = bss[curd+1].CP;
+X MT[2][i] = 1;
+X } else {
+X MP[2][i] = bss[curd+1].DP;
+X MT[2][i] = 1;
+X }
+X c = bss[curd].CC + wa[B[curd+low-1+i]];
+X if (c < d || c < e) {
+X if (e > d) {
+X c = e;
+X MP[0][i] = MP[1][i];
+X MT[0][i] = 2;
+X } else {
+X c = d;
+X MP[0][i] = MP[2][i];
+X MT[0][i] = 1;
+X }
+X } else {
+X MP[0][i] = i-1;
+X MT[0][i] = 0;
+X }
+X if (c-g > e) {
+X MP[1][i] = MP[0][i];
+X MT[1][i] = MT[0][i];
+X }
+X if (c-g > d) {
+X MP[2][i] = MP[0][i];
+X MT[2][i] = MT[0][i];
+X }
+X /* from here on this row is the most recent crossing point */
+X bss[curd].CP = bss[curd].DP = IP = i;
+X bss[curd].CC = c;
+X bss[curd].DD = d;
+X }
+X }
+X }
+X
+X /* decide which path to be traced back */
+X if (te == 1 && d+g > c) {
+X k = bss[rightd].DP;
+X l = 2;
+X } else if (te == 2 && e+g > c) {
+X k = IP;
+X l = 1;
+X } else {
+X k = bss[rightd].CP;
+X l = 0;
+X }
+X /* the state chosen above is overridden whenever the middle diagonal
+X differs from N-M */
+X if (rmid > N-M) l = 2;
+X else if (rmid < N-M) l = 1;
+X v = c; /* score returned to the caller */
+X }
+X /* Conquer: Solve subproblems recursively */
+X
+X /* trace back */
+X /* follow the backward links in MP/MT and store them reversed in FP/FT */
+X r = -1;
+X for (; k > -1; r=k, k=MP[l][r], l=MT[l][r]){
+X FP[k] = r;
+X FT[k] = l; /* l=0,1,2 */
+X }
+X /* forward dividing */
+X if (r == -1) { /* optimal alignment did not cross the middle diagonal */
+X if (rmid < 0) {
+X bg_align(A,B,M,N,rmid+1,up,tb,te,w,g,h,bss, sapp, last);
+X }
+X else {
+X bg_align(A,B,M,N,low,rmid-1,tb,te,w,g,h,bss, sapp, last);
+X }
+X } else {
+X k = r;
+X l = FP[k];
+X kt = FT[k];
+X
+X /* first block */
+X if (rmid < 0) {
+X bg_align(A,B,r-1,r+rmid,rmid+1,min(up,r+rmid),tb,1,w,g,h,bss,sapp,last);
+X DEL(1)
+X } else if (rmid > 0) {
+X bg_align(A,B,r,r+rmid-1,max(-r,low),rmid-1,tb,2,w,g,h,bss,sapp,last);
+X INS(1)
+X }
+X
+X /* intermediate blocks */
+X t2 = up-rmid-1; /* band limit used for right-hand side sub-problems */
+X t3 = low-rmid+1; /* band limit used for left-hand side sub-problems */
+X for (; l > -1; k = l, l = FP[k], kt = FT[k]) {
+X if (kt == 0) { REP }
+X else if (kt == 1) { /* right-hand side triangle */
+X INS(1)
+X t1 = l-k-1;
+X bg_align(A+k,B+k+rmid+1,t1,t1,0,min(t1,t2),2,1,w,g,h,bss,sapp,last);
+X DEL(1)
+X }
+X else { /* kt == 2, left-hand side triangle */
+X DEL(1)
+X t1 = l-k-1;
+X bg_align(A+k+1,B+k+rmid,t1,t1,max(-t1,t3),0,1,2,w,g,h,bss,sapp,last);
+X INS(1)
+X }
+X }
+X
+X /* last block */
+X if (N-M > rmid) {
+X INS(1)
+X t1 = k+rmid+1;
+X bg_align(A+k,B+t1,M-k,N-t1,0,min(N-t1,t2),2,te,w,g,h,bss,sapp,last);
+X } else if (N-M < rmid) {
+X DEL(1)
+X t1 = M-(k+1);
+X bg_align(A+k+1,B+k+rmid,t1,N-(k+rmid),max(-t1,t3),0,1,te,w,g,h,
+X bss,sapp,last);
+X }
+X }
+X return(v);
+}
+X
+/* B_ALIGN - globally align A[1..M] and B[1..N] inside the band low..up.
+X
+X The band is first widened, if necessary, so that it contains both the
+X main diagonal and diagonal N-M.  The edit script is written to S and
+X the number of alignment columns to *nS on every return path (the trivial
+X cases used to leave *nS untouched).  W is the scoring table, G the
+X gap-open and H the gap-extension penalty, MX the largest M for which
+X the trace-back tables must have room, bss the band work array; MW is not
+X used here.
+X
+X The file-static trace-back tables MP/MT/FP/FT are allocated for the call
+X and released afterwards.  Returns the score from bg_align() (or the
+X directly computed score in the trivial cases).  A disagreement with the
+X score recomputed by BCHECK_SCORE() is reported on stderr, like the
+X corresponding check in ALIGN().
+*/
+int B_ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N,
+X int low, int up, int **W, int G, int H, int *S, int *nS,
+X int MW, int MX, struct bdstr *bss)
+{
+X int c, i, j;
+X int g, h;
+X size_t mj;
+X int check_score;
+X int **sapp, *sapp_v, *last, last_v;
+X
+X /* g and h are not used directly below; presumably they are the names the
+X gap() macro expands to */
+X g = G;
+X h = H;
+X sapp_v = S;
+X sapp = &sapp_v;
+X
+X last_v = 0;
+X last = &last_v;
+X
+X low = min(max(-M, low),min(N-M,0));
+X up = max(min(N, up),max(N-M,0));
+X
+X if (N <= 0) {
+X if (M > 0) { DEL(M); }
+X *nS = (M > 0) ? M : 0; /* one column per deleted residue */
+X return -gap(M);
+X }
+X if (M <= 0) {
+X INS(N);
+X *nS = N; /* one column per inserted residue */
+X return -gap(N);
+X }
+X if (up-low+1 <= 1) {
+X /* band of a single diagonal: the alignment is M replacements */
+X c = 0;
+X for (i = 1; i <= M; i++) {
+X REP;
+X c += W[A[i]][B[i]];
+X }
+X *nS = M;
+X return c;
+X }
+X
+X if (MT[0]==NULL) {
+X mj = MX+1;
+X MT[0] = (char *) ckalloc(mj);
+X MT[1] = (char *) ckalloc(mj);
+X MT[2] = (char *) ckalloc(mj);
+X FT = (char *) ckalloc(mj);
+X
+X mj *= sizeof(int);
+X MP[0] = (int *) ckalloc(mj);
+X MP[1] = (int *) ckalloc(mj);
+X MP[2] = (int *) ckalloc(mj);
+X FP = (int *) ckalloc(mj);
+X }
+X
+X c = bg_align(A,B,M,N,low,up,0,0,W,G,H,bss, sapp, last);
+X
+X /* recompute the score from the script; this also sets *nS */
+X check_score = BCHECK_SCORE(A,B,M,N,S,W,G,H,nS);
+X
+X free(FP); free(MP[2]); free(MP[1]); free(MP[0]);
+X free(FT); free(MT[2]); free(MT[1]); free(MT[0]);
+X MT[0]=NULL; /* makes the next call allocate the tables again */
+X
+X if (check_score != c)
+X fprintf(stderr,"\nBCheck_score=%d != %d\n", check_score,c);
+X return c;
+}
+X
+/* do_walign - produce the alignment of aa0 (length n0) with a library
+X sequence.
+X
+X With TFASTA the library sequence aa1 is first translated in the given
+X frame into f_str->aa1x (length kept in f_str->n10) and do_fasta() is run
+X on the translation; otherwise aa1 is used as it is.  a_res->res is
+X pointed at f_str->res and *have_ares is set to 1.
+X
+X If ppst->sw_flag is set the result of sw_walign() is returned.  Otherwise
+X do_fasta() is run with optflag=1, optcut=0 and twice the optimization
+X width (the caller's values are restored afterwards), and bd_walign() is
+X used when the first score is positive; if it is not, a_res->nres is set
+X to 0 and 0 is returned.
+*/
+int do_walign (const unsigned char *aa0, int n0,
+X               const unsigned char *aa1, int n1,
+X               int frame,
+X               struct pstruct *ppst,
+X               struct f_struct *f_str,
+X               struct a_res_str *a_res,
+X               int *have_ares)
+{
+X  struct rstruct rst;
+X  const unsigned char *lib_seq;   /* sequence actually aligned to aa0 */
+X  int lib_len;                    /* ... and its length */
+X  int diag_off;                   /* offset reported by do_fasta() */
+X  int keep_optflag, keep_optcut, keep_optwid;
+X  int result;
+X
+#ifdef TFASTA
+X  lib_len = aatran(aa1,f_str->aa1x,n1,frame);
+X  f_str->n10 = lib_len;
+X  do_fasta (aa0, n0, f_str->aa1x, lib_len, ppst, f_str, &rst, &diag_off);
+X  lib_seq = f_str->aa1x;
+X
+#else
+X  lib_len = n1;
+X  lib_seq = aa1;
+#endif
+X
+X  a_res->res = f_str->res;
+X  *have_ares = 1;
+X
+X  if (ppst->sw_flag) {
+X    return sw_walign(aa0, n0, lib_seq, lib_len, ppst, f_str, a_res);
+X  }
+X
+X  /* remember the search parameters that are overridden for the alignment */
+X  keep_optflag = ppst->param_u.fa.optflag;
+X  keep_optcut = ppst->param_u.fa.optcut;
+X  keep_optwid = ppst->param_u.fa.optwid;
+X
+X  ppst->param_u.fa.optflag = 1;
+X  ppst->param_u.fa.optcut = 0;
+X  ppst->param_u.fa.optwid *= 2;
+X
+X  do_fasta(aa0, n0, lib_seq, lib_len, ppst, f_str, &rst, &diag_off);
+X
+X  if (rst.score[0] <= 0) {
+X    a_res->nres = 0;
+X    result = 0;
+X  }
+X  else {
+X    result = bd_walign(aa0, n0, lib_seq, lib_len, ppst, f_str, diag_off, a_res);
+X  }
+X
+X  /* put the caller's parameters back */
+X  ppst->param_u.fa.optflag = keep_optflag;
+X  ppst->param_u.fa.optcut = keep_optcut;
+X  ppst->param_u.fa.optwid = keep_optwid;
+X
+X  return result;
+}
+X
+/* pre_cons - with TFASTA, translate aa1 (length n1) in the given frame into
+X f_str->aa1x and keep the translated length in f_str->n10; does nothing
+X otherwise. */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+X
+#ifdef TFASTA
+X  int n_trans;
+X
+X  n_trans = aatran(aa1,f_str->aa1x,n1,frame);
+X  f_str->n10 = n_trans;
+#endif
+}
+X
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* TFASTA: query factor 1, library factor and multiplier 3; frames above 2
+X are flagged as reversed (llrev) and stored as 3 - frame.
+X FASTA: all factors 1, frame 0; a frame above 0 flags the query as
+X reversed (qlrev). */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+X
+#ifdef TFASTA
+X  int lib_reversed = (frame > 2);
+X
+X  aln->qlrev = 0;
+X  aln->qlfact = 1;
+X  aln->llmult = 3;
+X  aln->llfact = 3;
+X  aln->llrev = lib_reversed ? 1 : 0;
+X  aln->frame = lib_reversed ? 3 - frame : frame;
+#else /* FASTA */
+X  aln->frame = 0;
+X  aln->llrev = 0;
+X  aln->llmult = 1;
+X  aln->qlfact = 1;
+X  aln->llfact = 1;
+X  aln->qlrev = (frame > 0) ? 1 : 0;
+#endif
+}
+X
+#include "a_mark.h"
+X
+/* calcons - build the display strings for the alignment described by a_res.
+X
+X seqc0 and seqc1 receive the characters of aa0 and of the library sequence
+X (aa1, or the translation in f_str->aa1x with TFASTA), with '-' where the
+X edit script a_res.res puts a gap; seqca receives one M_* code per aligned
+X column.  Sequence context is copied before the aligned region (mins
+X characters) and after it (nd characters).
+X
+X aln->amin0/amax0/amin1/amax1 and smin0/smin1 are set from a_res, and
+X aln->nident, nsim, ngap_q, ngap_l are counted (nfs is reset to 0).
+X *nc receives the number of columns in the aligned region; the return
+X value is the total number of columns written (mins + *nc + nd).
+*/
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln, struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc, nd, ns, itmp;
+X const unsigned char *aa1p;
+X char *sp0, *sp1, *spa, *sq;
+X int *rp;
+X int smins, mins; /* smins is assigned below but never read */
+X
+X /* residue code -> character table */
+X if (pst.ext_sq_set) { sq = pst.sqx; }
+X else { sq = pst.sq; }
+X
+#ifndef TFASTA
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X /* will we show all the start ?*/
+X /* the if / if-else / else below nests as: outer test, then which of the
+X two sequences starts further in; the last else belongs to the outer if */
+X if (min(a_res.min0,a_res.min1) < aln->llen || aln->showall==1)
+X if (a_res.min0 >= a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins = a_res.min0;
+X else mins = min(a_res.min0,aln->llcntx);
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X /* seqc1 has less context than seqc0: pad it on the left with blanks */
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* the library sequence extends more to the left */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llcntx);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* outer else: show the same amount of context for both sequences */
+X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0 - mins;
+X aln->smin1=a_res.min1 - mins;
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X /* set the alignment code to zero for context */
+X memset(seqca,0,mins);
+X
+X /* TFASTA
+X smins = mins = 0;
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X */
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* expand the edit script: 0 = aligned pair, k > 0 = k library residues
+X opposite '-' in seqc0, k < 0 = -k residues of aa0 opposite '-' in seqc1 */
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X /* classify the pair by the sign of its matrix score */
+X if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X
+X *sp0 = sq[aa0[i0++]];
+X *sp1 = sq[aa1p[i1++]];
+X if (toupper(*sp0) == toupper(*sp1)) {
+X aln->nident++;
+X *spa = M_IDENT;
+X }
+X else if (pst.nt_align) {
+X /* nucleotides: T and U count as identical; an unmatched N is added
+X to the gap counters */
+X if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') ||
+X (toupper(*sp0)=='U' && toupper(*sp1)=='T')) {
+X aln->nident++;
+X *spa = M_IDENT;
+X }
+X else if (toupper(*sp0) == 'N') aln->ngap_q++;
+X else if (toupper(*sp1) == 'N') aln->ngap_l++;
+X }
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) op = *rp++; /* start a new gap run */
+X if (op > 0) {
+X *sp0++ = '-';
+X *sp1++ = sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X lenc++;
+X aln->ngap_q++;
+X }
+X else {
+X *sp0++ = sq[aa0[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X lenc++;
+X aln->ngap_l++;
+X }
+X }
+X }
+X
+X *nc = lenc;
+X *spa = '\0';
+X
+/* now we have the middle, get the right end */
+X if (!aln->llcntx_flg) {
+X ns = mins + lenc + aln->llen; /* show an extra line? */
+X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
+X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
+X nd = ns - (mins+lenc); /* this much extra */
+X }
+X else nd = aln->llcntx;
+X
+X /* never show more context than the longer of the two remainders */
+X if (nd > max(n0-a_res.max0,nn1-a_res.max1))
+X nd = max(n0-a_res.max0,nn1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X /* copy what is left of each sequence, blank-padded up to nd columns */
+X if ((nd-(n0-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst);
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X
+X /* fprintf(stderr,"%d\n",mins+lenc+nd); */
+X
+X return mins+lenc+nd;
+}
+X
+/* calcons_a - like calcons, but also builds an annotation line.
+X
+X In addition to seqc0, seqc1 and seqca, seqc0a receives one character per
+X column: ann_arr[aa0a[i]] for every residue of aa0 inside the aligned
+X region, and a blank for the left context and for columns where aa0 has a
+X gap.  seqc0a and seqca are NUL-terminated at the end of the aligned
+X region.
+X
+X aln->amin0/amax0/amin1/amax1, smin0/smin1 and the nident, nsim, ngap_q,
+X ngap_l counters are set as in calcons.  *nc receives the number of
+X columns in the aligned region; the return value is the total number of
+X columns written (mins + *nc + nd).
+*/
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X char *ann_arr, struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc, nd, ns, itmp;
+X const unsigned char *aa1p;
+X char *sp0, *sp0a, *sp1, *spa, *sq;
+X int *rp;
+X int smins, mins;
+X
+X /* residue code -> character table */
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X }
+X
+#ifndef TFASTA
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X /* will we show all the start ?*/
+X /* (the last else of this if / if-else / else belongs to the outer if) */
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins = a_res.min0;
+X else mins = min(a_res.min0,aln->llcntx);
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X /* seqc1 has less context than seqc0: pad it on the left with blanks */
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else { /* the library sequence extends more to the left */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llcntx);
+X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else { /* outer else: show the same amount of context for both sequences */
+X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0 - smins;
+X aln->smin1=a_res.min1 - smins;
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
+X }
+X /* set the alignment code to zero for context */
+X memset(seqca,0,mins);
+X memset(seqc0a,' ',mins); /* no annotation is shown for the left context */
+X
+X /* TFASTA
+X smins = mins = 0;
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X */
+X
+/* now get the middle */
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp0a = seqc0a+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X /* expand the edit script: 0 = aligned pair, k > 0 = k library residues
+X opposite '-' in seqc0, k < 0 = -k residues of aa0 opposite '-' in seqc1 */
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X /* classify the pair by the sign of its matrix score */
+X if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X *sp0a++ = ann_arr[aa0a[i0]]; /* must come before i0 is advanced below */
+X
+X *sp0 = sq[aa0[i0++]];
+X *sp1 = sq[aa1p[i1++]];
+X
+X if (toupper(*sp0) == toupper(*sp1)) {
+X aln->nident++;
+X *spa = M_IDENT;
+X }
+X else if (pst.nt_align) {
+X /* nucleotides: T and U count as identical; an unmatched N is added
+X to the gap counters */
+X if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') ||
+X (toupper(*sp0)=='U' && toupper(*sp1)=='T')) {
+X aln->nident++;
+X *spa = M_IDENT;
+X }
+X else if (toupper(*sp0) == 'N') aln->ngap_q++;
+X else if (toupper(*sp1) == 'N') aln->ngap_l++;
+X }
+X sp0++; sp1++; spa++;
+X }
+X else {
+X if (op==0) op = *rp++; /* start a new gap run */
+X if (op>0) {
+X *sp0++ = '-';
+X *sp1++ = sq[aa1p[i1++]];
+X *spa++ = M_DEL;
+X *sp0a++ = ' '; /* no aa0 residue in this column, so no annotation */
+X op--;
+X lenc++;
+X aln->ngap_q++;
+X }
+X else {
+X *sp0a++ = ann_arr[aa0a[i0]];
+X *sp0++ = sq[aa0[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X lenc++;
+X aln->ngap_l++;
+X }
+X }
+X }
+X
+X *nc = lenc;
+X *sp0a = *spa = '\0';
+X
+/* now we have the middle, get the right end */
+X if (!aln->llcntx_flg) {
+X ns = mins + lenc + aln->llen; /* show an extra line? */
+X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
+X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
+X nd = ns - (mins+lenc); /* this much extra */
+X }
+X else nd = aln->llcntx;
+X
+X /* never show more context than the longer of the two remainders */
+X if (nd > max(n0-a_res.max0,nn1-a_res.max1))
+X nd = max(n0-a_res.max0,nn1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else {
+X /* copy what is left of each sequence, blank-padded up to nd columns */
+X if ((nd-(n0-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst);
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X if ((nd-(nn1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
+X }
+X
+X /* fprintf(stderr,"%d\n",mins+lenc+nd); */
+X
+X return mins+lenc+nd;
+}
+X
+/* update_code - append one "<op-char><count>" item to the alignment code.
+X
+X al_str     : NUL-terminated string being built
+X al_str_max : room left in al_str, counted from its current end and
+X              including the terminating NUL (callers pass
+X              buffer size - strlen(al_str))
+X op         : 0..3, selects '=', '-', '+' or '*'
+X op_cnt     : length of the run
+X
+X strncat() writes up to n characters and then a NUL, so it may only be
+X given al_str_max-1; passing al_str_max itself could write one byte past
+X the end of the buffer.  Nothing is appended when there is no room left
+X (this also keeps a zero or negative value from turning into a huge
+X size_t).
+*/
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt) {
+X
+X char op_char[5]={"=-+*"};
+X char tmp_cnt[20];
+X
+X if (al_str_max <= 1) return;
+X
+X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
+}
+X
+X
+/* build an array of match/ins/del - length strings */
+/* calc_code - run-length encode the alignment described by a_res.
+X
+X The edit script a_res.res is walked from (a_res.min0, a_res.min1) to
+X (a_res.max0, a_res.max1).  Consecutive columns of the same kind are
+X counted in op_cnt, and each finished run is appended to al_str by
+X update_code(), which is given the kind p_op:
+X   0 : aligned pairs
+X   1 : script value > 0 (library residue only; counted in ngap_q)
+X   2 : script value < 0 (aa0 residue only; counted in ngap_l)
+X   3 : aligned pair in which either character is '*'
+X al_str_n is the size used to work out how much room is left in al_str.
+X
+X aln->amin0/amax0/amin1/amax1 are copied from a_res and aln->nident,
+X nsim, ngap_q, ngap_l are counted (nfs is reset to 0).  Returns the number
+X of alignment columns.
+X
+X NOTE(review): p_op and op_cnt both start at 0, so an alignment that does
+X not begin with an ordinary aligned pair has a run of kind 0 and length 0
+X emitted first.
+*/
+int calc_code(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln, struct a_res_str a_res,
+X struct pstruct pst,
+X char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc;
+X int p_op, op_cnt; /* kind and length of the run being accumulated */
+X const unsigned char *aa1p;
+X char sp0, sp1, *sq;
+X int *rp;
+X
+X /* residue code -> character table */
+X if (pst.ext_sq_set) {
+X sq = pst.sqx;
+X }
+X else {
+X sq = pst.sq;
+X }
+X
+#ifndef TFASTA
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
+X op_cnt = 0;
+X
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X if (op == 0 && *rp == 0) {
+X /* aligned pair */
+X
+X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
+X
+X sp0 = sq[aa0[i0++]];
+X sp1 = sq[aa1p[i1++]];
+X
+X if (p_op == 0 || p_op==3) {
+X if (sp0 != '*' && sp1 != '*') {
+X if (p_op == 3) {
+X /* a '*' run ends and an ordinary match run begins */
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 0;
+X }
+X else {op_cnt++;}
+X }
+X else {
+X /* a pair containing '*' always closes the current run, even when the
+X current run is itself of kind 3 */
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 3;
+X }
+X }
+X else {
+X /* a gap run ends; this pair starts a run of kind 0 */
+X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 0;
+X }
+X
+X op = *rp++;
+X lenc++;
+X
+X if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X else if (pst.nt_align) {
+X /* nucleotides: T and U count as identical; an unmatched N is added
+X to the gap counters */
+X if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
+X (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
+X else if (toupper(sp0) == 'N') aln->ngap_q++;
+X else if (toupper(sp1) == 'N') aln->ngap_l++;
+X }
+X }
+X else {
+X if (op==0) op = *rp++; /* start a new gap run from the script */
+X if (op>0) {
+X if (p_op == 1) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 1;
+X }
+X op--; lenc++; i1++; aln->ngap_q++;
+X }
+X else {
+X if (p_op == 2) { op_cnt++;}
+X else {
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X op_cnt = 1; p_op = 2;
+X }
+X op++; lenc++; i0++; aln->ngap_l++;
+X }
+X }
+X }
+X /* flush the run that was still open when the alignment ended */
+X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X
+X return lenc;
+}
+X
+/* calc_id - replay the edit script a_res.res between (min0,min1) and
+X   (max0,max1) and fill in the identity/similarity/gap counters in *aln
+X   without producing any text.
+X
+X   Script entries: 0 is one aligned pair; a positive value is that many
+X   columns advancing only i1 (ngap_q); a negative value is that many
+X   columns advancing only i0 (ngap_l).
+X
+X   Returns the number of alignment columns (lenc).
+*/
+int calc_id(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X struct f_struct *f_str)
+{
+X int i0, i1, nn1;
+X int op, lenc;
+X int sp0, sp1;
+X const unsigned char *aa1p;
+X int *rp;
+X char *sq;
+X
+X /* residue code -> display character table */
+X if (pst.ext_sq_set) { sq = pst.sqx; }
+X else { sq = pst.sq; }
+X
+X /* TFASTA builds take the second sequence from f_str instead of aa1 */
+#ifndef TFASTA
+X aa1p = aa1;
+X nn1 = n1;
+#else
+X aa1p = f_str->aa1x;
+X nn1 = f_str->n10;
+#endif
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X /* op != 0 means a gap run is still being consumed */
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X
+X /* a non-negative pam2[0] score counts as similar */
+X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
+X
+X sp0 = sq[aa0[i0++]];
+X sp1 = sq[aa1p[i1++]];
+X if (toupper(sp0) == toupper(sp1)) {aln->nident++;}
+X else if (pst.nt_align) {
+X /* with nt_align set, T/U pairs count as identical and an 'N' on
+X either side is added to that side's gap counter */
+X if ((toupper(sp0)=='T' && toupper(sp1)== 'U')||
+X (toupper(sp0)=='U' && toupper(sp1)=='T')) {aln->nident++;}
+X else if (toupper(sp0) == 'N') aln->ngap_q++;
+X else if (toupper(sp1) == 'N') aln->ngap_l++;
+X }
+X }
+X else {
+X /* load the next gap length unless one is already in progress */
+X if (op==0) op = *rp++;
+X if (op>0) {op--; lenc++; i1++; aln->ngap_q++;}
+X else {op++; lenc++; i0++; aln->ngap_l++; }
+X }
+X }
+X return lenc;
+}
+X
+#ifdef PCOMPLIB
+X
+#include "w_mw.h"
+X
+/* update_params - copy the query length n0 from qm_msg into ppst */
+void
+update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+X ppst->n0 = qm_msg->n0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropnfa.c ||
+echo 'restore of dropnfa.c failed'
+Wc_c="`wc -c < 'dropnfa.c'`"
+test 70110 -eq "$Wc_c" ||
+ echo 'dropnfa.c: original size 70110, current size' "$Wc_c"
+fi
+# ============= dropnfa.h ==============
+if test -f 'dropnfa.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropnfa.h (File already exists)'
+else
+echo 'x - extracting dropnfa.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropnfa.h' &&
+X
+/* global definitions shared by dropnfa.c and altivec.c */
+X
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+X
+X
+X
+struct dstruct /* diagonal structure for saving current run */
+{
+X int score; /* hash score of current match */
+X int start; /* start of current match */
+X int stop; /* end of current match */
+X struct savestr *dmax; /* location in vmax[] where best score data saved */
+};
+X
+struct savestr
+{
+X int score; /* pam score with segment optimization */
+X int score0; /* pam score of best single segment */
+X int gscore; /* score from global match */
+X int dp; /* diagonal of match */
+X int start; /* start of match in lib seq */
+X int stop; /* end of match in lib seq */
+};
+X
+struct bdstr { int CC, DD, CP, DP;};
+X
+struct f_struct {
+X struct dstruct *diag;
+X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
+X struct savestr *vptr[MAXSAV];
+X struct savestr *lowmax;
+X int ndo;
+X int noff;
+X int hmask; /* hash constants */
+X int *pamh1; /* pam based array */
+X int *pamh2; /* pam based kfact array */
+X int *link, *harr; /* hash arrays */
+X int kshft; /* shift width */
+X int nsav, lowscor; /* number of saved runs, worst saved run */
+#ifdef TFASTA
+X unsigned char *aa1x;
+X int n10;
+#endif
+X struct bdstr *bss;
+X struct swstr *ss;
+X struct swstr *f_ss, *r_ss;
+X int *waa0;
+X int *waa1;
+X int *res;
+X int max_res;
+X double aa0_f[MAXSQ];
+X double *kar_p;
+X
+#ifdef FA_ALTIVEC
+X int vec_len;
+X vecInt **vec_matrix;
+X vector signed ALTIVEC_SIZE *vec_HH;
+X vector signed ALTIVEC_SIZE *vec_EE;
+X
+X int vec_len2;
+X vecInt2 **vec_matrix2;
+X vector signed ALTIVEC_SIZE2 *vec_HH2;
+X vector signed ALTIVEC_SIZE2 *vec_EE2;
+#endif
+};
+X
+static int
+FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
+X int M, int N, int low, int up,
+X int **W, int G,int H, int MW,
+X struct f_struct *f_str);
+SHAR_EOF
+chmod 0644 dropnfa.h ||
+echo 'restore of dropnfa.h failed'
+Wc_c="`wc -c < 'dropnfa.h'`"
+test 1882 -eq "$Wc_c" ||
+ echo 'dropnfa.h: original size 1882, current size' "$Wc_c"
+fi
+# ============= dropnsw.c ==============
+if test -f 'dropnsw.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping dropnsw.c (File already exists)'
+else
+echo 'x - extracting dropnsw.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'dropnsw.c' &&
+/* copyright (c) 1994, 1995, 1996 William R. Pearson */
+X
+/* $Name: fa_34_26_5 $ - $Id: dropnsw.c,v 1.35 2006/10/19 14:49:14 wrp Exp $ */
+X
+/*
+X this is a slower version of dropgsw.c that implements the Smith-Waterman
+X algorithm. It lacks the shortcuts in dropgsw.c that prevent scores less
+X than the penalty for the first residue in a gap from being generated.
+X
+X Thus, dropnsw.c should be used for tests with very large gap penalties,
+X and is more appropriate for programs like prss3, which are interested
+X in accurate low scores.
+*/
+X
+/* the do_walign() code in this file is not thread_safe */
+/* init_work(), do_work(), are thread safe */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+X
+static char *verstr="3.5 Sept 2006";
+X
+struct swstr { int H, E;};
+X
+/* per-query work area built by init_work() and released by close_work() */
+struct f_struct {
+X /* H/E row used by do_work() and do_walign(); init_work() allocates
+X n0+2 cells and stores the address of the second one, close_work()
+X steps back by one before free() */
+X struct swstr *ss;
+X /* forward / reverse rows read by align(); init_work() allocates them
+X like ss, ALIGN() points them at its own N+2 cell buffers */
+X struct swstr *f_ss;
+X struct swstr *r_ss;
+X /* flat score tables of (nsq+1)*(n0+1) ints: waa_s is read by do_work(),
+X waa_a by do_walign() */
+X int *waa_s, *waa_a;
+X /* row-pointer tables over one contiguous score block each; [0] is the
+X one do_walign(), ALIGN() and calcons() index by query position */
+X int **pam2p[2];
+X /* edit-script buffer, max(3*n0/2,MIN_RES) ints, handed out as a_res->res */
+X int *res;
+X /* passed to do_karlin() by do_work() */
+X double aa0_f[MAXSQ];
+X /* passed to do_karlin(); freed by close_work() when not NULL */
+X double *kar_p;
+};
+X
+#define DROP_INTERN
+#include "drop_func.h"
+X
+extern int do_karlin(const unsigned char *aa1, int n1,
+X int **pam2, struct pstruct *ppst,
+X double *aa0_f, double *kar_p, double *lambda, double *H);
+extern void aancpy(char *to, char *from, int count, struct pstruct pst);
+int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
+X int **W, int IW, int G, int H, int *S, int *NC,
+X struct f_struct *f_str);
+X
+/* initialize for Smith-Waterman optimal score */
+X
+/* init_work - build the per-query work area used by do_work() and
+X   do_walign() and hand it back through *f_arg.
+X
+X   aa0, n0   query sequence and its length
+X   ppst      scoring parameters; ext_sq_set selects nsqx and matrix
+X             index 1 instead of nsq and matrix index 0 for the
+X             scanning tables, pam_pssm selects ppst->pam2p[] instead
+X             of ppst->pam2[] as the score source
+X   f_arg     receives the new struct f_struct
+X
+X   Every allocation is checked; a failure prints a message on stderr
+X   and exits.
+*/
+void init_work (unsigned char *aa0, int n0,
+X struct pstruct *ppst,
+X struct f_struct **f_arg)
+{
+X  int maxn0;
+X  int *pwaa_s, *pwaa_a;
+X  int e, f, i;
+X  int *res;
+X  struct f_struct *f_str;
+X  int **pam2p;
+X  struct swstr *ss, *f_ss, *r_ss;
+X  int nsq, ip;
+X
+X  if (ppst->ext_sq_set) {
+X    nsq = ppst->nsqx; ip = 1;
+X  }
+X  else {
+X    nsq = ppst->nsq; ip = 0;
+X  }
+X
+X  /* the work area itself; it is dereferenced right below, so a failed
+X     calloc() must not go unnoticed.  calloc() also leaves kar_p NULL,
+X     which close_work() tests before freeing it. */
+X  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate f_str\n");
+X    exit (1);
+X  }
+X
+X  /* allocate space for the scoring arrays */
+X  /* each row gets n0+2 cells and the stored pointer is advanced by one,
+X     so index -1 is addressable; close_work() undoes the offset for ss */
+X  maxn0 = n0 + 2;
+X  if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate ss array %3d\n", n0);
+X    exit (1);
+X  }
+X  ss++;
+X  f_str->ss = ss;
+X
+X  if ((f_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate f_ss array %3d\n", n0);
+X    exit (1);
+X  }
+X  f_ss++;
+X  f_str->f_ss = f_ss;
+X
+X  if ((r_ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate r_ss array %3d\n", n0);
+X    exit (1);
+X  }
+X  r_ss++;
+X  f_str->r_ss = r_ss;
+X
+X  /* initialize variable (-S) pam matrix */
+X  if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+X    fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
+X    exit(1);
+X  }
+X
+X  pam2p = f_str->pam2p[1];
+X  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  /* row i starts i*(nsq+1) ints into the contiguous block at pam2p[0] */
+X  for (i=1; i<n0; i++) {
+X    pam2p[i]= pam2p[0] + (i*(nsq+1));
+X  }
+X
+X  /* initialize universal (alignment) matrix */
+X  if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  /* the two messages below used to name pam2p[1] although it is
+X     pam2p[0] that is being allocated here */
+X  if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
+X    fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
+X    exit(1);
+X  }
+X
+X  pam2p = f_str->pam2p[0];
+X  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
+X    fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
+X    exit(1);
+X  }
+X
+X  for (i=1; i<n0; i++) {
+X    pam2p[i]= pam2p[0] + (i*(nsq+1));
+X  }
+X
+X  /*
+X     pwaa effectively has a sequence profile --
+X     pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
+X     pwaa[n0..2n0-1] has pam scores for residue 1 (A)
+X     pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
+X
+X     thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
+X     *pwaa++ rapidly moves though the scores of the aa1p[] position
+X     without further indexing
+X
+X     For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
+X     a different score in each position.
+X  */
+X
+X  if (ppst->pam_pssm) {
+X    /* position-specific scores come straight from ppst->pam2p[] */
+X    pwaa_s = f_str->waa_s;
+X    pwaa_a = f_str->waa_a;
+X    for (e = 0; e <=nsq; e++) {	/* for each residue in the alphabet */
+X      for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+X	*pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
+X	*pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];
+X      }
+X    }
+X  }
+X  else {	/* initialize scanning matrix */
+X    /* scores are looked up in pam2[] with the query residue aa0[f] */
+X    pwaa_s = f_str->waa_s;
+X    pwaa_a = f_str->waa_a;
+X    for (e = 0; e <=nsq; e++)	/* for each residue in the alphabet */
+X      for (f = 0; f < n0; f++) {	/* for each position in aa0 */
+X	*pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][e][aa0[f]];
+X	*pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][e][aa0[f]];
+X      }
+X  }
+X
+X  /* edit-script buffer later handed out by do_walign() as a_res->res */
+X  maxn0 = max(3*n0/2,MIN_RES);
+X  if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+X    fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+X    exit(1);
+X  }
+X  f_str->res = res;
+X
+X  *f_arg = f_str;
+}
+X
+/* close_work - free the work area behind *f_arg and set *f_arg to NULL.
+X   A NULL *f_arg is accepted and left untouched.
+X
+X   NOTE(review): f_ss and r_ss are not released here.  ALIGN() replaces
+X   and frees those two pointers itself, so freeing them at this point
+X   would have to be coordinated with ALIGN().
+*/
+void close_work (const unsigned char *aa0, int n0,
+X struct pstruct *ppst, struct f_struct **f_arg)
+{
+X  struct f_struct *work = *f_arg;
+X
+X  if (work == NULL) return;
+X
+X  if (work->kar_p != NULL) { free(work->kar_p); }
+X
+X  /* init_work() stored ss one cell past the start of its allocation */
+X  work->ss -= 1;
+X  free(work->ss);
+X
+X  free(work->res);
+X
+X  /* universal (alignment) tables: flat array, score block, row pointers */
+X  free(work->waa_a);
+X  free(work->pam2p[0][0]);
+X  free(work->pam2p[0]);
+X
+X  /* variable (-S) tables, same layout */
+X  free(work->waa_s);
+X  free(work->pam2p[1][0]);
+X  free(work->pam2p[1]);
+X
+X  free(work);
+X  *f_arg = NULL;
+}
+X
+X
+/* pstring1 is a message to the manager, currently 512 */
+/*void get_param(struct pstruct *pstr,char *pstring1)*/
+/* get_param - format a description of the scoring function.
+X
+X   pstring1  receives a one-line summary: program name, version string,
+X             matrix file name, "-PSI" when pam_pssm is set, the
+X             (pam_h:pam_l) pair, "xS" when ext_sq_set is set, and the
+X             two gap penalties
+X   pstring2  when not NULL, receives the same values as "; pg_..." lines
+X
+X   Both strings are written with sprintf(); nothing here limits their
+X   length, so the caller's buffers must be large enough.
+X
+X   NOTE(review): the pg_matrix field in pstring2 is filled from psi_str,
+X   while pstring1 prints pstr->pamfile followed by psi_str - confirm
+X   whether pg_matrix was meant to carry the matrix file name.
+*/
+void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+X char psi_str[120];
+X
+X char *pg_str="Smith-Waterman";
+X
+X if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
+X else { psi_str[0]='\0';}
+X
+X /* only the label of the penalty pair differs between the two builds */
+#ifdef OLD_FASTA_GAP
+X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
+#else
+X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
+#endif
+X pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
+X
+X if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
+#else
+X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n",
+#endif
+X pg_str,verstr,psi_str,pstr->pam_h,pstr->pam_l,
+X (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
+X }
+}
+X
+X
+/* do_work - score aa1 against the query tables in f_str->waa_s and
+X   store the best local score in rst->score[0].
+X
+X   aa0, n0   query sequence and length (the scores come from waa_s,
+X             which init_work() built from aa0)
+X   aa1, n1   library sequence; the scan stops at the first zero code
+X   ppst      gap penalties (gdelval, ggapval) and zsflag
+X   f_str     work area from init_work()
+X   rst       receives score[0], escore, segnum, seglen, comp and H
+X
+X   When zsflag is 6 or 16, do_karlin() is asked for lambda and H; if it
+X   returns a positive value, rst->comp = 1/lambda and rst->H = H.
+X   Otherwise both are set to -1.0.
+*/
+void do_work (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst, struct f_struct *f_str,
+X int qr_flg,
+X struct rstruct *rst)
+{
+X  const unsigned char *aa1p;
+X  register struct swstr *ssj;
+X  struct swstr *ss;
+X  register int *pwaa;
+X  int *waa;
+X  int e, f, h, p;
+X  int q, r, m;
+X  int score;
+X
+X  double lambda, H;
+X
+X  rst->escore = 1.0;
+X  rst->segnum = rst->seglen = 1;
+X
+X  waa = f_str->waa_s;
+X  ss = f_str->ss;
+X
+#ifdef OLD_FASTA_GAP
+X  q = -(ppst->gdelval - ppst->ggapval);
+#else
+X  q = -ppst->gdelval;
+#endif
+X  r = -ppst->ggapval;
+X  m = q + r;	/* cost of the first residue in a gap */
+X
+X  /* initialize 0th row */
+X  for (ssj=ss; ssj<&ss[n0]; ssj++) {
+X    ssj->H = 0;
+X    ssj->E = -q;
+X  }
+X
+X  score = 0;
+X  aa1p = aa1;
+X  while (*aa1p) {
+X    h = p = 0;
+X    f = -q;
+X    /* scores of this library residue against every query position */
+X    pwaa = waa + (*aa1p++ * n0);
+X    for (ssj = ss; ssj < ss+n0; ssj++) {
+X      if ((h = h - m) > (f = f - r)) f = h;	/* open vs. extend, along the row */
+X      if ((h = ssj->H - m) > (e = ssj->E - r)) e = h;	/* same, from the previous row */
+X      h = p + *pwaa++;	/* diagonal step */
+X      if (h < 0 ) h = 0;
+X      if (h < f ) h = f;
+X      if (h < e ) h = e;
+X      p = ssj->H;	/* becomes the diagonal for the next cell */
+X      ssj->H = h;
+X      ssj->E = e;
+X      if (h > score) score = h;
+X    }
+X  }	/* done with forward pass */
+X
+X  rst->score[0] = score;
+X
+X  /* The zsflag test must be grouped explicitly: && binds tighter than
+X     ||, so without the parentheses zsflag == 6 entered this branch
+X     without calling do_karlin() and stored values derived from an
+X     uninitialised lambda and H. */
+X  if ((ppst->zsflag == 6 || ppst->zsflag == 16) &&
+X      (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
+X                 f_str->kar_p, &lambda, &H)>0)) {
+X    rst->comp = 1.0/lambda;
+X    rst->H = H;
+X  }
+X  else {rst->comp = rst->H = -1.0;}
+}	/* here we should be all done */
+X
+/* do_opt - intentionally empty in this file; no argument is read or
+X written */
+void do_opt (const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *pst, struct f_struct *f_str,
+X struct rstruct *rstr)
+{
+}
+X
+/* do_walign - find the best local alignment of aa1 against the query
+X   and produce its edit script.
+X
+X   Three steps:
+X   1. forward pass over aa1 with the waa_a tables, remembering the cell
+X      (I,J) where the best score is first reached;
+X   2. backward pass from (I,J) until the accumulated cost reaches that
+X      score, which gives the start cell (K,L);
+X   3. ALIGN() on the region between the two cells, writing the script
+X      into f_str->res.
+X
+X   On success a_res->res/min0/max0/min1/max1/nres are filled in,
+X   *have_ares is set to 1 and the score is returned.  If no positive
+X   score exists, 0 is returned and a_res / have_ares are left untouched.
+X
+X   The forward pass stops at the first zero code in aa1; n1 is not used
+X   as a bound.
+*/
+int do_walign (const unsigned char *aa0, const int n0,
+X const unsigned char *aa1, const int n1,
+X int frame,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X struct a_res_str *a_res,
+X int *have_ares )
+{
+X const unsigned char *aa0p, *aa1p;
+X register int *pwaa;
+X register int i, j;
+X register struct swstr *ssj;
+X struct swstr *f_ss, *r_ss, *ss;
+X int *res, *waa;
+X int e, f, h, p;
+X int q, r, m;
+X int score;
+X int cost, I, J, K, L;
+X
+X ss = f_str->ss;
+X
+X res = f_str->res;
+X waa = f_str->waa_a; /* this time use universal pam2[0] */
+X
+#ifdef OLD_FASTA_GAP
+X q = -(ppst->gdelval - ppst->ggapval);
+#else
+X q = -ppst->gdelval;
+#endif
+X
+X r = -ppst->ggapval;
+X m = q + r;
+X
+X /* initialize 0th row */
+X for (ssj=ss; ssj<ss+n0; ssj++) {
+X ssj->H = 0;
+X ssj->E = -q;
+X }
+X
+X score = 0;
+X aa1p = aa1;
+X i = 0;
+X while (*aa1p) {
+X h = p = 0;
+X f = -q;
+X pwaa = waa + (*aa1p++ * n0);
+X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
+X if ((h = h - m) > /* gap open from left best */
+X /* gap extend from left gapped */
+X (f = f - r)) f = h; /* if better, use new gap opened */
+X if ((h = ssj->H - m) > /* gap open from up best */
+X /* gap extend from up gap */
+X (e = ssj->E - r)) e = h; /* if better, use new gap opened */
+X h = p + *pwaa++; /* diagonal match */
+X if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
+X if (h < f ) h = f; /* left gap better, reset */
+X if (h < e ) h = e; /* up gap better, reset */
+X p = ssj->H; /* save previous best score */
+X ssj->H = h; /* save (new) up diag-matched */
+X ssj->E = e; /* save upper gap opened */
+X if (h > score) { /* ? new best score */
+X score = h; /* save best */
+X I = i; /* row */
+X J = (int)(ssj-ss); /* column */
+X }
+X }
+X i++;
+X } /* done with forward pass */
+X /* I and J are only assigned when a positive score was seen, so they
+X must not be read past this return */
+X if (score <= 0) return 0;
+X
+X /* to get the start point, go backwards */
+X
+X /* 18-June-2003 fix bug in backtracking code to identify start of
+X alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
+X pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
+X */
+X
+X cost = K = L = 0;
+X /* only columns 0..J take part in the backward pass */
+X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
+X
+X for (i=I; i>=0; i--) {
+X h = f = -1;
+X /* the diagonal seed is 0 only on the row holding the end cell */
+X p = (i == I) ? 0 : -1;
+X for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
+X f = max (f,h-q)-r;
+X ssj->E=max(ssj->E,ssj->H-q)-r;
+X h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
+X p = ssj->H;
+X ssj->H=h;
+X if (h > cost) {
+X cost = h;
+X K = i;
+X L = (int)(ssj-ss);
+X /* stop as soon as the forward score has been reproduced */
+X if (cost >= score) goto found;
+X }
+X }
+X }
+X
+X /* if the loops end without the goto, K and L hold the best cell seen */
+found:
+X
+X /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
+X
+/* in the f_str version, the *res array is already allocated by
+X init_work(), with max(3*n0/2,MIN_RES) entries */
+X
+X a_res->res = f_str->res;
+X *have_ares = 1;
+X /* min0/min1 are the start cell, max0/max1 one past the end cell */
+X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
+X
+/* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
+X
+/* this code no longer refers to aa0[], it used pam2p[0][L] instead */
+X /* the sequence pointers are shifted by one because align() indexes
+X its sequences from 1 */
+X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r,
+X a_res->res,&a_res->nres,f_str);
+X
+/* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
+X
+X return score;
+}
+X
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N,
+X int *S, int **W, int IW, int G, int H, int *nres);
+X
+#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
+X
+/* Append "Delete k" op */
+#define DEL(k) \
+{ if (*last < 0) \
+X *last = (*sapp)[-1] -= (k); \
+X else { \
+X *last = (*sapp)[0] = -(k); \
+X (*sapp)++; \
+X } \
+}
+X
+/* Append "Insert k" op */
+#define INS(k) \
+{ if (*last > 0) \
+X *last = (*sapp)[-1] += (k); \
+X else { \
+X *last = (*sapp)[0] = (k); \
+X (*sapp)++; \
+X } \
+}
+X
+#define REP { *last = (*sapp)[0] = 0; (*sapp)++; } /* Append "Replace" op */
+X
+/*
+#define XTERNAL
+#include "upam.h"
+X
+void
+print_seq_prof(unsigned char *A, int M,
+X unsigned char *B, int N,
+X int **w, int iw, int dir) {
+X char c_max;
+X int i_max, j_max, i,j;
+X
+X char *c_dir="LRlr";
+X
+X for (i=1; i<=min(60,M); i++) {
+X fprintf(stderr,"%c",aa[A[i]]);
+X }
+X fprintf(stderr, - %d\n,M);
+X
+X for (i=0; i<min(60,M); i++) {
+X i_max = -1;
+X for (j=1; j<21; j++) {
+X if (w[iw+i][j]> i_max) {
+X i_max = w[iw+i][j];
+X j_max = j;
+X }
+X }
+X fprintf(stderr,"%c",aa[j_max]);
+X }
+X fputc(':',stderr);
+X for (i=1; i<=min(60,N); i++) {
+X fprintf(stderr,"%c",aa[B[i]]);
+X }
+X fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
+}
+*/
+X
+/* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
+X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
+X and appends such a conversion to the current script. */
+X
+/* align - recursive divide-and-conquer step behind ALIGN().
+X
+X   A, B      sequences, indexed from 1; only B is read in the active
+X             code, scores for A come from the profile rows
+X   M, N      lengths of the two pieces being aligned
+X   tb, te    boundary values seeding the forward and reverse passes
+X   w, iw     profile: row w[iw+i-1] scores position i of this piece
+X   g, h      gap penalties as used by gap(k) = g + h*k
+X   f_str     supplies the f_ss / r_ss rows (indices 0..N are used)
+X   dir       referenced only by the disabled print_seq_prof() call
+X   sapp      cursor into the edit script; DEL/INS/REP append through it
+X   last      last op appended, so adjacent runs can be merged
+X
+X   Returns the score (midc) of the piece that was aligned.
+*/
+static int
+align(const unsigned char *A, const unsigned char *B, int M, int N,
+X int tb, int te, int **w, int iw, int g, int h,
+X struct f_struct *f_str, int dir,
+X int **sapp, int *last)
+{
+X int midi, midj, type; /* Midpoint, type, and cost */
+X int midc;
+X int c1, c2;
+X
+{ register int i, j;
+X register int c, e, d, s;
+X int m, t, *wa;
+X struct swstr *f_ss, *r_ss;
+X
+/* print_seq_prof(A,M,B,N,w,iw,dir); */
+X
+X m = g + h;
+X
+X f_ss = f_str->f_ss;
+X r_ss = f_str->r_ss;
+X
+/* Boundary cases: M <= 1 or N == 0 */
+X
+X /* nothing left in B: delete whatever remains of A */
+X if (N <= 0) {
+X if (M > 0) {
+X DEL(M)
+X }
+X return -gap(M);
+X }
+X
+X if (M <= 1) {
+X /* nothing left in A: insert all of B */
+X if (M <= 0){
+X INS(N)
+X return -gap(N); }
+X /* one position of A: compare deleting it (midj stays 0) with
+X matching it against each B[j] */
+X if (tb < te) tb = te;
+X midc = (tb-h) - gap(N);
+X midj = 0;
+/* wa = w[A[1]]; */
+X wa = w[iw];
+X for (j = 1; j <= N; j++) {
+X c = -gap(j-1) + wa[B[j]] - gap(N-j);
+X if (c > midc) { midc = c; midj = j;}
+X }
+X if (midj == 0) {
+X DEL(1)
+X INS(N)
+X }
+X else {
+X if (midj > 1) { INS(midj-1)}
+X REP
+X if (midj < N) { INS(N-midj)}
+X }
+X return midc;
+X }
+X
+/* Divide: Find optimum midpoint (midi,midj) of cost midc */
+X
+X midi = M/2; /* Forward phase: */
+X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
+X t = -g;
+X for (j = 1; j <= N; j++)
+X { f_ss[j].H = t = t-h;
+X f_ss[j].E = t-g;
+X }
+X t = tb;
+X for (i = 1; i <= midi; i++)
+X { s = f_ss[0].H;
+X f_ss[0].H = c = t = t-h;
+X e = t-g;
+/* wa = w[A[i]]; */
+X wa = w[iw+i-1];
+X for (j = 1; j <= N; j++)
+X { if ((c = c - m) > (e = e - h)) e = c;
+X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
+X c = s + wa[B[j]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = f_ss[j].H;
+X f_ss[j].H = c;
+X f_ss[j].E = d;
+X }
+X }
+X f_ss[0].E = f_ss[0].H;
+X
+X r_ss[N].H = 0; /* Reverse phase: */
+X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
+X for (j = N-1; j >= 0; j--)
+X { r_ss[j].H = t = t-h;
+X r_ss[j].E = t-g;
+X }
+X t = te;
+X for (i = M-1; i >= midi; i--)
+X { s = r_ss[N].H;
+X r_ss[N].H = c = t = t-h;
+X e = t-g;
+/* wa = w[A[i+1]]; */
+X wa = w[iw+i];
+X for (j = N-1; j >= 0; j--)
+X { if ((c = c - m) > (e = e - h)) e = c;
+X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c;
+X c = s + wa[B[j+1]];
+X if (e > c) c = e;
+X if (d > c) c = d;
+X s = r_ss[j].H;
+X r_ss[j].H = c;
+X r_ss[j].E = d;
+X }
+X }
+X r_ss[N].E = r_ss[N].H;
+X
+X /* type 1: the two halves meet at (midi,midj) without a shared gap;
+X type 2: a deletion spans the midpoint, and g is added back because
+X both E values already include a gap open */
+X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
+X midj = 0;
+X type = 1;
+X for (j = 0; j <= N; j++)
+X if ((c = f_ss[j].H + r_ss[j].H) >= midc)
+X if (c > midc || f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)
+X { midc = c;
+X midj = j;
+X }
+X for (j = N; j >= 0; j--)
+X if ((c = f_ss[j].E + r_ss[j].E + g) > midc)
+X { midc = c;
+X midj = j;
+X type = 2;
+X }
+X }
+X
+/* Conquer: recursively around midpoint */
+X
+X /* the recursive calls reuse f_ss / r_ss, which is safe because midc,
+X midi, midj and type have already been extracted from them */
+X if (type == 1)
+X { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0, sapp, last);
+X c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
+X }
+X else
+X /* the two positions of A around the midpoint become a DEL(2) */
+X { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp, last);
+X DEL(2);
+X align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
+X }
+X return midc;
+}
+X
+/* Interface and top level of comparator */
+X
+/* ALIGN - align A[1..M] with B[1..N] and write the edit script to S.
+X
+X   W, IW   profile rows; row W[IW+i-1] scores position i of A
+X   G, H    gap penalties
+X   S       receives the script (0 = aligned pair, +k = insert k,
+X           -k = delete k)
+X   NC      receives the number of alignment columns, as counted by
+X           CHECK_SCORE()
+X   f_str   work area; its f_ss / r_ss pointers are borrowed for the
+X           duration of the call
+X
+X   align() indexes f_ss[0..N] and r_ss[0..N], so two scratch rows of
+X   N+2 cells are allocated here and entered one cell in.  The pointers
+X   f_str held on entry are put back before returning: leaving the
+X   scratch addresses behind would make f_str->f_ss / r_ss dangle once
+X   the rows are freed, and would drop the only reference to the rows
+X   init_work() allocated.
+X
+X   Returns the score computed by align(); a disagreement with
+X   CHECK_SCORE() is reported on stdout.
+*/
+int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
+X int **W, int IW, int G, int H, int *S, int *NC,
+X struct f_struct *f_str)
+{
+X  struct swstr *f_ss, *r_ss;
+X  struct swstr *saved_f_ss, *saved_r_ss;
+X  int *sapp, last;
+X  int c, ck;
+X
+X  sapp = S;
+X  last = 0;
+X
+X  /* remember what f_str pointed at so it can be restored below */
+X  saved_f_ss = f_str->f_ss;
+X  saved_r_ss = f_str->r_ss;
+X
+X  if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
+X    exit (1);
+X  }
+X  f_ss++;
+X  f_str->f_ss = f_ss;
+X
+X  if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
+X      == NULL) {
+X    fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
+X    exit (1);
+X  }
+X  r_ss++;
+X  f_str->r_ss = r_ss;
+X
+X  /* print_seq_prof(A,M,W,IW); */
+X  c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp, &last);	/* OK, do it */
+X
+X  /* re-score the finished script as a consistency check */
+X  ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
+X  if (c != ck) printf("Check_score error. %d != %d\n",c,ck);
+X
+X  /* undo the one-cell offset before handing the rows back to free() */
+X  f_ss--; r_ss--;
+X  free(r_ss); free(f_ss);
+X
+X  f_str->f_ss = saved_f_ss;
+X  f_str->r_ss = saved_r_ss;
+X
+X  return c;
+}
+X
+/* Alignment display routine */
+X
+static char ALINE[51], BLINE[51], CLINE[51];
+X
+/* DISPLAY - print the alignment described by script S to stdout, in
+X   blocks of at most 50 columns.
+X
+X   A, B      sequences, indexed from 1
+X   M, N      their lengths
+X   S         edit script: 0 = aligned pair, >0 = that many symbols of B
+X             opposite blanks, <0 = that many symbols of A opposite blanks
+X   AP, BP    coordinates printed in front of the first block; later
+X             blocks print AP+i and BP+j
+X   sq        residue code -> display character table
+X
+X   The three output lines are assembled in the file-level static
+X   buffers ALINE, BLINE and CLINE.
+*/
+void DISPLAY(unsigned char *A, unsigned char *B, int M, int N,
+X int *S, int AP, int BP, char *sq)
+{ register char *a, *b, *c;
+X register int i, j, op;
+X int lines, ap, bp;
+X
+X i = j = op = lines = 0;
+X ap = AP;
+X bp = BP;
+X a = ALINE;
+X b = BLINE;
+X c = CLINE;
+X while (i < M || j < N)
+X /* aligned pair: '|' in the middle line when the two characters match */
+X { if (op == 0 && *S == 0)
+X { op = *S++;
+X *a = sq[A[++i]];
+X *b = sq[B[++j]];
+X *c++ = (*a++ == *b++) ? '|' : ' ';
+X }
+X else
+X /* gap column: load the next run length unless one is in progress */
+X { if (op == 0)
+X op = *S++;
+X if (op > 0)
+X { *a++ = ' ';
+X *b++ = sq[B[++j]];
+X op--;
+X }
+X else
+X { *a++ = sq[A[++i]];
+X *b++ = ' ';
+X op++;
+X }
+X *c++ = '-';
+X }
+X /* flush when 50 columns are filled or both sequences are used up */
+X if (a >= ALINE+50 || i >= M && j >= N)
+X { *a = *b = *c = '\0';
+X printf("\n%5d ",50*lines++);
+X /* ruler above the block; b is borrowed as the ruler cursor and
+X reset to BLINE below */
+X for (b = ALINE+10; b <= a; b += 10)
+X printf(" . :");
+X if (b <= a+5)
+X printf(" .");
+X printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
+X ap = AP + i;
+X bp = BP + j;
+X a = ALINE;
+X b = BLINE;
+X c = CLINE;
+X }
+X }
+}
+X
+/* CHECK_SCORE - return the score of the alignment stored in S */
+X
+/* CHECK_SCORE - re-score the edit script S independently of align().
+X
+X   Script entries are read one at a time until M positions of the
+X   profile and N symbols of B (indexed from 1) have been used:
+X      0   aligned pair, adds w[iw+i][B[j+1]]
+X     +k   k symbols of B with no partner, costs g + k*h
+X     -k   k profile positions with no partner, costs g + k*h
+X
+X   *NC receives the number of alignment columns; the total score is
+X   returned.  A is not read.
+*/
+static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
+X int M, int N, int *S, int **w, int iw,
+X int g, int h, int *NC)
+{
+X  int ia = 0;		/* profile positions used so far */
+X  int ib = 0;		/* symbols of B used so far */
+X  int cols = 0;		/* alignment columns */
+X  int total = 0;
+X  int step;
+X
+X  /* print_seq_prof(A,M,w,iw); */
+X
+X  while (ia < M || ib < N) {
+X    step = *S++;
+X    if (step > 0) {
+X      total -= g + step*h;
+X      ib += step;
+X      cols += step;
+X    }
+X    else if (step < 0) {
+X      total -= g - step*h;
+X      ia -= step;
+X      cols -= step;
+X    }
+X    else {
+X      ib++;
+X      total += w[iw+ia][B[ib]];
+X      ia++;
+X      cols++;
+X    }
+X  }
+X
+X  *NC = cols;
+X  return total;
+}
+X
+/* pre_cons - intentionally empty in this file; no argument is read or
+X written */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {}
+X
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+X
+X  /* all three scale factors are 1 in this file */
+X  aln->qlfact = 1;
+X  aln->llmult = 1;
+X  aln->llfact = 1;
+X
+X  /* neither sequence is flagged as reversed */
+X  aln->llrev = 0;
+X  aln->qlrev = 0;
+X
+X  /* the frame argument is not used; the stored frame is always 0 */
+X  aln->frame = 0;
+}
+X
+/* 29-June-2003 this version has been modified to use pst.pam2p
+X instead of pam2 to indicate similarity */
+X
+#include "a_mark.h"
+X
+/* calcons - build the display strings for one alignment.
+X
+X   seqc0 / seqc1 receive the characters of aa0 / aa1 with '-' in gap
+X   columns, seqca the per-column marker (M_* values from a_mark.h).
+X   Each string is written in three parts: up to mins columns of context
+X   to the left of the alignment, the aligned region replayed from
+X   a_res.res, and nd columns of context to the right.
+X
+X   *nc receives the number of aligned columns (lenc); the counters and
+X   coordinates in *aln are filled in.  seqca is NUL-terminated after the
+X   aligned region; seqc0 and seqc1 are not terminated here.
+X
+X   Returns mins+lenc+nd, the total number of columns written.
+X
+X   smins is assigned below but not read in this function.
+*/
+int calcons(const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int *nc,
+X struct a_struct *aln,
+X struct a_res_str a_res,
+X struct pstruct pst,
+X char *seqc0, char *seqc1, char *seqca,
+X struct f_struct *f_str)
+{
+X int i0, i1;
+X int op, lenc, nd, ns, itmp;
+X char *sp0, *sp1, *spa, *sq;
+X int *rp;
+X int mins, smins;
+X
+X /* residue code -> display character table */
+X if (pst.ext_sq_set) { sq = pst.sqx;}
+X else {sq = pst.sq;}
+X
+X aln->amin0 = a_res.min0;
+X aln->amax0 = a_res.max0;
+X aln->amin1 = a_res.min1;
+X aln->amax1 = a_res.max1;
+X
+X /* #define LFASTA */
+#ifndef LFASTA
+X /* left context: mins columns are shown; the sequence with the shorter
+X left overhang is padded with blanks so both strings stay aligned */
+X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
+X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
+X smins=0;
+X if (aln->showall==1) mins=a_res.min0;
+X else mins = min(a_res.min0,aln->llcntx);
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X if ((mins-a_res.min1)>0) {
+X memset(seqc1,' ',mins-a_res.min1);
+X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
+X aln->smin1 = 0;
+X }
+X else {
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X }
+X }
+X else {
+X /* mirror image: aa1 extends more to the left */
+X smins=0;
+X if (aln->showall == 1) mins=a_res.min1;
+X else mins = min(a_res.min1,aln->llcntx);
+X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
+X aln->smin1 = a_res.min1-mins;
+X if ((mins-a_res.min0)>0) {
+X memset(seqc0,' ',mins-a_res.min0);
+X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X aln->smin0 = 0;
+X }
+X else {
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aln->smin0 = a_res.min0-mins;
+X }
+X }
+X else {
+X /* both overhangs are at least llen long: show the same amount of
+X context for each sequence */
+X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X smins=mins;
+X aln->smin0=a_res.min0;
+X aln->smin1=a_res.min1;
+X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X }
+#else
+X aln->smin0 = a_res.min0;
+X aln->smin1 = a_res.min1;
+X smins = mins = 0;
+#endif
+X
+/* now get the middle */
+X
+X /* the left context carries no match markers */
+X memset(seqca,M_BLANK,mins);
+X
+X spa = seqca+mins;
+X sp0 = seqc0+mins;
+X sp1 = seqc1+mins;
+X rp = a_res.res;
+X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
+X i0 = a_res.min0;
+X i1 = a_res.min1;
+X
+X while (i0 < a_res.max0 || i1 < a_res.max1) {
+X /* op != 0 means a gap run is still being consumed */
+X if (op == 0 && *rp == 0) {
+X op = *rp++;
+X lenc++;
+X /* marker from the sign of the position-specific score; scores of
+X zero or more count as similar */
+X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
+X else if (itmp == 0) { *spa = M_ZERO;}
+X else {*spa = M_POS;}
+X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X *sp0 = sq[aa0[i0++]];
+X *sp1 = sq[aa1[i1++]];
+X
+X /* identical characters override the score-based marker; the T/U
+X test is made on the characters as stored, without toupper() */
+X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
+X (*sp0=='U' && *sp1=='T'))) {
+X aln->nident++; *spa=M_IDENT;
+X }
+X
+X sp0++; sp1++; spa++;
+X }
+X else {
+X /* load the next gap length unless one is already in progress */
+X if (op==0) op = *rp++;
+X if (op>0) {
+X *sp0++ = '-';
+X *sp1++ = sq[aa1[i1++]];
+X *spa++ = M_DEL;
+X op--;
+X lenc++;
+X aln->ngap_q++;
+X }
+X else {
+X *sp0++ = sq[aa0[i0++]];
+X *sp1++ = '-';
+X *spa++ = M_DEL;
+X op++;
+X lenc++;
+X aln->ngap_l++;
+X }
+X }
+X }
+X
+X *nc = lenc;
+X *spa = '\0';
+/* now we have the middle, get the right end */
+X
+#ifndef LFASTA
+X /* how much extra to show at end ? */
+X if (!aln->llcntx_flg) {
+X ns = mins + lenc + aln->llen; /* show an extra line? */
+X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
+X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
+X nd = ns - (mins+lenc); /* this much extra */
+X }
+X else nd = aln->llcntx;
+X
+X /* never show more than the longer of the two right overhangs */
+X if (nd > max(n0-a_res.max0,n1-a_res.max1))
+X nd = max(n0-a_res.max0,n1-a_res.max1);
+X
+X if (aln->showall==1) {
+X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
+X /* get right end */
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X /* fill with blanks - this is required to use one 'nc' */
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else {
+X /* copy what is left of each sequence, blank-padding the shorter one
+X up to nd columns */
+X if ((nd-(n0-a_res.max0))>0) {
+X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X }
+X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X if ((nd-(n1-a_res.max1))>0) {
+X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X }
+X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
+X }
+X
+#else /* LFASTA */
+X nd = 0;
+#endif
+X /* #undef LFASTA */
+X return mins+lenc+nd;
+}
+X
+/* calcons_a() - expand the edit script in a_res.res into display strings.
+X
+X  Outputs:
+X    seqc0, seqc1  the two sequences as aligned, gaps shown as '-'
+X    seqca         one match-class code per column (M_IDENT, M_POS, M_ZERO,
+X                  M_NEG, M_DEL; M_BLANK under the left context)
+X    seqc0a        ann_arr[aa0a[i]] for every aa0 residue in the aligned
+X                  region, ' ' opposite gaps in aa0 and under the left context
+X    *nc           number of aligned columns
+X    aln           amin0/amax0/amin1/amax1, smin0/smin1, nident, nsim,
+X                  ngap_q and ngap_l are set; nfs is reset to 0
+X
+X  Returns mins+lenc+nd: left context + aligned columns + right context.
+X
+X  NOTE(review): nothing here checks the size of the output buffers, and
+X  seqca is the only buffer this function explicitly NUL-terminates
+X  (whether aancpy() terminates the others is not visible here).
+*/
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+X              const unsigned char *aa1, int n1,
+X              int *nc,
+X              struct a_struct *aln,
+X              struct a_res_str a_res,
+X              struct pstruct pst,
+X              char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X              char *ann_arr, struct f_struct *f_str)
+{
+X  int i0, i1;                   /* current positions in aa0 / aa1 */
+X  int op, lenc, nd, ns, itmp;   /* op: what is left of the current gap run */
+X  char *sp0, *sp0a, *sp1, *spa, *sq;
+X  int *rp;                      /* cursor into the a_res.res edit script */
+X  int mins, smins;              /* smins is assigned below but never read here */
+X
+X  /* residue code -> character table; the extended table if requested */
+X  if (pst.ext_sq_set) {sq = pst.sqx;}
+X  else {sq = pst.sq;}
+X
+X  aln->amin0 = a_res.min0;
+X  aln->amax0 = a_res.max0;
+X  aln->amin1 = a_res.min1;
+X  aln->amax1 = a_res.max1;
+X
+X  /* first fill in the ends */
+X
+X  /* #define LFASTA */
+#ifndef LFASTA
+X  /* The nested if/else below pairs up as:
+X       if (start fits on one line || showall)
+X         if (aa0 has the longer prefix) {...} else {...}
+X       else {...}
+X     mins ends up as the number of left-context columns written. */
+X  if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)     /* will we show all the start ?*/
+X    if (a_res.min0>=a_res.min1) {                              /* aa0 extends more to left */
+X      smins=0;
+X      if (aln->showall==1) mins=a_res.min0;
+X      else mins = min(a_res.min0,aln->llcntx);
+X      aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X      aln->smin0 = a_res.min0-mins;
+X      if ((mins-a_res.min1)>0) {
+X        /* aa1 has fewer than mins residues before the alignment: pad with blanks */
+X        memset(seqc1,' ',mins-a_res.min1);
+X        aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
+X        aln->smin1 = 0;
+X      }
+X      else {
+X        aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X        aln->smin1 = a_res.min1-mins;
+X      }
+X    }
+X    else {
+X      /* mirror image of the branch above: aa1 has the longer prefix */
+X      smins=0;
+X      if (aln->showall == 1) mins=a_res.min1;
+X      else mins = min(a_res.min1,aln->llcntx);
+X      aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
+X      aln->smin1 = a_res.min1-mins;
+X      if ((mins-a_res.min0)>0) {
+X        memset(seqc0,' ',mins-a_res.min0);
+X        aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
+X        aln->smin0 = 0;
+X      }
+X      else {
+X        aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X        aln->smin0 = a_res.min0-mins;
+X      }
+X    }
+X  else {
+X    /* start does not fit on one line: show at most llcntx residues of context */
+X    mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
+X    smins=mins;
+X    aln->smin0=a_res.min0;
+X    aln->smin1=a_res.min1;
+X    aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
+X    aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
+X  }
+#else
+X  aln->smin0 = a_res.min0;
+X  aln->smin1 = a_res.min1;
+X  smins = mins = 0;
+#endif
+X
+/* now get the middle */
+X
+X  /* the left context has no match symbols and no annotation */
+X  memset(seqca,M_BLANK,mins);
+X  memset(seqc0a,' ',mins);
+X
+X  spa = seqca+mins;
+X  sp0 = seqc0+mins;
+X  sp0a = seqc0a+mins;
+X  sp1 = seqc1+mins;
+X  rp = a_res.res;
+X  lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
+X  i0 = a_res.min0;
+X  i1 = a_res.min1;
+X
+X  /* Edit script: an entry of 0 is one aligned pair; an entry > 0 is that
+X     many aa1 residues opposite a gap in aa0; an entry < 0 is that many aa0
+X     residues opposite a gap in aa1.  op counts a gap run down (or up) to 0. */
+X  while (i0 < a_res.max0 || i1 < a_res.max1) {
+X    if (op == 0 && *rp == 0) {
+X      op = *rp++;
+X      lenc++;
+X      /* classify the column by the sign of the score for query position i0 */
+X      if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
+X      else if (itmp == 0) { *spa = M_ZERO;}
+X      else {*spa = M_POS;}
+X      if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
+X
+X      *sp0a++ = ann_arr[aa0a[i0]];
+X      *sp0 = sq[aa0[i0++]];
+X      *sp1 = sq[aa1[i1++]];
+X
+X      /* an identity (case-insensitive) overrides the score-based symbol */
+X      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
+X      else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
+X                                 (*sp0=='U' && *sp1=='T'))) {
+X        /* T and U count as identical for DNA; this test is upper-case only */
+X        aln->nident++; *spa=M_IDENT;
+X      }
+X
+X      sp0++; sp1++; spa++;
+X    }
+X    else {
+X      if (op==0) op = *rp++;    /* start a new gap run */
+X      if (op>0) {               /* gap in aa0, consume one aa1 residue */
+X        *sp0++ = '-';
+X        *sp0a++ = ' ';
+X        *sp1++ = sq[aa1[i1++]];
+X        *spa++ = M_DEL;
+X        op--;
+X        lenc++;
+X        aln->ngap_q++;
+X      }
+X      else {                    /* gap in aa1, consume one aa0 residue */
+X        *sp0a++ = ann_arr[aa0a[i0]];
+X        *sp0++ = sq[aa0[i0++]];
+X        *sp1++ = '-';
+X        *spa++ = M_DEL;
+X        op++;
+X        lenc++;
+X        aln->ngap_l++;
+X      }
+X    }
+X  }
+X
+X  *nc = lenc;
+X  *spa = '\0';
+/* now we have the middle, get the right end */
+X
+#ifndef LFASTA
+X  /* how much extra to show at end ? */
+X  if (!aln->llcntx_flg) {
+X    ns = mins + lenc + aln->llen;       /* show an extra line? */
+X    ns -= (itmp = ns %aln->llen);       /* itmp = left over on last line */
+X    if (itmp>aln->llen/2) ns += aln->llen;  /* more than 1/2 , use another*/
+X    nd = ns - (mins+lenc);              /* this much extra */
+X  }
+X  else nd = aln->llcntx;
+X
+X  /* never more than the longer of the two remaining tails */
+X  if (nd > max(n0-a_res.max0,n1-a_res.max1))
+X    nd = max(n0-a_res.max0,n1-a_res.max1);
+X
+X  if (aln->showall==1) {
+X    nd = max(n0-a_res.max0,n1-a_res.max1);      /* reset for showall=1 */
+X    /* get right end */
+X    aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X    aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X    /* fill with blanks - this is required to use one 'nc' */
+X    memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X    memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X  }
+X  else {
+X    /* copy nd residues of each tail, blank-padding a tail shorter than nd */
+X    if ((nd-(n0-a_res.max0))>0) {
+X      aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
+X      memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
+X    }
+X    else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
+X
+X    if ((nd-(n1-a_res.max1))>0) {
+X      aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
+X      memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
+X    }
+X    else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
+X  }
+X
+#else   /* LFASTA */
+X  nd = 0;
+#endif
+X  /* #undef LFASTA */
+X  return mins+lenc+nd;
+}
+X
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt);
+X
+/* build an array of match/ins/del - length strings
+X
+X  calc_code() walks the edit script in a_res.res and appends one
+X  <op><count> run to al_str (via update_code()) each time the operation
+X  changes:
+X    '='  aligned residues
+X    '-'  aa1 residues opposite a gap in aa0   (script entry > 0)
+X    '+'  aa0 residues opposite a gap in aa1   (script entry < 0)
+X    '*'  an aligned pair in which either character is '*'
+X
+X  al_str must hold a NUL-terminated string on entry, because strlen(al_str)
+X  is used to work out the space left; al_str_n is presumably the total size
+X  of that buffer - TODO confirm against the callers.
+X
+X  aln receives amin0/amax0/amin1/amax1, nident, nsim, ngap_q and ngap_l;
+X  nfs is reset to 0.  Returns the number of alignment columns.
+X
+X  NOTE(review): the run counter starts at p_op=0, op_cnt=0, so an alignment
+X  whose first column is a gap or contains '*' is encoded with a leading
+X  "=0" run.
+*/
+int calc_code(const unsigned char *aa0, const int n0,
+X              const unsigned char *aa1, const int n1,
+X              struct a_struct *aln,
+X              struct a_res_str a_res,
+X              struct pstruct pst,
+X              char *al_str, int al_str_n, struct f_struct *f_str)
+{
+X  int i0, i1, nn1;              /* nn1 is assigned below but not used afterwards */
+X  int op, lenc, nd, ns, itmp;
+X  int p_op, op_cnt;             /* operation and length of the run being built */
+X  const unsigned char *aa1p;
+X  char tmp_cnt[20];             /* only cleared below; not used otherwise */
+X  char sp0, sp1, *sq;
+X  int *rp;
+X  int mins, smins;
+X
+X  /* residue code -> character table; the extended table if requested */
+X  if (pst.ext_sq_set) {
+X    sq = pst.sqx;
+X  }
+X  else {
+X    sq = pst.sq;
+X  }
+X
+X  /* when TFASTA is defined the second sequence is taken from f_str */
+#ifndef TFASTA
+X  aa1p = aa1;
+X  nn1 = n1;
+#else
+X  aa1p = f_str->aa1x;
+X  nn1 = f_str->n10;
+#endif
+X
+X  aln->amin0 = a_res.min0;
+X  aln->amax0 = a_res.max0;
+X  aln->amin1 = a_res.min1;
+X  aln->amax1 = a_res.max1;
+X
+X  rp = a_res.res;
+X  lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
+X  op_cnt = 0;
+X
+X  i0 = a_res.min0;
+X  i1 = a_res.min1;
+X  tmp_cnt[0]='\0';
+X
+X  while (i0 < a_res.max0 || i1 < a_res.max1) {
+X    if (op == 0 && *rp == 0) {          /* script entry 0: aligned pair */
+X
+X      if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
+X
+X      sp0 = sq[aa0[i0++]];
+X      sp1 = sq[aa1p[i1++]];
+X
+X      if (p_op == 0 || p_op==3) {
+X        if (sp0 != '*' && sp1 != '*') {
+X          if (p_op == 3) {
+X            /* a '*' run ends: flush it and start a new '=' run */
+X            update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X            op_cnt = 1; p_op = 0;
+X          }
+X          else {op_cnt++;}
+X        }
+X        else {
+X          /* a '*' always closes the current run, so '*' runs have length 1 */
+X          update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X          op_cnt = 1; p_op = 3;
+X        }
+X      }
+X      else {
+X        /* a gap run ends: flush it; the new run is '=' even if a '*' is present */
+X        update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
+X        op_cnt = 1; p_op = 0;
+X      }
+X
+X      op = *rp++;
+X      lenc++;
+X
+X      if (toupper(sp0) == toupper(sp1)) aln->nident++;
+X      else if (pst.dnaseq==1) {
+X        /* DNA: T/U count as identical; a mismatch against N is added to the gap counts */
+X        if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
+X            (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
+X        else if (toupper(sp0) == 'N') aln->ngap_q++;
+X        else if (toupper(sp1) == 'N') aln->ngap_l++;
+X      }
+X    }
+X    else {
+X      if (op==0) op = *rp++;            /* start a new gap run */
+X      if (op>0) {                       /* gap in aa0: '-' run */
+X        if (p_op == 1) { op_cnt++;}
+X        else {
+X          update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X          op_cnt = 1; p_op = 1;
+X        }
+X        op--; lenc++; i1++; aln->ngap_q++;
+X      }
+X      else {                            /* gap in aa1: '+' run */
+X        if (p_op == 2) { op_cnt++;}
+X        else {
+X          update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X          op_cnt = 1; p_op = 2;
+X        }
+X        op++; lenc++; i0++; aln->ngap_l++;
+X      }
+X    }
+X  }
+X  /* flush the final run */
+X  update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
+X
+X  return lenc;
+}
+X
+/* update_code() - append one run, written as <op_char><count> (for example
+X  "=12" or "-3"), to the NUL-terminated string al_str.
+X
+X  al_str_max  bytes still free in al_str, INCLUDING the byte needed for the
+X              terminating NUL; nothing is appended when it is <= 1
+X  op          0..3, selecting '=', '-', '+' or '*'; other values are ignored
+X  op_cnt      length of the run
+X
+X  strncat()'s length argument is the maximum number of characters appended
+X  and does not include the NUL it always writes, so it must be given one
+X  less than the free space.  A run that does not fit is truncated.
+*/
+static void
+update_code(char *al_str, int al_str_max, int op, int op_cnt) {
+X
+X  static const char op_char[] = "=-+*";
+X  char tmp_cnt[20];             /* 1 op character + up to 11 for an int + NUL */
+X
+X  /* a non-positive value would otherwise convert to a huge size_t */
+X  if (al_str_max <= 1) return;
+X  if (op < 0 || op > 3) return;
+X
+X  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+X  strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
+}
+X
+/* calc_id() - walk the edit script in a_res.res and count identities,
+X  similarities and gaps, without building any display strings.
+X
+X  aln receives nident, nsim, ngap_q and ngap_l; nfs is reset to 0.
+X  Returns the number of alignment columns.
+*/
+int calc_id(const unsigned char *aa0, const int n0,
+X            const unsigned char *aa1, const int n1,
+X            struct a_struct *aln,
+X            struct a_res_str a_res,
+X            struct pstruct pst,
+X            struct f_struct *f_str)
+{
+X  int i0, i1, nn1, n_id;        /* nn1 is assigned below but not used afterwards */
+X  int op, lenc, nd, ns, itmp;
+X  int sp0, sp1;
+X  const unsigned char *aa1p;
+X  int *rp;
+X  char *sq;
+X
+X  /* residue code -> character table; the extended table if requested */
+X  if (pst.ext_sq_set) {
+X    sq = pst.sqx;
+X  }
+X  else {
+X    sq = pst.sq;
+X  }
+X
+X  /* when TFASTA is defined the second sequence is taken from f_str */
+#ifndef TFASTA
+X  aa1p = aa1;
+X  nn1 = n1;
+#else
+X  aa1p = f_str->aa1x;
+X  nn1 = f_str->n10;
+#endif
+X
+X  rp = a_res.res;
+X  lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
+X  i0 = a_res.min0;
+X  i1 = a_res.min1;
+X
+X  /* script entry 0 = aligned pair; > 0 = gap in aa0; < 0 = gap in aa1 */
+X  while (i0 < a_res.max0 || i1 < a_res.max1) {
+X    if (op == 0 && *rp == 0) {
+X      op = *rp++;
+X      lenc++;
+X      /* a non-negative matrix score counts as similar */
+X      if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
+X
+X      sp0 = sq[aa0[i0++]];
+X      sp1 = sq[aa1p[i1++]];
+X      if (toupper(sp0) == toupper(sp1)) n_id++;
+X      /* DNA: T/U count as identical; this test is upper-case only */
+X      else if (pst.dnaseq==1 &&
+X               ((sp0=='T' && sp1== 'U')||(sp0=='U' && sp1=='T'))) n_id++;
+X    }
+X    else {
+X      if (op==0) op = *rp++;    /* start a new gap run */
+X      if (op>0) {op--; lenc++; i1++; aln->ngap_q++; }
+X      else {op++; lenc++; i0++; aln->ngap_l++; }
+X    }
+X  }
+X  aln->nident = n_id;
+X  return lenc;
+}
+X
+#ifdef PCOMPLIB
+#include "p_mw.h"
+/* update_params() - copy the query length n0 from the query message into
+X  the search parameters.  Compiled only when PCOMPLIB is defined. */
+void
+update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+X  ppst->n0 = qm_msg->n0;
+}
+#endif
+SHAR_EOF
+chmod 0644 dropnsw.c ||
+echo 'restore of dropnsw.c failed'
+Wc_c="`wc -c < 'dropnsw.c'`"
+test 34172 -eq "$Wc_c" ||
+ echo 'dropnsw.c: original size 34172, current size' "$Wc_c"
+fi
+# ============= egmsmg.aa ==============
+if test -f 'egmsmg.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping egmsmg.aa (File already exists)'
+else
+echo 'x - extracting egmsmg.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'egmsmg.aa' &&
+>EGMSMG Epidermal growth factor precursor - Mouse
+MPWGRRPTWLLLAFLLVFLKISILSVTAWQTGNCQPGPLERSERSGTCAGPAPFLVFSQGKSISRIDPDG
+TNHQQLVVDAGISADMDIHYKKERLYWVDVERQVLLRVFLNGTGLEKVCNVERKVSGLAIDWIDDEVLWV
+DQQNGVITVTDMTGKNSRVLLSSLKHPSNIAVDPIERLMFWSSEVTGSLHRAHLKGVDVKTLLETGGISV
+LTLDVLDKRLFWVQDSGEGSHAYIHSCDYEGGSVRLIRHQARHSLSSMAFFGDRIFYSVLKSKAIWIANK
+HTGKDTVRINLHPSFVTPGKLMVVHPRAQPRTEDAAKDPDPELLKQRGRPCRFGLCERDPKSHSSACAEG
+YTLSRDRKYCEDVNECATQNHGCTLGCENTPGSYHCTCPTGFVLLPDGKQCHELVS
+CPGNVSKCSHGCVLTSDGPRCICPAGSVLGRDGKTCTGCSSPDNGGCSQICLPLRPGSWECDCFPGYDLQ
+SDRKSCAASGPQPLLLFANSQDIRHMHFDGTDYKVLLSRQMGMVFALDYDPVESKIYFAQTALKWIERAN
+MDGSQRERLITEGVDTLEGLALDWIGRRIYWTDSGKSVVGGSDLSGKHHRIIIQERISRPRGIAVHPRAR
+RLFWTDVGMSPRIESASLQGSDRVLIASSNLLEPSGITIDYLTDTLYWCDTKRSVIEMANLDGSKRRRLI
+QNDVGHPFSLAVFEDHLWVSDWAIPSVIRVNKRTGQNRVRLQGSMLKPSSLVVVHPLAKPGADPCLYRNG
+GCEHICQESLGTARCLCREGFVKAWDGKMCLPQDYPILSGENADLSKEVTSLSNST
+QAEVPDDDGTESSTLVAEIMVSGMNYEDDCGPGGCGSHARCVSDGETAECQCLKGFARDGNLCSDIDECV
+LARSDCPSTSSRCINTEGGYVCRCSEGYEGDGISCFDIDECQRGAHNCAENAACTNTEGGYNCTCAGRPS
+SPGRSCPDSTAPSLLGEDGHHLDRNSYPGCPSSYDGYCLNGGVCMHIESLDSYTCNCVIGYSGDRCQTRD
+LRWWELRHAGYGQKHDIMVVAVCMVALVLLLLLGMWGTYYYRTRKQLSNPPKNPCDEPSGSVSSSGPDSS
+SGAAVASCPQPWFVVLEKHQDPKNGSLPADGTNGAVVDAGLSPSLQLGSVHLTSWRQKPHIDGMGTGQSC
+WIPPSSDRGPQEIEGNSHLPSYRPVGPEKLHSLQSANGSCHERAPDLPRQTEPVK
+SHAR_EOF
+chmod 0644 egmsmg.aa ||
+echo 'restore of egmsmg.aa failed'
+Wc_c="`wc -c < 'egmsmg.aa'`"
+test 1286 -eq "$Wc_c" ||
+ echo 'egmsmg.aa: original size 1286, current size' "$Wc_c"
+fi
+# ============= faatran.c ==============
+if test -f 'faatran.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping faatran.c (File already exists)'
+else
+echo 'x - extracting faatran.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'faatran.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: faatran.c,v 1.6 2007/04/02 18:08:11 wrp Exp $ */
+X
+/* aatran.c translates from nt to aa, 1 char codes */
+/* modified July 2, 1987 for all 6 frames */
+/* 23 Jan 1991 fixed bug for short sequences */
+X
+/* this mapping is not alphabet independent */
+X
+#define XTERNAL
+#include <stdio.h>
+#include <stdlib.h>
+X
+#include "upam.h"
+#include "uascii.h"
+X
+/*
+1. The Standard Code (transl_table=1)
+X
+By default all transl_table in GenBank flatfiles are equal to id 1, and this
+is not shown. When transl_table is not equal to id 1, it is shown as a
+qualifier on the CDS feature.
+X
+*/
+static
+char *AA1="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = ---M---------------M---------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+2. The Vertebrate Mitochondrial Code (transl_table=2)
+*/
+static
+char *AA2 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
+/*
+X Starts = --------------------------------MMMM---------------M------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+3. The Yeast Mitochondrial Code (transl_table=3)
+*/
+static
+char *AA3 ="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
+Mycoplasma/Spiroplasma Code (transl_table=4)
+*/
+static
+char *AA4 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = --MM---------------M------------MMMM---------------M------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+5. The Invertebrate Mitochondrial Code (transl_table=5)
+*/
+static
+char *AA5 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
+/*
+X Starts = ---M----------------------------MMMM---------------M------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
+*/
+static
+char *AA6 ="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+9. The Echinoderm Mitochondrial Code (transl_table=9)
+*/
+static
+char *AA7 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+10. The Euplotid Nuclear Code (transl_table=10)
+*/
+static
+char *AA10="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+11. The Bacterial "Code" (transl_table=11)
+*/
+static
+char *AA11="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = ---M---------------M------------MMMM---------------M------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+12. The Alternative Yeast Nuclear Code (transl_table=12)
+*/
+static
+char *AA12 ="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = -------------------M---------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+13. The Ascidian Mitochondrial Code (transl_table=13)
+*/
+static
+char *AA13="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+14. The Flatworm Mitochondrial Code (transl_table=14)
+*/
+static
+char *AA14 ="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+X
+15. Blepharisma Nuclear Code (transl_table=15)
+*/
+static
+char *AA15="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X Starts = -----------------------------------M----------------------------
+X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+X
+static
+char *AA16 ="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X id 16 ,
+X name "Chlorophycean Mitochondrial" ,
+X sncbieaa "-----------------------------------M----------------------------"
+X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+X
+static
+char *AA21 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+/*
+X name "Trematode Mitochondrial" ,
+X id 21 ,
+X sncbieaa "-----------------------------------M---------------M------------"
+X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+X
+static
+char *AA22 ="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X name "Scenedesmus obliquus Mitochondrial" ,
+X id 22 ,
+X sncbieaa "-----------------------------------M----------------------------"
+X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+X
+static
+char *AA23 ="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+/*
+X name "Thraustochytrium Mitochondrial" ,
+X id 23 ,
+X sncbieaa "--------------------------------M--M---------------M------------"
+X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+*/
+X
+X
+static char aacmap[64]={
+X 'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
+X 'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
+X 'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
+X '*','Y','*','Y','S','S','S','S','*','C','W','C','L','F','L','F'
+};
+X
+static int aamap[64]; /* integer aa values */
+static int aamapr[64]; /* reverse sequence map */
+X
+/* tnt is used only by aatran.c. It must be consistent with lascii and
+the nt alphabet. It uses 3,3 because T and U are considered separately
+*/
+static int tnt[]={0,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0,
+X 0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0};
+X
+static int debug_set;
+X
+/* aatran() - translate one reading frame of an encoded nucleotide
+X  sequence into amino-acid codes.
+X
+X  ntseq  encoded nucleotides; each value indexes tnt[]
+X  aaseq  output: the translation followed by EOSEQ
+X  maxs   number of residues in ntseq
+X  frame  0..2  read forward from ntseq[frame], using aamap[]
+X         3..5  read backward from ntseq[maxs-(frame-3)-1], using aamapr[]
+X
+X  Returns the number of codons translated, or 0 (with aaseq[0]=EOSEQ) when
+X  three or fewer complete codons are available.
+X
+X  aainit() must have been called first to fill aamap[] and aamapr[].
+*/
+int
+aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame)
+{
+X  int iaa, im, nna, i;          /* i is used only by the DEBUG recovery code */
+X  register int *nnp;
+X  const unsigned char *nts0;
+X  register int *aamp;
+X  register unsigned char *aap;
+X
+X  /* number of complete codons in this frame */
+X  iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
+X  if (nna <= 3 ) {
+X    aaseq[0]=EOSEQ;
+X    return 0;
+X  }
+X
+X  nnp = tnt;
+X
+X  if (frame < 3) {
+X    aamp = aamap;
+X    nts0 = &ntseq[frame];
+X    aap = aaseq;
+X    while (nna--) {
+X      /* 2 bits per base -> 6-bit codon index */
+X      im = nnp[*nts0++]<<4;
+X      im += nnp[*nts0++]<<2;
+X      im += nnp[*nts0++];
+X      *aap++ = aamp[im];
+X
+X      /* this check is included because of a bug in tfasty
+X         which occurs only during the alignment process */
+X
+#ifdef DEBUG
+X      if (debug_set && aamp[im] > MAXUC) {
+X        /* the codon just read is at nts0[-3], nts0[-2], nts0[-1] */
+X        fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
+X                *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
+X
+X        /* this allows recovery, but should not be done frequently */
+X        for (i=0; i<64; i++) {
+X          aamap[i]=aascii[aacmap[i]];
+X          aamapr[i]=aascii[aacmap[(~i)&63]];
+X        }
+X        *(aap-1) = aamp[im];
+X      }
+#endif
+X    }
+X  }
+X  else {
+X    aamp = aamapr;
+X    nts0 = &ntseq[maxs-(frame-3)];
+X    aap = aaseq;
+X    while (nna--) {
+X      im = nnp[*--nts0]<<4;
+X      im += nnp[*--nts0]<<2;
+X      im += nnp[*--nts0];
+X      *aap++ = aamp[im];
+X      /* this check is included because of a bug in tfasty
+X         which occurs only during the alignment process */
+X
+#ifdef DEBUG
+X      if (debug_set && aamp[im] > MAXUC) {
+X        /* Reading backwards, the codon just read is at nts0[2], nts0[1],
+X           nts0[0] (in read order).  Negative offsets would report the next
+X           codon and, on the last codon, could read before ntseq[0]. */
+X        fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
+X                *(nts0+2),*(nts0+1),*nts0, im, aamp[im],aamap[im]);
+X
+X        /* this allows recovery, but should not be done frequently */
+X        for (i=0; i<64; i++) {
+X          aamap[i]=aascii[aacmap[i]];
+X          aamapr[i]=aascii[aacmap[(~i)&63]];
+X        }
+X        *(aap-1) = aamp[im];
+X      }
+#endif
+X    }
+X  }
+X  aaseq[iaa]=EOSEQ;
+X  return iaa;
+}
+X
+/* slower version that masks out NNN,XXX */
+X
+/* - A C G T U R Y M W S K D H V B N X */
+static int snt[]={0,0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4};
+X
+/* saatran() - translate one reading frame, emitting 'X' for any codon that
+X  contains a base which snt[] maps to a value of 4 or more.
+X
+X  Frames 0..2 read forward from ntseq[frame] using aamap[]; frames 3..5
+X  read backward from ntseq[maxs-(frame-3)-1] using aamapr[].
+X
+X  Returns the number of codons written to aaseq (EOSEQ is stored after
+X  them), or 0 with aaseq[0]=EOSEQ when three or fewer codons are available.
+*/
+int
+saatran(const unsigned char *ntseq,
+X        unsigned char *aaseq, int maxs, int frame)
+{
+X  int n_codons, left;
+X  int c1, c2, c3;               /* snt[] codes of the three bases, in read order */
+X  const int *code = snt;
+X  const int *aa_tab;
+X  const unsigned char *np;
+X  unsigned char *out = aaseq;
+X
+X  n_codons = (maxs-(frame<3?frame:frame-3))/3;
+X  if (n_codons <= 3) {
+X    aaseq[0]=EOSEQ;
+X    return 0;
+X  }
+X
+X  if (frame < 3) {
+X    aa_tab = aamap;
+X    np = &ntseq[frame];
+X    for (left = n_codons; left > 0; left--) {
+X      c1 = code[*np++];
+X      c2 = code[*np++];
+X      c3 = code[*np++];
+X      if (c1 < 4 && c2 < 4 && c3 < 4)
+X        *out++ = aa_tab[(c1<<4) + (c2<<2) + c3];
+X      else
+X        *out++ = aascii['X'];
+X    }
+X  }
+X  else {
+X    aa_tab = aamapr;
+X    np = &ntseq[maxs-(frame-3)];
+X    for (left = n_codons; left > 0; left--) {
+X      c1 = code[*--np];
+X      c2 = code[*--np];
+X      c3 = code[*--np];
+X      if (c1 < 4 && c2 < 4 && c3 < 4)
+X        *out++ = aa_tab[(c1<<4) + (c2<<2) + c3];
+X      else
+X        *out++ = aascii['X'];
+X    }
+X  }
+X
+X  aaseq[n_codons]=EOSEQ;
+X  return n_codons;
+}
+X
+/* aainit() - select a genetic code and (re)build the codon lookup tables
+X  aamap[] (forward frames) and aamapr[] (reverse frames).
+X
+X  tr_type  translation table number.  Values 1-6, 9-16 and 21-23 select
+X           the matching AA<n> table; 7 is kept as an alias for the
+X           Echinoderm code (transl_table=9, stored as AA7) so that existing
+X           callers keep working.  Any other positive value selects AA1.
+X           With tr_type <= 0, aacmap[] is used as it stands: the compiled-in
+X           table, or whatever an earlier call installed.
+X  debug    non-zero: list on stderr every codon whose amino acid changes,
+X           and enable the consistency check in aatran() (DEBUG builds)
+X
+X  Note that a positive tr_type overwrites aacmap[] in place.
+*/
+void
+aainit(int tr_type, int debug)
+{
+X  int i;
+X  char *aasmap;
+X  /* The AA<n> strings list codons in T,C,A,G order, while aacmap[] is
+X     indexed with A=0, C=1, G=2, T=3; imap[] converts between the two. */
+X  int imap[4]={3,1,0,2}, i0, i1, i2, ii;
+X
+X  debug_set = debug;
+X
+X  aasmap = AA1;
+X  if (tr_type > 0) {
+X    /* need to put in a new translation table */
+X    switch (tr_type) {
+X    case 1: aasmap = AA1; break;
+X    case 2: aasmap = AA2; break;
+X    case 3: aasmap = AA3; break;
+X    case 4: aasmap = AA4; break;
+X    case 5: aasmap = AA5; break;
+X    case 6: aasmap = AA6; break;
+X    case 7:                     /* historical alias for table 9 */
+X    case 9: aasmap = AA7; break;
+X    case 10: aasmap = AA10; break;
+X    case 11: aasmap = AA11; break;
+X    case 12: aasmap = AA12; break;
+X    case 13: aasmap = AA13; break;
+X    case 14: aasmap = AA14; break;
+X    case 15: aasmap = AA15; break;
+X    case 16: aasmap = AA16; break;
+X    case 21: aasmap = AA21; break;
+X    case 22: aasmap = AA22; break;
+X    case 23: aasmap = AA23; break;
+X
+X    default: aasmap = AA1; break;
+X    }
+X
+X    if (debug) fprintf(stderr," codon table: %d\n     new old\n",tr_type);
+X    for (i0 = 0; i0 < 4; i0++)
+X      for (i1 = 0; i1 < 4; i1++)
+X        for (i2 = 0; i2 < 4; i2++) {
+X          ii = (imap[i0]<<4) + (imap[i1]<<2) + imap[i2];
+X          if (debug && aacmap[ii] != *aasmap)
+X            fprintf(stderr," %c%c%c: %c - %c\n",
+X                    nt[imap[i0]+1],nt[imap[i1]+1],nt[imap[i2]+1],
+X                    *aasmap,aacmap[ii]);
+X          aacmap[ii]= *aasmap++;
+X        }
+X
+X    /*
+X    for (i=0; i<64; i++) {
+X      fprintf(stderr,"'%c',",aacmap[i]);
+X      if ((i%16)==15) fputc('\n',stderr);
+X    }
+X    fputc('\n',stderr);
+X    */
+X  }
+X  /* (~i)&63 complements every base of the codon index, giving the
+X     reverse-strand map used for frames 3..5 */
+X  for (i=0; i<64; i++) {
+X    aamap[i]=aascii[aacmap[i]];
+X    aamapr[i]=aascii[aacmap[(~i)&63]];
+X  }
+}
+X
+/* aagetmap() - copy the first n entries of the current codon -> amino-acid
+X  character table (aacmap[]) into to[].  Nothing is copied when n <= 0. */
+void
+aagetmap(char *to, int n)
+{
+X  const char *src = aacmap;
+X  char *dst = to;
+X  int left;
+X
+X  for (left = n; left > 0; left--) {
+X    *dst++ = *src++;
+X  }
+}
+SHAR_EOF
+chmod 0644 faatran.c ||
+echo 'restore of faatran.c failed'
+Wc_c="`wc -c < 'faatran.c'`"
+test 13742 -eq "$Wc_c" ||
+ echo 'faatran.c: original size 13742, current size' "$Wc_c"
+fi
+# ============= fast_new ==============
+if test -f 'fast_new' -a X"$1" != X"-c"; then
+ echo 'x - skipping fast_new (File already exists)'
+else
+echo 'x - extracting fast_new (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fast_new' &&
+NBRF PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/blast/pir1.lseg
+NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0+nr+/slib2/blast/nr.lseg
+NCBI/Blast Swissprot$0+sp+/slib2/blast/swissprot.lseg
+GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/blast/genpept.fsa
+Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
+Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
+C. elegans blast server$0W/slib2/blast/C.elegans_blast.fa
+E. coli proteome$0E/slib0/genomes/ecoli.npep
+H. influenzae proteome$0I/slib0/genomes/hinf.npep
+H. pylori proteome$0L/slib0/genomes/hpyl.npep
+NCBI Entrez Human proteins$0H/slib2/blast/human.aa
+M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
+M. jannaschii proteome$0J/slib0/genomes/mjan.npep
+Synechocystis proteome$0C/slib0/genomes/synecho.npep
+GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
+GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
+GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
+GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
+GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
+GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
+GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
+GB108.0 High throughput genomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
+GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
+GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
+GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
+GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
+GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
+GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
+GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
+Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
+E. coli genome$1D/slib0/genomes/ecoli.gbk 1
+Blast Human ESTs$1F/slib2/blast/est_human
+TIGR Human Gene Index$1K/slib2/blast/HGI.nr.031898
+Blast Mouse ESTs$1C/slib2/blast/est_mouse
+TIGR Mouse Gene Index$1J/slib2/blast/MGI.nr.022498
+NCBI/BLAST NR DNA$1n/slib2/blast/nt
+SHAR_EOF
+chmod 0644 fast_new ||
+echo 'restore of fast_new failed'
+Wc_c="`wc -c < 'fast_new'`"
+test 1959 -eq "$Wc_c" ||
+ echo 'fast_new: original size 1959, current size' "$Wc_c"
+fi
+# ============= fasta.defaults ==============
+if test -f 'fasta.defaults' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta.defaults (File already exists)'
+else
+echo 'x - extracting fasta.defaults (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta.defaults' &&
+#pgm mol matrix g_open g_ext fr_shft e_cut ktup
+# -n/-p -s -e -f -h/-j -E argv[3]
+fasta prot bl50 -10 -2 - 10.0 2
+fasta dna +5/-4 -14 -4 - 2.0 6
+ssearch prot bl50 -10 -2 - 10.0 -
+ssearch dna +5/-4 -14 -4 - 2.0 -
+fastx prot BL50 -12 -2 -20 5.0 2
+fasty prot BL50 -12 -2 -20/-24 5.0 2
+tfastx dna BL50 -14 -2 -20 5.0 2
+tfasty dna BL50 -14 -2 -20/-24 5.0 2
+fasts prot MD20-MS - - - 5.0 -
+tfasts prot MD10-MS - - - 2.0 -
+fastf prot MD20 - - - 5.0 -
+tfastf prot MD10 - - - 2.0 -
+fastm prot MD20 - - - 5.0 -
+tfastm prot MD10 - - - 2.0 -
+SHAR_EOF
+chmod 0644 fasta.defaults ||
+echo 'restore of fasta.defaults failed'
+Wc_c="`wc -c < 'fasta.defaults'`"
+test 529 -eq "$Wc_c" ||
+ echo 'fasta.defaults: original size 529, current size' "$Wc_c"
+fi
+# ============= fasta.options ==============
+if test -f 'fasta.options' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta.options (File already exists)'
+else
+echo 'x - extracting fasta.options (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta.options' &&
+doinit.c
+X case 'a': m_msg->aln.showall = 1;
+X case 'B': m_msg->z_bits = 0;
+X case 'b': m_msg->mshow
+X case 'C': m_msg->nmlen
+X case 'd': m_msg->ashow);
+X case 'D': ppst->debug_lib = 1;
+X case 'E': m_msg->e_cut
+X case 'F': m_msg->e_low
+X case 'f': ppst->gdelval
+X case 'g': ppst->ggapval
+X case 'H': m_msg->nohist = 1; break;
+X case 'i': m_msg->revcomp = 1; break;
+X case 'I': m_msg->self = 1; break;
+X case 'J': m_msg->ql_start, ql_stop
+X case 'K': max_buf_cnt (PCOMPLIB)
+X case 'l': m_msg->flstr
+X case 'L': m_msg->long_info = 1
+X case 'M': m_msg->n1_low,&m_msg->n1_high
+X case 'm': m_msg->markx
+X case 'n': m_msg->qdnaseq = 1
+X case 'N': m_msg->maxn
+X case 'p': m_msg->qdnaseq = 0;
+X case 'O': m_msg->outfile
+X case 'q':
+X case 'Q': m_msg->quiet = 1;
+X case 'r': ppst->p_d_mat,&ppst->p_d_mis
+X case 'R': m_msg->dfile
+X case 's': standard_pam(smstr); ppst->pamoff=atoi(bp+1);
+X case 'S': ppst->ext_sq_set = 1;
+X case 't': ppst->tr_type
+X case 'T': PCOMPLIB: worker_1,worker_n
+X _t: max_workers
+X case 'v': ppst->zs_win
+X case 'w': m_msg->aln.llen
+X case 'W': m_msg->aln.llcntx);
+X case 'X': m_msg->sq0off,&m_msg->sq1off
+X case 'x': ppst->pam_x
+X case 'z': ppst->zsflag
+X case 'Z': ppst->zdb_size
+X
+initfa.c
+X case '1': ppst->param_u.fa.iniflag=1;
+X case '3': m_msg->nframe = 3; /* TFASTA */
+X m_msg->nframe = 1; /* for TFASTXY */
+X m_msg->qframe = 1; /* for FASTA, FASTX */
+X case 'A': ppst->sw_flag= 1;
+X case 'c': ppst->param_u.fa.optcut
+X case 'h': ppst->gshift
+X case 'j': ppst->gsubs
+X case 'o': ppst->param_u.fa.optflag = 0;
+X case 'y': ppst->param_u.fa.optwid
+X
+initsw.c
+X case '3': m_msg->qframe = m_msg->nframe = 1;
+SHAR_EOF
+chmod 0644 fasta.options ||
+echo 'restore of fasta.options failed'
+Wc_c="`wc -c < 'fasta.options'`"
+test 1670 -eq "$Wc_c" ||
+ echo 'fasta.options: original size 1670, current size' "$Wc_c"
+fi
+# ============= fasta20.doc ==============
+if test -f 'fasta20.doc' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta20.doc (File already exists)'
+else
+echo 'x - extracting fasta20.doc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta20.doc' &&
+X
+X COPYRIGHT NOTICE
+X
+Copyright 1988, 1991, 1992, 1994, 1995, 1996 by William R.
+Pearson and the University of Virginia. All rights reserved. The
+FASTA program and documentation may not be sold or incorporated
+into a commercial product, in whole or in part, without written
+consent of William R. Pearson and the University of Virginia.
+For further information regarding permission for use or
+reproduction, please contact: David Hudson, Assistant Provost for
+Research, University of Virginia, P.O. Box 9025, Charlottesville,
+VA 22906-9025, (434) 924-6853
+X
+X
+The FASTA program package
+X
+Introduction
+X
+X This documentation describes version 2.0x of the FASTA
+program package (see W. R. Pearson and D. J. Lipman (1988),
+"Improved Tools for Biological Sequence Analysis", PNAS 85:2444-
+2448, and W. R. Pearson (1990) "Rapid and Sensitive Sequence
+Comparison with FASTP and FASTA" Methods in Enzymology 183:63-
+98). Version 2.0 modifies version 1.8 to include explicit
+statistical estimates for similarity scores based on the extreme
+value distribution. In addition, FASTA protein alignments now
+use the Smith-Waterman algorithm with no limitation on gap size.
+FASTA and SSEARCH now use the BLOSUM50 matrix by default, with
+options to change gap penalties on the command line. Version 1.7
+replaces rdf2 and rss with prdf and prss, which use the extreme-
+value distribution to calculate accurate probability estimates.
+X
+X
+Although there are a large number of programs in this package,
+they belong to four groups:
+X
+X
+X Library search programs: FASTA, FASTX, TFASTA, TFASTX, SSEARCH
+X
+X Local homology programs: LFASTA, PLFASTA, LALIGN, PLALIGN, FLALIGN
+X
+X Statistical significance: PRDF, RELATE, PRSS, RANDSEQ
+X
+X Global alignment: ALIGN
+X
+X
+X
+In addition, I have included several programs for protein
+sequence analysis, including a Kyte-Doolittle hydropathicity
+plotting program (GREASE, TGREASE), and a secondary structure
+prediction package (GARNIER).
+X
+X The FASTA sequence comparison programs on this disk are
+improved versions of the FASTP program, originally described in
+Science (Lipman and Pearson, (1985) Science 227:1435-1441). We
+have made several improvements. First, the library search
+programs use a more sensitive method for the initial comparison
+of two sequences which allows the scores of several similar
+regions to be combined. As a result, the results of a library
+search are now given with three scores, initn (the new initial
+score which may include several similar regions), init1 (the old
+fastp initial score from the best initial region), and opt (the
+old fastp optimized score allowing gaps in a 32 residue wide
+band).
+X
+X These programs have also been modified to become "universal"
+(hence FAST-A, for FASTA-All, as opposed to FAST-P (protein) or
+FAST-N (nucleotides)); by changing the environment variable
+SMATRIX, the programs can be used to search protein sequences,
+DNA sequences, or whatever you like. By default, FASTA, LFASTA,
+and the PRDF programs automatically recognize protein and DNA
+sequences. Sequences are first read as amino acids, and then
+converted to nucleotides if the sequence is greater than 85%
+A,C,G,T (the '-n' option can be used to indicate DNA sequences).
+TFASTA compares protein sequences to a translated DNA sequence.
+Alternative scoring matrices can also be used. In addition to
+the BLOSUM50 matrix for proteins, the PAM250 matrix or matrices
+based on simple identities or the genetic code can also be used
+for sequence comparisons or evaluation of significance. Several
+different protein sequence matrices have been included;
+instructions for constructing your own scoring matrix are
+included in the file FORMAT.DOC.
+X
+X
+The remainder of this document is divided into three sections:
+(1) a brief history of the changes to the FASTA package; (2) A
+guide to installing the programs and databases; (3) A guide to
+using the FASTA programs. The programs are very easy to use, so
+if you are using them on a machine that is administered by
+someone else, you may want to skip to section (3) to learn how to
+use the programs, and then read section (1) to look at some of
+the more recent changes. If you are installing the programs on
+your own machine, you will need to read section (2) carefully.
+X
+X
+1. Revision History
+X
+1.1. Changes with version 2.0u
+X
+X Version 2.0u provides several major improvements over
+previous versions of FASTA (and SSEARCH). The most important is
+the incorporation of explicit statistical estimates and
+appropriate normalization of similarity scores. This improvement
+is discussed in more detail below in the section entitled
+Statistical Significance. In addition, all of the protein
+comparison programs now use the BLOSUM50 matrix, with gap
+penalties of -12, -2, by default. BLOSUM50 performs
+significantly better than the older PAM250 matrix. PAM250 can
+still be used with the command line option: -s 250. (DNA
+sequence comparisons use a more stringent gap penalty of -16, -4,
+which produces excellent statistical estimates when optimized
+scores are used. TFASTA uses -16, -4 as well.)
+X
+X The quality of the fit of the extreme value distribution to
+the actual distribution of similarity scores is summarized with
+the Kolmogorov-Smirnov statistic. The acceptance limits for this
+statistic can be found in many statistics books. In general,
+values <0.10 (N=30) indicate excellent agreement between the
+actual and theoretical distributions. If this statistic is >
+0.2, consider using a higher (more stringent) gap penalty, e.g.
+-16, -4 rather than -12, -2. The default scoring matrix for DNA
+has been changed to score +5 for an identity and -4 for a
+mismatch. These are the same scores used by BLASTN.
+X
+X With explicit expectation calculations, the program now
+shows all scores and alignments with expectations less than 10.0
+(with optimized scores, 2.0 without optimization) when the "-Q"
+(quiet) mode is used. The expectation threshold can be changed
+with the "-E" option.
+X
+X Finally, the algorithm used to produce the final alignments
+of protein sequences is now a full Smith-Waterman, with unlimited
+gaps. (The older band-limited alignments are used for DNA
+sequences and TFASTA by default, because Smith-Waterman
+alignments are very slow for long sequences.) Both the optimized
+and Smith-Waterman scores are reported; if the Smith-Waterman
+score is higher, then additional gaps allowed a better alignment
+and similarity score to be calculated.
+X
+X FASTA searches now optimize similarity scores by default
+(this slows searches about 2-fold (worst case) for ktup=2). Thus,
+the meaning of the "-o" option has been reversed; "-o" now turns
+off optimization and reports results sorted by "initn" scores.
+Optimization significantly improves the sensitivity of FASTA, so
+that it almost matches Smith-Waterman. With version 2.0, the
+default band width used for optimized calculations can be varied
+with the "-y" option. For proteins with ktup=2, a width of 16
+(-y 16) is used; 16 is also used for DNA sequences. For proteins
+and ktup=1, a width of 32 is used. Searches that disable
+optimization with the "-o" option will work fine for sequences
+that share 25% or more identity in general, but to detect
+evolutionary relationships with 20% - 25% identity, the more
+sensitive default optimization is often required. Optimization
+is required for accurate statistical estimates with either
+protein or DNA sequences.
+X
+X The FASTA package now includes FASTX, a program that
+compares a DNA sequence to a protein sequence database by
+translating the DNA sequence in three frames (the reverse frames
+are selected with the -i option) and aligning the three-frame
+translation with the sequences in the protein database.
+Alignment scores allow frameshifts so that a cDNA or EST sequence
+with insertion/deletion errors can be aligned with its homologues
+from beginning to end.
+X
+X With release 20u6, there is also a TFASTX program, which is
+a replacement for TFASTA. TFASTA treats each of the six reading
+frames of a DNA library sequence as a different sequence; TFASTX
+compares a protein sequence against only two sequences from each
+DNA sequence - the forward and reverse orientation. For a given
+orientation, TFASTX calculates a similarity score for alignments
+that allow frameshifts, thus considering all possible reading
+frames.
+X
+X Another new program is included - randseq - which will
+produce a randomly shuffled sequence (uniform or local shuffle) from an
+input sequence. This randomly shuffled sequence can be used to
+evaluate the statistical estimates produced by FASTA, SSEARCH, or
+BLAST.
+X
+1.2. Changes with version 1.7
+Version 1.7 has been released to provide the PRDF and PRSS
+programs for shuffling sequences and estimating accurately the
+probabilities of the unshuffled-sequence scores.
+X
+PRDF a version of RDF2 that calculates the probability
+X of a similarity score more accurately by using a fit to
+X an extreme value distribution. Code to fit the extreme
+X value distribution parameters and the impetus to update
+X RDF2 was provided by Phil Green, U. of Washington.
+X
+PRSS a version of PRDF that uses a rigorous Smith-Waterman
+X calculation to score similarities
+X
+1.3. Changes with version 1.6
+X
+X FASTA version 1.6 uses a new method for calculating optimal
+scores in a band (the optimization or last step in the FASTA
+algorithm). In addition, it uses a linear-space method for
+calculating the actual alignments. FASTA v1.6 package includes
+several new programs:
+X
+SSEARCH a program to search a sequence database using the
+X rigorous Smith-Waterman algorithm (this program is
+X about 100-fold slower than FASTA with ktup=2 (for
+X proteins)).
+X
+LALIGN A rigorous local sequence alignment program that will
+X display the N-best local alignments (N=10 by default).
+X
+PLALIGN a version of lalign that plots the local alignments to
+X a tektronix display.
+X
+FLALIGN a version of lalign that plots the local alignments to
+X a GCG Figure file.
+X
+X The LALIGN/PLALIGN/FLALIGN programs incorporate the "sim"
+algorithm described by Huang and Miller (1991) Adv. Appl. Math.
+12:337-357. The SSEARCH and PRSS programs incorporate algorithms
+described by Huang, Hardison, and Miller (1990) CABIOS 6:373-381.
+X
+X LFASTA and PLFASTA now calculate a different number of local
+similarities; they now behave more like LALIGN/PLALIGN. Since
+local alignments of identical sequences produce "mirror-image"
+alignments, lalign and lfasta consider only one-half of the
+potential alignments between sequences from identical file names.
+Thus
+X
+X lfasta mchu.aa mchu.aa
+X
+displays only two alignments; with earlier versions of the
+program, it would have displayed five, including the identity
+alignment. PLFASTA does display five alignments; when two
+identical filenames are given, it draws the identity alignment,
+calculates the two unique local alignments, draws them, and draws
+their mirror images. LFASTA/PLFASTA and LALIGN/PLALIGN use the
+filenames, rather than the actual sequences, to determine whether
+sequences are identical; you can "trick" the programs into
+behaving the old way by putting the same sequence in two
+different files.
+X
+1.4. Changes with version 1.5
+X
+X FASTA version 1.5 includes a number of substantial revisions
+to improve the performance and sensitivity of the program. It is
+now possible to tell the program to optimize all of the initn
+scores greater than a threshold. The threshold is set at the
+same value as the old FASTA cutoff score. Alternatively, you can
+tell FASTA to sort the results by the init1, rather than the
+initn, score by using the -1 option. FASTA -1 ... will report
+the results the way the older FASTP program did.
+X
+X A new method has been provided for selecting libraries. In
+the past, one could enter the name of a sequence file to be
+searched or a single letter that would specify a library from the
+list included in the $FASTLIBS file. Now, you can specify a set
+of library files with a string of letters preceded by a '%'.
+Thus, if the FASTLIBS file has the lines:
+X
+X Genbank 70 primates$1P/seqlib/gbpri.seq 1
+X Genbank 70 rodents$1R/seqlib/gbrod.seq 1
+X Genbank 70 other mammals$1M/seqlib/gbmam.seq 1
+X Genbank 70 vertebrates $1B/seqlib/gbvrt.seq 1
+X
+Then the string: "%PRMB" would tell FASTA to search the four
+libraries listed above. The %PRMB string can be entered either
+on the command line or when the program asks for a filename or
+library letter.
+X
+X FASTA1.5 also provides additional flexibility for specifying
+the number of results and alignments to be displayed with the -Q
+(quiet) option. The -b number option allows you to specify the
+number of sequence scores to show when the search is finished.
+Thus
+X
+X
+X FASTA -b 100 ...
+X
+X
+tells the program to display the top 100 sequence scores. In the
+past, if you displayed 100 scores (in -Q mode), you would also
+have to store 100 alignments. The -d option allows you to limit the
+number of alignments shown. FASTA -b 100 -d 20 would show 100
+scores and 20 alignments.
+X
+X Finally, FASTA can provide a complete list of all of the
+sequences and scores calculated to a file with the -r (results)
+option. FASTA -r results.out ... creates a file with a list of
+scores for every sequence in the library. The list is not
+sorted, and only includes those scores calculated during the
+initial scan of the library.
+X
+2. Installing the FASTA package
+X
+2.1. Installing the programs
+X
+2.1.1. Unix version
+X
+X The FASTA distribution comes with several makefiles that
+can be used to compile the FASTA programs. Over the years, as
+ATT Unix System 5 and BSD unix have converged, these files have
+become very similar. To begin with, I recommend using the
+standard Makefile. There are two values in the makefile that
+should be checked against the values used on your system: the HZ
+value, which is the frequency in ticks per second used by the
+times() system call, this value can usually be found by running:
+X
+X grep HZ /usr/include/sys/*
+X
+and the functions available to return random numbers. If you
+have a rand48() function that returns a 32-bit random number, use
+it and use the lines:
+X
+X NRAND=nrand48
+X RANFLG= -DRAND32
+X
+If not, you will need to use the rand() function call and
+determine whether it returns a 16-bit or a 32-bit value. These
+functions are used by PRDF and PRSS. If you have problems
+compiling the programs, you may want to examine the makefile.unx
+and makefile.sun files, to look for differences. I have tried to
+use very standard unix functions in these programs, and they have
+been successfully compiled, with very small changes to the
+Makefile, on Sun's (Sun OS 4.1), IBM RS/6000's (AIX), and MIPS
+machines (under the BSD environment).
+X
+2.1.2. IBM-PC/DOS version
+X
+X For the IBM-PC/DOS version, the FASTA source code disk
+contains the complete source code to all of the programs on the
+other disks. The programs were compiled with Borland's Turbo
+'C++', using Borland's MAKE utility. The graphics programs
+(PLFASTA, TGREASE) use the graphics device drivers supplied with
+the Turbo 'C' V2.0 package. Also included are the documentation
+files PROGRAMS.DOC and FORMAT.DOC. You do not need any of the
+files on the source code disk to run the programs. The files on
+this disk are identical to the UNIX and VMS versions that run on
+larger machines. Also included is the code to compile
+ALIGN0.EXE. ALIGN0 is the same as ALIGN, but does not penalize
+for end-gaps.
+X
+X If you have the DOS or Macintosh version of the FASTA
+package, to install the programs you should:
+X
+X (1) Make a new directory (folder) for the FASTA programs.
+X This need not be the same as the directory for your
+X sequence databases.
+X
+X (2) Copy the files from the FASTA source disk to the new
+X directory.
+X
+X (3) (DOS only) Edit your AUTOEXEC.BAT file to (a) modify your
+X PATH command to include the FASTA directory and (b) add
+X the line:
+X
+X set FASTLIBS=c:\yourfastadirectory\fastgbs
+X
+X On the Macintosh, you may need to edit the "environment"
+X file and change the line that reads:
+X
+X FASTLIBS=fastgbs
+X
+X to indicate the full directory path for the fastgbs file,
+X for example:
+X
+X FASTLIBS=Q105:FASTA:fastgbs
+X
+X
+X (4) Finally, you will need to edit the fastgbs file. This is
+X usually the most confusing part of the installation. An
+X example of this file is shown below; to customize this
+X file for your machine, you will need to change the file
+X names from those provided in the fastgbs file to ones that
+X reflect the directory names and file names you use on your
+X machine. This is explained in more detail below. In
+X addition, some entries in the fastgbs file refer to other
+X files of file names. These files of file names (as
+X opposed to actual database files) may also need to be
+X edited.
+X
+2.2. Installing the libraries
+X
+2.2.1. The NBRF protein sequence library
+X
+X The FASTA program package does not include any protein or
+DNA sequence libraries. You can obtain the PIR protein sequence
+database from:
+X
+X National Biomedical Research Foundation
+X Georgetown University Medical Center
+X 3900 Reservoir Rd, N.W.
+X Washington, D.C. 20007
+X
+In addition, this database is available via anonymous ftp from
+the host "ftp.bchs.uh.edu". It is available in two formats, VMS
+and CODATA format. The "VMS" format (library type 5 below) can
+be searched much faster, can be easily reformatted for use by the
+"BLAST" rapid searching program, and is compatible with the
+Genetics Computer Group package of programs. The CODATA format
+is used by the EUGENE/MBIR computing package from Baylor (library
+type 2).
+X
+2.2.2. The GENBANK DNA sequence library
+X
+X FASTA, and TFASTA search sequences from the GENBANK
+"flatfile" (not ASN.1) DNA sequence library in the flat-file
+format distributed by the National Center for Biotechnology
+Information and the PIR format used by EBI/EMBL. CD-ROMs can be
+obtained from:
+X
+X Genbank
+X National Center for Biotechnology Information
+X National Library of Medicine
+X National Institutes of Health
+X 8600 Rockville Pike
+X Bethesda, MD 20894
+X
+X
+X The GenBank DNA sequence library is also available via
+anonymous FTP from ncbi.nlm.nih.gov.
+X
+2.2.3. The EBI/EMBL CD-ROM libraries
+X
+X The European Bioinformatics Institute (EBI) is now
+distributing the EMBL CD-ROM that contains both the complete EMBL
+DNA sequence database (which should be essentially identical to
+the GenBank DNA sequence database) and the SWISS-PROT protein
+sequence database. SWISS-PROT is derived from the NBRF Protein
+sequence database with additions from the EBI/EMBL DNA sequence
+database. This CD-ROM is a "best-buy," since it provides both
+DNA and protein sequence libraries. It is available from:
+X
+X
+X European Bioinformatics Institute
+X Hinxton Genome Campus, Hinxton Hall
+X Hinxton, Cambridge CB10 1RQ,
+X United Kingdom
+X Tel: +44 1223 4944
+X Fax: +44 1223 494468
+X Email: DATALIB@ebi.ac.uk
+X
+X
+X
+X In addition, the SWISS-PROT protein sequence database is
+available via anonymous FTP from ncbi.nlm.nih.gov.
+X
+2.3. Finding the libraries: FASTLIBS
+X
+X FASTA and TFASTA use the environment variable FASTLIBS to
+find the protein and DNA sequence libraries. The FASTLIBS
+variable contains the name of a file that has the actual
+filenames of the libraries. The FASTGBS file is an example of
+a file that can be referred to by FASTLIBS. To use the FASTGBS
+file, type:
+X
+X setenv FASTLIBS /usr/lib/fasta/fastgbs (BSD UNIX/csh)
+X or
+X export FASTLIBS=/usr/lib/fasta/fastgbs (SysV UNIX/ksh)
+X
+Then edit the FASTGBS file to indicate where the protein and DNA
+sequence libraries can be found. If you have a hard disk and
+your protein sequence library is kept in the file
+/usr/lib/aabank.lib and your Genbank DNA sequence library is kept
+in the directory: /usr/lib/genbank, then fastgbs might contain:
+X
+X NBRF Protein$0P/usr/lib/seq/aabank.lib 0
+X SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
+X GB Primate$1P@/usr/lib/genbank/gpri.nam
+X GB Rodent$1R@/usr/lib/genbank/grod.nam
+X GB Mammal$1M@/usr/lib/genbank/gmammal.nam
+X ^ 1 ^^^^ 4 ^ ^
+X 23 (5)
+X
+The first line of this file says that there is a copy of the NBRF
+protein sequence database (which is a protein database) that can
+be selected by typing "P" on the command line or when the
+database menu is presented in the file /usr/lib/seq/aabank.lib.
+X
+X Note that there are 4 or 5 fields in the lines in fastgbs.
+The first field is the description of the library which will be
+displayed by FASTA; it ends with a '$'. The second field (1
+character), is a 0 if the library is a protein library and 1 if
+it is a DNA library. The third field (1 character) is the
+character to be typed to select the library.
+X
+X The fourth field is the name of the library file. In the
+example above, the /usr/lib/seq/aabank.lib file contains the
+entire protein sequence library. However the DNA library file
+names are preceded by a '@', because these files (gpri.nam,
+grod.nam, gmammal.nam) do not contain the sequences; instead they
+contain the names of the files which contain the sequences. This
+is done because the GENBANK DNA database is broken down into a
+large number of smaller files. In order to search the entire
+primate database, you must search more than a dozen files.
+X
+X In addition, an optional fifth field can be used to specify
+the format of the library file. Alternatively, you can specify
+the library format in a file of file names (a file preceded by an
+'@'). This field must be separated from the file name by a space
+character (' '). In the example above, the
+aabank.lib file is in Pearson/FASTA format, while the swiss.seq
+file is in PIR/VMS format (from the EMBL CD-ROM). Currently,
+FASTA can read the following formats:
+X
+X 0 Pearson/FASTA (>SEQID - comment/sequence)
+X 1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
+X 2 NBRF CODATA (ENTRY/SEQUENCE)
+X 3 EMBL/SWISS-PROT (ID/DE/SQ)
+X 4 Intelligenetics (;comment/SEQID/sequence)
+X 5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
+X 6 GCG (version 8.0) Unix Protein and DNA (compressed)
+X 11 NCBI Blast1.3.2 format (unix only)
+X
+In particular, this version will work with the EMBL and PIR VMS
+formats that are distributed on the EMBL CD-ROM. The latter
+format (PIR VMS) is much faster to search than EMBL format. This
+release also works with the protein and DNA database formats
+created for the BLASTP and BLASTN programs by SETDB and PRESSDB
+and with the new NCBI search format. If a library format is not
+specified, for example, because you are just comparing two
+sequences, Pearson/FASTA (format 0) is used by default. To
+change this default, you may set the LIBTYPE environment variable
+to a number. For example,
+X
+X setenv LIBTYPE 1
+X
+would cause the program to use the GenBank LOCUS format by
+default for libraries (or the second sequence file), but the
+Pearson/FASTA format would still be used for the query sequence.
+X
+X You can specify a group of library files by putting a '@'
+symbol before a file that contains a list of file names to be
+searched. For example, if @gmam.nam is in the fastgbs file, the
+file "gmam.nam" might contain the lines:
+X
+X </usr/lib/genbank
+X gbpri.seq 1
+X gbrod.seq 1
+X gbmam.seq 1
+X
+In this case, the line beginning with a '<' indicates the
+directory the files will be found in. The remaining lines name
+the actual sequence files. So the first sequence file to be
+searched would be:
+X
+X /usr/lib/genbank/gbpri.seq
+X
+The notation "<PIRNAQ:" might be used under the VAX/VMS operating
+system. Under UNIX, the trailing '/' is left off, so the library
+directory might be written as "</usr/seqlib".
+X
+X With version 1.4 of the FASTA package, the FASTA and TFASTA
+programs can search a library composed of different files in
+different sequence formats. For example, you may wish to search
+the Genbank files (in GenBank flat file format) and the EMBL DNA
+sequence database on CD-ROM. To do this, you simply list the
+names and filetypes of the files to be searched in a file of
+filenames. For example, to search the mammalian portion of
+Genbank, the unannotated portion of Genbank, and the unannotated
+portion of the EMBL library, you could use the file:
+X
+X </usr/lib/DNA
+X gbpri.seq 1
+X # (this '#' causes the program to display the size of the library)
+X gbrod.seq 1
+X gbmam.seq 1
+X gbuna.seq 1
+X unanno.seq 5
+X #
+X
+X You do not need to include library format numbers if you
+X only use the Pearson/FASTA version of the PIR protein se-
+X quence library. If no library type is specified, the
+X program assumes that type 0 is being used (unless you
+X have set LIBTYPE).
+X
+Support for the old compressed GenBank files, which have not been
+distributed for more than four years, has been removed from
+programs in the FASTA package.
+X
+X
+X Test the setup by running FASTA. Enter the sequence file
+'MUSPLFM.AA' when the program requests it (this file is included
+with the programs). The program should then ask you to select a
+protein sequence library. Alternatively, if you run the TFASTA
+program and use the MUSPLFM.AA query sequence, the program should
+show you a selection of DNA sequence libraries. Once the fastgbs
+file has been set up correctly, you can set FASTLIBS=fastgbs in
+your AUTOEXEC.BAT file, and you will not need to remember where
+the libraries are kept or how they are named.
+X
+X FASTA and TFASTA must open a large number of files when
+searching and reporting the results of a GENBANK floppy disk
+format library search. You may have problems with the large
+number of files under DOS on IBM-PC's (Unix and VMS users will
+not have these problems). If you are going to search the GENBANK
+floppy disk format DNA sequence library under DOS, you should add
+the line:
+X
+X FILES=16
+X
+to your CONFIG.SYS file. (Typically this is already done for
+programs like Windows or WordPerfect.)
+X
+3. Using the FASTA Package
+X
+3.1. Overview
+X
+X The FASTA sequence comparison programs all require similar
+information, the name of a query sequence file, a library file,
+and the ktup parameter. All of the programs can accept arguments
+on the command line, or they will prompt for the file names and
+ktup value.
+X
+To use FASTA, simply type:
+X
+X FASTA
+X and you will be prompted for :
+X the name of the test sequence file
+X the name of the library file
+X and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
+X
+X ktup of 2 is about 5 times faster than ktup = 1.
+X For a 200 aa sequence against a 10,000,000 aa
+X library, the program takes about 30 min with
+X ktup = 2, 150 min with ktup = 1, on a 12 Mhz 286
+X IBM-PC.
+X
+X
+The program can also be run by typing
+X
+X FASTA test.aa /lib/bigfile.lib ktup (1 or 2)
+X
+X
+Included with the package are the test files, MUSPLFM.AA,
+LCBO.AA, MCHU.AA and BOVPRL.SEQ. To check to make certain that
+everything is working, you can try:
+X
+X fasta musplfm.aa lcbo.aa
+X and
+X tfasta musplfm.aa bovprl.seq
+X
+To test the local similarity programs LFASTA and PLFASTA, try:
+X
+X lfasta mchu.aa mchu.aa
+X and
+X plfasta mchu.aa mchu.aa (use this only on an IBM-PC with graphics
+X or on a Tektronix terminal under UNIX or VMS)
+X
+MCHU (calmodulin) has four duplicated calcium binding sites that
+are clearly detected by LFASTA. For a more complicated example,
+try MWRTC1.aa, myosin heavy chain.
+X
+3.2. Sequence files
+X
+X The FASTA programs know about three kinds of sequence files
+(four under VMS): (1) plain sequence files that can only be used
+as query sequences or for LFASTA, PRDF, and ALIGN. (2) Standard
+library files. These are the same as plain sequence files, each
+sequence is preceded by a comment line with a '>' in the first
+column. (3) distributed sequence libraries (this is a broad class
+that includes the NBRF/PIR VMS and blocked ascii formats, Genbank
+flat-file format, EMBL flat-file format, and Intelligenetics
+format). All of the files that you create should be of type (1)
+or (2). Type (2) files (ones with a '>' comment line) can be used
+as query or library sequence files by all of the programs.
+X
+X I have included several sample test files, *.AA. The first
+line may begin with a '>' or ';' followed by a comment. The
+text after ';' in other lines will be ignored. Spaces and
+tabs (and anything else that is not an amino-acid code) are
+ignored.
+X
+X Library files should have the form:
+X
+X >Sequence name and identifier
+X A F A S Y T .... actual sequence.
+X F S S .... second line of sequence.
+X >Next sequence name and identifier
+X
+This is often referred to as "FASTA" or "Pearson" format. You
+can build your own library by concatenating several sequence
+files. Just be sure that each sequence is preceded by a line
+beginning with a '>' with a sequence name.
+X
+X The test file should not have lines longer than 120
+characters, and sequences entered with word processors should use
+a document mode, with normal carriage returns at the end of
+lines.
+X
+Program Summary
+X
+3.3. Sequence search programs
+X
+FASTA universal sequence comparison. Defaults to comparing
+X protein sequences; if the sequences are > 85% A+C+G+T
+X or the -n option is used, a DNA sequence is assumed.
+X
+FASTX Search a protein sequence library using amino acid
+X sequence comparison to the forward three frames of a
+X translated DNA query sequence. (The reverse frames are
+X specified with the -i option.) Alignment scores allow
+X frameshifts; the final alignment uses a Smith-Waterman
+X type alignment routine (no limit on gaps) that allows
+X frameshifts.
+X
+TFASTA Search DNA library for a protein sequence by
+X translating the DNA sequence to protein in all six
+X frames (three forward frames with the -3 command line
+X option). TFASTA with ktup=2 is about as fast as a DNA
+X FASTA with ktup=4, and is substantially more sensitive.
+X (also reads the GENBANK library)
+X
+TFASTX Search DNA library for a protein sequence by
+X translating the DNA sequence to protein in all six
+X frames (three forward frames with the -3 command line
+X option) calculating similarity scores that allow
+X frameshifts. TFASTX produces an optimal Smith-Waterman
+X alignment of the query and translated-library sequence.
+X
+SSEARCH Universal sequence comparison using the Smith-Waterman
+X algorithm ( T. F. Smith and M. S. Waterman (1981) J.
+X Mol. Biol. 147:195-197). This program uses code
+X developed by Huang and Miller (X. Huang, R. C.
+X Hardison, W. Miller (1990) CABIOS 6:373-381) for
+X calculating the local similarity score and code from
+X the ALIGN program (see below) for calculating the local
+X alignment. SSEARCH is about 50-times slower than FASTA
+X with ktup=2 (for proteins).
+X
+ALIGN optimal global alignment of two sequences with no
+X short-cuts. This program is a slightly modified
+X version of one taken from E. Myers and W. Miller. The
+X algorithm is described in E. Myers and W. Miller,
+X "Optimal Alignments in Linear Space" (CABIOS (1988)
+X 4:11-17).
+X
+3.4. Local similarity programs
+X
+LFASTA local similarity searches showing local alignments.
+X The algorithm used to calculate the local alignment in
+X a band has been improved (Chao, Pearson, and Miller,
+X submitted).
+X
+PLFASTA local similarity searches with plot output (on the IBM,
+X this program requires that the environment variable
+X BGIDIR be set).
+X
+PCLFASTA (unix only) local similarity searches with plot output
+X using pic commands.
+X
+LALIGN Calculates the N-best local alignments using a rigorous
+X algorithm. (N=10 by default.) The algorithm was
+X developed by Huang and Miller (X. Huang and W. Miller
+X (1991) Adv. Appl. Math. 12:337-357), which is a
+X linear-space version of an algorithm described by M. S.
+X Waterman and M. Eggert (J. Mol. Biol. 197:723-728).
+X Like SSEARCH, LALIGN is rigorous, but also very slow.
+X
+PLALIGN A version of LALIGN that plots its output to a screen
+X or to a Tektronix terminal emulator.
+X
+3.5. Statistical Significance
+X
+X With version 2.0 of the FASTA program distribution, FASTA,
+TFASTA, and SSEARCH now provide estimates of statistical
+significance for library searches. Work by Altschul, Arratia,
+Karlin, Mott, Waterman, and others (see Altschul et al. (1994)
+Nature Genetics 6:119 for an excellent review) suggests that
+local sequence similarity scores follow the extreme value
+distribution, so that P(s > x) = 1 - exp(-exp(-lambda(x-u))), where
+u = ln(Kmn)/lambda and m,n are the lengths of the query and
+library sequence. This formula can be rewritten as: 1 - exp(-Kmn
+exp(-lambda x)), which shows that the average score for an
+unrelated library sequence increases with the logarithm of the
+length of the library sequence. FASTA and SSEARCH use simple
+linear regression against the log of the library sequence
+length to calculate a normalized "z-score" with mean 50,
+regardless of library sequence length, and variance 10. These
+z-scores can then be used with the extreme value distribution and
+the poisson distribution (to account for the fact that each
+library sequence comparison is an independent test) to calculate
+the number of library sequences expected to obtain a score greater than or
+equal to the score obtained in the search. The original idea and
+routines to do the linear regression on library sequence length
+were provided by Phil Green, U. Washington. This version of FASTA
+and SSEARCH uses a slightly different strategy for fitting the
+data than those originally provided by Dr. Green.
+X
+X The expected number of sequences is plotted in the histogram
+using an "*". Since the parameters for the extreme value
+distribution are not calculated directly from the distribution of
+similarity scores, the pattern of "*'s" in the histogram gives a
+qualitative view of how well the statistical theory fits the
+similarity scores calculated by FASTA and SSEARCH. For FASTA, if
+optimized scores are calculated for each sequence in the database
+(the default), the agreement between the actual distribution of
+"z-scores" and the expected distribution based on the length
+dependence of the score and the extreme value distribution is
+usually very good. Likewise, the distribution of SSEARCH Smith-
+Waterman scores typically agrees closely with the actual
+distribution of "z-scores." The agreement with unoptimized
+scores, ktup=2, is often not very good, with too many high
+scoring sequences and too few low scoring sequences compared with
+the predicted relationship between sequence length and similarity
+score. In those cases, the expectation values may be
+overestimates.
+X
+X The statistical routines assume that the library contains a
+large sample of unrelated sequences. If this is not the case,
+then the expectation values are meaningless. Likewise, if there
+are fewer than 20 sequences in the library, the statistical
+calculations are not done.
+X
+X For protein searches, library sequences with E() values <
+0.01 for searches of a 10,000 entry protein database are almost
+always homologous. Frequently sequences with E()-values from 1 -
+10 are related as well. Remember, however, that these E() values
+also reflect differences between the amino acid composition of
+the query sequence and that of the "average" library sequence.
+Thus, when searches are done with query sequences with "biased"
+amino-acid composition, unrelated sequences may have
+"significant" scores because of sequence bias. The programs
+below, PRDF and PRSS, can address this problem by calculating
+similarity scores for random sequences with the same length and
+amino acid composition.
+X
+X If optimization is not used ("-o"), E-values for DNA
+sequences overestimate the significance of the scores that are
+obtained and unrelated sequences frequently have E()-values <
+0.0005. With optimization, the agreement between E()-value
+compares favorably with protein sequence comparison. This is in
+part due to the use of more stringent gap penalties for DNA
+sequence comparison, -16, -4 rather than -12, -2. With the
+latter penalties, many unrelated sequences appear to have
+significant similarity. Nevertheless, since protein sequence
+comparison is much more sensitive, DNA sequence comparison should
+not be used to identify sequences that encode protein. Even with
+ktup=6, optimization rarely increases run-times more than 50%
+with mRNA-size query sequences. Optimization should be used
+whenever possible.
+X
+X Similar comments apply to TFASTA, where higher gap
+penalties (-16,-4) are required for accurate statistical
+estimates. Because TFASTA produces so many artificial "coding"
+sequences with atypical amino acid compositions, the statistical
+estimates with TFASTA are often overestimates. With optimized
+scores, ktup=1, and gap penalties of -16, -4, unrelated sequences
+will sometimes have E() values of 0.1. If initn scores are used,
+unrelated sequences may have E() values < 0.01.
+X
+PRDF improved version of RDF program that includes accurate
+X probability estimates for all three scoring methods
+X (includes local or window shuffle routine)
+X
+PRSS A version of PRDF that uses the rigorous Smith-Waterman
+X calculation used by SSEARCH.
+X
+RANDSEQ produces a randomly shuffled sequence from a query
+X sequence.
+X
+RELATE significance program described by Dayhoff (Atlas of
+X Protein Sequence and Structure, Vol. 5, Supplement 3).
+X Each chunk of 25 residues in one sequence is compared
+X to every 25 residue fragment of the second sequence.
+X Sequences which are genuinely related will have a large
+X number of scores greater than 3 standard deviations
+X above the mean score of all of the comparisons.
+X
+3.6. Other analysis programs
+X
+AACOMP calculate the amino acid composition and molecular
+X weight of a sequence.
+X
+BESTSCOR calculate the best self-comparison score.
+X
+GREASE Kyte-Doolittle hydropathicity profile
+X
+TGREASE graphic plot of Kyte-Doolittle profile
+X
+FROMGB convert from GenBank LOCUS format (also used by the
+X IBI-Pustell programs) to Pearson/FASTA format.
+X
+GARNIER A secondary structure prediction program using the
+X method of Garnier, Osguthorpe, and Robson, J. Mol.
+X Biol., (1978) 120:97-120.
+X
+3.7. Options
+X
+X These programs have a number of output options, which are
+invoked by the environment variables LINLEN, SHOWALL, and MARKX.
+Alternatively, these values can be controlled by command line
+options. The number of sequence residues per output line is now
+adjustable by setting the environment variable LINLEN, or the
+command line option -w. LINLEN is normally 60, to change it set
+LINLEN=80 before running the program or add -w 80 to the command
+line. LINLEN can be set up to 200. SHOWALL (-a) determines
+whether all, or just a portion, of the aligned sequences are
+displayed. Previously, FASTP would show the entire length of
+both sequences in an alignment while FASTN would only show the
+portions of the two sequences that overlapped. Now the default is
+to show only the overlap between the two sequences, to show
+complete sequences, set SHOWALL=1, or use the -a option on the
+command line.
+X
+X The differences between the two aligned sequences can be
+highlighted in three different ways by changing the environment
+variable MARKX or the -m option. Normally (MARKX=0) the program
+uses ':' to denote identities and '.' to denote conservative
+replacements. If MARKX=1, the program will not mark identities;
+instead conservative replacements are denoted by a 'x' and non-
+conservative substitutions by a 'X'. If MARKX=2, the residues in
+the second sequence are only shown if they are different from the
+first. MARKX=3 displays the aligned library sequences without the
+query sequence; these can be used to build a primitive multiple
+alignment. MARKX=4 provides a graphical display of the
+boundaries of the alignments. Thus the five options are:
+X
+X
+X    MARKX=0        MARKX=1        MARKX=2        MARKX=3        MARKX=4
+X
+X    MWRTCGPPYT     MWRTCGPPYT     MWRTCGPPYT                    MWRTCGPPYT
+X    ::..:: :::       xx  X        ..KS..Y...     MWKSCGYPYT     ----------
+X    MWKSCGYPYT     MWKSCGYPYT
+X
+X
+(fasta20u4, Feb. 1996) In addition MARKX=10 is a new, parseable
+format for use with other programs. See the file "readme.v20u4"
+for a more complete description.
+X
+3.8. Command line options
+X
+X It is now possible to specify several options on the
+command line, instead of using environment variables. The
+command line options are preceded by a dash; the following
+options are available:
+X
+-a same as showall=1
+X
+-A force Smith-Waterman alignments for DNA sequences and
+X TFASTA. By default, only FASTA protein sequence
+X comparisons use Smith-Waterman alignments.
+X
+-b # Number of sequence scores to be shown on output. In
+X the absence of this option, fasta (and tfasta and
+X ssearch) display all library sequences obtaining
+X similarity scores with expectations less than 10.0 if
+X optimized scores are used, or 2.0 if they are not. The
+X -b option can limit the display further, but it will
+X not cause additional sequences to be displayed.
+X
+-c # Threshold score for optimization (OPTCUT). Set "-c 1"
+X to optimize every sequence in a database. (This slows
+X the program down about 5-fold).
+X
+-E # Limit the number of scores and alignments shown based
+X on the expected number of scores. Used to override the
+X expectation value of 10.0 used by default. When used
+X with -Q, -E 2.0 will show all library sequences with
+X scores with an expectation value <= 2.0.
+X
+-d # Number of alignments to be reported by default. (Used
+X in conjunction with -Q). No longer necessary, see "-b"
+X above.
+X
+-f Penalty for the first residue in a gap (-12 by default
+X for proteins, -16 for DNA or for TFASTA).
+X
+-g Penalty for additional residues in a gap (-2 by default
+X for proteins, -4 for DNA and TFASTA ).
+X
+-h Penalty for frameshift (FASTX, TFASTX only).
+X
+-H Omit histogram.
+X
+-i Invert (reverse complement) the query sequence if it is
+X DNA. For TFASTX, search the reverse complement of the
+X library sequence only.
+X
+-k # Threshold for joining init1 segments to build an initn
+X score (GAPCUT).
+X
+-l file Location of library menu file (FASTLIBS).
+X
+-L Display more information about the library sequence in
+X the alignment.
+X
+-m # MARKX = # (0, 1, 2, 3, 4, 10)
+X
+-n Force the query sequence to be treated as a DNA
+X sequence. This is particularly useful for query
+X sequences that contain a large number of ambiguous
+X residues, e.g. transcription factor binding sites.
+X
+-O Send copy of results to "filename." Helpful for
+X environments without STDOUT.
+X
+-o Turn off default optimization of all scores greater
+X than OPTCUT. Sort results by "initn" scores.
+X
+-Q,-q Quiet - does not prompt for any input. Writes scores
+X and alignments to the terminal or standard output file.
+X
+-r file Save a results summary line for every sequence in the
+X sequence library. The summary line includes the
+X sequence identifier, superfamily number (if available)
+X position in the library, and the similarity scores
+X calculated. This option can be used to evaluate the
+X sensitivity and selectivity of different search
+X strategies (see W. R. Pearson (1991) Genomics 11:635-
+X 650.)
+X
+-s file SMATRIX is read from file. Several SMATRIX files are
+X provided with the standard distribution. For protein
+X sequences: codaa.mat - based on minimum mutation
+X matrix; idnaa.mat - identity matrix; pam250.mat - the
+X PAM250 matrix developed by Dayhoff et al (Atlas of
+X Protein Sequence and Structure, vol. 5, suppl. 3,
+X 1978); pam120.mat - a PAM120 matrix. The default
+X scoring matrix is BLOSUM50, PAM250 is available with
+X "-s 250", BLOSUM62 ("-s BL62") is also available.
+X
+-v (LINEVAL) values used for line styles in plfasta
+X
+-w # Line length (width) = number (<200)
+X
+-x Specifies offsets for the beginning of the query and
+X library sequence. For example, if you are comparing
+X upstream regions for two genes, and the first sequence
+X contains 500 nt of upstream sequence while the second
+X contains 300 nt of upstream sequence, you might try:
+X
+X fasta -x "-500 -300" seq1.nt seq2.nt
+X
+X If the -x option is not used, FASTA assumes numbering
+X starts with 1. This option will not work properly with
+X the translated library sequence with tfasta. (You
+X should double check to be certain the negative
+X numbering works properly.)
+X
+-y Set the width of the band used for calculating
+X "optimized" scores. For proteins and ktup=2, the width
+X is 16. For proteins with ktup=1, the width is 32 by
+X default. For DNA the width is 16.
+X
+-z Turn off statistical calculations.
+X
+-1 sort output by init1 score (as FASTP used to do).
+X
+-3 (TFASTA, TFASTX only) translate only three forward
+X frames
+X
+X
+For example:
+X
+X fasta -w 80 -a seq1.aa seq2.aa
+X
+would compare the sequence in seq1.aa to that in seq2.aa and
+display the results with 80 residues on an output line, showing
+all of the residues in both sequences. Be sure to enter the
+options before entering the file names, or just enter the options
+on the command line, and the program will prompt for the file
+names.
+X
+X Not all of these options are appropriate for all of the
+programs. The options above are used by FASTA and TFASTA. RELATE
+uses the -s option, ALIGN uses the -w, -m, and -s options, and
+the PRDF program uses -c, -f, -k, and -s.
+X
+4. Environment variable summary
+X
+X Environment variables allow you to set search parameters
+that will be used frequently when you run a program; for example,
+if you prefer to use the PAM250 scoring matrix, you might "set
+SMATRIX=250." Command line parameters, if used, always override
+environment variable settings. The following environment
+variables are used by this program:
+X
+AABANK the file name of the default sequence library.
+X
+FASTLIBS the location of the file which contains the list of
+X library files to be searched.
+X
+GAPCUT threshold used for joining init1 regions in the second
+X step of FASTA. Normally set based on sequence length
+X and ktup.
+X
+LIBTYPE used to specify the format of the library sequence for
+X FASTA and TFASTA.
+X
+LINLEN output line length - can go up to 200
+X
+LINEVAL used by plfasta to determine the relationship between
+X line style and similarity score (-v). This should be a
+X string of three numbers, e.g. "200 100 50"
+X
+MARKX symbol for denoting matches, mismatches. Note that this
+X symbol is only used across the optimized local region;
+X sequences that are outside this region are not marked.
+X
+OPTCUT Set the threshold to be used for optimization in a band
+X around the best initial region. Normally the OPTCUT
+X value is calculated from the length of the sequence and
+X the ktup value (for a 200 residue sequence, it is about
+X 28). If OPTCUT=1, every sequence in the database will
+X be optimized. This is the most sensitive option.
+X
+PAMFACT This version of fasta uses a more sensitive method for
+X identifying initial regions. Instead of using a
+X constant factor (fact) for each match in a ktup, it
+X uses the scoring matrix (PAM) scores. While this works
+X well for protein sequences, it has not been as
+X carefully tested for DNA sequences, so by default, this
+X modification is used for proteins but not for DNA.
+X Setting the PAMFACT environment variable to 1 forces
+X the option on; PAMFACT=0 turns it off.
+X
+SHOWALL on output, show the complete sequence instead of just
+X the overlap of the two aligned sequences.
+X
+SMATRIX alternative scoring matrix file.
+X
+TEKPLOT (IBM-PC only, Unix and VMS versions generate Tektronix
+X graphics by default) Generate Tektronix output.
+X Normally, PLFASTA and TGREASE plot graphs using the
+X Turbo C graphics library. Unfortunately, often these
+X plots cannot be printed out without special programs.
+X However, if you set TEKPLOT=1, tektronix graphics
+X commands will be used. Tektronix commands can be used
+X together with the PLOTDEV program, available from
+X Microplot Systems. They no longer sell this program,
+X but it can be downloaded from
+X http://iquest.com/~microplt/index1.html. PLOTDEV also
+X allows you to print out graphics on the screen.
+X
+As always, please inform me of bugs as soon as possible.
+X
+William R. Pearson
+Department of Biochemistry
+Box 440, Jordan Hall
+U. of Virginia
+Charlottesville, VA
+X
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 fasta20.doc ||
+echo 'restore of fasta20.doc failed'
+Wc_c="`wc -c < 'fasta20.doc'`"
+test 49762 -eq "$Wc_c" ||
+ echo 'fasta20.doc: original size 49762, current size' "$Wc_c"
+fi
+# ============= fasta3.1 ==============
+if test -f 'fasta3.1' && test X"$1" != X"-c"; then  # extract fasta3.1; keep an existing copy unless $1 is -c (two tests: the -a primary is obsolescent)
+ echo 'x - skipping fasta3.1 (File already exists)'
+else
+echo 'x - extracting fasta3.1 (Text)'  # the sed below strips the leading X guard from each archived line
+sed 's/^X//' << 'SHAR_EOF' > 'fasta3.1' &&
+.TH FASTA/TFASTA/FASTX/TFASTXv3 1 local
+.SH NAME
+fasta3, fasta3_t \- scan a protein or DNA sequence library for similar
+sequences
+X
+tfasta3, tfasta3_t \- compare a protein sequence to a DNA sequence
+library, translating the DNA sequence library `on-the-fly'.
+X
+fastx3, fastx3_t \- compare a DNA sequence to a protein sequence
+database, comparing the translated DNA sequence in forward and
+reverse frames.
+X
+tfastx3, tfastx3_t \- compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations.
+X
+fasty3, fasty3_t \- compare a DNA sequence to a protein sequence
+database, comparing the translated DNA sequence in forward and reverse
+frames.
+X
+tfasty3, tfasty3_t \- compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations.
+X
+fasts3, fasts3_t \- compare unordered peptides to a protein sequence database
+X
+tfasts3, tfasts3_t \- compare unordered peptides to a translated DNA
+sequence database
+X
+fastf3, fastf3_t \- compare mixed peptides to a protein sequence database
+X
+tfastf3, tfastf3_t \- compare mixed peptides to a translated DNA
+sequence database
+X
+ssearch3, ssearch3_t \- compare a protein or DNA sequence to a
+sequence database using the Smith-Waterman algorithm.
+X
+prss3, prfx3 \- estimate statistical significance of an alignment by
+comparing the score to the distribution of similarity scores generated
+by shuffling the second sequence. prss3 uses Smith-Waterman. prfx3
+uses the fastx algorithm.
+X
+.SH DESCRIPTION
+X
+Release 3.x of the FASTA package provides a modular set of sequence
+comparison programs that can run on conventional single processor
+computers or in parallel on multiprocessor computers. Seven different
+programs \- fasta3, fastx3, fasty3, tfastx3, tfasty3, tfasta3, and
+ssearch3 \- are currently available.
+X
+All of the comparison programs share a set of basic command line
+options; additional options are available for individual comparison
+functions.
+X
+The fasta3_t, fastx3_t, fasty3_t, tfasta3_t, tfastx3_t, tfasty3_t and
+ssearch3_t programs are threaded versions that will run in parallel on
+Digital Equipment, Sun, and SGI multiprocessor computers.
+X
+.SH Options for comparison functions
+.LP
+These versions of the fasta programs have been modified to accept a
+query sequence from the unix "stdin" data stream. This makes it much
+easier to use fasta3 and its relatives as part of a WWW page. To
+indicate that stdin is to be used, use "@" as the query
+sequence file name. "@" can also be used to specify a
+subset of the query sequence to be used, e.g:
+.sp
+.ti 0.5i
+cat query.aa | fasta3 -q @:50-150 s
+.sp
+would search the 's' database with residues 50-150 of query.aa. FASTA
+cannot automatically detect the sequence type (protein vs DNA) when
+"stdin" is used, so the '-n' option is required for DNA.
+.TP
+\-1
+Sort by "init1" score.
+.TP
+\-3
+(TFASTA3, TFASTX/Y3 only) use only forward frame translations
+.TP
+\-a #
+"SHOWALL" option attempts to align all of both sequences in FASTA and SSEARCH.
+.TP
+\-A
+force Smith-Waterman alignment for output. Smith-Waterman is the
+default for protein sequences and FASTX3, but not for TFASTA3 or DNA
+comparisons with FASTA3.
+.TP
+\-b #
+number of best scores to show (must be < -E cutoff if -E is given)
+.TP
+\-B
+show z-scores rather than bit scores
+.TP
+\-c #
+threshold for band optimization (FASTA, FASTX)
+.TP
+\-C #
+(fasta34t11d4) length of name abbreviation in alignments, default = 6.
+.TP
+\-d #
+number of best alignments to show (must be < -E cutoff)
+.TP
+\-D
+turn on debugging mode. Enables checks on sequence alphabet that
+cause problems with tfastx3, tfasty3, tfasta3.
+.TP
+\-E #
+expectation value upper limit for score and alignment display.
+Defaults are 10.0 for FASTA3 and SSEARCH3 protein searches, 5.0 for
+translated DNA/protein comparisons, and 2.0 for DNA/DNA searches.
+.TP
+\-f #
+penalty for opening a gap (or first residue for older versions)
+.TP
+\-F #
+expectation value lower limit for score and alignment display.
+-F 1e-6 prevents library sequences with E()-values lower than 1e-6
+from being displayed. This allows the user to focus on more distant
+relationships.
+.TP
+\-g #
+penalty for additional residues in a gap
+.TP
+\-h #
+(FASTX3, TFASTX3, FASTY3, TFASTY3 only) penalty for a frameshift between
+two codons.
+.TP
+\-j #
+(FASTY3, TFASTY3 only) penalty for a frameshift within a codon.
+.TP
+\-H
+turn off histogram display
+.TP
+\-i
+(DNA only) reverse complement the query sequence. (TFASTX) compare against
+only the reverse complement of the library sequence.
+.TP
+\-l str
+specify FASTLIBS file
+.TP
+\-L
+report long sequence description in alignments
+.TP
+\-m 0,1,2,3,4,5,6,9,10 alignment display options. \fC-m 0, 1, 2, 3\fP
+display different types of alignments. \fC-m 4\fP provides an
+alignment "map" on the query. \fC-m 5\fP combines the alignment map
+and a \fC-m 0\fP alignment. \fC-m 6\fP provides an HTML output.
+\fC-m 9\fP does not change the alignment output, but provides
+alignment coordinate and percent identity information with the best
+scores report. \fC-m 9c\fP adds encoded alignment information to the
+\fC-m 9\fP; \fC-m 9i\fP provides only percent identity and alignment
+length information with the best scores. With current versions of the
+FASTA programs, independent \fC-m\fP options can be combined;
+e.g. \fC-m 1 -m 9c -m 6\fP.
+.TP
+\-M #-#
+molecular weight (residue) cutoffs. -M "101-200" examines only sequences that are 101-200 residues long.
+.TP
+\-n
+force query to nucleotide sequence
+.TP
+\-N #
+break long library sequences into blocks of # residues. Useful for
+bacterial genomes, which have only one sequence entry. -N 2000 works
+well for bacterial genomes.
+.TP
+\-o
+(FASTA) turn fasta band optimization off during initial phase. This was
+the behavior of fasta1.x versions.
+.TP
+\-O file
+send output to file
+.TP
+\-q/-Q
+quiet option; do not prompt for input
+.TP
+\-r "+n/-m"
+values for match/mismatch for DNA comparisons. \fC+n\fP is
+used for the maximum positive value and \fC-m\fP is used for the
+maximum negative value. Values between max and min, are rescaled, but
+residue pairs having the value -1 continue to be -1.
+.TP
+\-R file
+save all scores to statistics file (previously -r file)
+.TP
+\-s name
+specify substitution matrix. BLOSUM50 is used by default;
+PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
+P250, or BL62. With this version, many more scoring matrices are
+available, including BLOSUM80 (BL80), and MDM10, MDM20, MDM40 (Jones,
+Taylor, and Thornton, 1992 CABIOS 8:275-282; specified as -s M10, -s
+M20, -s M40). Alternatively, BLASTP1.4 format scoring matrix files can
+be specified. BL80, BL62, and P120 are scaled in 1/2 bit units; all
+the other matrices use 1/3 bit units. DNA scoring matrices can also
+be specified with the "-r" option.
+.TP
+\-S
+treat lower case letters in the query or database as low complexity
+regions that are equivalent to 'X' during the initial database scan,
+but are treated as normal residues for the final alignment display.
+Statistical estimates are based on the 'X'ed out sequence used during
+the initial search. Protein databases (and query sequences) can be
+generated in the appropriate format using John Wootton's "pseg"
+program, available from ftp://ncbi.nlm.nih.gov/pub/seg/pseg. Once you
+have compiled the "pseg" program, use the command:
+.IP
+\fCpseg database.fasta -z 1 -q > database.lc_seg\fP
+.TP
+\-t #
+Translation table - tfasta3, fastx3, tfastx3, fasty3, and
+tfasty3 now support the BLAST translation tables. See
+\fChttp://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi\fP.
+.IP
+In addition, "\-t t" or "\-t t#" turns on the addition of an implicit termination
+codon to a protein:translated DNA match. That is, each protein
+sequence implicitly ends with "*", which matches the termination codes
+for the appropriate genetic code. "\-t t#" sets implicit termination
+and a different genetic code.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (set by
+default to 4 at compile time).
+.TP
+\-U
+Do RNA sequence comparisons: treat 'T' as 'U', allow G:U base pairs (by
+scoring "G-A" and "T-C" as "G-G" -1). Search only one strand.
+.TP
+\-V "?$%*"
+Allow special annotation characters in query sequence. These characters
+will be displayed in the alignments on the coordinate number line.
+.TP
+\-w # line width for similarity score, sequence alignment, output.
+.TP
+\-W # context length (default is 1/2 of line width -w) for alignment,
+like fasta and ssearch, that provide additional sequence context.
+.TP
+\-x #match,#mismatch
+scores used for matches to 'X:X','N:N', '*:*' matches, and the corresponding
+'X:not-X', etc, mismatches, overriding the values
+specified in the scoring matrix. If only one value is given, it is
+used for both values.
+.TP
+\-X "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-y #
+Width for band optimization; by default 16 for DNA and protein ktup=2;
+32 for protein ktup=1;
+.TP
+\-z #
+Specify statistical calculation. Default is -z 1, which uses
+regression against the length of the library sequence. -z 0 disables
+statistics. -z 2 provides maximum likelihood estimates for lambda and K,
+censoring the 250 lowest and 250 highest scores. -z 3 uses Altschul
+and Gish's statistical estimates for specific protein BLOSUM scoring
+matrices and gap penalties. -z 4,5: an alternate regression method.
+\-z 6 uses a composition based maximum likelihood estimate based
+on the method of Mott (1992) Bull. Math. Biol. 54:59-75.
+-z 11,12,14,15,16: compute the regression against scores of randomly
+shuffled copies of the library sequences. Twice as many comparisons
+are performed, but accurate estimates can be generated from databases
+of related sequences. -z 11 uses the -z 1 regression strategy, etc.
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations
+(used for protein/protein FASTA and SSEARCH, and for FASTX, FASTY, TFASTX,
+and TFASTY).
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SMATRIX
+default scoring matrix (-s SMATRIX)
+.TP
+SRCH_URL
+the format string used to define the option to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+X
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 fasta3.1 ||  # continues the "sed ... &&" list that opened the here-document, so it runs only if sed succeeded
+echo 'restore of fasta3.1 failed'  # printed when either the sed or the chmod step failed
+Wc_c="`wc -c < 'fasta3.1'`"  # byte count of the file as it now exists on disk
+test 10345 -eq "$Wc_c" ||  # 10345 is the size recorded in this archive; a mismatch is reported, not fatal
+ echo 'fasta3.1: original size 10345, current size' "$Wc_c"
+fi
+# ============= fasta3.rsp ==============
+if test -f 'fasta3.rsp' && test X"$1" != X"-c"; then  # extract fasta3.rsp; keep an existing copy unless $1 is -c (two tests: the -a primary is obsolescent)
+ echo 'x - skipping fasta3.rsp (File already exists)'
+else
+echo 'x - extracting fasta3.rsp (Text)'  # the sed below strips the leading X guard from each archived line
+sed 's/^X//' << 'SHAR_EOF' > 'fasta3.rsp' &&
+compacc.obj doinit.obj showbest.obj htime.obj apam.obj karlin.obj scaleswn.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
+SHAR_EOF
+chmod 0644 fasta3.rsp ||  # runs only if sed succeeded
+echo 'restore of fasta3.rsp failed'  # printed when either the sed or the chmod step failed
+Wc_c="`wc -c < 'fasta3.rsp'`"  # byte count of the file as it now exists on disk
+test 177 -eq "$Wc_c" ||  # 177 is the size recorded in this archive; a mismatch is reported, not fatal
+ echo 'fasta3.rsp: original size 177, current size' "$Wc_c"
+fi
+# ============= fasta3x.doc ==============
+if test -f 'fasta3x.doc' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta3x.doc (File already exists)'
+else
+echo 'x - extracting fasta3x.doc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta3x.doc' &&
+(Updated December, 2003)
+X
+X
+X COPYRIGHT NOTICE
+X
+Copyright 1988, 1991, 1992, 1994, 1995, 1996, 1999 by William R.
+Pearson and the University of Virginia. All rights reserved. The
+FASTA program and documentation may not be sold or incorporated
+into a commercial product, in whole or in part, without written
+consent of William R. Pearson and the University of Virginia.
+For further information regarding permission for use or
+reproduction, please contact: David Hudson, Assistant Provost for
+Research, University of Virginia, P.O. Box 9025, Charlottesville,
+VA 22906-9025, (434) 924-6853
+X
+The FASTA program package
+X
+Introduction
+X
+X This documentation describes version 3 of the FASTA
+program package (see W. R. Pearson and D. J. Lipman (1988),
+"Improved Tools for Biological Sequence Analysis", PNAS
+85:2444-2448 (Pearson and Lipman, 1988); W. R. Pearson (1996)
+"Effective protein sequence comparison" Meth. Enzymol.
+266:227-258 (Pearson, 1996); Pearson et al. (1997) Genomics
+46:24-36 (Zhang et al., 1997); Pearson, (1999) Meth. in
+Molecular Biology 132:185-219 (Pearson, 2000)). Version 3 of the
+FASTA packages contains many programs for searching DNA and
+protein databases and one program (prss3) for evaluating
+statistical significance from randomly shuffled sequences.
+Several additional analysis programs, including programs that
+produce local alignments, are available as part of version 2 of
+the FASTA package, which is still available.
+X
+X This document is divided into three sections: (1) A summary
+overview of the programs in the FASTA3 package; (2) A guide to
+installing the programs and databases; (3) A guide to using the
+FASTA programs. The revision history of the programs can be found
+in the readme.v30..v34 files. The programs are easy to use, so
+if you are using them on a machine that is administered by
+someone else, you can skip section (2) and focus on (1) and (3)
+to learn how to use the programs. If you are installing the
+programs on your own machine, you will need to read section (2)
+carefully.
+X
+1. An overview of the FASTA programs
+X
+X Although there are a large number of programs in this
+package, they belong to three groups: (1) "Conventional" Library
+search programs: FASTA3, FASTX3, FASTY3, TFASTA3, TFASTX3,
+TFASTY3, SSEARCH3; (2) Programs for searching with short
+fragments: FASTS3, FASTF3, TFASTS3, TFASTF3; (3) Statistical
+significance: PRSS3. Programs that start with fast search
+protein databases, while tfast programs search translated DNA
+databases. Table I gives a brief description of the programs.
+X
+X
+X Table I. Comparison programs in the FASTA3 package
+X
+---------------------------------------------------------------------------
+fasta3 Compare a protein sequence to a protein sequence
+X database or a DNA sequence to a DNA sequence database
+X using the FASTA algorithm (Pearson and Lipman, 1988,
+X Pearson, 1996). Search speed and selectivity are con-
+X trolled with the ktup(wordsize) parameter. For protein
+X comparisons, ktup = 2 by default; ktup =1 is more sen-
+X sitive but slower. For DNA comparisons, ktup=6 by de-
+X fault; ktup=3 or ktup=4 provides higher sensitivity;
+X ktup=1 should be used for oligonucleotides (DNA query
+X lengths < 20).
+X
+ssearch3 Compare a protein sequence to a protein sequence
+X database or a DNA sequence to a DNA sequence database
+X using the Smith-Waterman algorithm (Smith and Water-
+X man, 1981). ssearch3 is about 10-times slower than
+X FASTA3, but is more sensitive for full-length protein
+X sequence comparison.
+X
+fastx3/ fasty3 Compare a DNA sequence to a protein sequence database,
+X by comparing the translated DNA sequence in three
+X frames and allowing gaps and frameshifts. fastx3 uses
+X a simpler, faster algorithm for alignments that allows
+X frameshifts only between codons; fasty3 is slower but
+X produces better alignments with poor quality sequences
+X because frameshifts are allowed within codons.
+X
+tfastx3/ tfasty3 Compare a protein sequence to a DNA sequence database,
+X calculating similarities with frameshifts to the for-
+X ward and reverse orientations.
+X
+tfasta3 Compare a protein sequence to a DNA sequence database,
+X calculating similarities (without frameshifts) to the 3
+X forward and three reverse reading frames. tfastx3 and
+X tfasty3 are preferred because they calculate similarity
+X over frameshifts.
+X
+fastf3/tfastf3 Compares an ordered peptide mixture, as would be ob-
+X tained by Edman degradation of a CNBr cleavage of a
+X protein, against a protein (fastf) or DNA (tfastf)
+X database.
+X
+fasts3/tfasts3 Compares a set of short peptide fragments, as would be
+X obtained from mass-spec. analysis of a protein, against
+X a protein (fasts) or DNA (tfasts) database.
+---------------------------------------------------------------------------
+X
+2. Installing FASTA and the sequence databases
+X
+2.1. Obtaining the libraries
+X
+X The FASTA program package does not include any protein or
+DNA sequence libraries. Protein databases are available on CD-
+ROM from the PIR and EMBL (see below), or via anonymous FTP from
+many different sources. As this document is updated in the fall
+of 1999, no DNA databases are available on CD-ROM from the major
+sequence databases: Genbank at the National Center for Biotechnology
+Information (www.ncbi.nlm.nih.gov and ftp://ncbi.nlm.nih.gov) and
+EMBL at the European Bioinformatics Institute (www.ebi.ac.uk).
+However, the databases are available via anonymous FTP from both
+sites.
+X
+2.1.1. The GENBANK DNA sequence library
+X
+X Because of the large size of DNA databases, you will
+probably want to keep DNA databases in only one, or possibly two,
+formats. The FASTA3 programs that search DNA databases - fasta3,
+tfastx/y3, and tfasta3 can read DNA databases in Genbank flatfile
+(not ASN.1), FASTA, GCG/compressed-binary, BLAST1.4 (pressdb),
+and BLAST2.0 (formatdb) formats, as well as EMBL format. If you
+are also running the GCG suite of sequence analysis programs, you
+should use GCG/compressed-binary format or BLAST2.0 format for
+your fasta3 searches. If not, BLAST2.0 is a good choice. These
+files are considerably more compact than Genbank flat files, and
+are preferred. The NCBI does not provide software for converting
+from Genbank flat files to Blast2.0 DNA databases, but you can
+use the Blast formatdb program to convert ASN.1 formatted Genbank
+files, which are available from the NCBI ftp site.
+X
+X The NCBI also provides the nr, swissprot, and several EST
+databases that are used by BLAST in FASTA format from:
+ftp://ncbi.nlm.nih.gov/blast/db. These databases are updated
+nightly.
+X
+2.1.2. The NBRF protein sequence library
+X
+X You can obtain the PIR protein sequence database (Barker et
+al., 1998) from:
+X
+X National Biomedical Research Foundation
+X Georgetown University Medical Center
+X 3900 Reservoir Rd, N.W.
+X Washington, D.C. 20007
+X
+or via ftp from nbrf.georgetown.edu or from the NCBI
+(ncbi.nlm.nih.gov/repository/PIR). The data in the ascii
+directory is in PIR Codata format, which is not widely used. I
+recommend the PIR/VMS format data (libtype=5) in the vms
+directory.
+X
+2.1.3. The EBI/EMBL CD-ROM libraries
+X
+X The European Bioinformatics Institute (EBI) distributes both
+the EMBL DNA database and the SwissProt database on CD-ROM
+(Bairoch and Apweiler, 1996), and they are available from:
+X
+X EMBL-Outstation European Bioinformatics Institute
+X Wellcome Trust Genome Campus,
+X Hinxton Hall
+X Hinxton,
+X Cambridge CB10 1SD
+X United Kingdom
+X Tel: +44 (0)1223 494444
+X Fax: +44 (0)1223 494468
+X Email: DATALIB@ebi.ac.uk
+X
+In addition, the SWISS-PROT protein sequence database is
+available via anonymous FTP from
+ftp://ftp.expasy.ch/databases/swiss-prot/ (also see
+www.expasy.ch).
+X
+2.2. Finding the libraries: FASTLIBS
+X
+X The major problem that most new users of the FASTA package
+have is in setting up the program to find the databases and their
+library type. In general, if you cannot get fasta3 to read a
+sequence database, it is likely that something is wrong with the
+FASTLIBS file. A common problem is that the database file is
+found, but either no sequences are read, or an incorrect number
+of entries is read. This is almost always because the library
+format (libtype) is incorrect. Note that a type 5 file (PIR/VMS
+format) can be read as a type 0 (default FASTA) format file, and
+the number of entries will be correct, but the sequence lengths
+will not.
+X
+X All the search programs in the FASTA3 package use the
+environment variable FASTLIBS to find the protein and DNA
+sequence libraries. The FASTLIBS variable contains the name of a
+file that has the actual filenames of the libraries. The
+fastlibs file included with the distribution is an example of
+a file that can be referred to by FASTLIBS. To use the fastlibs
+file, type:
+X
+X setenv FASTLIBS /usr/lib/fasta/fastgbs (BSD UNIX/csh)
+X or
+X export FASTLIBS=/usr/lib/fasta/fastgbs (SysV UNIX/ksh)
+X
+Then edit the fastlibs file to indicate where the protein and DNA
+sequence libraries can be found. If you have a hard disk and
+your protein sequence library is kept in the file
+/usr/lib/seq/aabank.lib and your Genbank DNA sequence library is kept
+in the directory: /usr/lib/genbank, then fastgbs might contain:
+X
+X NBRF Protein$0P/usr/lib/seq/aabank.lib 0
+X SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
+X GB Primate$1P@/usr/lib/genbank/gpri.nam
+X GB Rodent$1R@/usr/lib/genbank/grod.nam
+X GB Mammal$1M@/usr/lib/genbank/gmammal.nam
+X ^ 1 ^^^^ 4 ^ ^
+X 23 (5)
+X
+The first line of this file says that there is a copy of the NBRF
+protein sequence database (which is a protein database) in the
+file /usr/lib/seq/aabank.lib that can be selected by typing "P" on
+the command line or when the database menu is presented.
+X
+X Note that there are 4 or 5 fields in the lines in fastgbs.
+The first field is the description of the library which will be
+displayed by FASTA; it ends with a '$'. The second field (1
+character), is a 0 if the library is a protein library and 1 if
+it is a DNA library. The third field (1 character) is the
+character to be typed to select the library.
+X
+X The fourth field is the name of the library file. In the
+example above, the /usr/lib/seq/aabank.lib file contains the
+entire protein sequence library. However the DNA library file
+names are preceded by a '@', because these files (gpri.nam,
+grod.nam, gmammal.nam) do not contain the sequences; instead they
+contain the names of the files which contain the sequences. This
+is done because the GENBANK DNA database is broken down in to a
+large number of smaller files. In order to search the entire
+primate database, you must search more than a dozen files.
+X
+X In addition, an optional fifth field can be used to specify
+the format of the library file. Alternatively, you can specify
+the library format in a file of file names (a file preceded by an
+'@'). This field must be separated from the file name by a space
+character (' '). In the example above, the
+aabank.lib file is in Pearson/FASTA format, while the swiss.seq
+file is in PIR/VMS format (from the EMBL CD-ROM). Currently,
+FASTA can read the following formats:
+X
+X 0 Pearson/FASTA (>SEQID - comment/sequence)
+X 1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
+X 2 NBRF CODATA (ENTRY/SEQUENCE)
+X 3 EMBL/SWISS-PROT (ID/DE/SQ)
+X 4 Intelligenetics (;comment/SEQID/sequence)
+X 5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
+X 6 GCG (version 8.0) Unix Protein and DNA (compressed)
+X 11 NCBI Blast1.3.2 format (unix only)
+X 12 NCBI Blast2.0 format (unix only, fasta32t08 or later)
+X
+In particular, this version will work with the EMBL and PIR VMS
+formats that are distributed on the EMBL CD-ROM. The latter
+format (PIR VMS) is much faster to search than EMBL format. This
+release also works with the protein and DNA database formats
+created for the BLASTP and BLASTN programs by SETDB and PRESSDB
+and with the new NCBI search format. If a library format is not
+specified, for example, because you are just comparing two
+sequences, Pearson/FASTA (format 0) is used by default. To
+specify a library type on the command line, add it to the library
+filename and surround the filename and library type in quotes:
+X
+X fasta3 query.file "/seqdb/genbank/gbpri1.seq 1"
+X
+X You can specify a group of library files by putting a '@'
+symbol before a file that contains a list of file names to be
+searched. For example, if @gmam.nam is in the fastgbs file, the
+file "gmam.nam" might contain the lines:
+X
+X </seqdb/genbank
+X gbpri1.seq 1
+X gbpri2.seq 1
+X gbpri3.seq 1
+X gbpri4.seq 1
+X gbrod.seq 1
+X gbmam.seq 1
+X
+In this case, the line beginning with a '<' indicates the
+directory the files will be found in. The remaining lines name
+the actual sequence files. So the first sequence file to be
+searched would be:
+X
+X /seqdb/genbank/gbpri1.seq
+X
+The notation "<PIRNAQ:" might be used under the VAX/VMS operating
+system. Under UNIX, the trailing '/' is left off, so the library
+directory might be written as "</usr/seqlib".
+X
+X The FASTA programs can search a database composed of
+different files in different sequence formats. For example, you
+may wish to search the Genbank files (in GenBank flat file
+format) and the EMBL DNA sequence database on CD-ROM. To do
+this, you simply list the names and filetypes of the files to be
+searched in a file of filenames. For example, to search the
+mammalian portion of Genbank, the unannotated portion of Genbank,
+and the unannotated portion of the EMBL library, you could use
+the file:
+X
+X </usr/lib/DNA
+X gbpri.seq 1
+X # (this '#' causes the program to display the size of the library)
+X gbrod.seq 1
+X ...
+X gbmam.seq 1
+X ...
+X gbuna.seq 1
+X ...
+X unanno.seq 5
+X #
+X
+X You do not need to include library format numbers if you
+X only use the Pearson/FASTA version of the PIR protein se-
+X quence library. If no library type is specified, the
+X program assumes that type 0 is being used.
+X
+X Test the setup by running FASTA. Enter the sequence file
+'mgstm1.aa' when the program requests it (this file is included
+with the programs). The program should then ask you to select a
+protein sequence library. Alternatively, if you run the TFASTA
+program and use the mgstm1.aa query sequence, the program should
+show you a selection of DNA sequence libraries. Once the fastgbs
+file has been set up correctly, you can set FASTLIBS=fastgbs in
+your AUTOEXEC.BAT file, and you will not need to remember where
+the libraries are kept or how they are named.
+X
+3. Using the FASTA Package
+X
+3.1. Overview
+X
+X The FASTA sequence comparison programs all require similar
+information, the name of a query sequence file, a library file,
+and the ktup parameter. All of the programs can accept arguments
+on the command line, or they will prompt for the file names and
+ktup value.
+X
+To use FASTA, simply type:
+X
+X FASTA
+X and you will be prompted for :
+X the name of the test sequence file
+X the name of the library file
+X and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
+X (ktup of 2 is about 5 times faster than ktup = 1)
+X
+The program can also be run by typing
+X
+X FASTA test.aa /lib/bigfile.lib ktup (1 or 2)
+X
+Included with the package are several test files. To check to
+make certain that everything is working, you can try:
+X
+X fasta musplfm.aa prot_test.lib
+X and
+X tfastx mgstm1.aa gst.nlib
+X
+3.2. Sequence files
+X
+X The fasta3 programs know about three kinds of sequence
+files: (1) plain sequence files - files that contain nothing but
+sequence residues - can only be used as query sequences. (2)
+FASTA format files. These are the same as plain sequence files,
+except that each sequence is preceded by a comment line with a '>'
+in the first column. (3) distributed sequence libraries (this is a broad
+class that includes the NBRF/PIR VMS and blocked ascii formats,
+Genbank flat-file format, EMBL flat-file format, and
+Intelligenetics format). All of the files that you create should
+be of type (1) or (2). FASTA format files (ones with a '>' and
+comment before the sequence) are preferred, because they can be
+used as query or library sequence files by all of the programs.
+X
+X I have included several sample test files, *.aa and *.seq as
+well as two small sequence libraries, prot_test.lib and gst.nlib.
+The first line may begin with a '>' followed by a comment. Spaces and
+tabs (and anything else that is not an amino-acid code) are
+ignored.
+X
+X Library files should have the form:
+X
+X >Sequence name and identifier
+X A F A S Y T .... actual sequence.
+X F S S .... second line of sequence.
+X >Next sequence name and identifier
+X
+This is often referred to as "FASTA" or "Pearson" format. You can build
+your own library by concatenating several sequence files. Just
+be sure that each sequence is preceded by a line beginning with a
+'>' with a sequence name.
+X
+X The test file should not have lines longer than 120
+characters, and sequences entered with word processors should use
+a document mode, with normal carriage returns at the end of
+lines.
+X
+X A different format is required to specify the ordered
+peptide mixture for fastf3/tfastf3. For example:
+X
+X >mgstm1
+X MGCEN,
+X MIDYP,
+X MLLAY,
+X MLLGY
+X
+indicates M in the first position of all four peptides (as from
+CNBr), G, I, L (twice) in the second position (first cycle),
+C,D,L (twice) in the third position, etc. The commas (,) are
+required to indicate the number of fragments in the mixture, but
+there should be no comma after the last residue.
+X
+X For the fasts3/tfasts3 program, the format is the same,
+except that there is no requirement for the peptides to be the
+same length.
+X
+4. Statistical Significance
+X
+X All the programs in the FASTA3 package attempt to calculate
+accurate estimates of the statistical significance of a match.
+For fasta3, ssearch3, and fastx3/y3, these estimates are very
+accurate (Pearson, 1998, Zhang et al., 1997). Altschul et al.
+(Altschul et al., 1994) provide an excellent review of the
+statistics of local similarity scores. Local sequence similarity
+scores follow the extreme value distribution, so that P(s > x) =
+1 - exp(-exp(-lambda(x-u))) where u = ln(Kmn)/lambda and m,n are
+the lengths of the query and library sequence. This formula can
+be rewritten as: 1 - exp(-Kmn exp(-lambda x)), which shows that
+the average score for an unrelated library sequence increases
+with the logarithm of the length of the library sequence. The
+fasta3 programs use simple linear regression against the log
+of the library sequence length to calculate a normalized "z-
+score" with mean 50, regardless of library sequence length, and
+variance 10. (Several other estimation methods are available with
+the -z option.) These z-scores can then be used with the extreme
+value distribution and the poisson distribution (to account for
+the fact that each library sequence comparison is an independent
+test) to calculate the number of library sequences expected to obtain a
+score greater than or equal to the score obtained in the search.
+The original idea and routines to do the linear regression on
+library sequence length were provided by Phil Green, U. Washington.
+This version uses a slightly different strategy for fitting the
+data than those originally provided by Dr. Green.
+X
+X The expected number of sequences is plotted in the histogram
+using an "*". Since the parameters for the extreme value
+distribution are not calculated directly from the distribution of
+similarity scores, the pattern of "*'s" in the histogram gives a
+qualitative view of how well the statistical theory fits the
+similarity scores calculated by the programs. For fasta3, if
+optimized scores are calculated for each sequence in the database
+(the default), the agreement between the actual distribution of
+"z-scores" and the expected distribution based on the length
+dependence of the score and the extreme value distribution is
+usually very good. Likewise, the distribution of ssearch3 Smith-
+Waterman scores typically agrees closely with the actual
+distribution of "z-scores." The agreement with unoptimized
+scores, ktup=2, is often not very good, with too many high
+scoring sequences and too few low scoring sequences compared with
+the predicted relationship between sequence length and similarity
+score. In those cases, the expectation values may be
+overestimates.
+X
+X With version 33t01, all the FASTA programs also report a
+"bit" score, which is equivalent to the bit score reported by
+BLAST2. The FASTA33/BLAST2 bit score is calculated as: (lambda*S
+- ln K)/ln 2, where S is the raw similarity score, lambda and K
+are statistical parameters estimated from the distribution of
+unrelated sequence similarity scores. The statistical
+significance of a given bit score depends on the lengths of the
+query and library sequences and the size of the library, but a 1
+bit increase in score corresponds to a 2-fold reduction in
+expectation; a 10-bit increase implies 1000-fold lower
+expectation, etc.
+X
+X The statistical routines assume that the library contains a
+large sample of unrelated sequences. If this is not true, then
+statistical parameters can be estimated by using the -z 11-15
+options. -z options greater than 10 calculate a shuffled
+similarity score for each library sequence, in addition to the
+unshuffled score, and estimate the statistical parameters from
+the scores of the shuffled sequences. If there are fewer than 20
+sequences in the library, the statistical calculations are not
+done.
+X
+X For protein searches, library sequences with E() values <
+0.01 for searches of a 10,000 entry protein database are almost
+always homologous. Frequently sequences with E()-values from 1 -
+10 are related as well, but unrelated sequences ( 1 - 10 per
+search) will have scores in this range as well. Remember,
+however, that these E() values also reflect differences between
+the amino acid composition of the query sequence and that of the
+"average" library sequence. Thus, when searches are done with
+query sequences with "biased" amino-acid composition, unrelated
+sequences may have "significant" scores because of sequence bias.
+PRSS3 can address this problem by calculating similarity scores
+for random sequences with the same length and amino acid
+composition.
+X
+5. Options
+X
+X Command line options are available to change the scoring
+parameters and output display. Command line options must precede
+other program arguments, such as the query and library file
+names.
+X
+5.1. Command line options
+X
+-a (fasta3, ssearch3 only) show both sequences in their
+X entirety.
+X
+-A force Smith-Waterman alignments for fasta3 DNA sequences.
+X By default, only fasta3 protein sequence comparisons use
+X Smith-Waterman alignments.
+X
+-B Show normalized score as a z-score, rather than a bit-score
+X in the list of best scores.
+X
+-b # Number of sequence scores to be shown on output. In the
+X absence of this option, fasta (and tfasta and ssearch)
+X display all library sequences obtaining similarity scores
+X with expectations less than 10.0 if optimized scores are
+X used, or 2.0 if they are not. The -b option can limit the
+X display further, but it will not cause additional sequences
+X to be displayed.
+X
+-c # Threshold score for optimization (OPTCUT). Set "-c 1" to
+X optimize every sequence in a database.
+X
+-E # Limit the number of scores and alignments shown based on the
+X expected number of scores. Used to override the expectation
+X value of 10.0 used by default. When used with -Q, -E 2.0
+X will show all library sequences with scores with an
+X expectation value <= 2.0.
+X
+-d # Maximum number of alignments to be displayed. Ignored if
+X "-Q" is not used.
+X
+-f Penalty for the first residue in a gap (-12 by default for
+X proteins, -16 for DNA, -15 for FAST[XY]/TFAST[XY]).
+X
+-F # Limit the number of scores and alignments shown based on the
+X expected number of scores. "-E #" sets the highest E()-value
+X shown; "-F #" sets the lowest E()-value. Thus, "-F 0.0001"
+X will not show any matches or alignments with E() < 0.0001.
+X This allows one to skip over close relationships in searches
+X for more distant relationships.
+X
+-g Penalty for additional residues in a gap (-2 by default for
+X proteins, -4 for DNA, -3 for FAST[XY]/TFAST[XY]).
+X
+-h Penalty for frameshift (fastx3/y3, tfastx3/y3 only).
+X
+-H Omit histogram.
+X
+-i Invert (reverse complement) the query sequence if it is DNA.
+X For tfasta3/x3/y3, search the reverse complement of the
+X library sequence only.
+X
+-j # Penalty for frameshift within a codon (fasty3/tfasty3 only).
+X
+-l file
+X Location of library menu file (FASTLIBS).
+X
+-L Display more information about the library sequence in the
+X alignment.
+X
+-M low-high
+X Range of amino acid sequence lengths to be included in the
+X search.
+X
+-m # Specify alignment type: 0, 1, 2, 3, 4, 5, 6, 9, 10
+X
+X -m 0 -m 1 -m 2 -m 3 -m 4
+X MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT
+X ::..:: ::: xx X ..KS..Y... MWKSCGYPYT ----------
+X MWKSCGYPYT MWKSCGYPYT
+X
+X -m 5 provides a combination of -m 4 and -m 0. -m 6 provides
+X -m 5 plus HTML formatting.
+X
+-m 9 provides coordinates and scores with the best score
+X information. A simple "-m 9" extends the normal best score
+X information:
+X
+X The best scores are: opt bits E(14548)
+X XURTG4 glutathione transferase (EC 2.5.1.18) 4 - ( 219) 1248 291.7 1.1e-79
+X
+X to include the additional information (on the same line,
+X separated by a <tab>):
+X
+X %_id %_gid sw alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs
+X 0.771 0.771 1248 218 1 218 1 218 1 218 1 219 0 0 0
+X
+X -m 9c provides additional information: an encoded alignment
+X string. Thus:
+X
+X 10 20 30 40 50 60 70
+X GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
+X XURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+X 20 30 40 50 60
+X
+X would be encoded:
+X
+X =23+9=13-2=10-1=3+1=5
+X
+X The alignment encoding is with respect to the alignment, not
+X the sequences. The coordinate of the alignment is given
+X earlier in the " -m 9c" line.
+X
+-m 10
+X -m 10 is a new, parseable format for use with other
+X programs. See the file "readme.v20u4" for a more complete
+X description.
+X
+X As of version "fa34t23b2", it has become possible to combine
+X independent "-m" options. Thus, one can use "-m 1 -m 6 -m
+X 9".
+X
+-M low-high
+X Include library sequences (proteins only) with lengths
+X between low and high.
+X
+-n Force the query sequence to be treated as a DNA sequence.
+X This is particularly useful for query sequences that contain
+X a large number of ambiguous residues, e.g. transcription
+X factor binding sites.
+X
+-O Send copy of results to "filename." Helpful for
+X environments without STDOUT (mostly for the Macintosh).
+X
+-o Turn off default optimization of all scores greater than
+X OPTCUT. Sort results by "initn" scores (reduces the accuracy
+X of statistical estimates).
+X
+-p Force query to be treated as protein sequence.
+X
+-Q,-q
+X Quiet - does not prompt for any input. Writes scores and
+X alignments to the terminal or standard output file.
+X
+-r Specify match/mismatch scores for DNA comparisons. The
+X default is "+5/-4". "+3/-2" can perform better in some
+X cases.
+X
+-R file
+X Save a results summary line for every sequence in the
+X sequence library. The summary line includes the sequence
+X identifier, superfamily number (if available), position in
+X the library, and the similarity scores calculated. This
+X option can be used to evaluate the sensitivity and
+X selectivity of different search strategies (Pearson, 1995,
+X Pearson, 1998).
+X
+-s file
+X Specify the scoring matrix file. fasta3 uses the same
+X scoring matrices as Blast1.4/2.0. Several scoring matrix
+X files are included in the standard distribution. For
+X protein sequences: codaa.mat - based on minimum mutation
+X matrix; idnaa.mat - identity matrix; pam250.mat - the PAM250
+X matrix developed by Dayhoff et al. (Dayhoff et al., 1978);
+X pam120.mat - a PAM120 matrix. The default scoring matrix is
+X BLOSUM50 ("-s BL50"). Other matrices available from within
+X the program are: PAM250/"-s P250", PAM120/"-s P120",
+X PAM40/"-s P40", PAM20/"-s P20", MDM10 - MDM40/"-s M10 - M40"
+X (MDM are modern PAM matrices from Jones et al. (Jones et
+X al., 1992)), BLOSUM50, 62, and 80/"-s BL50", "-s BL62", "-s
+X BL80".
+X
+-S Treat lower-case characters in the query or library
+X sequences as "low-complexity" ("seg"-ed) residues.
+X Traditionally, the "seg" program (Wootton and
+X Federhen, 1993) is used to remove low complexity regions in
+X DNA sequences by replacing the residues with an "X". When
+X the "-S" option is used, the FASTA33 programs provide a
+X potentially more informative approach. With "-S", lower
+X case characters in the query or database sequences are
+X treated as "X"'s during the initial scan, but are treated as
+X normal residues during the final alignment display. Since
+X statistical significance is calculated from the similarity
+X score calculated during the library search, when the lower
+X case residues are "X"'s, low complexity regions will not
+X produce statistically significant matches. However, if a
+X significant alignment contains low complexity regions, their
+X alignment is shown. With "-S", lower case characters may be
+X included in the alignment to indicate low complexity
+X regions, and the final alignment score may be higher than
+X the score obtained during the search.
+X
+X The pseg program can be used to produce databases (or query
+X sequences) with lower case residues indicating low
+X complexity regions using the command:
+X
+X pseg database.fasta -z 1 -q > database.lc_seg
+X
+X (seg can also be used with some post processing, see
+X readme.v33tx.)
+X
+-U Treat the query sequence as an RNA sequence. In addition to
+X selecting a DNA/RNA alphabet, this option causes changes to
+X the scoring matrix so that 'G:A' , 'T:C' or 'U:C' are scored
+X as 'G:G'.
+X
+-V str
+X It is now possible to specify some annotation characters
+X that can be included (and will be ignored), in the query
+X sequence file. Thus, one might have a file with:
+X "ACVS*ITRLFT?", where "*" and "?" are used to indicate
+X phosphorylation. By giving the option -V '*?', those
+X characters in the query will be moved to an "annotation
+X string", and alignments that include the annotated residues
+X will be highlighted with the appropriate character above the
+X sequence (on the number line).
+X
+-w # Line length (width) = number (<200)
+X
+-W # context length (default is 1/2 of line width -w) for
+X alignment, like fasta and ssearch, that provide additional
+X sequence context.
+X
+-x # Specify the penalty for a match to an 'X', independently of
+X the PAM matrix. Particularly useful for fastx3/fasty3,
+X where termination codons are encoded as 'X'.
+X
+-X Specifies offsets for the beginning of the query and library
+X sequence. For example, if you are comparing upstream
+X regions for two genes, and the first sequence contains 500
+X nt of upstream sequence while the second contains 300 nt of
+X upstream sequence, you might try:
+X
+X fasta -X "-500 -300" seq1.nt seq2.nt
+X
+X If the -X option is not used, FASTA assumes numbering starts
+X with 1. (You should double check to be certain the negative
+X numbering works properly.)
+X
+-y Set the width of the band used for calculating "optimized"
+X scores. For proteins and ktup=2, the width is 16. For
+X proteins with ktup=1, the width is 32 by default. For DNA
+X the width is 16.
+X
+-z -1,0,1,2,3,4,5
+X -z -1 turns off statistical calculations. -z 0 estimates the
+X significance of the match from the mean and standard
+X deviation of the library scores, without correcting for
+X library sequence length. -z 1 (the default) uses a weighted
+X regression of average score vs library sequence length; -z 2
+X uses maximum likelihood estimates of Lambda and K; -z 3 uses
+X Altschul-Gish parameters (Altschul and Gish, 1996); -z 4 - 5
+X uses two variations on the -z 1 strategy. -z 1 and -z 2 are
+X the best methods, in general.
+X
+-z 11,12,14,15
+X estimate the statistical parameters from shuffled copies of
+X each library sequence. This doubles the time required for a
+X search, but allows accurate statistics to be estimated for
+X libraries comprised of a single protein family.
+X
+-Z db_size
+X set the apparent size of the database to be used when
+X calculating expectation E() values. If you searched a
+X database with 1,000 sequences, but would like to have the
+X E()-values calculated in the context of a 100,000 sequence
+X database, use '-Z 100000'.
+X
+-1 sort output by init1 score (for compatibility with FASTP -
+X do not use).
+X
+-3 translate only three forward frames
+X
+For example:
+X
+X fasta -w 80 -a seq1.aa seq2.aa
+X
+would compare the sequence in seq1.aa to that in seq2.aa and
+display the results with 80 residues on an output line, showing
+all of the residues in both sequences. Be sure to enter the
+options before entering the file names, or just enter the options
+on the command line, and the program will prompt for the file
+names.
+X
+X (November, 1997) In addition, it is now possible to provide
+the fasta programs with the query sequence (fasta, fasty,
+ssearch, tfastx), or two sequences (prss, lalign, plalign) from
+the unix "stdin" stream. This makes it much easier to set up
+FASTA or PRSS WWW pages. To specify that stdin be used, rather
+than a file, the file name should be specified as '-' or '@' (the
+latter file name makes it possible to specify a subset of the
+sequence). Thus:
+X
+X cat query.aa | fasta -q @:25-75 s
+X
+would take residues 25-75 from query.aa and search the 's'
+library (see the discussion of FASTLIBS).
+X
+5.2. Environment variables
+X
+X Because the current version of the program allows the user
+to set virtually every option on the command line (except the
+ktup, which must be set as the third command line argument), only
+the FASTLIBS environment variable is routinely used.
+X
+FASTLIBS
+X specifies the location of the file which contains the list
+X of library descriptions, locations, and library types (see
+X section on finding library files).
+X
+6. Frequently Asked Questions
+X
+X (1) Which program should I use? See Table I.
+X
+X (2) How do I search with both DNA strands with fasta3 and
+X fastx3? With version 32 of the FASTA program package, all
+X searches that use DNA queries (e.g. fasta3, fastx3/y3)
+X examine both strands. To revert to earlier FASTA behavior
+X - only looking at the forward or reverse strand - use -3
+X to search only the forward strand and -i -3 to search only
+X the reverse strand.
+X
+X (3) When I search Genbank - the program reports: 0 residues in
+X 0 sequences. This typically happens because the program
+X does not know that you are searching a Genbank flatfile
+X database and is looking for a FASTA format database. Be
+X certain to specify the library type ("1" for Genbank
+X flatfile) with the database name.
+X
+X (4) What is the difference between fastx3 and fasty3 (or
+X tfastx3 and tfasty3)? [t]fastx3 uses a simpler codon
+X based model for alignments that does not allow frameshifts
+X in some codon positions (see ref. (Zhang et al., 1997)).
+X tfastx3 is about 30% faster, but tfasty3 can produce
+X higher quality alignments in some cases.
+X
+X (5) When I run fasta3 -q, I don't see any (or very little)
+X output, but I get lots of scores when I run interactively.
+X With the -Q option, the number of high scores displayed is
+X limited by the -E # cutoff, which is 10.0 for protein
+X comparisons, 2.0 for DNA comparisons, and 5.0 for
+X translated DNA:protein comparisons. In interactive mode
+X (without -Q), by default you see 20 high scores,
+X regardless of E() value.
+X
+X (6) What is ktup? All of the programs with fast in their name
+X use a computer science method called a lookup table to
+X speed the search. For proteins with ktup=2, this means
+X that the program does not look at any sequence alignment
+X that does not involve matching two identical residues in
+X both sequences. Likewise with DNA and ktup = 6, the
+X initial alignment of the sequences looks for 6 identical
+X adjacent nucleotides in both sequences. Because it is
+X less likely that two identical amino-acids will line up by
+X chance in two unrelated proteins, this speeds up the
+X comparison. But very distantly related sequences may
+X never have two identical residues in a row but will have
+X single aligned identities. In this case, ktup = 1 may
+X find alignments that ktup=2 misses.
+X
+X (7) Sometimes, in the list of best scores, the same sequence
+X is shown twice with exactly the same score. Sometimes,
+X the sequence is there twice, but the scores are slightly
+X different. When any of the fasta3 programs searches a long
+X sequence, it breaks the sequence up into overlapping
+X pieces. The length of the piece depends on the length of
+X the query and the particular program being used (it can
+X also be controlled with the -N #### option). Since the
+X pieces overlap by the length of the query sequence (or
+X 3*query_length for fastx/y3 and tfasta/x/y3), if the
+X highest scoring alignment is at the end of one piece, it
+X will be scored again at the beginning of the next piece.
+X If the alignment is not completely included in the
+X overlap region, one of the pieces will give a higher score
+X than the other. These duplications can be detected by
+X looking at the coordinates of the alignment. If either
+X the beginning or end coordinate is identical in two
+X alignments, the alignments are at least partially
+X duplicates.
+X
+As always, please inform me of bugs as soon as possible.
+X
+William R. Pearson
+Department of Biochemistry
+Jordan Hall Box 800733
+U. of Virginia
+Charlottesville, VA
+X
+wrp@virginia.EDU
+X
+7. References
+X
+Altschul, S. F., Boguski, M. S., Gish, W., and Wootton, J. C.
+(1994). Issues in searching molecular sequence databases. Nature
+Genet. 6,119-129.
+X
+Altschul, S. F. and Gish, W. (1996). Local alignment statistics.
+Methods Enzymol. 266,460-480.
+X
+Bairoch, A. and Apweiler, R. (1996). The Swiss-Prot protein
+sequence data bank and its new supplement TrEMBL. Nucleic Acids
+Res. 24,21-25.
+X
+Barker, W. C., Garavelli, J. S., Haft, D. H., Hunt, L. T.,
+Marzec, C. R., Orcutt, B. C., Srinivasarao, G. Y., Yeh, L. S. L.,
+Ledley, R. S., Mewes, H. W., Pfeiffer, F., and Tsugita, A.
+(1998). The PIR-International Protein Sequence Database. Nucleic
+Acids Res 26,27-32.
+X
+Dayhoff, M., Schwartz, R. M., and Orcutt, B. C. (1978). A model
+of evolutionary change in proteins. In Atlas of Protein Sequence
+and Structure, vol. 5, supplement 3. M. Dayhoff, ed. (Silver
+Spring, MD: National Biomedical Research Foundation), pp.
+345-352.
+X
+Jones, D. T., Taylor, W. R., and Thornton, J. M. (1992). The
+rapid generation of mutation data matrices from protein
+sequences. Comp. Appl. Biosci. 8,275-282.
+X
+Pearson, W. R. (2000). Flexible similarity searching with the
+FASTA3 program package. In Bioinformatics Methods and Protocols,
+S. Misener and S. A. Krawetz, ed. (Totowa, NJ: Humana Press), pp.
+185-219.
+X
+Pearson, W. R. and Lipman, D. J. (1988). Improved tools for
+biological sequence comparison. Proc. Natl. Acad. Sci. USA
+85,2444-2448.
+X
+Pearson, W. R. (1995). Comparison of methods for searching
+protein sequence databases. Prot. Sci. 4,1145-1160.
+X
+Pearson, W. R. (1996). Effective protein sequence comparison.
+Methods Enzymol. 266,227-258.
+X
+Pearson, W. R. (1998). Empirical statistical estimates for
+sequence similarity searches. J. Mol. Biol. 276,71-84.
+X
+Smith, T. F. and Waterman, M. S. (1981). Identification of common
+molecular subsequences. J. Mol. Biol. 147,195-197.
+X
+Wootton, J. C. and Federhen, S. (1993). Statistics of local
+complexity in amino acid sequences and sequence databases.
+Comput. Chem. 17,149-163.
+X
+Zhang, Z., Pearson, W. R., and Miller, W. (1997). Aligning a DNA
+sequence with a protein sequence. J. Computational Biology
+4,339-349.
+X
+SHAR_EOF
+chmod 0644 fasta3x.doc ||
+echo 'restore of fasta3x.doc failed'
+Wc_c="`wc -c < 'fasta3x.doc'`"
+test 41617 -eq "$Wc_c" ||
+ echo 'fasta3x.doc: original size 41617, current size' "$Wc_c"
+fi
+# ============= fasta3x.me ==============
+if test -f 'fasta3x.me' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta3x.me (File already exists)'
+else
+echo 'x - extracting fasta3x.me (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta3x.me' &&
+.nr pp 11
+.nr sp 11
+.nr tp 11
+.nr fp 10
+.nr fi 0n
+.sz 11
+.if t \{
+.po 1i
+.he 'FASTA3.DOC''Release 3.4, Fall, 2003'
+.fo ''- % -''
+\}
+.if n \{
+.po 0
+.na
+.nh
+\}
+.ll 6.5i
+.ce
+\fBCOPYRIGHT NOTICE\fP
+.lp
+Copyright 1988, 1991, 1992, 1994, 1995, 1996, 1999 by William
+R. Pearson and the University of Virginia. All rights reserved. The
+FASTA program and documentation may not be sold or incorporated into a
+commercial product, in whole or in part, without written consent of
+William R. Pearson and the University of Virginia. For further
+information regarding permission for use or reproduction, please
+contact: David Hudson, Assistant Provost for Research, University of
+Virginia, P.O. Box 9025, Charlottesville, VA 22906-9025, (434)
+924-6853
+.uh "\s+2The FASTA program package\s0"
+.uh "Introduction"
+.pp
+This documentation describes version 3 of the FASTA program
+package (see W. R. Pearson and D. J. Lipman (1988), "Improved Tools
+for Biological Sequence Analysis", PNAS 85:2444-2448 [.wrp881.]; W. R.
+Pearson (1996) "Effective protein sequence comparison"
+Meth. Enzymol. 266:227-258;[.wrp960.] Pearson et al. (1997) Genomics
+46:24-36;[.wrp971.] Pearson, (1999) Meth. in Molecular Biology
+132:185-219).[.wrp000.] Version 3 of the FASTA package contains many
+programs for searching DNA and protein databases and one program
+(prss3) for evaluating statistical significance from randomly shuffled
+sequences. Several additional analysis programs, including programs
+that produce local alignments, are available as part of version 2 of
+the FASTA package, which is still available.
+.pp
+This document is divided into three sections: (1) A summary overview of
+the programs in the FASTA3 package; (2) A guide to installing the
+programs and databases; (3) A guide to using the FASTA programs. The
+revision history of the programs can be found in the
+\fCreadme.v30..v34\fP files. The programs are easy to use, so if
+you are using them on a machine that is administered by someone else,
+you can skip section (2) and focus on (1) and (3) to learn how to use
+the programs. If you are installing the programs on your own
+machine, you will need to read section (2) carefully.
+.sh 1 "An overview of the \f(CBFASTA\fP programs"
+.pp
+Although there are a large number of programs in this package, they
+belong to three groups: (1)
+"Conventional" Library search programs:
+FASTA3, FASTX3, FASTY3, TFASTA3, TFASTX3, TFASTY3, SSEARCH3;
+(2)
+Programs for searching with short fragments:
+FASTS3, FASTF3, TFASTS3, TFASTF3;
+(3)
+Statistical significance: PRSS3.
+Programs that start with \f(CBfast\fP search protein
+databases, while \f(CBtfast\fP programs search translated DNA databases.
+Table I gives a brief description of the programs.
+.lp
+.(z
+.TS
+center;
+c s
+c s
+= =
+l lw(5.5i).
+\d\fBTable I. Comparison programs in the FASTA3 package\fP\u
+X
+\fCfasta3\fP T{
+Compare a protein sequence to a protein sequence
+database or a DNA sequence to a DNA sequence database using the FASTA
+algorithm.[.wrp881,wrp960.] Search speed and selectivity are
+controlled with the \fIktup\fP(wordsize) parameter. For protein
+comparisons, \fIktup\fP = 2 by default; \fIktup\fP =1 is more sensitive
+but slower. For DNA comparisons, \fIktup\fP=6 by default; \fIktup\fP=3 or
+\fIktup\fP=4 provides higher sensitivity; \fIktup\fP=1 should be used for
+oligonucleotides (DNA query lengths < 20).
+T}
+X
+\fCssearch3\fP T{
+Compare a protein sequence to a protein sequence
+database or a DNA sequence to a DNA sequence database using the
+Smith-Waterman algorithm.[.wat815.] \fCssearch3\fP is about 10-times
+slower than FASTA3, but is more sensitive for full-length protein
+sequence comparison.
+T}
+X
+\fCfastx3\fP/ \fCfasty3\fP T{
+Compare a DNA sequence to a protein
+sequence database, by comparing the translated DNA sequence in three
+frames and allowing gaps and frameshifts. \fCfastx3\fP uses a
+simpler, faster algorithm for alignments that allows frameshifts only
+between codons; \fCfasty3\fP is slower but produces better alignments
+with poor quality sequences because frameshifts are allowed within
+codons.
+T}
+X
+\fCtfastx3\fP/ \fCtfasty3\fP T{
+Compare a protein sequence to a DNA sequence
+database, calculating similarities with frameshifts to the forward and
+reverse orientations.
+T}
+X
+\fCtfasta3\fP T{
+Compare a protein sequence to a DNA sequence database, calculating
+similarities (without frameshifts) to the three forward and three reverse
+reading frames. \fCtfastx3\fP and \fCtfasty3\fP are preferred because
+they calculate similarity over frameshifts.
+T}
+X
+\fCfastf3/tfastf3\fP T{
+Compares an ordered peptide mixture, as would be obtained by
+Edman degradation of a CNBr cleavage of a protein, against a protein
+(\fCfastf\fP) or DNA (\fCtfastf\fP) database.
+T}
+X
+\fCfasts3/tfasts3\fP T{
+Compares a set of short peptide fragments, as would be obtained
+from mass-spec. analysis of a protein, against a
+protein (\fCfasts\fP) or DNA (\fCtfasts\fP) database.
+T}
+= =
+.TE
+.)z
+.sh 1 "Installing FASTA and the sequence databases"
+.sh 2 "Obtaining the libraries"
+.pp
+The FASTA program package does not include any protein or DNA sequence
+libraries. Protein databases are available on CD-ROM from the PIR and
+EMBL (see below), or via anonymous FTP from many different sources.
+As this document is updated in the fall of 1999, no DNA databases are
+available on CD-ROM from the major sequence databases: Genbank at the
+National Center for Biotechnology Information (\fCwww.ncbi.nlm.nih.gov\fP and
+\fCftp://ncbi.nlm.nih.gov\fP) and EMBL at the European Bioinformatics
+Institute (\fCwww.ebi.ac.uk\fP). However, the databases are available
+via anonymous FTP from both sites.
+.sh 3 "The GENBANK DNA sequence library"
+.pp
+Because of the large size of DNA databases, you will probably want to
+keep DNA databases in only one, or possibly two, formats. The FASTA3
+programs that search DNA databases - \fCfasta3\fP, \fCtfastx/y3\fP,
+and \fCtfasta3\fP can read DNA databases in Genbank flatfile (not
+ASN.1), FASTA, GCG/compressed-binary, BLAST1.4 (\fCpressdb\fP), and
+BLAST2.0 (\fCformatdb\fP) formats, as well as EMBL format. If you are
+also running the GCG suite of sequence analysis programs, you should
+use GCG/compressed-binary format or BLAST2.0 format for your
+\fCfasta3\fP searches. If not, BLAST2.0 is a good choice. These
+files are considerably more compact than Genbank flat files, and are
+preferred. The NCBI does not provide software for converting from
+Genbank flat files to Blast2.0 DNA databases, but you can use the
+Blast \fCformatdb\fP program to convert ASN.1 formatted Genbank files,
+which are available from the NCBI \fCftp\fP site.
+.pp
+The NCBI also provides the \fCnr\fP, \fCswissprot\fP, and several EST
+databases that are used by BLAST in FASTA format from:
+\fCftp://ncbi.nlm.nih.gov/blast/db\fP. These databases are updated
+nightly.
+.sh 3 "The NBRF protein sequence library"
+.pp
+You can obtain the PIR protein sequence database
+[.pir980.] from:
+.(l
+National Biomedical Research Foundation
+Georgetown University Medical Center
+3900 Reservoir Rd, N.W.
+Washington, D.C. 20007
+.)l
+or via ftp from \fCnbrf.georgetown.edu\fP or from the NCBI
+(\fCncbi.nlm.nih.gov/repository/PIR\fP). The data in the \fCascii\fP
+directory is in PIR Codata format, which is not widely used. I
+recommend the PIR/VMS format data (libtype=5) in the \fCvms\fP
+directory.
+.sh 3 "The EBI/EMBL CD-ROM libraries"
+.pp
+The European Bioinformatics Institute (EBI) distributes both the EMBL
+DNA database and the SwissProt database on CD-ROM,[.apw961.] and they
+are available from:
+.(l
+EMBL-Outstation European Bioinformatics Institute
+Wellcome Trust Genome Campus,
+Hinxton Hall
+Hinxton,
+Cambridge CB10 1SD
+United Kingdom
+Tel: +44 (0)1223 494444
+Fax: +44 (0)1223 494468
+Email: DATALIB@ebi.ac.uk
+.)l
+In addition, the SWISS-PROT protein sequence database is available via
+anonymous FTP from \fCftp://ftp.expasy.ch/databases/swiss-prot/\fP
+(also see \fCwww.expasy.ch\fP).
+.sh 2 "Finding the libraries: FASTLIBS"
+.pp
+The major problem that most new users of the FASTA package have is in
+setting up the program to find the databases and their library type.
+In general, if you cannot get \fCfasta3\fP to read a sequence
+database, it is likely that something is wrong with the \fCFASTLIBS\fP
+file. A common problem is that the database file is found, but either
+no sequences are read, or an incorrect number of entries is read.
+This is almost always because the library format (\fClibtype\fP) is
+incorrect. Note that a type 5 file (PIR/VMS format) can be read
+as a type 0 (default FASTA) format file, and the number of entries
+will be correct, but the sequence lengths will not.
+.pp
+All the search programs in the FASTA3 package use the environment
+variable \fCFASTLIBS\fP to find the protein and DNA sequence libraries. The
+\fCFASTLIBS\fP variable contains the name of a file that has the actual
+filenames of the libraries. The \fCfastlibs\fP file included with the
+distribution is an example of a file that can be referred to by
+FASTLIBS. To use the \fCfastlibs\fP file, type:
+.(l
+\fCsetenv FASTLIBS /usr/lib/fasta/fastgbs\fP (BSD UNIX/csh)
+or
+\fCexport FASTLIBS=/usr/lib/fasta/fastgbs\fP (SysV UNIX/ksh)
+.)l
+Then edit the \fCfastlibs\fP file to indicate where the protein and DNA
+sequence libraries can be found. If you have a hard disk and your
+protein sequence library is kept in the file \fC/usr/lib/seq/aabank.lib\fP and
+your Genbank DNA sequence library is kept in the directory:
+\fC/usr/lib/genbank\fP, then \fCfastgbs\fP might contain:
+.ne 8
+.(l
+.ft C
+NBRF Protein$0P/usr/lib/seq/aabank.lib 0
+SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
+GB Primate$1P@/usr/lib/genbank/gpri.nam
+GB Rodent$1R@/usr/lib/genbank/grod.nam
+GB Mammal$1M@/usr/lib/genbank/gmammal.nam
+^ 1 ^^^^ 4 ^ ^
+X 23 (5)
+.ft R
+.)l
+The first line of this file says that there is a copy of the NBRF
+protein sequence database (which is a protein database) in the file
+\fC/usr/lib/seq/aabank.lib\fP that can be selected by typing "P" on the
+command line or when the database menu is presented.
+.pp
+Note that there are 4 or 5 fields in the lines in \fCfastgbs\fP. The first
+field is the description of the library which will be displayed by
+FASTA; it ends with a '$'. The second field (1 character), is a 0 if
+the library is a protein library and 1 if it is a DNA library. The
+third field (1 character) is the character to be typed to select the
+library.
+.pp
+The fourth field is the name of the library file. In the example
+above, the \fC/usr/lib/seq/aabank.lib\fP file contains the entire
+protein sequence library. However the DNA library file names are
+preceded by a '@', because these files (\fCgpri.nam, grod.nam,
+gmammal.nam\fP) do not contain the sequences; instead they contain the names
+of the files which contain the sequences. This is done because the
+GENBANK DNA database is broken down into a large number of smaller
+files. In order to search the entire primate database, you must
+search more than a dozen files.
+.pp
+In addition, an optional fifth field can be used to specify the format
+of the library file. Alternatively, you can specify the library
+format in a file of file names (a file preceded by an '@'). This
+field must be separated from the file name by a space character
+('\ '). In the example above, the \fCaabank.lib\fP file is
+in Pearson/FASTA format, while the \fCswiss.seq\fP file is in PIR/VMS format
+(from the EMBL CD-ROM). Currently, FASTA can read the following formats:
+.(l I
+.ft C
+0 Pearson/FASTA (>SEQID - comment/sequence)
+1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
+2 NBRF CODATA (ENTRY/SEQUENCE)
+3 EMBL/SWISS-PROT (ID/DE/SQ)
+4 Intelligenetics (;comment/SEQID/sequence)
+5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
+6 GCG (version 8.0) Unix Protein and DNA (compressed)
+11 NCBI Blast1.3.2 format (unix only)
+12 NCBI Blast2.0 format (unix only, fasta32t08 or later)
+.ft R
+.)l
+In particular, this version will work with the EMBL and PIR VMS
+formats that are distributed on the EMBL CD-ROM. The latter format
+(PIR VMS) is much faster to search than EMBL format. This release
+also works with the protein and DNA database formats created for the
+BLASTP and BLASTN programs by SETDB and PRESSDB and with the new NCBI
+search format. If a library format is not specified, for example,
+because you are just comparing two sequences, Pearson/FASTA (format 0)
+is used by default. To specify a library type on the command line,
+add it to the library filename and surround the filename and library
+type in quotes:
+.(l
+.ft C
+fasta3 query.file "/seqdb/genbank/gbpri1.seq 1"
+.ft P
+.)l
+.pp
+You can specify a group of library files by putting a '@' symbol
+before a file that contains a list of file names to be searched. For
+example, if @gmam.nam is in the fastgbs file, the file "gmam.nam"
+might contain the lines:
+.(l
+.ft C
+</seqdb/genbank
+gbpri1.seq 1
+gbpri2.seq 1
+gbpri3.seq 1
+gbpri4.seq 1
+gbrod.seq 1
+gbmam.seq 1
+.ft R
+.)l
+In this case, the line beginning with a '<' indicates the directory
+the files will be found in. The remaining lines name the actual
+sequence files. So the first sequence file to be searched would be:
+.(l
+.ft C
+/seqdb/genbank/gbpri1.seq
+.ft R
+.)l
+The notation "\fC<PIRNAQ:\fP" might be used under the VAX/VMS operating
+system. Under UNIX, the trailing '/' is left off, so the library
+directory might be written as "\fC</usr/seqlib\fP".
+.pp
+The FASTA programs can search a database composed of different files
+in different sequence formats. For example, you may wish to search
+the Genbank files (in GenBank flat file format) and the EMBL DNA
+sequence database on CD-ROM. To do this, you simply list the names
+and filetypes of the files to be searched in a file of filenames. For
+example, to search the mammalian portion of Genbank, the unannotated
+portion of Genbank, and the unannotated portion of the EMBL library,
+you could use the file:
+.(l I
+.ft C
+</usr/lib/DNA
+gbpri.seq 1
+\&# (this '#' causes the program to display the size of the library)
+gbrod.seq 1
+\&...
+gbmam.seq 1
+\&...
+gbuna.seq 1
+\&...
+unanno.seq 5
+\&#
+.ft R
+.)l
+.(l I F
+You do not need to include library format numbers if you only use the
+Pearson/FASTA version of the PIR protein sequence library. If no
+library type is specified, the program assumes that type 0 is being
+used.
+.)l
+.pp
+Test the setup by running FASTA. Enter the sequence
+file '\fCmgstm1.aa\fP' when the program requests it (this file is
+included with the programs). The program should then ask you to
+select a protein sequence library. Alternatively, if you run the
+TFASTA program and use the mgstm1.aa query sequence, the program
+should show you a selection of DNA sequence libraries.
+Once the fastgbs file has been set up correctly, you can
+set FASTLIBS=fastgbs in your AUTOEXEC.BAT file, and you will not need to
+remember where the libraries are kept or how they are named.
+.ne 8
+.sh 1 "Using the FASTA Package"
+.sh 2 "Overview"
+.pp
+The FASTA sequence comparison programs all require similar
+information, the name of a query sequence file, a library file, and
+the \fIktup\fP parameter. All of the programs can accept arguments
+on the command line, or they will prompt for the file names and
+\fIktup\fP value.
+.lp
+To use FASTA, simply type:
+.(l
+.ft C
+\f(CBFASTA\fP
+and you will be prompted for :
+.in +0.5i
+the name of the test sequence file
+the name of the library file
+and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
+(ktup of 2 is about 5 times faster than ktup = 1)
+.ft R
+.)l
+The program can also be run by typing
+.(l
+.ft C
+FASTA test.aa /lib/bigfile.lib \fIktup\fP (1 or 2)
+.ft R
+.)l
+.lp
+Included with the package are several test files.
+To check to make certain that everything is working, you can try:
+.(l
+.ft C
+fasta musplfm.aa prot_test.lib
+and
+tfastx mgstm1.aa gst.nlib
+.ft R
+.)l
+.sh 2 "Sequence files"
+.pp
+The \fCfasta3\fP programs know about three kinds of sequence files:
+(1) plain sequence files - files that contain nothing but
+sequence residues - can only be used as query sequences. (2) FASTA
+format files. These are the same as plain sequence files, except that
+each sequence is preceded by a comment line with a '>' in the first
+column. (3) distributed sequence libraries (this is a broad class that
+includes the NBRF/PIR VMS and blocked ascii formats, Genbank flat-file
+format, EMBL flat-file format, and Intelligenetics format). All of the
+files that you create should be of type (1) or (2). FASTA format
+files (ones with a '>' and comment before the sequence) are preferred,
+because they can be used as query or library sequence files by all of
+the programs.
+.pp
+I have included several sample test files, \fC*.aa\fP and \fC*.seq\fP
+as well as two small sequence libraries, \fCprot_test.lib\fP and
+\fCgst.nlib\fP. The first line may begin with a '>' followed by a comment.
+Spaces and tabs (and anything else that is not an amino-acid code) are
+ignored.
+.pp
+Library files should have the form:
+.(l
+.ft C
+>Sequence name and identifier
+A F A S Y T .... actual sequence.
+F S S .... second line of sequence.
+>Next sequence name and identifier
+.ft R
+.)l
+This is often referred to as "FASTA" or "Pearson" format. You can
+build your own library by concatenating several sequence files. Just
+be sure that each sequence is preceded by a line beginning with a '>'
+with a sequence name.
+.pp
+The test file should not have lines longer than 120 characters, and
+sequences entered with word processors should use a document
+mode, with normal carriage returns at the end of lines.
+.pp
+A different format is required to specify the ordered peptide mixture for \fCfastf3/tfastf3\fP. For example:
+.(l I
+.ft C
+>mgstm1
+MGCEN,
+MIDYP,
+MLLAY,
+MLLGY
+.ft P
+.)l
+indicates \fCm\fP in the first position of all four peptides (as
+from CNBr), \fCG, I, L\fP (twice) in the second position (first cycle),
+\fCC,D,L\fP (twice) in the third position, etc. The commas (\fC,\fP)
+are required to indicate the number of fragments in the mixture, but
+there should be no comma after the last residue.
+.pp
+For the \fCfasts3/tfasts3\fP program, the format is the same, except that there
+is no requirement for the peptides to be the same length.
+.sh 1 "Statistical Significance"
+.pp
+All the programs in the FASTA3 package attempt to calculate accurate
+estimates of the statistical significance of a match. For
+\fCfasta3\fP, \fCssearch3\fP, and \fCfastx3/y3\fP, these estimates are
+very accurate.[.wrp971,wrp981.] Altschul et al. [.alt940.] provide
+an excellent review of the statistics of local similarity scores.
+Local sequence similarity scores follow the extreme value
+distribution, so that P(s > x) = 1 - exp(-exp(-lambda(x-u))) where u =
+ln(Kmn)/lambda and m,n are the lengths of the query and library
+sequences. This formula can be rewritten as: 1 - exp(-Kmn exp(-lambda
+x)), which shows that the average score for an unrelated library
+sequence increases with the logarithm of the length of the library
+sequence. The \fCfasta3\fP programs use simple linear regression
+against the log of the library sequence length to calculate a
+normalized "z-score" with mean 50, regardless of library sequence
+length, and variance 10. (Several other estimation methods are
+available with the \fC\-z\fP option.) These z-scores can then be used
+with the extreme value distribution and the Poisson distribution (to
+account for the fact that each library sequence comparison is an
+independent test) to calculate the number of library sequences expected to
+obtain a score greater than or equal to the score obtained in the
+search. The original idea and routines to do the linear regression on
+library sequence length were provided by Phil Green, U. Washington. This
+version uses a slightly different strategy for fitting the data than
+those originally provided by Dr. Green.
+.pp
+The expected number of sequences is plotted in the histogram using an
+"*". Since the parameters for the extreme value distribution are not
+calculated directly from the distribution of similarity scores, the
+pattern of "*'s" in the histogram gives a qualitative view of how well
+the statistical theory fits the similarity scores calculated by the
+programs. For \fCfasta3\fP, if optimized scores are calculated for
+each sequence in the database (the default), the agreement between the
+actual distribution of "z-scores" and the expected distribution based
+on the length dependence of the score and the extreme value
+distribution is usually very good. Likewise, the distribution of
+\fCssearch3\fP Smith-Waterman scores typically agrees closely with the
+actual distribution of "z-scores." The agreement with unoptimized
+scores, \fIktup=2\fP, is often not very good, with too many high
+scoring sequences and too few low scoring sequences compared with the
+predicted relationship between sequence length and similarity score.
+In those cases, the expectation values may be overestimates.
+.pp
+With version 33t01, all the FASTA programs also report a "bit" score,
+which is equivalent to the bit score reported by BLAST2. The
+FASTA33/BLAST2 bit score is calculated as: (lambda*S - ln K)/ln 2,
+where S is the raw similarity score, lambda and K are statistical
+parameters estimated from the distribution of unrelated sequence
+similarity scores. The statistical significance of a given bit score
+depends on the lengths of the query and library sequences and the size
+of the library, but a 1 bit increase in score corresponds to a 2-fold
+reduction in expectation; a 10-bit increase implies 1000-fold lower
+expectation, etc.
+.pp
+The statistical routines assume that the library contains a large
+sample of unrelated sequences. If this is not true, then statistical
+parameters can be estimated by using the \fC\-z 11\-15\fP options.
+\fC\-z\fP options greater than 10 calculate a shuffled similarity score
+for each library sequence, in addition to the unshuffled score, and
+estimate the statistical parameters from the scores of the shuffled
+sequences. If there are fewer than 20 sequences in the library, the
+statistical calculations are not done.
+.pp
+For protein searches, library sequences with E() values < 0.01 for
+searches of a 10,000 entry protein database are almost always
+homologous. Frequently sequences with E()-values from 1 - 10 are
+related as well, but unrelated sequences ( 1 \- 10 per search) will
+have scores in this range as well. Remember, however, that these E()
+values also reflect differences between the amino acid composition of
+the query sequence and that of the "average" library sequence. Thus,
+when searches are done with query sequences with "biased" amino-acid
+composition, unrelated sequences may have "significant" scores because
+of sequence bias. \fCPRSS3\fP can address this problem by calculating
+similarity scores for random sequences with the same length and amino
+acid composition.
+.sh 1 "Options"
+.pp
+Command line options are available to change the scoring parameters
+and output display. \fBCommand line options must precede other program
+arguments, such as the query and library file names.\fP
+.sh 2 "Command line options"
+.ip "-a"
+(fasta3, ssearch3 only) show both sequences in their entirety.
+.ip "-A"
+force Smith-Waterman alignments for fasta3 DNA sequences. By default,
+only fasta3 protein sequence comparisons use Smith-Waterman alignments.
+.ip "-B"
+Show normalized score as a z-score, rather than a bit-score in the list
+of best scores.
+.ip "-b #"
+Number of sequence scores to be shown on output. In the absence of
+this option, fasta (and tfasta and ssearch) display all library
+sequences obtaining similarity scores with expectations less than
+10.0 if optimized scores are used, or 2.0 if they are not. The -b
+option can limit the display further, but it will not cause additional
+sequences to be displayed.
+.ip "-c #"
+Threshold score for optimization (OPTCUT). Set "-c 1" to
+optimize every sequence in a database.
+.ip "-E #"
+Limit the number of scores and alignments shown based on the
+expected number of scores. Used to override the expectation value of 10.0
+used by default. When used with -Q, -E 2.0 will show all library sequences
+with scores with an expectation value <= 2.0.
+.ip "-d #"
+Maximum number of alignments to be displayed. Ignored if "-Q" is not
+used.
+.ip "-f"
+Penalty for the first residue in a gap (-12 by default for proteins,
+-16 for DNA, -15 for FAST[XY]/TFAST[XY]).
+.ip "-F #"
+Limit the number of scores and alignments shown based on the expected
+number of scores. "-E #" sets the highest E()-value shown; "-F #" sets
+the lowest E()-value. Thus, "-F 0.0001" will not show any matches or
+alignments with E() < 0.0001. This allows one to skip over close
+relationships in searches for more distant relationships.
+.ip "-g"
+Penalty for additional residues in a gap (-2 by default for proteins,
+-4 for DNA, -3 for FAST[XY]/TFAST[XY]).
+.ip "-h"
+Penalty for frameshift (fastx3/y3, tfastx3/y3 only).
+.ip "-H"
+Omit histogram.
+.ip "-i"
+Invert (reverse complement) the query sequence if it is DNA. For
+tfasta3/x3/y3, search the reverse complement of the library sequence
+only.
+.ip "-j #"
+Penalty for frameshift within a codon (fasty3/tfasty3 only).
+.ip "-l file"
+Location of library menu file (FASTLIBS).
+.ip "-L"
+Display more information about the library sequence in the alignment.
+.ip "-M low-high"
+Range of amino acid sequence lengths to be included in the search.
+.ip "-m #"
+Specify alignment type: 0, 1, 2, 3, 4, 5, 6, 9, 10
+.(l I
+.ft C
+X \-m 0 \-m 1 \-m 2 \-m 3 \-m 4
+.ft C
+MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT
+::..:: ::: xx X ..KS..Y... MWKSCGYPYT ----------
+MWKSCGYPYT MWKSCGYPYT
+.ft P
+.)l
+.ip
+\fC\-m 5\fP provides a combination of \fC\-m 4\fP and
+\fC\-m 0\fP. \fC\-m 6\fP provides \fC\-m 5\fP plus HTML formatting.
+.ip "-m 9"
+provides coordinates and scores with the best score information.
+A simple "\fC -m 9\fP" extends the normal best score information:
+.(l
+.ft C
+The best scores are: opt bits E(14548)
+XXURTG4 glutathione transferase (EC 2.5.1.18) 4 - ( 219) 1248 291.7 1.1e-79
+.ft P
+.)l
+to include the additional information (on the same line, separated by
+a <tab>):
+.(l
+.ft C
+%_id %_gid sw alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs
+0.771 0.771 1248 218 1 218 1 218 1 218 1 219 0 0 0
+.ft P
+.)l
+\fC -m 9c\fP provides additional information: an encoded alignment string. Thus:
+.(l I
+.ft C
+X 10 20 30 40 50 60 70
+GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
+XXURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+X 20 30 40 50 60
+.ft P
+.)l
+would be encoded:
+.(l
+.ft C
+=23+9=13-2=10-1=3+1=5
+.ft P
+.)l
+The alignment encoding is with respect to the alignment, not the
+sequences. The coordinate of the alignment is given earlier in the
+"\fC -m 9c\fP" line.
+.ip "-m 10"
+\fC\-m 10\fP is a new, parseable format for use
+with other programs. See the file "readme.v20u4" for a more complete
+description.
+.ip
+As of version "fa34t23b2", it has become possible to combine independent
+"\fC\-m\fP" options. Thus, one can use "\fC\-m 1 -m 6 -m 9\fP".
+.ip "-M low\-high"
+Include library sequences (proteins only) with lengths between low and
+high.
+.ip "-n"
+Force the query sequence to be treated as a DNA sequence. This is
+particularly useful for query sequences that contain a large number of
+ambiguous residues, e.g. transcription factor binding sites.
+.ip "-O"
+Send copy of results to "filename." Helpful for environments without
+STDOUT (mostly for the Macintosh).
+.ip "-o "
+Turn off default optimization of all scores greater than OPTCUT. Sort
+results by "initn" scores (reduces the accuracy of statistical
+estimates).
+.ip "-p"
+Force query to be treated as protein sequence.
+.ip "-Q,-q"
+Quiet - does not prompt for any input. Writes scores and alignments
+to the terminal or standard output file.
+.ip "-r"
+Specify match/mismatch scores for DNA comparisons. The default is
+"+5/-4". "+3/-2" can perform better in some cases.
+.ip "-R file"
+Save a results summary line for every sequence in the sequence
+library. The summary line includes the sequence identifier,
+superfamily number (if available) position
+in the library, and the similarity scores calculated. This option can
+be used to evaluate the sensitivity and selectivity of different
+search strategies.[.wrp951,wrp981.]
+.ip "-s file"
+Specify the scoring matrix file. \fCfasta3\fP uses the same scoring
+matrices as Blast1.4/2.0. Several scoring matrix files are included
+in the standard distribution. For protein sequences: \fCcodaa.mat\fP
+- based on minimum mutation matrix; \fCidnaa.mat\fP - identity matrix;
+\fCpam250.mat\fP - the PAM250 matrix developed by Dayhoff et
+al.;[.day787.] \fCpam120.mat\fP - a PAM120 matrix. The default
+scoring matrix is BLOSUM50 ("-s BL50"). Other matrices available from
+within the program are: PAM250/"-s P250", PAM120/"-s P120", PAM40/"-s
+P40", PAM20/"-s P20", MDM10 - MDM40/"-s M10 \- M40" (MDM are modern
+PAM matrices from Jones et al.,[.tay925.]), BLOSUM50, 62, and 80/"-s
+BL50", "-s BL62", "-s BL80".
+.ip "-S"
+Treat lower-case characters in the query or library sequences as
+"low-complexity" ("seg"-ed) residues. Traditionally, the "seg"
+program [.woo935.] is used to remove low complexity regions in DNA
+sequences by replacing the residues with an "X". When the "-S" option
+is used, the FASTA33 (and later) programs provide a potentially more
+informative approach. With "-S", lower case characters in the query
+or database sequences are treated as "X"'s during the initial scan,
+but are treated as normal residues during the final alignment display.
+Since statistical significance is calculated from the similarity score
+calculated during the library search, when the lower case residues are
+"X"'s, low complexity regions will not produce statistically
+significant matches. However, if a significant alignment contains low
+complexity regions, their alignment is shown. With "-S", lower case
+characters may be included in the alignment to indicate low complexity
+regions, and the final alignment score may be higher than the score
+obtained during the search.
+.ip
+The \fCpseg\fP program can be used to produce databases (or query
+sequences) with lower case residues indicating low complexity regions
+using the command:
+.(l I
+\fCpseg database.fasta -z 1 -q > database.lc_seg\fP
+.)l
+(\fCseg\fP can also be used with some post processing, see readme.v33tx.)
+.ip
+The \fC-S\fP option should always be used with \fCFASTX/Y\fP and
+\fCTFASTX/Y\fP because out of frame translations often generate
+low-complexity protein sequences. However, only lower case characters
+in the protein sequence (or protein database) are masked; lower case
+DNA sequences are translated into upper case protein sequences, and
+not treated as low complexity by the translated alignment programs.
+.ip "-t #"
+Translation table - tfasta3, fastx3, tfastx3, fasty3, and
+tfasty3 now support the BLAST translation tables. See
+\fChttp://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi\fP.
+.ip
+In addition, "\-t t" or "\-t t#" turns on the addition of an implicit termination
+codon to a protein:translated DNA match. That is, each protein
+sequence implicitly ends with "*", which matches the termination codes
+for the appropriate genetic code. "\-t t#" sets implicit termination
+and a different genetic code.
+.ip "-U"
+Treat the query sequence as an RNA sequence. In addition to selecting a
+DNA/RNA alphabet, this option causes changes to the scoring matrix so
+that 'G:A' , 'T:C' or 'U:C' are scored as 'G:G'.
+.ip "-V str"
+It is now possible to specify some annotation characters that can be
+included (and will be ignored), in the query sequence file. Thus, one
+might have a file with: \fC"ACVS*ITRLFT?"\fP, where "*" and "?" are
+used to indicate phosphorylation. By giving the option \fC\-V '*?'\fP,
+those characters in the query will be moved to an "annotation string",
+and alignments that include the annotated residues will be highlighted
+with the appropriate character above the sequence (on the number line).
+.ip "-w #"
+Line length (width) = number (<200)
+.ip "-W #"
+X context length (default is 1/2 of line width -w) for alignment,
+like fasta and ssearch, that provide additional sequence context.
+.ip "-x #match,#mismatch"
+Specify the penalty for a match to an 'X', and mismatch to 'X',
+independently of the PAM matrix. Particularly useful for
+\fCfastx3/fasty3\fP, where termination codons are encoded as 'X'.
+.ip "-X \"off1 off2\""
+Specifies offsets for the beginning of the query and library sequence.
+For example, if you are comparing upstream regions for two genes, and
+the first sequence contains 500 nt of upstream sequence while the
+second contains 300 nt of upstream sequence, you might try:
+.(l I
+\fCfasta -X "-500 -300" seq1.nt seq2.nt\fP
+.)l
+If the -X option is not used, FASTA assumes numbering starts with 1.
+(You should double check to be certain the negative numbering works
+properly.)
+.ip "-y"
+Set the width of the band used for calculating "optimized" scores.
+For proteins and ktup=2, the width is 16. For proteins with ktup=1,
+the width is 32 by default. For DNA the width is 16.
+.ip "-z -1,0,1,2,3,4,5"
+\fC\-z -1\fP turns off statistical calculations. \fC\-z 0\fP estimates
+the significance of the match from the mean and standard deviation of
+the library scores, without correcting for library sequence length.
+\fC\-z 1\fP (the default) uses a weighted regression of average score
+vs library sequence length; \fC\-z 2\fP uses maximum likelihood
+estimates of
+.if t \(*l
+.if n Lambda
+and \fIK\fP; \fC\-z 3\fP uses Altschul-Gish
+parameters;[.alt960.] \fC\-z 4 \- 5\fP uses two variations on the
+\fC\-z 1\fP strategy. \fC\-z 1\fP and \fC\-z 2\fP are the best methods,
+in general.
+.ip "-z 11,12,14,15"
+estimate the statistical parameters from shuffled copies of each
+library sequence. This doubles the time required for a search, but
+allows accurate statistics to be estimated for libraries comprised of
+a single protein family.
+.ip "-Z db_size"
+set the apparent size of the database to be used when calculating
+expectation E() values. If you searched a database with 1,000
+sequences, but would like to have the E()-values calculated in the
+context of a 100,000 sequence database, use '-Z 100000'.
+.ip "-1"
+sort output by init1 score (for compatibility with FASTP - do not
+use).
+.ip "-3"
+translate only three forward frames
+.sp
+.lp
+For example:
+.(l
+\fCfasta -w 80 -a seq1.aa seq2.aa\fP
+.)l
+would compare the sequence in seq1.aa to that in seq2.aa and display the
+results with 80 residues on an output line, showing all of the residues
+in both sequences. Be sure to enter the options before entering the file
+names, or just enter the options on the command line, and the program will
+prompt for the file names.
+.sp
+.pp
+(November, 1997) In addition, it is now possible to provide the fasta
+programs with the query sequence (fasta, fasty, ssearch, tfastx), or
+two sequences (prss, lalign, plalign) from the unix "stdin" stream. This
+makes it much easier to set up FASTA or PRSS WWW pages. To specify
+that stdin be used, rather than a file, the file name should be
+specified as '-' or '@' (the latter file name makes it possible to
+specify a subset of the sequence).
+Thus:
+.(l
+cat query.aa | fasta -q @:25-75 s
+.)l
+would take residues 25-75 from query.aa and search the 's' library
+(see the discussion of FASTLIBS).
+.sh 2 "Environment variables"
+.pp
+Because the current version of the program allows the user to set
+virtually every option on the command line (except the \fIktup\fP,
+which must be set as the third command line argument), only the
+\fCFASTLIBS\fP environment variable is routinely used.
+.ip "FASTLIBS"
+specifies the location of the file which contains the list of library
+descriptions, locations, and library types (see section on finding
+library files).
+.sh 1 "Frequently Asked Questions (FAQs)"
+.np
+\fIWhich program should I use?\fP See Table I.
+.np
+\fIHow do I search with both DNA strands with\fP \fCfasta3\fP \fIand\fP
+\fCfastx3\fP? With version 32 of the FASTA program package, all
+searches that use DNA queries (e.g. \fCfasta3\fP, \fCfastx3/y3\fP)
+examine both strands. To revert to earlier FASTA behavior - only
+looking at the forward or reverse strand - use \fC\-3\fP to search only
+the forward strand and \fC\-i -3\fP to search only the reverse strand.
+.np
+\fIWhen I search Genbank - the program reports:\fP \fC0 residues in 0
+sequences\fP. This typically happens because the program does not
+know that you are searching a Genbank flatfile database and is looking
+for a FASTA format database. Be certain to specify the library type
+("1" for Genbank flatfile) with the database name.
+.np
+\fIWhat is the difference between\fP \fCfastx3\fP \fIand\fP \fCfasty3\fP \fI(or\fP
+\fCtfastx3\fP \fIand\fP \fCtfasty3\fP\fI)?\fP \fC[t]fastx3\fP uses a simpler
+codon based model for alignments that does not allow frameshifts in
+some codon positions (see ref. [.wrp971.]). \fCtfastx3\fP is about
+30% faster, but \fCtfasty3\fP can produce higher quality alignments in
+some cases.
+.np
+\fIWhen I run\fP \fCfasta3 -q\fP, I don't see any (or very little)
+output, but I get lots of scores when I run interactively. With the
+\fC\-Q\fP option, the number of high scores displayed is limited by the
+\fC\-E #\fP cutoff, which is 10.0 for protein comparisons, 2.0 for DNA
+comparisons, and 5.0 for translated DNA:protein comparisons. In
+interactive mode (without \fC\-Q\fP), by default you see 20 high
+scores, regardless of \fCE()\fP value.
+.np
+\fIWhat is ktup\fP \- All of the programs with \fCfast\fP in their
+name use a computer science method called a lookup table to speed the
+search. For proteins with \fIktup\fP=2, this means that the program
+does not look at any sequence alignment that does not involve matching
+two identical residues in both sequences. Likewise with DNA and
+\fIktup\fP = 6, the initial alignment of the sequences looks for 6
+identical adjacent nucleotides in both sequences. Because it is less
+likely that two identical amino-acids will line up by chance in two
+unrelated proteins, this speeds up the comparison. But very distantly
+related sequences may never have two identical residues in a row but
+will have single aligned identities. In this case, \fIktup\fP = 1 may
+find alignments that \fIktup\fP=2 misses.
+.np
+\fISometimes, in the list of best scores, the same sequence is shown
+twice with exactly the same score. Sometimes, the sequence is there
+twice, but the scores are slightly different.\fP When any of the
+\fCfasta3\fP programs searches a long sequence, it breaks the sequence
+up into \fIoverlapping\fP pieces. The length of the piece depends on
+the length of the query and the particular program being used (it can
+also be controlled with the -N #### option). Since the pieces overlap
+by the length of the query sequence (or 3*query_length for fastx/y3
+and tfasta/x/y3), if the highest scoring alignment is at the end of
+one piece, it will be scored again at the beginning of the next piece.
+If the alignment is not completely included in the overlap region,
+one of the pieces will give a higher score than the other. These
+duplications can be detected by looking at the coordinates of the
+alignment. If either the beginning or end coordinate is identical in
+two alignments, the alignments are at least partially duplicates.
+.lp
+As always, please inform me of bugs as soon as possible.
+.sp
+.nf
+William R. Pearson
+Department of Biochemistry
+Jordan Hall Box 800733
+U. of Virginia
+Charlottesville, VA
+X
+wrp@virginia.EDU
+X
+.sh 1 "References"
+.[]
+SHAR_EOF
+chmod 0644 fasta3x.me ||
+echo 'restore of fasta3x.me failed'
+Wc_c="`wc -c < 'fasta3x.me'`"
+test 39642 -eq "$Wc_c" ||
+ echo 'fasta3x.me: original size 39642, current size' "$Wc_c"
+fi
+# ============= fasta_func.doc ==============
+if test -f 'fasta_func.doc' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasta_func.doc (File already exists)'
+else
+echo 'x - extracting fasta_func.doc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasta_func.doc' &&
+Overall structure of the fasta3 program. (Some functions
+are different for translated comparisons FASTX, FASTY, TFASTX, TFASTY.)
+X
+main() { /* complib.c structure */
+X
+X /* get command line arguments, set up initial parameter values */
+X initenv (argc, argv, &m_msg, &pst,&aa0[0],outtty);
+X
+X /* allocate space for sequence arrays */
+X /* get the query file name if not on command line */
+X /* get query */
+X m_msg.n0 = getseq (m_msg.tname,aa0[0], MAXTOT, m_msg.libstr,&pst.dnaseq,
+X &m_msg.sq0off);
+X
+X /* reset some parameters if DNA */
+X resetp (aa0[0], m_msg.n0, &m_msg, &pst);
+X
+X /* get a library name if not on command line */
+X libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
+X /* use library name to build list of library files */
+X libselect(m_msg.lname, &m_msg);
+X
+X /* get additional options (ktup, prss-window) if not specified */
+X query_parm (&m_msg, &pst);
+X
+X /* do final parameter initializations */
+X last_init(&m_msg, &pst);
+X
+X /* set up structures for saved scores[20000], statistics[50000] */
+X nbest = 0;
+X
+X /* initialize the comparison function */
+X init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
+X
+X /* open the library */
+X for (iln = 0; iln < m_msg.nln; iln++) {
+X if (openlib(m_msg.lbnames[iln],m_msg)!=1) {continue;}
+X }
+X
+X /* get the library sequence and do the comparison */
+X while ((n1=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) {
+X do_work (aa0[itt], m_msg.n0, aa1, n1, itt, &pst, f_str[itt], &rst);
+X
+X /* save the scores */
+X /* save the scores for statistics */
+X }
+X
+X /* all done with all libraries */
+X process_hist(stats,nstats,pst);
+X
+X /* sort the scores by z-value */
+X sortbestz (bptr, nbest);
+X
+X /* sort the scores by E-value */
+X sortbeste (bptr, nbest);
+X
+X /* print the histogram */
+X prhist (stdout,m_msg,pst,gstring2);
+X
+X /* show the high scoring sequences */
+X showbest (stdout, aa0, aa1, maxn, bptr, nbest, qlib, &m_msg, pst,
+X f_str, gstring2);
+X
+X /* show the high-scoring alignments */
+X showalign(outfd, aa0, aa1, maxn, bptr, nbest, qlib, m_msg, pst,
+X f_str, gstring2);
+X
+X /* thats all folks !!! */
+}
+X\f
+================
+complib.c /* version set as mp_verstr */
+X
+main()
+printsum() /* prints summary of run (residues, entries, time) */
+void fsigint() /* sets up interrupt handler for HUP not used */
+X
+================
+compacc.c
+X
+void selectbest() /* select best 15000/20000 based on raw score */
+void selectbestz() /* select best 15000/20000 based on z-score */
+void sortbest() /* sort based on raw score */
+void sortbestz() /* sort based on z-score */
+void sortbeste() /* sort based on E() score - different from z-score for DNA */
+X
+prhist() /* print histogram */
+X
+shuffle() /* shuffle sequence (prss) */
+wshuffle() /* window shuffle */
+X
+================
+showbest.c
+X
+void showbest() /* present list of high scoring sequences */
+X
+================
+showalign.c
+X
+void showalign() /* show list of high-scoring alignments */
+void do_show() /* show an individual alignment */
+void initseq() /* setup seqc0/seqc1 which contain alignment characters */
+void freeseq() /* free them up */
+X
+================
+htime.c
+X
+time_t s_time() /* get the time in usecs */
+void ptime() /* print elapsed time */
+X
+================
+apam.c
+X
+initpam () /* read in PAM matrix or change default array */
+void mk_n_pam() /* make DNA pam from +5/-3 values */
+================
+doinit.c
+X
+void initenv() /* read environment variables, general options */
+================
+initfa.c /* version set as "verstr" */
+X
+alloc_pam() /* allocate 2D pam array */
+initpam2() /* fill it up from 1D pam triangle */
+f_initenv() /* function-specific environment variables */
+f_getopt() /* function-specific options */
+f_getarg() /* function specific argument - ktup */
+resetp() /* reset scoring matrix, optional parameters for DNA-DNA */
+reseta() /* reset scoring matrix, optional parameters for prot-DNA */
+query_parm() /* ask for additional program arguments (ktup) */
+last_init() /* last chance to set up parameters based on query,lib,parms */
+f_initpam() /* not used - could set parameters from pam matrix */
+X
+================
+scaleswn.c
+X
+process_hist() /* do statistics calculations */
+X
+X proc_hist_r() /* regression fit z=1, also used by z=5 */
+X float find_z() /* gives z-score for score, length, mu, rho, var */
+X float find_zr() /* gives z-score for score, length, mu, rho, var */
+X fit_llen() /* first estimate of mu, rho, var */
+X fit_llens() /* second estimate of mu, rho, var, mu2, rho2 */
+X
+X proc_hist_r2() /* regression_i fit z=4 */
+X float find_zr2() /* gives z-score for score, length, mu, rho, mu2, rho2 */
+X fit_llen2() /* iterative estimate of mu, rho, var */
+X
+X proc_hist_ln() /* ln()-scaled z=2 */ /* no longer used */
+X float find_zl() /* gives z-score from ln()-scaled scores */
+X
+X proc_hist_ml() /* estimate lambda, K using Maximum Likelihood */
+X float find_ze() /* z-score from lambda, K */
+X
+X proc_hist_n() /* no length-scaling z=0 */
+X float find_zn() /* gives z-score from mu, var (no scaling) */
+X
+X proc_hist_a() /* Altschul-Gish params z= 3 */
+X ag_parm() /* match pst.pamfile name, look_p() */
+X look_p() /* lookup Lambda, K, H given param struct */
+X float find_za()
+X
+eq_s() /* returns (double)score (available for length correction) */
+ln_s() /* returns (double)score * ln(200)/ln(length) */
+X
+proc_hist_r() /* regression fit z=1, also used by z=5 */
+alloc_hist() /* set up arrays for score vs length */
+free_hist() /* free them */
+inithist() /* calls alloc_hist(), sets some other globals */
+addhist() /* update score vs length hist */
+inithistz() /* initialize displayed (z-score) histogram hist[]*/
+addhistz() /* add to hist[], increment num_db_entries */
+addhistzp() /* add to hist[], don't change num_db_entries */
+prune_hist() /* remove scores from score vs length */
+update_db_size() /* num_db_entries = nlib - ntrimmed */
+set_db_size() /* -Z db_size; set nlib */
+X
+double z_to_E() /* z-value to E() (extreme value distribution */
+double zs_to_E() /* z-score (mu=50, sigma=10) to E() */
+double zs_to_bit() /* z-score to BLAST2 bit score */
+X
+float E_to_zs() /* E() to z-score */
+double zs_to_Ec() /* z-score to num_db_entries*(1 - P(zs)) */
+X
+summ_stats() /* put stat summary in string */
+vsort() /* not used, does shell sort */
+calc_ks() /* does Kolmogorov-Smirnoff calculation for histogram */
+================
+dropnfa.c /* contains worker comparison functions */
+X
+init_work() /* set up struct f_struct fstr - hash query */
+get_param() /* actually prints parameters to string */
+close_work() /* clean up fstr */
+do_work() /* do a comparison */
+X do_fasta() /* use the fasta() function */
+X savemax() /* save the best region during scan */
+X spam() /* rescan the best regions */
+X sconn() /* try to connect the best regions for initn */
+X kssort() /* sort by score */
+X kpsort() /* sort by left end pos */
+X shscore() /* best self-score */
+X dmatch() /* do band alignment for opt score */
+X FLOCAL_ALIGN() /* fast band score-only */
+X
+do_opt() /* do an "optimized comparison */
+X
+do_walign() /* put an alignment into res[] for calcons() */
+X sw_walign() /* SW alignment driver - find boundaries */
+X ALIGN() /* actual alignment driver */
+X nw_align() /* recursive global alignment */
+X CHECK_SCORE() /* double check */
+X DISPLAY() /* Miller's display routine */
+X
+X bd_walign() /* band alignment driver for DNA */
+X LOCAL_ALIGN() /* find boundaries in band */
+X B_ALIGN() /* produce band alignment */
+X bg_align() /* recursively produce band alignment */
+X BCHECK_SCORE() /* double check */
+X
+calcons() /* calculate ascii alignment seqc0,seqc1 from res[]*/
+calc_id() /* calculate % identity with no alignment */
+================
+nxgetaa.c
+X
+getseq() /* get a query (prot or DNA) */
+getntseq() /* get a nt query (for fastx, fasty) */
+gettitle() /* get a description */
+X
+int openlib() /* open a library */
+closelib() /* close it */
+GETLIB() /* get a fasta-format next library entry */
+RANLIB() /* jump back in, get description, position for getlib() */
+X
+lgetlib() /* get a Genbank flat-file format next library entry */
+lranlib() /* jump back in, get description, position for lgetlib() */
+X
+pgetlib() /* get CODATA format next library entry */
+pranlib() /* jump back in, get description, position for pgetlib() */
+X
+egetlib() /* get EMBL format next library entry */
+eranlib() /* jump back in, get description, position for egetlib() */
+X
+igetlib() /* get Intelligenetics format next library entry */
+iranlib() /* jump back in, get description, position for igetlib() */
+X
+vgetlib() /* get PIR/VMS/GCG format next library entry */
+vranlib() /* jump back in, get description, position for vgetlib() */
+X
+gcg_getlib() /* get GCG binary format next library entry */
+gcg_ranlib() /* jump back in, get description, position for gcg_getlib() */
+X
+int scanseq() /* find %ACGT */
+X
+revcomp() /* do reverse complement */
+sf_sort() /* sort superfamily numbers */
+================
+c_dispn.c
+X
+discons() /* display alignment from seqc0, seqc1 */
+disgraph() /* display graphical representation, -m 4,5 */
+aancpy() /* copy a binary sequence to ascii */
+r_memcpy()
+l_memcpy()
+iidex() /* lookup ascii-encoding of residue */
+cal_coord() /* calculate coordinates of alignment ends */
+X
+================
+ncbl_lib.c
+X
+ncbl_openlib()
+ncbl_closelib()
+ncbl_getliba()
+ncbl_getlibn()
+ncbl_ranlib()
+src_ulong_read()
+src_long_read()
+src_char_read()
+src_fstr_read()
+newname()
+X
+================
+lib_sel.c
+X
+getlnames()
+libchoice()
+libselect()
+addfile()
+ulindex()
+X
+================
+nrand48.c
+X
+irand(time) /* initialize random number generator */
+nrand(n) /* get a number 0 - n */
+X
+================
+url_subs.c
+X
+void do_url1() /* setup search links */
+X
+SHAR_EOF
+chmod 0644 fasta_func.doc ||
+echo 'restore of fasta_func.doc failed'
+Wc_c="`wc -c < 'fasta_func.doc'`"
+test 9645 -eq "$Wc_c" ||
+ echo 'fasta_func.doc: original size 9645, current size' "$Wc_c"
+fi
+# ============= fastf3.1 ==============
+if test -f 'fastf3.1' -a X"$1" != X"-c"; then
+ echo 'x - skipping fastf3.1 (File already exists)'
+else
+echo 'x - extracting fastf3.1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fastf3.1' &&
+.TH FASTF/TFASTFv3 1 local
+.SH NAME
+fastf3, fastf3_t \- compare a mixed peptide sequence against a protein
+database using a modified fasta algorithm.
+X
+tfastf3, tfastf3_t \- compare a mixed peptide sequence against a
+translated DNA database.
+X
+.SH DESCRIPTION
+X
+.B fastf3
+and
+.B tfastf3
+are designed to compare a sequence of mixed peptides to a protein
+(fastf3) or translated DNA (tfastf3) database. Unlike the traditional
+.B fasta3
+search, which uses a protein or DNA sequence,
+.B fastf3
+and
+.B tfastf3
+work with a query sequence of the form:
+.in +5
+.nf
+>testf from mgstm1
+MGCEN,
+MIDYP,
+MLLAY,
+MLLGY
+.fi
+.in 0
+This sequence indicates that a mixture of four peptides has been
+found, with 'M' in the first position of each one (as from a CNBr
+cleavage), in the second position 'G', 'I', or 'L' (twice), at the
+third position 'C', 'D', or 'L' (twice), at the fourth position 'E',
+'Y', 'A', or 'G', etc. When this sequence is compared against mgstm1.aa
+(included with the distribution), the mixture is deconvolved to form:
+.nf
+.ft C
+.in +5
+testf MILGY-----------MLLEY-----------MGDAP-----------
+X ::::: ::::: :::::
+GT8.7 MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
+X 10 20 30 40 50
+X
+testf --------------------------------------------------
+X
+GT8.7 FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
+X 60 70 80 90 100
+X
+X 20
+testf ------------MLCYN
+X :::::
+GT8.7 ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
+X 110 120 130 140 150
+.in 0
+.ft P
+.fi
+.SH Options
+.LP
+.B fastf3
+and
+.B tfastf3
+can accept a query sequence from the unix "stdin" data stream. This makes it much
+easier to use fasta3 and its relatives as part of a WWW page. To
+indicate that stdin is to be used, use "-" or "@" as the query
+sequence file name.
+.TP
+\-b #
+number of best scores to show (must be < -E cutoff)
+.TP
+\-d #
+number of best alignments to show ( must be < -E cutoff)
+.TP
+\-D
+turn on debugging mode. Enables checks on sequence alphabet that
+cause problems with tfastx3, tfasty3, tfasta3.
+.TP
+\-E #
+Expectation value limit for displaying scores and
+alignments. Expectation values for
+.B fastf3
+and
+.B tfastf3
+are not as accurate as those for the other
+.B fasta3
+programs.
+.TP
+\-H
+turn off histogram display
+.TP
+\-i
+compare against only the reverse complement of the library sequence.
+.TP
+\-L
+report long sequence description in alignments
+.TP
+\-m 0,1,2,3,4,5,6,10
+alignment display options
+.TP
+\-n
+force query to nucleotide sequence
+.TP
+\-N #
+break long library sequences into blocks of # residues. Useful for
+bacterial genomes, which have only one sequence entry. -N 2000 works
+well for bacterial genomes.
+.TP
+\-O file
+send output to file
+.TP
+\-q/-Q
+quiet option; do not prompt for input
+.TP
+\-R file
+save all scores to statistics file
+.TP
+\-S #
+offset substitution matrix values by a constant #
+.TP
+\-s name
+specify substitution matrix. BLOSUM50 is used by default;
+PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
+P250, or BL62. With this version, many more scoring matrices are
+available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
+M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
+specified.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (set by
+default to 4 at compile time).
+.TP
+\-t #
+Translation table - tfastf3 can use the BLAST translation tables. See
+\fChttp://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
+.TP
+\-w #
+line width for similarity score, sequence alignment, output.
+.TP
+\-x "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-z #
+Specify statistical calculation. Default is -z 1, which uses
+regression against the length of the library sequence. -z 0 disables
+statistics. -z 2 uses the ln() length correction. -z 3 uses Altschul
+and Gish's statistical estimates for specific protein BLOSUM scoring
+matrices and gap penalties. -z 4: an alternate regression method.
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations.
+.TP
+\-1
+Sort by "init1" score.
+.TP
+\-3
+(TFASTF3 only) use only forward frame translations
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SMATRIX
+default scoring matrix (-s SMATRIX)
+.TP
+SRCH_URL
+the format string used to define the option to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+X
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 fastf3.1 ||
+echo 'restore of fastf3.1 failed'
+Wc_c="`wc -c < 'fastf3.1'`"
+test 4824 -eq "$Wc_c" ||
+ echo 'fastf3.1: original size 4824, current size' "$Wc_c"
+fi
+# ============= fastlibs ==============
+if test -f 'fastlibs' -a X"$1" != X"-c"; then
+ echo 'x - skipping fastlibs (File already exists)'
+else
+echo 'x - extracting fastlibs (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fastlibs' &&
+NBRF PIR1 Annotated Protein Database (rel 56)$0A/seqlib/lib/pir1.seq 5
+NBRF PIR1 Annotated (seg) (rel 56)$0B/slib2/blast/pir1.seg
+NBRF Protein database (complete)$0P@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0N/slib2/blast/nr
+NCBI/Blast non-redundant proteins (seg)$0K/slib2/blast/nr.seg
+NCBI/Blast Swissprot$0Q/slib2/blast/swissprot
+NCBI/Blast Swissprot (seg)$0R/slib2/blast/swissprot.seg
+OWL 30.1 non-redundant protein database$0O/slib2/OWL/owl.seq 5
+GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/blast/genpept.fsa
+Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
+Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
+C. elegans blast server$0W/slib2/blast/C.elegans_blast.fa
+E. coli proteome$0E/slib0/genomes/ecoli.npep
+H. influenzae proteome$0I/slib0/genomes/hinf.npep
+H. pylori proteome$0L/slib0/genomes/hpyl.npep
+NCBI Entrez Human proteins$0H/slib2/blast/human.aa
+M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
+M. jannaschii proteome$0J/slib0/genomes/mjan.npep
+Synechosystis proteome$0C/slib0/genomes/synecho.npep
+GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
+GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
+GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
+GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
+GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
+GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
+GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
+GB108.0 High throughput genomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
+GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
+GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
+GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
+GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
+GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
+GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
+GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
+Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
+E. coli genome$1D/slib0/genomes/ecoli.gbk 1
+Blast Human ESTs$1F/slib2/blast/est_human
+TIGR Human Gene Index$1K/slib2/blast/HGI.nr.031898
+Blast Mouse ESTs$1C/slib2/blast/est_mouse
+TIGR Mouse Gene Index$1J/slib2/blast/MGI.nr.022498
+NCBI/BLAST NR DNA$1n/slib2/blast/nt
+SHAR_EOF
+chmod 0644 fastlibs ||
+echo 'restore of fastlibs failed'
+Wc_c="`wc -c < 'fastlibs'`"
+test 2173 -eq "$Wc_c" ||
+ echo 'fastlibs: original size 2173, current size' "$Wc_c"
+fi
+# ============= fasts3.1 ==============
+if test -f 'fasts3.1' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasts3.1 (File already exists)'
+else
+echo 'x - extracting fasts3.1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasts3.1' &&
+.TH FASTS/TFASTSv3 1 local
+.SH NAME
+fasts3, fasts3_t \- compare several short peptide sequences against a protein
+database using a modified fasta algorithm.
+X
+tfasts3, tfasts3_t \- compare short peptides against a
+translated DNA database.
+X
+.SH DESCRIPTION
+X
+.B fasts3
+and
+.B tfasts3
+are designed to compare a set of (presumably non-contiguous) peptides to
+a protein (fasts3) or translated DNA (tfasts3) database.
+fasts3/tfasts3 are designed particularly for short peptide data from
+mass-spec analysis of protein digests. Unlike the traditional
+.B fasta3
+search, which uses a protein or DNA sequence,
+.B fasts3
+and
+.B tfasts3
+work with a query sequence of the form:
+.in +5
+.nf
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGDAP,
+MLCYNP
+.fi
+.in 0
+This sequence indicates that four peptides are to be used. When this
+sequence is compared against mgstm1.aa (included with the
+distribution), the result is:
+.nf
+.ft C
+.in +5
+testf MILGYW----------MLLE------------MGDAP-----------
+X :::::: :::: :::::
+GT8.7 MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
+X 10 20 30 40 50
+X
+testf --------------------------------------------------
+X
+GT8.7 FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
+X 60 70 80 90 100
+X
+X 20
+testf ------------MLCYNP
+X ::::::
+GT8.7 ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
+X 110 120 130 140 150
+.in 0
+.ft P
+.fi
+.SH Options
+.LP
+.B fasts3
+and
+.B tfasts3
+can accept a query sequence from the unix "stdin" data stream. This makes it much
+easier to use fasta3 and its relatives as part of a WWW page. To
+indicate that stdin is to be used, use "-" or "@" as the query
+sequence file name.
+.TP
+\-b #
+number of best scores to show (must be < -E cutoff)
+.TP
+\-d #
+number of best alignments to show ( must be < -E cutoff)
+.TP
+\-D
+turn on debugging mode. Enables checks on sequence alphabet that
+cause problems with tfastx3, tfasty3, tfasta3.
+.TP
+\-E #
+Expectation value limit for displaying scores and
+alignments. Expectation values for
+.B fasts3
+and
+.B tfasts3
+are not as accurate as those for the other
+.B fasta3
+programs.
+.TP
+\-H
+turn off histogram display
+.TP
+\-i
+compare against only the reverse complement of the library sequence.
+.TP
+\-L
+report long sequence description in alignments
+.TP
+\-m 0,1,2,3,4,5,6,9,10
+alignment display options
+.TP
+\-N #
+break long library sequences into blocks of # residues. Useful for
+bacterial genomes, which have only one sequence entry. -N 2000 works
+well for bacterial genomes.
+.TP
+\-O file
+send output to file
+.TP
+\-q/-Q
+quiet option; do not prompt for input
+.TP
+\-R file
+save all scores to statistics file
+.TP
+\-S #
+offset substitution matrix values by a constant #
+.TP
+\-s name
+specify substitution matrix. BLOSUM50 is used by default;
+PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
+P250, or BL62. With this version, many more scoring matrices are
+available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
+M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
+specified.
+.TP
+\-T #
+(threaded, parallel only) number of threads or workers to use (set by
+default to 4 at compile time).
+.TP
+\-t #
+Translation table - tfasts3 can use the BLAST tranlation tables. See
+\fChttp://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
+.TP
+\-w #
+line width for similarity score, sequence alignment, output.
+.TP
+\-x "#,#"
+offsets query, library sequence for numbering alignments
+.TP
+\-z #
+Specify statistical calculation. Default is -z 1, which uses
+regression against the length of the library sequence. -z 0 disables
+statistics. -z 2 uses the ln() length correction. -z 3 uses Altschul
+and Gish's statistical estimates for specific protein BLOSUM scoring
+matrices and gap penalties. -z 4: an alternate regression method.
+.TP
+\-Z db_size
+Set the apparent database size used for expectation value calculations.
+.TP
+\-3
+(TFASTS3 only) use only forward frame translations
+.SH Environment variables:
+.TP
+FASTLIBS
+location of library choice file (-l FASTLIBS)
+.TP
+SMATRIX
+default scoring matrix (-s SMATRIX)
+.TP
+SRCH_URL
+the format string used to define the option to re-search the
+database.
+.TP
+REF_URL
+the format string used to define the option to lookup the library
+sequence in entrez, or some other database.
+X
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 fasts3.1 ||
+echo 'restore of fasts3.1 failed'
+Wc_c="`wc -c < 'fasts3.1'`"
+test 4556 -eq "$Wc_c" ||
+ echo 'fasts3.1: original size 4556, current size' "$Wc_c"
+fi
+# ============= fasts3.rsp ==============
+# Restore fasts3.rsp.  The file is left alone when it already exists,
+# unless the first argument to this script is "-c".
+if test -f 'fasts3.rsp' -a X"$1" != X"-c"; then
+ echo 'x - skipping fasts3.rsp (File already exists)'
+else
+# The here-document below is the file body; sed strips one leading "X"
+# from each line before it is written out.
+echo 'x - extracting fasts3.rsp (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'fasts3.rsp' &&
+compacc.obj doinit.obj showbest.obj htime.obj apam.obj scaleswt.obj karlin.obj last_tat.obj tatsta_s.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
+SHAR_EOF
+chmod 0644 fasts3.rsp ||
+echo 'restore of fasts3.rsp failed'
+# The chmod/echo pair above completes the "sed ... &&" list, so the
+# failure message is printed if either sed or chmod fails.
+# Compare the restored byte count with the recorded size (203); a
+# mismatch is reported but does not stop the script.
+Wc_c="`wc -c < 'fasts3.rsp'`"
+test 203 -eq "$Wc_c" ||
+ echo 'fasts3.rsp: original size 203, current size' "$Wc_c"
+fi
+# ============= getenv.c ==============
+# Restore getenv.c unless it already exists and "-c" was not given as
+# the first argument.  The here-document is the file body; sed strips
+# one leading "X" from each line.
+if test -f 'getenv.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping getenv.c (File already exists)'
+else
+echo 'x - extracting getenv.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'getenv.c' &&
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+X
+#define MAXENV 1024
+char *envstr;
+X
+/* mgetenv - look up str in the file "environment" (NAME=value lines).
+X   The file is read once, on the first call, into envstr as a list of
+X   NUL-terminated strings ended by an empty string. Returns the text
+X   after the first '=' of the matching entry, or NULL if none. */
+char *mgetenv(str)
+char *str;
+{
+X  static int EnvInit=0;
+X
+X  char *eptr, *esptr, *bp;
+X  int i,esize;
+X  size_t slen;
+X  FILE *fenv;
+X
+X  if (EnvInit==0) {
+X    EnvInit=1;
+X    envstr=NULL;
+X    if ((fenv=fopen("environment","r"))!=NULL) {
+X      if ((envstr=malloc((size_t)(esize=MAXENV)))!=NULL) {
+X        esptr=envstr; esize -= 10;
+X        /* stop once the buffer is nearly full */
+X        while (esize>1 && fgets(esptr,esize,fenv)!=NULL) {
+X          if ((bp=strchr(esptr,'\n'))!=NULL) *bp='\0';
+X          /* a blank line would end the list early - skip it */
+X          if (*esptr=='\0') continue;
+X          esize -= (i=strlen(esptr)+1);
+X          esptr += i;
+X        }
+X        /* end the list with an empty string (was: esptr='\0') */
+X        *esptr='\0';
+X      }
+X      fclose(fenv);
+X    }
+X  }
+X
+X  if (envstr==NULL) return NULL;
+X
+X  slen=strlen(str);
+X  for (eptr=envstr; *eptr; eptr += strlen(eptr)+1) {
+X    /* a matching entry without '=' used to return an invalid pointer */
+X    if (strncmp(str,eptr,slen)==0 && (bp=strchr(eptr,'='))!=NULL) {
+X      return bp+1;
+X    }
+X  }
+X  return NULL;
+}
+X
+/* strnpcpy - copy a length-prefixed string (first byte holds the count)
+X   into to, copying at most max bytes; a NUL is appended only when
+X   fewer than max bytes were copied. NOTE(review): Str255 is not
+X   declared in this file - presumably a build-supplied type; confirm. */
+strnpcpy(to,from,max)
+X     char *to; Str255 from; size_t max;
+{
+X  size_t i, n;
+X
+X  n = (*from<max) ? *from : max;
+X  from++;
+X
+X  for (i=0; i<n; i++) *to++ = *from++;
+X  if (n<max) *to='\0';
+}
+SHAR_EOF
+chmod 0644 getenv.c ||
+echo 'restore of getenv.c failed'
+# 1920 is the byte count of the payload above once the "X" prefixes
+# are stripped; a mismatch is reported but does not stop the script.
+Wc_c="`wc -c < 'getenv.c'`"
+test 1920 -eq "$Wc_c" ||
+ echo 'getenv.c: original size 1920, current size' "$Wc_c"
+fi
+# ============= getopt.c ==============
+# Restore getopt.c (C source defining getopt() and the globals opterr,
+# optind, optopt and optarg).  Skipped when the file already exists,
+# unless the first argument to this script is "-c".
+if test -f 'getopt.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping getopt.c (File already exists)'
+else
+# The here-document below is the file body; sed strips one leading "X"
+# from each line, which is how blank and indented lines are carried.
+echo 'x - extracting getopt.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'getopt.c' &&
+/*LINTLIBRARY*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#define ERR(s, c) if(opterr){\
+X char errbuf[3];\
+X errbuf[0] = c; errbuf[1] = '\n'; errbuf[2]='\0';\
+X (void) fputs(argv[0],stderr);\
+X (void) fputs(s,stderr);\
+X (void) fputs(errbuf,stderr);}
+X
+X
+int opterr = 1;
+int optind = 1;
+int optopt;
+char *optarg;
+X
+int
+getopt(argc, argv, opts)
+int argc;
+char **argv, *opts;
+{
+X static int sp = 1;
+X register int c;
+X register char *cp;
+X
+X if(sp == 1)
+X if(optind >= argc ||
+X argv[optind][0] != '-' || argv[optind][1] == '\0')
+X return(EOF);
+X else if(strcmp(argv[optind], "--") == 0) {
+X optind++;
+X return(EOF);
+X }
+X optopt = c = argv[optind][sp];
+X if(c == ':' || (cp=strchr(opts, c)) == NULL) {
+X ERR(": illegal option -- ", c);
+X if(argv[optind][++sp] == '\0') {
+X optind++;
+X sp = 1;
+X }
+X return('?');
+X }
+X if(*++cp == ':') {
+X if(argv[optind][sp+1] != '\0')
+X optarg = &argv[optind++][sp+1];
+X else if(++optind >= argc) {
+X ERR(": option requires an argument -- ", c);
+X sp = 1;
+X return('?');
+X } else
+X optarg = argv[optind++];
+X sp = 1;
+X } else {
+X if(argv[optind][++sp] == '\0') {
+X sp = 1;
+X optind++;
+X }
+X optarg = NULL;
+X }
+X return(c);
+}
+SHAR_EOF
+chmod 0644 getopt.c ||
+echo 'restore of getopt.c failed'
+Wc_c="`wc -c < 'getopt.c'`"
+test 1174 -eq "$Wc_c" ||
+ echo 'getopt.c: original size 1174, current size' "$Wc_c"
+fi
+# ============= getseq.c ==============
+# Restore getseq.c (C source defining getseq() and gettitle()).
+# Skipped when the file already exists, unless the first argument to
+# this script is "-c".
+#
+# NOTE(review): payload issues seen while reading the embedded source:
+#  - gettitle(): the "#ifdef PIRLIB" branch assigns to "ll", which is
+#    not among the locals declared in that function - confirm it is
+#    declared by an included header, otherwise PIRLIB builds fail.
+#  - getseq(): the "return 0" paths taken when fgets() fails while
+#    skipping the GCG header leave fptr open and do not put back the
+#    ':' that was overwritten in filen.
+# Any payload fix must also update the byte count that is checked
+# after the closing SHAR_EOF.
+if test -f 'getseq.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping getseq.c (File already exists)'
+else
+# The here-document below is the file body; sed strips one leading "X"
+# from each line before it is written out.
+echo 'x - extracting getseq.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'getseq.c' &&
+/* May, June 1987 - modified for rapid read of database
+X
+X copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson
+X
+X This is one of three alternative files that can be used to
+X read a database. The three files are nxgetaa.c, nmgetaa.c, and
+X mmgetaa.c.
+X
+X nxgetaa.c contains the original code for reading databases, and
+X is still used for Mac and PC versions of fasta33 (which do not
+X use mmap).
+X
+X nmgetaa.c and mmgetaa.c are used together. nmgetaa.c provides
+X the same functions as nxgetaa.c if memory mapping is not used,
+X mmgetaa.c provides the database reading functions if memory
+X mapping is used. The decision to use memory mapping is made on
+X a file-by-file basis.
+X
+X June 2, 1987 - added TFASTA
+X March 30, 1988 - combined ffgetaa, fgetgb;
+X April 8, 1988 - added PIRLIB format for unix
+X Feb 4, 1989 - added universal subroutines for libraries
+X December, 1995 - added range option file.name:1-1000
+X Feb 22, 2002 - fix to allow "plain" text file queries
+X
+X getnt.c associated subroutines for matching sequences */
+X
+/* $Name: fa_34_26_5 $ - $Id: getseq.c,v 1.13 2006/10/05 18:22:07 wrp Exp $ */
+X
+/*
+X 8-April-88
+X The compile time #define PIRLIB allows this routine to be used
+X to read protein and DNA sequence libraries in the NBRF/PIR
+X VAX/VMS library format. That is:
+X
+X >P1;LCBO
+X This is a line of description
+X GTYH ... the sequence starts on this line
+X
+X This may ease conversion from UWGCG format libraries. It
+X has not been extensively tested.
+X
+X In addition, sequence libraries with a '>' in the 4th position
+X are recognized as NBRF format libraries for consistency with
+X UWGCG
+*/
+X
+/* Nov 12, 1987 - this version checks to see if the sequence
+X is DNA or protein by asking whether > 85% is A, C, G, T
+X
+X May 5, 1988 - modify the DNA/PROTEIN checker by re-reading
+X DNA sequences in order to check for 'U'.
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "structs.h"
+X
+#ifndef SFCHAR
+#define SFCHAR ':'
+#endif
+X
+#ifdef VMS
+#define PIRLIB
+#endif
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+X
+#define YES 1
+#define NO 0
+#define MAXLINE 512
+X
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+X
+#ifdef SUPERFAMNUM
+extern int nsfnum; /* number of superfamily numbers */
+extern int sfnum[]; /* superfamily number from types 0 and 5 */
+extern int nsfnum_n;
+extern int sfnum_n[];
+#endif
+X
+#define NO_FORMAT 0
+#define FASTA_FORMAT 1
+#define GCG_FORMAT 2
+X
+static int seq_format=NO_FORMAT;
+static char seq_title[200];
+X
+int scanseq(unsigned char *, int, char *);
+void sf_sort(int *, int);
+extern void init_ascii(int is_ext, int *sascii, int is_dna);
+X
+/* getseq - get a query sequence, possibly re-reading to set type
+X returns - length of query sequence or error = 0
+X
+X char *filen - name of file to be opened
+X char *seq - destination for query sequence
+X int maxs - maximum length of query
+X char libstr[20] - short description (locus or acc)
+X int *dnaseq - -1 => use scanseq to determine sequence type
+X 0 => must be protein
+X 1 => must be DNA
+X long *sq0off - offset into query specified by query_file:1001-2000
+*/
+X
+int
+getseq(char *filen, int *qascii, unsigned char *seq, int maxs, char *libstr, long *sq0off)
+{
+X FILE *fptr;
+X char line[512],*bp, *bp1, *bpn, *tp;
+X int i, rn, n;
+X int ic;
+X int sstart, sstop, sset=0;
+X int llen, l_offset;
+#ifdef SUPERFAMNUM
+X static char tline[MAXLINE];
+#endif
+X
+X seq_title[0]='\0';
+X libstr[0]='\0';
+X
+X sstart = sstop = -1;
+#ifndef DOS
+X if ((bp=strchr(filen,':'))!=NULL && *(bp+1)!='\0') {
+#else
+X if ((bp=strchr(filen+3,':'))!=NULL && *(bp+1)!='\0') {
+#endif
+X *bp='\0';
+X if (*(bp+1)=='-') {
+X sstart = 0;
+X sscanf(bp+2,"%d",&sstop);
+X }
+X else {
+X sscanf(bp+1,"%d-%d",&sstart,&sstop);
+X sstart--;
+X if (sstop <= 0 ) sstop = BIGNUM;
+X }
+X sset=1;
+X }
+X else {
+X sstart = 0;
+X sstop = BIGNUM;
+X }
+X
+X /* check for input from stdin */
+X if (strcmp(filen,"-") && strcmp(filen,"@")) {
+X if ((fptr=fopen(filen,"r"))==NULL) {
+X fprintf(stderr," could not open %s\n",filen);
+X return 0;
+X }
+X }
+X else {
+X fptr = stdin;
+X }
+X rn = n=0;
+X
+X while(fgets(line,sizeof(line),fptr)!=NULL) {
+#ifdef PIRLIB
+X if (line[0]=='>'&& (line[3]==';'||line[3]=='>'))
+X fgets(line,sizeof(line),fptr);
+X else
+#endif
+X l_offset = 0;
+X if (line[0]=='>') {
+X seq_format = FASTA_FORMAT;
+#ifdef SUPERFAMNUM
+X sfnum[nsfnum=0]= sfnum_n[nsfnum_n=0]=0;
+X strncpy(tline,line+1,sizeof(tline));
+X tline[sizeof(tline)-1]='\0';
+X
+X if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
+X if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
+X fprintf(stderr," second %c missing: %s\n",SFCHAR,tline);
+X }
+X else {
+X if ((bpn=strchr(bp+1,NSFCHAR))!=NULL) *bpn = '\0';
+X *bp1 = '\0';
+X i = 0;
+X if ((tp = strtok(bp+1," \t"))!=NULL) {
+X sfnum[i++] = atoi(tp);
+X while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
+X if (isdigit(*tp)) sfnum[i++] = atoi(tp);
+X if (i>=9) break;
+X }
+X }
+X sfnum[nsfnum=i]= 0;
+X if (nsfnum>1) sf_sort(sfnum,nsfnum);
+X else {
+X if (nsfnum < 1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
+X }
+X if (bpn != NULL) {
+X tp = strtok(bpn+1," \t");
+X sfnum_n[0]=atoi(tp);
+X i = 1;
+X while ((tp=strtok(NULL," \t"))!=NULL) {
+X sfnum_n[i++] = atoi(tp);
+X if (i >= 10) {
+X fprintf(stderr,
+X " error - too many negative superfamilies: %d\n %s\n",
+X i,tline);
+X break;
+X }
+X }
+X sfnum_n[nsfnum_n=i]=0;
+X sf_sort(sfnum_n,nsfnum_n);
+X }
+X }
+X }
+X else {
+X sfnum[nsfnum = 0] = 0;
+X sfnum_n[nsfnum_n = 0] = 0;
+X }
+#endif
+X if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
+X strncpy(seq_title,line+1,sizeof(seq_title));
+X seq_title[sizeof(seq_title)-1]='\0';
+X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+X strncpy(libstr,line+1,12);
+X libstr[12]='\0';
+X }
+X else if (seq_format==NO_FORMAT && strcmp(line,"..")==0) {
+X seq_format = GCG_FORMAT;
+/*
+X if (*dnaseq != 1) qascii['*'] = qascii['X'];
+*/
+X l_offset = 10;
+X llen = strlen(line);
+X while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
+X llen = strlen(line);
+X }
+X bp = strtok(line," \t");
+/*
+X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+X else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
+*/
+X if (bp!=NULL) strncpy(libstr,bp,12);
+X else strncpy(libstr,filen,12);
+X libstr[12]='\0';
+X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
+X }
+X else {
+X if (libstr[0]=='\0') strncpy(libstr,filen,12);
+X libstr[12]='\0';
+X }
+X
+X if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
+X
+X if (line[0]!='>'&& line[0]!=';') {
+X for (i=l_offset; (n<maxs && rn < sstop)&&
+X ((ic=qascii[line[i]&AAMASK])<EL); i++)
+X if (ic<NA && ++rn > sstart) seq[n++]= ic;
+X if (ic == ES || rn > sstop) break;
+X }
+X }
+X
+X if (n==maxs) {
+X fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+X fflush(stderr);
+X }
+X if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
+X if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
+X seq[n]= EOSEQ;
+X
+X
+X if (seq_format !=GCG_FORMAT)
+X while(fgets(line,sizeof(line),fptr)!=NULL) {
+#ifdef PIRLIB
+X if (line[0]=='>'&& (line[3]==';'||line[3]=='>'))
+X fgets(line,sizeof(line),fptr);
+X else
+#endif
+X if (line[0]!='>'&& line[0]!=';') {
+X for (i=0; (n<maxs && rn < sstop)&&
+X ((ic=qascii[line[i]&AAMASK])<EL); i++)
+X if (ic<NA && ++rn > sstart ) seq[n++]= ic;
+X if (ic == ES || rn > sstop) break;
+X }
+X }
+X else {
+X llen = strlen(line);
+X while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
+X llen = strlen(line);
+X }
+X while (fgets(line,sizeof(line),fptr)!=NULL) {
+X if (strlen(line)<l_offset) continue;
+X for (i=l_offset; (n<maxs && rn < sstop) &&
+X ((ic=qascii[line[i]&AAMASK])<EL); i++)
+X if (ic<NA && ++rn > sstart ) seq[n++]= ic;
+X if (ic == ES || rn > sstop ) break;
+X }
+X }
+X
+X if (n==maxs) {
+X fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+X fflush(stderr);
+X }
+X seq[n]= EOSEQ;
+X
+X if (fptr!=stdin) fclose(fptr);
+X
+X if (sset==1) {
+X sstart++;
+X filen[strlen(filen)]=':';
+X if (*sq0off==1 || sstart>=1) *sq0off = sstart;
+X }
+X
+X return n;
+}
+X
+int
+gettitle(char *filen, char *title, int len) {
+X FILE *fptr;
+X char line[512];
+X char *bp;
+X int sset;
+#ifdef WIN32
+X char *strpbrk();
+#endif
+X
+X sset = 0;
+X
+X if (strncmp(filen,"-",1)==0 || strncmp(filen,"@",1)==0) {
+X strncpy(title,seq_title,len);
+X title[len-1]='\0';
+X return (int)strlen(title);
+X }
+X
+X if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
+X
+X
+X if ((fptr=fopen(filen,"r"))==NULL) {
+X fprintf(stderr," file %s was not found\n",filen);
+X fflush(stderr);
+X return 0;
+X }
+X
+X if (sset==1) filen[strlen(filen)]=':';
+X
+X while(fgets(line,sizeof(line),fptr)!=NULL) {
+X if (line[0]=='>'|| line[0]==';') goto found;
+X }
+X fclose(fptr);
+X title[0]='\0';
+X return 0;
+X
+X found:
+X
+#ifdef PIRLIB
+X if (line[0]=='>'&&(line[3]==';'||line[3]=='>')) {
+X if ((bp = strchr(line,'\n'))!=NULL) *bp='\0';
+X ll=strlen(line); line[ll++]=' '; line[ll]='\0';
+X fgets(&line[ll],sizeof(line)-ll,fptr);
+X }
+#endif
+X
+#ifdef WIN32
+X bp = strpbrk(line,"\n\r");
+#else
+X bp = strchr(line,'\n');
+#endif
+X if (bp!=NULL) *bp = 0;
+X strncpy(title,line,len);
+X title[len-1]='\0';
+X fclose(fptr);
+X return strlen(title);
+}
+X
+SHAR_EOF
+chmod 0644 getseq.c ||
+echo 'restore of getseq.c failed'
+Wc_c="`wc -c < 'getseq.c'`"
+test 9431 -eq "$Wc_c" ||
+ echo 'getseq.c: original size 9431, current size' "$Wc_c"
+fi
+# ============= grou_drome.pseg ==============
+# Restore grou_drome.pseg (a text sequence file: one ">" header line
+# followed by residue lines).  Skipped when the file already exists,
+# unless the first argument to this script is "-c".
+if test -f 'grou_drome.pseg' -a X"$1" != X"-c"; then
+ echo 'x - skipping grou_drome.pseg (File already exists)'
+else
+# The here-document below is the file body; sed strips one leading "X"
+# from each line before it is written out.
+echo 'x - extracting grou_drome.pseg (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'grou_drome.pseg' &&
+>gi|121620|sp|P16371|GROU_DROME GROUCHO PROTEIN (ENHANCER OF SPLIT M9/10)
+MYPSPVRHpaaggpppqgpIKFTIADTLERIKEEFNFLQAHYHSIKLECEKLSNEKTEMQ
+RHYVMYYEMSYGLNVEMHKQTEIAKRLNTLINQLLPFLQADHQQQVLQAVERAKQVTMQE
+LNLIIGQQIHAqqvpggppqpmgALNPFGALGATMGLPHGPQGLLNKPPEHHRPDIKPTG
+LEGPAAAEERLRNSVSPADREKYRTRSPLDIENDSKRRKDEKLQEDEGEKSDQDLVVDVA
+NEMESHSPRPNGEHVSMEVRDRESLNGERLEKPSSSGIKQErppsrsgssssrstpsLKT
+KDMEKPGTPGakartptpnaaapapgvnpkqmmpqgpppagypgapyqrpaDPYQRPPSD
+PAYGRPPPMPYDPHAHVRTNGIPHPSALTGGKPAYSFHMNGEGSLQPVPFPPDALVGVGI
+PRHARQINTLSHGEVVCAVTISNPTKYVYTGGKGCVKVWDISQPGNKNPVSQLDCLQRDN
+YIRSVKLLPDGRTLIVGGEASNLSIWDLASPTPRIKAELTSAAPACYALAISPDSKVCFS
+CCSDGNIAVWDLHNEILVRQFQGHTDGASCIDISPDGSRLWTGGLDNTVRSWDLREGRQL
+QQHDFSSQIFSLGYCPTGDWLAVGMENSHVEVLHASKPDKYQLHLHESCVLSLRFAACGK
+WFVSTGKDNLLNAWRTPYGASIFQSKETSSVLSCDISTDDKYIVTGSGDKKATVYEVIY
+X
+SHAR_EOF
+chmod 0644 grou_drome.pseg ||
+echo 'restore of grou_drome.pseg failed'
+Wc_c="`wc -c < 'grou_drome.pseg'`"
+test 806 -eq "$Wc_c" ||
+ echo 'grou_drome.pseg: original size 806, current size' "$Wc_c"
+fi
+# ============= gst.nlib ==============
+if test -f 'gst.nlib' -a X"$1" != X"-c"; then
+ echo 'x - skipping gst.nlib (File already exists)'
+else
+echo 'x - extracting gst.nlib (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'gst.nlib' &&
+>pGT875 | 266
+GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
+GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
+TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
+TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
+CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
+AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
+catgcagctcatcatgctctgttacaaccctgactttgagaagcagaagccagagttctt
+gaagaccatccctgagaaaatgaagctctactctgagttcctgggcaagaggccatggtt
+tgcaggggacaaggtcacctatgtggatttccttgcttatgacattcttgaccagtaccg
+tatgtttgagcccaagtgcctggacgccttcccaaacctgagggacttcctggcccgctt
+cgagggcctcaagaagatctctgcctacatgaagagtagccgctacatcgcaacacctat
+ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
+GAGGACCTGTCCACACTGGGGATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
+TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
+TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
+ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
+TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
+CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
+CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
+>RABGLTR Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
+X CGGCAGCTCC TGTGGACTCA GAGGAGCTGC ACCATGCCCA TGACGCTGGG TTACTGGGAC
+X GTCCGTGGGC TGGCTCTGCC AATCCGCATG CTCCTGGAAT ACACGGACAC CAGCTATGAG
+X GAAAAGAAAT ACACCATGGG GGATGCTCCC AACTATGACC AAAGCAAGTG GCTGAGTGAG
+X AAGTTCACCC TGGGCCTGGA CTTTCCCAAT CTGCCCTACC TAATTGATGG GACTCACAAG
+X CTCACGCAGA GCAACGCCAT CCTGCGCTAC CTGGCCCGCA AGCACGGCCT GTGTGGGGAG
+X ACGGAAGAGG AGAGGATTCG CGTGGACATT CTGGAGAATC AGCTGATGGA CAACCGCTTC
+X CAACTTGTAA ACGTCTGCTA CAGTCCCGAC TTTGAGAAGC TCAAGCCCGA GTACCTGAAG
+X GGGCTCCCTG AGAAGCTGCA GCTGTACTCG CAGTTCCTGG GAAGCCTCCC CTGGTTCGCA
+X GGGGACAAGA TCACCTTCGC CGATTTCCTT GTCTACGACG TTCTTGACCA GAACCGGATA
+X TTTGTGCCTG GGTGCCTGGA CGCGTTCCCA AACCTGAAGG ACTTTCATGT CCGCTTTGAG
+X GGCCTGCCGA AGATCTCTGC CTACATGAAG TCCAGCCGCT TTATCCGAGT CCCTGTGTTT
+X TTAAAGAAGG CCACGTGGAC GGGAATATAG GGCCCTGGAA GGAGGTGGGC CATCCCCTGG
+X GAGCTCAGGT CTCCCAGCCT CTTGCTCATC TTCCTCAACC TTCCCAAAAA CAAAAGCCTA
+X CTGCCTGCTT GTGTTCTGAG CCAGCCCCTC CCATGCAGGC TCTGGCCAGC TCAGAAACCC
+X ACCCTTCTAG CCATGGGCTC TCTAAGGCTG CTCTTCCCGG ACTAAGCAGA CCCCACGGGC
+X CACATCTCTC TTCGTGGGCT CCGTTTGATC TCCCCGACTG CCAGAATCAT GGTTGTACCT
+X GCTGCGGCCC TATTCCCAGG CGGGACTCCC CAGTGCTGTT TGGTCCCCAG GAGGGCCTGA
+X CCTCAGCCAG GGCCCTTCTT ACCCCTCCCT GTGTTGCACT GGAGTGGGCG CTGACTGTGC
+X AGACCTTGGG GGGGTTTCTT TGTTCTGCTG CCCACAGCAT GGCTGGGTGG GGCAGGATTA
+X GTGTGGGGGG AGTTGGGTGC TCAGGCAGGG CTATGAGGGA TCTTGTTCAT TTCCGGGCCC
+X TATCCATGTG CTCTGCTCCT CGCCCTGGGT TTTCTCCTCT GCCCGGGTTC CTCGTTCCTT
+X CACCCTGGAG GGAGGCCAGG GCCACGTGCA GCCGTGCCGG GTTCTGAGAG CGCTGGGCTG
+X ATGGGGACGG GGCTGAGCAG GCTTGAGCAG ACCCCTCTGT CACCATCTCC CGGAAGCTTT
+X CAGCTGATAC AGATGCTCCT CGTCTATAGT TTCAGGATGT TTCTCAATAA AACATCCCAC
+X TGT
+>BTGST Bovine GST mRNA for gluthathione S-transferase, class-pi.
+X CGGCTCAGGC CGCCGCCGAG CGCGCTGGAA CTTTGCTGCC GCCGCCACCT TTACCGACTT
+X CCCCGACTCC AGGATGCCTC CCTACACCAT CGTCTACTTC CCGGTTCAAG GGCGCTGCGA
+X GGCCATGCGC ATGCTGCTGG CCGACCAGGG CCAGAGCTGG AAGGAGGAGG TCGTAGCCAT
+X GCAGAGCTGG CTGCAGGGCC CACTCAAGGC CTCCTGCCTG TACGGGCAGC TCCCCAAGTT
+X CCAGGACGGA GACCTCACGC TGTACCAGTC CAATGCCATC CTGCGGCACC TGGGCCGCAC
+X CCTCGGGCTG TATGGGAAGG ACCAGCAGGA GGCGGCCCTG GTGGACATGG TGAATGACGG
+X TGTAGAGGAC CTTCGCTGCA AATACGTCTC CCTCATTTAC ACCAACTACG AGGCGGGCAA
+X GGAGGACTAT GTGAAGGCGC TGCCCCAGCA CCTGAAGCCT TTCGAGACCC TGCTGTCCCA
+X GAACAAGGGT GGCCAGGCCT TCATCGTGGG CGACCAGATC TCCTTTGCGG ACTACAACCT
+X GCTGGACCTG CTTCGGATTC ACCAGGTCCT GGCCCCCAGC TGTCTGGACT CCTTCCCCCT
+X GCTCTCAGCC TACGTGGCCC GTCTCAACTC CCGGCCCAAG CTCAAGGCCT TCCTGGCCTC
+X CCCCGAGCAC ATGAACCGGC CCATCAACGG CAATGGGAAA CAGTGAGGGC TTGCAGCACT
+X CTCTGCTCGA GGCAGGGGGC TGCCTGCTCT TCCCTTTCCC CAGGACCAAT AAAACTTCCA
+X AGAGAGAAAA AAAAAAAAAA AAAAAAAAA
+>OCDHPR Rabbit mRNA for dihydropyridine (DHP) receptor (from skeletal
+X TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
+X GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
+X GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
+X CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
+X CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
+X AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
+X AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
+X GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
+X CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
+X ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
+X GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
+X CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
+X GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
+X CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
+X ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
+X TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
+X GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
+X CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
+X TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
+X GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
+X CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
+X TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
+X TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
+X CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
+X AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
+X CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
+X ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
+X ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
+X CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
+X AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
+X GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
+X AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
+X CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
+X GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
+X AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
+X GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
+X GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
+X GAGGCGGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGCAGGAAG
+X ATGTCCAGGG GTCTCCCTGA CAAGACAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
+X GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
+X GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
+X GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
+X CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
+X AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
+X CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
+X TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
+X GAGATTGTCC TCAAGATGAC GACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
+X AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCTGTGT CTCTCATCTC CATGGGTCTC
+X GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
+X CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
+X CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATT
+X GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAACG ACCTATCCAA GATGACAGAA
+X GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
+X CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
+X TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
+X TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
+X ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
+X GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
+X CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
+X CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
+X GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
+X ATGAACCACA TCTCGGACAT CCTCAACGTG GCCTTCACCA TCATCTTCAC ACTGGAGATG
+X ATCCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
+X GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
+X CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
+X GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTCATGAG GCTGATCAAG
+X CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
+X GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
+X ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
+X TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
+X CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
+X CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
+X ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
+X TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCTATC
+X TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
+X CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
+X AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
+X ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTTGAGCAG
+X GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTG
+X CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
+X ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
+X TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
+X GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
+X AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACGGG AGGCCTGTTT
+X GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTGC CCCCGGTGAT GGCCAACCAA
+X AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
+X GACTTCCCTC AAGATGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
+X GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
+X TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
+X CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
+X CTGGTCCAGC CGGGGATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
+X ACGGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
+X GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
+X CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
+X TTCGTCACGG CAACAAGCCA GGCCCTGGCA GACGCCTGTC AGATGGAACC GGAGGAAGTA
+X GAGGTCGCAG CCACAGAGCT ACTGAAAGCG CGAGAGTCTG TCCAGGGCAT GGCCAGTGTC
+X CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
+X GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGGTGTC CACATGACCA AGGCGAGAGG
+X GACAGTGCGT GCAGAAGCTC AGCCCTGCAT GGCAGCCTCC CTCTGTCTCA GCCCTCCTGC
+X TGAGCTGGGG CGGTCTGGAA CCGCACCAGG AAGCCAGGAG CCTCCCCTGG CCAGCAAGAG
+X GCATGATTCT AAAGCCATCC AGAAAGGCCT GGTCAGTGCC ACTCCCCAGC AGGACATTAA
+X AGTCTCTAGG TCTGTGGCAC TGG
+>RABALP1A Rabbit dihydropyridine-sensitive calcium channel alpha-1 subunit
+X TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
+X GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
+X GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
+X CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
+X CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
+X AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
+X AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
+X GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
+X CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
+X ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
+X GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
+X CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
+X GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
+X CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
+X ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
+X TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
+X GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
+X CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
+X TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
+X GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
+X CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
+X TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
+X TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
+X CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
+X AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
+X CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
+X ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
+X ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
+X CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
+X AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
+X GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
+X AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
+X CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
+X GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
+X AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
+X GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
+X GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
+X GAGGCCGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGTAGGAAG
+X ATGTCCAGGG GTCTCCCTGA CAAGAGAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
+X GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
+X GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
+X GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
+X CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
+X AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
+X CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
+X TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
+X GAGATTGTCC TCAAGATGAC AACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
+X AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCCGTGT CTCTCATCTC CATGGGTCTC
+X GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
+X CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
+X CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATC
+X GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAATG ACCTATCCAA GATGACAGAA
+X GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
+X CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
+X TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
+X TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
+X ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
+X GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
+X CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
+X CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
+X GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
+X ATGAACCACA TCTCAGACAT CCTCAATGTG GCCTTCACCA TCATCTTCAC GCTGGAGATG
+X ATTCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
+X GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
+X CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
+X GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTTATGAG GCTGATCAAG
+X CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
+X GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
+X ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
+X TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
+X CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
+X CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
+X ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
+X TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCCATC
+X TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
+X CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
+X AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
+X ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTCGAGCAG
+X GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTA
+X CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
+X ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
+X TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
+X GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
+X AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACCGG AGGCCTGTTT
+X GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTAC CCCCGGTGAT GGCCAACCAA
+X AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
+X GACTTCCCTC AAGACGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
+X GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
+X TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
+X CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
+X CTGGTCCAGC CGGGAATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
+X ACAGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
+X GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
+X CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
+X TTCGTCATGG CAACAAGCCA GGCCCTGGTA GACGCCTGTC AGATGGAACC GGAGGAAGTA
+X GAGGTCGCAG CCACAGAGCT ACTGAAAGAG CGAGAGTCCG TCCAGGGCAT GGCCAGTGTC
+X CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
+X GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGCAGTG TCCACATGAC CAAGGCGAGA
+X GGGACAGTGC GTGCAGAAGC TCAGCCCTGC ATGGCAGCCT CCCTCTGTCT CAGCCCTCCT
+X GCTGAGCTGG GGCGGTCTGG AACCGACCAG GAAGCCAGGA GCCTCCCCTG GCCAGCAAGA
+X GGCATGATTC TAAAGCCATC CAGAAAGGCC TGGTCAGTGC CACTCCCCAG CAGGACATTA
+X AAGTCTCTAG GTCTGTGGCA
+>RABGSTB Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
+X CAGAAACCAC CACTATGGCA GGGAAGCCCA AGCTTCACTA CTTCAATGCA CGGGGCAGAA
+X TGGAGTCTAT CCGGTGGCTC CTGACTGCAG CTGGGGTAGA GTTTGAAGAG AAATGTATGA
+X AAACTCGAGA AGACCTGGAA AAGTTAAGAA AAGATGGGGT ATTGATGTTC CAGCAAGTGC
+X CCATGGTTGA GATTGATGGG ATGAAGCTGG TGCAGACCAG AGCCATTTTC AACTACATTG
+X CAGACAAGCA CAACCTGTAT GGGAAAGACA TAAAGGAGAG AGCCCTGATT GATATGTATA
+X CAGAAGGCAT AGTAGATTTG AATGAATTGA TTCTTACTCG TCCATTCCTT CCACCGGAGG
+X AACAAGAGGC AAAACTTGCT CAGATCAAAG ATAAAGCAAA AAACCGTTAT TTTCCTGCCT
+X TTGAAAAGGT GTTGAAGAGC CACGGACAAG ACTACCTTGT TGGCAACAAG CTGAGCAAGG
+X CTGACATTCT CCTGGTTGAA CTTCTCTACA ACGTGGAAGA GCTCAACCCC GGCGCGACTG
+X CCAGCTTCCC TCTGCTGCAG GCCCTGAAAA CCAGGATCAG CAATCTCCCC ACCGTGAAGA
+X AGTTTCTGCA GCCTGGCAGC CAGAGGAATC CGCCTGATGA TGAGAAATGC AGAGAAGAAG
+X CAAAAATCAT TTTCCATTAA GAAGGCAAAG ATACCAAGCA CAGGCAAGAC CAGCCTCTGA
+X CCCCCTGCAG CGATGAAGTA CTTTAAATAA ATAGTGATCC TGATTGTCAT AAGGCATATT
+X ACGTTTTCTA AGTATTGTGT AAATTTAATT AAAAACCACC CATGTAGATT TAGTTGCAAT
+X ACATGGTACT TGGTTTTGAT CAAATACAAA ATTATGAGCA CCTCCTAGGA TGTCCCTTTG
+X AA
+SHAR_EOF
+chmod 0644 gst.nlib ||
+echo 'restore of gst.nlib failed'
+Wc_c="`wc -c < 'gst.nlib'`"
+test 18633 -eq "$Wc_c" ||
+ echo 'gst.nlib: original size 18633, current size' "$Wc_c"
+fi
+# ============= gst.seq ==============
+if test -f 'gst.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping gst.seq (File already exists)'
+else
+echo 'x - extracting gst.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'gst.seq' &&
+>gi|193547|gb|J04632|MUSGLUTA Mouse glutathione S-transferase class mu (GST1-1) mRNA, complete cds
+CCTGCCTTCCGCTTTAGGGTCTGCTGCTCTGGTTACAGACCTAGGAAGGGGAGTGCCTAATTGGGATTGG
+TGCAGGGTTGGGAGGGACCCGCTGTTTTGTCCTGCCCACGTTTCTCTAGTAGTCTGTATAAAGTCACAAC
+TCCAAACACACAGGTCAGTCCTGCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGAT
+ACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGC
+TATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAATGAGAAGT
+TCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAA
+TGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCA
+GACATTGTGGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTTG
+AGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAA
+GAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTAC
+CGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCC
+TCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCA
+CTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGC
+AGGCCCTGGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCC
+TTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCC
+AGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGA
+GGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAGCCCTCCCT
+AGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTC
+GTCTACAATAAAGTCTGAAACACACTT
+SHAR_EOF
+chmod 0644 gst.seq ||
+echo 'restore of gst.seq failed'
+Wc_c="`wc -c < 'gst.seq'`"
+test 1405 -eq "$Wc_c" ||
+ echo 'gst.seq: original size 1405, current size' "$Wc_c"
+fi
+# ============= gtm1_human.aa ==============
+if test -f 'gtm1_human.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping gtm1_human.aa (File already exists)'
+else
+echo 'x - extracting gtm1_human.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'gtm1_human.aa' &&
+>gtm1_human GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKITQSNAILCY
+IARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRPWFAGNKITFVD
+FLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
+SHAR_EOF
+chmod 0644 gtm1_human.aa ||
+echo 'restore of gtm1_human.aa failed'
+Wc_c="`wc -c < 'gtm1_human.aa'`"
+test 300 -eq "$Wc_c" ||
+ echo 'gtm1_human.aa: original size 300, current size' "$Wc_c"
+fi
+# ============= gtt1_drome.aa ==============
+if test -f 'gtt1_drome.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping gtt1_drome.aa (File already exists)'
+else
+echo 'x - extracting gtt1_drome.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'gtt1_drome.aa' &&
+>GTT1_DROME GLUTATHIONE S-TRANSFERASE 1-1 (EC 2.5.1.18) (CLASS-THETA). - DROS
+MVDFYYLPGSSPCRSVIMTAKAVGVELNKKLLNLQAGEHLKPEFLKINPQHTIPTLVDNGFALWESRAIQVYLVEKYG
+KTDSLYPKCPKKRAVINQRLYFDMGTLYQSFANYYYPQVFAKAPADPEAFKKIEAAFEFLNTFLEGQDYAAGDSLTVA
+DIALVATVSTFEVAKFEISKYANVNRWYENAKKVTPGWEENWAGCLEFKKYFE
+SHAR_EOF
+chmod 0644 gtt1_drome.aa ||
+echo 'restore of gtt1_drome.aa failed'
+Wc_c="`wc -c < 'gtt1_drome.aa'`"
+test 291 -eq "$Wc_c" ||
+ echo 'gtt1_drome.aa: original size 291, current size' "$Wc_c"
+fi
+# ============= h10_human.aa ==============
+if test -f 'h10_human.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping h10_human.aa (File already exists)'
+else
+echo 'x - extracting h10_human.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'h10_human.aa' &&
+>H10_HUMAN | 90538 | HISTONE H1' (H1.0) (H1(0)).
+TENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKVGENADSQIKLSIKRLV
+TTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKAASKAPTKKPKATPVKKAKKKLA
+ATPKKAKKPKTVKAKPVKASKPKKAKPVKPKAKSSAKRAGKKK
+SHAR_EOF
+chmod 0644 h10_human.aa ||
+echo 'restore of h10_human.aa failed'
+Wc_c="`wc -c < 'h10_human.aa'`"
+test 247 -eq "$Wc_c" ||
+ echo 'h10_human.aa: original size 247, current size' "$Wc_c"
+fi
+# ============= h_altlib.h ==============
+if test -f 'h_altlib.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping h_altlib.h (File already exists)'
+else
+echo 'x - extracting h_altlib.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'h_altlib.h' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: h_altlib.h,v 1.2 1999/12/30 01:26:57 wrp Exp $ */
+X
+X
+#define LASTENTRY 10
+#define LASTLIB 10
+#define BINARYGB 9
+#define DEFAULT 0
+#define FULLGB 1
+#define UNIXPIR 2
+#define EMBLSWISS 3
+#define INTELLIG 4
+#define VMSPIR 5
+X
+int agetlib_h(); /* pearson fasta format */
+int agetntlib_h(); /* pearson fasta format nucleotides */
+int vgetlib_h(); /* PIR VMS format */
+X
+int (*h_getliba[LASTLIB])()={
+X agetlib_h,agetlib_h,agetlib_h,agetlib_h,
+X agetlib_h,vgetlib_h,agetlib_h,agetlib_h,
+X agetlib_h,agetlib_h};
+X
+int (*h_getntliba[LASTLIB])()={
+X agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
+X agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
+X agetntlib_h,agetntlib_h};
+X
+SHAR_EOF
+chmod 0644 h_altlib.h ||
+echo 'restore of h_altlib.h failed'
+Wc_c="`wc -c < 'h_altlib.h'`"
+test 691 -eq "$Wc_c" ||
+ echo 'h_altlib.h: original size 691, current size' "$Wc_c"
+fi
+# ============= hahu.aa ==============
+if test -f 'hahu.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping hahu.aa (File already exists)'
+else
+echo 'x - extracting hahu.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'hahu.aa' &&
+>HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
+VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
+AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
+R
+SHAR_EOF
+chmod 0644 hahu.aa ||
+echo 'restore of hahu.aa failed'
+Wc_c="`wc -c < 'hahu.aa'`"
+test 225 -eq "$Wc_c" ||
+ echo 'hahu.aa: original size 225, current size' "$Wc_c"
+fi
+# ============= hostacc.c ==============
+if test -f 'hostacc.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping hostacc.c (File already exists)'
+else
+echo 'x - extracting hostacc.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'hostacc.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: hostacc.c,v 1.7 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* Concurrent read version */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+X
+#ifdef PVM_SRC
+#include "pvm3.h"
+#endif
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+X
+#include "msg.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+X
+extern char prog_name[];
+X
+extern int nnodes;
+#ifdef PVM_SRC
+extern int pinums[];
+#endif
+X
+X
+#ifdef PVM_SRC
+int tidtonode(tid)
+X int tid;
+{
+X int i;
+X for (i=FIRSTNODE; i< nnodes; i++) if (tid==pinums[i]) return i;
+X return -1;
+}
+#endif
+X
+/* rand_nodes selects nnodes at random from max_nodes */
+X
+void
+rand_nodes(int *node_map, int nnodes, int max_nodes)
+{
+X int node_used[MAXNOD];
+X int i, j;
+X struct timeval tv;
+X
+X gettimeofday(&tv,NULL);
+X SRAND(tv.tv_usec);
+X
+X for (i=0; i<max_nodes; i++) node_used[i]=0;
+X
+X if (nnodes < (max_nodes+1)/2) {
+X for (i=0; i<nnodes; ) {
+X j = RAND()%max_nodes;
+X if (node_used[j]) continue;
+X else {
+X node_map[i++]=j;
+X node_used[j]=1;
+X }
+X }
+X }
+X else {
+X for (i=0; i<(max_nodes-nnodes); ) {
+X j = RAND()%max_nodes;
+X if (node_used[j]) continue;
+X else {
+X node_used[j]=1;
+X i++;
+X }
+X }
+X for (i=j=0; i<nnodes; j++)
+X if (node_used[j]) continue;
+X else node_map[i++]=j;
+X }
+/* for (i=0; i<nnodes; i++) fprintf(stderr,"%2d %2d\n",i,node_map[i]); */
+}
+SHAR_EOF
+chmod 0644 hostacc.c ||
+echo 'restore of hostacc.c failed'
+Wc_c="`wc -c < 'hostacc.c'`"
+test 1466 -eq "$Wc_c" ||
+ echo 'hostacc.c: original size 1466, current size' "$Wc_c"
+fi
+# ============= hsgstm1b.gcg ==============
+if test -f 'hsgstm1b.gcg' -a X"$1" != X"-c"; then
+ echo 'x - skipping hsgstm1b.gcg (File already exists)'
+else
+echo 'x - extracting hsgstm1b.gcg (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'hsgstm1b.gcg' &&
+X FROMSTADEN of: hsgstm1b.g check: 1769 from: 1 to: 5183
+X
+X <---No Contig Comments--->
+X
+hsgstm1b.gcg Length: 5183 October 12, 1994 10:58 Type: N Check: 1769 ..
+X
+X 1 GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGTGG
+X
+X 51 GGTAAGCGAG GGTCCTCTGG TGGGTGGGAC AGGGGGCGGA GGCGGGGATG
+X
+X 101 TGTGGAGTAG CTGCAGGACT GGCTCTAGGG ACCGTTCCTC TTCAGGGCTG
+X
+X 151 CCCGCCTCAG AAGGGCCTGT GCATGACGCT GTGTGTGTGT TTGGGGGTGG
+X
+X 201 GGGCGGGTAG AGGAGGCGAC GGGTACGTGC AGTATAGACT AGGGCTGGCC
+X
+X 251 TGGTGCAGAG AAAGTCACCA AGTCAGGGAC CCTCCATCTC TGACCCGAGC
+X
+X 301 CGCGGCCATC TCTCCCAGCT GGCCCACGCC ATCCGCCTGC TCCTGGAATA
+X
+X 351 CACAGACTCA AGCTACGAGG AAAAGAAGTA TACGATGGGG GACGGTAATG
+X
+X 401 ACACCCTTGT GTCCGGGCTC TGCACTCACG CTGAGTTGGC ACCAAGCAAC
+X
+X 451 CCATGGTGGC CACCTGTCGT ACCTCTGCAG GCCTCCCCTG CTGGAGCTGC
+X
+X 501 AGGCTGTCCC TTCCCTGAGC CCCGGTGAGG AGTCCTGTGG CCTTGCAAGG
+X
+X 551 CAGAATGCTG GGGCGGGATA GTGGGTCCCT GTTTAATTGG GTTGGGTGTC
+X
+X 601 CTCAGAGCTT CCCAAACCCT GGAAGCCTTA GCCGTGTGGG GTCCAGAGCC
+X
+X 651 TCAGCGGGAT TATTTGTCCC TGAACCCTGG GATGTGGGAC TGAGTGGTCA
+X
+X 701 GATTCTAGAT CCACCTGTCT CAGGGATCTT GCCACTGGTT CCTTGGGAGG
+X
+X 751 GTCCCCGGAA GGAGGGCTGG GCTCTGGGGA GGTTTGTTTT CACTTCTTCT
+X
+X 801 TCCCCACGGC AGCTCCTGAC TATGACAGAA CGCAGTGGCT GAATGAAAAA
+X
+X 851 TTCAAGCTGG GCCTGGACTT TCCCAATGTA GGTGCAGGGG GAAGGGGCGG
+X
+X 901 TTTTGGGGGA AAGTGCGACG TGTCTCTGAC TGCATCTCCT CTCCCCAGAT
+X
+X 951 TAGAGGTGTT CGGATCAGGA GTCTTCTGCC CAATTCCTGG TTGTCTACAC
+X
+X 1001 AGCCCCTGCA TGATGTTCTG TGTCCCAGCT CATTTGTTCA TGTGACAGTA
+X
+X 1051 TTTCTATGTC AGGCCTGCAT GAGCGGGCAC AGTGAGTCTG GTCTCCCCTT
+X
+X 1101 GCATATAGGA AGGGGATGCT GGGGAGCCTG CTGGCCCCAA CTGAGCTTCC
+X
+X 1151 CCGGTTTCCC ATCTATCCAG CTGCCCTACT TGATTGATGG GGCTCACAAG
+X
+X 1201 ATCACCCAGA GCAACGCCAT CTTGTGCTAC ATTGCCCGCA AGCACAACCT
+X
+X 1251 GTGTGAGTGT GGGTGGCTGC AATGTGTGGG GGGAAGGTGG CCTCCTCCTT
+X
+X 1301 GGCTGGGCTG TGATGCTGAG ATTGAGTCTG TGTTTTGTGG GTGGCAGGTG
+X
+X 1351 GGGAGACAGA AGAGGAGAAG ATTCGTGTGG ACATTTTGGA GAACCAGACC
+X
+X 1401 ATGGACAACC ATATGCAGCT GGGCATGATC TGCTACAATC CAGAATTTGT
+X
+X 1451 GAGTGTCCCC AGTGAGCTGC ATCTGACAGA GTTTGGATTT GGGGCCAGGA
+X
+X 1501 CTCTTGCATC CTGCACACAT TGGTCTTAAG TCCCTGGTAC CATTCATCCT
+X
+X 1551 CCAAGTGCTT TCCCATCATC TAGCAGTATC TCTACGACTC CAATGTCATG
+X
+X 1601 TCAACAAAAG CAGAGGCAAT TCCCAACCAA CCTTAGGACA CGATTCCAGG
+X
+X 1651 CATTCCCAGG GTAGAAATTT CAGTTCCTGT ATGGTAAAGT TTGTGTTCAG
+X
+X 1701 AATCTCCTTC ATCAGCTCTG GCCTCTGACT TCTGTCCTGG GTCATTTCTG
+X
+X 1751 TCAGCCAGTT CACATCACCT GCCTGCTCCT AGAATATGCA GACTCAAGTA
+X
+X 1801 GAAGACTCAG GAATGTAATG GCACCCTCGA ATTGCATCTT CTCCTCAACA
+X
+X 1851 GTTTTCTGAG TGCTGTCATT GACATGCACA GGGATCTGCG CATCTTCATA
+X
+X 1901 ACAGACAGCT CAGAGGCAGT CAGAGGGCCT TTATTCCTCT CCCTCCTTCC
+X
+X 1951 TTTCAACTTG AACTTCTCAT CTCCCTGGAA ACTAGTCAAC GTTCATTGTT
+X
+X 2001 TTCTTCTGCC ACCCCATTAG AAGGAACTTT CTACTTTCCC TGAGCTCCCT
+X
+X 2051 TAGTTCTTTG CATCCTTGAT TCTGCTGGTC TGGATCCAGA GGCTGCCAGG
+X
+X 2101 TGCTTGGGCG CTCCTGGGGC TGACCCAGAG GCTATTGGGA GGTCAGTGAG
+X
+X 2151 GACAGATTCA GGGACAGCAT CTCATTCCTC TCTGCCTTCT GATCAGTTTA
+X
+X 2201 GATAGGGTCT GACACTCAGT CAGAGTCTAA AATGCTGAGT ATCCAATTGA
+X
+X 2251 AGCCTGCACT GCCCCAGTTC CAGACTTGGG GAAGATGGCT GCTTGCCCGT
+X
+X 2301 GCCAGCCTGG CCGTCCACAG CCCCGGGGAG GCCACGTCTG TGCAGGGAGC
+X
+X 2351 TTTTGTCCGA GGGTGGTGAC AGCTGTTTTC TGCCTCAGGA GAAACTGAAG
+X
+X 2401 CCAAAGTACT TGGAGGAACT CCCTGAAAAG CTAAAGCTCT ACTCAGAGTT
+X
+X 2451 TCTGGGGAAG CGGCCATGGT TTGCAGGAAA CAAGGTAAAG GAGGAGTGAT
+X
+X 2501 ATGGGGAATG AGATCTGTTT TGCTTCACGT GTTATGGAGG TTCCAGCCCA
+X
+X 2551 CACATTCTTG GCCTTCTGCA GATCACTTTT GTAGATTTTC TCGTCTATGA
+X
+X 2601 TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG GACGCCTTCC
+X
+X 2651 CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGTGATGCC CCCAATCCTC
+X
+X 2701 CCTTCTCTTT GATGCCCCTT GTTCCGTTAC CTCCTTTCAG ATGCTTTCCC
+X
+X 2751 ATGCCTGGAG CTACACACAG AATAACTCGC ATGTATTGAG TACTGGTTTC
+X
+X 2801 ATGCCACGAA CCGTACCCCA GCACATTATA CCTATTGTGT GAAATTTGAA
+X
+X 2851 TTTTATAACA TTCCAGTAAG GTAACAGAAT TATCTCGCCC ATTTTAGAGA
+X
+X 2901 TAAGGAAACT AAGAATGAGA GGGTCGGTCC TCTGCTCAGG GTCCCAGAGC
+X
+X 2951 TAGTGGAGGC AGTGCTGGGC CCCTGTGAGC CTCTGGATCT ATGGGTGGCA
+X
+X 3001 GTCAGGCTCT CCCATTCGAC AGAGAAAAAG CCTTAGCGTT CACCTAGCCT
+X
+X 3051 GGGTTTCACA GCCCAGGACA CTTTGGAAGA GGCAGAGAAC TTCATGACCA
+X
+X 3101 TAGATGGAGC TGGCAATAGT AGGACTGACA CAACGGTGAC ATTGATGTCT
+X
+X 3151 AGTACTGAAC CCACAGGCAA TCTCATAGCT ACCTCCAGAA GCTTTGCATG
+X
+X 3201 ATTGGACCCC AGTGTGGGAA TCCTGAGAGC CAGGGCTGTG GCTGTAGCTG
+X
+X 3251 GATTAAGGTA CATATGTGGG TGTCCCTGTT GAAGGAGTAT ATGTTGAAAT
+X
+X 3301 GCCCGGTGCT GGGGCACTTA CTTACTCCAC CACTATCTTT TTTTTTTTTT
+X
+X 3351 TTTTTTTTTT TTTGTGCTGG AGTCTTGCTC TGTTGCCCAG GCTGGAGTTC
+X
+X 3401 AATGGAGTGA TCTTGGCTCA CTGCAACCTC CGCCTCCTGG GTTCAAGCGA
+X
+X 3451 TTCTACTGCC TCAGCTGCAC GATTAGTTGG GATTACAGGT GTGCACCACC
+X
+X 3501 ACGTCTGGCT AATTTTTGTA TTTTTAGTAG AGATGGGGTT TTGCCATGTT
+X
+X 3551 GGTCAGGCTG GTCTTCGAAC TCCTGACCTC AGGTGATCTA CCCACATCAG
+X
+X 3601 CCTCCCTCAG ATCGTGTCTT GCTGTTGCCC AGGCTGGAGC AGCAGTTGCG
+X
+X 3651 TGACCTCGGA CTTACTGCAA CCTCTGCTCC CGGGTTCAAA CAATTCTCTG
+X
+X 3701 CCTCAGCCTC CCGAGTAGCT GGGAATTACA AGTGTCTATC ACCACGCCCA
+X
+X 3751 GCTAATTTTT CTATTTTTAG TAGAGATGGG CTTTTCACCA TGTTGGCCAG
+X
+X 3801 GTGGTCTTGA ACTCCTGACC TCGGTGATCC ACCCACCTCG GCTTCCCACA
+X
+X 3851 TCTGAGTGTC ATGTAGCCTG ATCTGCAGCA GGGCTGTAGA TGCCATGGGT
+X
+X 3901 TAGGGCACAG TGAGATTTTG CTCAGGTATT AGATGGAGAA CTTTGGACTT
+X
+X 3951 TCTGCTTTAA GGGGAATGTT TAGAGCCTAG TCTCgTTTGA TTTTCTTGTG
+X
+X 4001 CACTGCCACC CCCCATTCCA CTTTCATCCA GGTTTACTGA GACATTGGGG
+X
+X 4051 TGAGTGTGTT CAGAGCCCCT TTGTTCTGCT GCAGGTCCCT TCTGTGTCTC
+X
+X 4101 TATACCCAGA CAAGCCAAGA GCCTCCCTGT GGAAAAGGAG ACTGTTTGTG
+X
+X 4151 CAGTCAAGGA GTGACAGGGC CTGGTGTGAG GGGTGGTGGG GCAGAAGAAG
+X
+X 4201 AAGAGAATTT GTCAGGAAGA GGCCAGAACT GGAGAGAGAC AGAACCAGGC
+X
+X 4251 TACACYGCAA GTTCTATTCC CCTTACAAGG TATCTAAACG TAAGGAAGTT
+X
+X 4301 GCTGAACTTC TGTTCCACAT GAGAATGGTG ATAATAGATT CAGCCTTGCA
+X
+X 4351 GAGCAGTCGA GTGGTTTTCT AAGCTTACGT TGTAATTTGT GTTGGTACAG
+X
+X 4401 AGCACCCAGC ACCGTGTAGA ATCTTCGTAA GTGTTAGCTG TTACTGTGGT
+X
+X 4451 ACAACATTAC CTAAAGGAAG TTGGAAGAGT TAACTCAGCA AATCTGGGGA
+X
+X 4501 CCCTAAGAAG CTGTGTGATG CCTCAGCACT TGAGCCCACA TGGAAAGGCT
+X
+X 4551 GTGCCAGGGC CCTGACCTGC TGTGTCTGCA GTGGGGTTGT CCCACCGCTC
+X
+X 4601 ATGGGCAGCT GACCTTGAGT TCTGGCCTTA TTTTCCCCCC TCTCAGGGCT
+X
+X 4651 TGGAGAAGAT CTCTGCCTAC ATGAAGTCCA GCCGCTTCCT CCCAAGACCT
+X
+X 4701 GTGTTCTCAA AGATGGCTGT CTGGGGCAAC AAGTAGGGCC TTGAAGGCAG
+X
+X 4751 GAGGTGGGAG TGAGGAGCCC ATACTCAGCC TGCTGCCCAG GCTGTGCAGC
+X
+X 4801 GCAGCTGGAC TCTGCATCCC AGCACCTGCC TCCTCGTTCC TTTCTCCTGT
+X
+X 4851 TTATTCCCAT CTTTACTCCC AAGACTTCAT TGTCCCTCTT CACTCCCCCT
+X
+X 4901 AAACCCCTGT CCCATGCAGG CCCTTTGAAG CCTCAGCTAC CCACTATCCT
+X
+X 4951 TCGTGAACAT CCCCTCCCAT CATTACCCTT CCCTGCACTA AAGCCAGCCT
+X
+X 5001 GACCTTCCTT CCTGTTAGTG GTTGTGTCTG CTTTAAAGCC TGCCTGGCCC
+X
+X 5051 CTCGCCTGTG GAGCTCAGCC CCGAGCTGTC CCCGTGTTGC ATGAAGGAGC
+X
+X 5101 AGCATTGACT GGTTTACAGG CCCTGCTCCT GCAGCATGGT CCCTGCCTAG
+X
+X 5151 GCCTACCTGA TGGAAGTAAA GCCTCAACCA CAc
+X
+SHAR_EOF
+chmod 0644 hsgstm1b.gcg ||
+echo 'restore of hsgstm1b.gcg failed'
+Wc_c="`wc -c < 'hsgstm1b.gcg'`"
+test 7118 -eq "$Wc_c" ||
+ echo 'hsgstm1b.gcg: original size 7118, current size' "$Wc_c"
+fi
+# ============= hsgstm1b.seq ==============
+if test -f 'hsgstm1b.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping hsgstm1b.seq (File already exists)'
+else
+echo 'x - extracting hsgstm1b.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'hsgstm1b.seq' &&
+>gi|31932|emb|X68676|HSGSTM1B H.sapiens GSTM1b gene for glutathione S-transferase
+ATGCCCATGATACTGGGGTACTGGGACATCCGTGGGGTAAGCGAGGGTCCTCTGGTGGGTGGGACAGGGG
+GCGGAGGCGGGGATGTGTGGAGTAGCTGCAGGACTGGCTCTAGGGACCGTTCCTCTTCAGGGCTGCCCGC
+CTCAGAAGGGCCTGTGCATGACGCTGTGTGTGTGTTTGGGGGTGGGGGCGGGTAGAGGAGGCGACGGGTA
+CGTGCAGTATAGACTAGGGCTGGCCTGGTGCAGAGAAAGTCACCAAGTCAGGGACCCTCCATCTCTGACC
+CGAGCCGCGGCCATCTCTCCCAGCTGGCCCACGCCATCCGCCTGCTCCTGGAATACACAGACTCAAGCTA
+CGAGGAAAAGAAGTATACGATGGGGGACGGTAATGACACCCTTGTGTCCGGGCTCTGCACTCACGCTGAG
+TTGGCACCAAGCAACCCATGGTGGCCACCTGTCGTACCTCTGCAGGCCTCCCCTGCTGGAGCTGCAGGCT
+GTCCCTTCCCTGAGCCCCGGTGAGGAGTCCTGTGGCCTTGCAAGGCAGAATGCTGGGGCGGGATAGTGGG
+TCCCTGTTTAATTGGGTTGGGTGTCCTCAGAGCTTCCCAAACCCTGGAAGCCTTAGCCGTGTGGGGTCCA
+GAGCCTCAGCGGGATTATTTGTCCCTGAACCCTGGGATGTGGGACTGAGTGGTCAGATTCTAGATCCACC
+TGTCTCAGGGATCTTGCCACTGGTTCCTTGGGAGGGTCCCCGGAAGGAGGGCTGGGCTCTGGGGAGGTTT
+GTTTTCACTTCTTCTTCCCCACGGCAGCTCCTGACTATGACAGAACGCAGTGGCTGAATGAAAAATTCAA
+GCTGGGCCTGGACTTTCCCAATGTAGGTGCAGGGGGAAGGGGCGGTTTTGGGGGAAAGTGCGACGTGTCT
+CTGACTGCATCTCCTCTCCCCAGATTAGAGGTGTTCGGATCAGGAGTCTTCTGCCCAATTCCTGGTTGTC
+TACACAGCCCCTGCATGATGTTCTGTGTCCCAGCTCATTTGTTCATGTGACAGTATTTCTATGTCAGGCC
+TGCATGAGCGGGCACAGTGAGTCTGGTCTCCCCTTGCATATAGGAAGGGGATGCTGGGGAGCCTGCTGGC
+CCCAACTGAGCTTCCCCGGTTTCCCATCTATCCAGCTGCCCTACTTGATTGATGGGGCTCACAAGATCAC
+CCAGAGCAACGCCATCTTGTGCTACATTGCCCGCAAGCACAACCTGTGTGAGTGTGGGTGGCTGCAATGT
+GTGGGGGGAAGGTGGCCTCCTCCTTGGCTGGGCTGTGATGCTGAGATTGAGTCTGTGTTTTGTGGGTGGC
+AGGTGGGGAGACAGAAGAGGAGAAGATTCGTGTGGACATTTTGGAGAACCAGACCATGGACAACCATATG
+CAGCTGGGCATGATCTGCTACAATCCAGAATTTGTGAGTGTCCCCAGTGAGCTGCATCTGACAGAGTTTG
+GATTTGGGGCCAGGACTCTTGCATCCTGCACACATTGGTCTTAAGTCCCTGGTACCATTCATCCTCCAAG
+TGCTTTCCCATCATCTAGCAGTATCTCTACGACTCCAATGTCATGTCAACAAAAGCAGAGGCAATTCCCA
+ACCAACCTTAGGACACGATTCCAGGCATTCCCAGGGTAGAAATTTCAGTTCCTGTATGGTAAAGTTTGTG
+TTCAGAATCTCCTTCATCAGCTCTGGCCTCTGACTTCTGTCCTGGGTCATTTCTGTCAGCCAGTTCACAT
+CACCTGCCTGCTCCTAGAATATGCAGACTCAAGTAGAAGACTCAGGAATGTAATGGCACCCTCGAATTGC
+ATCTTCTCCTCAACAGTTTTCTGAGTGCTGTCATTGACATGCACAGGGATCTGCGCATCTTCATAACAGA
+CAGCTCAGAGGCAGTCAGAGGGCCTTTATTCCTCTCCCTCCTTCCTTTCAACTTGAACTTCTCATCTCCC
+TGGAAACTAGTCAACGTTCATTGTTTTCTTCTGCCACCCCATTAGAAGGAACTTTCTACTTTCCCTGAGC
+TCCCTTAGTTCTTTGCATCCTTGATTCTGCTGGTCTGGATCCAGAGGCTGCCAGGTGCTTGGGCGCTCCT
+GGGGCTGACCCAGAGGCTATTGGGAGGTCAGTGAGGACAGATTCAGGGACAGCATCTCATTCCTCTCTGC
+CTTCTGATCAGTTTAGATAGGGTCTGACACTCAGTCAGAGTCTAAAATGCTGAGTATCCAATTGAAGCCT
+GCACTGCCCCAGTTCCAGACTTGGGGAAGATGGCTGCTTGCCCGTGCCAGCCTGGCCGTCCACAGCCCCG
+GGGAGGCCACGTCTGTGCAGGGAGCTTTTGTCCGAGGGTGGTGACAGCTGTTTTCTGCCTCAGGAGAAAC
+TGAAGCCAAAGTACTTGGAGGAACTCCCTGAAAAGCTAAAGCTCTACTCAGAGTTTCTGGGGAAGCGGCC
+ATGGTTTGCAGGAAACAAGGTAAAGGAGGAGTGATATGGGGAATGAGATCTGTTTTGCTTCACGTGTTAT
+GGAGGTTCCAGCCCACACATTCTTGGCCTTCTGCAGATCACTTTTGTAGATTTTCTCGTCTATGATGTCC
+TTGACCTCCACCGTATATTTGAGCCCAACTGCTTGGACGCCTTCCCAAATCTGAAGGACTTCATCTCCCG
+CTTTGAG
+SHAR_EOF
+chmod 0644 hsgstm1b.seq ||
+echo 'restore of hsgstm1b.seq failed'
+Wc_c="`wc -c < 'hsgstm1b.seq'`"
+test 2788 -eq "$Wc_c" ||
+ echo 'hsgstm1b.seq: original size 2788, current size' "$Wc_c"
+fi
+# ============= htime.c ==============
+if test -f 'htime.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping htime.c (File already exists)'
+else
+echo 'x - extracting htime.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'htime.c' &&
+/* Concurrent read version */
+X
+/* $Name: fa_34_26_5 $ - $Id: htime.c,v 1.3 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdio.h>
+#include <time.h>
+X
+#ifdef UNIX
+#include <sys/types.h>
+#include <sys/time.h>
+#ifdef TIMES
+#include <sys/times.h>
+#else
+#undef TIMES
+#endif
+#endif
+X
+#ifndef HZ
+#define HZ 100
+#endif
+X
+time_t s_time () /* returns time in milliseconds */
+{
+#ifndef TIMES
+X time_t time(), tt;
+X return time(&tt)*1000;
+#else
+X struct tms tt;
+X times(&tt);
+#ifdef CLK_TCK
+X return tt.tms_utime*1000/CLK_TCK;
+#else
+X return tt.tms_utime*1000/HZ;
+#endif
+#endif
+}
+X
+void ptime (FILE *fp, time_t time) /* prints the time */
+{
+X fprintf (fp, "%6.3f",(double)(time)/1000.0);
+}
+X
+SHAR_EOF
+chmod 0644 htime.c ||
+echo 'restore of htime.c failed'
+Wc_c="`wc -c < 'htime.c'`"
+test 674 -eq "$Wc_c" ||
+ echo 'htime.c: original size 674, current size' "$Wc_c"
+fi
+# ============= humgstd.seq ==============
+if test -f 'humgstd.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping humgstd.seq (File already exists)'
+else
+echo 'x - extracting humgstd.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'humgstd.seq' &&
+>HUMGSTD Human glutathione transferase class mu (GST1) mRNA, complete cds.
+X GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGCGG GCTGGCCCAC
+X GCCATCCGCC TGCTCCTGGA ATACACAGAC TCAAGCTATG AGGAAAAGAA GTACACGATG
+X GGGGACGCTC CTGATTATGA CAGAAGCCAG TGGCTGAATG AAAAATTCAA GCTGGGCCTG
+X GACTTTCCCA ATCTGCCCTA CTTGATTGAT GGGGCTCACA AGATCACCCA GAGCAACGCC
+X ATCTTGTGCT ACATTGCCCG CAAGCACAAC CTGTGTGGGG AGACAGAAGA GGAGAAGATT
+X CGTGTGGACA TTTTGGAGAA CCAGACCATG GACAACCATA TGCAGCTGGG CATGATCTGC
+X TACAATCCAG AATTTGAGAA ACTGAAGCCA AAGTACTTGG AGGAACTCCC TGAAAAGCTA
+X AAGCTCTACT CAGAGTTTCT GGGGAAGCGG CCATGGTTTG CAGGAAACAA GATCACTTTT
+X GTAGATTTTC TCGTCTATGA TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG
+X GACGCCTTCC CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGGCTTGGA GAAGATCTCT
+X GCCTACATGA AGTCCAGCCG CTTCCTCCCA AGACCTGTGT TCTCAAAGAT GGCTGTCTGG
+X GGCAACAAGT AGGGCCTTGA AGGCAGGAGG TGGGAGTGAG GAGCCCATAC TCAGCCTGCT
+X GCCCAGGCTG TGCAGCGCAG CTGGACTCTG CATCCCAGCA CCTGCCTCCT CGTTCCTTTC
+X TCCTGTTTAT TCCCATCTTT ACTCCCAAGA CTTCATTGTC CCTCTTCACT CCCCCTAAAC
+X CCCTGTCCCA TGCAGGCCCT TTGAAGCCTC AGCTACCCAC TATCCTTCGT GAACATCCCC
+X TCCCATCATT ACCCTTCCCT GCACTAAAGC CAGCCTGACC TTCCTTCCTG TTAGTGGTTG
+X TGTCTGCTTT AAAGCCTGCC TGGCCCCTCG CCTGTGGAGC TCAGCCCCGA GCTGTCCCCG
+X TGTTGCATGA AGGAGCAGCA TTGACTGGTT TACAGGCCCT GCTCCTGCAG CATGGTCCCT
+X GCCTAGGCCT ACCTGATGGA AGTAAAGCCT CAACCAC
+SHAR_EOF
+chmod 0644 humgstd.seq ||
+echo 'restore of humgstd.seq failed'
+Wc_c="`wc -c < 'humgstd.seq'`"
+test 1323 -eq "$Wc_c" ||
+ echo 'humgstd.seq: original size 1323, current size' "$Wc_c"
+fi
+# ============= idn_aa.mat ==============
+if test -f 'idn_aa.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping idn_aa.mat (File already exists)'
+else
+echo 'x - extracting idn_aa.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'idn_aa.mat' &&
+X A R N B D C Q Z E G H I L K M F P S T W Y V X
+A 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+R -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+N -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+B -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+D -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+C -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+Q -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+Z -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+E -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+G -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+H -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+I -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+L -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
+K -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10
+M -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10
+F -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10
+P -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10
+S -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10
+T -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10
+W -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10
+Y -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10
+V -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10
+XX -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 0
+SHAR_EOF
+chmod 0644 idn_aa.mat ||
+echo 'restore of idn_aa.mat failed'
+Wc_c="`wc -c < 'idn_aa.mat'`"
+test 2210 -eq "$Wc_c" ||
+ echo 'idn_aa.mat: original size 2210, current size' "$Wc_c"
+fi
+# ============= initfa.c ==============
+if test -f 'initfa.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping initfa.c (File already exists)'
+else
+echo 'x - extracting initfa.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'initfa.c' &&
+/* initfa.c */
+X
+/* $Name: fa_34_26_5 $ - $Id: initfa.c,v 1.148 2007/04/26 18:40:58 wrp Exp $ */
+X
+/* copyright (c) 1996, 1997, 1998 William R. Pearson and the U. of Virginia */
+X
+/* init??.c files provide function specific initializations */
+X
+/* h_init() - called from comp_lib.c, comp_thr.c to initialize pstruct ppst
+X which includes the alphabet, and pam matrix
+X
+X alloc_pam() - allocate pam matrix space
+X init_pam2() - convert from 1D to 2D pam
+X
+X init_pamx() - convert from 1D to 2D pam
+X
+X f_initenv() - set up mngmsg and pstruct defaults
+X f_getopt() - read fasta specific command line options
+X f_getarg() - read ktup
+X
+X resetp() - reset the parameters, scoring matrix for DNA-DNA/DNA-prot
+X
+X query_parm() - ask for ktup
+X last_init() - some things must be done last
+X
+X f_initpam() - set some parameters based on the pam matrix
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <math.h>
+X
+#ifdef UNIX
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+X
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+X
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+#undef XTERNAL
+X
+#define MAXWINDOW 32
+X
+int initpam(char *, struct pstruct *);
+void init_pam2 (struct pstruct *ppst);
+void extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst);
+void build_xascii(int *qascii, char *save_str);
+void ann_ascii(int *qascii, char *ann_arr);
+void re_ascii(int *qascii, int *pascii);
+extern int nrand(int);
+X
+/* at some point, all the defaults should be driven from this table */
+/*
+#pgm q_seq l_seq p_seq matrix g_open g_ext fr_shft e_cut ktup
+# -n/-p -s -e -f -h/-j -E argv[3]
+fasta prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 2
+fasta dna(1) dna(1) dna(1) +5/-4 -14 -4 - 2.0 6
+ssearch prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 -
+ssearch dna(1) dna(1) dna(1) +5/-4 -14 -4 - 2.0 -
+fastx dna(1) prot(0) prot(0) BL50 -12 -2 -20 5.0 2
+fasty dna(1) prot(0) prot(0) BL50 -12 -2 -20/-24 5.0 2
+tfastx dna(1) prot(0) prot(0) BL50 -14 -2 -20 5.0 2
+tfasty dna(1) prot(0) prot(0) BL50 -14 -2 -20/-24 5.0 2
+fasts prot(0) prot(0) prot(0) MD20-MS - - - 5.0 -
+fasts dna(1) dna(1) dna(1) +2/-4 - - - 5.0 1
+tfasts prot(0) dna(1) prot(0) MD10-MS - - - 2.0 1
+fastf prot(0) prot(0) prot(0) MD20 - - - 2.0 1
+tfastf prot(0) dna(1) prot(0) MD10 - - - 1.0 1
+fastm prot(0) prot(0) prot(0) MD20 - - - 5.0 1
+fastm dna(1) dna(1) dna(1) +2/-4 - - - 2.0 1
+tfastm prot(0) dna(1) prot(0) MD10 - - - 2.0 1
+*/
+X
+struct pgm_def_str {
+X int pgm_id;
+X char *prog_func;
+X char *pgm_abbr;
+X char *iprompt0;
+X char *ref_str;
+X int PgmDID;
+X char *smstr;
+X int g_open_mod;
+X int gshift;
+X int hshift;
+X int e_cut;
+X int ktup;
+};
+X
+char *ref_str_a[]={
+X "\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n",
+X "\nPlease cite:\n T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; \n W.R. Pearson (1991) Genomics 11:635-650\n",
+X "\nPlease cite:\n Pearson et al, Genomics (1997) 46:24-36\n",
+X "\nPlease cite:\n Mackey et al. Mol. Cell. Proteomics (2002) 1:139-147\n",
+X "\nPlease cite:\n W.R. Pearson (1996) Meth. Enzymol. 266:227-258\n"
+};
+X
+/* Program ids.  Each value is also the program's index in pgm_def_arr[]
+X * and msg_def_arr[] (h_init() verifies this).  The TF*_PID expansions are
+X * parenthesized so that they stay a single operand when used inside a
+X * larger expression.
+X */
+#define FA_PID 1
+#define SS_PID 2
+#define FX_PID 3
+#define FY_PID 4
+#define FS_PID 5
+#define FF_PID 6
+#define FM_PID 7
+#define RSS_PID 8
+#define RFX_PID 9
+#define SSS_PID 10 /* old (slow) non-PG Smith-Waterman */
+#define TFA_PID (FA_PID+10)
+#define TFX_PID (FX_PID+10)
+#define TFY_PID (FY_PID+10)
+#define TFS_PID (FS_PID+10)
+#define TFF_PID (FF_PID+10)
+#define TFM_PID (FM_PID+10)
+X
+struct pgm_def_str
+pgm_def_arr[20] = { /* per-program defaults, indexed by pgm_id; h_init() checks each
+X   entry's pgm_id against its index.  18 of the 20 slots are initialized. */
+X {0, "", "", "", NULL, 400, "", 0, 0, 0, 1.0, 0 }, /* 0 - unused */
+X {FA_PID, "FASTA", "fa",
+X "FASTA searches a protein or DNA sequence data bank",
+X NULL, 401, "BL50", 0, 0, 0, 10.0, 2}, /* 1 - FASTA */
+X {SS_PID, "SSEARCH","gsw","SSEARCH searches a sequence data bank",
+X NULL, 404, "BL50", 0, 0, 0, 10.0, 0}, /* 2 - SSEARCH */
+X {FX_PID, "FASTX","fx",
+X "FASTX compares a DNA sequence to a protein sequence data bank",
+X NULL, 405, "BL50", -2, -20, 0, 5.0, 2}, /* 3 - FASTX */
+X {FY_PID, "FASTY", "fy",
+X "FASTY compares a DNA sequence to a protein sequence data bank",
+X NULL, 405, "BL50", -2, -20, -24, 5.0, 2}, /* 4 - FASTY */
+X {FS_PID, "FASTS", "fs",
+X "FASTS compares linked peptides to a protein data bank",
+X NULL, 400, "MD20-MS", 0, 0, 0, 5.0, 1}, /* 5 - FASTS */
+X {FF_PID, "FASTF", "ff",
+X "FASTF compares mixed peptides to a protein databank",
+X NULL, 400, "MD20", 0, 0, 0, 2.0, 1 }, /* 6 - FASTF */
+X {FM_PID, "FASTM", "fm",
+X "FASTM compares ordered peptides to a protein data bank",
+X NULL, 400, "MD20", 0, 0, 0, 5.0, 1 }, /* 7 - FASTM */
+X {RSS_PID, "PRSS", "rss",
+X "PRSS evaluates statistical signficance using Smith-Waterman",
+X NULL, 401, "BL50", 0, 0, 0, 1000.0, 0 }, /* 8 - PRSS */
+X {RFX_PID,"PRFX", "rfx",
+X "PRFX evaluates statistical signficance using FASTX",
+X NULL, 401, "BL50", -2, -20, -24, 1000.0, 2 }, /* 9 - PRFX */
+X {SSS_PID, "OSEARCH","ssw","OSEARCH searches a sequence data bank",
+X NULL, 404, "BL50", 0, 0, 0, 10.0, 0}, /* 10 - OSEARCH */
+X {TFA_PID, "TFASTA", "tfa",
+X "TFASTA compares a protein to a translated DNA data bank",
+X NULL, 402, "BL50", -2, 0, 0, 5.0, 2 }, /* 11 - TFASTA */
+X {0, "", "", "", NULL, 400, "", 0, 0, 0, 1.0, 0 }, /* 12 - unused placeholder (no program has id 12) */
+X {TFX_PID, "TFASTX", "tfx",
+X "TFASTX compares a protein to a translated DNA data bank",
+X NULL, 406, "BL50", -2, -20, 0, 2.0, 2}, /* 13 - TFASTX */
+X {TFY_PID, "TFASTY", "tfy",
+X "TFASTY compares a protein to a translated DNA data bank",
+X NULL, 406, "BL50", -2, -20, -24, 2.0, 2}, /* 14 - TFASTY */
+X {TFS_PID, "TFASTS", "tfs",
+X "TFASTS compares linked peptides to a translated DNA data bank",
+X NULL, 400, "MD10-MS", 0, 0, 0, 2.0, 2 }, /* 15 - TFASTS */
+X {TFF_PID, "TFASTF", "tff",
+X "TFASTF compares mixed peptides to a protein databank",
+X NULL, 400, "MD10", 0, 0, 0, 1.0, 1 }, /* 16 - TFASTF */
+X {TFM_PID, "TFASTM", "tfm",
+X "TFASTM compares ordered peptides to a translated DNA databank",
+X NULL, 400, "MD10", 0, 0, 0, 1.0, 1 } /* 17 - TFASTM */
+};
+X
+struct msg_def_str { /* per-program sequence-type, frame and label defaults; one entry per id in msg_def_arr[] */
+X int pgm_id; /* program id; h_init() checks it equals the array index */
+X int q_seqt; /* query sequence type (SEQT_*); SEQT_UNK is resolved in resetp() */
+X int l_seqt; /* library sequence type; copied to m_msg->ldnaseq in resetp() */
+X int p_seqt; /* comparison sequence type; copied to ppst->dnaseq in resetp() */
+X int sw_flag; /* default for ppst->sw_flag */
+X int stages; /* default for m_msp->stages */
+X int qframe; /* default for m_msp->qframe */
+X int nframe; /* default for m_msp->nframe */
+X int nrelv, srelv, arelv; /* defaults for m_msp->nrelv, m_msp->srelv, m_msp->arelv */
+X char *f_id0, *f_id1, *label; /* copied to m_msp->f_id0, f_id1 and label in f_initenv() */
+};
+X
+/* pgm_id q_seqt l_seqt p_seqt sw_f st qf nf nrv srv arv s_ix */
+struct msg_def_str msg_def_arr[20] = { /* indexed by pgm_id, parallel to pgm_def_arr[]; resetp() and
+X   the -o option update entries of this table at run time */
+X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=0 */
+X {FA_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 1, 3,
+X "fa","sw", "opt"},
+X {SS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+X "sw","sw", "s-w"},
+X {FX_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
+X "fx","sx", "opt"},
+X {FY_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
+X "fy","sy", "opt"},
+X {FS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+X "fs","fs", "initn init1"},
+X {FF_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+X "ff","ff", "initn init1"},
+X {FM_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
+X "fm","fm","initn init1"},
+X {RSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 0, 1, 1, -1, 1, 1, 1,
+X "rss","sw","s-w"},
+X {RFX_PID, SEQT_DNA,SEQT_PROT, SEQT_PROT, 0, 1, 2, -1, 3, 1, 3,
+X "rfx","sx","opt"},
+X {SSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
+X "sw","sw", "s-w"},
+X {TFA_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 0, 1, 1, 6, 3, 1, 3,
+X "tfa","fa","initn init1"},
+X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=12 */
+X {TFX_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
+X "tfx","sx","initn opt"},
+X {TFY_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
+X "tfy","sy","initn opt"},
+X {TFS_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+X "tfs","fs","initn init1"},
+X {TFF_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+X "tff","ff","initn init1"},
+X {TFM_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
+X "tfm","fm","initn init1"}
+};
+X
+int
+get_pgm_id() { /* Returns the program id (*_PID) selected by the compile-time program
+X   macros (FASTA, FASTX, FASTY, FASTS, FASTF, FASTM, SSEARCH, OSEARCH, PRSS,
+X   each optionally combined with TFAST) and stores the matching citation
+X   from ref_str_a[] in that program's pgm_def_arr[] entry.
+X   Returns 0 when none of the program macros is defined. */
+X
+X int rval=0;
+X
+#ifdef FASTA
+#ifndef TFAST
+X pgm_def_arr[FA_PID].ref_str = ref_str_a[0];
+X rval=FA_PID;
+#else
+X pgm_def_arr[TFA_PID].ref_str = ref_str_a[0];
+X rval=TFA_PID;
+#endif
+#endif
+X
+#ifdef FASTX
+#ifndef TFAST
+#ifndef PRSS
+X pgm_def_arr[FX_PID].ref_str = ref_str_a[2];
+X rval=FX_PID;
+#else
+X pgm_def_arr[RFX_PID].ref_str = ref_str_a[2]; /* FASTX together with PRSS selects PRFX */
+X rval=RFX_PID;
+#endif
+#else
+X pgm_def_arr[TFX_PID].ref_str = ref_str_a[2];
+X rval=TFX_PID;
+#endif
+#endif
+X
+#ifdef FASTY
+#ifndef TFAST
+X pgm_def_arr[FY_PID].ref_str = ref_str_a[2];
+X rval=FY_PID;
+#else
+X pgm_def_arr[TFY_PID].ref_str = ref_str_a[2];
+X rval=TFY_PID;
+#endif
+#endif
+X
+#ifdef FASTS
+#ifndef TFAST
+X pgm_def_arr[FS_PID].ref_str = ref_str_a[3];
+X rval=FS_PID;
+#else
+X pgm_def_arr[TFS_PID].ref_str = ref_str_a[3];
+X rval=TFS_PID;
+#endif
+#endif
+X
+#ifdef FASTF
+#ifndef TFAST
+X pgm_def_arr[FF_PID].ref_str = ref_str_a[3];
+X rval=FF_PID;
+#else
+X pgm_def_arr[TFF_PID].ref_str = ref_str_a[3];
+X rval=TFF_PID;
+#endif
+#endif
+X
+#ifdef FASTM
+#ifndef TFAST
+X pgm_def_arr[FM_PID].ref_str = ref_str_a[3];
+X rval=FM_PID;
+#else
+X pgm_def_arr[TFM_PID].ref_str = ref_str_a[3];
+X rval=TFM_PID;
+#endif
+#endif
+X
+#ifdef SSEARCH
+X pgm_def_arr[SS_PID].ref_str = ref_str_a[1];
+X rval=SS_PID;
+#endif
+X
+#ifdef OSEARCH
+X pgm_def_arr[SSS_PID].ref_str = ref_str_a[1];
+X rval=SSS_PID;
+#endif
+X
+#ifdef PRSS
+#ifndef FASTX
+X pgm_def_arr[RSS_PID].ref_str = ref_str_a[4]; /* PRSS without FASTX; the PRFX case is handled above */
+X rval=RSS_PID;
+#endif
+#endif
+X
+X return rval;
+}
+X
+char *iprompt1=" test sequence file name: ";
+char *iprompt2=" database file name: ";
+X
+char *verstr="version 34.26.5 April 26, 2007";
+X
+char *s_optstr = "13Ac:f:g:h:j:k:nopP:r:s:St:Ux:y:"; /* option letters that have a case in f_getopt() */
+X
+static int mktup=2; /* largest ktup accepted by last_init(); raised to 6 for DNA in resetp() */
+static int ktup_set = 0; /* set when ktup comes from the command line or the interactive prompt */
+static int gap_set=0; /* set by -g */
+static int del_set=0; /* set by -f */
+static int mshuff_set = 0; /* set by -k */
+static int prot2dna = 0; /* set by -n and -U; not read in this part of the file */
+X
+extern int max_workers;
+X
+extern void s_abort(char *, char *);
+extern void init_ascii(int ext_sq, int *sascii, int dnaseq);
+extern int standard_pam(char *smstr, struct pstruct *ppst,
+X int del_set, int gap_set);
+extern void mk_n_pam(int *arr,int siz, int mat, int mis);
+extern int karlin(int , int, double *, double *, double *);
+extern void init_karlin_a(struct pstruct *, double *, double **);
+extern int do_karlin_a(int **, struct pstruct *, double *,
+X double *, double *, double *, double *);
+X
+#if defined(TFAST) || defined(FASTX) || defined(FASTY)
+extern void aainit(int tr_type, int debug);
+#endif
+X
+char *iprompt0, *prog_func, *refstr;
+X
+X
+/* Sets defaults assuming a protein sequence.
+X *
+X * Obtains the program id from get_pgm_id(), stores it in ppst->pgm_id and
+X * exits with a message if the pgm_def_arr[] or msg_def_arr[] entry at that
+X * index carries a different pgm_id.  It then:
+X *  - copies the program abbreviation into pgm_abbr (at most MAX_SSTR bytes)
+X *    and sets the iprompt0, refstr and prog_func globals;
+X *  - sets m_msp->max_tot and the query mapping/type (qascii, m_msp->qdnaseq);
+X *  - loads the program's default scoring matrix with standard_pam();
+X *  - fills the protein alphabets (sq, hsq, sqx, hsqx) and the c_nt[] table
+X *    in ppst.
+X */
+void h_init (struct pstruct *ppst, struct mngmsg *m_msp, char *pgm_abbr)
+{
+X struct pgm_def_str pgm_def;
+X int i, pgm_id;
+X
+X ppst->pgm_id = pgm_id = get_pgm_id();
+X pgm_def = pgm_def_arr[pgm_id]; /* local copy of this program's defaults */
+X
+X /* check that pgm_def_arr[] is valid */
+X if (pgm_def.pgm_id != pgm_id) {
+X fprintf(stderr,
+X "**pgm_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
+X pgm_def.pgm_id, pgm_id);
+X exit(1);
+X }
+X
+X /* check that msg_def_arr[] is valid */
+X if (msg_def_arr[pgm_id].pgm_id != pgm_id) {
+X fprintf(stderr,
+X "**msg_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
+X msg_def_arr[pgm_id].pgm_id, pgm_id);
+X exit(1);
+X }
+X
+X strncpy(pgm_abbr,pgm_def.pgm_abbr,MAX_SSTR);
+X iprompt0 = pgm_def.iprompt0;
+X refstr = pgm_def.ref_str;
+X prog_func = pgm_def.prog_func;
+X
+X /* MAXTOT = MAXTST + MAXLIB for everything except TFAST,
+X where it is MAXTST + MAXTRN */
+X m_msp->max_tot = MAXTOT;
+X
+X /* set up DNA query sequence if required*/
+X if (msg_def_arr[pgm_id].q_seqt == SEQT_DNA) {
+X memcpy(qascii,nascii,sizeof(qascii));
+X m_msp->qdnaseq = SEQT_DNA;
+X }
+X else { /* when SEQT_UNK, start with protein */
+X memcpy(qascii,aascii,sizeof(qascii));
+X m_msp->qdnaseq = msg_def_arr[pgm_id].q_seqt;
+X }
+X
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+X qascii[','] = ESS;
+X /* also initialize aascii, nascii for databases */
+X qascii['*'] = NA;
+#endif
+X
+X /* initialize a pam matrix */
+X strncpy(ppst->pamfile,pgm_def.smstr,MAX_FN);
+X standard_pam(ppst->pamfile,ppst,del_set,gap_set);
+X ppst->have_pam2 = 0; /* the 2-D tables are allocated later, in last_init() */
+X
+X /* this is always protein by default */
+X ppst->nsq = naa;
+X ppst->nsqx = naax;
+X for (i=0; i<=ppst->nsqx; i++) {
+X ppst->sq[i] = aa[i];
+X ppst->hsq[i] = haa[i];
+X ppst->sqx[i]=aax[i]; /* sq = aa */
+X ppst->hsqx[i]=haax[i]; /* hsq = haa */
+X }
+X ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
+X
+X /* set up the c_nt[] mapping */
+X
+#if defined(FASTS) || defined(FASTF) || defined(FASTM)
+X ppst->c_nt[ESS] = ESS;
+#endif
+X ppst->c_nt[0]=0;
+X for (i=1; i<=nnt; i++) {
+X ppst->c_nt[i]=gc_nt[i];
+X ppst->c_nt[i+nnt]=gc_nt[i]+nnt; /* second half: same mapping, offset by nnt */
+X }
+}
+X
+/*
+X * alloc_pam(): allocates memory for the 2D pam matrix as well
+X * as for the integer array used to transmit the pam matrix
+X *
+X * d1, d2 - number of rows and columns of each table
+X * ppst   - receives the two row-pointer arrays in pam2[0] and pam2[1];
+X *          have_pam2 is set to 1 on success
+X *
+X * The backing stores are kept in the globals pam12 and pam12x.  Both are
+X * zero-filled: init_pamx() only writes part of pam2[1] (and only when the
+X * extended alphabet is enabled), so the rest must not be left
+X * indeterminate.  Any allocation failure is reported through s_abort().
+X */
+void
+alloc_pam (int d1, int d2, struct pstruct *ppst)
+{
+X  int i, *d2p;
+X  char err_str[128];
+X
+X  if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
+X    sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X    s_abort (err_str,"");
+X  }
+X
+X  if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
+X    sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X    s_abort (err_str,"");
+X  }
+X
+X  if ((d2p = pam12 = (int *) calloc (d1 * d2, sizeof (int))) == NULL) {
+X    sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X    s_abort (err_str,"");
+X  }
+X
+X  /* point each row of pam2[0] into the contiguous block */
+X  for (i = 0; i < d1; i++, d2p += d2)
+X    ppst->pam2[0][i] = d2p;
+X
+X  /* calloc (not malloc) so that pam2[1] starts out zeroed like pam2[0] */
+X  if ((d2p=pam12x= (int *) calloc (d1 * d2, sizeof (int))) == NULL) {
+X    sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2);
+X    s_abort (err_str,"");
+X  }
+X
+X  for (i = 0; i < d1; i++, d2p += d2)
+X    ppst->pam2[1][i] = d2p;
+X
+X  ppst->have_pam2 = 1;
+}
+X
+/*
+X * init_pam2(): expands the packed 1-D pam[] array (row i contributes
+X * entries for columns 1..i) into the symmetric 2-D table ppst->pam2[0].
+X * ppst->pamoff is subtracted from every score, and the smallest and
+X * largest resulting scores are left in ppst->pam_l and ppst->pam_h.
+X * Row 0 and column 0 are set to -BIGNUM.
+X */
+void
+init_pam2 (struct pstruct *ppst) {
+X  int row, col, next, score;
+X  int n_res = ppst->nsq;
+X  int **mat = ppst->pam2[0];
+X
+X  mat[0][0] = -BIGNUM;
+X  ppst->pam_h = -1;
+X  ppst->pam_l = 1;
+X
+X  next = 0;			/* read position in pam[] */
+X  for (row = 1; row <= n_res; row++) {
+X    mat[0][row] = mat[row][0] = -BIGNUM;
+X    for (col = 1; col <= row; col++) {
+X      score = pam[next++] - ppst->pamoff;
+X      mat[col][row] = mat[row][col] = score;
+X      if (score < ppst->pam_l) ppst->pam_l = score;
+X      if (score > ppst->pam_h) ppst->pam_h = score;
+X    }
+X  }
+}
+X
+void
+init_pamx (struct pstruct *ppst) { /* Completes the ppst->pam2[] tables after init_pam2():
+X   - sets ppst->nt_align when dnaseq is SEQT_DNA or SEQT_RNA;
+X   - for SEQT_RNA, sets three specific entries to the G:G score minus 1;
+X   - when pam_x_set, overwrites the rows/columns of the 'X' (or 'N') and
+X     '*' codes with pam_xm/pam_xx; otherwise reads pam_xx/pam_xm back from
+X     the matrix;
+X   - when ext_sq_set, fills the i+nsq / j+nsq parts of pam2[0] and builds
+X     pam2[1]. */
+X int i, j, k, nsq, pam_xx, pam_xm; /* k is unused; pam_xx and pam_xm are assigned but not read */
+X int sa_x, sa_t, tmp;
+X
+X nsq = ppst->nsq;
+X
+X ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+X
+X if (ppst->nt_align) {
+X sa_x = pascii['N'];
+X sa_t = sa_x; /* nucleotides: no separate terminator code, reuse 'N' */
+X }
+X else {
+X sa_x = pascii['X'];
+X sa_t = pascii['*'];
+X }
+X
+X if (ppst->dnaseq == SEQT_RNA) {
+X tmp = ppst->pam2[0][nascii['G']][nascii['G']] - 1;
+X ppst->pam2[0][nascii['A']][nascii['G']] =
+X ppst->pam2[0][nascii['C']][nascii['T']] =
+X ppst->pam2[0][nascii['C']][nascii['U']] = tmp;
+X }
+X
+X if (ppst->pam_x_set) {
+X for (i=1; i<=nsq; i++) {
+X ppst->pam2[0][sa_x][i] = ppst->pam2[0][i][sa_x]=ppst->pam_xm;
+X ppst->pam2[0][sa_t][i] = ppst->pam2[0][i][sa_t]=ppst->pam_xm;
+X }
+X ppst->pam2[0][sa_x][sa_x]=ppst->pam_xx;
+X ppst->pam2[0][sa_t][sa_t]=ppst->pam_xx;
+X }
+X else {
+X ppst->pam_xx = ppst->pam2[0][sa_x][sa_x];
+X ppst->pam_xm = ppst->pam2[0][1][sa_x];
+X }
+X
+X pam_xx = ppst->pam_xx;
+X pam_xm = ppst->pam_xm;
+X
+X if (ppst->ext_sq_set) { /* using extended alphabet */
+X /* fill in pam2[1] matrix */
+X ppst->pam2[1][0][0] = -BIGNUM;
+X /* fill in additional parts of the matrix */
+X for (i = 1; i <= nsq; i++) {
+X
+X /* -BIGNUM to all matches vs 0 */
+X ppst->pam2[0][0][i+nsq] = ppst->pam2[0][i+nsq][0] =
+X ppst->pam2[1][0][i+nsq] = ppst->pam2[1][i+nsq][0] =
+X ppst->pam2[1][0][i] = ppst->pam2[1][i][0] = -BIGNUM;
+X
+X for (j = 1; j <= nsq; j++) {
+X
+X /* replicate pam2[0] to i+nsq, j+nsq */
+X ppst->pam2[0][i+nsq][j] = ppst->pam2[0][i][j+nsq] =
+X ppst->pam2[0][i+nsq][j+nsq] = ppst->pam2[1][i][j] =
+X ppst->pam2[0][i][j];
+X
+X /* set the high portion of pam2[1] to the corresponding value
+X of pam2[1][sa_x][j] */
+X
+X ppst->pam2[1][i+nsq][j] = ppst->pam2[1][i][j+nsq]=
+X ppst->pam2[1][i+nsq][j+nsq]=ppst->pam2[0][sa_x][j]; /* NOTE(review): value is read from pam2[0], the comment above says pam2[1] - confirm */
+X }
+X }
+X }
+}
+X
+/* function specific initializations.
+X *
+X * Copies this program's msg_def_arr[] defaults (file ids, label, frames,
+X * relevance counts, stages, sw_flag) and its pgm_def_arr[] e_cut into
+X * m_msp/ppst, adds g_open_mod to ppst->gdelval, resets the scoring-matrix
+X * option flags, and, for programs with a ktup default, loads the
+X * param_u.fa defaults.  aa0 is not used here.
+X */
+void
+f_initenv (struct mngmsg *m_msp, struct pstruct *ppst, unsigned char **aa0) {
+X struct msg_def_str m_msg_def;
+X int pgm_id;
+X
+X pgm_id = ppst->pgm_id;
+X m_msg_def = msg_def_arr[pgm_id]; /* local copy of this program's message defaults */
+X
+X m_msp->last_calc_flg=0;
+X
+X strncpy(m_msp->f_id0,m_msg_def.f_id0,sizeof(m_msp->f_id0));
+X strncpy(m_msp->f_id1,m_msg_def.f_id1,sizeof(m_msp->f_id1));
+X strncpy (m_msp->label, m_msg_def.label, sizeof(m_msp->label));
+X
+#ifndef SSEARCH
+X strncpy (m_msp->alab[0],"initn",20);
+X strncpy (m_msp->alab[1],"init1",20);
+X strncpy (m_msp->alab[2],"opt",20);
+#else
+X strncpy (m_msp->alab[0],"s-w opt",20);
+#endif
+X
+X ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod;
+X ppst->sw_flag = m_msg_def.sw_flag;
+X m_msp->e_cut=pgm_def_arr[pgm_id].e_cut;
+X
+X ppst->score_ix = 0;
+X ppst->histint = 2;
+X m_msp->qframe = m_msg_def.qframe;
+X ppst->sw_flag = m_msg_def.sw_flag; /* repeats the assignment made above */
+X m_msp->nframe = m_msg_def.nframe;
+X m_msp->nrelv = m_msg_def.nrelv;
+X m_msp->srelv = m_msg_def.srelv;
+X m_msp->arelv = m_msg_def.arelv;
+X m_msp->stages = m_msg_def.stages;
+#if defined(PRSS)
+X m_msp->shuff_wid = 0;
+X m_msp->shuff_max = 200;
+#endif
+X
+X /* see param.h for the definition of all these */
+X
+X m_msp->qshuffle = 0;
+X m_msp->nm0 = 1;
+X m_msp->escore_flg = 0;
+X
+X /* pam information */
+X ppst->pam_pssm = 0;
+#if defined(FASTS) || defined(FASTF) || defined(FASTM)
+X ppst->pam_xx = ppst->pam_xm = 0;
+#else
+X ppst->pam_xx = 1; /* set >0 to use pam['X']['X'] value */
+X ppst->pam_xm = -1; /* set >0 to use pam['X']['A-Z'] value */
+#endif
+X ppst->pam_x_set = 0;
+X ppst->pam_set = 0;
+X ppst->pam_pssm = 0; /* repeats the assignment made above */
+X ppst->p_d_set = 0;
+X ppst->pamoff = 0;
+X ppst->ext_sq_set = 0;
+X
+X if (pgm_def_arr[ppst->pgm_id].ktup > 0) { /* ktup-based programs only */
+X mktup = 2;
+X ppst->param_u.fa.bestscale = 300;
+X ppst->param_u.fa.bestoff = 36;
+X ppst->param_u.fa.bkfact = 6;
+X ppst->param_u.fa.scfact = 3;
+X ppst->param_u.fa.bktup = 2;
+X ppst->param_u.fa.ktup = 0; /* 0: not chosen yet; see f_getarg() and query_parm() */
+X ppst->param_u.fa.bestmax = 50;
+X ppst->param_u.fa.pamfact = 1;
+X ppst->param_u.fa.altflag = 0;
+X ppst->param_u.fa.optflag = 1;
+X ppst->param_u.fa.iniflag = 0;
+X ppst->param_u.fa.optcut = 0;
+X ppst->param_u.fa.optcut_set = 0;
+X ppst->param_u.fa.cgap = 0;
+X ppst->param_u.fa.optwid = MAXWINDOW;
+X }
+X
+}
+X
+/* switches for fasta only; each records that f_getopt() saw the option,
+X so that later defaulting code leaves the user's value alone */
+X
+static int shift_set=0; /* set by -h */
+static int subs_set=0; /* set by -j */
+static int sw_flag_set=0; /* set by -A */
+static int nframe_set=0; /* set by -3 */
+static int wid_set=0; /* set by -y (ktup programs only) */
+X
+/* f_getopt(): handle one function-specific command line option.
+X *
+X * copt        - option character (see s_optstr)
+X * optarg      - option argument; only read by options that take one
+X * m_msg, ppst - structures updated with the parsed value
+X *
+X * Several options also raise a file-level *_set flag so that resetp() and
+X * last_init() do not replace the user's value with a default.  Numeric
+X * arguments are parsed with sscanf(); when parsing fails the previous
+X * value is left untouched.  Option characters without a case below are
+X * ignored.
+X */
+void
+f_getopt (char copt, char *optarg,
+X          struct mngmsg *m_msg, struct pstruct *ppst)
+{
+X  int pgm_id;
+X  char *bp;
+X
+X  pgm_id = ppst->pgm_id;
+X
+X  switch (copt) {
+X  case '1':
+X    if (pgm_def_arr[pgm_id].ktup > 0) {
+X      ppst->param_u.fa.iniflag=1;
+X    }
+X    break;
+X  case '3':
+X    nframe_set = 1;
+X    if (pgm_id == TFA_PID) {
+X      m_msg->nframe = 3;
+X    }
+X    else {
+X      m_msg->nframe = 1;	/* for TFASTXY */
+X      m_msg->qframe = 1;	/* for FASTA, FASTX */
+X    }
+X    break;
+X  case 'A':
+X    ppst->sw_flag= 1;
+X    sw_flag_set = 1;
+X    break;
+X  case 'c':
+X    if (pgm_def_arr[pgm_id].ktup > 0) {
+X      sscanf (optarg, "%d", &ppst->param_u.fa.optcut);
+X      ppst->param_u.fa.optcut_set = 1;
+X    }
+X    break;
+X  case 'f':
+X    sscanf (optarg, "%d", &ppst->gdelval);
+X    /* penalties are stored as negative values */
+X    if (ppst->gdelval > 0) ppst->gdelval = -ppst->gdelval;
+X    del_set = 1;
+X    break;
+X  case 'g':
+X    sscanf (optarg, "%d", &ppst->ggapval);
+X    if (ppst->ggapval > 0) ppst->ggapval = -ppst->ggapval;
+X    gap_set = 1;
+X    break;
+X  case 'h':
+X    sscanf (optarg, "%d", &ppst->gshift);
+X    if (ppst->gshift > 0) ppst->gshift = -ppst->gshift;
+X    shift_set = 1;
+X    break;
+X  case 'j':
+X    sscanf (optarg, "%d", &ppst->gsubs);
+X    subs_set = 1;
+X    break;
+X  case 'k':
+X    sscanf (optarg, "%d", &m_msg->shuff_max);
+X    mshuff_set = 1;
+X    break;
+X  case 'n':
+X    m_msg->qdnaseq = SEQT_DNA;
+X    re_ascii(qascii,nascii);
+X    strncpy(m_msg->sqnam,"nt",4);
+X    prot2dna = 1;
+X    break;
+X  case 'o':
+X    if (pgm_def_arr[pgm_id].ktup > 0) {
+X      ppst->param_u.fa.optflag = 0;
+X      msg_def_arr[pgm_id].nrelv = m_msg->nrelv = 2;
+X    }
+X    break;
+X  case 'p':
+X    m_msg->qdnaseq = SEQT_PROT;
+X    ppst->dnaseq = SEQT_PROT;
+X    strncpy(m_msg->sqnam,"aa",4);
+X    break;
+X  case 'P':
+X    strncpy(ppst->pgpfile,optarg,MAX_FN);
+X    /* terminate before searching: strncpy() leaves the buffer without a
+X       NUL when optarg has MAX_FN or more characters, and strchr()/atoi()
+X       would then read past its end */
+X    ppst->pgpfile[MAX_FN-1]='\0';
+X    /* an optional type number follows the file name after a space */
+X    if ((bp=strchr(ppst->pgpfile,' '))!=NULL) {
+X      *bp='\0';
+X      ppst->pgpfile_type = atoi(bp+1);
+X    }
+X    else ppst->pgpfile_type = 0;
+X    ppst->pam_pssm = 1;
+X    break;
+X  case 'r':
+X    sscanf(optarg,"%d/%d",&ppst->p_d_mat,&ppst->p_d_mis);
+X    /* accepted only as positive match / negative mismatch */
+X    if (ppst->p_d_mat > 0 && ppst->p_d_mis < 0) {
+X      ppst->p_d_set = 1;
+X      strncpy(ppst->pamfile,optarg,40);
+X    }
+X    break;
+X  case 's':
+X    strncpy (ppst->pamfile, optarg, 120);
+X    ppst->pamfile[120-1]='\0';
+X    /* not a built-in matrix name: try to read it as a matrix file */
+X    if (!standard_pam(ppst->pamfile,ppst,del_set, gap_set)) {
+X      initpam (ppst->pamfile, ppst);
+X    }
+X    ppst->pam_set=1;
+X    break;
+X  case 'S':	/* turn on extended alphabet for seg */
+X    ppst->ext_sq_set = 1;
+X    break;
+X  case 't':
+X    /* cast: tolower() is only defined for unsigned char values and EOF */
+X    if (tolower((unsigned char)optarg[0])=='t') {
+X      m_msg->term_code = aascii['*']; optarg++;
+X    }
+X    if (*optarg) {sscanf (optarg, "%d", &ppst->tr_type);}
+X    break;
+X  case 'U':
+X    m_msg->qdnaseq = SEQT_RNA;
+X    memcpy(qascii,nascii,sizeof(qascii));
+X    strncpy(m_msg->sqnam,"nt",4);
+X    nt[nascii['T']]='U';
+X    prot2dna=1;
+X    break;
+X  case 'x':
+X    if (strchr(optarg,',')!=NULL) {
+X      sscanf (optarg,"%d,%d",&ppst->pam_xx, &ppst->pam_xm);
+X    }
+X    else {
+X      /* a single value is used for both X:X and X:other */
+X      sscanf (optarg,"%d",&ppst->pam_xx);
+X      ppst->pam_xm = ppst->pam_xx;
+X    }
+X    ppst->pam_x_set=1;
+X    break;
+X  case 'y':
+X    if (pgm_def_arr[pgm_id].ktup > 0) {
+X      sscanf (optarg, "%d", &ppst->param_u.fa.optwid);
+X      wid_set = 1;
+X    }
+X    break;
+X  }
+}
+X
+void
+f_lastenv (struct mngmsg *m_msg, struct pstruct *ppst)
+{ /* Adjusts the query mapping qascii once all options are parsed: when the
+X   query type is still SEQT_UNK, build_xascii() is called with a one
+X   character string ("*", or "," for FASTS/FASTM/FASTF builds) and, if
+X   ann_flg is set, ann_ascii() with the annotation characters; otherwise,
+X   in FASTA-only builds, init_ascii() is applied to qascii. */
+X char save_str[MAX_SSTR];
+X
+#if !defined(FASTM) && !defined(FASTS) && !defined(FASTF)
+X strncpy(save_str,"*",sizeof(save_str));
+#else
+X strncpy(save_str,",",sizeof(save_str));
+#endif
+X
+X if (m_msg->qdnaseq == SEQT_UNK) {
+X build_xascii(qascii,save_str);
+X if (m_msg->ann_flg) ann_ascii(qascii,m_msg->ann_arr);
+X }
+X
+/* this check allows lc DNA sequence queries with FASTX */
+#if defined(FASTA) && !defined(FASTS) && !defined(FASTM) && !defined(FASTF)
+X else /* pairs with the SEQT_UNK test above; compiled only in FASTA builds */
+X init_ascii(ppst->ext_sq_set,qascii,m_msg->qdnaseq);
+#endif
+}
+X
+/* f_getarg(): picks up the optional positional arguments that follow the
+X * options.  For programs with a ktup default, a fourth positional argument
+X * is read as ktup (and ktup_set is raised); without it ktup becomes minus
+X * the default bktup.  PRSS reads its shuffle count from the fourth
+X * positional argument, PRFX from the fifth.
+X */
+void
+f_getarg (int argc, char **argv, int optind,
+X          struct mngmsg *m_msg, struct pstruct *ppst)
+{
+X  int n_pos = argc - optind;	/* number of positional arguments */
+X
+X  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+X    if (n_pos < 4) {
+X      ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup;
+X    }
+X    else {
+X      sscanf (argv[optind + 3], "%d", &ppst->param_u.fa.ktup);
+X      ktup_set = 1;
+X    }
+X  }
+X
+X  switch (ppst->pgm_id) {
+X  case RSS_PID:
+X    if (n_pos > 3) sscanf (argv[optind + 3], "%d", &m_msg->shuff_max);
+X    break;
+X  case RFX_PID:
+X    if (n_pos > 4) sscanf (argv[optind + 4], "%d", &m_msg->shuff_max);
+X    break;
+X  default:
+X    break;
+X  }
+}
+X
+/* re_ascii(): refreshes the query mapping from the parameter mapping.
+X   Of the first 128 entries, those whose current value lies in the range
+X   ESS..'@' are kept; every other entry is replaced by the entry at the
+X   same position in pascii.
+*/
+X
+void
+re_ascii(int *qascii, int *pascii) {
+X  int *q_end = qascii + 128;
+X
+X  for ( ; qascii < q_end; qascii++, pascii++) {
+X    if (*qascii >= ESS && *qascii <= '@') continue;
+X    *qascii = *pascii;
+X  }
+}
+X
+X
+/* recode has become function specific to accommodate FASTS/M */
+/* modified 28-Dec-2004 to ensure that all mapped characters
+X are valid */
+/* recode(): converts seq[0..n-1] in place from characters to residue
+X   codes using qascii, writes EOSEQ at seq[n] and returns n.
+X
+X   Bytes greater than '@' are looked up in qascii; other bytes are left
+X   as they are.  A result larger than nsqx (other than ESS) is reported
+X   on stderr and replaced by the code for 'X'.
+X
+X   Bytes of 128 and above are reported as unrecognized without being
+X   looked up: the mapping tables are handled as 128 entries elsewhere in
+X   this file (see re_ascii()), so such a byte would index past the end.
+X   The caller must provide room for n+1 bytes in seq.
+*/
+int
+recode(unsigned char *seq, int n, int *qascii, int nsqx) {
+X  int i, unknown;
+X  unsigned char save_c;
+X
+#if defined(FASTS) || defined(FASTM)
+X  qascii[',']=ESS;
+#endif
+X
+X  for (i=0; i < n; i++) {
+X    save_c = seq[i];
+X    unknown = 0;
+X    if (seq[i] >= 128) unknown = 1;	/* outside the mapping table */
+X    else if (seq[i] > '@') seq[i] = qascii[seq[i]];
+X
+X    if (unknown || (seq[i] > nsqx && seq[i]!=ESS)) {
+X      fprintf(stderr, "*** Warning - unrecognized residue at %d:%c - %2d\n",
+X              i,save_c,save_c);
+X      seq[i] = qascii['X'];
+X    }
+X  }
+X  seq[i]=EOSEQ;
+X  return i;
+}
+X
+/* here we have the query sequence, all the command line options,
+X but we need to set various parameter options based on the type
+X of the query sequence (m_msg->qdnaseq = 0:protein/1:DNA) and
+X the function (FASTA/FASTX/TFASTA)
+*/
+X
+/* this resetp is for a conventional FASTA/TFASTXYZ search.
+X
+X Steps, in order:
+X - TFAST builds exit(1) on a DNA/RNA query; FASTX/FASTY builds exit(1)
+X   on a query that is not DNA/RNA;
+X - for programs whose msg_def_arr[] q_seqt is SEQT_UNK, the actual query
+X   type is written back into msg_def_arr[]; for DNA/RNA the library and
+X   parameter types become SEQT_DNA and the table's e_cut is divided by 5;
+X - ppst->dnaseq, the pascii and lascii mappings and the frame counts are
+X   set from the (possibly updated) table;
+X - DNA/RNA comparisons get nucleotide alphabets, penalties, ktup
+X   parameters and matrix; protein comparisons get the per-program gap and
+X   frameshift defaults.  Values the user set explicitly (del_set, gap_set,
+X   wid_set, sw_flag_set, shift_set, subs_set, nframe_set, e_cut_set,
+X   pam_set) are kept.
+*/
+void
+resetp (struct mngmsg *m_msg, struct pstruct *ppst) {
+X int i, pgm_id;
+X
+X pgm_id = ppst->pgm_id;
+X
+#if defined(TFAST)
+X if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
+X fprintf(stderr," %s compares a protein to a translated\n\
+DNA sequence library. Do not use a DNA query/scoring matrix.\n",prog_func);
+X exit(1);
+X }
+#else
+#if (defined(FASTX) || defined(FASTY))
+X if (!(m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA)) {
+X fprintf(stderr," FASTX/Y compares a DNA sequence to a protein database\n");
+X fprintf(stderr," Use a DNA query\n");
+X exit(1);
+X }
+#endif
+#endif
+X
+/* this code changes parameters for programs (FA_PID, SS_PID, FS_PID,
+X RSS_PID) that can examine either protein (initial state) or DNA
+X Modified May, 2006 to reset e_cut for DNA comparisons.
+*/
+X
+X if (msg_def_arr[pgm_id].q_seqt == SEQT_UNK) {
+X if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
+X msg_def_arr[pgm_id].q_seqt = m_msg->qdnaseq; /* no longer SEQT_UNK, so this branch is not taken again */
+X msg_def_arr[pgm_id].p_seqt = SEQT_DNA;
+X msg_def_arr[pgm_id].l_seqt = SEQT_DNA;
+X if (m_msg->qdnaseq == SEQT_DNA) msg_def_arr[pgm_id].qframe = 2;
+X pgm_def_arr[pgm_id].e_cut /= 5.0; /* modifies the shared defaults table */
+X }
+X else {
+X msg_def_arr[pgm_id].q_seqt = SEQT_PROT;
+X }
+X }
+X
+X ppst->dnaseq = msg_def_arr[pgm_id].p_seqt;
+X if (!sw_flag_set) ppst->sw_flag = msg_def_arr[pgm_id].sw_flag;
+X if (!m_msg->e_cut_set) m_msg->e_cut=pgm_def_arr[pgm_id].e_cut;
+X
+X if (ppst->dnaseq == SEQT_DNA && m_msg->qdnaseq==SEQT_RNA) {
+X ppst->dnaseq = SEQT_RNA;
+X ppst->nt_align = 1;
+X }
+X if (ppst->dnaseq==SEQT_DNA) pascii = &nascii[0];
+X else if (ppst->dnaseq==SEQT_RNA) {
+X pascii = &nascii[0];
+X ppst->sq[nascii['T']] = 'U';
+X }
+X else pascii = &aascii[0];
+X m_msg->ldnaseq = msg_def_arr[pgm_id].l_seqt;
+X if (m_msg->ldnaseq & SEQT_DNA) {
+X memcpy(lascii,nascii,sizeof(lascii));
+#ifndef TFAST
+#ifdef DNALIB_LC
+X init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);
+#endif
+#else
+X /* no init_ascii() because we translate lower case library sequences */
+#endif
+X }
+X else {
+X memcpy(lascii,aascii,sizeof(lascii)); /* initialize lib mapping */
+X
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+X lascii['*'] = NA;
+#endif
+X init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);
+X }
+X
+X if (!nframe_set) {
+X m_msg->qframe = msg_def_arr[pgm_id].qframe;
+X m_msg->nframe = msg_def_arr[pgm_id].nframe;
+X }
+X
+X /* the possibilities:
+X -i -3 qframe revcomp
+X FA_D/FX - - 2 0
+X FA_D/FX + - 2 1
+X FA_D/FX - + 1 0
+X FA_D/FX + + 2 1
+X */
+X
+X if (m_msg->qdnaseq == SEQT_DNA) {
+X m_msg->nframe = 1;
+X if (m_msg->qframe == 1 && m_msg->revcomp==1) {
+X m_msg->qframe = m_msg->revcomp+1;
+X }
+X }
+X else if (m_msg->qdnaseq == SEQT_RNA) {
+X m_msg->qframe = m_msg->revcomp+1;
+X m_msg->nframe = 1;
+X }
+X
+X /* change settings for DNA search */
+X if (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA) {
+X ppst->histint = 4;
+X
+X if (!del_set) {
+#ifdef OLD_FASTA_GAP
+X ppst->gdelval = -16; /* def. del penalty */
+#else
+X ppst->gdelval = -12; /* def. open penalty */
+#endif
+X }
+X if (!gap_set) ppst->ggapval = -4; /* def. gap penalty */
+X
+X if (pgm_def_arr[pgm_id].ktup > 0) {
+X /* these parameters are used to scale optcut, they should be replaced
+X by statistically based parameters */
+X if (!wid_set) ppst->param_u.fa.optwid = 16;
+X ppst->param_u.fa.bestscale = 80;
+X ppst->param_u.fa.bkfact = 5;
+X ppst->param_u.fa.scfact = 1;
+X ppst->param_u.fa.bktup = 6;
+X ppst->param_u.fa.bestmax = 80;
+X ppst->param_u.fa.bestoff = 45;
+X
+X if (!sw_flag_set) {
+X ppst->sw_flag = 0;
+X strncpy(m_msg->f_id1,"bs",sizeof(m_msg->f_id1));
+X }
+X
+X /* largest ktup */
+X mktup = 6;
+X
+X if (ppst->param_u.fa.pamfact >= 0) ppst->param_u.fa.pamfact = 0;
+X if (ppst->param_u.fa.ktup < 0)
+X ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup; /* negative ktup marks "default"; re-derive it from the DNA bktup */
+X }
+X
+X ppst->nsq = nnt;
+X ppst->nsqx = nntx;
+X for (i=0; i<=ppst->nsqx; i++) {
+X ppst->hsq[i] = hnt[i];
+X ppst->sq[i] = nt[i];
+X ppst->hsqx[i] = hntx[i];
+X ppst->sqx[i] = ntx[i];
+X }
+X ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
+X
+X if (!ppst->pam_set) {
+X if (ppst->p_d_set)
+X mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
+#if !defined(FASTS) && !defined(FASTM)
+X else if (ppst->pamfile[0]=='\0' || strncmp(ppst->pamfile,"BL50",4)==0) {
+X strncpy (ppst->pamfile, "+5/-4", sizeof(ppst->pamfile));
+X }
+#else
+X else if (strncmp(ppst->pamfile,"MD20",4)==0) {
+X strncpy (ppst->pamfile, "+2/-2", sizeof(ppst->pamfile));
+X ppst->p_d_mat = +2;
+X ppst->p_d_mis = -2;
+X mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
+X }
+#endif
+X pam = npam;
+X }
+X
+X strncpy (m_msg->sqnam, "nt",sizeof(m_msg->sqnam));
+X strncpy (m_msg->sqtype, "DNA",sizeof(m_msg->sqtype));
+X } /* end DNA reset */
+X
+X else { /* other parameters for protein comparison */
+X if (pgm_def_arr[pgm_id].ktup > 0) {
+X if (!wid_set) {
+X if (ppst->param_u.fa.ktup==1) ppst->param_u.fa.optwid = 32;
+X else ppst->param_u.fa.optwid = 16;
+X }
+X }
+X if (!del_set) {ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod;} /* NOTE(review): f_initenv() also adds g_open_mod to gdelval - confirm the double adjustment is intended */
+X if (!shift_set) {ppst->gshift = pgm_def_arr[pgm_id].gshift;}
+X if (!subs_set) {ppst->gsubs = pgm_def_arr[pgm_id].hshift;}
+X }
+X
+}
+X
+/* query_parm() this function asks for any additional parameters
+X   that have not been provided.  Could be null.
+X
+X   For programs with a ktup default: a negative ktup (the "use default"
+X   marker) is made positive; a ktup of 0 is asked for on stdin, and an
+X   empty or zero answer selects bktup.  PRSS builds also ask for the
+X   number of shuffles (unless -k was given) and, when zs_win is 0, for
+X   the shuffle type and window size.
+X
+X   Every prompt is flushed before reading, because the prompts do not
+X   end in a newline and stdout may be buffered.  End of input on stdin
+X   terminates the program with exit(0).
+*/
+void
+query_parm (struct mngmsg *m_msp, struct pstruct *ppst)
+{
+X  char qline[40];
+X
+X  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+X    if (ppst->param_u.fa.ktup < 0)
+X      ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;
+X
+X    if (ppst->param_u.fa.ktup == 0) {
+X      printf (" ktup? (1 to %d) [%d] ", mktup, ppst->param_u.fa.bktup);
+X      fflush(stdout);
+X      if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
+X      else sscanf(qline,"%d",&ppst->param_u.fa.ktup);
+X    }
+X    if (ppst->param_u.fa.ktup == 0)
+X      ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;
+X    else ktup_set = 1;
+X  }
+X
+#if defined(PRSS)
+X  if (m_msp->shuff_max < 10) m_msp->shuff_max = 200;
+X
+X  if (!mshuff_set) {
+X    printf(" number of shuffles [%d]? ",m_msp->shuff_max);
+X    fflush(stdout);
+X    if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
+X    else sscanf(qline,"%d",&m_msp->shuff_max);
+X  }
+X
+X  if (ppst->zs_win == 0) {
+X    printf (" local (window) (w) or uniform (u) shuffle [u]? ");
+X    fflush(stdout);
+X    if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
+X    else if (qline[0]=='w' || qline[0]=='W') {
+X      m_msp->shuff_wid = 20;
+X      printf(" local shuffle window size [%d]? ",m_msp->shuff_wid);
+X      fflush(stdout);
+X      if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
+X      else sscanf(qline,"%d",&m_msp->shuff_wid);
+X    }
+X  }
+#endif
+}
+X
+/* last_init() cannot look at aa0, n0, because it is only run once,
+X it is not run before each new aa0 search.
+X
+X Final consistency pass over the parameters: sets the default alignment
+X line length, initializes the translation tables (non-PCOMPLIB
+X FASTX/FASTY/TFAST builds), clears revcomp for protein queries, brings
+X ktup into the range 1..mktup, derives nframe/nitt1/thr_fact and
+X score_ix, allocates and fills the 2-D scoring tables if that has not
+X been done yet, applies the optional L/I and Q/K score merging when
+X pam_ms is set, and finally computes Lambda, K and H through
+X init_karlin_a()/do_karlin_a().
+X nnodes is only a parameter in PCOMPLIB builds. */
+void
+last_init (struct mngmsg *m_msg, struct pstruct *ppst
+#ifdef PCOMPLIB
+X ,int nnodes
+#endif
+X )
+{
+X int ix_l, ix_i, i, pgm_id;
+X double *kar_p;
+X double aa0_f[MAXSQ];
+X
+X pgm_id = ppst->pgm_id;
+X
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+X m_msg->nohist = 1;
+X m_msg->shuff_max = 2000;
+#ifndef PCOMPLIB
+X ppst->shuff_node = m_msg->shuff_max/max_workers; /* NOTE(review): assumes max_workers is non-zero - confirm */
+#else
+X ppst->shuff_node = m_msg->shuff_max/nnodes;
+#endif
+#endif
+X
+X if (m_msg->aln.llen < 1) {
+X m_msg->aln.llen = 60;
+X }
+X
+#ifndef PCOMPLIB
+#if defined(FASTX) || defined(FASTY) || defined(TFAST)
+X /* set up translation tables: faatran.c */
+X aainit(ppst->tr_type,ppst->debug_lib);
+#endif
+#endif
+X
+/* a sanity check */
+#if !defined(TFAST)
+X if (m_msg->revcomp && m_msg->qdnaseq!=SEQT_DNA && m_msg->qdnaseq!=SEQT_RNA) {
+X fprintf(stderr," cannot reverse complement protein\n");
+X m_msg->revcomp = 0;
+X }
+#endif
+X
+X if (pgm_def_arr[pgm_id].ktup > 0) {
+X
+X if (ppst->param_u.fa.ktup < 0)
+X ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;
+X
+X if (ppst->param_u.fa.ktup < 1 || ppst->param_u.fa.ktup > mktup) {
+X fprintf(stderr," warning ktup = %d out of range [1..%d], reset to %d\n",
+X ppst->param_u.fa.ktup, mktup, ppst->param_u.fa.bktup);
+X ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;
+X }
+X }
+X
+X if (pgm_id == TFA_PID) {
+X m_msg->revcomp *= 3;
+X if (m_msg->nframe == 3) m_msg->nframe += m_msg->revcomp;
+X }
+X else if (pgm_id == TFX_PID || pgm_id == TFY_PID) {
+X if (m_msg->nframe == 1) m_msg->nframe += m_msg->revcomp;
+X }
+X
+#if !defined(TFAST)
+X /* for fasta/fastx searches, itt iterates the query strand */
+X m_msg->nitt1 = m_msg->qframe-1;
+#else
+X /* for tfasta/tfastxy searches, itt iterates the library frames */
+X m_msg->nitt1 = m_msg->nframe-1;
+#endif
+X
+X if (pgm_def_arr[pgm_id].ktup > 0) {
+X if (ppst->param_u.fa.ktup>=2 && !wid_set) {
+X ppst->param_u.fa.optwid=16;
+X switch (pgm_id) {
+X case FA_PID:
+X m_msg->thr_fact = 32;
+X break;
+X case FX_PID:
+X case FY_PID:
+X m_msg->thr_fact = 16;
+X break;
+X case TFA_PID:
+X case TFX_PID:
+X case TFY_PID:
+X m_msg->thr_fact = 8;
+X break;
+X default:
+X m_msg->thr_fact = 4;
+X }
+X }
+X else { m_msg->thr_fact = 4;}
+X }
+X else m_msg->thr_fact = 4;
+X
+#if defined(PRSS)
+X if (m_msg->shuff_max < 10) m_msg->shuff_max = 200;
+X if (ppst->zsflag < 10) ppst->zsflag += 10;
+X if (ppst->zs_win > 0) {
+X m_msg->shuff_wid = ppst->zs_win;
+X }
+#endif
+X
+X if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
+X if (ppst->param_u.fa.iniflag) {
+X ppst->score_ix = 1;
+X strncpy (m_msg->label, "initn init1", sizeof(m_msg->label));
+X }
+X else if (ppst->param_u.fa.optflag) {
+X ppst->score_ix = 2;
+X m_msg->stages = 1;
+X }
+X }
+X
+X if (!ppst->have_pam2) {
+X alloc_pam (MAXSQ, MAXSQ, ppst);
+X init_pam2(ppst);
+X }
+X init_pamx(ppst);
+X
+X if (ppst->pam_ms) {
+X if (m_msg->qdnaseq == SEQT_PROT) {
+X /* code to make 'L'/'I' identical scores */
+X ix_l = pascii['L'];
+X ix_i = pascii['I'];
+X ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
+X ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
+X (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;
+X for (i=1; i<=ppst->nsq; i++) {
+X ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
+X (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;
+X ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
+X (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;
+X }
+X
+X /* code to make 'Q'/'K' identical scores */
+X if (!shift_set) { /* NOTE(review): gated on shift_set, the flag raised by -h - confirm this is the intended condition */
+X ix_l = pascii['Q'];
+X ix_i = pascii['K'];
+X ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
+X ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
+X (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;
+X for (i=1; i<=ppst->nsq; i++) {
+X ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
+X (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;
+X ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
+X (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;
+X }
+X }
+X }
+X }
+X
+X /*
+X print_pam(ppst);
+X */
+X
+X /* once we have a complete pam matrix, we can calculate Lambda and K
+X for "average" sequences */
+X kar_p = NULL;
+X init_karlin_a(ppst, aa0_f, &kar_p);
+X do_karlin_a(ppst->pam2[0], ppst, aa0_f,
+X kar_p, &m_msg->Lambda, &m_msg->K, &m_msg->H);
+X free(kar_p);
+X
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+X if (ppst->ext_sq_set) {
+X fprintf(stderr," -S not available on [t]fast[fs]\n");
+X ppst->ext_sq_set = 0;
+X
+X /* reset sascii to ignore -S, map lc */
+X init_ascii(0,lascii,0);
+X }
+#endif
+}
+X
+/* this function is left over from the older FASTA format scoring
+X matrices that allowed additional parameters (bktup, bkfact) to be
+X set in the scoring matrix. It is no longer used. A modern version
+X would set parameters based on lambda and K.
+*/
+/*
+void
+f_initpam (line, ppst)
+char *line;
+struct pstruct *ppst;
+{
+X if (sscanf (line, " %d %d %d %d %d %d %d", &ppst->param_u.fa.scfact,
+X &ppst->param_u.fa.bestoff, &ppst->param_u.fa.bestscale,
+X &ppst->param_u.fa.bkfact, &ppst->param_u.fa.bktup,
+X &ppst->param_u.fa.bestmax, &ppst->histint) != 7)
+X {
+X printf (" bestcut parameters - bad format\n");
+X exit (1);
+X }
+}
+*/
+X
+/* alloc_pam2p() creates a profile structure: len rows of nsq+1
+X   integer scores.
+X
+X   All rows share one contiguous, zero-filled buffer that starts at
+X   pam2p[0]; row i begins at pam2p[0] + i*(nsq+1).  Release the result
+X   with free_pam2p().
+X
+X   A request for zero rows still gets one row pointer and one row of
+X   scores, so that pam2p[0] is always a valid pointer.
+X
+X   Returns NULL, after a message on stderr, when len or nsq is negative
+X   or memory is not available.
+*/
+int **
+alloc_pam2p(int len, int nsq) {
+X  int i;
+X  int **pam2p;
+X  size_t n_rows, n_cols;
+X
+X  if (len < 0 || nsq < 0) {
+X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+X    return NULL;
+X  }
+X
+X  /* calloc(0, ...) may hand back a zero-sized object; storing pam2p[0]
+X     into it would write out of bounds, so always keep one row */
+X  n_rows = (len > 0) ? (size_t)len : 1;
+X  /* size_t arithmetic: (nsq+1)*len could overflow an int */
+X  n_cols = (size_t)nsq + 1;
+X
+X  if ((pam2p = (int **)calloc(n_rows,sizeof(int *)))==NULL) {
+X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+X    return NULL;
+X  }
+X
+X  if((pam2p[0] = (int *)calloc(n_rows*n_cols,sizeof(int)))==NULL) {
+X    fprintf(stderr, "Cannot allocate pam2p[0]: %lu\n",
+X	    (unsigned long)(n_rows*n_cols));
+X    free(pam2p);
+X    return NULL;
+X  }
+X
+X  for (i=1; i<len; i++) {
+X    pam2p[i] = pam2p[0] + ((size_t)i*n_cols);
+X  }
+X
+X  return pam2p;
+}
+X
+/* free_pam2p() releases a profile: first the buffer held by row 0,
+X   then the array of row pointers.  A NULL profile is ignored. */
+void free_pam2p(int **pam2p) {
+X  if (pam2p == NULL) return;
+X
+X  free(pam2p[0]);
+X  free(pam2p);
+}
+X
+/* sortbest has now become comparison function specific so that we can use
+X a different comparison for fasts/f
+*/
+#if !defined(FASTS) && !defined (FASTF) && !defined(FASTM)
+#ifndef PCOMPLIB
+/* qshuffle() does nothing in builds without FASTS, FASTF or FASTM */
+void
+qshuffle() {}
+#endif
+X
+/* last_calc() for builds without FASTS, FASTF or FASTM: none of the
+X arguments is examined or modified and nbest is returned unchanged */
+int
+last_calc(unsigned char *aa0, unsigned char *aa1, int maxn,
+X struct beststr **bestp_arr, int nbest,
+X struct mngmsg *m_msg, struct pstruct *pst,
+X void **f_str, void *rs_str)
+{
+X return nbest;
+}
+X
+/* sortbest() - Shell sort of the pointers bptr[0..nbest-1] so that
+X   score[irelv] runs from highest to lowest */
+void sortbest (bptr, nbest, irelv)
+struct beststr **bptr;
+int nbest, irelv;
+{
+X  int step, upper, lower;
+X  struct beststr *held;
+X
+X  step = nbest/2;
+X  while (step > 0) {
+X    for (upper = step; upper < nbest; upper++) {
+X      lower = upper - step;
+X      /* walk back through this stride, swapping while the earlier
+X	 entry does not score at least as high as the later one */
+X      while (lower >= 0 &&
+X	     !(bptr[lower]->score[irelv] >= bptr[lower + step]->score[irelv])) {
+X	held = bptr[lower];
+X	bptr[lower] = bptr[lower + step];
+X	bptr[lower + step] = held;
+X	lower -= step;
+X      }
+X    }
+X    step /= 2;
+X  }
+}
+X
+/* builds without FASTS, FASTF or FASTM have no auxiliary columns:
+X show_aux() and header_aux() write nothing to fp */
+void show_aux(FILE *fp, struct beststr *bptr) {}
+void header_aux(FILE *fp) {}
+X
+#else
+/* sortbest() for FASTS/FASTF/FASTM - Shell sort of the pointers
+X   bptr[0..nbest-1] by escore, smallest first; irelv is not used */
+void sortbest (bptr, nbest, irelv)
+struct beststr **bptr;
+int nbest, irelv;
+{
+X  int step, upper, lower;
+X  struct beststr *held;
+X
+X  step = nbest/2;
+X  while (step > 0) {
+X    for (upper = step; upper < nbest; upper++) {
+X      lower = upper - step;
+X      /* walk back through this stride, swapping until the earlier
+X	 entry has the strictly smaller escore */
+X      while (lower >= 0 &&
+X	     !(bptr[lower]->escore < bptr[lower + step]->escore)) {
+X	held = bptr[lower];
+X	bptr[lower] = bptr[lower + step];
+X	bptr[lower + step] = held;
+X	lower -= step;
+X      }
+X    }
+X    step /= 2;
+X  }
+}
+X
+#if defined(FASTS) || defined(FASTM)
+X
+#ifndef PCOMPLIB
+/* this shuffle is for FASTS */
+/* convert ',' -> '\0', shuffle each of the substrings */
+/* aa0 - query fragments separated by EOSEQ or ESS codes; shuffled in place
+X n0 - number of positions of aa0 scanned for separators
+X nm0 - number of fragments that are shuffled
+X Exits with status 1 if the table of fragment starts cannot be allocated.
+*/
+void
+qshuffle(unsigned char *aa0, int n0, int nm0) {
+X
+X unsigned char **aa0start, *aap, tmp;
+X int i,j,k, ns;
+X
+X if ((aa0start=(unsigned char **)calloc(nm0+1,
+X sizeof(unsigned char *)))==NULL) {
+X fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);
+X exit(1);
+X }
+X
+X /* replace every separator with '\0' and remember where the
+X following fragment begins */
+X /* NOTE(review): k is not checked against the nm0+1 slots of aa0start;
+X this assumes aa0 holds no more than nm0 separators - confirm */
+X aa0start[0]=aa0;
+X for (k=1,i=0; i<n0; i++) {
+X if (aa0[i]==EOSEQ || aa0[i]==ESS) {
+X aa0[i]='\0';
+X aa0start[k++] = &aa0[i+1];
+X }
+X }
+X
+X /* aa0start has the beginning of each substring */
+X for (k=0; k<nm0; k++) {
+X aap=aa0start[k];
+X ns = strlen((char *)aap);
+X /* swap the last unshuffled residue with a randomly chosen one;
+X presumably nrand(i) returns a value in 0..i-1 - confirm */
+X for (i=ns; i>1; i--) {
+X j = nrand(i);
+X tmp = aap[j];
+X aap[j] = aap[i-1];
+X aap[i-1] = tmp;
+X }
+X aap[ns] = 0;
+X }
+X
+X /* put a separator back in front of fragments 1..nm0-1; ESS is
+X written whichever of EOSEQ/ESS was there before */
+X for (k=1; k<nm0; k++) {
+/* aap = aa0start[k];
+X while (*aap) fputc(pst.sq[*aap++],stderr);
+X fputc('\n',stderr);
+*/
+X aa0start[k][-1]=ESS;
+X }
+X
+X free(aa0start);
+}
+#endif
+#endif
+X
+#ifdef FASTF
+#ifndef PCOMPLIB
+/* qshuffle() for FASTF: reverses the order of the columns of the query
+X in place. aa0 is treated as nm0 fragments whose starts are nmoff+1
+X positions apart, with nmoff = (n0 - nm0 - 1)/nm0 + 1 columns each.
+X NOTE(review): nm0 is used as a divisor - assumes nm0 > 0, confirm.
+*/
+void qshuffle(unsigned char *aa0, int n0, int nm0) {
+X
+X int i, j, k, nmpos;
+X unsigned char tmp;
+X int nmoff;
+X
+X nmoff = (n0 - nm0 - 1)/nm0 + 1;
+X
+X for (i = nmoff-1 ; i > 0 ; i--) {
+X
+X /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */
+X /* j is the mirror image of column i; stop once the two meet */
+X j = (nmoff -1 ) - i;
+X if (i <= j) break; /* reverse columns */
+X
+X /* swap all i'th column residues for all j'th column residues */
+X for(nmpos = 0, k = 0 ; k < nm0 ; k++, nmpos += nmoff+1 ) {
+X tmp = aa0[nmpos + i];
+X aa0[nmpos + i] = aa0[nmpos + j];
+X aa0[nmpos + j] = tmp;
+X }
+X }
+}
+#endif
+#endif
+X
+X
+/* show additional best_str values */
+/* writes bptr->segnum and bptr->seglen to fp in the widths used by
+X the " sn sl" heading printed by header_aux() */
+void show_aux(FILE *fp, struct beststr *bptr) {
+X fprintf(fp," %2d %3d",bptr->segnum,bptr->seglen);
+}
+X
+/* column heading for the two values written by show_aux() */
+void header_aux(FILE *fp) {
+X fprintf(fp, " sn sl");
+}
+#endif
+X
+/* fill_pam() - convert real-valued profile scores to integers.
+X
+X   For each of the n0 rows, freq2d[row][0..19] is multiplied by scale,
+X   rounded to the nearest integer (halves away from zero) and stored
+X   in pam2p[row][1..20].  nsq is accepted but not used.
+*/
+void
+fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {
+X  int row, col;
+X  int *score_row;
+X  double *freq_row;
+X  double scaled;
+X
+X  /* fprintf(stderr, "scale: %g\n", scale); */
+X
+X  for (row = 0; row < n0; row++) {
+X    score_row = pam2p[row];
+X    freq_row = freq2d[row];
+X    for (col = 0; col < 20; col++) {
+X      scaled = scale * freq_row[col];
+X      /* +/-0.5 before the truncating cast gives rounding */
+X      scaled = (scaled < 0.0) ? scaled - 0.5 : scaled + 0.5;
+X      score_row[col + 1] = (int)(scaled);
+X    }
+X  }
+}
+X
+/* get_lambda() - estimate lambda for the scores held in a profile.
+X
+X   pam2p - profile; columns 1..nsq of each row are examined
+X   n0    - number of rows; when pam2p[0][1] is -BIGNUM, row 0 is
+X	   skipped and rows 1..n0 are used instead of rows 0..n0-1
+X   nsq   - number of residue columns
+X   query - residue code for each row; mapped through aascii[] to pick
+X	   the rrcounts[] weight of the row
+X
+X   Every cell contributes rrcounts[row residue] * rrcounts[column] to
+X   the probability of its score; the normalized distribution between
+X   the lowest and highest score found is passed to karlin().
+X
+X   Returns lambda, or -1.0 when karlin() reports failure (a message is
+X   written to stderr).  Exits with status 1 if the probability array
+X   cannot be allocated.
+*/
+double
+get_lambda(int **pam2p, int n0, int nsq, unsigned char *query) {
+X  double lambda, H;
+X  double *pr, tot, sum;
+X  int i, ioff, j, min, max;
+X
+X  /* karlin() leaves *lambda_p untouched when it fails, so start from
+X     a defined "no estimate" value instead of returning garbage */
+X  lambda = -1.0;
+X  H = 0.0;
+X
+X  /* get min and max scores */
+X  min = BIGNUM;
+X  max = -BIGNUM;
+X  if(pam2p[0][1] == -BIGNUM) {
+X    ioff = 1;
+X    n0++;
+X  } else {
+X    ioff = 0;
+X  }
+X
+X  for (i = ioff ; i < n0 ; i++) {
+X    for (j = 1; j <= nsq ; j++) {
+X      if (min > pam2p[i][j])
+X	min = pam2p[i][j];
+X      if (max < pam2p[i][j])
+X	max = pam2p[i][j];
+X    }
+X  }
+X
+X  /*  fprintf(stderr, "min: %d\tmax:%d\n", min, max); */
+X
+X  /* one probability slot per integer score in min..max */
+X  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
+X    exit(1);
+X  }
+X
+X  tot = (double) rrtotal * (double) rrtotal * (double) n0;
+X  for (i = ioff ; i < n0 ; i++) {
+X    for (j = 1; j <= nsq ; j++) {
+X      pr[pam2p[i][j] - min] +=
+X	(double) ((double) rrcounts[aascii[query[i]]] * (double) rrcounts[j]) / tot;
+X    }
+X  }
+X
+X  sum = 0.0;
+X  for(i = 0 ; i <= max-min ; i++) {
+X    sum += pr[i];
+X    /*    fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
+X  }
+X  /*  fprintf(stderr, "sum: %g\n", sum); */
+X
+X  /* renormalize so the probabilities add up to 1 */
+X  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
+X
+X  if (!karlin(min, max, pr, &lambda, &H)) {
+X    fprintf(stderr, "Karlin lambda estimation failed\n");
+X  }
+X
+X  /*   fprintf(stderr, "lambda: %g\n", lambda); */
+X  free(pr);
+X
+X  return lambda;
+}
+X
+/*
+X *aa0 - query sequence
+X n0 - length
+X pamscale - scaling for pam matrix - provided by apam.c, either
+X 0.346574 = ln(2)/2 (P120, BL62) or
+X 0.231049 = ln(2)/3 (P250, BL50)
+*/
+X
+void
+scale_pssm(int **pssm2p, double **freq2d,
+X unsigned char *query, int n0,
+X int **pam2, double pamscale);
+X
+static unsigned char ustandard_aa[] ="\0ARNDCQEGHILKMFPSTWYV";
+X
+/* read_pssm() - read a checkpoint profile from fp and build the integer
+X   profile ppst->pam2p[0] from it.
+X
+X   aa0, nsq  - not used here
+X   n0	     - query length; must equal the length stored in the file
+X   pamscale  - scaling handed on to scale_pssm()
+X   fp	     - open checkpoint file; it is read but not closed here
+X   pgpf_type - 0: binary file - an int length, that many query bytes,
+X		  then 20 doubles per position
+X	       1: text file - a length line, a query line, then one
+X		  line per position holding a character and 20 values
+X		  that are divided by 10.0 after reading
+X   ppst	     - supplies pam2[0]; receives the profile in pam2p[0]
+X
+X   Any read, format or allocation problem prints a message to stderr
+X   and calls exit(1).
+*/
+void
+read_pssm(unsigned char *aa0, int n0, int nsq,
+X	  double pamscale,
+X	  FILE *fp, int pgpf_type, struct pstruct *ppst) {
+X  int i, len, k;
+X  int qi, rj;	/* qi - index query; rj - index residues (1-20) */
+X  unsigned char *query, ctmp;
+X  char dline[512];
+X  double freq, **freq2d;
+X
+X  /* len and k are printed by error paths that can be reached before a
+X     successful read has assigned them; give them defined values */
+X  len = 0;
+X  k = 0;
+X
+X  if (pgpf_type == 0) {
+X
+X    if(1 != fread(&len, sizeof(int), 1, fp)) {
+X      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+X      exit(1);
+X    }
+X
+X    if(len != n0) {
+X      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+X	      len,n0);
+X      exit(1);
+X    }
+X
+X    /* read over query sequence stored in BLAST profile */
+X    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
+X      fprintf(stderr, "Couldn't allocate memory for query!\n");
+X      exit(1);
+X    }
+X
+X    /* len equals n0 here, so the conversion to size_t is value-preserving
+X       for any non-negative query length */
+X    if((size_t)len != fread(query, sizeof(char), len, fp)) {
+X      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", (char *)query);
+X      exit(1);
+X    }
+X  }
+X  else if (pgpf_type == 1) {
+X
+X    if ((fgets(dline,sizeof(dline),fp) == NULL) ||
+X	(1 != sscanf(dline, "%d",&len))) {
+X      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+X      exit(1);
+X    }
+X
+X    if(len != n0) {
+X      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+X	      len,n0);
+X      exit(1);
+X    }
+X
+X    /* read over query sequence stored in BLAST profile */
+X    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
+X      fprintf(stderr, "Couldn't allocate memory for query!\n");
+X      exit(1);
+X    }
+X
+X    if (fgets((char *)query,len+2,fp)==NULL) {
+X      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", (char *)query);
+X      exit(1);
+X    }
+X  }
+X  else {
+X    fprintf(stderr," Unrecognized PSSM file type: %d\n",pgpf_type);
+X    exit(1);
+X  }
+X
+X  /* currently we don't do anything with query; ideally, we should
+X     check to see that it actually matches aa0 ... */
+X
+X  /* quick 2d array alloc: */
+X  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+X    exit(1);
+X  }
+X
+X  if((freq2d[0] = (double *) calloc(n0 * 20, sizeof(double))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+X    exit(1);
+X  }
+X
+X  /* a little pointer arithmetic to fill out 2d array: */
+X  for (i = 1 ; i < n0 ; i++) {
+X    freq2d[i] = freq2d[i-1] + 20;
+X  }
+X
+X  if (pgpf_type == 0) {
+X    for (qi = 0 ; qi < n0 ; qi++) {
+X      for (rj = 0 ; rj < 20 ; rj++) {
+X	if(1 != fread(&freq, sizeof(double), 1, fp)) {
+X	  fprintf(stderr, "Error while reading frequencies!\n");
+X	  exit(1);
+X	}
+X	freq2d[qi][rj] = freq;
+X      }
+X    }
+X  }
+X  else {
+X    for (qi = 0 ; qi < n0 ; qi++) {
+X      /* NOTE(review): a line is accepted as soon as one field converts;
+X	 values that are not read stay at the 0.0 left by calloc() */
+X      if ((fgets(dline,sizeof(dline),fp) ==NULL) ||
+X	  (k = sscanf(dline,"%c %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg\n",
+X		      &ctmp, &freq2d[qi][0], &freq2d[qi][1], &freq2d[qi][2], &freq2d[qi][3], &freq2d[qi][4],
+X		      &freq2d[qi][5], &freq2d[qi][6], &freq2d[qi][7], &freq2d[qi][8], &freq2d[qi][9],
+X		      &freq2d[qi][10], &freq2d[qi][11], &freq2d[qi][12], &freq2d[qi][13], &freq2d[qi][14],
+X		      &freq2d[qi][15], &freq2d[qi][16], &freq2d[qi][17], &freq2d[qi][18], &freq2d[qi][19]))<1) {
+X	fprintf(stderr, "Error while reading frequencies: %d read!\n",k);
+X	exit(1);
+X      }
+X      for (rj=0; rj<20; rj++) { freq2d[qi][rj] /= 10.0; }	/* reverse scaling */
+X    }
+X  }
+X
+X  scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0],pamscale);
+X
+X  free(freq2d[0]);
+X  free(freq2d);
+X
+X  free(query);
+}
+X
+/* scale_pssm() - turn profile frequencies into integer scores in pssm2p.
+X
+X pssm2p - output profile; fill_pam() writes columns 1..20 of rows 0..n0-1
+X freq2d - n0 x 20 frequencies; overwritten in place with real-valued scores
+X query - residue codes; used to look up substitute scores and by get_lambda()
+X n0 - number of profile rows
+X pam2 - scoring matrix; source of substitute scores and of the target lambda
+X pamscale - divisor applied to the log-odds values
+X
+X The scores are multiplied by a scale factor that is searched for until
+X get_lambda() of the profile is close to get_lambda() of pam2.
+X Exits with status 1 if get_lambda() of the profile is not positive.
+*/
+void
+scale_pssm(int **pssm2p, double **freq2d, unsigned char *query, int n0, int **pam2, double pamscale) {
+X int i, qi, rj;
+X double freq, new_lambda, lambda;
+X int first, too_high;
+X double scale, scale_high, scale_low;
+X
+X /* replace each frequency by log(frequency / background frequency),
+X where the background is rrcounts[]/rrtotal */
+X for (qi = 0 ; qi < n0 ; qi++) {
+X for (rj = 0 ; rj < 20 ; rj++) {
+X if (freq2d[qi][rj] > 1e-20) {
+X freq = log(freq2d[qi][rj] /((double) (rrcounts[rj+1])/(double) rrtotal));
+X freq /= pamscale; /* this gets us close to original pam scores */
+X freq2d[qi][rj] = freq;
+X }
+X else {
+X /* when blastpgp decides to leave something out, it puts 0's in all the frequencies
+X in the binary checkpoint file. In the ascii version, however, it uses BLOSUM62
+X values. I will put in scoring matrix values as well */
+X
+X freq2d[qi][rj] = pam2[aascii[query[qi]]][rj+1];
+X }
+X }
+X }
+X
+X /* now figure out the right scale */
+X scale = 1.0;
+X lambda = get_lambda(pam2, 20, 20, ustandard_aa);
+X
+X /* should be near 1.0 because of our initial scaling by ppst->pamscale */
+X /* fprintf(stderr, "real_lambda: %g\n", lambda); */
+X
+X /* get initial high/low scale values: */
+X /* the first pass picks the direction (too_high) and a bracket 0.05
+X wide next to 1.0; later passes double the bracket's distance from
+X 1.0 and stop as soon as new_lambda falls on the other side of lambda */
+X first = 1;
+X while (1) {
+X fill_pam(pssm2p, n0, 20, freq2d, scale);
+X new_lambda = get_lambda(pssm2p, n0, 20, query);
+X
+X if (new_lambda > lambda) {
+X if (first) {
+X first = 0;
+X scale = scale_high = 1.0 + 0.05;
+X scale_low = 1.0;
+X too_high = 1;
+X } else {
+X if (!too_high) break;
+X scale = (scale_high += scale_high - 1.0);
+X }
+X } else if (new_lambda > 0) {
+X if (first) {
+X first = 0;
+X scale_high = 1.0;
+X scale = scale_low = 1.0 - 0.05;
+X too_high = 0;
+X } else {
+X if (too_high) break;
+X scale = (scale_low += scale_low - 1.0);
+X }
+X } else {
+X /* NOTE(review): this message has no trailing newline */
+X fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
+X exit(1);
+X }
+X }
+X
+X /* now do binary search between low and high */
+X for (i = 0 ; i < 10 ; i++) {
+X scale = 0.5 * (scale_high + scale_low);
+X fill_pam(pssm2p, n0, 20, freq2d, scale);
+X new_lambda = get_lambda(pssm2p, n0, 20, query);
+X
+X if (new_lambda > lambda) scale_low = scale;
+X else scale_high = scale;
+X }
+X
+X /* final profile: midpoint of the bracket left by the search */
+X scale = 0.5 * (scale_high + scale_low);
+X fill_pam(pssm2p, n0, 20, freq2d, scale);
+X
+X /*
+X fprintf(stderr, "final scale: %g\n", scale);
+X
+X for (qi = 0 ; qi < n0 ; qi++) {
+X fprintf(stderr, "%4d %c: ", qi+1, query[qi]);
+X for (rj = 1 ; rj <= 20 ; rj++) {
+X fprintf(stderr, "%4d", pssm2p[qi][rj]);
+X }
+X fprintf(stderr, "\n");
+X }
+X */
+}
+X
+#if defined(SSEARCH) || (defined(PRSS) && !defined(FASTX))
+int
+parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
+X unsigned char **query, double ***freqs,
+X char *matrix, int *gap_open, int *gap_extend,
+X double *lambda);
+X
+/* the ASN.1 pssm includes information about the scoring matrix used
+X (though not the gap penalty in the current version PSSM:2) The PSSM
+X scoring matrix and gap penalties should become the default if they
+X have not been set explicitly.
+*/
+X
+/* read_asn_pssm() - build ppst->pam2p[0] from an ASN.1 PSSM read from fp.
+X
+X aa0, nsq - not used here
+X n0 - query length; the PSSM must have at least n0 columns
+X pamscale - scaling handed on to scale_pssm()
+X fp - open PSSM file, passed to parse_pssm_asn_fa()
+X ppst - scoring parameters; gap penalties, pamfile and the profile may
+X be updated
+X
+X Returns 1 on success and -1 when parse_pssm_asn_fa() returns <= 0.
+X Exits with status 1 when n_cols < n0.
+X Sets the file-level flags gap_set and del_set as a side effect.
+*/
+int
+read_asn_pssm(unsigned char *aa0, int n0, int nsq,
+X double pamscale, FILE *fp, struct pstruct *ppst) {
+X
+X int i, j, len, k;
+X int qi, rj; /* qi - index query; rj - index residues (1-20) */
+X int **pam2p;
+X int first, too_high;
+X unsigned char *query, ctmp;
+X char dline[512];
+X char matrix[MAX_SSTR];
+X double psi2_lambda;
+X double freq, **freq2d, lambda, new_lambda;
+X double scale, scale_high, scale_low;
+X int gap_open, gap_extend;
+X int n_rows, n_cols;
+X
+X pam2p = ppst->pam2p[0];
+X
+X if (parse_pssm_asn_fa(fp, &n_rows, &n_cols, &query, &freq2d,
+X matrix, &gap_open, &gap_extend, &psi2_lambda)<=0) {
+X return -1;
+X }
+X
+X /* take the gap-open penalty from the PSSM (stored as a negative
+X value), or -11 for BLOSUM62, unless gap_set is already non-zero */
+X if (!gap_set) {
+X if (gap_open) {
+X if (gap_open > 0) {gap_open = -gap_open;}
+X ppst->gdelval = gap_open;
+X }
+X else if (strncmp(matrix,"BLOSUM62",8)==0) {
+X ppst->gdelval = -11;
+X }
+X gap_set = 1;
+X }
+X /* same for the gap-extend penalty: PSSM value, or -1 for BLOSUM62 */
+X if (!del_set) {
+X if (gap_extend) {
+X if (gap_extend > 0) {gap_extend = -gap_extend;}
+X ppst->ggapval = gap_extend;
+X }
+X else if (strncmp(matrix,"BLOSUM62",8)==0) {
+X ppst->ggapval = -1;
+X }
+X del_set = 1;
+X }
+X
+X /* a BLOSUM62 PSSM switches the scoring matrix to "BL62" when
+X ppst->pam_set is zero */
+X if (strncmp(matrix, "BLOSUM62", 8)== 0 && !ppst->pam_set) {
+X strncpy(ppst->pamfile, "BL62", 120);
+X standard_pam(ppst->pamfile,ppst,del_set, gap_set);
+X if (!ppst->have_pam2) {
+X alloc_pam (MAXSQ, MAXSQ, ppst);
+X }
+X init_pam2(ppst);
+X ppst->pam_set = 1;
+X }
+X
+X /* NOTE(review): the test is n_cols < n0 although the message says "!=" */
+X if (n_cols < n0) {
+X fprintf(stderr, " query length: %d != n_cols: %d\n",n0, n_cols);
+X exit(1);
+X }
+X
+X scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0],pamscale);
+X
+X free(freq2d[0]);
+X free(freq2d);
+X
+X free(query);
+X return 1;
+}
+#endif
+X
+/* last_params() - settings that depend on the query sequence.
+X
+X   aa0	  - query residues
+X   n0	  - query length; nothing is done when n0 < 0
+X   m_msg  - receives nm0, escore_flg, last_calc_flg and qshuffle
+X   ppst	  - receives n0, the profiles pam2p[0]/pam2p[1] (SSEARCH, PRSS)
+X	    and, unless ktup_set is non-zero, param_u.fa.ktup
+X   qm_msg - (PCOMPLIB only) receives copies of nm0, escore_flg and
+X	    qshuffle; its pam_pssm is cleared
+X
+X   For SSEARCH/PRSS with ppst->pam_pssm set, the PSSM file
+X   ppst->pgpfile is read.  If it cannot be opened or parsed, a message
+X   is printed, pam_pssm is cleared and the function returns before the
+X   remaining settings are made.
+*/
+void
+last_params(unsigned char *aa0, int n0,
+X	    struct mngmsg *m_msg,
+X	    struct pstruct *ppst
+#ifdef PCOMPLIB
+X	    , struct qmng_str *qm_msg
+#endif
+X	    ) {
+X  int i, nsq;
+X  FILE *fp;
+X
+X  if (n0 < 0) { return;}
+X
+X  ppst->n0 = m_msg->n0;
+X
+X  if (ppst->ext_sq_set) { nsq = ppst->nsqx; }
+X  else {nsq = ppst->nsq;}
+X
+/* currently, profiles are only available for SSEARCH, PRSS */
+#if defined(SSEARCH) || defined(PRSS)
+X
+X  ppst->pam2p[0] = alloc_pam2p(n0,nsq);
+X  ppst->pam2p[1] = alloc_pam2p(n0,nsq);
+X
+X  if (ppst->pam_pssm) {
+X    if ((ppst->pgpfile_type == 0) && (fp=fopen(ppst->pgpfile,"rb"))) {
+X      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 0, ppst);
+X      fclose(fp);	/* read_pssm() leaves the file open */
+X      extend_pssm(aa0, n0, ppst);
+X    }
+X    else if ((ppst->pgpfile_type == 1) && (fp=fopen(ppst->pgpfile,"r"))) {
+X      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 1, ppst);
+X      fclose(fp);	/* read_pssm() leaves the file open */
+X      extend_pssm(aa0, n0, ppst);
+X    }
+#if defined(SSEARCH) || (defined(PRSS) && !defined(FASTX))
+X    /* NOTE(review): fp is not closed on this path; whether
+X       parse_pssm_asn_fa() closes it cannot be seen from here - confirm
+X       before adding an fclose() */
+X    else if ((ppst->pgpfile_type == 2) && (fp=fopen(ppst->pgpfile,"rb"))) {
+X      if (read_asn_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, ppst)>0) {
+X	extend_pssm(aa0, n0, ppst);
+X      }
+X      else {
+X	fprintf(stderr," Could not parse PSSM file: %s\n",ppst->pgpfile);
+X	ppst->pam_pssm = 0;
+X	return;
+X      }
+X    }
+#endif
+X    else {
+X      fprintf(stderr," Could not open PSSM file: %s\n",ppst->pgpfile);
+X      ppst->pam_pssm = 0;
+X      return;
+X    }
+X  }
+#endif
+X
+#if defined(FASTF) || defined(FASTS) || defined(FASTM)
+X  /* number of query fragments = number of separators + 1 */
+X  m_msg->nm0 = 1;
+X  for (i=0; i<n0; i++)
+X    if (aa0[i]==EOSEQ || aa0[i]==ESS) m_msg->nm0++;
+X
+/*
+X  for FASTS, we can do statistics in one of two different ways
+X  if there are <= 10 query fragments, then we calculate probabilistic
+X  scores for every library sequence.  If there are > 10 fragments, this
+X  takes much too long and too much memory, so we use the old fashioned
+X  raw score only z-score normalized method initially, and then calculate
+X  the probabilistic scores for the best hits.  To scale those scores, we
+X  also need a set of random probabilistic scores.  So we do the qshuffle
+X  to get them.
+X
+X  For FASTF, precalculating probabilities is prohibitively expensive,
+X  so we never do it; FASTF always acts like FASTS with nfrags>10.
+X
+*/
+X
+#if defined(FASTS) || defined(FASTM)
+X  if (m_msg->nm0 > 10) m_msg->escore_flg = 0;
+X  else m_msg->escore_flg = 1;
+#endif
+X
+X  if (m_msg->escore_flg && (ppst->zsflag&1)) {
+X    m_msg->last_calc_flg = 0;
+X    m_msg->qshuffle = 0;
+X  }
+X  else {	/* need random query, second set of 2000 scores */
+X    m_msg->last_calc_flg = 1;
+X    m_msg->qshuffle = 1;
+X  }
+#else
+X  m_msg->last_calc_flg = 0;
+X  m_msg->qshuffle = 0;
+X  m_msg->escore_flg = 0;
+X  m_msg->nm0 = 1;
+#endif
+X
+/* adjust the ktup if appropriate */
+X
+X  if (!ktup_set && pgm_def_arr[ppst->pgm_id].ktup > 0) {
+X    if (m_msg->qdnaseq == SEQT_PROT) {
+X      ppst->param_u.fa.ktup = pgm_def_arr[ppst->pgm_id].ktup;
+#if defined(FASTS) || defined(FASTM)
+X      if (n0 > 100) ppst->param_u.fa.ktup = 2;
+#endif
+X      if (n0 < 40) ppst->param_u.fa.ktup = 1;
+X    }
+X    else if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
+X      if (n0 < 20) ppst->param_u.fa.ktup = 1;
+#if defined(FASTS) || defined(FASTM)
+X      /* with the current (April 12 2005) dropfs2.c - ktup cannot be > 2 */
+X      else ppst->param_u.fa.ktup = 2;
+#else
+X      else if (n0 < 50) ppst->param_u.fa.ktup = 2;
+X      else if (n0 < 100) ppst->param_u.fa.ktup = 3;
+#endif
+X    }
+X  }
+X
+#ifdef PCOMPLIB
+X  qm_msg->nm0 = m_msg->nm0;
+X  qm_msg->escore_flg = m_msg->escore_flg;
+X  qm_msg->qshuffle = m_msg->qshuffle;
+X  qm_msg->pam_pssm = 0;
+#endif
+}
+X
+/* extend_pssm() - complete the profile in ppst->pam2p[0] and derive
+X   ppst->pam2p[1] from it.
+X
+X   For each of the n0 rows: column 0 becomes -BIGNUM, the 'B' and 'Z'
+X   columns become the rounded means of the N/D and Q/E columns, and the
+X   'X' and '*' columns become ppst->pam_xm.  Columns 1..nsq are then
+X   copied to pam2p[1].  With ext_sq_set, columns nsq+1..2*nsq repeat
+X   columns 1..nsq in pam2p[0] and hold pam_xm in pam2p[1].
+X   aa0 is not used.
+*/
+void
+extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst) {
+X
+X  int row, col, nsq;
+X  int ix_x, ix_stop, ix_b, ix_z;
+X  int *base, *ext;
+X  int **prof0, **prof1;
+X
+X  nsq = ppst->nsq;
+X
+X  prof0 = ppst->pam2p[0];
+X  prof1 = ppst->pam2p[1];
+X
+X  ix_x = pascii['X'];
+X  ix_stop = pascii['*'];
+X  ix_b = pascii['B'];
+X  ix_z = pascii['Z'];
+X
+X  /* boundary column and the ambiguity/terminator columns */
+X  for (row = 0; row < n0; row++) {
+X    base = prof0[row];
+X    base[0] = -BIGNUM;
+X    base[ix_b] = (int)
+X      (((float)base[pascii['N']]+(float)base[pascii['D']]+0.5)/2.0);
+X    base[ix_z] = (int)
+X      (((float)base[pascii['Q']]+(float)base[pascii['E']]+0.5)/2.0);
+X    base[ix_x] = ppst->pam_xm;
+X    base[ix_stop] = ppst->pam_xm;
+X  }
+X
+X  /* duplicate the finished rows into the second profile */
+X  for (row = 0; row < n0; row++) {
+X    base = prof0[row];
+X    ext = prof1[row];
+X    ext[0] = -BIGNUM;
+X    for (col = 1; col <= ppst->nsq; col++) {
+X      ext[col] = base[col];
+X    }
+X  }
+X
+X  /* nothing more to do without the extended alphabet */
+X  if (!ppst->ext_sq_set) return;
+X
+X  for (row = 0; row < n0; row++) {
+X    base = prof0[row];
+X    ext = prof1[row];
+X    for (col = 1; col <= ppst->nsq; col++) {
+X      base[nsq + col] = base[col];
+X      ext[nsq + col] = ppst->pam_xm;
+X    }
+X  }
+}
+SHAR_EOF
+chmod 0644 initfa.c ||
+echo 'restore of initfa.c failed'
+Wc_c="`wc -c < 'initfa.c'`"
+test 54882 -eq "$Wc_c" ||
+ echo 'initfa.c: original size 54882, current size' "$Wc_c"
+fi
+# ============= karlin.c ==============
+if test -f 'karlin.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping karlin.c (File already exists)'
+else
+echo 'x - extracting karlin.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'karlin.c' &&
+/**************** Statistical Significance Parameter Subroutine ****************
+X
+X $Name: fa_34_26_5 $ - $Id: karlin.c,v 1.18 2006/06/01 16:05:30 wrp Exp $
+X
+X Version 1.0 February 2, 1990
+X Version 2.0 March 18, 1993
+X
+X Program by: Stephen Altschul
+X
+X Address: National Center for Biotechnology Information
+X National Library of Medicine
+X National Institutes of Health
+X Bethesda, MD 20894
+X
+X Internet: altschul@ncbi.nlm.nih.gov
+X
+X See: Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
+X Significance of Molecular Sequence Features by Using General Scoring
+X Schemes," Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.
+X
+X Computes the parameters lambda and K for use in calculating the
+X statistical significance of high-scoring segments or subalignments.
+X
+X The scoring scheme must be integer valued. A positive score must be
+X possible, but the expected (mean) score must be negative.
+X
+X A program that calls this routine must provide the value of the lowest
+X possible score, the value of the greatest possible score, and a pointer
+X to an array of probabilities for the occurrence of all scores between
+X these two extreme scores. For example, if score -2 occurs with
+X probability 0.7, score 0 occurs with probability 0.1, and score 3
+X occurs with probability 0.2, then the subroutine must be called with
+X low = -2, high = 3, and pr pointing to the array of values
+X { 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }. The calling program must also provide
+X pointers to lambda and K; the subroutine will then calculate the values
+X of these two parameters. In this example, lambda=0.330 and K=0.154.
+X
+X The parameters lambda and K can be used as follows. Suppose we are
+X given a length N random sequence of independent letters. Associated
+X with each letter is a score, and the probabilities of the letters
+X determine the probability for each score. Let S be the aggregate score
+X of the highest scoring contiguous segment of this sequence. Then if N
+X is sufficiently large (greater than 100), the following bound on the
+X probability that S is greater than or equal to x applies:
+X
+X P( S >= x ) <= 1 - exp [ - KN exp ( - lambda * x ) ].
+X
+X In other words, the p-value for this segment can be written as
+X 1-exp[-KN*exp(-lambda*S)].
+X
+X This formula can be applied to pairwise sequence comparison by assigning
+X scores to pairs of letters (e.g. amino acids), and by replacing N in the
+X formula with N*M, where N and M are the lengths of the two sequences
+X being compared.
+X
+X In addition, letting y = KN*exp(-lambda*S), the p-value for finding m
+X distinct segments all with score >= S is given by:
+X
+X 2 m-1 -y
+X 1 - [ 1 + y + y /2! + ... + y /(m-1)! ] e
+X
+X Notice that for m=1 this formula reduces to 1-exp(-y), which is the same
+X as the previous formula.
+X
+*******************************************************************************/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+X
+#define MAXIT 25 /* Maximum number of iterations used in calculating lambda */
+#define NMAP_X 23
+#define NMAP 33
+X
+#define TINY 1e-6
+X
+/* first build a residue map to automatically put residues in score bins */
+X
+#include "defs.h"
+#include "param.h"
+X
+/* initialize the Karlin frequency, probability arrays using
+X a specific query sequence */
+X
+int karlin(int , int, double *, double *, double *);
+static int karlin_k(int , int , double *, double *, double *, double *);
+X
+/* init_karlin() - residue frequencies of one query for do_karlin().
+X
+X   aa0	 - query residues, terminated by a 0 code
+X   n0	 - query length
+X   ppst	 - supplies nsq, pam_l, pam_h and the residue map hsqx[]
+X   aa0_f - receives a weight for residues 1..nsq: (1 + number of
+X	   occurrences in aa0) * 100.0/(n0+nsq)
+X   kp	 - when *kp is NULL it is set to a new zero-filled array of
+X	   pam_h - pam_l + 2 doubles for the score probabilities
+X
+X   Exits with status 1 if that array cannot be allocated.
+*/
+void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
+X		 double *aa0_f, double **kp)
+{
+X  int kar_nsq, kar_range;
+X
+X  const unsigned char *aa0p;
+X  int i;
+X  int r_cnt[NMAP+1];
+X  double fn0, *kar_p;
+X
+X  kar_range = ppst->pam_h - ppst->pam_l + 1;
+X  if (*kp == NULL) {
+X    if ((kar_p=(double *)calloc(kar_range+1,sizeof(double)))==NULL) {
+X      fprintf(stderr," cannot allocate kar_p array: %d\n",kar_range+1);
+X      exit(1);
+X    }
+X    *kp = kar_p;
+X  }
+X  kar_nsq = ppst->nsq;	/* alphabet size */
+X
+X  /* clear every counter first: hsqx[] may map a residue to a slot
+X     above kar_nsq, and r_cnt[NMAP_X] is read below even when NMAP_X
+X     is above kar_nsq */
+X  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
+X
+X  /* must have at least 1 residue of each type */
+X  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1;
+X
+X  fn0 = 100.0/(double)(n0+kar_nsq);	/* weight of each residue */
+X
+X  aa0p = aa0;
+X  /* increment residue count for each residue in query sequence */
+X  while (*aa0p) r_cnt[ppst->hsqx[*aa0p++]]++;
+X
+X  /* map all unmapped residues to 'X' */
+X  r_cnt[NMAP_X] += r_cnt[NMAP];
+X
+X  for (i=1; i<=kar_nsq; i++) aa0_f[i] = fn0*(double)r_cnt[i];
+}
+X
+double nt_f[] = {0.0, 0.25, 0.25, 0.25, 0.25 };
+X
+/* Robinson and Robinson frequencies */
+double aa_f[] = {
+/* NULL */ 0.00,
+/* A */ 0.0780474700897585,
+/* R */ 0.0512953149316987,
+/* N */ 0.0448725775979007,
+/* D */ 0.0536397361638076,
+/* C */ 0.0192460110427568,
+/* Q */ 0.0426436013507063,
+/* E */ 0.0629485981204668,
+/* G */ 0.0737715654561964,
+/* H */ 0.0219922696262025,
+/* I */ 0.0514196403000682,
+/* L */ 0.090191394464413,
+/* K */ 0.0574383201866657,
+/* M */ 0.0224251883196316,
+/* F */ 0.0385564048655621,
+/* P */ 0.0520279465667327,
+/* S */ 0.0711984743501224,
+/* T */ 0.0584129422708473,
+/* W */ 0.013298374223799,
+/* Y */ 0.0321647488738564,
+/* V */ 0.0644094211988074};
+X
+/* init_karlin_a() - like init_karlin(), but with an "average"
+X   composition instead of the composition of a particular query.
+X
+X   aa0_f[1..] receives nt_f[] (4 values) when ppst->nt_align is set,
+X   aa_f[] (20 values) when ppst->dnaseq is SEQT_PROT or SEQT_UNK, and
+X   otherwise 1/(nsq-1) for residues 1..nsq-1 and 0.0 for residue nsq.
+X
+X   When *kp is NULL it is set to a new zero-filled array of
+X   pam_h - pam_l + 2 doubles; exits with status 1 if that fails.
+*/
+void
+init_karlin_a(struct pstruct *ppst, double *aa0_f, double **kp)
+{
+X  int n_res, n_slots, r;
+X  double uniform;
+X  const double *table;
+X
+X  n_slots = ppst->pam_h - ppst->pam_l + 1;
+X  if (*kp == NULL) {
+X    *kp = (double *)calloc(n_slots+1,sizeof(double));
+X    if (*kp == NULL) {
+X      fprintf(stderr," cannot allocate kar_p array: %d\n",n_slots+1);
+X      exit(1);
+X    }
+X  }
+X
+X  /* pick a fixed frequency table, if one applies */
+X  if (ppst->nt_align) {
+X    table = nt_f;
+X    n_res = 4;
+X  }
+X  else if (ppst->dnaseq==SEQT_PROT || ppst->dnaseq == SEQT_UNK) {
+X    table = aa_f;
+X    n_res = 20;
+X  }
+X  else {
+X    table = NULL;
+X    n_res = ppst->nsq;
+X  }
+X
+X  if (table != NULL) {
+X    for (r = 1; r <= n_res; r++) aa0_f[r] = table[r];
+X    return;
+X  }
+X
+X  /* no table: equal weight for all residues but the last one */
+X  uniform = 1.0/(double)(n_res-1);
+X  for (r = 1; r < n_res; r++) aa0_f[r] = uniform;
+X  aa0_f[n_res]=0.0;
+}
+X
+/* do_karlin() - set up and call karlin() to calculate Lambda and H for
+X   one library sequence, using the residue frequencies of aa1.
+X
+X   aa1	  - library sequence residues, terminated by a 0 code
+X   n1	  - length of aa1
+X   pam2	  - scoring matrix, indexed [1..nsq][1..nsq]
+X   ppst	  - supplies nsq, pam_l, pam_h and the residue map hsqx[]
+X   aa0_f  - query residue weights (see init_karlin())
+X   kar_p  - work array of at least pam_h - pam_l + 2 doubles;
+X	    overwritten with the normalized score probabilities
+X   lambda, H - receive the results of karlin()
+X
+X   Returns 0 when the total score probability is <= 0.00001, otherwise
+X   the return value of karlin().
+*/
+int
+do_karlin(const unsigned char *aa1, int n1,
+X	  int **pam2, struct pstruct *ppst,
+X	  double *aa0_f, double *kar_p, double *lambda, double *H)
+{
+X  register unsigned const char *aap;
+X  int kar_range, kar_min, kar_max, kar_nsq;
+X  int r_cnt[NMAP+1];
+X  double aa1_f[NMAP];
+X  double fn1, kar_tot;
+X  int i, j;
+X
+X  kar_nsq = ppst->nsq;
+X  kar_min = ppst->pam_l;
+X  kar_max = ppst->pam_h;
+X  kar_range = kar_max - kar_min + 1;
+X
+X  /* clear every counter first: hsqx[] may map a residue to a slot
+X     above kar_nsq, and r_cnt[NMAP_X] is read below even when NMAP_X
+X     is above kar_nsq */
+X  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
+X
+X  /* every residue type is counted at least once */
+X  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1;
+X
+X  /* residue counts */
+X
+X  aap=aa1;
+X  while (*aap) r_cnt[ppst->hsqx[*aap++]]++;
+X
+X  /* residues counted in the NMAP slot are added to 'X' */
+X  r_cnt[NMAP_X] += r_cnt[NMAP];
+X
+X  /* residue frequencies */
+X  fn1 = 100.0/(double)(n1+kar_nsq);
+X  for (i=1; i<=kar_nsq; i++) aa1_f[i]= fn1*(double)r_cnt[i];
+X
+X  for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
+X
+X  /* probability of each score = sum over residue pairs with that score */
+X  for (i=1; i<=kar_nsq; i++) {
+X    for (j=1; j<=kar_nsq; j++)
+X      kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
+X  }
+X
+X  kar_tot = 0.0;
+X  for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
+X  if (kar_tot <= 0.00001) return 0;
+X
+X  for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
+X
+X  return karlin(kar_min, kar_max, kar_p, lambda, H);
+}
+X
+/* do_karlin_a() - calculate Lambda, K and H for "average" sequences.
+X
+X pam2 - scoring matrix, indexed from 1
+X ppst - supplies pam_l, pam_h, nt_align and nsq
+X aa0_f - residue frequencies for the first sequence (see init_karlin_a())
+X kar_p - work array of at least pam_h - pam_l + 2 doubles; overwritten
+X with the normalized score probabilities
+X lambda, K, H - receive the results of karlin_k()
+X
+X The second sequence uses nt_f[] (4 residues) when ppst->nt_align is
+X set and aa_f[] (20 residues) otherwise, normalized to sum to 1.
+X
+X Returns 0 when the total score probability is <= 0.00001, otherwise
+X the return value of karlin_k().
+*/
+int
+do_karlin_a(int **pam2, struct pstruct *ppst,
+X double *aa0_f, double *kar_p, double *lambda, double *K, double *H)
+{
+X double *aa1fp;
+X int kar_range, kar_min, kar_max, kar_nsq;
+X double aa1_f[NMAP];
+X double fn1, kar_tot;
+X int i, j;
+X
+X kar_min = ppst->pam_l;
+X kar_max = ppst->pam_h;
+X kar_range = kar_max - kar_min + 1;
+X
+X kar_tot = 0.0;
+X if (ppst->nt_align ) {
+X kar_nsq = 4;
+X aa1fp = nt_f;
+X for (i=1; i<=kar_nsq; i++) {kar_tot += aa1fp[i];}
+X for (i=1; i<=kar_nsq; i++) {aa1_f[i]= aa1fp[i]/kar_tot;}
+X }
+X /* NOTE(review): this condition is the exact complement of the one
+X above, so the final else branch can never run; init_karlin_a()
+X tests ppst->dnaseq at the corresponding place - confirm intent */
+X else if (!ppst->nt_align) {
+X kar_nsq = 20;
+X aa1fp = aa_f;
+X for (i=1; i<=kar_nsq; i++) {kar_tot += aa1fp[i];}
+X for (i=1; i<=kar_nsq; i++) {aa1_f[i]= aa1fp[i]/kar_tot;}
+X }
+X else {
+X kar_nsq = ppst->nsq;
+X fn1 = 1.0/(double)(kar_nsq-1);
+X for (i=1; i< kar_nsq; i++) aa1_f[i] = fn1;
+X aa1_f[kar_nsq]=0.0;
+X }
+X
+X for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
+X
+X /* NOTE(review): j stops at kar_nsq-1, so the last residue column is
+X left out; do_karlin() uses j<=kar_nsq - confirm which is intended */
+X for (i=1; i<=kar_nsq; i++) {
+X for (j=1; j<kar_nsq; j++)
+X kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
+X }
+X
+X kar_tot = 0.0;
+X for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
+X if (kar_tot <= 0.00001) return 0;
+X
+X for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
+X
+X return karlin_k(kar_min, kar_max, kar_p, lambda, K, H);
+}
+X
+/* take an array of score probabilities and get *lambda, *H */
+/* pr[i] is the probability of score low+i, for i = 0..high-low.
+X Returns 1 on success; returns 0 without writing *lambda_p or *H_p
+X when the expected score is not negative or no positive lambda is found.
+*/
+int
+karlin(int low, /* Lowest score (must be negative) */
+X int high, /* Highest score (must be positive) */
+X double *pr, /* Probabilities for various scores */
+X double *lambda_p, /* Pointer to parameter lambda */
+X double *H_p) /* Pointer to parameter H */
+{
+X int i,range, nit;
+X double up,new,sum,av,beta,ftemp;
+X double lambda;
+X double *p,*ptr1;
+X
+X /* Calculate the parameter lambda */
+X
+X p = pr;
+X range = high-low;
+X
+X /* check for E() < 0.0 */
+X sum = 0;
+X ptr1 = pr;
+X for (i=low; i <= high ; i++) sum += i* (*ptr1++);
+X if (sum >= 0.0) {
+#ifdef DEBUG
+X fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
+X sum);
+#endif
+X return 0;
+X }
+X
+X /* up is upper bound on lambda */
+X /* double up (1, 2, 4, ...) until sum = SUM pr[i]*exp(up*(low+i))
+X reaches 1.0; ftemp starts at exp(up*(low-1)) and is multiplied by
+X beta = exp(up) once per score */
+X up=0.5;
+X do {
+X up *= 2.0;
+X ptr1=p;
+X
+X beta=exp(up);
+X
+X ftemp=exp(up*(low-1));
+X sum = 0.0;
+X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+X }
+X while (sum<1.0);
+X
+X /* avoid overflow from very large lambda*S */
+/*
+X do {
+X up /= 2.0;
+X ptr1=p;
+X beta=exp(up);
+X
+X ftemp=exp(up*(low-1));
+X sum = 0.0;
+X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+X } while (sum > 2.0);
+X
+X up *= 2.0;
+*/ /* we moved past, now back up */
+X
+X /* for (lambda=j=0;j<25;++j) { */
+X /* bisection between lambda (lower bound) and up, at most MAXIT
+X steps; stops early when the lower bound moves by less than TINY */
+X lambda = 0.0;
+X nit = 0;
+X while ( nit++ < MAXIT ) {
+X new = (lambda+up)/2.0;
+X beta = exp(new);
+X ftemp = exp(new*(low-1));
+X ptr1=p;
+X sum = 0.0;
+X /* multiply by exp(new) for each score */
+X for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
+X
+X if (sum > 1.0 + TINY) up=new;
+X else {
+X if ( fabs(lambda - new) < TINY ) goto done;
+X lambda = new;
+X }
+X }
+X
+X if (lambda <= 1e-10) {
+X lambda = -1.0;
+X return 0;
+X }
+X
+X done:
+X *lambda_p = lambda;
+X
+X /* Calculate the parameter H */
+X /* beta still holds exp(new) from the last bisection step */
+X
+X ptr1=p;
+X ftemp=exp(lambda*(low-1));
+X for (av=0.0, i=low; i<=high; ++i)
+X av+= *ptr1++ *i*(ftemp*=beta);
+X *H_p= lambda*av;
+X
+X return 1; /* Parameters calculated successfully */
+}
+X
+static int a_gcd (int, int);
+X
+/* take an array of score probabilities and get *lambda, *K, *H */
+/* pr[i] is the probability of score low+i, for i = 0..high-low.
+X lambda and H are found exactly as in karlin(); K is then computed
+X with a work array of MAXIT*(high-low)+1 doubles.
+X Returns 1 on success; returns 0 without writing any result when the
+X expected score is not negative or no positive lambda is found.
+X Exits with status 1 if the work array cannot be allocated.
+*/
+static int
+karlin_k(int low, /* Lowest score (must be negative) */
+X int high, /* Highest score (must be positive) */
+X double *pr, /* Probabilities for various scores */
+X double *lambda_p, /* Pointer to parameter lambda */
+X double *K_p,
+X double *H_p) /* Pointer to parameter H */
+{
+X int i,j,range,lo,hi,first,last, nit;
+X double up,new,sum,Sum,av,beta,oldsum,ratio,ftemp;
+X double lambda;
+X double *p,*P,*ptrP,*ptr1,*ptr2;
+X
+X /* Calculate the parameter lambda */
+X
+X p = pr;
+X range = high-low;
+X
+X /* check for E() < 0.0 */
+X sum = 0;
+X ptr1 = pr;
+X for (i=low; i <= high ; i++) sum += i* (*ptr1++);
+X if (sum >= 0.0) {
+#ifdef DEBUG
+X fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
+X sum);
+#endif
+X return 0;
+X }
+X
+X /* up is upper bound on lambda */
+X /* double up (1, 2, 4, ...) until sum = SUM pr[i]*exp(up*(low+i))
+X reaches 1.0 */
+X up=0.5;
+X do {
+X up *= 2.0;
+X ptr1=p;
+X
+X beta=exp(up);
+X
+X ftemp=exp(up*(low-1));
+X sum = 0.0;
+X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+X }
+X while (sum<1.0);
+X
+X /* avoid overflow from very large lambda*S */
+X /*
+X do {
+X up /= 2.0;
+X ptr1=p;
+X beta=exp(up);
+X
+X ftemp=exp(up*(low-1));
+X sum = 0.0;
+X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
+X } while (sum > 2.0);
+X
+X up *= 2.0;
+X */
+X /* we moved past, now back up */
+X
+X /* for (lambda=j=0;j<25;++j) { */
+X /* bisection between lambda (lower bound) and up, at most MAXIT steps */
+X lambda = 0.0;
+X nit = 0;
+X while ( nit++ < MAXIT ) {
+X new = (lambda+up)/2.0;
+X beta = exp(new);
+X ftemp = exp(new*(low-1));
+X ptr1=p;
+X sum = 0.0;
+X /* multiply by exp(new) for each score */
+X for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
+X
+X if (sum > 1.0 + TINY) up=new;
+X else {
+X if ( fabs(lambda - new) < TINY ) goto done;
+X lambda = new;
+X }
+X }
+X
+X if (lambda <= 1e-10) {
+X lambda = -1.0;
+X return 0;
+X }
+X
+X done:
+X *lambda_p = lambda;
+X
+X /* Calculate the parameter H */
+X
+X ptr1=p;
+X ftemp=exp(lambda*(low-1));
+X for (av=0.0, i=low; i<=high; ++i) av+= *ptr1++ *i*(ftemp*=beta);
+X *H_p= lambda*av;
+X
+X /* Calculate the parameter K */
+X /* the statements below are kept exactly as in the original routine;
+X P is the only storage they allocate */
+X Sum=lo=hi=0;
+X P= (double *) calloc(MAXIT*range+1,sizeof(double));
+X /* *P is written by the very next statement, so a failed allocation
+X must be caught here; the other allocations in this file also exit */
+X if (P == NULL) {
+X fprintf(stderr," cannot allocate karlin_k work array: %d\n",MAXIT*range+1);
+X exit(1);
+X }
+X for (*P=sum=oldsum=j=1;j<=MAXIT && sum>0.001;Sum+=sum/=j++) {
+X first=last=range;
+X for (ptrP=P+(hi+=high)-(lo+=low); ptrP>=P; *ptrP-- =sum) {
+X ptr1=ptrP-first;
+X ptr2=p+first;
+X for (sum=0,i=first; i<=last; ++i) sum += *ptr1-- * *ptr2++;
+X if (first) --first;
+X if (ptrP-P<=range) --last;
+X }
+X ftemp=exp(lambda*(lo-1));
+X for (sum=0,i=lo;i;++i) sum+= *++ptrP * (ftemp*=beta);
+X for (;i<=hi;++i) sum+= *++ptrP;
+X ratio=sum/oldsum;
+X oldsum=sum;
+X }
+X /* continue the series up to term 200 using the last ratio */
+X for (;j<=200;Sum+=oldsum/j++) oldsum*=ratio;
+X /* i = lowest score with non-zero probability; j = a_gcd() folded
+X over -i and every further score with non-zero probability */
+X for (i=low;!p[i-low];++i);
+X for (j= -i;i<high && j>1;) if (p[++i-low]) j=a_gcd(j,i);
+X *K_p = (j*exp(-2*Sum))/(av*(1.0-exp(- lambda*j)));
+X free(P);
+X
+X return 1; /* Parameters calculated successfully */
+}
+X
+/* a_gcd() - Euclid's algorithm on a and |b|; the larger value is used
+X   as the first dividend.  Returns the last non-zero divisor. */
+int
+a_gcd(int a, int b)
+{
+X  int tmp;
+X
+X  if (b < 0) { b = -b; }
+X
+X  /* make sure a holds the larger of the two values */
+X  if (a < b) {
+X    tmp = a;
+X    a = b;
+X    b = tmp;
+X  }
+X
+X  while (b != 0) {
+X    tmp = a % b;
+X    a = b;
+X    b = tmp;
+X  }
+X  return a;
+}
+X
+SHAR_EOF
+chmod 0644 karlin.c ||
+echo 'restore of karlin.c failed'
+Wc_c="`wc -c < 'karlin.c'`"
+test 13727 -eq "$Wc_c" ||
+ echo 'karlin.c: original size 13727, current size' "$Wc_c"
+fi
+# ============= last_tat.c ==============
+if test -f 'last_tat.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping last_tat.c (File already exists)'
+else
+echo 'x - extracting last_tat.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'last_tat.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: last_tat.c,v 1.8 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#include "structs.h"
+#include "param.h"
+X
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "msg.h"
+#include "p_mw.h"
+X
+void do_stage2(struct beststr **bptr, int nbest, struct mngmsg m_msg0,
+X int s_func, struct qmng_str *qm_msp);
+#endif
+X
+X
+extern int (*ranlib) (char *str, int cnt,
+X fseek_t libpos, char *libstr,
+X struct lmf_str *lm_fd);
+X
+#define RANLIB (m_fptr->ranlib)
+X
+#define MAX_BLINE 200
+X
+int
+re_getlib(unsigned char *, int, int, int, int, int, long *, long *,
+X struct lmf_str *m_fptr);
+X
+void
+do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame,
+X struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
+X
+extern void
+do_opt (unsigned char *aa0, int n0, unsigned char *aa1, int n1,
+X int frame, struct pstruct *pst, void *f_str,
+X struct rstruct *rst);
+X
+struct lmf_str *re_openlib(struct lmf_str *, int outtty);
+X
+void sortbestz (struct beststr **bptr, int nbest);
+X
+double zs_to_E(double zs,int n1, int isdna, long entries, struct db_str db);
+X
+double scale_one_score(int ipos, double escore, struct db_str db, void *rs_str);
+X
+/* sortbests() - Shell sort of bptr[0..nbest-1] into descending order
+X   of score[0]; only the pointers are moved. */
+void sortbests (struct beststr **bptr, int nbest)
+{
+X  int step, hi, lo;
+X  struct beststr *held;
+X
+X  step = nbest/2;
+X  while (step > 0) {
+X    for (hi = step; hi < nbest; hi++) {
+X      lo = hi - step;
+X      /* bubble the larger score towards the front, one step at a time */
+X      while (lo >= 0 && bptr[lo]->score[0] < bptr[lo + step]->score[0]) {
+X	held = bptr[lo];
+X	bptr[lo] = bptr[lo + step];
+X	bptr[lo + step] = held;
+X	lo -= step;
+X      }
+X    }
+X    step /= 2;
+X  }
+}
+X
+/* last_calc() - rescore the best library hits in batches.
+X
+X bptr[] is sorted with sortbestz(), then up to 5 batches of tat_inc
+X (500) entries are rescored: with do_opt() when PCOMPLIB is not
+X defined, with do_stage2() when it is.  After each batch the smallest
+X value returned by scale_one_score() is compared with m_msg.e_cut;
+X the function returns as soon as that minimum exceeds e_cut.
+X
+X Returns ib, the index one past the last entry visited.
+*/
+int
+last_calc(
+#ifndef PCOMPLIB
+X unsigned char **aa0, unsigned char *aa1, int maxn,
+#endif
+X struct beststr **bptr, int nbest,
+X struct mngmsg m_msg, struct pstruct *ppst
+#ifdef PCOMPLIB
+X , struct qmng_str *qm_msp
+#else
+X , void **f_str
+#endif
+X , void *rstat_str)
+{
+X int nopt, ib;
+X struct beststr *bbp;
+X long loffset, l_off;
+X int n0, n1;
+X struct rstruct rst;
+X struct lmf_str *m_fptr;
+X char bline[60];
+X int tat_samp, tat_inc, loop_cnt, i;
+X double min_escore, ess;
+X
+X n0 = m_msg.n0;
+X
+X sortbestz(bptr,nbest);
+X
+X /* number of entries added to the rescored set on each pass */
+X tat_inc = 500;
+/*
+X if (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg.db)/
+X zs_to_E(bptr[nbest-1]->zscore,bptr[nbest-1]->n1,0,ppst->zdb_size,m_msg.db)
+X < 1e-20) { tat_inc /= 4 ;}
+*/
+X
+/* || (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg.db)< 1e-5); */
+X
+X ib = tat_samp = 0;
+X for (loop_cnt = 0; loop_cnt < 5; loop_cnt++) {
+X tat_samp += tat_inc;
+X /* ib carries over between passes, so each pass handles [ib, nopt) */
+X nopt = min(nbest,tat_samp);
+X min_escore = 1000000.0;
+#ifndef PCOMPLIB
+X for ( ; ib<nopt; ib++) {
+X bbp = bptr[ib];
+X
+X /* a negative score ends this pass; ib is left on that entry */
+X if (bbp->score[0] < 0) break;
+X
+X if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg.quiet))==NULL) {
+X fprintf(stderr,"*** cannot re-open %s\n",bbp->m_file_p->lb_name);
+X exit(1);
+X }
+X /* RANLIB expands to m_fptr->ranlib; bline is not read afterwards here */
+X RANLIB(bline,sizeof(bline),bbp->lseek,bbp->libstr,m_fptr);
+X
+X /* NOTE(review): passes bbp->m_file_p rather than the m_fptr returned
+X by re_openlib() - confirm the two are always the same */
+X n1 = re_getlib(aa1,maxn,m_msg.maxt3,m_msg.loff,bbp->cont,m_msg.term_code,
+X &loffset,&l_off,bbp->m_file_p);
+X
+X do_opt(aa0[bbp->frame],m_msg.n0,aa1,n1,bbp->frame,ppst,
+X f_str[bbp->frame],&rst);
+X /* copy the rescored values back into the best-hit entry */
+X bbp->score[0]=rst.score[0];
+X bbp->score[1]=rst.score[1];
+X bbp->score[2]=rst.score[2];
+X bbp->escore=rst.escore;
+X bbp->segnum = rst.segnum;
+X bbp->seglen = rst.seglen;
+X
+X /* track the smallest scaled escore seen in this pass */
+X if ((ess=scale_one_score(ib, bbp->escore, m_msg.db, rstat_str)) <
+X min_escore) { min_escore = ess;}
+X /*
+X fprintf(stderr,"%d: %4d %2d %3d %.4g %.4g\n",
+X ib, bbp->score[0], bbp->segnum,bbp->seglen,bbp->escore, ess);
+X */
+X }
+#else
+X /* hand the whole batch to do_stage2(), then scale its escores */
+X do_stage2(&bptr[ib], nopt-ib, m_msg, DO_CALC_FLG, qm_msp);
+X
+X for ( ; ib < nopt; ib++) {
+X if ((ess=scale_one_score(ib, bptr[ib]->escore, m_msg.db, rstat_str)) <
+X min_escore) { min_escore = ess;}
+X /*
+X fprintf(stderr, "%d: %4d %2d %3d %.4g %.4g\n",
+X ib,bptr[ib]->score[0],bptr[ib]->segnum,bptr[ib]->seglen,bptr[ib]->escore,ess);
+X */
+X }
+#endif
+X
+X
+X /* stop once even the best entry of this batch is above the cutoff */
+X if (min_escore > m_msg.e_cut) return ib;
+X }
+X return ib;
+}
+SHAR_EOF
+chmod 0644 last_tat.c ||
+echo 'restore of last_tat.c failed'
+Wc_c="`wc -c < 'last_tat.c'`"
+test 4128 -eq "$Wc_c" ||
+ echo 'last_tat.c: original size 4128, current size' "$Wc_c"
+fi
+# ============= lcbo.aa ==============
+if test -f 'lcbo.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping lcbo.aa (File already exists)'
+else
+echo 'x - extracting lcbo.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'lcbo.aa' &&
+>LCBO - Prolactin precursor - Bovine
+MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQVSLRDLFDRAVMVSHYIHDLSS
+EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHHEVLMSLILGLLRSWNDPLYHL
+VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGAKETEPYPVWSGLPSLQTKDED
+ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC*
+SHAR_EOF
+chmod 0644 lcbo.aa ||
+echo 'restore of lcbo.aa failed'
+Wc_c="`wc -c < 'lcbo.aa'`"
+test 271 -eq "$Wc_c" ||
+ echo 'lcbo.aa: original size 271, current size' "$Wc_c"
+fi
+# ============= lib_sel.c ==============
+if test -f 'lib_sel.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping lib_sel.c (File already exists)'
+else
+echo 'x - extracting lib_sel.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'lib_sel.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: lib_sel.c,v 1.16 2006/12/06 17:30:52 wrp Exp $ */
+X
+/* modified Dec 13, 1989 requires different FASTLIBS */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "structs.h"
+X
+#ifdef NCBIBL13
+#define LASTLIB NCBIBL13+1
+#else
+#define LASTLIB 11
+#endif
+X
+X
+int getlnames(char *tname, struct mngmsg *m_msg);
+void addfile(char *, char *, struct mngmsg *);
+void libchoice(char *lname, int nl, struct mngmsg *m_msg);
+void libselect(char *lname, struct mngmsg *m_msg);
+void subs_env(char *dest, char *src, int dest_size);
+char *ulindex(char *str, char *chr);
+X
+static char ldname[MAX_FN];
+static char *libenv;
+X
+/* getlnames() - add the library file(s) named by iname to m_msg.
+X
+X   After subs_env() expansion, a name that does not start with '@' is
+X   added directly.  "@file" names a list file: every line that does
+X   not start with ';' is expanded and added with addfile(), except that
+X   a line starting with '<' sets the directory passed to addfile() for
+X   the following entries.
+X
+X   Returns 1 on success, 0 if the list file cannot be opened.
+*/
+int
+getlnames(char *iname, struct mngmsg *m_msg)	/* read in the library names */
+{
+X  char expanded[MAX_STR];	/* iname, then each list line, after subs_env() */
+X  char lline[MAX_FN];
+X  char *list_name, *cut;
+X  FILE *list_fp;
+X
+X  /* expand environment variables */
+X  subs_env(expanded, iname, sizeof(expanded));
+X
+X  if (expanded[0] != '@') {
+X    addfile(expanded,"\0",m_msg);
+X    return 1;
+X  }
+X  list_name = expanded + 1;
+X
+X  /* remove ' ' before deftype if present */
+X  cut = strchr(list_name,' ');
+X  if (cut != NULL) { *cut = '\0'; }
+X
+X  list_fp = fopen(list_name,"r");
+X  if (list_fp == NULL) {
+X    fprintf(stderr," could not open file of names: %s\n",list_name);
+X    return 0;
+X  }
+X
+X  /* list_name is not used below, so expanded[] can be reused per line */
+X  while (fgets(lline,sizeof(lline),list_fp) != NULL) {
+X    if (lline[0] == ';') { continue; }
+X
+X    cut = strchr(lline,'\n');
+X    if (cut != NULL) { *cut = '\0'; }
+X
+X    subs_env(expanded, lline, sizeof(expanded));
+X    if (expanded[0] != '<') {
+X      addfile(expanded,libenv,m_msg);
+X    }
+X    else {
+X      /* "<dir" : remember dir for the entries that follow */
+X      strncpy(ldname,&expanded[1],sizeof(ldname));
+X      ldname[sizeof(ldname)-1]='\0';
+X      libenv=ldname;
+X    }
+X  }
+X  fclose(list_fp);
+X  return 1;
+}
+X
+/* libchoice displays a list of potential library files
+X   in the new &lib& version, only traditional 1-letter files will be
+X   shown initially
+X
+X   When m_msg->flstr names a readable choice file, the entries whose
+X   library type digit matches m_msg->ldnaseq are listed on stderr and
+X   the user is prompted for a file name or abbreviation.  Otherwise
+X   (no choice file, allocation failure, or the file cannot be opened)
+X   the user is simply prompted for a library file name.
+X
+X   The reply is copied into lname[nl] and is always NUL-terminated.
+X   The program exits with status 0 on end-of-file on stdin.
+*/
+
+void
+libchoice(char *lname, int nl, struct mngmsg *m_msg)
+{
+X  FILE *fch = NULL;	/* stays NULL unless the choice file was opened */
+X  char line[MAX_STR], *bp;
+X  char *chstr[MAX_CH],*chfile[MAX_CH];
+X  char *chtmp, *charr;
+X  int j,k,chlen;
+X
+X  charr = NULL;
+X  if (strlen(m_msg->flstr)> (size_t)0) {
+X    chlen = MAX_CH*MAX_FN;
+X    if ((chtmp=charr=calloc((size_t)chlen,sizeof(char)))==NULL) {
+X      fprintf(stderr,"cannot allocate choice file array\n");
+X      goto l1;
+X    }
+X    chlen--;	/* keep the final zero byte of charr as a terminator */
+X    if ((fch=fopen(m_msg->flstr,"r"))==NULL) {
+X      fprintf(stderr," cannot open choice file: %s\n",m_msg->flstr);
+X      goto l1;
+X    }
+X    fprintf(stderr,"\n Choose sequence library:\n\n");
+X
+X    j = 0;
+X    while (j<MAX_CH) {
+X      if (fgets(line,sizeof(line),fch)==NULL) break;
+X      if (line[0]==';') continue;	/* check for comment */
+X      if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; /* remove \n */
+X      if ((bp=strchr(line,'$'))==NULL) continue; /* if no '$', continue */
+X      *bp++='\0';	      /* replace $ with \0, bp points to libtype */
+X
+X      /* if libtypes don't match, continue */
+X      if ((*bp++ -'0')!=m_msg->ldnaseq) continue;
+X
+X      /* if the library file name is too long, quit */
+X      if ((k=strlen(line))>chlen) break;
+X
+X      /* save the library file name */
+X      strncpy(chstr[j]=chtmp,line,chlen);
+X      chtmp += k+1; chlen -= k+1;
+X
+X      /* save the abbreviation letter and the file that follows it */
+X      if ((k=strlen(bp))>chlen) break;
+X      strncpy(chfile[j]=chtmp,bp,chlen);
+X      chtmp += k+1; chlen -= k+1;
+X      fprintf(stderr,"    %c: %s\n",*chfile[j++],line);
+X    }
+X  l2: fprintf(stderr,"\n Enter library filename (e.g. %s), letter (e.g. P)\n",
+X	      (m_msg->ldnaseq==0)? "prot.lib" : "dna.lib");
+X    fprintf(stderr," or a %% followed by a list of letters (e.g. %%PN): ");
+X    fflush(stderr);
+X    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+X    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
+X    if (strlen(line)==0) goto l2;
+X    strncpy(lname,line,nl);
+X  }
+X  else {
+X  l1: fprintf(stderr," library file name: ");
+X    fflush(stderr);
+X    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+X    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
+X    if (strlen(line)> (size_t)0) strncpy(lname,line,nl);
+X    else goto l1;
+X  }
+X
+X  /* strncpy() does not terminate when the reply fills the buffer */
+X  if (nl > 0) lname[nl-1]='\0';
+X
+X  /* the fopen() failure path reaches here with charr set but fch NULL,
+X     so each resource is released on its own condition */
+X  if (fch!=NULL) fclose(fch);
+X  if (charr!=NULL) free(charr);
+}
+X
+/* libselect parses the choices in char *lname and builds the list
+X   of library files
+X
+X   lname longer than one character and not starting with '%' or '+'
+X   is handed to getlnames() as a file name.  Otherwise lname is a set
+X   of abbreviations looked up in the choice file m_msg->flstr:
+X     "+abc+def"  multi-letter abbreviations, matched against "+abc+"
+X                 entries
+X     "%PN"       a list of single letters
+X     "P"         one single letter
+X   At most MAX_LF multi-letter abbreviations are used; extra ones are
+X   reported and ignored.  Exits with status 1 if an abbreviation is
+X   given while m_msg->flstr is empty.
+*/
+void
+libselect(char *lname, struct mngmsg *m_msg)
+{
+X  char line[MAX_FN*2], *bp, *bp1;
+X  char *llnames[MAX_LF]; /* pointers into new list of names */
+X  int new_abbr,ich, nch;	/* use new multi-letter abbr */
+X  FILE *fch;
+X
+X  new_abbr = 0;
+X  nch = 0;
+X  m_msg->nln = 0;
+X  if (strlen(lname) > (size_t)1 && *lname != '%' && *lname != '+') {
+X    getlnames(lname,m_msg); /* file name */
+X    return;
+X  }
+X
+X  if (*m_msg->flstr=='\0') {
+X    fprintf(stderr," abbrv. list request but FASTLIBS undefined, cannot use %s\n",lname);
+X    exit(1);
+X  }
+X
+X  if (strchr(lname,'+')) {
+X    /* indicates list of database abbrevs (not files) */
+X    new_abbr=1;
+X    bp = lname+1; if (*bp == '+') bp++;
+X    /* split lname in place at each '+' and remember every piece */
+X    for (bp1=bp; bp!=NULL && bp1!=NULL; bp=bp1+1) {
+X      if (nch >= MAX_LF) {
+X	/* llnames[] is full; writing further would overrun it */
+X	fprintf(stderr," too many library abbreviations (max %d), rest ignored\n",
+X		MAX_LF);
+X	break;
+X      }
+X      if ((bp1=strchr(bp,'+'))!=NULL) *bp1='\0';
+X      llnames[nch++] = bp;
+X    }
+X  }
+X  else if (*lname=='%') {     /* list of single letter abbreviations */
+X    lname++;	/* bump over '%' to get letters */
+X  }
+X
+X  /* else just use a single character abbreviation */
+X
+X  /* flstr is known to be non-empty here (checked above) */
+X  if ((fch=fopen(m_msg->flstr,"r"))==NULL) {
+X    fprintf(stderr," cannot open choice file: %s\n",m_msg->flstr);
+X    return;
+X  }
+X
+X  /* read each line of FASTLIBS */
+X  while (fgets(line,sizeof(line),fch)!=NULL) {
+X    if (line[0]==';') continue;	/* skip comments */
+X    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';	/* remove '\n' */
+X    if ((bp=strchr(line,'$'))==NULL) continue; /* no delim, continue */
+X    *bp++='\0';	/* point to library type */
+X    if ((*bp++ -'0')!=m_msg->ldnaseq) continue; /* doesn't match, continue */
+X
+X    /* if !new_abbr, match on one letter with ulindex() */
+X    if (!new_abbr) {
+X      if (*bp=='+') continue;	/* not a &lib& */
+X      else if (ulindex(lname,bp)!=NULL) {
+X	strncpy(m_msg->ltitle,line,MAX_FN);
+X	getlnames(bp+1,m_msg);
+X      }
+X    }
+X    else {
+X      if (*bp!='+') continue;
+X      else {
+X	bp++;
+X	if ((bp1 = strchr(bp,'+'))!=NULL) {
+X	  /* temporarily cut the entry at its closing '+' to compare */
+X	  *bp1='\0';
+X	  for (ich = 0; ich<nch; ich++) {
+X	    if (strcmp(llnames[ich],bp)==0) {
+X	      strncpy(m_msg->ltitle,line,MAX_FN);
+X	      getlnames(bp1+1,m_msg);
+X	      break;
+X	    }
+X	  }
+X	  *bp1='+';
+X	}
+X	else fprintf(stderr,"%s missing final '+'\n",bp);
+X      }
+X    }
+X  }
+X  fclose(fch);
+}
+X
+/* addfile() - append one library file name to m_msg->lbnames[].
+X
+X   fname  file name; a name starting with '#' never gets the directory
+X   env    default directory to put in front of fname, or NULL / ""
+X   m_msg  receives a heap copy of the name in lbnames[nln++]
+X
+X   The combined name is truncated to fit MAX_STR.  Nothing is added,
+X   and a message is printed, when memory or name slots (MAX_LF) run
+X   out.
+*/
+void
+addfile(char *fname, char *env, struct mngmsg *m_msg)
+{
+X  char tname[MAX_STR];
+X  char *lbptr;
+X  size_t len;
+X
+X  tname[0]='\0';
+X
+X  /* check for default directory for files */
+X  if (env != NULL && *env != '\0' && *fname != '#') {
+X    /* add default directory to file name; strncpy() alone leaves
+X       tname unterminated when env is too long */
+X    strncpy(tname,env,sizeof(tname)-1);
+X    tname[sizeof(tname)-1]='\0';
+#ifdef UNIX
+X    strncat(tname,"/",sizeof(tname)-strlen(tname)-1);
+#endif
+X  }
+X
+X  /* add fname to tname, allocate space, and move to space */
+X  strncat(tname,fname,sizeof(tname)-strlen(tname)-1);
+X  len=strlen(tname)+1;
+X  if ((lbptr=calloc(len,sizeof(char)))==NULL) {
+X    fprintf(stderr,"no more space for filenames: %s ignored\n",fname);
+X    return;
+X  }
+X  memcpy(lbptr,tname,len);	/* len includes the terminating NUL */
+X
+X  if (m_msg->nln< MAX_LF) {
+X    m_msg->lbnames[m_msg->nln++]=lbptr;
+X  }
+X  else {
+X    fprintf(stderr," no more file name slots: %s ignored\n",lbptr);
+X    free(lbptr);	/* not stored anywhere, so release it */
+X  }
+}
+X
+/* ulindex() - case-insensitive search of str for the single character
+X   *chr.  Returns a pointer to the first match in str, or NULL if
+X   there is none (including when *chr is '\0').
+*/
+char *
+ulindex(char *str, char *chr)
+{
+X  int c;
+X
+X  /* tolower() is only defined for unsigned char values and EOF, so
+X     convert before calling and compare as int */
+X  c = tolower((unsigned char)(*chr));
+X
+X  while (*str != '\0' && tolower((unsigned char)(*str)) != c ) str++;
+X  if (*str=='\0') return NULL;
+X  else return str;
+}
+SHAR_EOF
+chmod 0644 lib_sel.c ||
+echo 'restore of lib_sel.c failed'
+Wc_c="`wc -c < 'lib_sel.c'`"
+test 7638 -eq "$Wc_c" ||
+ echo 'lib_sel.c: original size 7638, current size' "$Wc_c"
+fi
+# ============= list_db.c ==============
+if test -f 'list_db.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping list_db.c (File already exists)'
+else
+echo 'x - extracting list_db.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'list_db.c' &&
+/* list_db.c - report values from map_db.c */
+X
+/* copyright (c) 1999 William R. Pearson */
+X
+/* format of the index file:
+X
+1) map_db version number ["MP"+2 bytes]
+2) number of sequences in database [4 bytes]
+3) total length of database [8 bytes]
+4) longest sequence in database [8 bytes]
+5) list of offsets to definitions [num_seq+1] int*8
+6) list of offsets to sequences [num_seq+1] int*8
+7) list of flag characters for sequences [num_seq+1] bytes
+X (used for GCG binary to encode 2bit or 4 bit representation)
+X
+X sequence files will be as defined by their format
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "uascii.h"
+#include "ncbl2_head.h"
+X
+void src_int4_write(FILE *, int);
+void src_int4_read(FILE *, int *);
+void src_long4_read(FILE *, long *);
+void src_long8_write(FILE *, long);
+void src_long8_read(FILE *, long *);
+X
+void newname(char *nname, char *oname, char *suff, int maxn);
+X
+/* list_db: print the header of a map_db ".xin" index file followed by
+X   its table of description and sequence offsets.
+X
+X   The library name comes from argv[1] or, if absent, from a prompt.
+X   A library type may follow the name after a space; it is parsed but
+X   not otherwise used here.  Exits with status 1 if the index cannot
+X   be opened or read, or if memory cannot be allocated.
+*/
+int
+main(int argc, char **argv)
+{
+X  FILE *libi;
+X  char lname[256];
+X  char iname[256];
+X  char format[4];
+X  char *bp;
+X
+X  int i;
+X  int lib_aa;	/* 0 => DNA, 1 => protein */
+X  int nlib;	/* number of entries */
+X  long f_size;
+X  long max_len;	/* longest sequence */
+X  long tot_len;	/* total sequence length */
+X
+X  int lib_type;	/* 1 for protein, 0 for DNA */
+X  long *d_pos_arr;	/* array of description pointers */
+X  long *s_pos_arr;	/* array of sequence pointers */
+X  char *attr_arr;	/* array of attribute chars */
+X
+X  int mm64_flag;
+X
+X  lib_type = 0;
+X
+X  /* open the database */
+X  if (argc > 1) {
+X    strncpy(lname, argv[1],sizeof(lname)-1);
+X    lname[sizeof(lname)-1]='\0';
+X  }
+X  else {
+X    fprintf(stderr," Entry library name: ");
+X    if (fgets(lname,sizeof(lname),stdin)==NULL) {
+X      fprintf(stderr," no library name given\n");
+X      exit(1);
+X    }
+X    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
+X  }
+X
+X  /* "name type": cut the name at the blank */
+X  if ((bp=strchr(lname,' '))!=NULL) {
+X    lib_type = atoi(bp+1);
+X    *bp='\0';
+X  }
+X  else lib_type = 0;
+X
+X  newname(iname,lname,"xin",sizeof(iname));
+X
+X  /* the index is binary data, so open it in binary mode */
+X  if ((libi=fopen(iname,"rb"))==NULL) {
+X    fprintf(stderr," cannot open %s\n",iname);
+X    exit(1);
+X  }
+X
+X  if (fread(format,1,sizeof(format),libi)!=sizeof(format)) {
+X    fprintf(stderr," cannot read header of %s\n",iname);
+X    exit(1);
+X  }
+X  printf("%c%c%d %d\n",format[0],format[1],format[2],format[3]);
+X  /* format[2]==1 selects 8-byte offsets and lengths */
+X  mm64_flag = (format[2]==1);
+X
+X  src_int4_read(libi,&lib_aa);
+X
+X  if (mm64_flag) src_long8_read(libi,&f_size);
+X  else src_long4_read(libi,&f_size);
+X
+X  src_int4_read(libi,&nlib);
+X
+X  if (mm64_flag) {
+X    src_long8_read(libi,&tot_len);
+X    src_long8_read(libi,&max_len);
+X  }
+X  else {
+X    src_long4_read(libi,&tot_len);
+X    src_long4_read(libi,&max_len);
+X  }
+X
+X  printf(" %d entries; tot: %ld; max: %ld\n",nlib,tot_len,max_len);
+X
+X  /* a negative count would turn into a bogus allocation size */
+X  if (nlib < 0) {
+X    fprintf(stderr," invalid entry count %d in %s\n",nlib,iname);
+X    exit(1);
+X  }
+X
+X  /* allocate array of description pointers */
+X  if ((d_pos_arr=(long *)calloc(nlib+1, sizeof(long)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for desc. array\n",nlib+1);
+X    exit(1);
+X  }
+X  /* allocate array of sequence pointers */
+X  if ((s_pos_arr=(long *)calloc(nlib+1, sizeof(long)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for seq. array\n",nlib+1);
+X    exit(1);
+X  }
+X  if ((attr_arr=(char *)calloc(nlib+1, sizeof(char)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for attr. array\n",nlib+1);
+X    exit(1);
+X  }
+X
+X  if (mm64_flag) {
+X    for (i=0; i<=nlib; i++) src_long8_read(libi,&d_pos_arr[i]);
+X    for (i=0; i<=nlib; i++) src_long8_read(libi,&s_pos_arr[i]);
+X  }
+X  else {
+X    for (i=0; i<=nlib; i++) src_long4_read(libi,&d_pos_arr[i]);
+X    for (i=0; i<=nlib; i++) src_long4_read(libi,&s_pos_arr[i]);
+X  }
+X
+X  /* the attribute bytes are read but not reported */
+X  fread(attr_arr,nlib+1,sizeof(char),libi);
+X  fclose(libi);
+X
+X  printf("header\tseq\n");
+X
+X  for (i=0; i<nlib; i++) printf("%ld\t%ld\n",d_pos_arr[i],s_pos_arr[i]);
+X
+X  free(attr_arr);
+X  free(s_pos_arr);
+X  free(d_pos_arr);
+X  return 0;
+}
+X
+/* src_int4_read() - read a 4-byte, most-significant-byte-first integer
+X   from fd into *val.  The bytes are combined explicitly, so the
+X   result does not depend on the byte order of the host.  *val is set
+X   to 0 if fewer than 4 bytes can be read.
+*/
+void src_int4_read(FILE *fd, int *val)
+{
+X  unsigned char b[4];
+X  unsigned int uval;
+X
+X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != (size_t)4) {
+X    *val = 0;
+X    return;
+X  }
+X
+X  /* unsigned arithmetic: shifting a set bit into the sign position of
+X     an int is undefined behaviour */
+X  uval = ((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+X    ((unsigned int)b[2]<<8) | (unsigned int)b[3];
+X  *val = (int)uval;
+}
+X
+/* src_long4_read() - read a 4-byte, most-significant-byte-first integer
+X   from fd and store it, sign-extended from 32 bits, in the long *val.
+X   The bytes are combined explicitly, so the result does not depend on
+X   the byte order of the host.  *val is set to 0 if fewer than 4 bytes
+X   can be read.
+*/
+void src_long4_read(FILE *fd, long *val)
+{
+X  unsigned char b[4];
+X  unsigned int uval;
+X
+X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != (size_t)4) {
+X    *val = 0;
+X    return;
+X  }
+X
+X  /* unsigned arithmetic: shifting a set bit into the sign position of
+X     an int is undefined behaviour */
+X  uval = ((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) |
+X    ((unsigned int)b[2]<<8) | (unsigned int)b[3];
+X  /* go through int so the value keeps its sign, as it did before */
+X  *val = (long)(int)uval;
+}
+X
+/* src_long8_read() - read an 8-byte, most-significant-byte-first
+X   integer from fd into *val.
+X
+X   The value is accumulated in an unsigned long, so all 64 bits are
+X   kept when long is 64 bits wide; with a narrower long only the low
+X   order bytes fit.  Combining the bytes explicitly makes the result
+X   independent of the byte order of the host.  *val is set to 0 if
+X   fewer than 8 bytes can be read.
+*/
+void src_long8_read(FILE *fd, long *val)
+{
+X  unsigned char b[8];
+X  unsigned long uval;
+X  int i;
+X
+X  if (fread((char *)&b[0],(size_t)1,(size_t)8,fd) != (size_t)8) {
+X    *val = 0;
+X    return;
+X  }
+X
+X  /* the previous int arithmetic discarded the upper four bytes */
+X  uval = 0;
+X  for (i=0; i<8; i++) uval = (uval<<8) | (unsigned long)b[i];
+X  *val = (long)uval;
+}
+X
+/* src_int4_write() - write val to fd as 4 bytes, most significant
+X   byte first.  With IS_BIG_ENDIAN the int is written as stored. */
+void src_int4_write(FILE *fd, int val)
+{
+#ifdef IS_BIG_ENDIAN
+X  fwrite(&val,(size_t)4,(size_t)1,fd);
+#else
+X  unsigned char b[4];
+X  int k;
+X
+X  /* peel off the low byte each time, filling b[] from the end */
+X  for (k = 3; k >= 0; k--) {
+X    b[k] = val & 255;
+X    val = val>>8;
+X  }
+X
+X  fwrite(b,(size_t)1,(size_t)4,fd);
+#endif
+}
+X
+/* newname() - build "<oname>.<suff>" in nname[maxn].
+X   The result is truncated if necessary and is always NUL-terminated;
+X   nothing is written when maxn <= 0.
+*/
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+X  if (maxn <= 0) return;
+X
+X  /* strncpy() does not terminate when oname fills the buffer */
+X  strncpy(nname,oname,(size_t)(maxn-1));
+X  nname[maxn-1]='\0';
+X
+X  /* strncat() appends up to n characters PLUS a NUL, so the limit is
+X     the space left minus one */
+X  strncat(nname,".",(size_t)maxn-strlen(nname)-1);
+X  strncat(nname,suff,(size_t)maxn-strlen(nname)-1);
+}
+SHAR_EOF
+chmod 0644 list_db.c ||
+echo 'restore of list_db.c failed'
+Wc_c="`wc -c < 'list_db.c'`"
+test 5150 -eq "$Wc_c" ||
+ echo 'list_db.c: original size 5150, current size' "$Wc_c"
+fi
+# ============= llgetaa.c ==============
+if test -f 'llgetaa.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping llgetaa.c (File already exists)'
+else
+echo 'x - extracting llgetaa.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'llgetaa.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: llgetaa.c,v 1.25 2007/01/08 15:38:46 wrp Exp $ */
+X
+/*
+X Feb, 1998 - version for prss
+X
+X March, 2001 - modifications to support comp_thr.c: use libpos to indicate
+X whether the score is shuffled==1 or unshuffled==0. This simplifies
+X complib.c and makes comp_thr.c possible
+X
+X modified version of nxgetaa.c that generates random sequences
+X for a library
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#include "uascii.h"
+#include "structs.h"
+X
+#define XTERNAL
+#include "upam.h"
+#undef XTERNAL
+X
+#define YES 1
+#define NO 0
+#define MAXLINE 512
+X
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+X
+int nsfnum; /* number of superfamily numbers */
+int sfnum[10]; /* superfamily number from types 0 and 5 */
+int nsfnum_n;
+int sfnum_n[10];
+X
+static int use_stdin=0;
+static char llibstr0[256];
+static char llibstr1[256];
+static char o_line[256];
+X
+#define NO_FORMAT 0
+#define FASTA_FORMAT 1
+#define GCG_FORMAT 2
+static int seq_format=NO_FORMAT;
+static char seq_title[200];
+X
+extern int irand(int);
+extern void shuffle(unsigned char *from, unsigned char *to, int n);
+extern void wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven);
+X
+/* getseq() - read one sequence from filen (stdin when filen is "-" or
+X "@") and encode it into seq[] using the qascii[] table.
+X
+X filen may end in ":start-stop" or ":-stop"; the suffix is cut off
+X while the file is opened, put back afterwards, and used at the end
+X to keep only that part of the sequence.
+X
+X At most maxs residues are stored and seq[n] is set to EOSEQ.  The
+X description is copied into libstr[n_libstr].  On a second and later
+X read from stdin the description is taken from o_line, the header
+X line saved by the previous call.
+X
+X Returns the number of residues, or 0 if the file cannot be opened or
+X the input ends inside a GCG header.
+*/
+int
+getseq(char *filen, int *qascii,
+X unsigned char *seq, int maxs, char *libstr,
+X int n_libstr, long *sq0off)
+{
+X FILE *fptr;
+X char line[512],*bp;
+X int i, j, n;
+X int ic;
+X int sstart, sstop, sset=0;
+X int have_desc = 0;
+X int desc_complete = 0;
+X int llen, l_offset;
+X
+X seq_title[0]='\0';
+X
+X /* look for a ":start-stop" range; under DOS skip a drive letter first */
+X sstart = sstop = -1;
+#ifndef DOS
+X if ((bp=strchr(filen,':'))!=NULL) {
+#else
+X if ((bp=strchr(filen+3,':'))!=NULL) {
+#endif
+X *bp='\0';
+X if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
+X else sscanf(bp+1,"%d-%d",&sstart,&sstop);
+X sset=1;
+X }
+X
+X if (strcmp(filen,"-") && strcmp(filen,"@")) {
+X if ((fptr=fopen(filen,"r"))==NULL) {
+X fprintf(stderr," could not open %s\n",filen);
+X /* NOTE(review): the ':' removed above is not put back on this path */
+X return 0;
+X }
+X }
+X else {
+X fptr = stdin;
+X use_stdin++;
+X }
+X
+X /* later stdin reads: the header was already consumed by the last call */
+X if (use_stdin > 1) {
+X have_desc = 1;
+X if ((bp=strchr(o_line,'\001'))!=NULL) *bp='\0';
+X strncpy(llibstr1,o_line,sizeof(llibstr1));
+X strncpy(libstr,o_line,n_libstr);
+X libstr[n_libstr-1]='\0';
+X l_offset = 0;
+X }
+X
+X if (sset==1) {
+X /* restore the ':' that was replaced by '\0' above */
+X filen[strlen(filen)]=':';
+X if (*sq0off==1 || sstart>1) *sq0off = sstart;
+X }
+X
+X desc_complete = 0;
+X n=0;
+X while(fgets(line,sizeof(line),fptr)!=NULL) {
+X if (line[0]=='>') {
+X /* a second '>' line starts the next entry: save it and stop */
+X if (have_desc) {
+X /* NOTE(review): line[] is 512 bytes and o_line 256, so strncpy()
+X can leave o_line without a terminating NUL */
+X strncpy(o_line,line,sizeof(o_line));
+X goto last;
+X }
+X l_offset = 0;
+X seq_format = FASTA_FORMAT;
+#ifdef STAR_X
+X qascii['*'] = qascii['X'];
+#endif
+X sfnum[0] = nsfnum = 0;
+X
+X if ((bp=(char *)strchr(line,'\n'))!=NULL) {
+X *bp='\0'; /* have newline */
+X desc_complete = 1;
+X }
+X
+X if ((bp=strchr(line+1,'\001'))!=NULL) *bp='\0';
+X strncpy(seq_title,line+1,sizeof(seq_title));
+X strncpy(llibstr0,line+1,sizeof(llibstr0));
+X /* short libstr buffers only get the first word of the description */
+X if (n_libstr <= 20) {
+X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+X }
+X strncpy(libstr,line+1,n_libstr);
+X libstr[n_libstr-1]='\0';
+X
+X /* header longer than line[]: discard the rest of it */
+X if (!desc_complete) {
+X while (fgets(line, sizeof(line), fptr) != NULL) {
+X if (strchr(line,'\n') != NULL) {
+X line[0]='>';
+X break;
+X }
+X }
+X desc_complete = 1;
+X }
+X }
+X else if (seq_format==NO_FORMAT) {
+X /* no '>' seen yet: treat the input as GCG format and skip to
+X the line that ends in ".." */
+X seq_format = GCG_FORMAT;
+X qascii['*'] = qascii['X'];
+X l_offset = 10;
+X llen = strlen(line);
+X /* NOTE(review): llen-3 is negative for lines shorter than 3 bytes */
+X while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
+X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
+X llen = strlen(line);
+X }
+X if (n_libstr <= 20) {
+X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
+X else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
+X }
+X strncpy(libstr,line,n_libstr);
+X libstr[n_libstr-1]='\0';
+X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
+X }
+X
+X if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
+X
+X if (line[0]!='>'&& line[0]!=';') {
+X /* sequence line: encode from l_offset until a code >= EL or
+X until maxs residues are stored; codes >= NA are skipped */
+X for (i=l_offset; (n<maxs)&&
+X ((ic=qascii[line[i]&AAMASK])<EL); i++)
+X if (ic<NA) seq[n++]= ic;
+X if (ic == ES) break;
+X }
+X else {
+X if (have_desc) {
+X strncpy(o_line,line,sizeof(o_line));
+X goto last;
+X }
+X else {
+X have_desc = 1;
+X }
+X }
+X }
+X
+X last:
+X if (n==maxs) {
+X fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
+X fflush(stderr);
+X }
+X if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
+X if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
+X seq[n]= EOSEQ;
+X
+X if (fptr!=stdin) fclose(fptr);
+X
+X /* keep only residues sstart..sstop (1-based), moved to the front */
+X if (sset) {
+X if (sstart <= 0) sstart = 1;
+X if (sstop <= 0) sstop = n;
+X sstart--;
+X sstop--;
+X /* NOTE(review): sstop is not checked against n */
+X for (i=0, j=sstart; j<=sstop; i++,j++)
+X seq[i] = seq[j];
+X n = sstop - sstart +1;
+X seq[n]=EOSEQ;
+X }
+X
+X return n;
+}
+X
+/* gettitle() - copy the title of the sequence in filen into title[len].
+X
+X   When sequences are being read from stdin the title saved by
+X   getseq() is used (llibstr0 for the first sequence, llibstr1 later).
+X   Otherwise filen is opened, with any ":range" suffix ignored, and
+X   the first line starting with '>' or ';' is returned, cut at the
+X   first '\001' or end of line.
+X
+X   title is always NUL-terminated.  Returns the title length, or 0 if
+X   the file cannot be opened, has no title line, or len <= 0.
+*/
+int
+gettitle(char *filen, char *title, int len)
+{
+X  FILE *fptr;
+X  char line[512];
+X  char *bp;
+X  int sset;
+#ifdef WIN32
+X  char *strpbrk();
+#endif
+X  sset = 0;
+X
+X  if (len <= 0) return 0;
+X
+X  if (use_stdin) {
+X    if (use_stdin == 1) {
+X      /*      use_stdin++; */
+X      strncpy(title,llibstr0,len);
+X    }
+X    else {
+X      strncpy(title,llibstr1,len);
+X    }
+X    /* strncpy() does not terminate when the source fills the buffer */
+X    title[len-1]='\0';
+X    if ((bp=strchr(title,'\001'))!=NULL) *bp='\0';
+X    return strlen(title);
+X  }
+X
+X  /* hide a ":start-stop" suffix while the file is opened */
+X  if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
+X
+X  fptr=fopen(filen,"r");
+X
+X  /* put the ':' back whether or not the open worked, so the caller's
+X     string is never left truncated */
+X  if (sset==1) filen[strlen(filen)]=':';
+X
+X  if (fptr==NULL) {
+X    fprintf(stderr," file %s was not found\n",filen);
+X    fflush(stderr);
+X    return 0;
+X  }
+X
+X  while(fgets(line,sizeof(line),fptr)!=0) {
+X    if (line[0]=='>'|| line[0]==';') goto found;
+X  }
+X  fclose(fptr);
+X  title[0]='\0';
+X  return 0;
+X
+X found:
+X  if ((bp=strchr(line,'\001'))!=NULL) *bp = 0;
+#ifdef WIN32
+X  bp = strpbrk(line,"\n\r");
+#else
+X  bp = strchr(line,'\n');
+#endif
+X  if (bp!=NULL) *bp = 0;
+X  strncpy(title,line,len);
+X  title[len-1]='\0';
+X  fclose(fptr);
+X  return strlen(title);
+}
+X
+FILE *libf=NULL;
+X
+long lpos;
+char lline[MAXLINE];
+int lfflag=0; /* flag for CRLF in EMBL CDROM files */
+#define LFCHAR '\015' /* for MWC 5.5 */
+X
+int agetlib(); void aranlib(); /* pearson fasta format */
+X
+/* the following is from fgetgb.c */
+X
+/* a file name for openlib may now include a library type suffix */
+/* only opens fasta format files */
+X
+static char libn_save[MAX_FN];
+static int ldna_save=0;
+static int do_shuffle;
+static int shuff_cnt=10;
+static int w_flag = 0;
+#ifdef DEBUG
+static FILE *dfile=NULL;
+#endif
+static unsigned char *aa_save;
+static int n1_save;
+static int i_even;
+X
+/* lmf_str * is used here for compatibility with the "normal" openlib,
+X but is largely unnecessary */
+X
+/* set_shuffle() - take the shuffle settings from m_msg.
+X   shuff_wid > 0 sets the window size w_flag; shuff_max replaces the
+X   shuffle count only when it is larger than the current count.
+X   With DEBUG, and when m_msg.dfile is set, "<dfile>_rlib" is opened
+X   for writing as dfile.
+*/
+void
+set_shuffle(struct mngmsg m_msg) {
+#ifdef DEBUG
+X  char dfname[MAX_FN];
+#endif
+X
+X  if (m_msg.shuff_wid > 0) w_flag = m_msg.shuff_wid;
+X  if (m_msg.shuff_max > shuff_cnt) shuff_cnt = m_msg.shuff_max;
+X
+#ifdef DEBUG
+X  if (m_msg.dfile[0]!='\0') {
+X    strncpy(dfname,m_msg.dfile,sizeof(dfname)-1);
+X    dfname[sizeof(dfname)-1]='\0';
+X    /* strncat()'s limit is the space left, not the buffer size */
+X    strncat(dfname,"_rlib",sizeof(dfname)-strlen(dfname)-1);
+X    dfile = fopen(dfname,"w");
+X  }
+#endif
+}
+X
+/* openlib() - set up a library descriptor for the shuffled-sequence
+X   "library".
+X
+X   No file is opened here: lname is saved in libn_save for agetlib(),
+X   which reads it on its first call, and a zeroed lmf_str is returned
+X   with agetlib/aranlib installed.  ldnaseq, quiet and m_fd are not
+X   used.  The shuffle state is reset and irand(0) is called.
+X
+X   Returns the new descriptor, or NULL if it cannot be allocated.
+*/
+struct lmf_str *
+openlib(char *lname, int ldnaseq, int *sascii, int quiet, struct lmf_str *m_fd)
+{
+X  struct lmf_str *m_fptr;
+X
+X  strncpy(libn_save,lname,sizeof(libn_save));
+X  libn_save[sizeof(libn_save)-1]='\0';	/* strncpy() may not terminate */
+X
+X  /* now allocate a buffer for the opened text file */
+X  if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
+X    /* sizeof yields size_t; cast it to match %ld */
+X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+X	    (long)sizeof(struct lmf_str),lname);
+X    return NULL;
+X  }
+X
+X  strncpy(m_fptr->lb_name,lname,MAX_FN);
+X  m_fptr->lb_name[MAX_FN-1]='\0';
+X
+X  m_fptr->sascii = sascii;
+X  m_fptr->getlib = agetlib;
+X  m_fptr->ranlib = aranlib;
+X  m_fptr->mm_flg = 0;
+X
+X  do_shuffle = 0;
+X  irand(0);	/* initialize the random number generator */
+X
+X  return m_fptr;
+}
+X
+/* closelib() - close libf and, with DEBUG, the debug output file.
+X   Both pointers are reset, so calling it again is harmless. */
+void
+closelib()
+{
+X  if (libf!=NULL) {
+X    fclose(libf);
+X    libf = NULL;
+X  }
+#ifdef DEBUG
+X  if (dfile) {
+X    fclose(dfile);
+X    dfile = NULL;	/* avoid a second fclose() on the same stream */
+X  }
+#endif
+}
+X
+static int ieven=0;
+static char *desc_save;
+X
+/* agetlib() - return the next "library" sequence in seq[].
+X
+X   The first call after openlib() reads the real sequence from the
+X   file saved in libn_save, keeps a private copy of it and of its
+X   description, sets *libpos = 0 and returns its length (or the
+X   getseq() result when that is < 1).
+X
+X   Every later call returns a shuffled copy of the saved sequence
+X   (window shuffle when w_flag > 0) with *libpos = 1, until the
+X   shuffle count is used up; it then returns -1.  It also returns -1
+X   when no sequence copy could be saved.
+X
+X   libstr[n_libstr] receives the description and *l_off is set to 1.
+*/
+int
+agetlib(unsigned char *seq,
+X	int maxs,
+X	char *libstr,
+X	int n_libstr,
+X	fseek_t *libpos,
+X	int *lcont,
+X	struct lmf_str *lf_fd,
+X	long *l_off)
+{
+X  long sq1_off;
+X  char lib_desc[120];
+#ifdef DEBUG
+X  int i;
+#endif
+X
+X  *l_off = 1;
+X
+X  if (!do_shuffle) {
+X    do_shuffle = 1;
+X
+X    /* getseq() reads *sq0off, so give it a defined value; the result
+X       is not used here */
+X    sq1_off = 1;
+X    if ((n1_save = getseq(libn_save,lf_fd->sascii,
+X			  seq,maxs,lib_desc,sizeof(lib_desc),&sq1_off)) < 1)
+X      return n1_save;
+X
+X    strncpy(libstr,lib_desc,n_libstr);
+X    libstr[n_libstr-1]='\0';
+X
+X    /* release copies left over from an earlier library, if any */
+X    free(aa_save);
+X    free(desc_save);
+X
+X    if ((aa_save = (unsigned char *)calloc(n1_save+1,sizeof(unsigned char)))==
+X	NULL) {
+X      fprintf(stderr," cannot allocate %d for saved sequence\n",
+X	      n1_save);
+X    }
+X    else {
+X      memcpy((void *)aa_save,(void *)seq,n1_save);
+X    }
+X
+X    if ((desc_save =
+X	 (char *)calloc(strlen(lib_desc)+1,sizeof(char)))== NULL) {
+X      fprintf(stderr," cannot allocate saved desciption [%d]\n",
+X	      (int)(strlen(lib_desc)+1));
+X    }
+X    else {
+X      /* the buffer was sized for lib_desc plus its terminator */
+X      strcpy(desc_save,lib_desc);
+X    }
+X
+X    *libpos = 0;
+X    return n1_save;
+X  }
+X  else {	/* return a shuffled sequence - here we need a window size; */
+X    strncpy(libstr,(desc_save!=NULL) ? desc_save : "",n_libstr);
+X    libstr[n_libstr-1]='\0';
+X
+X    /* nothing to shuffle if the first read or the copy failed */
+X    if (aa_save == NULL) return -1;
+X
+X    if (shuff_cnt-- <= 0 ) return -1;
+X    if (w_flag > 0) wshuffle(aa_save,seq,n1_save,w_flag,&ieven);
+X    else shuffle(aa_save,seq,n1_save);
+X    seq[n1_save] = EOSEQ;
+#ifdef DEBUG
+X    if (dfile!=NULL) {
+X      fprintf(dfile,">%d\n",shuff_cnt);
+X      for (i=0; i<n1_save; i++) {
+X	if (aa[seq[i]]>0) fputc(aa[seq[i]],dfile);
+X	else {fprintf(stderr,"error aa0[%d]: %d %d\n",
+X		      i,seq[i],aa[seq[i]]);}
+X	if (i%60 == 59) fputc('\n',dfile);
+X      }
+X      fputc('\n',dfile);
+X    }
+#endif
+X    *libpos = 1;
+X    return n1_save;
+X  }
+}
+X
+/* aranlib() - copy the description of the current sequence into
+X   str[cnt].
+X
+X   When the second sequence was read from stdin (use_stdin == 2) the
+X   description is llibstr1 without a leading '>' or ';'; otherwise it
+X   is the description saved by agetlib(), or "" if none was saved.
+X   The copy is cut at the first '\001', or else at the first newline,
+X   and is always NUL-terminated.  seek, libstr and lm_fd are not used.
+*/
+void
+aranlib(char *str,
+X	int cnt,
+X	fseek_t seek,
+X	char *libstr,
+X	struct lmf_str *lm_fd)
+{
+X  char *bp;
+X  const char *src;
+X
+X  if (cnt <= 0) return;
+X
+X  if (use_stdin == 2) {
+X    src = llibstr1;
+X    if (src[0]=='>' || src[0]==';') src++;
+X  }
+X  else {
+X    /* desc_save stays NULL until agetlib() has saved a description */
+X    src = (desc_save != NULL) ? desc_save : "";
+X  }
+X
+X  strncpy(str,src,cnt);
+X  str[cnt-1]='\0';
+X  if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';
+X  else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+}
+X
+/*
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+X unsigned char tmp;
+X int i, ni;
+X
+X
+X for (i=0, ni = n-1; i< n/2; i++,ni--) {
+X tmp = c_nt[seq[i]];
+X seq[i] = c_nt[seq[ni]];
+X seq[ni] = tmp;
+X }
+X if ((n%2)==1) {
+X i = n/2;
+X seq[i] = c_nt[seq[i]];
+X }
+}
+*/
+X
+/* re_openlib() - nothing is re-opened in this version: the descriptor
+X passed in is returned unchanged and outtty is ignored. */
+struct lmf_str *
+re_openlib(struct lmf_str *om_fptr, int outtty)
+{
+X return om_fptr;
+}
+X
+/* re_getlib() - stub: no sequence data is re-read.
+X
+X   Stores 0 in *loffset and 1 in *l_off, then returns n1 unchanged.
+X   All other arguments are ignored. */
+int
+re_getlib(unsigned char *aa1,
+X	  int n1,
+X	  int maxt3,
+X	  int loff,
+X	  int cont,
+X	  int term_code,
+X	  long *loffset,
+X	  long *l_off,
+X	  struct lmf_str *m_file_p)
+{
+X  int n_ret = n1;
+X
+X  loffset[0] = 0;
+X  l_off[0] = 1;
+X
+X  return n_ret;
+}
+X
+SHAR_EOF
+chmod 0644 llgetaa.c ||
+echo 'restore of llgetaa.c failed'
+Wc_c="`wc -c < 'llgetaa.c'`"
+test 10617 -eq "$Wc_c" ||
+ echo 'llgetaa.c: original size 10617, current size' "$Wc_c"
+fi
+# ============= m1r.aa ==============
+if test -f 'm1r.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping m1r.aa (File already exists)'
+else
+echo 'x - extracting m1r.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'm1r.aa' &&
+>test | 40001 90043 | mgstm1
+MGCEN,
+MIDYP,
+MLLAY,
+MLLGY
+SHAR_EOF
+chmod 0644 m1r.aa ||
+echo 'restore of m1r.aa failed'
+Wc_c="`wc -c < 'm1r.aa'`"
+test 56 -eq "$Wc_c" ||
+ echo 'm1r.aa: original size 56, current size' "$Wc_c"
+fi
+# ============= m2.aa ==============
+if test -f 'm2.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping m2.aa (File already exists)'
+else
+echo 'x - extracting m2.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'm2.aa' &&
+>tests from mgstm1
+MILGYW,
+MLLEYT,
+MGDAPD,
+MLCYNP
+SHAR_EOF
+chmod 0644 m2.aa ||
+echo 'restore of m2.aa failed'
+Wc_c="`wc -c < 'm2.aa'`"
+test 50 -eq "$Wc_c" ||
+ echo 'm2.aa: original size 50, current size' "$Wc_c"
+fi
+# ============= make_osx_univ.sh ==============
+if test -f 'make_osx_univ.sh' -a X"$1" != X"-c"; then
+ echo 'x - skipping make_osx_univ.sh (File already exists)'
+else
+echo 'x - extracting make_osx_univ.sh (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'make_osx_univ.sh' &&
+#!/bin/csh
+# Build the programs twice (once per Makefile below) and merge the two
+# sets of executables into single files under bin/ with lipo.
+# NOTE(review): presumably Makefile.os_x installs into ppc/ and
+# Makefile.os_x86 into i386/ - the lipo step relies on that; confirm
+# against the two Makefiles.
+X
+# first build: compile, install, then remove the intermediate files
+make -f Makefile.os_x all
+make -f Makefile.os_x install
+make -f Makefile.os_x clean-up
+X
+# second build, same three steps with the x86 Makefile
+make -f Makefile.os_x86 all
+make -f Makefile.os_x86 install
+make -f Makefile.os_x86 clean-up
+X
+# for every file found in ppc/, combine it with the file of the same
+# name in i386/ and write the result to bin/
+foreach n ( ppc/* )
+# $n:t is the last path component (file name without the ppc/ prefix)
+set f=$n:t
+lipo -create ppc/$f i386/$f -output bin/$f
+echo "Universal $f built"
+end
+echo "Done!"
+X
+SHAR_EOF
+chmod 0755 make_osx_univ.sh ||
+echo 'restore of make_osx_univ.sh failed'
+Wc_c="`wc -c < 'make_osx_univ.sh'`"
+test 312 -eq "$Wc_c" ||
+ echo 'make_osx_univ.sh: original size 312, current size' "$Wc_c"
+fi
+# ============= map_db.1 ==============
+if test -f 'map_db.1' -a X"$1" != X"-c"; then
+ echo 'x - skipping map_db.1 (File already exists)'
+else
+echo 'x - extracting map_db.1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'map_db.1' &&
+.TH MAP_DB "September, 1999"
+.SH NAME
+.B map_db
+\- read a FASTA (0), GENBANK flat file (1) PIR/VMS (5) or GCG binary
+(6) sequence database and produce the offsets necessary for efficient
+memory mapping.
+.SH SYNOPSIS
+.B map_db
+[-n] filename | "filename libtype"
+.SH DESCRIPTION
+.B map_db
+.I filename
+reads the sequence database in
+.I filename
+and produces a new file
+.I filename.xin
+with the offset information necessary for efficient memory mapping.
+.LP
+The programs in fasta version 32t08 can use memory mapped i/o to load
+sequence database files and read them efficiently. Memory mapping is
+used only if a "\c
+.I .xin\c
+\&" file is available. The "\c
+.I .xin\c
+\&" file is created by
+.B map_db\c
+\&.
+.LP
+In addition to
+.B map_db\c
+\&,
+.B list_db
+is available to display the database size, etc, and set of offsets calculated
+by
+.B map_db\c
+\&.
+.SH OPTIONS
+.TP
+\-n
+Read file as DNA database.
+.SH BUGS
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 map_db.1 ||
+echo 'restore of map_db.1 failed'
+Wc_c="`wc -c < 'map_db.1'`"
+test 948 -eq "$Wc_c" ||
+ echo 'map_db.1: original size 948, current size' "$Wc_c"
+fi
+# ============= map_db.c ==============
+if test -f 'map_db.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping map_db.c (File already exists)'
+else
+echo 'x - extracting map_db.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'map_db.c' &&
+/* map_db.c - read a FASTA or GCG format database and generate a list
+X of indices for rapid memory mapping */
+X
+/* copyright (c) 1999 William R. Pearson */
+X
+/* $Name: fa_34_26_5 $ - $Id: map_db.c,v 1.9 2005/09/27 15:32:58 wrp Exp $ */
+X
+/* input is a libtype 0, 1, 5, or 6 sequence database */
+/* output is a BLAST2 formatdb type index file */
+X
+/* format of the index file:
+X
+1) map_db version number ["MP"+2 bytes]
+2) number of sequences in database [4 bytes]
+3) total length of database [8 bytes] (MP1, 4 bytes for MP0)
+4) longest sequence in database [8 bytes] (MP1, 4 bytes for MP0)
+5) list of offsets to definitions [num_seq+1] int*8 (MP1, 4 bytes for MP0)
+6) list of offsets to sequences [num_seq+1] int*8 (MP1, 4 bytes for MP0)
+7) list of flag characters for sequences [num_seq+1]bytes
+X (used for GCG binary to encode 2bit or 4 bit representation)
+X
+X sequence files will be as defined by their format
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include <sys/types.h>
+#include <sys/stat.h>
+X
+#include "uascii.h"
+#include "ncbl2_head.h"
+X
+#define GCGBIN 6
+#define LASTLIB 6
+X
+int (*get_entry) ();
+X
+int a_get_ent(long *, long *);
+int v_get_ent(long *, long *);
+int gcg_get_ent(long *, long *);
+int gbf_get_ent(long *, long *);
+X
+void src_int4_write(FILE *, int);
+void src_int4_read(FILE *, int *);
+void src_long4_write(FILE *, long);
+void src_long4_read(FILE *, long *);
+void src_long8_write(FILE *, long);
+void src_long8_read(FILE *, long *);
+X
+void newname(char *nname, char *oname, char *suff, int maxn);
+X
+int (*get_ent_arr[LASTLIB+1])()={a_get_ent, gbf_get_ent, NULL, NULL, NULL,
+X v_get_ent, gcg_get_ent};
+X
+long openlib(char *, int);
+X
+static int *sascii;
+X
+/* main() - scan a sequence library and write its ".xin" offset index.
+X
+X   usage: map_db [-n] filename | "filename libtype"
+X
+X   -n selects the nucleotide lookup table (nascii) instead of the
+X   amino-acid table (aascii).  The library name is taken from the first
+X   non-option argument, or read from stdin when none is given; a number
+X   after a blank in the name selects the library format, which must have
+X   a reader in get_ent_arr[].
+X
+X   Every entry found by the format reader contributes one description
+X   offset and one sequence offset; the arrays grow by lib_inc entries
+X   whenever they fill.  Exits with status 1 on any error, returns 0 on
+X   success.
+*/
+int
+main(int argc, char **argv)
+{
+X  FILE *libi;
+X  char lname[256];	/* library file name */
+X  char iname[256];	/* index file name (lname + ".xin") */
+X  char format[4];
+X  char *bp;
+X
+X  int i;
+X  int nlib;		/* number of entries */
+X  int wr_err;		/* non-zero if writing the index failed */
+X
+X  long max_len;		/* longest sequence */
+X  long tot_len;		/* total sequence length */
+X
+X  int n1;
+X
+X  long f_size;		/* file size reported by openlib() */
+X  int lib_size;		/* current space available - may be realloc'ed */
+X  int lib_inc;
+X  int lib_type;		/* library format, index into get_ent_arr[] */
+X  int lib_aa;		/* 1: use aascii table; 0: use nascii table */
+X
+X  /* file offsets */
+X  long d_pos;		/* start of description */
+X  long s_pos;		/* start of sequence */
+X  long *d_pos_arr;	/* array of description offsets */
+X  long *s_pos_arr;	/* array of sequence offsets */
+X
+X  lib_type = 0;
+X  lib_size = 200000;
+X  lib_inc = 100000;
+X
+X  lib_aa = 1;
+X
+X  /* not every reader stores *d_pos before it reports end of input, so
+X     give both offsets a defined value for the final array slot */
+X  d_pos = 0;
+X  s_pos = 0;
+X
+X  while (argc > 1 && *argv[1]=='-') {
+X    if (strcmp(argv[1],"-n")==0) lib_aa = 0;
+X    argv++;
+X    argc--;
+X  }
+X
+X  /* open the database */
+X  if (argc > 1) {
+X    /* strncpy() does not terminate when the source fills the buffer */
+X    strncpy(lname, argv[1],sizeof(lname)-1);
+X    lname[sizeof(lname)-1]='\0';
+X  }
+X  else {
+X    fprintf(stderr," Entry library name: ");
+X    if (fgets(lname,sizeof(lname),stdin)==NULL) {
+X      fprintf(stderr," no library name given\n");
+X      exit(1);
+X    }
+X    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
+X  }
+X
+X  if ((bp=strchr(lname,' '))!=NULL) {
+X    lib_type = atoi(bp+1);
+X    *bp='\0';
+X  }
+X  else lib_type = 0;
+X
+X  /* range-check before indexing: lib_type comes straight from user input */
+X  if (lib_type < 0 || lib_type > LASTLIB || get_ent_arr[lib_type] == NULL) {
+X    fprintf(stderr," cannot index file %s type %d\n",lname,lib_type);
+X    exit(1);
+X  }
+X
+X  /* formats 6 and 1 always use the nucleotide table */
+X  if (lib_type == 6) lib_aa = 0;
+X  if (lib_type == 1) lib_aa = 0;
+X
+X  if (lib_aa == 1) sascii = aascii;
+X  else sascii = nascii;
+X
+X  if ((f_size=openlib(lname,lib_type))==0) {
+X    fprintf(stderr," cannot open %s (type: %d)\n",lname,lib_type);
+X    exit(1);
+X  }
+X
+X  /* allocate array of description pointers */
+X  if ((d_pos_arr=(long *)calloc(lib_size, sizeof(long)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for desc. array\n",lib_size);
+X    exit(1);
+X  }
+X  /* allocate array of sequence pointers */
+X  if ((s_pos_arr=(long *)calloc(lib_size, sizeof(long)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for seq. array\n",lib_size);
+X    exit(1);
+X  }
+X
+X  nlib = 0; tot_len=0; max_len=-1;
+X  while ((n1=get_entry(&d_pos, &s_pos)) > 0) {
+X    d_pos_arr[nlib] = d_pos;
+X    s_pos_arr[nlib] = s_pos;
+X    nlib++;
+X    tot_len += n1;
+X    if (n1 > max_len) max_len = n1;
+X    /* grow as soon as the arrays are full so that slot [nlib] is
+X       always available for the terminating entry written below */
+X    if (nlib >= lib_size) {
+X      lib_size += lib_inc;
+X      if ((d_pos_arr=(long *)realloc(d_pos_arr,lib_size*sizeof(long)))==NULL) {
+X	fprintf(stderr," cannot realloc allocate %d for desc.. array\n",
+X		lib_size);
+X	exit(1);
+X      }
+X      if ((s_pos_arr=(long *)realloc(s_pos_arr,lib_size*sizeof(long)))==NULL) {
+X	fprintf(stderr," cannot realloc allocate %d for seq. array\n",
+X		lib_size);
+X	exit(1);
+X      }
+X    }
+X  }
+X
+X  d_pos_arr[nlib]= d_pos;	/* put in the end of the file */
+X  s_pos_arr[nlib]=0;
+X
+X  /* all the information is in, write it out */
+X
+X  newname(iname,lname,"xin",sizeof(iname));
+X
+X  if ((libi=fopen(iname,"w"))==NULL) {
+X    fprintf(stderr," cannot open %s for writing\n",iname);
+X    exit(1);
+X  }
+X
+X  /* write out format version */
+X  format[0]='M';
+X  format[1]='P';
+#ifdef BIG_LIB64
+X  format[2]= 1;		/* format 1 for 8-byte offsets */
+#else
+X  format[2]='\0';	/* format '\0' for original 4-byte */
+#endif
+X
+X  format[3]=lib_type;
+X  fwrite(format,4,sizeof(char),libi);
+X
+X  /* write out sequence type */
+X  src_int4_write(libi, lib_aa);
+X
+X  /* write out the library file size as integrity check */
+#ifdef BIG_LIB64
+X  src_long8_write(libi, f_size);
+#else
+X  src_int4_write(libi, f_size);
+#endif
+X
+X  /* write out num_seq */
+X  src_int4_write(libi, nlib);
+X
+X  /* write out tot_len, max_len */
+#ifdef BIG_LIB64
+X  src_long8_write(libi, tot_len);
+#else
+X  src_int4_write(libi, tot_len);
+#endif
+X  src_int4_write(libi, max_len);
+X
+X  /* nlib+1 offsets each: the extra one is the terminating entry */
+#ifdef BIG_LIB64
+X  for (i=0; i<=nlib; i++) src_long8_write(libi,d_pos_arr[i]);
+X  for (i=0; i<=nlib; i++) src_long8_write(libi,s_pos_arr[i]);
+#else
+X  for (i=0; i<=nlib; i++) src_int4_write(libi,d_pos_arr[i]);
+X  for (i=0; i<=nlib; i++) src_int4_write(libi,s_pos_arr[i]);
+#endif
+X
+X  /* a truncated index is worse than none: report failed writes */
+X  wr_err = ferror(libi);
+X  if (fclose(libi) != 0) wr_err = 1;
+X  if (wr_err) {
+X    fprintf(stderr," error writing %s\n",iname);
+X    exit(1);
+X  }
+X
+X  fprintf(stderr," wrote %d sequences (tot=%ld, max=%ld) to %s\n",
+X	  nlib,tot_len,max_len,iname);
+X
+X  return 0;
+}
+X
+X
+FILE *libf=NULL;
+long lpos;
+X
+#define MAXLINE 4096
+char lline[MAXLINE+1];
+X
+/* openlib() - open library file lname and prime the line buffer.
+X
+X   Leaves the stream in libf, selects the entry reader for lib_type in
+X   get_entry, records the starting offset in lpos and reads the first
+X   line into lline.
+X
+X   Returns the file size reported by stat(), or 0 if the file cannot be
+X   stat'ed, cannot be opened, or has no first line. */
+long
+openlib(char *lname, int lib_type)
+{
+X  struct stat lib_stat;
+X  long lib_bytes;
+X
+X  if (stat(lname,&lib_stat)<0) {
+X    fprintf(stderr," cannot stat library: %s\n",lname);
+X    return 0;
+X  }
+X
+X  libf = fopen(lname,"r");
+X  if (libf == NULL) {
+X    fprintf(stderr," cannot open library: %s (type: %d)\n",
+X	    lname, lib_type);
+X    return 0;
+X  }
+X
+X  lib_bytes = lib_stat.st_size;
+X
+X  get_entry = get_ent_arr[lib_type];
+X
+X  lpos = ftell(libf);
+X  if (fgets(lline,MAXLINE,libf)!=NULL) return lib_bytes;
+X
+X  return 0;
+}
+X
+/* a_get_ent() - entry reader for library format 0.
+X
+X   Skips forward to the next line starting with '>' or ';', stores the
+X   offset of that line in *d_pos and the offset of the line after it in
+X   *s_pos, then counts the characters of the following lines whose
+X   sascii[] value is below NA, stopping at the next '>' line (which is
+X   left in lline for the next call).
+X
+X   Returns the count, 0 when no further entry is found, or -1 when a
+X   ';' line is too long for the line buffer. */
+int
+a_get_ent(long *d_pos, long *s_pos)
+{
+X  register char *cp;
+X  register int *ap, n1;
+X  int ch;
+X
+X  ap = sascii;
+X
+X  while (lline[0]!='>' && lline[0]!=';') {
+X    lpos = ftell(libf);
+X    if (fgets(lline,sizeof(lline),libf)==NULL) {
+X      *d_pos = lpos;
+X      return 0;
+X    }
+X  }
+X
+X  *d_pos = lpos;
+X
+X  /* make certain we have the end of the line */
+X  while (strchr((char *)lline,'\n')==NULL) {
+X    if (fgets(lline,sizeof(lline),libf)==NULL) break;
+X  }
+X
+X  *s_pos = ftell(libf);
+X  lline[0]='\0';
+X  n1 = 0;
+X  while (fgets(lline,sizeof(lline),libf)!=NULL) {
+X    if (lline[0]=='>') break;
+X    if (lline[0]==';') {
+X      if (strchr(lline,'\n')==NULL) {
+X	fprintf(stderr," excessive continuation\n%s",lline);
+X	return -1;
+X      }
+X    }
+X
+X    /* Index the table with a non-negative value only: a plain char is
+X       negative for bytes >= 0x80 where char is signed.  Such bytes are
+X       never counted.
+X       NOTE(review): assumes the lookup table has at least 128 entries
+X       - confirm against uascii.h. */
+X    for (cp=lline; *cp; cp++) {
+X      ch = (unsigned char)*cp;
+X      if (ch < 128 && ap[ch]<NA) n1++;
+X    }
+X    lpos = ftell(libf);
+X  }
+X  return n1;
+}
+X
+/* v_get_ent() - entry reader for library format 5.
+X
+X   Skips forward to the next line starting with '>' or ';' (the seq_id
+X   line), stores its offset in *d_pos, reads the description line that
+X   follows, stores the offset after it in *s_pos, then counts the
+X   characters of the following lines whose sascii[] value is below NA,
+X   stopping at the next '>' line (left in lline for the next call).
+X
+X   Returns the count, or 0 when no further entry is found. */
+int
+v_get_ent(long *d_pos, long *s_pos)
+{
+X  register char *cp;
+X  register int *ap;
+X  int n1;
+X  int ch;
+X
+X  ap = sascii;
+X
+X  /* check for seq_id line */
+X  while (lline[0]!='>' && lline[0]!=';') {
+X    lpos = ftell(libf);
+X    if (fgets(lline,sizeof(lline),libf)==NULL) {
+X      *d_pos = lpos;
+X      return 0;
+X    }
+X  }
+X  *d_pos = lpos;
+X
+X  /* get the description line */
+X  if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
+X  /* make certain we have the end of the line */
+X  while (strchr((char *)lline,'\n')==NULL) {
+X    if (fgets(lline,sizeof(lline),libf)==NULL) break;
+X  }
+X
+X  *s_pos = ftell(libf);
+X  lline[0]='\0';
+X  n1 = 0;
+X  while (fgets(lline,sizeof(lline),libf)!=NULL) {
+X    if (lline[0]=='>') break;
+X
+X    /* Index the table with a non-negative value only: a plain char is
+X       negative for bytes >= 0x80 where char is signed.  Such bytes are
+X       never counted.
+X       NOTE(review): assumes the lookup table has at least 128 entries
+X       - confirm against uascii.h. */
+X    for (cp=lline; *cp; cp++) {
+X      ch = (unsigned char)*cp;
+X      if (ch < 128 && ap[ch]<NA) n1++;
+X    }
+X    lpos = ftell(libf);
+X  }
+X  return n1;
+}
+X
+static char gcg_type[10];
+static long gcg_len;
+static int gcg_bton[4]={2,4,1,3};
+X
+/* gcg_get_ent() - entry reader for library format 6 (GCGBIN).
+X
+X   Skips forward to the next line starting with '>', stores its offset
+X   in *d_pos, and parses five blank-separated fields starting at column
+X   4 of that line; the third is kept in gcg_type and the fifth (a
+X   number) in gcg_len.  The following line is read as the description,
+X   the offset after it is stored in *s_pos, and the stream is moved
+X   past the sequence data: (gcg_len+3)/4 bytes when gcg_type starts
+X   with '2', gcg_len bytes otherwise, plus one for the newline.
+X
+X   Returns gcg_len, 0 when no further entry (or no description line) is
+X   found, or -1 when the header line cannot be parsed. */
+int
+gcg_get_ent(long *d_pos, long *s_pos)
+{
+X  /* each token buffer is as large as lline itself, so no field of the
+X     header line can overflow it */
+X  char libstr[MAXLINE+1], gcg_date[MAXLINE+1];
+X  char type_str[MAXLINE+1], dummy[MAXLINE+1];
+X  int r_block;
+X
+X  /* check for seq_id line */
+X  while (lline[0]!='>') {
+X    lpos = ftell(libf);
+X    if (fgets(lline,sizeof(lline),libf)==NULL) {
+X      *d_pos = lpos;
+X      return 0;
+X    }
+X  }
+X  *d_pos = lpos;
+X
+X  /* get the encoding/sequence length info; a line shorter than 4
+X     characters has nothing valid at &lline[4] */
+X  if (strlen(lline) < 4 ||
+X      sscanf(&lline[4],"%s %s %s %s %ld",
+X	     libstr,gcg_date,type_str,dummy,&gcg_len) != 5 ||
+X      gcg_len < 0) {
+X    fprintf(stderr," cannot parse GCG header: %s",lline);
+X    return -1;
+X  }
+X  strncpy(gcg_type,type_str,sizeof(gcg_type)-1);
+X  gcg_type[sizeof(gcg_type)-1]='\0';
+X
+X  /* get the description line */
+X  if (fgets(lline,MAXLINE,libf)==NULL) return 0;
+X
+X  *s_pos = ftell(libf);
+X  /* seek to the end of the sequence; +1 to jump over newline */
+X  if (gcg_type[0]=='2') {
+X    r_block = (gcg_len+3)/4;
+X    fseek(libf,r_block+1,SEEK_CUR);
+X  }
+X  else fseek(libf,gcg_len+1,SEEK_CUR);
+X
+X  lpos = ftell(libf);
+X  /* at end of file clear the buffer so that the stale description line
+X     cannot be mistaken for a header on the next call */
+X  if (fgets(lline,MAXLINE,libf)==NULL) lline[0]='\0';
+X
+X  return (int)gcg_len;
+}
+X
+/* gbf_get_ent() - entry reader for library format 1.
+X
+X   Skips forward to the next line starting with "LOCUS" and stores its
+X   offset in *d_pos, then to the next line starting with "ORIGIN" and
+X   stores the offset after it in *s_pos.  Characters of the following
+X   lines whose lookup-table value is below NA are counted until a line
+X   starting with '/' is met; the line after that is left in lline for
+X   the next call.
+X
+X   Returns the count, or -1 when no LOCUS or ORIGIN line is found. */
+int
+gbf_get_ent(long *d_pos, long *s_pos)
+{
+X  int n1;
+X  int ch;
+X  char *cp;
+X  register int *ap;
+X
+#if !defined(TFAST)
+X  ap = sascii;
+#else
+X  ap = nascii;
+#endif
+X
+X  while (lline[0]!='L' || lline[1]!='O' ||
+X	 strncmp(lline,"LOCUS",5)) { /* find LOCUS */
+X    lpos = ftell(libf);
+X    if (fgets(lline,MAXLINE,libf)==NULL) return (-1);
+X  }
+X  *d_pos=lpos;
+X
+X  while (lline[0]!='O' || lline[1]!='R' ||
+X	 strncmp(lline,"ORIGIN",6)) { /* find ORIGIN */
+X    if (fgets(lline,MAXLINE,libf)==NULL) return (-1);
+X  }
+X  *s_pos = ftell(libf);
+X
+X  lline[0]='\0';
+X  n1=0;
+X  while (fgets(lline,MAXLINE,libf)!=NULL) {
+X    if (lline[0]=='/') break;
+X    /* Index the table with a non-negative value only: a plain char is
+X       negative for bytes >= 0x80 where char is signed.  Such bytes are
+X       never counted.
+X       NOTE(review): assumes the lookup table has at least 128 entries
+X       - confirm against uascii.h. */
+X    for (cp=lline; *cp; cp++) {
+X      ch = (unsigned char)*cp;
+X      if (ch < 128 && ap[ch]<NA) n1++;
+X    }
+X  }
+X  lpos = ftell(libf);
+X  fgets(lline,MAXLINE,libf);
+X
+X  return n1;
+}
+X
+/* src_int4_read() - read a 4-byte, most-significant-byte-first integer
+X   from fd into *val.
+X
+X   With IS_BIG_ENDIAN defined the four bytes are read directly into
+X   *val; otherwise they are assembled one byte at a time.  If fewer
+X   than four bytes can be read, *val is set to 0 rather than being
+X   built from uninitialized data. */
+void src_int4_read(FILE *fd, int *val)
+{
+#ifdef IS_BIG_ENDIAN
+X  if (fread((char *)val,(size_t)4,(size_t)1,fd) != 1) *val = 0;
+#else
+X  unsigned char b[4];
+X  unsigned long acc;
+X
+X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != 4) {
+X    *val = 0;
+X    return;
+X  }
+X
+X  /* assemble in an unsigned type: left-shifting a signed int overflows
+X     when the top byte is 0x80 or more */
+X  acc = ((unsigned long)b[0]<<24) | ((unsigned long)b[1]<<16) |
+X    ((unsigned long)b[2]<<8) | (unsigned long)b[3];
+X  *val = (int)acc;
+#endif
+}
+X
+/* src_int4_write() - write val to fd as 4 bytes, most significant byte
+X   first.
+X
+X   With IS_BIG_ENDIAN defined the first four bytes of val are written
+X   as stored; otherwise the low 32 bits are split into bytes and
+X   written in that order. */
+void src_int4_write(FILE *fd, int val)
+{
+#ifdef IS_BIG_ENDIAN
+X  fwrite(&val,(size_t)4,(size_t)1,fd);
+#else
+X  unsigned char b[4];
+X  int k;
+X
+X  /* peel off the low byte, last slot first */
+X  for (k = 3; k >= 0; k--) {
+X    b[k] = val & 255;
+X    val = val >> 8;
+X  }
+X
+X  fwrite(b,(size_t)1,(size_t)4,fd);
+#endif
+}
+X
+/* src_long8_write() - write val to fd as 8 bytes, most significant byte
+X   first.
+X
+X   With IS_BIG_ENDIAN defined eight bytes are written starting at the
+X   address of val; otherwise val is split into bytes by repeated 8-bit
+X   right shifts and the bytes are written in that order. */
+void src_long8_write(FILE *fd, long val)
+{
+#ifdef IS_BIG_ENDIAN
+X  fwrite(&val,(size_t)8,(size_t)1,fd);
+#else
+X  unsigned char b[8];
+X  int k;
+X
+X  /* peel off the low byte, last slot first */
+X  for (k = 7; k >= 0; k--) {
+X    b[k] = val & 255;
+X    val = val >> 8;
+X  }
+X
+X  fwrite(b,(size_t)1,(size_t)8,fd);
+#endif
+}
+X
+/* newname() - build "<oname>.<suff>" in nname.
+X
+X   nname - destination buffer of maxn bytes
+X   oname - base name
+X   suff  - suffix, appended after a '.'
+X   maxn  - size of nname; nothing is written when maxn <= 0
+X
+X   The result is always NUL-terminated and never exceeds maxn bytes
+X   including the terminator; a name that does not fit is truncated
+X   (the '.' is only added when there is room for it). */
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+X  size_t used;
+X
+X  if (nname == NULL || maxn <= 0) return;
+X
+X  /* strncpy() leaves the buffer unterminated when oname fills it */
+X  strncpy(nname,oname,maxn-1);
+X  nname[maxn-1]='\0';
+X  used = strlen(nname);
+X
+X  if (used + 1 < (size_t)maxn) {	/* room for '.' and the NUL */
+X    nname[used++]='.';
+X    nname[used]='\0';
+X  }
+X
+X  /* strncat() writes up to n characters plus a NUL, so one byte must
+X     be held back for the terminator */
+X  strncat(nname,suff,(size_t)maxn - used - 1);
+}
+SHAR_EOF
+chmod 0644 map_db.c ||
+echo 'restore of map_db.c failed'
+Wc_c="`wc -c < 'map_db.c'`"
+test 10852 -eq "$Wc_c" ||
+ echo 'map_db.c: original size 10852, current size' "$Wc_c"
+fi
+# ============= mchu.aa ==============
+if test -f 'mchu.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mchu.aa (File already exists)'
+else
+echo 'x - extracting mchu.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mchu.aa' &&
+>MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
+ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
+FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
+DIDGDGQVNYEEFVQMMTAK
+SHAR_EOF
+chmod 0644 mchu.aa ||
+echo 'restore of mchu.aa failed'
+Wc_c="`wc -c < 'mchu.aa'`"
+test 212 -eq "$Wc_c" ||
+ echo 'mchu.aa: original size 212, current size' "$Wc_c"
+fi
+# ============= md_10.mat ==============
+if test -f 'md_10.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping md_10.mat (File already exists)'
+else
+echo 'x - extracting md_10.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'md_10.mat' &&
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 11 -13 -12 -11 -13 -13 -10 -8 -15 -13 -15 -14 -13 -18 -7 -5 -4 -20 -19 -6 -12 -11 -1
+R -12 12 -13 -18 -10 -5 -15 -9 -5 -17 -14 -2 -14 -22 -11 -10 -12 -9 -17 -17 -15 -10 -1
+N -12 -13 13 -3 -14 -11 -12 -11 -5 -13 -19 -6 -15 -20 -17 -4 -7 -21 -12 -17 5 -11 -1
+D -11 -18 -3 12 -20 -13 -2 -9 -10 -19 -21 -15 -18 -23 -18 -12 -14 -24 -13 -15 5 -7 -1
+C -13 -10 -14 -20 17 -19 -22 -12 -12 -18 -16 -21 -15 -11 -18 -7 -14 -9 -7 -12 -17 -21 -1
+Q -13 -5 -11 -13 -19 13 -5 -15 -3 -19 -12 -6 -14 -22 -8 -13 -13 -17 -16 -17 -12 4 -1
+E -10 -15 -12 -2 -22 -5 12 -9 -15 -19 -20 -8 -17 -23 -17 -15 -15 -20 -21 -14 -7 3 -1
+G -8 -9 -11 -9 -12 -16 -9 11 -16 -21 -21 -15 -18 -22 -16 -7 -14 -13 -21 -13 -10 -13 -1
+H -16 -5 -5 -10 -12 -3 -15 -16 16 -17 -13 -13 -15 -14 -10 -11 -13 -20 -3 -19 -7 -9 -1
+I -13 -17 -14 -19 -17 -20 -19 -21 -18 12 -7 -17 -4 -11 -19 -14 -7 -20 -15 -1 -16 -19 -1
+L -15 -14 -19 -21 -16 -12 -20 -21 -13 -7 10 -18 -4 -6 -10 -13 -15 -13 -16 -8 -20 -16 -1
+K -14 -2 -6 -15 -21 -6 -8 -15 -13 -17 -18 12 -12 -24 -17 -13 -10 -19 -20 -18 -11 -7 -1
+M -13 -14 -15 -18 -15 -14 -18 -19 -15 -4 -4 -12 16 -14 -17 -15 -7 -16 -18 -5 -16 -16 -1
+F -18 -22 -19 -22 -11 -22 -23 -22 -14 -11 -6 -23 -14 14 -17 -11 -18 -13 -3 -12 -21 -22 -1
+P -7 -12 -17 -18 -18 -8 -17 -16 -10 -19 -10 -16 -17 -17 13 -6 -9 -22 -20 -16 -17 -13 -1
+S -5 -10 -4 -12 -7 -13 -15 -7 -11 -14 -13 -13 -15 -11 -6 11 -4 -15 -12 -14 -8 -14 -1
+T -4 -12 -7 -14 -14 -13 -15 -14 -13 -7 -16 -10 -7 -19 -9 -4 12 -19 -17 -10 -10 -14 -1
+W -21 -9 -21 -21 -10 -17 -21 -13 -21 -21 -13 -21 -17 -13 -21 -15 -18 18 -12 -16 -21 -19 -1
+Y -20 -17 -12 -13 -7 -16 -21 -20 -3 -15 -16 -20 -17 -3 -20 -12 -17 -12 15 -18 -13 -19 -1
+V -6 -17 -17 -15 -12 -17 -14 -13 -19 -1 -8 -18 -5 -12 -16 -14 -10 -16 -18 11 -16 -15 -1
+B -12 -15 5 5 -17 -12 -7 -10 -7 -16 -20 -11 -17 -21 -17 -8 -10 -22 -13 -16 13 -9 -1
+Z -16 -18 -17 -8 -32 1 9 -17 -17 -29 -26 -11 -24 -34 -21 -21 -21 -29 -29 -22 -9 13 -1
+XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 md_10.mat ||
+echo 'restore of md_10.mat failed'
+Wc_c="`wc -c < 'md_10.mat'`"
+test 2255 -eq "$Wc_c" ||
+ echo 'md_10.mat: original size 2255, current size' "$Wc_c"
+fi
+# ============= md_20.mat ==============
+if test -f 'md_20.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping md_20.mat (File already exists)'
+else
+echo 'x - extracting md_20.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'md_20.mat' &&
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 10 -10 -9 -8 -10 -10 -7 -5 -12 -10 -12 -11 -9 -15 -5 -2 -1 -17 -16 -3 -9 -8 -1
+R -10 12 -10 -14 -7 -3 -11 -6 -3 -14 -12 0 -11 -18 -9 -7 -9 -6 -14 -14 -12 -7 -1
+N -9 -10 13 -1 -11 -8 -9 -8 -2 -11 -15 -4 -12 -16 -13 -1 -4 -18 -9 -14 6 -8 -1
+D -8 -14 -1 12 -16 -9 1 -6 -7 -16 -18 -11 -15 -20 -15 -9 -11 -20 -11 -12 6 -4 -1
+C -10 -7 -11 -16 17 -16 -19 -9 -9 -14 -13 -17 -12 -8 -14 -4 -11 -7 -4 -10 -14 -17 -1
+Q -10 -3 -8 -9 -16 13 -3 -12 0 -16 -9 -3 -11 -18 -5 -10 -10 -14 -12 -14 -9 5 -1
+E -7 -11 -9 1 -19 -3 11 -7 -12 -16 -17 -5 -14 -20 -14 -12 -12 -17 -18 -11 -4 4 -1
+G -5 -6 -8 -6 -9 -12 -7 11 -13 -17 -18 -12 -15 -19 -12 -5 -11 -10 -17 -11 -7 -9 -1
+H -12 -3 -2 -7 -9 0 -12 -13 15 -14 -10 -9 -12 -11 -7 -8 -10 -16 0 -15 -4 -6 -1
+I -10 -14 -11 -16 -14 -16 -16 -17 -14 12 -4 -14 -1 -8 -15 -11 -4 -16 -12 2 -13 -16 -1
+L -12 -11 -15 -18 -13 -9 -17 -18 -10 -4 10 -15 -2 -4 -7 -10 -12 -10 -13 -5 -17 -13 -1
+K -11 0 -4 -12 -17 -3 -5 -12 -9 -14 -15 12 -9 -21 -13 -10 -7 -16 -17 -15 -8 -4 -1
+M -9 -11 -12 -15 -12 -11 -15 -16 -12 -1 -2 -9 15 -10 -14 -12 -4 -13 -14 -3 -13 -13 -1
+F -15 -19 -16 -19 -8 -18 -20 -19 -11 -8 -4 -19 -10 13 -14 -8 -15 -10 0 -9 -17 -19 -1
+P -5 -9 -13 -15 -14 -5 -14 -12 -7 -15 -7 -13 -14 -14 12 -3 -7 -18 -16 -13 -14 -10 -1
+S -2 -8 -1 -9 -4 -10 -12 -5 -8 -11 -10 -10 -12 -8 -3 10 -1 -12 -9 -11 -5 -11 -1
+T -1 -9 -4 -11 -10 -10 -12 -11 -10 -4 -12 -7 -4 -15 -7 -1 11 -16 -14 -7 -7 -11 -1
+W -17 -6 -18 -18 -7 -14 -18 -10 -17 -17 -10 -17 -14 -10 -18 -12 -15 18 -9 -13 -18 -16 -1
+Y -16 -14 -9 -11 -4 -12 -18 -17 0 -12 -12 -17 -14 0 -16 -9 -13 -9 14 -15 -10 -15 -1
+V -3 -14 -14 -12 -9 -14 -11 -11 -15 2 -5 -15 -2 -9 -13 -11 -7 -13 -14 11 -13 -12 -1
+B -9 -12 6 6 -14 -9 -4 -7 -4 -13 -17 -8 -13 -18 -14 -5 -7 -19 -10 -13 12 -6 -1
+Z -12 -13 -13 -4 -27 4 10 -13 -12 -24 -21 -6 -20 -29 -17 -17 -17 -24 -24 -18 -6 12 -1
+XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 md_20.mat ||
+echo 'restore of md_20.mat failed'
+Wc_c="`wc -c < 'md_20.mat'`"
+test 2256 -eq "$Wc_c" ||
+ echo 'md_20.mat: original size 2256, current size' "$Wc_c"
+fi
+# ============= md_40.mat ==============
+if test -f 'md_40.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping md_40.mat (File already exists)'
+else
+echo 'x - extracting md_40.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'md_40.mat' &&
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 9 -7 -6 -6 -7 -7 -5 -3 -10 -6 -9 -8 -7 -11 -2 0 1 -13 -12 -1 -6 -6 -1
+R -7 11 -6 -10 -5 0 -8 -4 0 -10 -9 3 -8 -14 -6 -5 -6 -4 -10 -11 -8 -4 -1
+N -6 -6 12 2 -8 -5 -5 -5 0 -8 -12 -1 -9 -13 -9 1 -2 -16 -6 -10 7 -5 -1
+D -6 -10 2 11 -13 -6 3 -4 -5 -12 -15 -8 -11 -16 -11 -6 -7 -15 -8 -9 6 -1 -1
+C -6 -5 -8 -13 16 -12 -15 -7 -6 -11 -11 -13 -9 -6 -11 -2 -7 -4 -2 -7 -11 -13 -1
+Q -7 0 -5 -6 -12 12 0 -9 2 -13 -6 0 -8 -14 -3 -7 -7 -11 -9 -11 -6 6 -1
+E -5 -8 -5 3 -15 0 10 -4 -8 -12 -13 -3 -11 -16 -10 -8 -8 -13 -14 -8 -1 5 -1
+G -3 -4 -5 -4 -7 -9 -4 10 -10 -13 -14 -9 -12 -15 -9 -2 -8 -7 -15 -8 -5 -7 -1
+H -10 0 0 -5 -6 2 -8 -10 14 -11 -7 -6 -9 -7 -4 -6 -7 -12 2 -12 -2 -3 -1
+I -6 -10 -8 -12 -11 -13 -12 -13 -11 11 -1 -11 1 -6 -11 -8 -2 -12 -9 4 -10 -12 -1
+L -9 -9 -12 -14 -11 -6 -13 -14 -7 -1 9 -12 1 -1 -5 -7 -9 -7 -9 -2 -13 -10 -1
+K -8 3 -1 -8 -13 0 -3 -9 -6 -11 -12 11 -7 -18 -10 -7 -5 -12 -13 -12 -5 -2 -1
+M -7 -8 -9 -11 -8 -8 -11 -12 -9 1 1 -7 14 -7 -10 -8 -2 -11 -11 0 -10 -10 -1
+F -11 -14 -12 -16 -6 -14 -16 -15 -7 -6 -1 -17 -7 13 -11 -5 -11 -7 2 -6 -14 -15 -1
+P -2 -6 -9 -12 -11 -3 -10 -9 -4 -11 -5 -10 -10 -11 12 -1 -4 -14 -12 -9 -11 -7 -1
+S 0 -5 1 -6 -2 -7 -8 -2 -6 -8 -7 -7 -8 -5 -1 9 1 -10 -7 -7 -3 -8 -1
+T 1 -6 -2 -7 -7 -7 -8 -8 -7 -2 -9 -5 -2 -11 -4 1 10 -14 -10 -4 -5 -8 -1
+W -14 -4 -17 -15 -4 -12 -13 -7 -11 -12 -7 -13 -11 -7 -14 -10 -14 18 -6 -11 -16 -12 -1
+Y -12 -9 -6 -8 -2 -9 -14 -14 2 -9 -9 -13 -11 2 -12 -7 -11 -6 14 -11 -7 -11 -1
+V -1 -11 -10 -9 -7 -11 -8 -8 -12 4 -2 -12 0 -6 -10 -7 -4 -10 -11 10 -10 -9 -1
+B -6 -8 7 6 -11 -6 -1 -5 -2 -10 -13 -5 -10 -14 -10 -3 -5 -16 -7 -10 11 -3 -1
+Z -8 -8 -8 0 -21 6 10 -9 -7 -18 -16 -3 -15 -23 -12 -12 -12 -19 -18 -14 -3 11 -1
+XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 md_40.mat ||
+echo 'restore of md_40.mat failed'
+Wc_c="`wc -c < 'md_40.mat'`"
+test 2255 -eq "$Wc_c" ||
+ echo 'md_40.mat: original size 2255, current size' "$Wc_c"
+fi
+# ============= mgstm1.aa ==============
+if test -f 'mgstm1.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.aa (File already exists)'
+else
+echo 'x - extracting mgstm1.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.aa' &&
+>GT8.7 | 266 40001 90043 | transl. of pa875.con, 19 to 675
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
+YTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLI
+DGSHKITQSNAILRYLARKHHLDGETEEERIR
+ADIVENQVMDTRMQLIMLCYNPDFEKQKPEFL
+KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+YDILDQYRMFEPKCLDAFPNLRDFLARFEGLK
+KISAYMKSSRYIATPIFSKMAHWSNK
+SHAR_EOF
+chmod 0644 mgstm1.aa ||
+echo 'restore of mgstm1.aa failed'
+Wc_c="`wc -c < 'mgstm1.aa'`"
+test 284 -eq "$Wc_c" ||
+ echo 'mgstm1.aa: original size 284, current size' "$Wc_c"
+fi
+# ============= mgstm1.aaa ==============
+if test -f 'mgstm1.aaa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.aaa (File already exists)'
+else
+echo 'x - extracting mgstm1.aaa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.aaa' &&
+>GT8.7 | 266 40001 90043 | transl. of pa875.con, 19 to 675
+MPMILGY@WNVRGLT#HPIRMLLEY@T#DS*S*Y@DEKR
+Y@T#MGDAPDFDRS*QWLNEKFKLGLDFPNLPY@LI
+DGS*HKIT#QSNAILRY@LARKHHLDGET#EEERIR
+ADIVENQVMDT#RMQLIMLCY@NPDFEKQKPEFL
+KT#IPEKMKLY@SEFLGKRPWFAGDKVT#Y@VDFLA
+Y@DILDQY@RMFEPKCLDAFPNLRDFLARFEGLK
+KISAY@MKSSRY@IAT#PIFSKMAHWSNK
+SHAR_EOF
+chmod 0644 mgstm1.aaa ||
+echo 'restore of mgstm1.aaa failed'
+Wc_c="`wc -c < 'mgstm1.aaa'`"
+test 310 -eq "$Wc_c" ||
+ echo 'mgstm1.aaa: original size 310, current size' "$Wc_c"
+fi
+# ============= mgstm1.e05 ==============
+if test -f 'mgstm1.e05' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.e05 (File already exists)'
+else
+echo 'x - extracting mgstm1.e05 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.e05' &&
+>pGT875 | 266 with an average of 5% of residues modified by mutr.
+GCTGAAGCCTAGTTTGAGAAGACCACCAGCACCACCACCATGCCTATGATATGGGATACTG
+GAAAGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACCCAAGTTA
+TGATGAGAAGAGATACACTATGGGTGACGGCTCCCGACTTTGACAGACAGTGGCTGA
+ATGAGAAGNTTCAAGCTGGGCCTGGAATTTCCCTAATCTGCCTTACTTGATCGATGGATCA
+CACAAGATCACCCAGAGAATGCCATCCTGCGCTACCTGGCCACAAAGCCCACCTGGAGGA
+GATGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGATAGCATGGAAACC
+CGCTGCAGCNNNNCATGCTCTCGTTACAACCTTGACTTTGAGAAGCAGAAGCCAGAGTTC
+TTGAAGACCATCCCTGAGAAAATGAGCTCTACTCTGAGTTCCTGGGATGCAAGAGGCCATGGT
+TTGCATGGGACAAGTGTCACCTATGTGGATTTCTTTGCTTATGACATTCTTGACCAGTAC
+CGTATGTTTGAGCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGTGACTTCCTGGCCCGC
+TTCGAGGGCCTCAAGAAGATCTCTGCTCTACATGAAGAGTAGCCGGTACATCGGCACAGC
+TCATATTTACAAAGATGGCCCACTGGAGTAACAAGCAGGCCCTTGCTACACGGCACTCAC
+TAGGAGGACCTGTCCNNACTGGTGGCTCCTGCAGTCCCTGTGTGGGGACAAGCACCCTGG
+CCTTCTGCACTGTGGCTCCTGGTTCCTCTCCTCCCGCTCCCTTCTGCAGTTGGTCAGCCC
+CATCTCCTCACCCTCTTCCCAGTCAAGGCCACACGCCTTCATTCGTCCCCGTCTTCTTTC
+ACATGGCCTCCTTCTTCGATTGGCTCCCTGACCCACACCTCACAGCCCGTTTCTGCGAAC
+TGAGGTCTGTCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACCACTATCTTAGT
+GCTAGCCCTGCCCTAGAGTTACCCGAAGTCAATACTTGAAGTGCCAGCCTGCTTCCTGGT
+GGTAGTAGCCTCCCCAGGTCGGTCTCGTCTACAATAAAGTCATGAAACACACT
+SHAR_EOF
+chmod 0644 mgstm1.e05 ||
+echo 'restore of mgstm1.e05 failed'
+Wc_c="`wc -c < 'mgstm1.e05'`"
+test 1220 -eq "$Wc_c" ||
+ echo 'mgstm1.e05: original size 1220, current size' "$Wc_c"
+fi
+# ============= mgstm1.eeq ==============
+if test -f 'mgstm1.eeq' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.eeq (File already exists)'
+else
+echo 'x - extracting mgstm1.eeq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.eeq' &&
+>mgstm1 | 266
+ATGCCTATGATACTGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATAGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGACCGTGCAGACATTGT
+GGAGAAGGCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTTGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTTTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATCCCACTGGAGTAACAAGTAGG
+CCCTTGCTACACGGGCACACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
+GGGTGGGGACAGCACCCTGGCCTTCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
+CCCTTCTGCAGCTTGTTTGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
+CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCTTGGCTCCTGACCCAACCT
+CACAGCCCGTTTCTGCGAATGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
+ATGGTCAACACTATCTTAGTGCTAGCACCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
+TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGC
+AAACACACTT
+SHAR_EOF
+chmod 0644 mgstm1.eeq ||
+echo 'restore of mgstm1.eeq failed'
+Wc_c="`wc -c < 'mgstm1.eeq'`"
+test 1122 -eq "$Wc_c" ||
+ echo 'mgstm1.eeq: original size 1122, current size' "$Wc_c"
+fi
+# ============= mgstm1.esq ==============
+if test -f 'mgstm1.esq' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.esq (File already exists)'
+else
+echo 'x - extracting mgstm1.esq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.esq' &&
+>mgstm1e
+ATGCCTATGATACTGGGATACTGGGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCT
+GCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
+TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGT
+GGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
+TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
+GTTCCTGGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
+CTTATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
+ACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
+GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGG
+CCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
+GGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
+CCCTTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
+CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCT
+CACAGCCCGTTTCTGCGAACTGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
+ATGGTCAACACTATCTTAGTGCTAGCCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
+TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTG
+AAACACACTT
+SHAR_EOF
+chmod 0644 mgstm1.esq ||
+echo 'restore of mgstm1.esq failed'
+Wc_c="`wc -c < 'mgstm1.esq'`"
+test 1116 -eq "$Wc_c" ||
+ echo 'mgstm1.esq: original size 1116, current size' "$Wc_c"
+fi
+# ============= mgstm1.gcg ==============
+if test -f 'mgstm1.gcg' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.gcg (File already exists)'
+else
+echo 'x - extracting mgstm1.gcg (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.gcg' &&
+GT8.7 transl. of pa875.con, 19 to 675
+X gt87 Length: 217 July 31, 1996 19:51 Type: P Check: 9358 ..
+X
+X 1 PMILGYWNVR GLTHPIRMLL EYTDSSYDEK RYTMGDAPDF DRSQWLNEKF
+X
+X 51 KLGLDFPNLP YLIDGSHKIT QSNAILRYLA RKHHLDGETE EERIRADIVE
+X
+X 101 NQVMDTRMQL IMLCYNPDFE KQKPEFLKTI PEKMKLYSEF LGKRPWFAGD
+X
+X 151 KVTYVDFLAY DILDQYRMFE PKCLDAFPNL RDFLARFEGL KKISAYMKSS
+X
+X 201 RYIATPIFSK MAHWSNK
+X
+SHAR_EOF
+chmod 0644 mgstm1.gcg ||
+echo 'restore of mgstm1.gcg failed'
+Wc_c="`wc -c < 'mgstm1.gcg'`"
+test 406 -eq "$Wc_c" ||
+ echo 'mgstm1.gcg: original size 406, current size' "$Wc_c"
+fi
+# ============= mgstm1.lc ==============
+if test -f 'mgstm1.lc' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.lc (File already exists)'
+else
+echo 'x - extracting mgstm1.lc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.lc' &&
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
+ytmgdapdfdrsqwlnekfklgldfpnlpyli
+DGSHKITQSNAILRYLARKHHLDGETEEERIR
+adivenqvmdtrmqlimlcynpdfekqkpefl
+KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+ydildqyrmfepkcldafpnlrdflarfeglk
+KISAYMKSSRYIATPIFSKMAHWSNK
+SHAR_EOF
+chmod 0644 mgstm1.lc ||
+echo 'restore of mgstm1.lc failed'
+Wc_c="`wc -c < 'mgstm1.lc'`"
+test 282 -eq "$Wc_c" ||
+ echo 'mgstm1.lc: original size 282, current size' "$Wc_c"
+fi
+# ============= mgstm1.nt ==============
+if test -f 'mgstm1.nt' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.nt (File already exists)'
+else
+echo 'x - extracting mgstm1.nt (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.nt' &&
+>pGT875
+ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
+CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
+TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTG
+CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
+GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
+GAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTT
+GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
+TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
+TATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAAC
+CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
+AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
+SHAR_EOF
+chmod 0644 mgstm1.nt ||
+echo 'restore of mgstm1.nt failed'
+Wc_c="`wc -c < 'mgstm1.nt'`"
+test 677 -eq "$Wc_c" ||
+ echo 'mgstm1.nt: original size 677, current size' "$Wc_c"
+fi
+# ============= mgstm1.nts ==============
+if test -f 'mgstm1.nts' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.nts (File already exists)'
+else
+echo 'x - extracting mgstm1.nts (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.nts' &&
+>mgstm1
+GCACCATGCCTATGAT,
+GATACACCA,
+CCATCCTGCGCTACCTTGCC,
+aaggtcacctatgtggatttccttgcttat,
+CCTGTCCACACTGGG,
+TCAAGTCCACACAGCC,
+TCACGCTTCCTA,
+CAATACTTGAGTGCCAGCC
+SHAR_EOF
+chmod 0644 mgstm1.nts ||
+echo 'restore of mgstm1.nts failed'
+Wc_c="`wc -c < 'mgstm1.nts'`"
+test 160 -eq "$Wc_c" ||
+ echo 'mgstm1.nts: original size 160, current size' "$Wc_c"
+fi
+# ============= mgstm1.raa ==============
+if test -f 'mgstm1.raa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.raa (File already exists)'
+else
+echo 'x - extracting mgstm1.raa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.raa' &&
+>mgstm1.aa shuffled
+LEGLPLKPCK RPQDRFSEDR VILFESFTYG FILAAWNMGY NEAEDMDRSH YLLTKELPKS
+YGGRRYYAPD FTYLFLILRN PPVKRAAPDR GNTMLQIFMA FLDDQYVMQD AFLPIGDGLK
+DKPMRSNMKY ITHNVYIDED IVRCKWIFAD EMSTPLLLWL MHKQKPGHRF LEKSWSHTRR
+EEEYNSIIDL KKSYKYLKNM AELKITSQTI FFDKDAE
+SHAR_EOF
+chmod 0644 mgstm1.raa ||
+echo 'restore of mgstm1.raa failed'
+Wc_c="`wc -c < 'mgstm1.raa'`"
+test 259 -eq "$Wc_c" ||
+ echo 'mgstm1.raa: original size 259, current size' "$Wc_c"
+fi
+# ============= mgstm1.rev ==============
+if test -f 'mgstm1.rev' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.rev (File already exists)'
+else
+echo 'x - extracting mgstm1.rev (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.rev' &&
+>mgstm1 reverse complement
+AAGTGTGTTTCAGACTTTATTGTAGACGAGACAGACCTGGGGAGGCTACTCCACCAGGAACAGGCTGGCACTCAA
+GTATTGACTTCGGGGTAACTCTAGGGAGGGCTAGCACTAAGATAGTGTTGACCATCGGGGTAATTCTAGGAAGCG
+TGAGTTCAGGACAGACCTCAGTTCGCAGAAACGGGCTGTGAGGTTGGGTCAGGGAGCCAATGAAGAAGGGGCCAT
+gtgaaagaaactggggagaatgaaggctgtgtggacttgactgggaagagggtgaggagatggggctgaccaagc
+tgcagaagggagcgggaaggagagagaaccaggagccacagtgcagaaggccagggtgctgtccccacccagggc
+CTGCAGGATCCCCAGTGTGGACAGGTCCTCCTAGTGAGTGCCCGTGTAGCAAGGGCCTACTTGTTACTCCAGTGG
+GCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCG
+AAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGGTACTGGTCAAGA
+ATGTCATAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGGAACTCAGAG
+tagagcttcattttctcagggatggtcttcaagaactctggcttctgcttctcaaagtcagggttgtaacagagc
+atgatgagctgcatgcgggtgtccatgacctggttctccacaatgtctgcacggatcctctcctcctctgtctct
+ccatccaggtggtgctttcgggcaaggtagcgcaggatggcattgctctgggtgatcttgtgtgatccatcgatc
+AAGTAAGGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCAAAGTCGGGA
+GCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCCAGGAGCATGCGGATCGGGTGTGTC
+AGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGCATGGTGCTGGTGCTGTGGTCTTCTCAAACTGGCTTCAGC
+SHAR_EOF
+chmod 0644 mgstm1.rev ||
+echo 'restore of mgstm1.rev failed'
+Wc_c="`wc -c < 'mgstm1.rev'`"
+test 1167 -eq "$Wc_c" ||
+ echo 'mgstm1.rev: original size 1167, current size' "$Wc_c"
+fi
+# ============= mgstm1.seq ==============
+if test -f 'mgstm1.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgstm1.seq (File already exists)'
+else
+echo 'x - extracting mgstm1.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.seq' &&
+>pGT875 | 266
+GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
+GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
+TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
+TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
+CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
+AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
+CATGCAGCtCATCATGCTCTGTTACAACCCTGACTTTGAGAAGCAGAAGCCAGAGTTCTT
+GAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAAGAGGCCATGGTT
+TGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTACCG
+TATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTT
+CGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTAT
+ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
+GAGGACCTGTCCACACTGGGgATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
+TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
+TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
+ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
+TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
+CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
+CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
+SHAR_EOF
+chmod 0644 mgstm1.seq ||
+echo 'restore of mgstm1.seq failed'
+Wc_c="`wc -c < 'mgstm1.seq'`"
+test 1158 -eq "$Wc_c" ||
+ echo 'mgstm1.seq: original size 1158, current size' "$Wc_c"
+fi
+# ============= mgtt2_x.seq ==============
+if test -f 'mgtt2_x.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping mgtt2_x.seq (File already exists)'
+else
+echo 'x - extracting mgtt2_x.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mgtt2_x.seq' &&
+>>mgtt2_x Length: 1089 January 26, 2000 04:00 Type: N Check: 1394 ..
+CTGAGTTGGG TCCACGAAAG CCCAGCTAGG CCATTACCGC GTCCGGGTGA GACTAAGGTC CTGGGCTGGA TTCCTGGCTC CACGGTCCGC TGGAGCAAAT
+CGCATAAGTC AGTCTGAGTG CGCGCGCCCT CAGCCCTGCT TTTGGTATAA AGTCCTCCAA AGCGTCTCCC TCCCCAANNN NGATCagCAg GtGTCAGCTA
+TCCAGAGGAG GAAATCGTTT GGCTTGGcCA ACTGAGGcTG TGCTGGACCC CAGCTTGCTG TTATCGAACG CAGTCGGCAC ACCATCTTGT GTCGCTACCG
+GCAATGGGCT TGGAGCTCTA CCTGGACCTG CTGTCACAAC CCAGCCGCGC TGTCTACATC tTCNGCCAaG AAGAATGGCA TCCCCTTCCA GACGCGTACC
+GTGGATATAC TCAAAGGGCA GCACATGAGC GAGCAATTCT CCCAGGTGAA CTGCTTAAAC AAAGTTCCTG TACTCAAAGA CGGAAGCTTC GTGTTGACCG
+AAAGCACAGC CATCTtGATT TACCTGAGTT CCAAGTACCA GGTGGCAGAC CACTGGTACC CGGCCGACCT ACAGGCCCGT GCCCAAGTCC ACGAATACCT
+GGGCTGGCAT GcCGACAACA TCCgtGGTAC TTtcgGAGTG CTCCTATGGA CCNAAgGTGT TgGGGCCACT CATTGgGGTc CAgGTTCCCC agGAGAAGGT
+GGAACgGAAC agAGATAGAA TGGTCCTGGt TCTGCaACAG CTGGAgGACA AGTTCTCAGG GACAGGsCTC CTGTTGGCAG CAGTGAGCTA GCGATCTCAT
+TCTCTGGAGA GTGATGCAGC GTGCTCTTGC TATACCTGTT GAGGACGGCT CAGCTGACAG CATGCGAGAA AGGTGGAGGC GTCTTGGTGC TGAGCTGTGT
+AGAGCTCATA GACATCTGGC ATCTGGACAA GCAGCAGAAA TGTACAGTAC CCCTTCGAGT CATGCACATG CACTCAATTG TAGATCCTGA TGGTTGACCA
+CATAAGACTA TTTGTGTTAA AAAAGGGGGC CGTCCCATTC CCTTATGATC GATACATACT GGCTCCTTTA CACATNGATG GAAAACTGC
+SHAR_EOF
+chmod 0644 mgtt2_x.seq ||
+echo 'restore of mgtt2_x.seq failed'
+Wc_c="`wc -c < 'mgtt2_x.seq'`"
+test 1286 -eq "$Wc_c" ||
+ echo 'mgtt2_x.seq: original size 1286, current size' "$Wc_c"
+fi
+# ============= mm_file.h ==============
+if test -f 'mm_file.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping mm_file.h (File already exists)'
+else
+echo 'x - extracting mm_file.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mm_file.h' &&
+/*
+X copyright (c) 1999 William R. Pearson
+*/
+X
+/* $Name: fa_34_26_5 $ - $Id: mm_file.h,v 1.26 2006/10/05 18:20:40 wrp Exp $ */
+X
+/*
+X mm_file.h - defines m_file_str for mmap()ed files
+*/
+X
+#include <sys/types.h>
+X
+/* Select the file-positioning primitives and the matching offset type.
+X Without USE_FSEEKO the classic fseek()/ftell() pair is used and
+X offsets are plain longs; with it, fseeko()/ftello() and off_t are used. */
+#ifndef USE_FSEEKO
+#define FSEEK fseek
+#define FTELL ftell
+typedef long fseek_t;
+#else
+#define FSEEK fseeko
+#define FTELL ftello
+typedef off_t fseek_t;
+#endif
+/* marker that fseek_t has been defined */
+#define FSEEK_T_DEF
+X
+/* Provide int64_t/uint64_t: from <inttypes.h> when HAS_INTTYPES is
+X defined, otherwise from local typedefs. */
+#ifdef HAS_INTTYPES
+#include <inttypes.h>
+#else
+#ifdef WIN32
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+/* NOTE(review): this fallback is only 64 bits wide where long is
+X 64 bits - confirm for each target that builds without HAS_INTTYPES */
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#endif
+#endif
+/* MM_OFF is the type of the offsets stored in the d_pos_arr[],
+X s_pos_arr[] and a_pos_arr[] members of struct lmf_str:
+X int64_t when BIG_LIB64 is defined, long otherwise. */
+#ifdef BIG_LIB64
+typedef int64_t MM_OFF;
+#else
+typedef long MM_OFF;
+#endif
+X
+#ifdef MYSQL_DB
+#include <mysql.h>
+#endif
+#ifdef PGSQL_DB
+#include <libpq-fe.h>
+#endif
+X
+/* struct lmf_str - state for one open sequence library, covering
+X flat files, memory-mapped files and SQL-backed libraries.
+X
+X The structure uses FILE and MAX_FN, neither of which is declared in
+X the part of this header shown here, so the including file must
+X provide them before including this header. */
+struct lmf_str {
+X FILE *libf; /* sequence file being read */
+X FILE *hfile; /* BLAST2.0 description file */
+X unsigned int *oid_list; /* oid list for subsets */
+X int oid_seqs; /* start offset for mask array */
+X int pref_db; /* preferred database */
+X unsigned int max_oid; /* NOTE(review): the original comment here duplicated the oid_seqs text; presumably the largest oid - confirm */
+X
+X char lb_name[120]; /* file name */
+X int lb_type; /* library type */
+X int *sascii; /* ascii -> sq mapping */
+X
+X /* used by flat files */
+X char *lline; /* last line read */
+X unsigned char *cpsave; /* position in line for lgetlib() */
+X fseek_t lpos; /* position in file */
+X
+X /* Genbank Flat files */
+X int lfflag; /* flag for CRLF in EMBL CDROM files */
+X
+X /* stuff for GCG format files (5,6) */
+X int gcg_binary; /* flag for binary gcg format */
+X long gcg_len; /* length of GCG sequence */
+X
+X int bl_lib_pos; /* for ncbl2 */
+X int bl_format_ver; /* blast formatdb version */
+X char opt_text[MAX_FN]; /* text after filename */
+X
+X /* used when memory mapping */
+X int mm_flg; /* mmap worked */
+X int mmap_fd; /* mmap_fd */
+X char *mmap_base; /* base */
+X char *mmap_addr; /* current pos */
+X long st_size; /* file size */
+X
+X MM_OFF *d_pos_arr; /* pointer to desc. offsets */
+X MM_OFF *s_pos_arr; /* pointer to seq. offsets */
+X MM_OFF *a_pos_arr; /* pointer to aux offsets */
+X
+X /* currently available only for memory mapped files */
+X int max_cnt; /* # database entries */
+X int64_t tot_len; /* total residue length */
+X long max_len; /* maximum sequence length */
+X int lib_aa; /* 0 = DNA, 1 = prot */
+X char *tmp_buf; /* temporary buffer */
+X int tmp_buf_max; /* max size */
+X
+X /* used for SQL database queries */
+X char *sql_db, *sql_query, *sql_getdesc, *sql_getseq;
+X int sql_reopen;
+X char **sql_uid_arr; /* indexed by lpos */
+X /* used to get sequence data */
+X char *sql_seqp;
+X
+#ifdef MYSQL_DB
+X /* used to open the database */
+X MYSQL *mysql_conn;
+X MYSQL_RES *mysql_res;
+X MYSQL_ROW mysql_row;
+#endif
+X
+#ifdef PGSQL_DB
+X /* used to open the database */
+X PGconn *pgsql_conn;
+X PGresult *pgsql_res;
+#endif
+X
+X /* per-format reader: stores residues in seq[0..maxs) and the entry
+X name/description in libstr; *libpos, *lcont and *l_off are
+X in/out values maintained by the reader */
+X int (*getlib)(unsigned char *seq, int maxs,
+X char *libstr, int n_libstr,
+X fseek_t *libpos,
+X int *lcont,
+X struct lmf_str *lm_fd,
+X long *l_off);
+X
+X /* per-format random access: writes up to cnt bytes of the
+X description for the entry at libpos into str */
+X void (*ranlib)(char *str, int cnt,
+X fseek_t libpos, char *libstr,
+X struct lmf_str *lm_fd);
+};
+X
+SHAR_EOF
+chmod 0644 mm_file.h ||
+echo 'restore of mm_file.h failed'
+Wc_c="`wc -c < 'mm_file.h'`"
+test 3057 -eq "$Wc_c" ||
+ echo 'mm_file.h: original size 3057, current size' "$Wc_c"
+fi
+# ============= mmgetaa.c ==============
+if test -f 'mmgetaa.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping mmgetaa.c (File already exists)'
+else
+echo 'x - extracting mmgetaa.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mmgetaa.c' &&
+/* mmgetaa.c - functions for mmap()ed access to libraries */
+X
+/* copyright (c) 1999,2000 William R. Pearson */
+X
+/* version 0 September, 1999 */
+X
+/*
+X This is one of two alternative files that can be used to
+X read a database. The two files are nmgetaa.c, and mmgetaa.c
+X (nxgetaa.c has been retired).
+X
+X nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
+X the same functions as nxgetaa.c if memory mapping is not used,
+X mmgetaa.c provides the database reading functions if memory
+X mapping is used. The decision to use memory mapping is made on
+X a file-by-file basis.
+*/
+X
+/* $Name: fa_34_26_5 $ - $Id: mmgetaa.c,v 1.41 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+X
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+X
+#define MAXLINE 512
+#define EOSEQ 0
+X
+#define XTERNAL
+#include "uascii.h"
+/* #include "upam.h" */
+#undef XTERNAL
+X
+#ifdef SUPERFAMNUM
+extern int nsfnum; /* number of superfamily numbers */
+extern int sfnum[10]; /* superfamily number from types 0 and 5 */
+extern int nsfnum_n;
+extern int sfnum_n[10];
+static char tline[MAXLINE];
+#endif
+X
+#define GCGBIN 6
+X
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+extern MM_OFF bl2_long8_cvt(int64_t);
+extern int bl2_uint4_cvt(int);
+X
+X
+long crck(char *, int);
+extern void src_int4_read(FILE *fd, int *val);
+extern void src_long4_read(FILE *fd, long *valp);
+extern void src_long8_read(FILE *fd, int64_t *val);
+X
+/* load_mmap() - check a ".xin" index against its sequence file, mmap()
+X   the sequence file, and load the description (d_pos_arr[]) and
+X   sequence (s_pos_arr[]) offset arrays for rapid access.
+X
+X   libi     - already open ".xin" index file, positioned at its start
+X   sname    - name of the sequence database file to be mapped
+X   lib_type - expected library type; must equal byte 3 of the index
+X   ldnaseq  - 1 for DNA, 0 for protein
+X   m_fd     - caller-supplied structure that receives the results
+X
+X   Returns m_fd on success.  Returns NULL if the index cannot be read,
+X   does not match lib_type/ldnaseq/the file size, or the file cannot
+X   be opened or mapped; in that case no mapping and no offset arrays
+X   are left allocated.  Calls exit(1) if an offset array cannot be
+X   allocated.
+*/
+X
+struct lmf_str *
+load_mmap(FILE *libi,	/* fd for already open ".xin" file */
+X	  char *sname,	/* name of sequence database file */
+X	  int lib_type,	/* 0-Fasta, 5-vms_pir, 6-gcg_binary */
+X	  int ldnaseq,	/* 1 for DNA, 0 for protein */
+X	  struct lmf_str *m_fd)
+{
+X  char format[4];
+X  int i, lib_aa;
+X  MM_OFF f_size;
+X  long lf_size;
+X  struct stat statbuf;
+X  int max_cnt;
+X  size_t n_off;			/* number of offsets in each array */
+X  MM_OFF *d_pos_arr = NULL, *s_pos_arr = NULL;
+X  int mm_flag, mm64_flag;
+X  int *tmp_pos_arr = NULL;
+X
+X  /* first check that the necessary indices are up-to-date */
+X  /* read the offsets in ".xin" file */
+X  /* all four header bytes are used below, so a short read is an error */
+X  if (fread(format,1,4,libi)!=4) {
+X    fprintf(stderr," cannot read .xin format\n");
+X    return NULL;
+X  }
+X
+X  mm64_flag = (format[2]==1);	/* 4 bytes or 8 bytes for long? */
+X
+#ifndef BIG_LIB64
+X  if (mm64_flag) {return NULL;}
+#endif
+X
+X  if (format[3]!=lib_type) {
+X    fprintf(stderr," cannot read format %d != lib_type %d\n",
+X	    format[3],lib_type);
+X    return NULL;
+X  }
+X
+X  src_int4_read(libi,&lib_aa);
+X  if (lib_aa == ldnaseq) { /* database residue mismatch */
+X    fprintf(stderr," residue type mismatch %s != %s (.xin) in %s\n",
+X	    (lib_aa ? "DNA" : "prot."),(ldnaseq ? "prot." : "DNA"),
+X	    sname);
+X    return NULL;
+X  }
+X
+X  /* everything looks good, fill in the lmf_str */
+X
+X  m_fd->lib_aa = lib_aa;
+X
+X  /* get file size from index */
+X  if (mm64_flag) src_long8_read(libi,&f_size);
+X  else {
+X    src_long4_read(libi,&lf_size);
+X    f_size = lf_size;
+X  }
+X
+X  /* now, start to open mmap()ed file */
+X  mm_flag=((m_fd->mmap_fd=open(sname,O_RDONLY))>=0);
+X  if (!mm_flag) {
+X    fprintf(stderr," cannot open %s for mmap()", sname);
+X    perror("...");
+X    return NULL;	/* file did not open */
+X  }
+X
+X  /* fstat the library file and get size */
+X  if(fstat(m_fd->mmap_fd, &statbuf) < 0) {
+X    fprintf(stderr," cannot stat %s for mmap()", sname);
+X    perror("...");
+X    m_fd->mm_flg = 0;
+X    mm_flag = 0;	/* without this the code below ran with no mapping */
+X    goto finish;
+X  }
+X
+X  /* check for identical sizes - if different, do not mmap */
+X  if (f_size != statbuf.st_size) {
+X    fprintf(stderr," %s file size (%lld) and expected size (%lld) don't match\n",
+X	    sname,(long long)statbuf.st_size,(long long)f_size);
+X    mm_flag = 0;
+X    goto finish;
+X  }
+X
+X  /* the index file and library file are open and the sizes match */
+X  /* map the file */
+X
+X  m_fd->st_size = statbuf.st_size;
+X  if((m_fd->mmap_base =
+X      mmap(NULL, m_fd->st_size, PROT_READ,
+X	   MAP_FILE | MAP_SHARED, m_fd->mmap_fd, 0)) == (char *) -1) {
+X    mm_flag = 0;
+#ifdef DEBUG
+X    fprintf(stderr," cannot mmap %s", sname);
+X    perror("...");
+#endif
+X  }
+X finish:
+X  /* the descriptor is not needed once the mapping exists (or failed) */
+X  close(m_fd->mmap_fd);
+X  if (!mm_flag) { return NULL; }
+X
+X  /* now finish reading the index file */
+X  src_int4_read(libi,&max_cnt);
+X  if (max_cnt < 0) {
+X    fprintf(stderr," error reading desc. offsets: %s\n",sname);
+X    goto fail;
+X  }
+X  n_off = (size_t)max_cnt + 1;
+X
+X  if (mm64_flag) {
+X    src_long8_read(libi,&m_fd->tot_len);
+X  }
+X  else {
+X    src_long4_read(libi,&lf_size);
+X    m_fd->tot_len = lf_size;
+X  }
+X  src_long4_read(libi,&lf_size);
+X  m_fd->max_len = lf_size;
+X
+#ifdef DEBUG
+X  fprintf(stderr,
+X	  "%s\tformat: %c%c%d %d; max_cnt: %d; tot_len: %lld max_len: %ld\n",
+X	  sname,format[0],format[1],format[2],format[3],
+X	  max_cnt,(long long)m_fd->tot_len,m_fd->max_len);
+#endif
+X
+X  /* 4-byte indices are read into a temporary int array and widened */
+X  if (!mm64_flag) {
+X    if ((tmp_pos_arr=(int *)calloc(n_off,sizeof(int)))==NULL) {
+X      fprintf(stderr," cannot allocate %d for tmp_pos array\n",
+X	      max_cnt+1);
+X      exit(1);		/* same policy as the other offset arrays */
+X    }
+X  }
+X
+X  /* allocate array of description pointers */
+X  if ((d_pos_arr=(MM_OFF *)calloc(n_off, sizeof(MM_OFF)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for desc. array\n",max_cnt+1);
+X    exit(1);
+X  }
+X
+X  /* read m_fd->d_pos[max_cnt+1] */
+X  if (mm64_flag) {
+X    if (fread(d_pos_arr,sizeof(MM_OFF),n_off,libi)!=n_off) {
+X      fprintf(stderr," error reading desc. offsets: %s\n",sname);
+X      goto fail;
+X    }
+X  }
+X  else {
+X    if (fread(tmp_pos_arr,sizeof(int),n_off,libi)!=n_off) {
+X      fprintf(stderr," error reading desc. offsets: %s\n",sname);
+X      goto fail;
+X    }
+#ifdef DEBUG
+X    fprintf(stderr,"d_pos_crc: %ld\n",
+X	    crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
+#endif
+X  }
+X
+X
+#ifndef IS_BIG_ENDIAN
+X  if (mm64_flag)
+X    for (i=0; i<=max_cnt; i++) {
+X      d_pos_arr[i] = bl2_long8_cvt(d_pos_arr[i]);
+X    }
+X  else
+X    for (i=0; i<=max_cnt; i++) {
+X      d_pos_arr[i] = bl2_uint4_cvt(tmp_pos_arr[i]);
+X    }
+#else
+X  if (!mm64_flag) {
+X    for (i=0; i<=max_cnt; i++) {
+X      d_pos_arr[i] = tmp_pos_arr[i];
+X    }
+X  }
+#endif
+X
+#ifdef DEBUG
+X  for (i=0; i<max_cnt-1; i++) {
+X    if (d_pos_arr[i+1] <= d_pos_arr[i] )
+X      fprintf(stderr," ** dpos_error [%d]\t%lld\t%lld\n",
+X	      i,(long long)d_pos_arr[i],(long long)d_pos_arr[i+1]);
+X  }
+#endif
+X
+X  /* allocate array of sequence pointers */
+X  if ((s_pos_arr=(MM_OFF *)calloc(n_off,sizeof(MM_OFF)))==NULL) {
+X    fprintf(stderr," cannot allocate %d for seq. array\n",max_cnt+1);
+X    exit(1);
+X  }
+X
+X  /* read m_fd->s_pos[max_cnt+1] */
+X  if (mm64_flag) {
+X    /* element size must match the array type, as for d_pos_arr */
+X    if (fread(s_pos_arr,sizeof(MM_OFF),n_off,libi)!=n_off) {
+X      fprintf(stderr," error reading seq offsets: %s\n",sname);
+X      goto fail;
+X    }
+X  }
+X  else {
+X    if (fread(tmp_pos_arr,sizeof(int),n_off,libi)!=n_off) {
+X      fprintf(stderr," error reading seq offsets: %s\n",sname);
+X      goto fail;
+X    }
+#ifdef DEBUG
+X    fprintf(stderr,"s_pos_crc: %ld\n",
+X	    crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
+#endif
+X  }
+X
+#ifndef IS_BIG_ENDIAN
+X  if (mm64_flag)
+X    for (i=0; i<=max_cnt; i++)
+X      s_pos_arr[i] = bl2_long8_cvt(s_pos_arr[i]);
+X  else
+X    for (i=0; i<=max_cnt; i++)
+X      s_pos_arr[i] = (long)bl2_uint4_cvt(tmp_pos_arr[i]);
+#else
+X  if (!mm64_flag)
+X    for (i=0; i<=max_cnt; i++)
+X      s_pos_arr[i] = (long)tmp_pos_arr[i];
+#endif
+X
+#ifdef DEBUG
+X  for (i=1; i<max_cnt-1; i++) {
+X    if (s_pos_arr[i+1]<s_pos_arr[i])
+X      fprintf(stderr," ** spos_error [%d]\t%lld\t%lld\n",
+X	      i,(long long)s_pos_arr[i],(long long)s_pos_arr[i+1]);
+X  }
+#endif
+X
+X  free(tmp_pos_arr);	/* NULL in the 64-bit case; free(NULL) is a no-op */
+X
+X  m_fd->max_cnt = max_cnt;
+X  m_fd->d_pos_arr = d_pos_arr;
+X  m_fd->s_pos_arr = s_pos_arr;
+X  m_fd->lpos = 0;
+X
+X  /*   check_mmap(m_fd,-2); */
+X
+X  return m_fd;
+X
+X fail:
+X  /* the index could not be read: release everything acquired above so
+X     that returning NULL does not leak the arrays or the mapping */
+X  free(tmp_pos_arr);
+X  free(d_pos_arr);
+X  free(s_pos_arr);
+X  munmap(m_fd->mmap_base,m_fd->st_size);
+X  m_fd->mmap_base = NULL;
+X  return NULL;
+}
+X
+/* mgets() - fgets()-like line reader for the memory-mapped file.
+X   Copies at most n-1 bytes from m_fd->mmap_addr into s, stopping after
+X   a newline, at a byte equal to (char)EOF, or at the end of the
+X   mapping (mmap_base + st_size); s is always NUL-terminated and
+X   m_fd->mmap_addr is advanced past the bytes consumed.
+X   Returns s, or NULL when nothing was copied because the end of the
+X   data had been reached. */
+char *mgets (char *s, int n, struct lmf_str *m_fd)
+{
+X  char *cs, *mfp, *mfp_end;
+X
+X  mfp = m_fd->mmap_addr;
+X  /* never read beyond the mapped region; the mapping has no sentinel */
+X  mfp_end = m_fd->mmap_base + m_fd->st_size;
+X  cs = s;
+X
+X  while (--n > 0 && mfp < mfp_end && (*mfp != (char)EOF))
+X    if ((*cs++ = *mfp++) == '\n') break;
+X  *cs = '\0';
+X
+X  m_fd->mmap_addr = mfp;
+X  return ((mfp >= mfp_end || *mfp == (char)EOF) && cs == s) ? NULL : s;
+}
+X
+/* agetlibm() - read the next chunk of a sequence from the mapped file.
+X
+X   seq/maxs         - output buffer for encoded residues and its size
+X   libstr/n_libstr  - receives the description line (new entries only)
+X   libpos           - receives the entry number (new entries only)
+X   lcont            - 0 on entry: start the entry at m_fd->lpos;
+X                      non-zero: continue the entry in progress.  Set to
+X                      0 when the entry is finished, incremented when
+X                      the buffer filled first.
+X   l_off            - set to 1, or to the number following "@C:" in
+X                      the description
+X
+X   Returns the number of residues stored (seq is terminated with
+X   EOSEQ), or -1 when there are no more entries or the entry length
+X   from the index is implausible.
+*/
+int
+agetlibm(unsigned char *seq,
+X	 int maxs,
+X	 char *libstr,
+X	 int n_libstr,
+X	 fseek_t *libpos,
+X	 int *lcont,
+X	 struct lmf_str *m_fd,
+X	 long *l_off)
+{
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  char *desc;
+X  int lpos;		/* entry number in library */
+X  unsigned char *seqm, *seqm1;
+X  char *bp;
+X  static long seq_len;
+X  static unsigned char *cp_max;
+#ifdef SUPERFAMNUM
+X  char *bp1, *bpa, *tp;
+X  int i;
+#endif
+X
+X  *l_off = 1;
+X
+X  lpos = m_fd->lpos;
+X
+X  seqp = seq;
+X  /* leave room for one full pass of the unrolled loop plus EOSEQ */
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = m_fd->sascii;
+X
+X  if (*lcont==0) {
+X    if (lpos >= m_fd->max_cnt) return (-1);
+X    seq_len = m_fd->d_pos_arr[lpos+1] - m_fd->s_pos_arr[lpos];
+X    if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
+X      fprintf(stderr," ** sequence over-run: %ld at %d\n",seq_len,lpos);
+X      return(-1);
+X    }
+X    *libpos = (fseek_t)lpos;
+X
+X    /* +1 skips the first character of the description line */
+X    desc = m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1;
+X    strncpy(libstr,desc,n_libstr-1);
+X    libstr[n_libstr-1]='\0';
+X    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
+X    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+X    if (n_libstr > MAX_UID) {
+X      /* examine every character, including the first, and stop at the
+X	 terminator instead of testing it */
+X      for (bp = libstr; *bp; bp++)
+X	if ( *bp=='\001' || *bp=='\t') *bp=' ';
+X    }
+X
+X    for (bp = desc; *bp && (*bp != '\n'); bp++ )
+X      if (*bp == '@' && !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
+X
+#ifdef SUPERFAMNUM
+X    sfnum[0]=nsfnum=0;
+X    strncpy(tline,desc,sizeof(tline));
+X    tline[MAXLINE-1]='\0';
+X    if ((bp=strchr(tline,'\n'))!=NULL) *bp='\0';
+X    if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
+X      if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';
+X      if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
+X	fprintf(stderr," second %c missing: %s\n",SFCHAR,tline);
+X      }
+X      else {
+X	*bp1 = '\0';
+X	i = 0;
+X	if ((tp = strtok(bp+1," \t"))!=NULL) {
+X	  sfnum[i++] = atoi(tp);
+X	  while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
+X	    sfnum[i++] = atoi(tp);
+X	    if (i>=9) break;
+X	  }
+X	}
+X	sfnum[nsfnum=i]= 0;
+X	if (nsfnum>1) sf_sort(sfnum,nsfnum);
+X	else {
+X	  if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
+X	}
+X      }
+X    }
+X    else {
+X      sfnum[0] = nsfnum = 0;
+X    }
+#endif
+X
+X    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+X    cp_max = (unsigned char *)(m_fd->mmap_addr+seq_len);
+X  }
+X
+X  /* translate through ap[]; a code >= NA ends the chain, and the
+X     --seqp below discards that code before the next pass */
+X  for (cp=(unsigned char *)m_fd->mmap_addr; seqp<seqm1; ) {
+X    if ((*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA) continue;
+X    --seqp;
+X    if (cp >= cp_max) break;
+X  }
+X  m_fd->mmap_addr = (char *)cp;
+X
+X  if (seqp>=seqm1) (*lcont)++;
+X  else {
+X    *lcont=0;
+X    lpos++;
+X    m_fd->lpos = lpos;
+X  }
+X  *seqp = EOSEQ;
+X  /*   if ((int)(seqp-seq)==0) return 1; */
+X  return (int)(seqp-seq);
+}
+X
+/* aranlibm() - copy the description of entry number libpos into str
+X   (at most cnt bytes including the terminator), cut at the first
+X   CR/LF, with '\001' and tab characters replaced by spaces.
+X   Sets m_fd->lpos to that entry.  libstr is not used. */
+void
+aranlibm(char *str,
+X	 int cnt,
+X	 fseek_t libpos,
+X	 char *libstr,
+X	 struct lmf_str *m_fd)
+{
+X  char *bp;
+X  int llen;
+X  int lpos;
+X
+X  lpos = (int) libpos;
+X  m_fd->lpos = lpos;
+X
+X  if (cnt <= 0) return;		/* no room for even the terminator */
+X
+X  llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
+X  if (llen >= cnt) llen = cnt-1;
+X  if (llen < 0) llen = 0;	/* inconsistent index: copy nothing */
+X
+X  /* +1 skips the first character of the description line */
+X  strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1,llen);
+X  str[llen]='\0';
+X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X  /* examine every character, including the first */
+X  for (bp = str; *bp; bp++)
+X    if ( *bp=='\001' || *bp=='\t') *bp=' ';
+}
+X
+/* there is no vgetlibm() because vgetlibm() and agetlibm() are
+X   identical - the difference in the two file formats relates to the
+X   location of the sequence, which is already available in spos_arr[].
+X
+X   however vranlibm must accommodate both type 5 and 6 files;
+X   type 6 has extra stuff after the seq_id.
+*/
+X
+/* vranlibm() - build "<seq_id> <description>" for entry number libpos
+X   in str (at most cnt bytes including the terminator) and set
+X   m_fd->lpos to that entry.  The seq_id is taken from the first line
+X   of the entry, starting 4 bytes in; the description is the text that
+X   follows the first newline.  libstr is not used. */
+void
+vranlibm(char *str,
+X	 int cnt,
+X	 fseek_t libpos,
+X	 char *libstr,
+X	 struct lmf_str *m_fd)
+{
+X  char *bp, *mp;
+X  int llen, idlen, room;
+X  int lpos;
+X
+X  lpos = (int)libpos;
+X  m_fd->lpos = lpos;
+X
+X  if (cnt <= 0) return;		/* no room for even the terminator */
+X
+X  llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
+X
+X  mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos];
+X
+X  /* seq_id: at most 20 characters, but never more than str can hold */
+X  idlen = (cnt > 20) ? 20 : cnt-1;
+X  strncpy(str,mp+4,idlen);
+X  str[idlen]='\0';
+X  if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0';
+X  else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' ';
+X
+X  bp = strchr(mp,'\n');
+X  if (bp == NULL) return;	/* no second line to take text from */
+X
+X  llen -= (bp-mp)-5;
+X  /* room = characters that can still be appended while leaving space
+X     for the terminator; computed in signed arithmetic so that a small
+X     cnt cannot wrap around */
+X  room = cnt - (int)strlen(str) - 1;
+X  if (llen > room) llen = room;
+X
+X  if (llen > 0) strncat(str,bp+1,llen);
+X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X  str[cnt-1]='\0';
+}
+X
+/* close_mmap() - release the offset arrays of m_fd and, if the file
+X   is memory mapped (m_fd->mm_flg set), unmap it and free m_fd itself.
+X   When m_fd is freed it must not be used again by the caller; when it
+X   is not freed, mm_flg is left cleared. */
+void
+close_mmap(struct lmf_str *m_fd) {
+X  free(m_fd->s_pos_arr);
+X  free(m_fd->d_pos_arr);
+X  if (m_fd->mm_flg) {
+X    munmap(m_fd->mmap_base,m_fd->st_size);
+X    free(m_fd);
+X    return;		/* m_fd is gone - do not touch it again */
+X  }
+X  m_fd->mm_flg=0;
+}
+X
+/* min(x,y): smaller of two values; both arguments are evaluated more
+X than once, so they must be free of side effects */
+#ifndef min
+#define min(x,y) ((x) > (y) ? (y) : (x))
+#endif
+X
+/* residue code for each 2-bit value (0..3) unpacked from a byte of a
+X binary GCG sequence in gcg_getlibm() */
+static int gcg_bton[4]={2,4,1,3};
+X
+/* gcg_getlibm() - read the next chunk of a GCG-format entry from the
+X   mapped file.
+X
+X   For a new entry (*lcont==0) the header at d_pos_arr[lpos]+4 is
+X   parsed for the name (stored in libstr, cut to 12 characters), the
+X   type (a leading '2' selects the packed 2-bit encoding) and the
+X   length.  Residues are then translated through m_fd->sascii (text)
+X   or gcg_bton[] (packed, 4 residues per byte).
+X
+X   *lcont is set to 0 and m_fd->lpos advanced when the entry has been
+X   read completely; otherwise *lcont is incremented and the remaining
+X   length and read position are saved for the next call.
+X   *l_off is always set to 1.
+X
+X   Returns the number of residues stored in seq (terminated with
+X   EOSEQ), or -1 when there are no more entries.
+*/
+int
+gcg_getlibm(unsigned char *seq,
+X	    int maxs,
+X	    char *libstr,
+X	    int n_libstr,
+X	    fseek_t *libpos,
+X	    int *lcont,
+X	    struct lmf_str *m_fd,
+X	    long *l_off)
+{
+X  char dummy[20];
+X  char gcg_date[6];
+X  char gcg_type[10];
+X  register unsigned char *cp, *seqp, stmp;
+X  register int *ap, lpos;
+X  unsigned char *seqm, *seqm1;
+X  long r_block, b_block, r_fact, r16_block;
+X
+X  *l_off = 1;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = m_fd->sascii;
+X  lpos = m_fd->lpos;
+X
+X  if (*lcont==0) {
+X    if (lpos >= m_fd->max_cnt) return (-1);
+X    /* NOTE(review): the %s conversions are unbounded; a header token
+X       longer than gcg_date[6], gcg_type[10], dummy[20] or libstr
+X       overflows it.  Field widths were not added here because they
+X       would change how over-long headers are split - confirm the
+X       header layout before tightening. */
+X    sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%s %s %s %s %ld\n",
+X	   libstr,gcg_date,gcg_type,dummy,&(m_fd->gcg_len));
+X
+X    m_fd->gcg_binary = (gcg_type[0]=='2');
+X
+X    libstr[12]='\0';
+X    *libpos = lpos;
+X    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+X  }
+X
+X  /* residues to deliver: what fits in seq or what is left of the
+X     entry, whichever is smaller (compared as signed longs) */
+X  r_block = (long)(seqm-seqp);
+X  if (m_fd->gcg_len < r_block) r_block = m_fd->gcg_len;
+X  b_block = r_block;
+X  if (m_fd->gcg_binary) {
+X    r_block = (r_block+3)/4;	/* bytes needed for that many residues */
+X  }
+X
+X  cp=(unsigned char *)m_fd->mmap_addr;
+X  if (!m_fd->gcg_binary) {
+X    r_fact = 1;
+X    r16_block = r_block/16;
+X    while (r16_block-- > 0) {
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X      *seqp++ = ap[*cp++];
+X    }
+X    while (seqp<seq+r_block) *seqp++ = ap[*cp++];
+X  }
+X  else {
+X    r_fact = 4;
+X    r16_block = r_block/8;
+X    while(r16_block-- > 0) {
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X    }
+X
+X    while (seqp < seq+4*r_block) {
+X      stmp = *cp++;
+X      *seqp++ = gcg_bton[(stmp>>6) &3];
+X      *seqp++ = gcg_bton[(stmp>>4) &3];
+X      *seqp++ = gcg_bton[(stmp>>2) &3];
+X      *seqp++ = gcg_bton[(stmp) &3];
+X    }
+X  }
+X  if (r_fact * r_block >= m_fd->gcg_len) {
+X    *lcont = 0;
+X    m_fd->lpos++;
+X  }
+X  else {
+X    if (m_fd->gcg_binary) b_block = 4*r_block;
+X    m_fd->gcg_len -= b_block;
+X    /* remember where this chunk stopped; without this the next call
+X       would start again from the same bytes */
+X    m_fd->mmap_addr = (char *)cp;
+X    (*lcont)++;
+X  }
+X
+X  seq[b_block] = EOSEQ;
+X  /*   if (b_block==0) return 1; else */
+X  return b_block;
+}
+X
+void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);
+X
+/* lgetlibm() - read the next chunk of an entry from the mapped
+X   flat file.
+X
+X   For a new entry (*lcont==0) libstr receives either the 12
+X   characters found 12 bytes into the entry (n_libstr <= 21) or the
+X   header line built by lget_ann_m().  Residues are translated
+X   through m_fd->sascii until a '/' at the start of a line is found or
+X   seq is full; after a newline followed by a space, 11 bytes are
+X   skipped.
+X
+X   *lcont is set to 0 and m_fd->lpos advanced when the entry is
+X   finished; otherwise *lcont is incremented and the read position is
+X   saved.  *l_off is always set to 1.
+X
+X   Returns the number of residues stored in seq (terminated with
+X   EOSEQ), or -1 when there are no more entries.
+*/
+int
+lgetlibm(unsigned char *seq,
+X	 int maxs,
+X	 char *libstr,
+X	 int n_libstr,
+X	 fseek_t *libpos,
+X	 int *lcont,
+X	 struct lmf_str *m_fd,
+X	 long *l_off)
+{
+X  register unsigned char *cp, *seqp;
+X  register int *ap, lpos;
+X  unsigned char *seqm, *seqm1;
+X
+X  *l_off = 1;
+X
+X  seqp = seq;
+X  /* leave room for one full pass of the unrolled loop plus EOSEQ */
+X  seqm = &seq[maxs-11];
+X  seqm1 = seqm-1;
+X
+X  lpos = m_fd->lpos;
+X  ap = m_fd->sascii;
+X
+X  if (*lcont==0) {
+X    if (lpos >= m_fd->max_cnt) return (-1);
+X
+X    if (n_libstr <= 21) {
+X      strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12);
+X      libstr[12]='\0';
+X    }
+X    else {
+X      /* lget_ann_m() reads from m_fd->mmap_addr, so point it at this
+X	 entry's description first (as lranlibm() does) */
+X      m_fd->mmap_addr = m_fd->mmap_base+m_fd->d_pos_arr[lpos];
+X      lget_ann_m(m_fd,libstr,n_libstr);
+X    }
+X    *libpos = lpos;
+X
+X    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
+X    cp = (unsigned char *)m_fd->mmap_addr;
+X  }
+X  else cp = (unsigned char *)m_fd->mmap_addr;
+X
+X  /* a code >= NA ends the chain, and the --seqp below discards it */
+X  while (seqp<seqm1) {
+X    if (*cp=='/' && *(cp-1)=='\n') break;
+X    if ((*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA &&
+X	(*seqp++=ap[*cp++])<NA) continue;
+X    --seqp;
+X    if (*cp=='\n' && *(cp+1)==' ') cp += 11;
+X  }
+X
+X  if (seqp>=seqm1) {
+X    (*lcont)++;
+X    m_fd->mmap_addr = (char *)cp;
+X  }
+X  else {
+X    *lcont=0;
+X    m_fd->lpos++;
+X  }
+X
+X  *seqp = EOSEQ;
+X  return (int)(seqp-seq);
+}
+X
+/* lget_ann_m() - build a FASTA-style header in libstr (at most
+X   n_libstr bytes including the terminator) for the entry that starts
+X   at lm_fd->mmap_addr.
+X
+X   The locus name is taken 12 bytes into the first line; the
+X   DEFINITION, ACCESSION and VERSION lines are then located with
+X   mgets().  The result is
+X       gi|<gi>|gb|<version>|<locus> <definition>   (VERSION found)
+X       <accession> <locus> <definition>            (only ACCESSION)
+X       <locus> <definition>                        (neither)
+X   where the gi|...|gb| prefix is present only when the VERSION line
+X   contains a ':'.  Searching stops at an ORIGIN line or at the end of
+X   the mapped data, so a missing line can no longer cause an endless
+X   loop or a scan into the next entry.
+X   lm_fd->mmap_addr is left wherever the search stopped.
+*/
+void
+lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
+X  char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];
+X  char *desc_txt, *acc_txt, *ver_txt;
+X  int have_desc, have_acc, have_ver, at_origin;
+X
+X  if (n_libstr <= 0) return;
+X
+X  /* copy in locus from the first line of the entry */
+X  strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus)-1);
+X  locus[sizeof(locus)-1] = '\0';	/* strncpy() may not terminate */
+X  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
+X
+X  /* get description */
+X  have_desc = 0;
+X  while (mgets(desc,sizeof(desc),lm_fd)!=NULL) {
+X    if (strncmp(desc,"DEFINITION",10)==0) { have_desc = 1; break; }
+X  }
+X  desc_txt = "";
+X  if (have_desc) {
+X    if ((bp = strchr(desc,'\n'))!=NULL) *bp='\0';
+X    /* the text is used from column 11 on, when the line is that long */
+X    if (strlen(desc) > 11) desc_txt = &desc[11];
+X  }
+X
+X  /* get accession */
+X  have_acc = at_origin = 0;
+X  while (mgets(acc,sizeof(acc),lm_fd)!=NULL) {
+X    if (strncmp(acc,"ACCESSION",9)==0) { have_acc = 1; break; }
+X    if (strncmp(acc,"ORIGIN",6)==0) { at_origin = 1; break; }
+X  }
+X  acc_txt = "";
+X  if (have_acc && strlen(acc) > 12) {
+X    acc_txt = &acc[12];
+X    if ((bp = strchr(acc_txt,'\n'))!=NULL) *bp='\0';
+X    if ((bp = strchr(acc_txt,' '))!=NULL) *bp='\0';
+X  }
+X
+X  /* get version - not attempted once ORIGIN has been passed */
+X  have_ver = 0;
+X  if (!at_origin) {
+X    while (mgets(ver,sizeof(ver),lm_fd)!=NULL) {
+X      if (strncmp(ver,"VERSION",7)==0) { have_ver = 1; break; }
+X      if (strncmp(ver,"ORIGIN",6)==0) break;
+X    }
+X  }
+X  ver_txt = "";
+X  bp_gid = NULL;
+X  if (have_ver && strlen(ver) > 12) {
+X    ver_txt = &ver[12];
+X    if ((bp = strchr(ver_txt,'\n'))!=NULL) *bp='\0';
+X
+X    /* extract gi:123456 from version line */
+X    bp_gid = strchr(ver_txt,':');
+X    if (bp_gid != NULL) {
+X      if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
+X      bp_gid++;
+X    }
+X    if ((bp = strchr(ver_txt,' '))!=NULL) *bp='\0';
+X  }
+X
+X  /* build up FASTA header line; every strncat() is limited to the
+X     space still free in libstr, so strlen(libstr) <= n_libstr-1
+X     holds throughout */
+X  libstr[0]='\0';
+X  if (bp_gid != NULL) {
+X    strncat(libstr,"gi|",n_libstr-1-strlen(libstr));
+X    strncat(libstr,bp_gid,n_libstr-1-strlen(libstr));
+X    strncat(libstr,"|gb|",n_libstr-1-strlen(libstr));
+X  }
+X
+X  /* if we have a version number, use it, otherwise accession,
+X     otherwise locus/description */
+X
+X  if (have_ver) {
+X    strncat(libstr,ver_txt,n_libstr-1-strlen(libstr));
+X    strncat(libstr,"|",n_libstr-1-strlen(libstr));
+X  }
+X  else if (have_acc) {
+X    strncat(libstr,acc_txt,n_libstr-1-strlen(libstr));
+X    strncat(libstr," ",n_libstr-1-strlen(libstr));
+X  }
+X
+X  strncat(libstr,locus,n_libstr-1-strlen(libstr));
+X  strncat(libstr,desc_txt,n_libstr-1-strlen(libstr));
+X  libstr[n_libstr-1]='\0';
+}
+X
+/* lranlibm() - random access for the mapped flat file: position
+X   m_fd->mmap_addr at the description of entry number seek, build its
+X   header in str (at most cnt bytes) with lget_ann_m(), and set
+X   m_fd->lpos to that entry.  libstr is not used. */
+void
+lranlibm(char *str,
+X	 int cnt,
+X	 fseek_t seek,
+X	 char *libstr,
+X	 struct lmf_str *m_fd)
+{
+X  m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[seek];
+X
+X  lget_ann_m(m_fd,str,cnt);
+X
+X  if (cnt > 0) str[cnt-1]='\0';
+X
+X  m_fd->lpos = seek;
+}
+X
+/* number of times check_mmap() has been called */
+static int check_status=0;
+X
+/* check_mmap() - diagnostic consistency check of the offset arrays.
+X   For every entry the length d_pos_arr[i+1]-s_pos_arr[i] must be
+X   non-negative and no more than 5/4 of m_fd->max_len; offending
+X   entries are listed on stderr, and an "OK" line is printed when none
+X   is found.  ntt is only echoed in the messages.  Only the first five
+X   calls do any work. */
+void
+check_mmap(struct lmf_str *m_fd,long ntt) {
+X
+X  int i, ok_stat;
+X  MM_OFF seq_len;	/* same width as the offsets being subtracted */
+X
+X  ok_stat = 1;
+X  if ( ++check_status > 5) return;
+X
+X  fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt);
+X  for (i=0; i<m_fd->max_cnt; i++) {
+X    seq_len = m_fd->d_pos_arr[i+1] - m_fd->s_pos_arr[i];
+X    if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
+X      /* MM_OFF may be long or int64_t; print through long long */
+X      fprintf(stderr,"%d:\t%lld\t%lld\t%lld\n",
+X	      i,(long long)m_fd->d_pos_arr[i],(long long)m_fd->s_pos_arr[i],
+X	      (long long)seq_len);
+X      ok_stat=0;
+X    }
+X  }
+X  /* check_status is at least 1 here, so no further test is needed */
+X  if (ok_stat) {
+X    fprintf(stderr," ** check_mmap OK %s %ld**\n",
+X	    m_fd->lb_name,ntt);
+X  }
+}
+X
+#ifdef DEBUG
+/*  C H K 3  --  Compute a type-3 Kermit block check.  */
+/*
+X Calculate the 16-bit CRC of the n bytes starting at s using a
+X byte-oriented tableless algorithm invented by Andy Lowry (Columbia
+X University).  The magic number 010201 is derived from the CRC-CCITT
+X polynomial x^16+x^12+x^5+1.
+X Each byte is masked to its low 7 bits before use; because the length
+X is passed explicitly, embedded 0 bytes are processed like any other.
+X Returns the CRC; 0 when n <= 0.
+*/
+long
+crck(char *s, int n)
+{
+X  unsigned int c, q;
+X  long crc = 0;
+X
+X  while (n-->0) {
+X    c = *s++;
+X    /* if (parity)*/
+X    c &= 0177;
+X    q = (crc ^ c) & 017;		/* Low-order nibble */
+X    crc = (crc >> 4) ^ (q * 010201);
+X    q = (crc ^ (c >> 4)) & 017;	/* High order nibble */
+X    crc = (crc >> 4) ^ (q * 010201);
+X  }
+X  return(crc);
+}
+#endif
+SHAR_EOF
+chmod 0644 mmgetaa.c ||
+echo 'restore of mmgetaa.c failed'
+Wc_c="`wc -c < 'mmgetaa.c'`"
+test 21318 -eq "$Wc_c" ||
+ echo 'mmgetaa.c: original size 21318, current size' "$Wc_c"
+fi
+# ============= ms1.aa ==============
+# Extract ms1.aa.  An existing ms1.aa is left alone unless the first
+# argument to this archive is "-c".
+if test -f 'ms1.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping ms1.aa (File already exists)'
+else
+echo 'x - extracting ms1.aa (Text)'
+# sed removes one leading "X" from every archived line that has one;
+# chmod runs only if sed succeeded, and a chmod failure is reported.
+sed 's/^X//' << 'SHAR_EOF' > 'ms1.aa' &&
+>test m1
+MPMIL,
+MLLEY,
+MGDAP,
+MDTRX,
+MLCYN
+SHAR_EOF
+chmod 0644 ms1.aa ||
+echo 'restore of ms1.aa failed'
+# Compare the extracted byte count with the size recorded in the archive;
+# a mismatch only prints a message.
+Wc_c="`wc -c < 'ms1.aa'`"
+test 43 -eq "$Wc_c" ||
+ echo 'ms1.aa: original size 43, current size' "$Wc_c"
+fi
+# ============= msg.h ==============
+# Extract msg.h.  An existing msg.h is left alone unless the first
+# argument to this archive is "-c".
+if test -f 'msg.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping msg.h (File already exists)'
+else
+echo 'x - extracting msg.h (Text)'
+# sed removes one leading "X" from every archived line that has one;
+# chmod runs only if sed succeeded, and a chmod failure is reported.
+sed 's/^X//' << 'SHAR_EOF' > 'msg.h' &&
+/* Concurrent read version */
+X
+/* $Name: fa_34_26_5 $ - $Id: msg.h,v 1.9 2006/03/17 18:34:59 wrp Exp $ */
+X
+/* Cube definitions */
+X
+/* NOTE(review): both branches of this #ifdef define the same values */
+#ifdef PVM_SRC
+#define FIRSTNODE 1
+#define FIRSTWORK 1
+#else
+#define FIRSTNODE 1
+#define FIRSTWORK 1
+#endif
+X
+#define MAXNOD 128
+#define ALLTYPES -1
+/* HOSTPID/MANAGEPID/WORKPID are 99/100/101 under IPSC2, 0 otherwise */
+#ifdef IPSC2
+#define HOSTPID 99
+#define MANAGEPID 100
+#define WORKPID 101
+#else
+#define HOSTPID 0
+#define MANAGEPID 0
+#define WORKPID 0
+#endif
+#define MANAGER 0
+#define ALLNODES -1
+#define ALLPIDS -1
+/* message types; mshowalign.c passes MSEQTYPE, LISTTYPE and
+X ALN1TYPE..ALN3TYPE as the message-type argument of
+X pvm_send()/pvm_recv() and MPI_Send()/MPI_Recv().
+X NOTE(review): several names share a value (e.g. MSEQTYPE1 and
+X MSEQTYPE2 are both 8, PARAMTYPE and MSEQTYPE0 are both 7) - confirm
+X that this is intentional. */
+#define STARTTYPE0 0
+#define STARTTYPE1 1
+#define STARTTYPE2 2
+#define STARTTYPE3 3
+#define STARTTYPE4 4
+#define STARTTYPE5 5
+#define STARTTYPE6 6
+#define PARAMTYPE 7
+#define HSEQTYPE 3
+#define MSEQTYPE 4
+#define ONETYPE 5
+#define TWOTYPE 6
+#define MSEQTYPE0 7
+#define MSEQTYPE1 8
+#define MSEQTYPE2 8
+#define LISTTYPE 10
+#define LISTRTYPE 11
+#define CODERTYPE 12
+#define ALN1TYPE 21
+#define ALN2TYPE 22
+#define ALN3TYPE 23
+#define FINISHED 16384 /* this must be larger than BFR */
+X
+/* function selectors; mshowalign.c stores DO_ALIGN_FLG in
+X qm_msg.s_func before asking a worker for an alignment */
+#define DO_SEARCH_FLG 0
+#define DO_OPT_FLG 1
+#define DO_ALIGN_FLG 2
+#define DO_CALC_FLG 3
+SHAR_EOF
+chmod 0644 msg.h ||
+echo 'restore of msg.h failed'
+# Compare the extracted byte count with the size recorded in the archive;
+# a mismatch only prints a message.
+Wc_c="`wc -c < 'msg.h'`"
+test 1085 -eq "$Wc_c" ||
+ echo 'msg.h: original size 1085, current size' "$Wc_c"
+fi
+# ============= mshowalign.c ==============
+if test -f 'mshowalign.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping mshowalign.c (File already exists)'
+else
+echo 'x - extracting mshowalign.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mshowalign.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: mshowalign.c,v 1.43 2007/01/08 15:38:46 wrp Exp $ */
+X
+/* mshowalign.c - show sequence alignments in pvcomplib */
+X
+/*
+X this is a merged version of showalign.c that works properly with
+X both the comp_lib (serial, threaded) and PCOMPLIB parallel versions
+X of the programs.
+X
+X In the serial and current threaded versions of the programs,
+X showalign gets a list of high scoring sequences and must
+X re_getlib() the sequence, do_walign(), and then calculate the
+X alignment.
+X
+X In the PCOMPLIB parallel versions, the worker programs do the
+X aligning, so showalign() must send them the appropriate messages to
+X have the alignment done, and then collect the alignment results
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "msg.h"
+#include "structs.h"
+#include "param.h"
+X
+#ifdef PCOMPLIB
+#ifdef PVM_SRC
+#include "pvm3.h"
+extern int pinums[];
+#endif
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+#include "p_mw.h"
+#else
+#include "mm_file.h"
+#include "mw.h"
+#endif
+X
+#ifndef PCOMPLIB
+X
+/* used to position the library sequence for re_getlib - also gets
+X description */
+#define RANLIB (m_fptr->ranlib)
+X
+extern struct lmf_str *
+re_openlib(struct lmf_str *, int outtty);
+X
+int
+re_getlib(unsigned char *aa1, int maxn, int maxt,
+X int loff, int cont, int term_code,
+X long *loffset, long *l_off,
+X struct lmf_str *m_fptr);
+X
+#include "drop_func.h"
+X
+#endif
+X
+X
+extern void cal_coord(int n0, int n1, long sq0off, long loffset,
+X struct a_struct *aln);
+X
+void initseq(char **, char **, char **, char **, int);
+void freeseq(char **, char **, char **, char **);
+X
+void do_show(FILE *fp, int n0, int n1, int score,
+X char *name0, char *name1, int nml,
+X struct mngmsg m_msg, struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
+X float percent, float gpercent, int lc,
+X struct a_struct *aln, long loffset);
+X
+extern void discons(FILE *fd, struct mngmsg m_msg, struct pstruct pst,
+X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+X int nc,
+X int n0, int n1, char *name0, char *name1, int nml,
+X struct a_struct *aln,
+X long loffset);
+X
+extern void disgraph(FILE *fd, int n0, int n1,
+X float percent, int score,
+X int min0, int min1, int max0, int max1, long sq0off,
+X char *name0, char *name1, int nml, int llen, int markx);
+X
+extern double zs_to_bit(double, int, int);
+X
+extern void
+do_url1(FILE *, struct mngmsg, struct pstruct, char *, int,
+X struct a_struct , long);
+X
+#ifndef A_MARK
+#define A_MARK ">>"
+#endif
+X
+static char l_name[200]; /* link name */
+X
+#ifdef PCOMPLIB
+#define BBP_INFO(info) bbp->desptr->info
+#else
+#define BBP_INFO(info) bbp->info
+#endif
+X
+/* this version does not check for m_msg.e_cut because nshow/nbest has
+X already been set to limit on e_cut */
+X
+/* showalign() - display the alignment for each of the top-scoring
+X   library sequences in bptr[].
+X
+X   At most min(nbest, m_msg.ashow, m_msg.nshow) entries are shown
+X   (ashow < 0 means "use nshow").  The loop stops at the first entry
+X   with score[0] <= 0; when m_msg.quiet==1 and pst.zsflag>=0, entries
+X   with escore < m_msg.e_low are skipped.
+X
+X   Without PCOMPLIB the library sequence is re-read (re_openlib(),
+X   RANLIB(), re_getlib()) into aa1 and re-aligned against aa0[frame]
+X   with do_walign() unless bbp->have_ares is set; the consensus
+X   strings are then built with calcons()/calcons_a().  With PCOMPLIB
+X   the worker bbp->wrkr is sent an alignment request and the finished
+X   consensus strings are received from it.
+X
+X   fp       - output stream
+X   aa0      - (non-PCOMPLIB) query sequences, indexed by frame
+X   aa1,maxn - (non-PCOMPLIB) library sequence buffer and its size
+X   bptr     - best hits, nbest entries
+X   m_msg    - passed by value: the changes made here to m_msg.ashow
+X              and m_msg.aln are not seen by the caller
+X   pst      - scoring parameters, passed by value
+X   f_str    - (non-PCOMPLIB) per-frame function state
+X   qlib and gstring2 are not used.
+X
+X   Calls exit(1) if the library cannot be re-opened.
+*/
+void showalign (FILE *fp,
+#ifndef PCOMPLIB
+X                unsigned char **aa0, unsigned char *aa1, int maxn,
+#endif
+X                struct beststr **bptr, int nbest, int qlib,
+X                struct mngmsg m_msg, struct pstruct pst, char *gstring2
+#ifndef PCOMPLIB
+X                , void **f_str
+#endif
+)
+{
+X  char tmp_str[20];
+X  char info_str[200];
+X  char bline[2048], *bl_ptr, *bp, fmt[40];
+X  int tmp_len, l_llen;
+X  int t_have_ares;
+X  char name0[80], name0s[80], name1[200];
+X  int istart = 0, istop, i = 0, ib, nml;
+X  int n1tot;
+X  struct beststr *bbp;
+X  int nc, lc, maxc;
+X  float percent, gpercent;
+X  char *seqc0, *seqc0a, *seqc1, *seqca;
+X  long loffset, l_off;
+#ifdef PCOMPLIB
+X  struct stage2_str liblist;
+X  struct qmng_str qm_msg;
+#ifdef MPI_SRC
+X  int int_msg_b[10];
+X  MPI_Status mpi_status;
+#endif
+#else
+X  int n1;
+X  struct lmf_str *m_fptr;
+X  int ngap;
+#endif
+X
+#ifdef PCOMPLIB
+X  /* this function has its own copy of qm_msg, so we must fill it
+X     appropriately */
+X  qm_msg.n0 = m_msg.n0;
+X  strncpy(qm_msg.libstr,m_msg.qtitle,sizeof(qm_msg.libstr));
+X  /* strncpy() does not terminate when the title fills the field */
+X  qm_msg.libstr[sizeof(qm_msg.libstr)-1]='\0';
+#endif
+X
+X  /* set the name0,1 label length */
+X  if (m_msg.markx & MX_M10FORM) nml = 12;
+X  else nml = m_msg.nmlen;
+X  /* name0[] is the smallest label buffer; keep nml inside it so that
+X     the name0[nml] and name1[nml] terminators below stay in bounds */
+X  if (nml > (int)sizeof(name0)-1) nml = (int)sizeof(name0)-1;
+X
+X  if (strlen(m_msg.qtitle) > 0) {
+X    if (m_msg.qtitle[0]=='>') strncpy(name0s,&m_msg.qtitle[1],sizeof(name0s));
+X    else strncpy(name0s,m_msg.qtitle,sizeof(name0s));
+X  }
+X  else {
+X    strncpy(name0s,m_msg.tname,sizeof(name0s));
+X  }
+X  name0s[sizeof(name0s)-1]='\0';
+X
+X  if ((bp=strchr(name0s,' '))!=NULL) *bp='\0';
+X
+X  /* NOTE(review): this store is overwritten by the
+X     strncpy(name0,name0s,nml) done for every hit below */
+X  if (m_msg.revcomp) name0[nml-1]='-';
+X
+X  l_llen = m_msg.aln.llen;
+X  if ((m_msg.markx & MX_M9SUMM) && m_msg.show_code != SHOW_CODE_ID) {
+X    l_llen += 40;
+X    if (l_llen > 200) l_llen=200;
+X  }
+X
+X  sprintf(fmt,"%s%%-%ds (%%d %s)\n",A_MARK,l_llen-5,m_msg.sqnam);
+X
+X  if (!(m_msg.markx&MX_M10FORM)) fprintf(fp,"\n");
+X
+X  if (m_msg.ashow < 0) m_msg.ashow = m_msg.nshow;
+X  istart = 0; istop = min(min(nbest,m_msg.ashow),m_msg.nshow);
+X
+X  for (ib=istart; ib<istop; ib++) {
+X    bbp = bptr[ib];
+X
+#ifdef SHOWUN
+X    if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg.qsfnum,BBP_INFO(sfnum))) {
+X      istop = min(istop+1,nbest);
+X      continue;
+X    }
+#endif
+X    if (bbp->score[0] <= 0) break;
+X
+X    if (m_msg.quiet==1 && pst.zsflag>=0
+X        && bbp->escore < m_msg.e_low) continue;
+X
+#ifndef PCOMPLIB
+X    /* get the alignment and score by re-aligning */
+X
+X    if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg.quiet))==NULL)
+X      exit(1);
+X
+X    /* get the description - do not "edit" it yet */
+X
+X    if (!(m_msg.markx & MX_M10FORM)){
+X      if (m_msg.long_info) {tmp_len = sizeof(bline)-1;}
+X      else {tmp_len = l_llen-5;}
+X      RANLIB(bline,tmp_len,bbp->lseek,bbp->libstr,bbp->m_file_p);
+X      bline[tmp_len]='\0';
+X    }
+X    else {
+X      RANLIB(bline,sizeof(bline),bbp->lseek,bbp->libstr,bbp->m_file_p);
+X      bline[sizeof(bline)-1]='\0';
+X    }
+X
+X    n1 = re_getlib(aa1,maxn,m_msg.maxt3,m_msg.loff,bbp->cont,m_msg.term_code,
+X                   &loffset,&l_off,bbp->m_file_p);
+#ifdef DEBUG
+X    if (n1 != bbp->n1) {
+X      fprintf(stderr," library sequence: %s lengths differ: %d != %d\n",
+X              bline,bbp->n1, n1);
+X      /* cast so that the argument always matches %lld */
+X      fprintf(stderr, "offset is: %lld\n",(long long)bbp->lseek);
+X    }
+#endif
+X
+X    if (!bbp->have_ares) {
+X      bbp->sw_score =
+X        do_walign(aa0[bbp->frame],m_msg.n0, aa1, n1, bbp->frame, &pst,
+X                  f_str[bbp->frame], &bbp->a_res, &t_have_ares);
+X    }
+X    else {
+X      pre_cons(aa1,n1,bbp->frame,f_str[bbp->frame]);
+X    }
+X
+X    aln_func_vals(bbp->frame, &m_msg.aln);
+X
+#else /* PCOMPLIB - get the alignment information from a worker */
+X
+X    /* we have a sequence that we need an alignment for -
+X       send a message to the appropriate worker to produce an alignment
+X       qm_msg.slist == 1 -> one alignment
+X       qm_msg.s_func == DO_ALIGN_FLG -> use the alignment function
+X       send mngmsg (MSEQTYPE)
+X       then send number of sequence to be aligned
+X    */
+X
+X    qm_msg.slist = 1;
+X    qm_msg.s_func = DO_ALIGN_FLG;
+X
+X    liblist.seqnm = bbp->seqnm;
+X    liblist.frame = bbp->frame;
+#ifdef PVM_SRC
+X    pvm_initsend(PvmDataRaw);
+X    pvm_pkbyte((char *)&qm_msg,sizeof(struct qmng_str),1);
+X    pvm_send(pinums[bbp->wrkr],MSEQTYPE);
+X
+X    pvm_initsend(PvmDataRaw);
+X    pvm_pkbyte((char *)&liblist,sizeof(struct stage2_str),1);
+X    pvm_send(pinums[bbp->wrkr],LISTTYPE);
+#endif
+#ifdef MPI_SRC
+X    MPI_Send(&qm_msg,sizeof(struct qmng_str),MPI_BYTE,bbp->wrkr,
+X             MSEQTYPE,MPI_COMM_WORLD);
+X    MPI_Send(&liblist,sizeof(struct stage2_str),MPI_BYTE,bbp->wrkr,
+X             LISTTYPE,MPI_COMM_WORLD);
+#endif
+X    /* information should be sent */
+X    /* pick up description */
+X    strncpy(bline,bbp->desptr->bline,l_llen-5);
+X    bline[l_llen-5]='\0';
+#endif /* PCOMPLIB */
+X
+X    if (strlen(bline)==0) {
+X      bline[0]='>';
+X      strncpy(&bline[1],m_msg.lname,l_llen-5);
+X      bline[l_llen-5]='\0';
+X    }
+X    /* re-format bline */
+X    while ((bp=strchr(bline,'\n'))!=NULL) *bp=' ';
+X    if (m_msg.long_info) {
+X      tmp_len = strlen(bline);
+X      bl_ptr = bline;
+X      if (!(m_msg.markx & MX_M10FORM)) while (tmp_len > l_llen) {
+X        for (i=l_llen; i>10; i--)
+X          if (bl_ptr[i]==' ') {
+X            bl_ptr[i]='\n';
+X            break;
+X          }
+X        if (i <= 10) break;
+X        tmp_len -= i;
+X        bl_ptr += i;
+X      }
+X      bline[sizeof(bline)-1]='\0';
+X    }
+X
+X    n1tot = (BBP_INFO(n1tot_p)) ? *BBP_INFO(n1tot_p) : bbp->n1;
+X
+X    /* l_name is used to build an HTML link from the bestscore line to
+X       the alignment.  It can also be used to discriminate multiple hits
+X       from the same long sequence.  Text must match that in showbest.c */
+X
+X    /* name1 is built once, here: copy, terminate, then cut at the
+X       first blank (strchr() needs the terminator to be in place) */
+X    strncpy(name1,bline,sizeof(name1));
+X    name1[sizeof(name1)-1]='\0';
+X    if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
+X    strncpy(l_name,name1,sizeof(l_name));
+X    l_name[sizeof(l_name)-1]='\0';
+X    if ((bp=strchr(&l_name[3],'|'))!=NULL) *bp='\0';
+X    /* strncat()'s count is the room left in l_name, excluding the
+X       terminating '\0' that strncat() always appends */
+X    if (m_msg.nframe > 2) {
+X      sprintf(tmp_str,"_%d",bbp->frame+1);
+X      strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name)-1);
+X    }
+X    else if (m_msg.qframe >= 0 && bbp->frame == 1)
+X      strncat(l_name,"_r",sizeof(l_name)-strlen(l_name)-1);
+X    if (bbp->cont-1 > 0) {
+X      sprintf(tmp_str,":%d",bbp->cont-1);
+X      strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name)-1);
+X    }
+X
+X    if (!(m_msg.markx & MX_M10FORM)) name1[nml]='\0';
+X
+X    /* print out score information; */
+X
+X    if (m_msg.markx & MX_HTML ) {
+X      fprintf (fp,"<A name=%s>\n<tt><pre>\n",l_name);
+X    }
+X    strncpy(name0,name0s,nml);
+X    name0[nml]='\0';
+X
+X    if (pst.zsflag%10 == 6) {
+X      sprintf(info_str," comp: %.5f H: %.5f",bbp->comp,bbp->H);
+X    }
+X    else info_str[0]='\0';
+X
+X    if ((m_msg.markx & MX_ATYPE)!=7 && !(m_msg.markx & MX_M10FORM)) {
+X      fprintf (fp, fmt,bp=bline,n1tot);
+X      if (m_msg.nframe > 2)
+X        fprintf (fp, "Frame: %d",bbp->frame+1);
+X      else if (m_msg.nframe > 1)
+X        fprintf (fp, "Frame: %c",(bbp->frame? 'r': 'f'));
+X      else if (m_msg.qframe >= 0 && bbp->frame > 0 ) {
+X        fputs("rev-comp",fp);
+X        name0[nml-1]='\0';
+X        strcat(name0,"-");
+X      }
+X
+X      if (m_msg.arelv > 0)
+X        fprintf (fp, " %s: %3d", m_msg.alab[0],bbp->score[0]);
+X      if (m_msg.arelv > 1)
+X        fprintf (fp, " %s: %3d", m_msg.alab[1],bbp->score[1]);
+X      if (m_msg.arelv > 2)
+X        fprintf (fp, " %s: %3d", m_msg.alab[2],bbp->score[2]);
+X      fprintf(fp,"%s",info_str);
+X      if (pst.zsflag>=0)
+X        fprintf (fp, " Z-score: %4.1f bits: %3.1f E(): %4.2g",
+X                 bbp->zscore,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),bbp->escore);
+X      fprintf (fp, "\n");
+X    }
+X    else if (m_msg.markx & MX_M10FORM) {
+X      fprintf(fp,">>%s\n",bline);
+X      if (m_msg.qframe > -1) {
+X        if (m_msg.nframe > 2) {
+X          fprintf(fp,"; %s_frame: %d\n",m_msg.f_id0,bbp->frame+1);
+X        }
+X        else {
+X          fprintf(fp,"; %s_frame: %c\n",m_msg.f_id0,(bbp->frame > 0? 'r':'f'));
+X        }
+X      }
+X      fprintf (fp, "; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[0],bbp->score[0]);
+X      if (m_msg.arelv > 1)
+X        fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[1],bbp->score[1]);
+X      if (m_msg.arelv > 2)
+X        fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[2],bbp->score[2]);
+X      if (info_str[0]) fprintf(fp,"; %s_info: %s\n",m_msg.f_id0,info_str);
+X      if (pst.zsflag>=0)
+X        fprintf (fp,"; %s_z-score: %4.1f\n; %s_bits: %3.1f\n; %s_expect: %6.2g\n",
+X                 m_msg.f_id0,bbp->zscore,
+X                 m_msg.f_id0,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),
+X                 m_msg.f_id0,bbp->escore);
+X    }
+X
+X
+#ifdef PCOMPLIB
+X    /* get the sw_score, alignment information, get seqc0, seqc1 */
+X
+#ifdef PVM_SRC
+X    /* get alignment lengths, percents */
+X    pvm_recv(pinums[bbp->wrkr],ALN1TYPE);
+X    pvm_upkint(&nc,1,1);
+X    pvm_upkint(&lc,1,1);
+X    pvm_upkint(&maxc,1,1);
+X
+X    pvm_upkfloat(&percent,1,1);
+X    pvm_upkfloat(&gpercent,1,1);
+X
+X    pvm_upkint(&bbp->sw_score,1,1);
+X    pvm_upkbyte((char *)&m_msg.aln,sizeof(struct a_struct),1);
+X
+X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
+X
+X    pvm_recv(pinums[bbp->wrkr],ALN2TYPE);
+X    pvm_upkbyte(seqc0,maxc,1);
+X    if (m_msg.ann_flg) pvm_upkbyte(seqc0a,maxc,1);
+X    pvm_upkbyte(seqc1,maxc,1);
+X    pvm_upkbyte(seqca,maxc,1);
+#endif
+#ifdef MPI_SRC
+X    MPI_Recv(int_msg_b,4,MPI_INT,bbp->wrkr,ALN1TYPE,MPI_COMM_WORLD,
+X             &mpi_status);
+X    nc = int_msg_b[0];
+X    lc = int_msg_b[1];
+X    maxc = int_msg_b[2];
+X    bbp->sw_score = int_msg_b[3];
+X    MPI_Recv(&percent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
+X             &mpi_status);
+X    MPI_Recv(&gpercent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
+X             &mpi_status);
+X    MPI_Recv(&m_msg.aln,sizeof(struct a_struct),MPI_BYTE,
+X             bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
+X
+X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
+X    MPI_Recv(seqc0,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
+X    if (m_msg.ann_flg)
+X      MPI_Recv(seqc0a,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
+X    MPI_Recv(seqc1,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
+X    MPI_Recv(seqca,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
+#endif
+X
+X    /* l_off is the coordinate of the first residue */
+X    l_off = 1;
+X    /* loffset is the offset of the aa1 in the full sequence */
+X    loffset = bbp->desptr->loffset-l_off;
+X
+#else /* not PCOMPLIB */
+X
+X    /* estimate space for alignment consensus */
+X    if (m_msg.aln.showall==1) {
+X      maxc = bbp->a_res.nres + max(bbp->a_res.min0,bbp->a_res.min1)+
+X        max((m_msg.n0-bbp->a_res.max0),(n1-bbp->a_res.max1))+4;
+X    }
+X    else {
+X      maxc = bbp->a_res.nres + 4*m_msg.aln.llen+4;
+X    }
+X
+X    /* get space to put the sequence alignment consensus */
+X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
+X
+X    /* build consensus from res, nres (done by workers if PCOMPLIB) */
+X    if (!m_msg.ann_flg) {
+X      nc=calcons(aa0[bbp->frame],m_msg.n0,aa1,n1,
+X                 &lc,&m_msg.aln, bbp->a_res, pst, seqc0, seqc1, seqca,
+X                 f_str[bbp->frame]);
+X      memset(seqc0a,' ',nc);
+X      seqc0a[nc]='\0';
+X    }
+X    else {
+X      nc=calcons_a(aa0[bbp->frame],m_msg.aa0a,m_msg.n0,aa1,n1,
+X                   &lc,&m_msg.aln,bbp->a_res,pst, seqc0, seqc0a,
+X                   seqc1, seqca, m_msg.ann_arr,f_str[bbp->frame]);
+X    }
+X
+X    /* PCOMPLIB workers return percent, gpercent, so calculate it here */
+X    if (lc > 0) percent = (100.0*(float)m_msg.aln.nident)/(float)lc;
+X    else percent = -1.00;
+X    ngap = m_msg.aln.ngap_q + m_msg.aln.ngap_l;
+#ifndef SHOWSIM
+X    if (lc-ngap> 0) gpercent =(100.0*(float)m_msg.aln.nident)/(float)(lc-ngap);
+#else
+X    if (lc > 0) gpercent =(100.0*(float)m_msg.aln.nsim)/(float)lc;
+#endif
+X    else gpercent = -1.00;
+#endif
+X
+X    if (max(strlen(seqc0),strlen(seqc1)) > nc) {
+X      /* strlen() returns size_t; cast so the arguments match %lu */
+X      fprintf(stderr," mshowalign: nc/maxc: %d/%d seqc0/1: %lu/%lu\n",
+X              nc,maxc,(unsigned long)strlen(seqc0),
+X              (unsigned long)strlen(seqc1));
+X    }
+X
+X    /* here PCOMPLIB/comp_lib logic is the same */
+X
+#ifdef DEBUG
+X    if (bbp->sw_score < bbp->score[pst.score_ix]) {
+X      fprintf(stderr," *** warning - SW score=%d < opt score=%d ***\n",
+X              bbp->sw_score, bbp->score[pst.score_ix]);
+X    }
+#endif
+X
+X    cal_coord(m_msg.n0,bbp->n1,m_msg.sq0off,loffset+l_off-1,&m_msg.aln);
+X
+#ifndef PCOMPLIB
+X    if (bbp->a_res.nres > 0)
+#endif
+X      do_show(fp, m_msg.n0, bbp->n1, bbp->sw_score, name0, name1, nml,
+X              m_msg, pst, seqc0, seqc0a, seqc1, seqca,
+X              nc, percent, gpercent, lc, &m_msg.aln,
+X              loffset+l_off-1);
+X
+X    if (m_msg.markx & MX_HTML) fprintf(fp,"</pre></tt>\n<hr>\n");
+X    fflush(fp);
+X
+X    freeseq(&seqc0,&seqc0a,&seqc1, &seqca);
+X  }
+X  if (fp!=stdout) fprintf(fp,"\n");
+}
+X
+/* do_show() - write one finished alignment to fp in the style chosen
+X   by m_msg.markx:
+X     MX_AMAP with MX_ATYPE==7 : disgraph() only
+X     MX_M10FORM               : "; key: value" lines, then discons()
+X     otherwise                : a score/identity summary line, an
+X                                optional do_url1() link block (MX_HTML),
+X                                an optional disgraph() map (MX_AMAP),
+X                                then discons() and a blank line
+X
+X   seqc0/seqc0a/seqc1/seqca and nc are handed to discons() unchanged;
+X   percent and gpercent are percentages (printed /100.0 in the
+X   MX_M10FORM output); lc is printed as the overlap length.
+*/
+void do_show(FILE *fp, int n0,int n1, int score,
+X             char *name0, char *name1, int nml,
+X             struct mngmsg m_msg, struct pstruct pst,
+X             char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
+X             float percent, float gpercent, int lc,
+X             struct a_struct *aln, long loffset)
+{
+X  int q_len;
+X
+X  /* graph-only display */
+X  if ((m_msg.markx & MX_AMAP) && ((m_msg.markx & MX_ATYPE)==7)) {
+X    disgraph(fp, n0, n1, percent, score,
+X             aln->amin0, aln->amin1, aln->amax0, aln->amax1, m_msg.sq0off,
+X             name0, name1, nml, aln->llen, m_msg.markx);
+X    return;
+X  }
+X
+X  /* -m 10 style "; key: value" output */
+X  if (m_msg.markx & MX_M10FORM) {
+X    if (pst.sw_flag && m_msg.arelv>0) {
+X      fprintf(fp,"; %s_score: %d\n",m_msg.f_id1,score);
+X    }
+X    fprintf(fp,"; %s_ident: %5.3f\n",m_msg.f_id1,percent/100.0);
+#ifndef SHOWSIM
+X    fprintf(fp,"; %s_gident: %5.3f\n",m_msg.f_id1,gpercent/100.0);
+#else
+X    fprintf(fp,"; %s_sim: %5.3f\n",m_msg.f_id1,gpercent/100.0);
+#endif
+X
+X    fprintf(fp,"; %s_overlap: %d\n",m_msg.f_id1,lc);
+X    discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
+X            n0, n1, name0, name1, nml, aln, loffset);
+X    return;
+X  }
+X
+X  /* conventional display: summary line first */
+X  fprintf(fp,
+X          pst.sw_flag ? "Smith-Waterman score: %d; "
+X                      : "banded Smith-Waterman score: %d; ",
+X          score);
+#ifndef SHOWSIM
+X  fprintf(fp," %6.3f%% identity (%6.3f%% ungapped) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
+X          percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
+X          aln->d_start1,aln->d_stop1);
+#else
+X  fprintf(fp," %6.3f%% identity (%6.3f%% similar) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
+X          percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
+X          aln->d_start1,aln->d_stop1);
+#endif
+X
+X  if (m_msg.markx & MX_HTML) {
+X    do_url1(fp, m_msg, pst, l_name,n1,*aln,loffset);
+X  }
+X
+X  if ((m_msg.markx & MX_AMAP) && ((m_msg.markx & MX_ATYPE)!=7)) {
+X    fputc('\n',fp);
+X
+X    /* a DNA query against a protein library is drawn at n0/3 */
+X    q_len = (m_msg.qdnaseq == SEQT_DNA && m_msg.ldnaseq== SEQT_PROT)
+X      ? n0/3 : n0;
+X
+X    disgraph(fp, q_len, n1, percent, score,
+X             aln->amin0, aln->amin1,
+X             aln->amax0, aln->amax1,
+X             m_msg.sq0off,
+X             name0, name1, nml, aln->llen,m_msg.markx);
+X  }
+X
+X  discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
+X          n0, n1, name0, name1, nml, aln, loffset);
+X
+X  fputc('\n',fp);
+}
+X
+X
+#ifndef MPI_SRC
+/* initseq() - allocate the four consensus arrays as one zero-filled
+X   block of 4*seqsiz chars.  *seqc0 is the start of the block;
+X   *seqc0a, *seqc1 and *seqca follow at seqsiz-char intervals.
+X   Prints a message and calls exit(1) if the allocation fails.
+*/
+void
+initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz)
+{
+X  char *base;
+X
+X  base = (char *)calloc((size_t)seqsiz*4,sizeof(char));
+X  *seqc0 = base;
+X  if (base==NULL) {
+X    fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
+X    exit(1);
+X  }
+X
+X  /* carve the block into four consecutive seqsiz-char arrays */
+X  base += seqsiz;
+X  *seqc0a = base;
+X  base += seqsiz;
+X  *seqc1 = base;
+X  base += seqsiz;
+X  *seqca = base;
+}
+X
+/* freeseq() - release the consensus arrays.  Only *seqc0 is passed to
+X   free(); the other three arguments are not used.  None of the
+X   caller's pointers is reset.
+*/
+void freeseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca)
+{
+X  char *base = *seqc0;
+X
+X  free(base);
+}
+#endif
+SHAR_EOF
+chmod 0644 mshowalign.c ||
+echo 'restore of mshowalign.c failed'
+Wc_c="`wc -c < 'mshowalign.c'`"
+test 17780 -eq "$Wc_c" ||
+ echo 'mshowalign.c: original size 17780, current size' "$Wc_c"
+fi
+# ============= mshowbest.c ==============
+if test -f 'mshowbest.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping mshowbest.c (File already exists)'
+else
+echo 'x - extracting mshowbest.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mshowbest.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: mshowbest.c,v 1.44 2006/06/30 19:46:36 wrp Exp $ */
+X
+/* 29-Oct-2003 - changes so that bbp->cont < 0 => aa1 sequence is
+X already in aa1, no re_openlib or re_getlib required
+*/
+X
+/* 14-May-2003 Changes to use a more consistent coordinate numbering
+X system for displays. aln->d_start[01] is now consistently used
+X to report the start of the alignment in all functions, and
+X mshowbest.c has been modified to use d_start[01] instead of
+X d_start[01]-1. aln->min[01] now starts at 0 for all functions;
+X instead of 1 for some functions (dropnfa.c, dropgsw.c, dropfs2.c
+X earlier).
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+X
+#ifndef PCOMPLIB
+#include "mm_file.h"
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+X
+#define MAX_BLINE 256
+X
+#ifndef PCOMPLIB
+/* function calls necessary to re_getlib() the sequence and, do
+X alignments, if necessary
+*/
+X
+#define RANLIB (m_fptr->ranlib)
+X
+int
+re_getlib(unsigned char *, int, int, int, int, int, long *, long *,
+X struct lmf_str *m_fptr);
+X
+#include "drop_func.h"
+X
+struct lmf_str *re_openlib(struct lmf_str *, int outtty);
+#endif
+X
+extern void cal_coord(int n0, int n1, long sq0off, long loffset,
+X struct a_struct *aln);
+X
+void header_aux(FILE *);
+void show_aux(FILE *, struct beststr *);
+void w_abort (char *p, char *p1);
+X
+/* BBP_INFO get stuff directly from beststr or from beststr->desptr */
+#ifdef PCOMPLIB
+#define BBP_INFO(info) bbp->desptr->info
+#else
+#define BBP_INFO(info) bbp->info
+#endif
+X
+extern double zs_to_bit(double, int, int);
+X
+/* showbest() shows a list of high scoring sequence descriptions, and
+X their scores. If -m 9, then an additional complete set of
+X alignment information is provided.
+X
+X If PCOMPLIB or m_msg.quiet then the number of high scores to be
+X shown is pre-determined by m_msg.mshow before showbest is called.
+X
+X The comp_lib.c version re_getlib()'s the sequence for its
+X description, and then does another alignment for -m 9 (thus, it
+X needs an f_str). The PCOMPLIB version has everything available in
+X beststr before showbest() is called.
+*/
+X
+void showbest (FILE *fp,
+#ifndef PCOMPLIB
+X unsigned char **aa0, unsigned char *aa1, int maxn,
+#endif
+X struct beststr **bptr,int nbest, int qlib, struct mngmsg *m_msg,
+X struct pstruct pst, struct db_str db,
+X char *gstring2
+#ifndef PCOMPLIB
+X ,void **f_str
+#endif
+)
+{
+X int ntmp = 0;
+X char bline[MAX_BLINE], fmt[40], pad[MAX_BLINE], rline[40];
+X char l_name[128];
+X int istart = 0, istop, ib;
+X int nshow;
+X int quiet;
+X int r_margin;
+X struct beststr *bbp;
+X int n1tot;
+X char *bp;
+X char rel_label[12];
+X char tmp_str[20], *seqc;
+X int seqc_len;
+X long loffset, l_off;
+X int n0, n1;
+X struct rstruct rst;
+X int lc, maxc, nident, ngap;
+X float percent, gpercent;
+X struct a_struct *aln_p;
+X int *tres;
+X int gi_num;
+X
+#ifndef PCOMPLIB
+X struct lmf_str *m_fptr;
+#endif
+X
+X strncpy(rel_label,"\0",2);
+#ifdef SHOWREL
+X strncpy(rel_label," related",sizeof(rel_label));
+#endif
+#ifdef SHOWUN
+X strncpy(rel_label," unrelated",sizeof(rel_label));
+#endif
+X rel_label[sizeof(rel_label)-1]='\0';
+X
+#ifdef PCOMPLIB
+X quiet = 1;
+#else
+X quiet = m_msg->quiet;
+#endif
+X
+X n0 = m_msg->n0;
+X
+X if (m_msg->aln.llen > MAX_BLINE) m_msg->aln.llen = MAX_BLINE;
+X
+X if (pst.zsflag < 0) r_margin = 10;
+X else if (pst.zsflag>=0 && m_msg->srelv > 1 ) r_margin = 19;
+X else r_margin = 10;
+X
+X if (m_msg->markx & MX_M9SUMM && m_msg->show_code == SHOW_CODE_ID) {
+#ifdef SHOWSIM
+X r_margin += 15;
+#else
+X r_margin += 10;
+#endif
+X }
+X
+X if (m_msg->nframe < 0)
+#ifndef SUPERFAMNUM
+X sprintf(fmt,"%%-%ds (%%4d)",m_msg->aln.llen-r_margin);
+#else
+X sprintf(fmt,"%%-%ds [%%4d](%%4d)",m_msg->aln.llen-(r_margin+4));
+#endif
+X else
+X sprintf(fmt,"%%-%ds (%%4d)",m_msg->aln.llen-(r_margin+4));
+X
+X memset(pad,' ',m_msg->aln.llen-(r_margin+6));
+X pad[m_msg->aln.llen-(r_margin+12)]='\0';
+X
+X if (quiet != -1) { /* quiet is set to -1 in comp_mlib.c to force
+X all significant hits to be shown */
+X nshow = 20;
+X if (m_msg->mshow == -1) nshow = nbest; /* show all */
+X /* show specified number */
+X else if (m_msg->mshow_flg) {
+X nshow = min (m_msg->mshow, nshow);
+X }
+X }
+X else nshow = m_msg->nshow;
+X
+X if (quiet==0) istop = 20;
+X else istop = nshow;
+X
+X if (quiet==0) {
+X printf(" How many scores would you like to see? [%d] ",m_msg->nshow);
+X fflush(stdout);
+X if (fgets(rline,20,stdin)==NULL) exit(0);
+X nshow = m_msg->nshow;
+X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
+X if (nshow<=0) nshow = min(20,nbest);
+X }
+X
+X if ((bp = strchr (m_msg->qtitle, '\n')) != NULL) *bp = '\0';
+/* fprintf (fp, "%3d %s\n", qlib,m_msg->qtitle); */
+X
+X if (m_msg->markx & MX_HTML) fprintf(fp,"<p><tt><pre>\n");
+X
+X if (pst.zsflag >= 0) {
+X if (bptr[0]->escore < m_msg->e_cut) {
+X if (m_msg->z_bits==1) {/* show bit score */
+X fprintf(fp,"The best%s scores are:%s%s bits E(%ld)",
+X rel_label,pad,m_msg->label,pst.zdb_size);
+X }
+X else {/* show z-score */
+X fprintf(fp,"The best%s scores are:%s%s z-sc E(%ld)",
+X rel_label,pad,m_msg->label,pst.zdb_size);
+X }
+X header_aux(fp);
+X if (m_msg->markx & MX_M9SUMM) {
+X if (m_msg->show_code == SHOW_CODE_ID) {
+#ifdef SHOWSIM
+X fprintf(fp," %%_id %%_sim alen");
+#else
+X fprintf(fp," %%_id alen");
+#endif
+X }
+X else {
+X if (m_msg->markx & MX_HTML && m_msg->show_code !=1) { fprintf(fp,"<!-- ");}
+#ifndef SHOWSIM
+X fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
+#else
+X fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
+#endif
+X }
+X if (m_msg->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
+X if (m_msg->markx & MX_HTML && m_msg->show_code!=1) { fprintf(fp," -->");}
+X }
+X fprintf(fp,"\n");
+X }
+X else {
+X fprintf(fp,"!! No library sequences with E() < %.2g\n",m_msg->e_cut);
+X m_msg->nshow = 0;
+X if (m_msg->markx & MX_HTML) fprintf(fp,"<p></tt></pre>\n");
+X return;
+X }
+X }
+X else {
+X fprintf(fp,"The best%s scores are:%s%s",rel_label,pad,m_msg->label);
+X header_aux(fp);
+X if (m_msg->markx & MX_M9SUMM) {
+X if (m_msg->show_code == SHOW_CODE_ID) {
+#ifdef SHOWSIM
+X fprintf(fp," %%_id %%_sm alen");
+#else
+X fprintf(fp," %%_id alen");
+#endif
+X }
+X else {
+#ifndef SHOWSIM
+X fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
+#else
+X fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
+#endif
+X }
+X }
+X if (m_msg->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
+X fprintf(fp,"\n");
+X }
+X
+X istart = 0;
+l1:
+X istop = min(nbest,nshow);
+X for (ib=istart; ib<istop; ib++) {
+X bbp = bptr[ib];
+#ifdef SUPERFAMNUM
+X if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg->qsfnum_n,BBP_INFO(sfnum))) continue;
+#ifdef SHOWUN
+X if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg->qsfnum,BBP_INFO(sfnum))) {
+X istop = min(istop+1,nbest);
+X /*
+X fprintf(stderr,"skipping %d: %d==%d\n",ib,m_msg->qsfnum,BBP_INFO(sfnum));
+X */
+X continue;
+X }
+#endif
+#ifdef SHOWREL
+X if (BBP_INFO(nsfnum) == 0 || (BBP_INFO(nsfnum) > 0 && !sfn_cmp(m_msg->qsfnum,BBP_INFO(sfnum)))) {
+X istop = min(istop+1,nbest);
+X /*
+X fprintf(stderr,"skipping %d: %d==%d\n",ib,m_msg->qsfnum,BBP_INFO(sfnum));
+X */
+X continue;
+X }
+#endif
+#endif
+X if (quiet==1 && pst.zsflag>=0) {
+X if (bbp->escore > m_msg->e_cut) {
+X nshow = ib;
+X goto done;
+X }
+X else if (bbp->escore < m_msg->e_low) continue;
+X }
+X
+#ifndef PCOMPLIB
+X if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg->quiet))==NULL) {
+X fprintf(stderr,"*** cannot re-open %s\n",bbp->m_file_p->lb_name);
+X exit(1);
+X }
+X RANLIB(bline,m_msg->aln.llen,bbp->lseek,bbp->libstr,m_fptr);
+#else
+X strncpy(bline,BBP_INFO(bline),m_msg->aln.llen-r_margin);
+X bline[m_msg->aln.llen]='\0';
+#endif
+X
+X /* l_name is used to build an HTML link from the bestscore line to
+X the alignment. It can also be used to discriminate multiple hits
+X from the same long sequence. This requires that fast_pan use -m 6. */
+X
+X strncpy(l_name,bline,sizeof(l_name)); /* get rid of text after second "|" */
+X l_name[sizeof(l_name)-1]='\0';
+X if ((bp=strchr(l_name,' '))!=NULL) *bp=0;
+X if ((bp=strchr(&l_name[3],'|'))!=NULL) *bp='\0';
+X if (m_msg->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
+X else if (m_msg->nframe > 0 && bbp->frame == 1)
+X strncat(l_name,"_r",sizeof(l_name));
+X if (bbp->cont-1 > 0) {
+X sprintf(tmp_str,":%d",bbp->cont-1);
+X strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name));
+X }
+X
+X
+#ifndef PCOMPLIB
+X if (m_msg->stages>1 || m_msg->markx & MX_M9SUMM) {
+X if (bbp->cont >= 0) {
+X n1 = re_getlib(aa1,maxn,m_msg->maxt3,m_msg->loff,bbp->cont,m_msg->term_code,
+X &loffset,&l_off,bbp->m_file_p);
+X }
+X else { n1 = maxn;}
+X if (! m_msg->markx & MX_M9SUMM) {
+X do_opt (aa0[bbp->frame], m_msg->n0, aa1, n1, bbp->frame, &pst, f_str[bbp->frame], &rst);
+X bbp->score[2]=rst.score[2];
+X }
+X else {
+X bbp->sw_score =
+X do_walign(aa0[bbp->frame],m_msg->n0, aa1, n1, bbp->frame,
+X &pst, f_str[bbp->frame], &bbp->a_res, &bbp->have_ares);
+X
+X
+X /* save the alignment encoding for future use */
+X if (bbp->have_ares && ((tres = calloc(bbp->a_res.nres+1,sizeof(int)))!=NULL)) {
+X memcpy(tres,bbp->a_res.res,sizeof(int)*bbp->a_res.nres);
+X bbp->a_res.res = tres;
+X }
+X
+X aln_func_vals(bbp->frame, &m_msg->aln);
+X
+X maxc = bbp->a_res.nres + 4*m_msg->aln.llen+4;
+X seqc = NULL;
+X seqc_len = 0;
+X if (m_msg->show_code == SHOW_CODE_ALIGN) {
+X if ((seqc=(char *)calloc(maxc,sizeof(char)))!=NULL) {
+X lc=calc_code(aa0[bbp->frame],m_msg->n0,
+X aa1,n1,
+X &m_msg->aln,bbp->a_res,
+X pst,seqc,maxc,f_str[bbp->frame]);
+X seqc_len = strlen(seqc);
+X }
+X }
+X else {
+X lc=calc_id(aa0[bbp->frame],m_msg->n0,aa1,n1,
+X &m_msg->aln, bbp->a_res,
+X pst,f_str[bbp->frame]);
+X }
+X m_msg->aln.a_len = lc;
+X
+X nident = m_msg->aln.nident;
+X if (lc > 0) percent = (100.0*(float)nident)/(float)lc;
+X else percent = -1.00;
+X
+X ngap = m_msg->aln.ngap_q + m_msg->aln.ngap_l;
+#ifndef SHOWSIM
+X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
+X else gpercent = -1.00;
+#else
+X if (lc-ngap > 0) gpercent = (100.0*(float)m_msg->aln.nsim)/(float)(lc);
+X else gpercent = -1.00;
+#endif
+X
+X }
+X }
+#endif
+X
+X n1tot = (BBP_INFO(n1tot_p)) ? *BBP_INFO(n1tot_p) : bbp->n1;
+X
+X bp = bline;
+X if ((m_msg->markx & MX_HTML) && !strncmp(bline,"gi|",3)) {
+X bp = strchr(bline+4,'|')+1;
+X *(bp-1) = 0;
+X gi_num = atoi(bline+3);
+X }
+X
+#ifndef SUPERFAMNUM
+X bp[m_msg->aln.llen-r_margin]='\0';
+#else
+X bp[m_msg->aln.llen-r_margin-5]='\0';
+#endif
+X
+X if (m_msg->nframe == -1) bp[m_msg->aln.llen-r_margin]='\0';
+X else bp[m_msg->aln.llen-(r_margin+4)]='\0';
+X
+#ifndef SUPERFAMNUM
+X fprintf (fp, fmt,bp,n1tot);
+#else
+X if (m_msg->nframe == -1) {
+X fprintf (fp, fmt,bp,BBP_INFO(sfnum[0]),n1tot);
+X }
+X else {fprintf (fp, fmt,bp,n1tot);}
+#endif
+X
+X if (m_msg->nframe > 2) fprintf (fp, " [%d]", bbp->frame+1);
+X else if (m_msg->nframe >= 0) fprintf(fp," [%c]",(bbp->frame > 0 ?'r':'f'));
+X
+X if (m_msg->srelv == 1) fprintf (fp, " %4d", bbp->score[pst.score_ix]);
+X else {
+X if (m_msg->srelv-1 > 0) fprintf (fp, " %4d", bbp->score[0]);
+X if (m_msg->srelv-1 > 1 || m_msg->stages>1)
+X fprintf (fp, " %4d", bbp->score[1]);
+X fprintf (fp, " %4d", bbp->score[pst.score_ix]);
+X }
+X
+X if (pst.zsflag>=0) {
+X if (m_msg->z_bits==1) {
+X fprintf (fp, " %.1f %7.2g",zs_to_bit(bbp->zscore,m_msg->n0,bbp->n1),bbp->escore);
+X }
+X else fprintf (fp, " %.1f %7.2g",bbp->zscore,bbp->escore);
+X }
+X show_aux(fp,bbp);
+X
+#ifdef PCOMPLIB
+X n1 = bbp->n1;
+X percent = bbp->percent;
+X gpercent = bbp->gpercent;
+X aln_p = bbp->aln_d;
+X seqc = bbp->aln_code;
+X seqc_len = bbp->aln_code_n;
+X loffset = bbp->desptr->loffset;
+X l_off = 0;
+#else
+X aln_p = &(m_msg->aln);
+#endif
+X
+X if (m_msg->markx & MX_M9SUMM) {
+X if (m_msg->show_code != SHOW_CODE_ID) {
+X if (m_msg->markx & MX_HTML) fprintf(fp,"<!-- ");
+X cal_coord(m_msg->n0,bbp->n1,m_msg->sq0off,loffset+l_off-1,aln_p);
+X
+X /* %_id %_sim s-w alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs */
+X /* alignment min max min max */
+X /* sequence coordinate min max min max */
+X fprintf(fp,"\t%5.3f %5.3f %4d %4d %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %3d %3d %3d",
+X percent/100.0,gpercent/100.0, bbp->sw_score,aln_p->a_len,
+X aln_p->d_start0,aln_p->d_stop0,
+X m_msg->sq0off, m_msg->sq0off+m_msg->n0-1,
+X aln_p->d_start1,aln_p->d_stop1,
+X loffset+l_off, loffset+l_off+bbp->n1-1,
+X aln_p->ngap_q,aln_p->ngap_l,aln_p->nfs);
+X if (m_msg->show_code == SHOW_CODE_ALIGN
+X && seqc_len > 0 && seqc != NULL) {
+X fprintf(fp,"\t%s",seqc);
+X /* fprintf(fp," [%2d:%d]",bbp->wrkr,bbp->seqnm); */
+X free(seqc);
+X seqc = NULL;
+X }
+X if (m_msg->markx & MX_HTML) fprintf(fp," -->");
+X }
+X else {
+#ifdef SHOWSIM
+X fprintf(fp," %5.3f %5.3f %4d", percent/100.0,(float)aln_p->nsim/(float)aln_p->a_len,aln_p->a_len);
+#else
+X fprintf(fp," %5.3f %4d", percent/100.0,aln_p->a_len);
+#endif
+X }
+X }
+X if (m_msg->markx & MX_HTML) fprintf(fp," <A HREF=\"#%s\">align</A>",l_name);
+X fprintf (fp, "\n");
+X fflush(fp);
+X }
+X
+X if (quiet==0) {
+X printf(" More scores? [0] ");
+X fflush(stdout);
+X if (fgets(rline,20,stdin)==NULL) exit(0);
+X ntmp = 0;
+X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
+X if (ntmp<=0) ntmp = 0;
+X if (ntmp>0) {
+X istart = istop;
+X nshow += ntmp;
+X goto l1;
+X }
+X }
+X else if (quiet == 1)
+X if (ib < nbest && (pst.zsflag>=0 && bbp->escore < m_msg->e_cut)) {
+X if (m_msg->mshow_flg && istop >= m_msg->mshow) goto done;
+X istart=istop;
+X nshow += 10;
+X goto l1;
+X }
+X
+X done:
+X m_msg->nshow = nshow;
+X if (m_msg->markx & MX_HTML) fprintf(fp,"</pre></tt><p><hr><p>\n");
+X if (fp!=stdout) fprintf(fp,"\n");
+}
+X
+/*
+X q[] has one set of sfnums, 0 terminated
+X s[] has second
+X return first match or 0
+*/
+SHAR_EOF
+chmod 0644 mshowbest.c ||
+echo 'restore of mshowbest.c failed'
+Wc_c="`wc -c < 'mshowbest.c'`"
+test 14393 -eq "$Wc_c" ||
+ echo 'mshowbest.c: original size 14393, current size' "$Wc_c"
+fi
+# ============= mu.lib ==============
+if test -f 'mu.lib' -a X"$1" != X"-c"; then
+ echo 'x - skipping mu.lib (File already exists)'
+else
+echo 'x - extracting mu.lib (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mu.lib' &&
+>GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
+QSNAILRY
+LARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFA
+GDKVTYVD
+FLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
+>GTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
+PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKIT
+QSNAI
+LCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRP
+WFAGN
+KITFVDFLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
+>GTMU_CRILO GLUTATHIONE S-TRANSFERASE Y1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+PMILGYWNVRGLTNPIRLLLEYTDSSYEEKKYTMGDAPDSDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
+QSNAI
+LRYIARKHNLCGETEEERIRVDIVENQAMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKMYSEFLGKRP
+WFAGD
+KVTLCGFLAYDVLDQYQMFEPKCLDPFPNLKDFLARFEGLKKISAYMKTSRFLRRPIFSKMAQWSNK
+>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGSRKIT
+QSNAI
+MRYLARKHHLCGETEEERIRADIVENQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRP
+WFAGD
+KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSSRYLSTPIFSKLAQWSNK
+>GTMU_RABIT GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GST MU I) (CLASS-MU
+PMTLGYWDVRGLALPIRMLLEYTDTSYEEKKYTMGDAPNYDQSKWLSEKFTLGLDFPNLPYLIDGTHKLT
+QSNAI
+LRYLARKHGLCGETEEERIRVDILENQLMDNRFQLVNVCYSPDFEKLKPEYLKGLPEKLQLYSQFLGSLP
+WFAGD
+KITFADFLVYDVLDQNRIFVPGCLDAFPNLKDFHVRFEGLPKISAYMKSSRFIRVPVFLKKATWTGI
+>GTM4_HUMAN GLUTATHIONE S-TRANSFERASE MU 4 (EC 2.5.1.18) (GSTM4-4) (GTS-MU2)
+MSMTLGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKI
+TQSNAILC
+YIARKHNLCGETEEEKIRVDILENQAMDVSNQLARVCYSPDFEKLKPEYLEELPTMMQHFSQFLGKRPWF
+VGDKITFV
+DFLAYDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPKPLYTRVAVWGNK
+>GLNA_ANASP GLUTAMINE SYNTHETASE (EC 6.3.1.2) (GLUTAMATE--AMMONIA LIGASE).
+TTPQEVLKRIQDEKIELIDLKFIDTVGTWQHLTLYQNQIDESSFSDGVPFDGSSIRGWKAINESDMTMVL
+DPNTA
+WIDPFMEVPTLSIVCSIKEPRTGEWYNRCPRVIAQKAIDYLVSTGIGDTAFFGPEAEFFIFDSARFAQNA
+NEGYY
+FLDSVEGAWNSGKEGTADKPNLAYKPRFKEGYFPVSPTDSFQDIRTEMLLTMAKLGVPIEKHHHEVATGG
+QCELG
+FRFGKLIEAADWLMIYKYVIKNVAKKYGKTVTFMPKPIFGDNGSGMHCHQSIWKDGKPLFAGDQYAGLSE
+MGLYY
+IGGLLKHAPALLAITNPSTNSYKRLVPGYEAPVNLAYSQGNRSASIRIPLSGTNPKAKRLEFRCPDATSN
+PYLAF
+AAMLCAGIDGIKNKIHPGEPLDKNIYELSPEELAKVPSTPGSLELALEALENDHAFLTDTGVFTEDFIQN
+WIDYK
+LANEVKQMQLRPHPYEFSIYYDV
+SHAR_EOF
+chmod 0644 mu.lib ||
+echo 'restore of mu.lib failed'
+Wc_c="`wc -c < 'mu.lib'`"
+test 2361 -eq "$Wc_c" ||
+ echo 'mu.lib: original size 2361, current size' "$Wc_c"
+fi
+# ============= musplfm.aa ==============
+if test -f 'musplfm.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping musplfm.aa (File already exists)'
+else
+echo 'x - extracting musplfm.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'musplfm.aa' &&
+>musplfm transl. of musplfm.seq, 2 to 676
+X M L P S L I Q P C S W I L L L
+X L L V N S S L L W K N V A S F P
+X M C A M R N G R C F M S F E D T
+X F E L A G S L S H N I S I E V S
+X E L F T E F E K H Y S N V S G L
+X R D K S P M R C N T S F L P T P
+X E N K E Q A R L T H Y S A L L K
+X S G A M I L D A W E S P L D D L
+X V S E L S T I K N V P D I I I S
+X K A T D I K K K I N A V R N G V
+X N A L M S T M L Q N G D E E K K
+X N P A W F L Q S D N E D A R I H
+X S L Y G M I S C L D N D F K K V
+X D I Y L N V L K C Y M L K I D N
+X C
+SHAR_EOF
+chmod 0644 musplfm.aa ||
+echo 'restore of musplfm.aa failed'
+Wc_c="`wc -c < 'musplfm.aa'`"
+test 953 -eq "$Wc_c" ||
+ echo 'musplfm.aa: original size 953, current size' "$Wc_c"
+fi
+# ============= mw.h ==============
+if test -f 'mw.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping mw.h (File already exists)'
+else
+echo 'x - extracting mw.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mw.h' &&
+/* Concurrent read version */
+X
+/* $Name: fa_34_26_5 $ - $Id: mw.h,v 1.20 2006/03/20 17:38:15 wrp Exp $ */
+X
+#include <sys/types.h>
+X
+#include "aln_structs.h"
+X
+#ifndef FSEEK_T_DEF
+#ifndef USE_FSEEKO
+typedef long fseek_t;
+#else
+typedef off_t fseek_t;
+#endif
+#endif
+X
+struct beststr { /* per-hit record; NOTE(review): presumably the type behind the bbp-> accesses in mshowbest.c - confirm */
+X int n1; /* sequence length */
+X int *n1tot_p; /* pointer (or NULL) to long sequence length */
+X int score[3]; /* score */
+X int sw_score; /* do_walign() score */
+X double comp;
+X double H;
+X double zscore; /* NOTE(review): mshowbest.c prints bbp->zscore (or its bit-score form) - confirm it is this field */
+X double escore; /* NOTE(review): mshowbest.c prints bbp->escore and compares it with e_cut - confirm it is this field */
+X int segnum;
+X int seglen;
+X struct lmf_str *m_file_p; /* library handle; struct lmf_str is the type returned by mysql_openlib() */
+X fseek_t lseek; /* presumably the library position of the entry (cf. libpos in mysql_getlib()) - confirm */
+X char libstr[MAX_UID]; /* identifier text; mysql_getlib() writes at most MAX_UID-1 characters plus NUL for buffers of this size */
+X int cont;
+X int frame;
+X int nsfnum; /* cf. the nsfnum/sfnum[10] globals defined under SUPERFAMNUM in mysql_lib.c */
+X int sfnum[10];
+X long loffset;
+X struct a_struct aln_d; /* these values are used by -m9 */
+X struct a_res_str a_res; /* need only a_res, not a_res[2], because different frames
+X for the same sequence are stored separately */
+X int have_ares; /* presumably non-zero once a_res has been filled in - confirm */
+X float percent, gpercent; /* NOTE(review): likely the identity percentages mshowbest.c computes as 100*nident/len - confirm */
+};
+X
+struct stat_str { /* reduced record: every member below has the same name and type as a member of struct beststr */
+X int score; /* a single score (struct beststr keeps score[3]) */
+X int n1; /* sequence length, as in struct beststr */
+X double comp;
+X double H;
+X double escore;
+X int segnum;
+X int seglen;
+};
+X
+X
+SHAR_EOF
+chmod 0644 mw.h ||
+echo 'restore of mw.h failed'
+Wc_c="`wc -c < 'mw.h'`"
+test 1042 -eq "$Wc_c" ||
+ echo 'mw.h: original size 1042, current size' "$Wc_c"
+fi
+# ============= mwkw.aa ==============
+if test -f 'mwkw.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mwkw.aa (File already exists)'
+else
+echo 'x - extracting mwkw.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mwkw.aa' &&
+>MWKW Myosin heavy chain - Caenorhabditis elegans
+MEHEKDPGWQYLRRTREQVLEDQSKPYDSKKNVWIPDPEEGYLAGEITATKGDQVTIVTAREMSVIQVTL
+KKELVQEMNPPKFEKTEDMSNLSFLNDASVLHNLRSRYAAMLIYTYSGLFCVVINPYKRLPIYTDSCARM
+FMGKRKTEMPPHLFAVSDEAYRNMLQDHENQSMLITGESGAGKTENTKKVICYFAAVGASQQEGGAEVDP
+NKKKVTLEDQIVQTNPVLEAFGNAKTVRNNNSSRFGKFIRIHFNKHGRLASCDIEHYLLEKSRVIRQAPG
+ERCYHIFYQIYSDFRPELKKELLLDLPIKDYWFVAQAELIIDGIDDVEEFQLTDEAFDILNFSAVEKQDC
+YRLMSAHMHMGNMKFKQRPREEQAEPDGTVEAEKASNMYGIGCE
+EFLKALTKPRVKVGTEWVSKGQNCEQVNWAVGAMAKGLYSRVFNWLVKKCNLTLDQKGIDRDYFIGVLDI
+AGFEIFDFNSFEQLWINFVNEKLQQFFNHHMFVLEQEEYAREGIQWVFIDFGLDLQACIELIEKPLGIIS
+MLDEECIVPKATDLTLASKLVDQHLGKHPNFEKPKPPKGKQGEAHFAMRHYAGTVRYNCLNWLEKNKDPL
+NDTVVSAMKQSKGNDLLVEIWQDYTTQEEAAAKAKEGGGGGKKKGKSGSFMTVSMLYRESLNNLMTMLNK
+THPHFIRCIIPNEKKQSGMIDAALVLNQLTCNGVLEGIRICRKGFPNRTLHPDFVQRYAILAAKEAKSDD
+DKKKCAEAIMSKLVNDGSLSEEMFRIGLTKVFFKAGVLAHLEDI
+RDEKLATILTGFQSQIRWHLGLKDRKRRMEQRAGLLIVQRNVRSWCTLRTWEWFKLYGKVKPMLKAGKEA
+EELEKINDKVKALEDSLAKEEKLRKELEESSAKLVEEKTSLFTNLESTKTQLSDAEERLAKLEAQQKDAS
+KQLSELNDQLADNEDRTADVQRAKKKIEAEVEALKKQIQDLEMSLRKAESEKQSKDHQIRSLQDEMQQQD
+EAIAKLNKEKKHQEEINRKLMEDLQSEEDKGNHQNKVKAKLEQTLDDLEDSLEREKRARADLDKQKRKVE
+GELKIAQENIDESGRQRHDLENNLKKKESELHSVSSRLEDEQALVSKLQRQIKDGQSRISELEEELENER
+QSRSKADRAKSDLQRELEELGEKLDEQGGATAAQVEVNKKREAE
+LAKLRRDLEEANMNHENQLGGLRKKHTDAVAELTDQLDQLNKAKAKVEKDKAQAVRDAEDLAAQLDQETS
+GKLNNEKLAKQFELQLTELQSKADEQSRQLQDFTSLKGRLHSENGDLVRQLEDAESQVNQLTRLKSQLTS
+QLEEARRTADEEARERQTVAAQAKNYQHEAEQLQESLEEEIEGKNEILRQLSKANADIQQWKARFEGEGL
+LKADELEDAKRRQAQKINELQEALDAANSKNASLEKTKSRLVGDLDDAQVDVERANGVASALEKKQKGFD
+KIIDEWRKKTDDLAAELDGAQRDLRNTSTDLFKAKNAQEELAEVVEGLRRENKSLSQEIKDLTDQLGEGG
+RSVHEMQKIIRRLEIEKEELQHALDEAEAALEAEESKVLRAQVE
+VSQIRSEIEKRIQEKEEEFENTRKNHARALESMQASLETEAKGKAELLRIKKKLEGDINELEIALDHANK
+ANADAQKNLKRYQEQVRELQLQVEEEQRNGADTREQFFNAEKRATLLQSEKEELLVANEAAERARKQAEY
+EAADARDQANEANAQVSSLTSAKRKLEGEIQAIHADLDETLNEYKAAEERSKKAIADATRLAEELRQEQE
+HSQHVDRLRKGLEQQLKEIQVRLDEAEAAALKGGKKVIAKLEQRVRELESELDGEQRRFQDANKNLGRAD
+RRVRELQFQVDEDKKNFERLQDLIDKLQQKLKTQKKQVEEAEELANLNLQKYKQLTHQLEDAEERADQAE
+NSLSKMRSKSRASASVAPGLQSSASAAVIRSPSRARASDF
+SHAR_EOF
+chmod 0644 mwkw.aa ||
+echo 'restore of mwkw.aa failed'
+Wc_c="`wc -c < 'mwkw.aa'`"
+test 2047 -eq "$Wc_c" ||
+ echo 'mwkw.aa: original size 2047, current size' "$Wc_c"
+fi
+# ============= mwrtc1.aa ==============
+if test -f 'mwrtc1.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping mwrtc1.aa (File already exists)'
+else
+echo 'x - extracting mwrtc1.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mwrtc1.aa' &&
+>MWRTC1 - Myosin heavy chain 1, cardiac muscle - Rat (fragment)
+/DLTEQLGEGGKNVHELEKIRKQLEVEKLELQSALEEAEASLEHEEGKILRAQLEFNQIKAEIE
+SKLAEKDEEMEQAKRNHLRVVDSLQTSLDAETRSRNEALRVKKKMEGDLNEMEIQLSQANRIAS
+EAQKHLKNAQAHLKDTQLQLDDAVRANDDLKENIAIVERRNTLLQAELEELRAVVEQTERSRKL
+AEQELIETSERVQLLHSQNNSLINQKKKMDADLSQLQTEVEEAVQECRNAEEKAKKAITDAAMM
+AEELKKEQDTSAHLERMKKNMEQTIKDLQHRLDEAEQIALKGGKKQLQKLEARVRELENELEAE
+QKRNAESVKGMRKSERRIKELNYQTEEDKKNLVRLQDLVNKLQLKVKAYKRQAEEAEEQANTNL
+SKFRKVQHELDEAEERADIAESQVNKLRAKSRDIGAKQKIHDEE*
+SHAR_EOF
+chmod 0644 mwrtc1.aa ||
+echo 'restore of mwrtc1.aa failed'
+Wc_c="`wc -c < 'mwrtc1.aa'`"
+test 500 -eq "$Wc_c" ||
+ echo 'mwrtc1.aa: original size 500, current size' "$Wc_c"
+fi
+# ============= myosin_bp.aa ==============
+if test -f 'myosin_bp.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping myosin_bp.aa (File already exists)'
+else
+echo 'x - extracting myosin_bp.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'myosin_bp.aa' &&
+>gi|46049110|ref|NP_996557| myosin binding protein C, slow type isoform 4; myosin-binding protein C, slow-type; skeletal muscle C-protein [Homo sapiens]
+MPEPTkkeenevpapapppeepskekeaGTTPAKDWTLVETPPGEEQAKQNANSQLSILF
+IEKPQGGTVKVGEDITFIAKVKAEDLLRKPTIKWFKGKWMDLASKAGKHLQLKETFERHS
+RVYTFEMQIIKAKDNFAGNYRCEVTYKDKFDSCSFDLEVHESTGTTPNIDIRSAFKRSGE
+GQEDAGELDFSGLLKRREVKQQEEEPQVDVWELLKNAKPSEYEKIAFQYGITDLRGmlkr
+lkrmrreekkSAAFAKILDPAYQVDKGGRVRFVVELADPKLEVKWYKNGQEIRPSTKYIF
+EHKGCQRILFINNCQMTDDSEYYVTAGDEKCSTELFVREPPIMVTKQLEDTTAYCGERVE
+LECEVSEDDANVKWFKNGEEIIPGPKSRYRIRVEGKKHILIIEGATKADAAEYSVMTTGG
+QSSAKLSVDLKPLKILTPLTDQTVNLGKEICLKCEISENIPGKWTKNGLPVQESDRLKVV
+HKGRIHKLVIANALTEDEGDYVFAPDAYNVTLPAKVHVIDPPKIILDGLDADNTVTVIAG
+NKLRLEIPISGEPPPKAMWSRGDKAIMEGSGRIRTESYPDSSTLVIDIAERDDSGVYHIN
+LKNEAGEAHASIkvkvvdfpdppvaptvtEVGDDWCIMNWEPPAYDGGSPILGYFIERKK
+KQSSRWMRLNFDLCKETTFEPKKMIEGVAYEVRIFAVNAIGISKPSMPSRPFVPLAVTSP
+PtlltvdsvtdttvtMRWRPPDHIGAAGLDGYVLEYCFEGTEDWIVANKDLIDKTKFTIT
+GLPTDAKIFVRVKAVNAAGASEPKYYSQPILVkeiieppkiriprHLKQTYIRRVGEAVN
+LVIPFQGKPRPELTWKKDGAEIDKNQINIRNSETDTIIFIRKAERSHSGKYDLQVKVDKF
+VETASIDIQIIDRPGPPQIVKIEDVWGENVALTWTPPKDDGNAAITGYTIQKADKKSMEW
+FTVIEHYHRTSATITELVIGNEYYFRVFSENMCGLSEDATMTKESAVIARDGKIYKNPVY
+EDFDFSEAPMFTQPLVNTYAIAGYNATLNCSVRGNPKPKITWMKNKVAIVDDPRYRMFSN
+QGVCTLEIRKPSPYDGGTYCCKAVNDLGTVEIECKLEVKVIAQ
+SHAR_EOF
+chmod 0644 myosin_bp.aa ||
+echo 'restore of myosin_bp.aa failed'
+Wc_c="`wc -c < 'myosin_bp.aa'`"
+test 1294 -eq "$Wc_c" ||
+ echo 'myosin_bp.aa: original size 1294, current size' "$Wc_c"
+fi
+# ============= mysql_demo1.sql ==============
+if test -f 'mysql_demo1.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping mysql_demo1.sql (File already exists)'
+else
+echo 'x - extracting mysql_demo1.sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mysql_demo1.sql' &&
+xdb.wrplab seqdb_demo wrplab gstmu;
+SELECT acc, protein.seq, sp_name
+X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+X WHERE annot.acc='#' AND db='sp';
+SHAR_EOF
+chmod 0644 mysql_demo1.sql ||
+echo 'restore of mysql_demo1.sql failed'
+Wc_c="`wc -c < 'mysql_demo1.sql'`"
+test 340 -eq "$Wc_c" ||
+ echo 'mysql_demo1.sql: original size 340, current size' "$Wc_c"
+fi
+# ============= mysql_demo_pv.sql ==============
+if test -f 'mysql_demo_pv.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping mysql_demo_pv.sql (File already exists)'
+else
+echo 'x - extracting mysql_demo_pv.sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mysql_demo_pv.sql' &&
+xdb.wrplab seqdb_demo wrplab gstmu;
+SELECT acc, protein.seq, sp_name, concat('sp|',acc,'|',sp_name,' ',descr)
+X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+X WHERE annot.acc='#' AND db='sp';
+SHAR_EOF
+chmod 0644 mysql_demo_pv.sql ||
+echo 'restore of mysql_demo_pv.sql failed'
+Wc_c="`wc -c < 'mysql_demo_pv.sql'`"
+test 381 -eq "$Wc_c" ||
+ echo 'mysql_demo_pv.sql: original size 381, current size' "$Wc_c"
+fi
+# ============= mysql_lib.c ==============
+if test -f 'mysql_lib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping mysql_lib.c (File already exists)'
+else
+echo 'x - extracting mysql_lib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'mysql_lib.c' &&
+X
+/* mysql_lib.c copyright (c) 2000 William R. Pearson */
+X
+/* $Name: fa_34_26_5 $ - $Id: mysql_lib.c,v 1.21 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* functions for opening, reading, seeking a mySQL database */
+X
+/*
+X For the moment, this interface assumes that the file to be searched will
+X be specified in a single, long, string with 4 parts:
+X
+X (1) a database open string. This string has four fields, separated by
+X whitespace (' \t'):
+X hostname:port dbname user password
+X
+X '--' dashes at the beginning of lines are ignored -
+X thus the first line could be:
+X -- hostname:port dbname user password
+X
+X (2) a database query string that will return a unique ID (not
+X necessarily numeric, but it must be < 12 characters as libstr[12]
+X is used) and a sequence string
+X
+X (2a) a series of mySQL commands that do not generate results
+X starting with 'DO', followed by a select() statement.
+X
+X (3) a database select string that will return a description
+X given a unique ID
+X
+X (4) a database select string that will return a sequence given a
+X unique ID
+X
+X Lines (3) and (4) are not required for pv34comp* libraries, but
+X line (2) must generate a complete description as well as a sequence.
+X
+X
+X 18-July-2001
+X Additional syntax has been added to support multiline SQL queries.
+X
+X If the host line begins with '+', then the SQL is opened on the same
+X connection as the previous SQL file.
+X
+X If the host line contains '-' just before the terminal ';', then
+X the file will not produce any output.
+X
+X This string can contain "\n". ";" are used to separate the four
+X functions, which must be specified in the order shown above.
+X The last (fourth) query must terminate with a ';' */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+X
+#include <mysql.h>
+#define MYSQL_LIB 16
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#define EOSEQ 0
+/* #include "upam.h" */
+X
+#ifdef SUPERFAMNUM
+int sfnum[10], nsfnum;
+#endif
+X
+int mysql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+void mysql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+X
+#define MYSQL_BUF 4096
+X
+/*
+X  mysql_openlib() - read a mySQL library specification, connect to the
+X  database, run any leading 'DO' statements and start the main query.
+X
+X  sname   - name of the specification file or, when it begins with '%',
+X            the specification text itself (everything after the '%')
+X  ldnaseq - not used by this function
+X  sascii  - character translation table; stored in the returned handle
+X
+X  Returns a newly allocated struct lmf_str with the result set of the
+X  main query pending, or NULL after any failure.  On failure every
+X  allocation made here is released and the connection is closed.
+*/
+struct lmf_str *
+mysql_openlib(char *sname, int ldnaseq, int *sascii) {
+X  FILE *sql_file;
+X  char *tmp_str = NULL, *ttmp_str;
+X  int tmp_str_len;
+X  char *bp, *bps, *bdp, *tp, tchar;
+X  int qs_len, qqs_len;
+X  char *sql_db = NULL, *sql_host, *sql_dbname, *sql_user, *sql_pass;
+X  char *sql_do = NULL;
+X  int sql_do_cnt;
+X  int sql_port;
+X  struct lmf_str *m_fptr = NULL;
+X
+X  /* if (sql_reopen) return NULL; - should not be called for re-open */
+X
+X  /* an inline ('%') specification must fit in the buffer completely */
+X  tmp_str_len = MYSQL_BUF;
+X  if (sname[0] == '%' && strlen(sname) >= (size_t)tmp_str_len) {
+X    tmp_str_len = (int)strlen(sname)+1;
+X  }
+X
+X  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
+X    fprintf(stderr,"cannot allocate %d for mySQL buffer\n",tmp_str_len);
+X    return NULL;
+X  }
+X
+X  if (sname[0] == '%') {
+X    strncpy(tmp_str,sname+1,tmp_str_len);
+X    /* tmp_str is a pointer, so sizeof(tmp_str) is not the buffer size */
+X    tmp_str[tmp_str_len-1]='\0';
+X  }
+X  else {
+X    if ((sql_file=fopen(sname,"r"))==NULL) {
+X      fprintf(stderr," cannot open mySQL file: %s\n",sname);
+X      goto error_r;
+X    }
+X
+X    if ((qs_len=(int)fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
+X      fprintf(stderr," cannot read mySQL file: %s\n",sname);
+X      fclose(sql_file);
+X      goto error_r;
+X    }
+X    tmp_str[qs_len]='\0';
+X    qqs_len = qs_len;
+X
+X    /* the buffer was filled: grow it by MYSQL_BUF and read some more */
+X    while (qqs_len >= tmp_str_len-1) {
+X      tmp_str_len += MYSQL_BUF;
+X      /* keep tmp_str valid if realloc() fails so that it can be freed */
+X      if ((ttmp_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
+X        fprintf(stderr,
+X                " cannot reallocate %d for mySQL buffer\n",tmp_str_len);
+X        fclose(sql_file);
+X        goto error_r;
+X      }
+X      tmp_str = ttmp_str;
+X      ttmp_str = &tmp_str[qqs_len];
+X      qs_len = (int)fread(ttmp_str,sizeof(char),MYSQL_BUF,sql_file);
+X      /* fread() never returns a negative count; ask the stream instead */
+X      if (ferror(sql_file)) {
+X        fprintf(stderr," cannot read mySQL file: %s\n",sname);
+X        fclose(sql_file);
+X        goto error_r;
+X      }
+X      ttmp_str[qs_len]='\0';
+X      qqs_len += qs_len;
+X    }
+X    fclose(sql_file);
+X  }
+X
+X  /* (1) connection fields: everything up to the first ';' */
+X  bps = tmp_str;
+X  if ((bp=strchr(bps,';'))==NULL) {
+X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X  *bp='\0';
+X  if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X            (int)strlen(bps),bps);
+X    goto error_r;
+X  }
+X  strcpy(sql_db,bps);	/* strcpy OK because allocated strlen(bps)+1 */
+X  bps = bp+1;		/* points to next char after ';' */
+X  while (isspace((unsigned char)*bps)) bps++;
+X  *bp=';';		/* replace ; */
+X
+X  /* split the private copy; the sql_* fields point into sql_db */
+X  bp = sql_db;
+X  while (*bp=='-') {*bp++ = ' ';}
+X  sql_host = strtok(bp," \t\n");
+X  sql_dbname = strtok(NULL," \t\n");
+X  sql_user = strtok(NULL," \t\n");
+X  sql_pass = strtok(NULL," \t\n");
+X  if (sql_host == NULL) {	/* empty field list: nothing to connect to */
+X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X  sql_port = 0;
+X  if ((tp=strchr(sql_host,':'))!=NULL) {
+X    *tp='\0';
+X    sql_port=atoi(tp+1);
+X  }
+X
+X  /* we have all the info we need to open a database, allocate lmf_str */
+X  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+X            (long)sizeof(struct lmf_str),sname);
+X    goto error_r;
+X  }
+X
+X  /* have our struct, initialize it */
+X
+X  strncpy(m_fptr->lb_name,sname,MAX_FN);
+X  m_fptr->lb_name[MAX_FN-1]='\0';
+X
+X  m_fptr->sascii = sascii;
+X
+X  m_fptr->sql_db = sql_db;
+X  m_fptr->getlib = mysql_getlib;
+X  m_fptr->ranlib = mysql_ranlib;
+X  m_fptr->mm_flg = 0;
+X  m_fptr->sql_reopen = 0;
+X  m_fptr->lb_type = MYSQL_LIB;
+X
+X  /* now open the database, if necessary */
+X  if ((m_fptr->mysql_conn=mysql_init(NULL))==NULL) {
+X    fprintf(stderr,"*** Error - mysql_init\n");
+X    goto error_r;
+X  }
+X
+X  if (mysql_real_connect(m_fptr->mysql_conn,
+X                         sql_host,sql_user,sql_pass,
+X                         sql_dbname,
+X                         sql_port,
+X                         NULL,
+X                         0)==NULL)
+X    {
+X      fprintf(stderr,"*** Error %u - could not open database:\n%s\n%s",
+X              mysql_errno(m_fptr->mysql_conn),tmp_str,
+X              mysql_error(m_fptr->mysql_conn));
+X      goto error_r;
+X    }
+X  else {
+X    fprintf(stderr," Database %s opened on %s\n",
+X            (sql_dbname != NULL) ? sql_dbname : "(default)",sql_host);
+X  }
+X
+X  /* (2a) check for 'DO' command - copy to 'DO' string */
+X  while (*bps == '-') { *bps++=' ';}
+X  if (isspace((unsigned char)bps[-1]) &&
+X      toupper((unsigned char)bps[0])=='D' &&
+X      toupper((unsigned char)bps[1])=='O' &&
+X      isspace((unsigned char)bps[2])) {
+X    /* have some 'DO' commands */
+X    /* check where the end of the last DO statement is */
+X
+X    sql_do_cnt = 1; /* count up the number of 'DO' statements for later */
+X    bdp=bps+3;
+X    while ((bp=strchr(bdp,';'))!=NULL) {
+X      /* start right after the ';' - bp+2 could lie beyond the
+X         terminating NUL when ';' is the last character */
+X      tp = bp+1;
+X      while (isspace((unsigned char)*tp) || *tp == '-') {*tp++ = ' ';}
+X      if (toupper((unsigned char)tp[0])=='D' &&
+X          toupper((unsigned char)tp[1])=='O' &&
+X          isspace((unsigned char)tp[2])) {
+X        sql_do_cnt++;		/* count the DO statements */
+X        bdp = tp+3;		/* move to the next DO statement */
+X      }
+X      else break;
+X    }
+X    if (bp == NULL) {
+X      fprintf(stderr," terminal ';' not found: %s\n",bps);
+X      goto error_r;
+X    }
+X
+X    /* end of the last DO, begin of select */
+X    tchar = *(bp+1);
+X    *(bp+1)='\0';		/* terminate DO strings */
+X    if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
+X      fprintf(stderr," cannot allocate %d for sql_do\n",(int)strlen(bps));
+X      goto error_r;
+X    }
+X    strcpy(sql_do,bps);
+X    *(bp+1)=tchar;		/* replace the overwritten character */
+X    bps = bp+1;
+X    while (isspace((unsigned char)*bps)) bps++;
+X
+X    /* all the DO commands are now in the local sql_do in the form:
+X       DO command1; DO command2; DO command3; */
+X    bdp = sql_do;
+X    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
+X      /* do the mysql statement on bdp+3 */
+X      /* check for error */
+X      *bp='\0';
+X      if (mysql_query(m_fptr->mysql_conn,bdp+3)) {
+X        fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
+X                mysql_errno(m_fptr->mysql_conn), bdp+3, mysql_error(m_fptr->mysql_conn));
+X        goto error_r;
+X      }
+X      *bp=';';
+X      bdp = bp+1;
+X      while (isspace((unsigned char)*bdp)) bdp++;
+X    }
+X    /* the DO text is not needed once the statements have been run */
+X    free(sql_do);
+X    sql_do = NULL;
+X  }
+X
+X  /* (2) copy 1st query field */
+X  if ((bp=strchr(bps,';'))==NULL) {
+X    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X  *bp='\0';
+X  if ((m_fptr->sql_query=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot allocate space for query string [%d], %s\n",
+X            (int)strlen(bps),bps);
+X    goto error_r;
+X  }
+X  /* have query, copy it */
+X  strcpy(m_fptr->sql_query,bps);
+X  *bp=';';	/* replace ; */
+X  bps = bp+1;
+X  while (isspace((unsigned char)*bps)) bps++;
+X
+X  /* (3) copy get_desc field */
+X  if ((bp=strchr(bps,';'))==NULL) {
+X    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X  *bp='\0';
+X  if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X            (int)strlen(bps),bps);
+X    goto error_r;
+X  }
+X  /* have get_desc, copy it */
+X  strcpy(m_fptr->sql_getdesc,bps);
+X  *bp=';';	/* replace ; */
+X  bps = bp+1;
+X  while (isspace((unsigned char)*bps)) bps++;
+X
+X  /* (4) copy get_seq field; its terminating ';' is optional here */
+X  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
+X
+X  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X            (int)strlen(bps),bps);
+X    goto error_r;
+X  }
+X
+X  if (strlen(bps) > 0) {
+X    strcpy(m_fptr->sql_getseq,bps);
+X  }
+X  else {
+X    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
+X    goto error_r;	/* still returns NULL, but no longer skips the cleanup */
+X  }
+X  if (bp!=NULL) *bp=';';
+X
+X  /* now do the query */
+X
+X  if (mysql_query(m_fptr->mysql_conn,m_fptr->sql_query)) {
+X    fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
+X            mysql_errno(m_fptr->mysql_conn), m_fptr->sql_query, mysql_error(m_fptr->mysql_conn));
+X    goto error_r;
+X  }
+X
+X  if ((m_fptr->mysql_res = mysql_use_result(m_fptr->mysql_conn)) == NULL) {
+X    fprintf(stderr,"*** Error = use result failed\n%s\n",
+X            mysql_error(m_fptr->mysql_conn));
+X    goto error_r;
+X  }
+X
+X  /* every field has been copied out of the specification text */
+X  free(tmp_str);
+X  return m_fptr;
+X
+X error_r:
+X  /* single failure exit: every pointer is either NULL or owned here */
+X  if (m_fptr != NULL) {
+X    if (m_fptr->mysql_conn != NULL) mysql_close(m_fptr->mysql_conn);
+X    free(m_fptr->sql_getseq);
+X    free(m_fptr->sql_getdesc);
+X    free(m_fptr->sql_query);
+X    free(m_fptr);
+X  }
+X  free(sql_do);
+X  free(sql_db);
+X  free(tmp_str);
+X  return NULL;
+}
+X
+struct lmf_str *
+mysql_reopen(struct lmf_str *m_fptr) { /* makes no new connection: only flags the existing handle and returns it */
+X m_fptr->sql_reopen = 1; /* while set, mysql_getlib() frees its result set as soon as a sequence has been read completely */
+X return m_fptr;
+}
+X
+/*
+X  mysql_closelib() - release the pending result set (if any) and close
+X  the connection held by m_fptr.  A NULL handle is ignored.
+X
+X  The pointers are cleared after they are released, as mysql_getlib()
+X  and mysql_ranlib() do, so that a second close or a later call through
+X  the same handle does not see a freed result set or connection.
+X  The handle itself and its query strings are not freed here.
+*/
+void
+mysql_closelib(struct lmf_str *m_fptr) {
+X
+X  if (m_fptr == NULL) return;
+X  if (m_fptr->mysql_res != NULL) {
+X    mysql_free_result(m_fptr->mysql_res);
+X    m_fptr->mysql_res = NULL;
+X  }
+X  if (m_fptr->mysql_conn != NULL) {
+X    mysql_close(m_fptr->mysql_conn);
+X    m_fptr->mysql_conn = NULL;
+X  }
+X  m_fptr->sql_reopen=0;
+}
+X
+/*
+static char *sql_seq = NULL, *sql_seqp;
+static int sql_seq_len;
+static MYSQL_ROW sql_row;
+*/
+X
+int
+mysql_getlib( unsigned char *seq, /* out: translated residue codes, terminated with EOSEQ */
+X int maxs, /* size of seq[] */
+X char *libstr, /* out: unique ID (row[0]) or, for n_libstr > MAX_UID, the description (row[2]) */
+X int n_libstr, /* size of libstr[] */
+X fseek_t *libpos, /* out: atol() of row[0] */
+X int *lcont, /* in/out: 0 = start a new row; incremented when seq[] filled up before the sequence ended */
+X struct lmf_str *lm_fd, /* library handle holding the pending result set */
+X long *l_off) /* out: 1, or the number following "@C:" in row[2] */
+{ /* Copies the next sequence (or the next piece of a long one) from the
+X result set into seq[], translating each character through
+X lm_fd->sascii.  Returns the number of codes stored, or -1 once
+X mysql_fetch_row() has no more rows. */
+X register unsigned char *cp, *seqp;
+X register int *ap;
+X unsigned char *seqm, *seqm1;
+X char *bp;
+X /* int l_start, l_stop, len; */
+X
+X seqp = seq;
+X seqm = &seq[maxs-9]; /* stop mark short of the end: the copy loop below can store up to 10 codes per pass */
+X seqm1 = seqm-1;
+X
+X ap = lm_fd->sascii; /* character -> residue code table */
+X
+#ifdef SUPERFAMNUM
+X sfnum[0]=nsfnum = 0;
+#endif
+X
+X if (*lcont==0) { /* not continuing a previous sequence: fetch a new row */
+X /* get a row, with UID, sequence */
+X *l_off = 1;
+X if ((lm_fd->mysql_row =mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
+X *libpos=(fseek_t)atol(lm_fd->mysql_row[0]);
+X
+X /* for @P:1-n removed */
+X /*
+X if ((bp=strchr(lm_fd->mysql_row[2],'@'))!=NULL &&
+X !strncmp(bp+1,"P:",2)) {
+X sscanf(bp+3,"%d-%d",&l_start,&l_stop)
+X l_start--;
+X if (l_start < 0) l_start=0;
+X if (l_stop > (len=strlen(lm_fd->mysql_row[1]))) l_stop= len-1;
+X lm_fd->sql_seqp = lm_fd->mysql_row[1];
+X lm_fd->sql_seqp[l_stop]='\0';
+X lm_fd->sql_seqp += l_start;
+X */
+X
+X if (lm_fd->mysql_row[2] == NULL) {
+X fprintf(stderr," NULL comment at: [%s] %ld\n",
+X lm_fd->mysql_row[0],*libpos);
+X }
+X else if ((bp=strchr(lm_fd->mysql_row[2],'@'))!=NULL &&
+X !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off); /* "@C:n" in the comment column sets the offset */
+X else *l_off = 1;
+X
+X lm_fd->sql_seqp = lm_fd->mysql_row[1]; /* start of the sequence text for the copy loop */
+X
+X /* because of changes in mysql_ranlib(), it is essential that
+X libstr return the unique identifier; thus we must use
+X sql_row[0], not sql_row[2]. Using libstr as the UID allows
+X one to use any UID, not just numeric ones. *libpos is not
+X used for mysql libraries.
+X */
+X
+X if (n_libstr <= MAX_UID) {
+X /* the normal case returns only GID/sequence */
+X strncpy(libstr,lm_fd->mysql_row[0],MAX_UID-1);
+X libstr[MAX_UID-1]='\0';
+X }
+X else {
+X /* here we do not use the UID in libstr, because we are not
+X going back into the db */
+X /* the PVM case also returns a long description */
+X if (lm_fd->mysql_row[2]!=NULL) {
+X strncpy(libstr,lm_fd->mysql_row[2],n_libstr-1);
+X }
+X else {
+X strncpy(libstr,lm_fd->mysql_row[0],n_libstr-1);
+X }
+X libstr[n_libstr-1]='\0';
+X }
+X }
+X else { /* no more rows: release the result set and report end of library */
+X mysql_free_result(lm_fd->mysql_res);
+X lm_fd->mysql_res=NULL;
+X *lcont = 0;
+X *seqp = EOSEQ;
+X return -1;
+X }
+X }
+X
+X for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) { /* translate until seq[] is nearly full or the text ends */
+X if ((*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA) continue;
+X --seqp; /* discard the code >= NA that ended the run */
+X if (*(cp-1)==0) break; /* that code came from the terminating NUL: the sequence is finished */
+X }
+X lm_fd->sql_seqp = (char *)cp; /* where a continuation call resumes */
+X
+X if (seqp>=seqm1) (*lcont)++; /* seq[] filled up: the caller has to call again for the rest */
+X else {
+X *lcont=0;
+X if (lm_fd->sql_reopen) { /* flag set by mysql_reopen(): drop the result set once the sequence is complete */
+X mysql_free_result(lm_fd->mysql_res);
+X lm_fd->mysql_res = NULL;
+X }
+X }
+X
+X *seqp = EOSEQ;
+X /* if ((int)(seqp-seq)==0) return 1; */
+X return (int)(seqp-seq); /* number of codes stored in seq[] */
+}
+X
+/*
+X  mysql_subst_uid() - build a query from a template by replacing its
+X  first '#' with uid.  At most n_query-1 characters are written to
+X  query[] (longer text is cut off) and the result is NUL terminated.
+X  The template itself is not modified.
+X  Returns 1 on success, 0 (query[] untouched) if tmpl has no '#'.
+*/
+static int
+mysql_subst_uid(char *query, size_t n_query, const char *tmpl, const char *uid)
+{
+X  const char *hash;
+X  size_t n_pre;
+X
+X  if (n_query == 0 || (hash=strchr(tmpl,'#'))==NULL) return 0;
+X
+X  n_pre = (size_t)(hash-tmpl);
+X  if (n_pre > n_query-1) n_pre = n_query-1;
+X  memcpy(query,tmpl,n_pre);
+X  query[n_pre]='\0';
+X
+X  /* strncat() takes the number of characters that may still be added,
+X     not the size of the destination */
+X  strncat(query,uid,n_query-strlen(query)-1);
+X  strncat(query,hash+1,n_query-strlen(query)-1);
+X  return 1;
+}
+X
+/*
+X  mysql_ranlib() - look up one entry by its unique ID.
+X
+X  str    - receives the description (at most cnt-1 characters); an
+X           error text starting with "gi|<libpos>" if the lookup fails
+X  cnt    - size of str[]
+X  libpos - stored in lm_fd->lpos and used in the error texts
+X  libstr - unique ID substituted for '#' in the sql_getdesc and
+X           sql_getseq templates
+X  lm_fd  - library handle
+X
+X  On return lm_fd->mysql_res holds the result set of the sql_getseq
+X  query, ready for mysql_getlib(), or NULL if that query could not be
+X  built or run.
+*/
+void
+mysql_ranlib(char *str,
+X	     int cnt,
+X	     fseek_t libpos,
+X	     char *libstr,
+X	     struct lmf_str *lm_fd
+X	     )
+{
+X  char tmp_query[1024];
+X  char err_str[80];	/* long enough for the fixed texts below plus a long */
+X  char *bp;
+X
+X  str[0]='\0';
+X  err_str[0]='\0';
+X
+X  /* release a pending result set first, so that no early exit below
+X     can drop the pointer without freeing it */
+X  if (lm_fd->mysql_res !=NULL) {
+X    mysql_free_result(lm_fd->mysql_res);
+X    lm_fd->mysql_res = NULL;
+X  }
+X
+X  /* put the UID into the query string - cannot use sprintf because of
+X     "%' etc */
+X
+X  if (!mysql_subst_uid(tmp_query,sizeof(tmp_query),lm_fd->sql_getdesc,libstr)) {
+X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getdesc);
+X    goto next1;
+X  }
+X  lm_fd->lpos = libpos;
+X
+X  /*  fprintf(stderr," requesting: %s\n",tmp_query); */
+X
+X  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
+X    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
+X	    mysql_error(lm_fd->mysql_conn));
+X    sprintf(err_str,"gi|%ld ***Error - query failed***",(long)libpos);
+X    goto next1;
+X  }
+X
+X  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
+/*    fprintf(stderr,"*** Error = use result failed\n%s\n",
+X	    mysql_error(lm_fd->mysql_conn)); */
+X    sprintf(err_str,"gi|%ld ***use result failed***",(long)libpos);
+X    goto next1;		/* there is no result set to free */
+X  }
+X
+X  /* have the description */
+X  if ((lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))==NULL) {
+X    /* fprintf(stderr," cannot fetch description: %s\n",tmp_query); */
+X    sprintf(err_str,"gi|%ld ***cannot fetch description***",(long)libpos);
+X    goto next0;
+X  }
+X
+X  if (lm_fd->mysql_row[1] != NULL) strncpy(str,lm_fd->mysql_row[1],cnt-1);
+X  else strncpy(str,lm_fd->mysql_row[0],cnt-1);
+X  str[cnt-1]='\0';
+X
+X  /* further rows are appended, separated by a blank, while room remains */
+X  while ((int)strlen(str) < cnt-1 &&
+X	 (lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
+X    strncat(str," ",cnt-2-strlen(str));
+X    if (lm_fd->mysql_row[1]!=NULL)
+X      strncat(str,lm_fd->mysql_row[1],cnt-2-strlen(str));
+X    else break;
+X  }
+X
+X  str[cnt-1]='\0';
+X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X
+X next0:
+X  mysql_free_result(lm_fd->mysql_res);
+X next1:
+X  lm_fd->mysql_res = NULL;
+X
+X  /* an error text is copied with the same cnt limit as a description */
+X  if (err_str[0]!='\0') {
+X    strncpy(str,err_str,cnt-1);
+X    str[cnt-1]='\0';
+X  }
+X
+X  /* get the sequence, set up for mysql_getseq() */
+X  /* put the UID into the query string */
+X
+X  if (!mysql_subst_uid(tmp_query,sizeof(tmp_query),lm_fd->sql_getseq,libstr)) {
+X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
+X    return;
+X  }
+X
+X  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
+X    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
+X	    mysql_error(lm_fd->mysql_conn));
+X    return;		/* no result to fetch; mysql_res stays NULL */
+X  }
+X
+X  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
+X    fprintf(stderr,"*** Error = use result failed\n%s\n",
+X	    mysql_error(lm_fd->mysql_conn));
+X  }
+}
+SHAR_EOF
+chmod 0644 mysql_lib.c ||
+echo 'restore of mysql_lib.c failed'
+Wc_c="`wc -c < 'mysql_lib.c'`"
+test 16406 -eq "$Wc_c" ||
+ echo 'mysql_lib.c: original size 16406, current size' "$Wc_c"
+fi
+# ============= n0.aa ==============
+if test -f 'n0.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping n0.aa (File already exists)'
+else
+echo 'x - extracting n0.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n0.aa' &&
+>mgstm1
+MGDAPDFD,
+MLLEYTD
+SHAR_EOF
+chmod 0644 n0.aa ||
+echo 'restore of n0.aa failed'
+Wc_c="`wc -c < 'n0.aa'`"
+test 26 -eq "$Wc_c" ||
+ echo 'n0.aa: original size 26, current size' "$Wc_c"
+fi
+# ============= n1.aa ==============
+if test -f 'n1.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping n1.aa (File already exists)'
+else
+echo 'x - extracting n1.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n1.aa' &&
+>tests from mgstm1
+MILGYW,
+MLLE,
+MGDAP,
+MLCYNP
+SHAR_EOF
+chmod 0644 n1.aa ||
+echo 'restore of n1.aa failed'
+Wc_c="`wc -c < 'n1.aa'`"
+test 47 -eq "$Wc_c" ||
+ echo 'n1.aa: original size 47, current size' "$Wc_c"
+fi
+# ============= n2.aa ==============
+if test -f 'n2.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping n2.aa (File already exists)'
+else
+echo 'x - extracting n2.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n2.aa' &&
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+LIGVITENPVWIIMELCTLGELRSFLQVR,
+KPPRPGAPHLGSLASLNSPVDSYNEGVK,
+EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+QVTVSWDSGGSDEAPPKPSRPGYPSPR,
+GANPTHLADFNQVQTIQYSNSEDKDR,
+LPMPPNCPPTLYSLMTKCWAYDPSR,
+PGAPHLGSLASLNSPVDSYNEGVK,
+GANPTHLADFNQVQTIQYSNSEDK,
+LSHLQSEEVHWLHLDMGVSNVR,
+QVTVSWDSGGSDEAPPKPSR,
+VFHYFENSSEPTTWASIIR,
+TLLATVDESLPVLPASTHR,
+RQVTVSWDSGGSDEAPPK,
+AQLSTILEEEKLQQEER,
+EKFELAHPPEEWKYELR,
+LAQQYVMTSLQQEYKK,
+FELAHPPEEWKYELR,
+LVNGATQSFIIRPQK,
+KQMLTAAHALAVDAK,
+SNDKVYENVTGLVK,
+QMLTAAHALAVDAK,
+GMGQVLPTHLMEER,
+PQEISPPPTANLDR,
+IQPAPPEEYVPMVK,
+GMGQVLPTHLMEER,
+QFANLNREESILK,
+SHAR_EOF
+chmod 0644 n2.aa ||
+echo 'restore of n2.aa failed'
+Wc_c="`wc -c < 'n2.aa'`"
+test 692 -eq "$Wc_c" ||
+ echo 'n2.aa: original size 692, current size' "$Wc_c"
+fi
+# ============= n2_fs.lib ==============
+if test -f 'n2_fs.lib' -a X"$1" != X"-c"; then
+ echo 'x - skipping n2_fs.lib (File already exists)'
+else
+echo 'x - extracting n2_fs.lib (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n2_fs.lib' &&
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR,
+YTMGD,
+EKQKPEFL,
+VRGLTHP,
+TRMQLI,
+FKLGLDFP,
+NLPYLI,
+DGSHKIT,
+LRYLAR,
+KTIPEK,
+KRPWFA,
+ETEEERIR,
+GDKVTYVD,
+HWSNK
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGADP,
+MLCYNP
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+LIGVITENPVWIIMELCTLGELRSFLQVR,
+KPPRPGAPHLGSLASLNSPVDSYNEGVK,
+EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
+QVTVSWDSGGSDEAPPKPSRPGYPSPR,
+GANPTHLADFNQVQTIQYSNSEDKDR,
+LPMPPNCPPTLYSLMTKCWAYDPSR,
+PGAPHLGSLASLNSPVDSYNEGVK,
+GANPTHLADFNQVQTIQYSNSEDK,
+LSHLQSEEVHWLHLDMGVSNVR,
+QVTVSWDSGGSDEAPPKPSR,
+VFHYFENSSEPTTWASIIR,
+TLLATVDESLPVLPASTHR,
+RQVTVSWDSGGSDEAPPK,
+AQLSTILEEEKLQQEER,
+EKFELAHPPEEWKYELR,
+LAQQYVMTSLQQEYKK,
+FELAHPPEEWKYELR,
+LVNGATQSFIIRPQK,
+KQMLTAAHALAVDAK,
+SNDKVYENVTGLVK,
+QMLTAAHALAVDAK,
+GMGQVLPTHLMEER,
+PQEISPPPTANLDR,
+IQPAPPEEYVPMVK,
+GMGQVLPTHLMEER,
+QFANLNREESILK,
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA,
+AQLSTILE,
+KPPRPGA,
+GSIERED,
+VFHYFEN,
+LIGVIT,
+LPMPP,
+RQVTV,
+QVTV
+SHAR_EOF
+chmod 0644 n2_fs.lib ||
+echo 'restore of n2_fs.lib failed'
+Wc_c="`wc -c < 'n2_fs.lib'`"
+test 1482 -eq "$Wc_c" ||
+ echo 'n2_fs.lib: original size 1482, current size' "$Wc_c"
+fi
+# ============= n2s.aa ==============
+if test -f 'n2s.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping n2s.aa (File already exists)'
+else
+echo 'x - extracting n2s.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n2s.aa' &&
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
+SHAR_EOF
+chmod 0644 n2s.aa ||
+echo 'restore of n2s.aa failed'
+Wc_c="`wc -c < 'n2s.aa'`"
+test 178 -eq "$Wc_c" ||
+ echo 'n2s.aa: original size 178, current size' "$Wc_c"
+fi
+# ============= n2t.aa ==============
+if test -f 'n2t.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping n2t.aa (File already exists)'
+else
+echo 'x - extracting n2t.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n2t.aa' &&
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA,
+AQLSTILE,
+KPPRPGA,
+GSIERED,
+VFHYFEN,
+LIGVIT,
+LPMPP,
+RQVTV,
+QVTV
+SHAR_EOF
+chmod 0644 n2t.aa ||
+echo 'restore of n2t.aa failed'
+Wc_c="`wc -c < 'n2t.aa'`"
+test 243 -eq "$Wc_c" ||
+ echo 'n2t.aa: original size 243, current size' "$Wc_c"
+fi
+# ============= n_fs.lib ==============
+if test -f 'n_fs.lib' -a X"$1" != X"-c"; then
+ echo 'x - skipping n_fs.lib (File already exists)'
+else
+echo 'x - extracting n_fs.lib (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'n_fs.lib' &&
+>tests from mgstm1
+MLLE,
+MILGYW,
+MGADP,
+MLCYNP
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR
+>gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
+GANPTHLADF,
+QVTVSWDSGG,
+EDGGLQGPA,
+TLLATVDE,
+LSHLQSEE,
+PGAPHLGS,
+GANPTHLA
+SHAR_EOF
+chmod 0644 n_fs.lib ||
+echo 'restore of n_fs.lib failed'
+Wc_c="`wc -c < 'n_fs.lib'`"
+test 330 -eq "$Wc_c" ||
+ echo 'n_fs.lib: original size 330, current size' "$Wc_c"
+fi
+# ============= ncbl2_head.h ==============
+if test -f 'ncbl2_head.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping ncbl2_head.h (File already exists)'
+else
+echo 'x - extracting ncbl2_head.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ncbl2_head.h' &&
+/* ncbl_head.h header files for blast1.3 format */
+X
+/* $Name: fa_34_26_5 $ - $Id: ncbl2_head.h,v 1.3 2006/05/18 19:04:25 wrp Exp $ */
+X
+#define AMINO_ACID_SEQTYPE 1
+#define AA_SEQTYPE AMINO_ACID_SEQTYPE
+#define AAFORMAT AA_SEQTYPE
+X
+#define NUCLEIC_ACID_SEQTYPE 0
+#define NT_SEQTYPE NUCLEIC_ACID_SEQTYPE
+#define NTFORMAT NT_SEQTYPE
+X
+/* Filename extensions used by the two types of databases (a.a. and nt.) */
+#define AA_LIST_EXT "pal"
+#define AA_HEADER_EXT "phr"
+#define AA_INDEX_EXT "pin"
+#define AA_SEARCHSEQ_EXT "psq"
+X
+#define NT_LIST_EXT "nal"
+#define NT_HEADER_EXT "nhr"
+#define NT_INDEX_EXT "nin"
+#define NT_SEARCHSEQ_EXT "nsq"
+X
+#define FORMATDBV3 3 /* formatdb version */
+#define FORMATDBV4 4 /* formatdb version */
+X
+#define NULLB '\0' /* sentinel byte */
+X
+#ifndef CHAR_BIT
+#define CHAR_BIT 8 /* these values should match blast */
+#endif
+X
+#define NBPN 2
+#define NSENTINELS 2
+SHAR_EOF
+chmod 0644 ncbl2_head.h ||
+echo 'restore of ncbl2_head.h failed'
+Wc_c="`wc -c < 'ncbl2_head.h'`"
+test 882 -eq "$Wc_c" ||
+ echo 'ncbl2_head.h: original size 882, current size' "$Wc_c"
+fi
+# ============= ncbl2_mlib.c ==============
+if test -f 'ncbl2_mlib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping ncbl2_mlib.c (File already exists)'
+else
+echo 'x - extracting ncbl2_mlib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ncbl2_mlib.c' &&
+/* ncbl2_lib.c functions to read ncbi-blast format files from
+X formatdb (blast2.0 format files)
+X
+X copyright (c) 1999 William R. Pearson
+*/
+X
+/* $Name: fa_34_26_5 $ - $Id: ncbl2_mlib.c,v 1.56 2007/04/02 18:08:11 wrp Exp $ */
+X
+/* to turn on mmap()ing for Blast2 files: */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef UNIX
+#include <unistd.h>
+#endif
+#include <errno.h>
+X
+X
+/* ****************************************************************
+X
+17-May-2006
+X
+Modified to read NCBI .[np]al and .msk files. The .nal or .pal file
+provides a way to read sequences from a list of files. The .msk file
+provides a compact way of indicating the subset of sequences in a
+larger database (typically nr or nt) that comprise a smaller database
+(e.g. swissprot or pdbaa). A .pal file (e.g. swissprot.00.pal) that
+uses a .msk file has the form:
+X
+X # Alias file generated by genmask
+X # Date created: Mon Apr 10 11:24:05 2006
+X #
+X TITLE Non-redundant SwissProt sequences
+X DBLIST nr.00
+X OIDLIST swissprot.00.msk
+X LENGTH 74351250
+X NSEQ 198346
+X MAXOID 2617347
+X MEMB_BIT 1
+X # end of the file
+X
+To work with this file, we must first load the nr.00 file, and then
+read the swissprot.00.msk file, and then scan all the entries in the
+swissprot.00.msk file (which are packed 32 mask-bit to an int) to
+determine whether a specific libpos index entry is present in the
+subset database.
+X
+**************************************************************** */
+X
+X
+/* ****************************************************************
+This code reads NCBI Blast2 format databases from formatdb version 3 and 4
+X
+(From NCBI) This section describes the format of the databases.
+X
+Formatdb creates three main files for proteins containing indices,
+sequences, and headers with the extensions, respectively, of pin, psq,
+and phr (for nucleotides these are nin, nsq, and nhr). A number of
+other ISAM indices are created, but these are described elsewhere.
+X
+FORMAT OF THE INDEX FILE
+------------------------
+X
+1.) formatdb version number [4 bytes].
+X
+2.) protein dump flag (1 for a protein database, 0 for a nucleotide
+X database) [4 bytes].
+X
+3.) length of the database title in bytes [4 bytes].
+4.) the database title [length given in 3.)].
+5.) length of the date/time string [4 bytes].
+6.) the date/time string [length given in 5.)].
+7.) the number of sequences in the database [4 bytes].
+8.) the total length of the database in residues/basepairs [4 bytes].
+9.) the length of the longest sequence in the database [4 bytes].
+X
+10.) a list of the offsets for definitions (one for each sequence) in
+the header file. There are num_of_seq+1 of these, where num_of_seq is
+the number of sequences given in 7.).
+X
+11.) a list of the offsets for sequences (one for each sequence) in
+the sequence file. There are num_of_seq+1 of these, where num_of_seq
+is the number of sequences given in 7.).
+X
+12.) a list of the offsets for the ambiguity characters (one for each
+sequence) in the sequence file. This list is only present for
+nucleotide databases and, since the database is compressed 4/1 for
+nucleotides, allows the ambiguity characters to be restored when the
+sequence is generated. There are num_of_seq+1 of these, where
+num_of_seq is the number of sequences given in 7.).
+X
+X
+FORMAT OF THE SEQUENCE FILE
+---------------------------
+X
+There are different formats for the protein and nucleotide sequence files.
+X
+The protein sequence file is quite simple. The first byte in the
+file is a NULL byte, followed by the sequence in ncbistdaa format
+(described in the NCBI Software Development Toolkit documentation).
+Following the sequence is another NULL byte, followed by the next
+sequence. The file ends with a NULL byte, following the last
+sequence.
+X
+The nucleotide sequence file contains the nucleotide sequence, with
+four basepairs compressed into one byte. The format used is NCBI2na,
+documented in the NCBI Software Development Toolkit manual. Any
+ambiguity characters present in the original sequence are replaced at
+random by A, C, G or T. The true values of ambiguity characters are
+stored at the end of each sequence to allow true reproduction of the
+original sequence.
+X
+FORMAT OF THE HEADER FILE (formatdb version 3)
+-------------------------
+X
+The format of the header file depends on whether or not the identifiers in the
+original file were parsed or not. For the case that they were not, then each
+entry has the format:
+X
+gnl|BL_ORD_ID|entry_number my favorite yeast sequence...
+X
+Here entry_number gives the ordinal number of the sequence in the
+database (with zero offset). The identifier
+gnl|BL_ORD_ID|entry_number is used by the BLAST software to identify
+the entry, if the user has not provided another identifier. If the
+identifier was parsed, then gnl|BL_ORD_ID|entry_number is replaced by
+the correct identifier, as described in
+ftp://ncbi.nlm.nih.gov/blast/db/README .
+X
+There are no separators between these deflines.
+X
+For formatdb version 4, the header file contains blast ASN.1 binary
+deflines, which can be parsed with parse_fastadl_asn().
+X
+FORMAT OF THE .MSK FILE
+-----------------------
+X
+The .msk file is simply a packed list of masks for formatdb "oids" for
+some other file (typically nr). The first value is the last oid
+available; the remainder are packed 32 oids/mask, so that the number
+of masks is 1/32 the number of sequences in the file.
+X
+**************************************************************** */
+X
+#ifdef USE_MMAP
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#ifdef IBM_AIX
+#include <fcntl.h>
+#else
+#include <sys/fcntl.h>
+#endif
+#endif
+X
+#ifdef USE_MMAP
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+#endif
+X
+#ifdef UNIX
+#define RBSTR "r"
+#else
+#define RBSTR "rb"
+#endif
+X
+#ifdef WIN32
+#define SLASH_CHAR '\\'
+#define SLASH_STR "\\"
+#else
+#define SLASH_CHAR '/'
+#define SLASH_STR "/"
+#endif
+X
+#define XTERNAL
+#include "uascii.h"
+X
+#define XTERNAL
+#include "upam.h"
+#include "ncbl2_head.h"
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+unsigned int bl2_uint4_cvt(unsigned int);
+unsigned int bl2_long4_cvt(long);
+int64_t bl2_long8_cvt(int64_t);
+void src_int4_read(FILE *fd, int *valp);
+void src_uint4_read(FILE *fd, unsigned int *valp);
+void src_long4_read(FILE *fd, long *valp);
+void ncbi_long8_read(FILE *fd, int64_t *valp);
+void src_char_read(FILE *fd, char *valp);
+unsigned char *parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
+X int *gi_p, int *db, char *acc, char *name,
+X char *title, int t_len, int *taxid);
+X
+/* nt_btoa maps from blast 2bit format to ascii characters */
+static char nt_btoa[5] = {"ACGT"};
+X
+static char aa_b2toa[27]= {"-ABCDEFGHIKLMNPQRSTVWXYZU*"};
+X
+static int aa_btof[32]; /* maps to fasta alphabet */
+X
+static int dbtype, dbformat, amb_cnt;
+X
+#define NCBIBL20 12
+X
+int ncbl2_getliba(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl2_getlibn(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+X
+int ncbl2_getliba_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl2_getlibn_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+X
+void newname(char *, char *, char *, int);
+void parse_pal(char *, char *, int *, int *, FILE *);
+X
+void ncbl2_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+X
+/* ncbl2_openlib() opens a BLAST2.0 format database (formatdb version
+X  3 or 4) and returns a newly allocated struct lmf_str for reading
+X  it, or NULL on failure.
+X
+X  name    - database path without the .p??/.n?? extension
+X  ldnaseq - SEQT_PROT selects the protein file extensions; any other
+X            value selects the nucleotide ones
+X
+X  If a .pal/.nal list file can be opened it is parsed with
+X  parse_pal().  When that reports oid_seqs > 0, the mask file it
+X  names is loaded and the *_o getlib functions are installed; when a
+X  list file exists but oid_seqs is 0, an empty lmf_str (no files
+X  open) is returned.
+X
+X  The index file supplies the formatdb version, sequence type,
+X  title, date, sequence count, total and maximum lengths, and the
+X  header, sequence and (nucleotide only) ambiguity offset tables.
+X  Ifdef USE_MMAP, the sequence file is memory mapped, falling back to
+X  fopen() when that fails.
+X
+X  Every failure path releases the files and memory acquired so far. */
+X
+struct lmf_str *
+ncbl2_openlib(char *name, int ldnaseq)
+{
+X  char lname[256];
+X  char dname[256];
+X  char msk_name[256];
+X  char hname[256];
+X  char sname[256];
+X  char tname[256];
+X  char db_dir[256];
+X  int pref_db= -1;
+X  char *bp;
+X  int title_len;
+X  char *title_str=NULL;
+X  int date_len;
+X  char *date_str=NULL;
+X  long ltmp;
+X  int64_t l8tmp;
+X  int oid_seqs, max_oid;
+X  int oid_cnt, oid_len;
+X  unsigned int *oid_list, o_max;
+X  int tmp;
+X  int i;
+#ifdef USE_MMAP
+X  struct stat statbuf;
+#endif
+X  FILE *ifile;  /* index offsets, also DB info */
+X  unsigned int *f_pos_arr=NULL;  /* NULL so error_r can always free it */
+X  struct lmf_str *m_fptr=NULL;   /* NULL until allocated, see error_r */
+X
+X  if (ldnaseq==SEQT_PROT) {  /* read a protein database */
+X    newname(lname,name,AA_LIST_EXT,(int)sizeof(lname));
+X    newname(tname,name,AA_INDEX_EXT,(int)sizeof(tname));
+X    newname(hname,name,AA_HEADER_EXT,(int)sizeof(hname));
+X    newname(sname,name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+X
+X    /* initialize map of BLAST2 amino acids to FASTA amino acids */
+X    /* NOTE(review): the final else clears aa_b2toa[i], not aa_btof[i];
+X       aa_btof[] is static and therefore already 0 there - confirm
+X       that modifying the ascii table is intended */
+X    for (i=0; i<sizeof(aa_b2toa); i++) {
+X      if ((tmp=aascii[aa_b2toa[i]])<NA) aa_btof[i]=tmp;
+X      else if (aa_b2toa[i]=='*') aa_btof[i]=aascii['X'];
+X      else aa_b2toa[i]=0;
+/*    else aa_btof[i]=aascii['X']; */
+X    }
+X  }
+X  else {  /* reading DNA library */
+X    newname(lname,name,NT_LIST_EXT,(int)sizeof(lname));
+X    newname(tname,name,NT_INDEX_EXT,(int)sizeof(tname));
+X    newname(hname,name,NT_HEADER_EXT,(int)sizeof(hname));
+X    newname(sname,name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+X
+X  }
+X
+X  /* check first for list name */
+X  max_oid = oid_seqs = 0;
+X  oid_list = NULL;
+X  if ((ifile = fopen(lname,"r"))!=NULL) {
+X
+X    /* db_dir is the directory part of name (with trailing slash), or "" */
+X    if ((bp = strrchr(name,SLASH_CHAR))!=NULL) {
+X      *bp = '\0';
+X      strncpy(db_dir,name,sizeof(db_dir)-1);
+X      db_dir[sizeof(db_dir)-1] = '\0';  /* strncpy may not terminate */
+X      strncat(db_dir,SLASH_STR,sizeof(db_dir)-strlen(db_dir)-1);
+X      *bp = SLASH_CHAR;
+X    }
+X    else {
+X      db_dir[0]='\0';
+X    }
+X
+X    /* we have a list file, we need to parse it */
+X    parse_pal(dname, msk_name, &oid_seqs, &max_oid, ifile);
+X    fclose(ifile);
+X
+X    pref_db = -1;
+X    if (oid_seqs > 0) {
+X
+X      /* get the pref_db before adding the directory */
+X      if (strncmp(msk_name,"swissprot",9)==0) {
+X        pref_db = 7;
+X      }
+X      else if (strncmp(msk_name,"pdbaa",5)==0) {
+X        pref_db = 14;
+X      }
+X
+X      /* need to add directory to both dname and msk_name; the third
+X         strncat() argument is the room left, not the buffer size */
+X      strncpy(tname,db_dir,sizeof(tname)-1);
+X      tname[sizeof(tname)-1] = '\0';
+X      strncat(tname,msk_name,sizeof(tname)-strlen(tname)-1);
+X      strncpy(msk_name,tname,sizeof(msk_name)-1);
+X      msk_name[sizeof(msk_name)-1] = '\0';
+X
+X      strncpy(tname,db_dir,sizeof(tname)-1);
+X      tname[sizeof(tname)-1] = '\0';
+X      strncat(tname,dname,sizeof(tname)-strlen(tname)-1);
+X      strncpy(dname,tname,sizeof(dname)-1);
+X      dname[sizeof(dname)-1] = '\0';
+X
+X      if (ldnaseq == SEQT_PROT) {
+X        newname(tname,dname,AA_INDEX_EXT,(int)sizeof(tname));
+X        newname(hname,dname,AA_HEADER_EXT,(int)sizeof(hname));
+X        newname(sname,dname,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+X      }
+X      else {  /* reading DNA library */
+X        newname(tname,dname,NT_INDEX_EXT,(int)sizeof(tname));
+X        newname(hname,dname,NT_HEADER_EXT,(int)sizeof(hname));
+X        newname(sname,dname,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+X      }
+X      /* now load the oid file */
+X      if ((ifile = fopen(msk_name,RBSTR))==NULL) {
+X        fprintf(stderr,"error - cannot load %s file\n",msk_name);
+X        return NULL;
+X      }
+X      else {
+X        src_uint4_read(ifile,&o_max);
+X        if (o_max != max_oid) {
+X          fprintf(stderr," error - oid count mismatch %d != %d\n",max_oid, o_max);
+X        }
+X        oid_len = (max_oid/32+1);
+X        if ((oid_list=(unsigned int *)calloc(oid_len,sizeof(int)))==NULL) {
+X          fprintf(stderr," error - cannot allocate oid_list[%d]\n",oid_len);
+X          fclose(ifile);
+X          return NULL;
+X        }
+X        if ((oid_cnt=fread(oid_list,sizeof(int),oid_len,ifile))==0) {
+X          fprintf(stderr," error - cannot read oid_list[%d]\n",oid_len);
+X          free(oid_list);
+X          fclose(ifile);
+X          return NULL;
+X        }
+X        fclose(ifile);
+X      }
+X    }
+X    else {  /* we had a .msk file, but there are no oid's in it.
+X               allocate an m_fptr and return it empty */
+X      if ((m_fptr=(struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+X        fprintf(stderr," cannot allocate lmf_str\n");
+X        return NULL;
+X      }
+X
+X      m_fptr->tmp_buf_max = 0;
+X
+X      /* load the oid info */
+X      m_fptr->max_oid = 0;
+X      m_fptr->oid_seqs = 0;
+X      m_fptr->oid_list = (unsigned int *)calloc(1,sizeof(int));
+X      m_fptr->pref_db= -1;
+X
+X      if (ldnaseq==SEQT_DNA) {
+X        m_fptr->getlib = ncbl2_getlibn_o;
+X        m_fptr->sascii = nascii;
+X      }
+X      else {
+X        m_fptr->getlib = ncbl2_getliba_o;
+X        m_fptr->sascii = aascii;
+X      }
+X      strncpy(m_fptr->lb_name,sname,MAX_FN);
+X      return m_fptr;
+X    }
+X  }
+X
+X  /* open the index file */
+X  if ((ifile = fopen(tname,RBSTR))==NULL) {
+X    fprintf(stderr," cannot open %s (%s) INDEX file",tname,name);
+X    perror("...");
+X    free(oid_list);  /* NULL unless a mask was loaded above */
+X    return NULL;
+X  }
+X
+X  /* from here on ifile is open: every failure leaves through error_r */
+X
+X  src_uint4_read(ifile,(unsigned *)&dbformat);  /* get format DB version number */
+X  src_uint4_read(ifile,(unsigned *)&dbtype);    /* get 1 for protein/0 DNA */
+X
+X  if (dbformat != FORMATDBV3 && dbformat!=FORMATDBV4) {
+X    fprintf(stderr,"error - %s wrong formatdb version (%d/%d)\n",
+X            tname,dbformat,FORMATDBV3);
+X    goto error_r;
+X  }
+X
+X  if ((ldnaseq==SEQT_PROT && dbtype != AAFORMAT) ||
+X      (ldnaseq==SEQT_DNA && dbtype!=NTFORMAT)) {
+X    fprintf(stderr,"error - %s wrong format (%d/%d)\n",
+X            tname,dbtype,(ldnaseq ? NTFORMAT: AAFORMAT));
+X    goto error_r;
+X  }
+X
+X  /* the files are there - allocate lmf_str */
+X
+X  if ((m_fptr=(struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+X    fprintf(stderr," cannot allocate lmf_str\n");
+X    goto error_r;
+X  }
+X
+X  m_fptr->tmp_buf_max = 4096;
+X  if ((m_fptr->tmp_buf=
+X       (char *)calloc(m_fptr->tmp_buf_max,sizeof(char)))==NULL) {
+X    fprintf(stderr," cannot allocate lmf_str->tmp_buffer\n");
+X    goto error_r;
+X  }
+X
+X  /* load the oid info */
+X  m_fptr->max_oid = max_oid;
+X  m_fptr->oid_seqs = oid_seqs;
+X  m_fptr->oid_list = oid_list;
+X  m_fptr->pref_db= pref_db;
+X
+X  /* open the header file */
+X  if ((m_fptr->hfile = fopen(hname,RBSTR))==NULL) {
+X    fprintf(stderr," cannot open %s header file\n",hname);
+X    goto error_r;
+X  }
+X
+X  /* ncbl2_ranlib is used for all BLAST2.0 access */
+X  m_fptr->ranlib = ncbl2_ranlib;
+X  m_fptr->bl_format_ver = dbformat;
+X
+X  /* the *_o readers skip entries that are not set in the oid mask */
+X  if (ldnaseq==SEQT_DNA) {
+X    if (oid_seqs > 0) {
+X      m_fptr->getlib = ncbl2_getlibn_o;
+X    }
+X    else {
+X      m_fptr->getlib = ncbl2_getlibn;
+X    }
+X    m_fptr->sascii = nascii;
+X  }
+X  else {
+X    if (oid_seqs > 0) {
+X      m_fptr->getlib = ncbl2_getliba_o;
+X    }
+X    else {
+X      m_fptr->getlib = ncbl2_getliba;
+X    }
+X    m_fptr->sascii = aascii;
+X  }
+X  strncpy(m_fptr->lb_name,sname,MAX_FN);
+X
+X  /* open the sequence file */
+X
+#if defined (USE_MMAP)
+X  m_fptr->mm_flg=((m_fptr->mmap_fd=open(sname,O_RDONLY))>=0);
+X  if (!m_fptr->mm_flg) {
+X    fprintf(stderr," cannot open %s",sname);
+X    perror("...");
+X  }
+X  else {
+X    if(fstat(m_fptr->mmap_fd, &statbuf) < 0) {
+X      fprintf(stderr," cannot fstat %s",sname);
+X      perror("...");
+X      m_fptr->mm_flg = 0;
+X    }
+X    else {
+X      m_fptr->st_size = statbuf.st_size;
+X      if((m_fptr->mmap_base =
+X          mmap(NULL, m_fptr->st_size, PROT_READ,
+X               MAP_FILE | MAP_SHARED, m_fptr->mmap_fd, 0)) == (char *) -1) {
+X        fprintf(stderr," cannot mmap %s",sname);
+X        perror("...");
+X        m_fptr->mm_flg = 0;
+X      }
+X      else {
+X        m_fptr->mmap_addr = m_fptr->mmap_base;
+X        m_fptr->mm_flg = 1;
+X      }
+X    }
+X    /* regardless, close the open()ed version */
+X    close(m_fptr->mmap_fd);
+X  }
+#else
+X  m_fptr->mm_flg = 0;
+#endif
+X
+X  /* no mapping (or mapping failed): read the sequences with stdio */
+X  if (!m_fptr->mm_flg) {
+X    if ((m_fptr->libf = fopen(sname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s sequence file",sname);
+X      perror("...");
+X      goto error_r;
+X    }
+X  }
+X
+/* all files should be open */
+X
+X  src_uint4_read(ifile,(unsigned *)&title_len);
+X
+X  if (title_len > 0) {
+X    if ((title_str = calloc((size_t)title_len+1,sizeof(char)))==NULL) {
+X      fprintf(stderr," cannot allocate title string (%d)\n",title_len);
+X      goto error_r;
+X    }
+X    fread(title_str,(size_t)1,(size_t)title_len,ifile);
+X  }
+X
+X  src_uint4_read(ifile,(unsigned *)&date_len);
+X
+X  if (date_len > 0) {
+X    if ((date_str = calloc((size_t)date_len+1,sizeof(char)))==NULL) {
+X      fprintf(stderr," cannot allocate date string (%d)\n",date_len);
+X      goto error_r;
+X    }
+X    fread(date_str,(size_t)1,(size_t)date_len,ifile);
+X  }
+X
+X  m_fptr->lpos = 0;
+X  src_uint4_read(ifile,(unsigned *)&m_fptr->max_cnt);
+X
+X  /* total length is 4 bytes in version 3 files, 8 bytes in version 4 */
+X  if (dbformat == FORMATDBV3) {
+X    src_long4_read(ifile,&ltmp);
+X    m_fptr->tot_len = ltmp;
+X  }
+X  else {
+X    ncbi_long8_read(ifile,&l8tmp);
+X    m_fptr->tot_len = l8tmp;
+X  }
+X
+X  src_long4_read(ifile,&ltmp);
+X  m_fptr->max_len = ltmp;
+X
+X  /* currently we are not using this information, but perhaps later */
+X  /* pointers are cleared so that error_r cannot free them twice */
+X  if (title_str!=NULL) {free(title_str); title_str = NULL;}
+X  if (date_str!=NULL) {free(date_str); date_str = NULL;}
+X
+#ifdef DEBUG
+X  fprintf(stderr,"%s format: BL2 (%s) max_cnt: %d, totlen: %lld, maxlen %ld\n",
+X          name,m_fptr->mm_flg ? "mmap" : "fopen",
+X          m_fptr->max_cnt,m_fptr->tot_len,m_fptr->max_len);
+#endif
+X
+X  /* allocate and read hdr indexes */
+X  /* f_pos_arr is scratch space for one table of 4-byte file offsets */
+X  if ((f_pos_arr=(unsigned int *)calloc((size_t)m_fptr->max_cnt+1,sizeof(int)))==NULL) {
+X    fprintf(stderr," cannot allocate tmp header pointers\n");
+X    goto error_r;
+X  }
+X
+X  if ((m_fptr->d_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
+X    fprintf(stderr," cannot allocate header pointers\n");
+X    goto error_r;
+X  }
+X
+X  /* allocate and read sequence offsets */
+X  if ((m_fptr->s_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
+X    fprintf(stderr," cannot allocate sequence pointers\n");
+X    goto error_r;
+X  }
+X
+X  /*
+X  for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->d_pos_arr[i]);
+X  for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->s_pos_arr[i]);
+X  */
+X  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
+X    fprintf(stderr," error reading hdr offsets: %s\n",tname);
+X    goto error_r;
+X  }
+X
+X  for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+X    m_fptr->d_pos_arr[i] = f_pos_arr[i];
+#else
+X    m_fptr->d_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+X  }
+X
+X  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
+X    fprintf(stderr," error reading seq offsets: %s\n",tname);
+X    goto error_r;
+X  }
+X  for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+X    m_fptr->s_pos_arr[i] = f_pos_arr[i];
+#else
+X    m_fptr->s_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+X  }
+X
+X  if (dbtype == NTFORMAT) {
+X    /* allocate and read ambiguity offsets */
+X    if ((m_fptr->a_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
+X      fprintf(stderr," cannot allocate ambiguity pointers\n");
+X      goto error_r;
+X    }
+X
+X    /*
+X    for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->a_pos_arr[i]);
+X    */
+X
+X    if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
+X      fprintf(stderr," error reading ambiguity offsets: %s\n",tname);
+X      goto error_r;
+X    }
+X    for (i=0; i<=m_fptr->max_cnt; i++) {
+#ifdef IS_BIG_ENDIAN
+X      m_fptr->a_pos_arr[i] = f_pos_arr[i];
+#else
+X      m_fptr->a_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
+#endif
+X    }
+X  }
+X
+X  /*
+X  for (i=0; i < min(m_fptr->max_cnt,10); i++) {
+X    fprintf(stderr,"%d: %d %d %d\n",i,m_fptr->s_pos_arr[i],m_fptr->a_pos_arr[i],m_fptr->d_pos_arr[i]);
+X  }
+X  */
+X
+X  /* all done with ifile, close it */
+X  fclose(ifile);
+X
+X  free(f_pos_arr);
+X
+X  /* a stdio sequence file must start with the NULLB sentinel byte */
+X  if (!m_fptr->mm_flg) {
+X    tmp = fgetc(m_fptr->libf);
+X    if (tmp!=NULLB)
+X      fprintf(stderr," phase error: %d:%d found\n",0,tmp);
+X  }
+X
+X  m_fptr->bl_lib_pos = 1;
+X  amb_cnt = 0;
+X  return m_fptr;
+X
+X error_r:
+X  /* here on any failure after the index file was opened; m_fptr is
+X     NULL if it had not been allocated yet, and calloc() left every
+X     member that was never set as 0/NULL */
+X  fclose(ifile);
+X  if (title_str!=NULL) free(title_str);
+X  if (date_str!=NULL) free(date_str);
+X  if (f_pos_arr!=NULL) free(f_pos_arr);
+X  if (oid_list!=NULL) free(oid_list);  /* same array as m_fptr->oid_list */
+X  if (m_fptr!=NULL) {
+#ifdef USE_MMAP
+X    if (m_fptr->mm_flg) munmap(m_fptr->mmap_base,m_fptr->st_size);
+#endif
+X    if (m_fptr->libf!=NULL) fclose(m_fptr->libf);
+X    if (m_fptr->hfile!=NULL) fclose(m_fptr->hfile);
+X    if (m_fptr->a_pos_arr!=NULL) free(m_fptr->a_pos_arr);
+X    if (m_fptr->s_pos_arr!=NULL) free(m_fptr->s_pos_arr);
+X    if (m_fptr->d_pos_arr!=NULL) free(m_fptr->d_pos_arr);
+X    if (m_fptr->tmp_buf!=NULL) free(m_fptr->tmp_buf);
+X    free(m_fptr);
+X  }
+X  return NULL;
+}
+X
+/* ncbl2_closelib() releases what ncbl2_openlib() attached to m_fptr:
+X  the header buffer, the header/sequence/ambiguity offset tables, the
+X  oid mask, the header file, and either the memory mapping or the
+X  stdio stream of the sequence file.  Each released member is reset,
+X  so calling this twice on the same structure is harmless.  m_fptr
+X  itself is not freed. */
+void ncbl2_closelib(struct lmf_str *m_fptr)
+{
+X  if (m_fptr->tmp_buf != NULL) {
+X    free(m_fptr->tmp_buf);
+X    m_fptr->tmp_buf = NULL;
+X    m_fptr->tmp_buf_max = 0;
+X  }
+X
+X  if (m_fptr->s_pos_arr !=NULL) {
+X    free(m_fptr->s_pos_arr);
+X    m_fptr->s_pos_arr = NULL;
+X  }
+X  if (m_fptr->a_pos_arr!=NULL) {
+X    free(m_fptr->a_pos_arr);
+X    m_fptr->a_pos_arr = NULL;
+X  }
+X  /* released on its own rather than only when hfile is still open */
+X  if (m_fptr->d_pos_arr!=NULL) {
+X    free(m_fptr->d_pos_arr);
+X    m_fptr->d_pos_arr = NULL;
+X  }
+X
+X  if (m_fptr->hfile !=NULL ) {
+X    fclose(m_fptr->hfile);
+X    m_fptr->hfile=NULL;
+X  }
+X
+X  if (m_fptr->oid_list != NULL) {
+X    free(m_fptr->oid_list); m_fptr->oid_list = NULL;
+X    m_fptr->oid_seqs = m_fptr->max_oid = 0;
+X  }
+X
+X  /* the macro is USE_MMAP everywhere else in this file; the lower-case
+X     spelling used here before meant the mapping was never released */
+#ifdef USE_MMAP
+X  if (m_fptr->mm_flg) {
+X    munmap(m_fptr->mmap_base,m_fptr->st_size);
+X    m_fptr->mmap_base = NULL;
+X    m_fptr->mmap_addr = NULL;
+X    m_fptr->mm_flg = 0;
+X    m_fptr->mmap_fd = -1;
+X  }
+X  else
+#endif
+X  if (m_fptr->libf !=NULL ) {fclose(m_fptr->libf); m_fptr->libf=NULL;}
+}
+X
+/* ncbl2_getliba_o() - protein getlib used when an oid mask is loaded.
+X  Starting at m_fd->lpos, finds the first entry whose bit is set in
+X  m_fd->oid_list[], positions the library on it and returns the
+X  result of ncbl2_getliba().  Returns -1 when no entry up to
+X  m_fd->max_oid is selected.
+X
+X  NOTE(review): the scan (and the fseek for stdio files) also runs
+X  when *lcont != 0 - confirm continuation reads behave as intended
+X  through this path. */
+int
+ncbl2_getliba_o(unsigned char *seq,
+X                int maxs,
+X                char *libstr,
+X                int n_libstr,
+X                fseek_t *libpos,
+X                int *lcont,
+X                struct lmf_str *m_fd,
+X                long *l_off)
+{
+X  int tpos;
+X  unsigned int oid_word, oid_bit;
+X
+X  /* get to the next valid pointer */
+X
+X  for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) {
+X    /* one mask word covers 32 oids; nothing is selected in a zero word */
+X    if ((oid_word = m_fd->oid_list[tpos / 32])==0) { continue; }
+X
+X    /* highest bit of the converted word is the first oid of the group;
+X       the constant is unsigned so that a shift by 31 is well defined */
+X    oid_bit = 0x1u << (31 - (tpos % 32));
+X
+X    if (bl2_uint4_cvt(oid_word) & oid_bit) {
+X      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
+X      m_fd->lpos = tpos;  /* already bumped up */
+X      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
+X      return ncbl2_getliba(seq, maxs, libstr, n_libstr,
+X                           libpos, lcont, m_fd, l_off);
+X    }
+X  }
+X  return -1;
+}
+X /* ncbl2_getliba() - copy the next chunk of protein entry m_fd->lpos
+X into seq[], translating each byte through aa_btof[].
+X
+X seq, maxs - destination buffer and the limit used to decide
+X whether the entry fits
+X libstr, n_libstr - set to "" unless DEBUG or PCOMPLIB is defined, in
+X which case it is filled from the header file
+X libpos - receives m_fd->lpos as it was on entry
+X lcont - 0 on entry starts a new entry; set to 0 on return
+X when the entry was completed, incremented when
+X more remains
+X l_off - always set to 1
+X
+X Returns the number of residues stored (seq[] is terminated with
+X EOSEQ), or -1 when m_fd->lpos has reached m_fd->max_cnt or a read
+X fails.
+X */
+int
+ncbl2_getliba(unsigned char *seq,
+X int maxs,
+X char *libstr,
+X int n_libstr,
+X fseek_t *libpos,
+X int *lcont,
+X struct lmf_str *m_fd,
+X long *l_off)
+{
+X unsigned char *sptr, *dptr;
+X int s_chunk, d_len, lib_cnt;
+X long seqcnt;
+X long tmp;
+X static long seq_len; /* static: keeps the length still to deliver between continuation calls */
+#if defined(DEBUG) || defined(PCOMPLIB)
+X int gi, my_db, taxid;
+X char acc[20], title[21], name[20];
+#endif
+X
+X *l_off = 1;
+X
+X lib_cnt = m_fd->lpos;
+X *libpos = (fseek_t)m_fd->lpos;
+X
+X if (*lcont==0) { /* starting a new entry */
+X if (lib_cnt >= m_fd->max_cnt) return -1; /* no more sequences */
+X seq_len = m_fd->s_pos_arr[lib_cnt+1] - m_fd->s_pos_arr[lib_cnt]; /* value is +1 off to get the NULL */
+X if (m_fd->mm_flg) m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lib_cnt];
+#if !defined(DEBUG) && !defined(PCOMPLIB)
+X libstr[0]='\0';
+#else
+X /* get the name from the header file */
+X fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
+X
+X if (m_fd->bl_format_ver == FORMATDBV3) { /* version 3: the header text is copied as is */
+X d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+X fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
+X libstr[d_len]='\0';
+X }
+X else { /* otherwise the defline is parsed and only "gi|<number>" is kept */
+X d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+X fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile);
+X parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len,
+X &gi, &my_db, acc, name, title, 20, &taxid);
+X sprintf(libstr,"gi|%d",gi); /* NOTE(review): not bounded by n_libstr - confirm callers pass a large enough buffer */
+X }
+#endif
+X }
+X if (seq_len <= maxs) { /* sequence fits */
+X seqcnt = seq_len;
+X m_fd->lpos++;
+X *lcont = 0;
+X }
+X else { /* doesn't fit */
+X seqcnt = maxs-1;
+X (*lcont)++;
+X }
+X
+X if (m_fd->mm_flg) sptr = (unsigned char *)m_fd->mmap_addr;
+X else {
+X /* NOTE(review): seq_len bytes are read even when seq_len > maxs; if
+X seq[] holds only maxs bytes this looks like it can overrun it, and
+X it leaves the stream past the whole entry before the continuation
+X call reads again - confirm */
+X if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,m_fd->libf))!=(size_t)seq_len) {
+X fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",
+X *libpos,tmp,seq_len);
+X goto error;
+X }
+X sptr = seq;
+X }
+X if (seq_len <= maxs) {seqcnt = --seq_len;} /* entry fits: do not count the trailing NULL */
+X
+X /* everything is ready, set up dst. pointer, seq_len */
+X dptr = seq;
+X
+X if (aa_b2toa[sptr[seq_len-1]]=='*') seq_len--; /* NOTE(review): seqcnt is already fixed, so this only changes the static seq_len - confirm intent */
+X s_chunk = seqcnt/16; /* translate 16 residues per pass, remainder below */
+X while (s_chunk-- > 0) {
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X *dptr++ = aa_btof[*sptr++];
+X }
+X while (dptr < seq+seqcnt) *dptr++ = aa_btof[*sptr++];
+X
+X if (m_fd->mm_flg) m_fd->mmap_addr = (char *)sptr; /* remember where the next chunk starts in the mapping */
+X
+X /* we didn't get it all, so reset for more */
+X if (*lcont) seq_len -= seqcnt;
+X
+X seq[seqcnt]= EOSEQ;
+X return (seqcnt);
+X
+error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);
+X fflush(stderr);
+X return (-1);
+}
+X
+/* ncbl2_getlibn_o() - nucleotide getlib used when an oid mask is
+X  loaded.  Starting at m_fd->lpos, finds the first entry whose bit is
+X  set in m_fd->oid_list[], positions the library on it and returns
+X  the result of ncbl2_getlibn().  Returns -1 when no entry up to
+X  m_fd->max_oid is selected.
+X
+X  NOTE(review): the scan (and the fseek for stdio files) also runs
+X  when *lcont != 0 - confirm continuation reads behave as intended
+X  through this path. */
+int
+ncbl2_getlibn_o(unsigned char *seq,
+X                int maxs,
+X                char *libstr,
+X                int n_libstr,
+X                fseek_t *libpos,
+X                int *lcont,
+X                struct lmf_str *m_fd,
+X                long *l_off)
+{
+X  int tpos;
+X  unsigned int oid_word, oid_bit;
+X
+X  /* get to the next valid pointer */
+X
+X  for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) {
+X    /* one mask word covers 32 oids; nothing is selected in a zero word */
+X    if ((oid_word = m_fd->oid_list[tpos / 32])==0) { continue; }
+X
+X    /* highest bit of the converted word is the first oid of the group;
+X       the constant is unsigned so that a shift by 31 is well defined */
+X    oid_bit = 0x1u << (31 - (tpos % 32));
+X
+X    if (bl2_uint4_cvt(oid_word) & oid_bit) {
+X      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
+X      m_fd->lpos = tpos;  /* already bumped up */
+X      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
+X      return ncbl2_getlibn(seq, maxs, libstr, n_libstr,
+X                           libpos, lcont, m_fd, l_off);
+X    }
+X  }
+X  return -1;
+}
+X
+static char tmp_amb[4096];
+X
+/* ncbl2_getlibn() - read the next chunk of a 2-bit packed sequence.
+X
+X Each packed byte expands to four values in the range 1..4, written into
+X seq and followed by EOSEQ.  When *lcont is 0 a new entry (m_fd->lpos) is
+X started; otherwise reading continues from the state kept in the static
+X variables seq_len and c_len.  *lcont is reset to 0 on the final chunk and
+X incremented when more remains.  *libpos receives the entry index and
+X *l_off is set to 1.
+X
+X Returns the number of values placed in seq, or -1 when no entry remains
+X or a read fails.
+*/
+int
+ncbl2_getlibn(unsigned char *seq,
+X int maxs,
+X char *libstr,
+X int n_libstr,
+X fseek_t *libpos,
+X int *lcont,
+X struct lmf_str *m_fd,
+X long *l_off)
+{
+X unsigned char *sptr, *tptr, stmp;
+X long seqcnt;
+X int s_chunk, lib_cnt;
+X size_t tmp;
+X char ch;
+X static long seq_len;
+X static int c_len,c_pad;
+X int c_len_set, d_len;
+X
+X *l_off = 1;
+X
+X lib_cnt = m_fd->lpos;
+X *libpos = (fseek_t)lib_cnt;
+X if (*lcont==0) { /* not a continuation of previous */
+X if (lib_cnt >= m_fd->max_cnt) return (-1);
+X /* packed length in bytes: from the start of the sequence data to a_pos_arr[] */
+X c_len = m_fd->a_pos_arr[lib_cnt]- m_fd->s_pos_arr[lib_cnt];
+X if (!m_fd->mm_flg) {
+X if (m_fd->bl_lib_pos != m_fd->s_pos_arr[lib_cnt]) { /* are we positioned to read? */
+X amb_cnt++;
+X /* NOTE(review): this tests bl_lib_pos - s_pos, but the fread below skips
+X s_pos - bl_lib_pos bytes; the operands look reversed, so a forward skip
+X gives a negative difference here - confirm against the field types */
+X if ((m_fd->bl_lib_pos - m_fd->s_pos_arr[lib_cnt]) < sizeof(tmp_amb)) {
+X /* jump over amb_ray */
+X fread(tmp_amb,(size_t)1,(size_t)(m_fd->s_pos_arr[lib_cnt]-m_fd->bl_lib_pos),m_fd->libf);
+X }
+X else { /* fseek over amb_ray */
+X fseek(m_fd->libf,m_fd->s_pos_arr[lib_cnt],0);
+X }
+X m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
+X }
+X }
+X else m_fd->mmap_addr = m_fd->mmap_base + m_fd->s_pos_arr[lib_cnt];
+#if !defined(DEBUG) && !defined(PCOMPLIB)
+X libstr[0]='\0';
+#else
+X /* get the name from the header file */
+X fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
+X
+X d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
+X fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
+X libstr[d_len]='\0';
+#endif
+X } /* end of *lcont==0 */
+X
+X /* To avoid the situation where c_len <= 1; we must anticipate what
+X c_len will be after this pass. If it will be <= 64, back off this
+X time so next time it will be > 64 */
+X
+X /* four values per packed byte still to be read */
+X seq_len = c_len*4;
+X
+X if ((seq_len+4 > maxs) && (seq_len+4 - maxs <= 256)) {
+X /* we won't be done but we will have less than 256 to go */
+X c_len -= 64; seq_len -= 256; c_len_set = 1; maxs -= 256;}
+X else c_len_set = 0;
+X
+X /*
+X fprintf(stderr," lib_cnt: %d %d %d %d\n",lib_cnt,c_len,seq_len,maxs);
+X */
+X
+X /* does the rest of the sequence fit? */
+X if (seq_len <= maxs-4 && !c_len_set) {
+X seqcnt = c_len;
+X if (!m_fd->mm_flg) {
+X if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,m_fd->libf))!=(size_t)seqcnt) {
+X fprintf(stderr,
+X " could not read sequence record: %s %lld %ld != %ld: %d\n",
+X libstr,*libpos,tmp,seqcnt,*seq);
+X goto error;
+X }
+X m_fd->bl_lib_pos += tmp;
+X sptr = seq + seqcnt;
+X }
+X else sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
+X
+X *lcont = 0; /* this is the last chunk */
+X lib_cnt++; /* increment to the next sequence */
+X /* the last byte is either '0' (no remainder) or the last 1-3 chars and the remainder */
+X c_pad = *(sptr-1);
+X c_pad &= 0x3; /* get the last (low) 2 bits */
+X seq_len -= (4 - c_pad); /* if the last 2 bits are 0, its a NULL byte */
+X }
+X else { /* get the next chunk, but more to come */
+X /* largest byte count whose expansion, plus EOSEQ, fits in maxs */
+X seqcnt = ((maxs+3)/4)-1;
+X if (!m_fd->mm_flg) {
+X if ((tmp=fread(seq,(size_t)1,(size_t)(seqcnt),m_fd->libf))!=(size_t)(seqcnt)) {
+X fprintf(stderr," could not read sequence record: %lld %ld/%ld\n",
+X *libpos,tmp,seqcnt);
+X goto error;
+X }
+X m_fd->bl_lib_pos += tmp;
+X sptr = seq + seqcnt;
+X }
+X else {
+X sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
+X m_fd->mmap_addr += seqcnt;
+X }
+X seq_len = 4*seqcnt;
+X c_len -= seqcnt;
+X /* undo the temporary back-off applied above */
+X if (c_len_set) {c_len += 64; maxs += 256;}
+X (*lcont)++;
+/* hopefully we don't need this because of c_len -= 64. */
+/*
+X if (c_len == 1) {
+#if !defined (USE_MMAP)
+X c_pad = fgetc(m_fd->libf);
+X *sptr=c_pad;
+#else
+X c_pad = *m_fd->mmap_addr++;
+X sptr = m_fd->mmap_addr;
+#endif
+X c_pad &= 0x3;
+X seq_len += c_pad;
+X seqcnt++;
+X lib_cnt++;
+X *lcont = 0;
+X }
+*/
+X }
+X
+X /* point to the last packed byte and to the end of the array
+X seqcnt is the exact number of bytes read
+X tptr points to the destination, use multiple of 4 to simplify math
+X sptr points to the source, note that the last byte will be read 4 cycles
+X before it is written
+X */
+X
+X tptr = seq + 4*seqcnt;
+X /* unrolled: each pass expands 8 packed bytes, working backwards */
+X s_chunk = seqcnt/8;
+X while (s_chunk-- > 0) {
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X }
+X /* remaining (seqcnt % 8) bytes, one at a time, until the start of seq */
+X while (tptr>seq) {
+X stmp = *--sptr;
+X *--tptr = (stmp&3) +1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X *--tptr = ((stmp >>= 2)&3)+1;
+X }
+X /*
+X for (sptr=seq; sptr < seq+seq_len; sptr++) {
+X printf("%c",nt[*sptr]);
+X if ((int)(sptr-seq) % 60 == 59) printf("\n");
+X }
+X printf("\n");
+X */
+X
+X /* remember which entry the next call starts (or continues) with */
+X m_fd->lpos = lib_cnt;
+X if (seqcnt*4 >= seq_len) { /* there was enough room */
+X seq[seq_len]= EOSEQ;
+X /* printf("%d\n",seq_len); */
+X return seq_len;
+X }
+X else { /* not enough room */
+X seq[seqcnt*4]=EOSEQ;
+X seq_len -= 4*seqcnt;
+X return (4*seqcnt);
+X }
+X
+error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);
+X fflush(stderr);
+X return (-1);
+}
+X
+X /* database tags, indexed by the database code that get_asn_seqid()
+X stores in *db (codes above 17 are mapped to 0 there); ncbl2_ranlib()
+X uses db_type_arr[my_db] when it prints a "gi|...|tag|" prefix */
+X /* 0 1 2 3 4 5 6 7
+X 8 9 10 11 12 13 14 15
+X 16 17 */
+static char
+*db_type_arr[] = {"lcl","gib","gim","gii","gb","emb","pir","sp",
+X "pat","ref","gnl","gi","dbj","prf","pdb","tpg",
+X "tpe","tpd"};
+X
+/* ncbl2_ranlib() - build the description line for entry libpos and
+X   reposition the library at that entry's sequence data.
+X
+X   str     destination, cnt bytes; NUL terminated on return
+X   cnt     size of str
+X   libpos  entry index
+X   libstr  not referenced
+X   m_fd    open library; lpos and bl_lib_pos are updated
+X
+X   FORMATDBV3 headers are copied as stored.  Otherwise the header block is
+X   parsed with parse_fastadl_asn(), one definition per pass.  When
+X   m_fd->pref_db < 0 every definition is appended, joined by '\001'
+X   (turned into ' ' before returning); otherwise only the definition whose
+X   database code equals m_fd->pref_db is used, falling back to the last
+X   definition parsed when none matches.
+*/
+void
+ncbl2_ranlib(char *str,
+X             int cnt,
+X             fseek_t libpos,
+X             char *libstr,
+X             struct lmf_str *m_fd)
+{
+X  int llen, lib_cnt;
+X  size_t n_read;
+X  char *bp;
+X  unsigned char *my_buff=NULL;
+X  char descr[2048];
+X  unsigned char *abp;
+X  int gi, taxid;
+X  int my_db;
+X  char acc[20], name[20];
+X  char title[1024];
+X  int have_my_buff=0;
+X  int have_descr = 0;
+X
+X  lib_cnt = (int)libpos;
+X  llen = m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt];
+X
+X  fseek(m_fd->hfile,m_fd->d_pos_arr[libpos],0);
+X
+X  if (m_fd->bl_format_ver == FORMATDBV3) {
+X    if (llen >= cnt) llen = cnt-1;
+X    n_read = fread(str,(size_t)1,(size_t)(llen),m_fd->hfile);
+X    /* terminate at the data actually read, not at the end of the buffer */
+X    str[n_read] = '\0';
+X  }
+X  else {
+X    if (llen >= m_fd->tmp_buf_max) {
+X      if ((my_buff=(unsigned char *)calloc(llen,sizeof(char)))==NULL) {
+X        fprintf(stderr," cannot allocate ASN.1 buffer: %d\n",llen);
+X        /* fall back to the shared buffer and a truncated header */
+X        my_buff = (unsigned char *)m_fd->tmp_buf;
+X        llen = m_fd->tmp_buf_max;
+X      }
+X      else have_my_buff = 1;
+X    }
+X    else {
+X      my_buff = (unsigned char *)m_fd->tmp_buf;
+X    }
+X    abp = my_buff;
+X    /* parse only what was read, not stale bytes beyond a short read */
+X    n_read = fread(my_buff,(size_t)1,llen,m_fd->hfile);
+X
+X    do {
+X      gi = 0;   /* do not carry a gi over from the previous definition */
+X      abp = parse_fastadl_asn(abp, my_buff+n_read,
+X                              &gi, &my_db, acc, name,
+X                              title, sizeof(title), &taxid);
+X
+X      if (gi > 0) {
+X        sprintf(descr,"gi|%d|%s|%s|%s ",gi,db_type_arr[my_db],acc,name);
+X      }
+X      else {
+X        if (acc[0] != '\0') sprintf(descr,"%s ",acc);
+X        else descr[0] = '\0';
+X        if (name[0] != '\0' && strcmp(name,"BL_ORD_ID")!=0) sprintf(descr+strlen(descr),"%s ", name);
+X      }
+X      if (m_fd->pref_db < 0) {
+X        if (!have_descr) {
+X          strncpy(str,descr,cnt-1);
+X          str[cnt-1] = '\0';    /* strncpy leaves a truncated copy unterminated */
+X          have_descr = 1;
+X        }
+X        else {
+X          strncat(str,"\001",cnt-strlen(str)-1);
+X          strncat(str,descr,cnt-strlen(str)-1);
+X        }
+X        strncat(str,title,cnt-strlen(str)-1);
+X        if (strlen(str) >= cnt-1) break;
+X      }
+X      else if (m_fd->pref_db == my_db) {
+X        have_descr = 1;
+X        strncpy(str,descr,cnt-1);
+X        str[cnt-1] = '\0';
+X        strncat(str,title,cnt-strlen(str)-1);
+X        break;
+X      }
+X    } while (abp);
+X
+X    /* preferred database never seen: use the last definition parsed,
+X       identifier followed by its title (not the identifier twice) */
+X    if (!have_descr) {
+X      strncpy(str,descr,cnt-1);
+X      str[cnt-1] = '\0';
+X      strncat(str,title,cnt-strlen(str)-1);
+X    }
+X
+X    if (have_my_buff) free(my_buff);
+X  }
+X
+X  str[cnt-1]='\0';
+X
+X  /* definition separators become blanks */
+X  bp = str;
+X  while((bp=strchr(bp,'\001'))!=NULL) {*bp++=' ';}
+X
+X  if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[libpos],0);
+X
+X  m_fd->lpos = lib_cnt;
+X  m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
+}
+X
+/* bl2_uint4_cvt() - reverse the four bytes of val, unless IS_BIG_ENDIAN
+X   is defined, in which case val is returned unchanged. */
+unsigned int bl2_uint4_cvt(unsigned int val)
+{
+#ifdef IS_BIG_ENDIAN
+X  return val;
+#else /* it better be LITTLE_ENDIAN */
+X  unsigned int swapped;
+X
+X  /* the two low bytes, exchanged, end up as the high half */
+X  swapped = ((val & 255) << 8) | ((val >> 8) & 255);
+X  /* then the two high bytes, exchanged, as the low half */
+X  swapped = (swapped << 16) | (((val >> 16) & 255) << 8) | ((val >> 24) & 255);
+X  return swapped;
+#endif
+}
+X
+/* bl2_long4_cvt() - like bl2_uint4_cvt(), for a value held in a long:
+X   the low four bytes are reversed (or, with IS_BIG_ENDIAN, the value is
+X   narrowed to int and returned). */
+unsigned int bl2_long4_cvt(long val)
+{
+#ifdef IS_BIG_ENDIAN
+X  int narrowed = val;
+X
+X  return narrowed;
+#else /* it better be LITTLE_ENDIAN */
+X  unsigned int b0, b1, b2, b3;
+X
+X  b0 = (unsigned int)(val & 255);
+X  b1 = (unsigned int)((val >> 8) & 255);
+X  b2 = (unsigned int)((val >> 16) & 255);
+X  b3 = (unsigned int)((val >> 24) & 255);
+X
+X  /* lowest input byte becomes the highest output byte */
+X  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+#endif
+}
+X
+/* bl2_long8_cvt() - 8-byte counterpart of bl2_uint4_cvt().
+X With IS_BIG_ENDIAN defined val is returned unchanged. Otherwise the
+X eight bytes are reversed, two at a time, which needs BIG_LIB64; without
+X BIG_LIB64 the function prints a message and calls exit(1). */
+int64_t bl2_long8_cvt(int64_t val)
+{
+X int64_t res;
+#ifdef IS_BIG_ENDIAN
+X return val;
+#else /* it better be LITTLE_ENDIAN */
+X /* bytes 0,1 then 2,3 of val, each pair exchanged and shifted up */
+X res = ((val&255)*256)+ ((val>>8)&255);
+X res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);
+#ifdef BIG_LIB64
+X /* same for bytes 4,5 and 6,7 */
+X res = (res<<16) + (((val>>32)&255)*256) + ((val>>40)&255);
+X res = (res<<16) + (((val>>48)&255)*256) + ((val>>56)&255);
+#else
+X fprintf(stderr,"Cannot use bl2_long8_cvt without 64-bit longs\n");
+X exit(1);
+#endif
+X return res;
+#endif
+}
+X
+/* src_int4_read() - read a 4-byte, most-significant-byte-first integer
+X   from fd into *val.
+X
+X   The bytes are combined arithmetically, so the result is the same on
+X   any host byte order and no IS_BIG_ENDIAN special case is needed.
+X   Bytes missing after a short read or error count as zero instead of
+X   being taken from uninitialized stack memory. */
+void src_int4_read(FILE *fd, int *val)
+{
+X  unsigned char b[4] = {0, 0, 0, 0};
+X  unsigned int v;
+X
+X  /* result deliberately unused: unread bytes keep their zero */
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+X
+X  /* unsigned arithmetic: b[0] << 24 can overflow a signed int */
+X  v = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
+X    | ((unsigned int)b[2] << 8)  |  (unsigned int)b[3];
+X  *val = (int)v;
+}
+X
+/* src_long4_read() - read a 4-byte, most-significant-byte-first integer
+X   from fd and store it, sign-extended from 32 bits, in *valp.
+X
+X   Host byte order does not matter; bytes missing after a short read
+X   count as zero rather than being uninitialized. */
+void src_long4_read(FILE *fd, long *valp)
+{
+X  unsigned char b[4] = {0, 0, 0, 0};
+X  unsigned int v;
+X  int val4;
+X
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+X
+X  /* build the word unsigned to avoid overflowing a signed shift */
+X  v = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
+X    | ((unsigned int)b[2] << 8)  |  (unsigned int)b[3];
+X
+X  /* pass through int so the value is sign-extended as before */
+X  val4 = (int)v;
+X  *valp = val4;
+}
+X
+/* src_uint4_read() - read a 4-byte, most-significant-byte-first unsigned
+X   integer from fd into *valp.
+X
+X   Host byte order does not matter; bytes missing after a short read
+X   count as zero rather than being uninitialized. */
+void src_uint4_read(FILE *fd, unsigned int *valp)
+{
+X  unsigned char b[4] = {0, 0, 0, 0};
+X
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+X
+X  /* all-unsigned arithmetic; the original shifted b[0] << 24 as int */
+X  *valp = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
+X        | ((unsigned int)b[2] << 8)  |  (unsigned int)b[3];
+}
+X
+/* src_long8_read() - read an 8-byte, most-significant-byte-first integer
+X   from fd into *val.
+X
+X   The bytes are always read into a local array, so a long narrower than
+X   8 bytes is never overrun (the value is then reduced to what fits in a
+X   long).  Bytes missing after a short read count as zero. */
+void src_long8_read(FILE *fd, long *val)
+{
+X  unsigned char b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+X  unsigned long v = 0;
+X  int i;
+X
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)8,fd);
+X
+X  /* most significant byte first; unsigned, so no signed overflow */
+X  for (i = 0; i < 8; i++) {
+X    v = (v << 8) | (unsigned long)b[i];
+X  }
+X  *val = (long)v;
+}
+X
+/* ncbi_long8_read() - read an 8-byte integer stored least significant
+X   byte first (b[0] is the low byte) from fd into *val.
+X
+X   The value is built in int64_t; the previous casts to long dropped the
+X   upper four bytes wherever long is 32 bits.  Bytes missing after a
+X   short read count as zero. */
+void ncbi_long8_read(FILE *fd, int64_t *val)
+{
+X  unsigned char b[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+X  int64_t v = 0;
+X  int i;
+X
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)8,fd);
+X
+X  /* start from the most significant byte, b[7] */
+X  for (i = 7; i >= 0; i--) {
+X    v = (v << 8) | (int64_t)b[i];
+X  }
+X  *val = v;
+}
+X
+/* src_char_read() - read a single byte from fd into *val; the fread
+X   result is not examined. */
+void src_char_read(FILE *fd, char *val)
+{
+X  const size_t one = 1;
+X
+X  fread(val, one, one, fd);
+}
+X
+/* src_fstr_read() - read one item of slen bytes from fd into val.  No
+X   terminator is added and the fread result is not examined. */
+void src_fstr_read(FILE *fd, char *val, int slen)
+{
+X  const size_t item_size = (size_t)slen;
+X
+X  fread(val, item_size, (size_t)1, fd);
+}
+X
+/* newname() - build "<oname>.<suff>" in nname, a buffer of maxn bytes.
+X
+X   The result is always NUL terminated and never longer than maxn-1
+X   characters; a name that does not fit is truncated.  Nothing is written
+X   when maxn <= 0. */
+void
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+X  size_t used;
+X
+X  if (maxn <= 0) return;
+X
+X  strncpy(nname,oname,(size_t)(maxn-1));
+X  nname[maxn-1] = '\0';   /* strncpy leaves a truncated copy unterminated */
+X
+X  /* strncat's count excludes the NUL it appends, so the room left is
+X     (maxn-1) minus what is already there */
+X  used = strlen(nname);
+X  strncat(nname,".",(size_t)(maxn-1) - used);
+X  used = strlen(nname);
+X  strncat(nname,suff,(size_t)(maxn-1) - used);
+}
+X
+#define ASN_SEQ 0x30
+#define ASN_IS_BOOL 1
+#define ASN_IS_INT 2
+#define ASN_IS_STR 26
+X
+/* get_asn_int() - decode an integer element at abp.
+X
+X   Expected layout: the ASN_IS_INT tag, a length byte, that many value
+X   bytes (most significant first), then two bytes that are skipped.  If
+X   the tag is something else a message is printed, *val is set to 0 and
+X   only the tag byte is consumed.
+X
+X   Returns the position following what was consumed. */
+unsigned char *
+get_asn_int(unsigned char *abp, int *val) {
+X
+X  int n_bytes, acc = 0;
+X  unsigned char tag = *abp++;
+X
+X  if (tag == ASN_IS_INT) {
+X    for (n_bytes = *abp++; n_bytes > 0; n_bytes--) {
+X      acc = acc * 256 + *abp++;
+X    }
+X    abp += 2;           /* skip over null's */
+X  }
+X  else {                /* check for int */
+X    fprintf(stderr," int missing\n");
+X  }
+X
+X  *val = acc;
+X  return abp;
+}
+X
+/* get_asn_text() - decode a string element at abp into text.
+X
+X   Expected layout: the ASN_IS_STR tag, a length (one byte, or when that
+X   byte is > 128 its low 7 bits give the number of following length
+X   bytes), the characters, then two bytes that are skipped.  At most
+X   t_len-1 characters are copied and text is always NUL terminated.  If
+X   the tag is something else a message is printed, text is left empty and
+X   only the tag byte is consumed.
+X
+X   Returns the position following what was consumed. */
+unsigned char *
+get_asn_text(unsigned char *abp, char *text, int t_len) {
+X  int len_byte, at_len, n_copy;
+X
+X  text[0] = '\0';
+X
+X  if (*abp++ != ASN_IS_STR) {   /* check for str */
+X    fprintf(stderr," str missing\n");
+X    return abp;
+X  }
+X
+X  len_byte = *abp++;
+X  at_len = len_byte;
+X  if (len_byte > 128) {         /* string length is in next bytes */
+X    at_len = 0;
+X    /* low 7 bits: number of bytes for len */
+X    for (len_byte &= 0x7f; len_byte > 0; len_byte--) {
+X      at_len = (at_len << 8) + *abp++;
+X    }
+X  }
+X
+X  /* copy what fits, leaving room for the terminator */
+X  n_copy = (at_len < t_len-1) ? at_len : t_len-1;
+X  memcpy(text, abp, n_copy);
+X  text[n_copy] = '\0';
+X
+X  return abp + at_len + 2;
+}
+X
+/* something to try to skip over stuff we don't want
+X
+X   Walks the elements at abp until a zero byte: each ASN_SEQ header
+X   (2 bytes) is counted, INT/BOOL elements go through get_asn_int() and
+X   STR elements through get_asn_text(), values discarded.  Any other tag
+X   is stepped over 2 bytes at a time, as the sibling parsers do in their
+X   default case; without that step the loop never advanced and spun
+X   forever.  Finally 2 bytes are skipped per counted sequence.
+X
+X   NOTE(review): a BOOL is handed to get_asn_int(), which reports
+X   " int missing" and consumes only the tag byte - confirm that is the
+X   intended way to skip booleans.
+*/
+unsigned char *
+get_asn_junk(unsigned char *abp) {
+X
+X  int seq_cnt = 0;
+X  int tmp;
+X  char string[256];
+X
+X  while (*abp) {
+X    if ( *abp == ASN_SEQ) { abp += 2; seq_cnt++;}
+X    else if ( *abp == ASN_IS_BOOL ) {abp = get_asn_int(abp, &tmp);}
+X    else if ( *abp == ASN_IS_INT ) {abp = get_asn_int(abp, &tmp);}
+X    else if ( *abp == ASN_IS_STR ) {abp = get_asn_text(abp, string, sizeof(string)-1);}
+X    else { abp += 2; }  /* unrecognized tag: always make progress */
+X  }
+X
+X  while (seq_cnt-- > 0) abp += 2;
+X  return abp;
+}
+X
+/* get_asn_textseq_id() - parse a text seq-id at abp.
+X
+X   Elements are read until a zero byte: 0xa0 -> name, 0xa1 -> acc,
+X   0xa2 -> release (discarded), 0xa3 -> version; anything else is stepped
+X   over 2 bytes at a time.  When a version is present ".<version>" is
+X   appended to acc.  name and acc are treated as 20-byte buffers and acc
+X   is always terminated within them.
+X
+X   Returns the position after the element (4 bytes are skipped when a
+X   leading ASN_SEQ header was seen). */
+unsigned char *
+get_asn_textseq_id(unsigned char *abp,
+X                   char *name, char *acc)
+{
+X  char release[20];
+X  char ver_str[16];     /* '.', sign, 10 digits, NUL */
+X  int version = 0;
+X  int seqcnt = 0;
+X  size_t acc_len;
+X
+X  ver_str[0]='\0';
+X
+X  if (*abp == ASN_SEQ) { abp += 2; seqcnt++;}
+X
+X  while (*abp) {
+X    switch (*abp) {
+X    case 0xa0:
+X      abp = get_asn_text(abp+2, name, 20);
+X      break;
+X    case 0xa1:
+X      abp = get_asn_text(abp+2, acc, 20);
+X      break;
+X    case 0xa2:
+X      abp = get_asn_text(abp+2, release, sizeof(release));
+X      break;
+X    case 0xa3:
+X      abp = get_asn_int(abp+2, &version);
+X      sprintf(ver_str,".%d",version);
+X      break;
+X    default: abp += 2;
+X    }
+X  }
+X  while (seqcnt-- > 0) abp += 4;
+X
+X  /* strncat appends a NUL after its count, so only 19 - len characters
+X     fit in the 20-byte acc */
+X  acc[19]='\0';
+X  acc_len = strlen(acc);
+X  if (acc_len < 19) strncat(acc,ver_str,19 - acc_len);
+X  return abp;   /* skip 2 NULL's */
+}
+X
+/* get_asn_local_id() - parse a local id: an optional ASN_SEQ header
+X   (2 bytes), a 2-byte header, then a string read by get_asn_text() into
+X   the 20-byte buffer acc.  Returns the position after the element; 4
+X   more bytes are skipped when the ASN_SEQ header was present. */
+unsigned char *
+get_asn_local_id(unsigned char *abp, char *acc)
+{
+X  int in_seq = (*abp == ASN_SEQ);
+X
+X  if (in_seq) abp += 2;
+X
+X  abp = get_asn_text(abp+2, acc, 20);
+X  acc[19]='\0';
+X
+X  if (in_seq) abp += 4;
+X  return abp;   /* skip 2 NULL's */
+}
+X
+/* get_asn_dbtag() - parse a dbtag: an optional ASN_SEQ header, a 0xa0
+X   element holding the database name (-> name), then a 0xa1 element
+X   holding the tag, stored in *id_p when it is an integer (type byte 2)
+X   and in str otherwise.  name and str are treated as 20-byte buffers.
+X   A missing element is reported on stderr and stepped over.
+X
+X   Returns the position 2 bytes past the last element. */
+unsigned char *
+get_asn_dbtag(unsigned char *abp, char *name, char *str, int *id_p) {
+X
+X  if (*abp == ASN_SEQ) abp += 2;
+X
+X  if (*abp != 0xa0) {
+X    fprintf(stderr," missing dbtag:db %d %d\n",abp[0],abp[1]);
+X    abp += 2;
+X  }
+X  else {                        /* get db */
+X    abp = get_asn_text(abp+2, name, 20);
+X  }
+X
+X  if (*abp != 0xa1) {
+X    fprintf(stderr," missing dbtag:tag %2x %2x\n",abp[0],abp[1]);
+X    abp += 2;
+X  }
+X  else {                        /* get tag */
+X    abp += 4;                   /* 0xa1 header, then skip over id */
+X    if (*abp == 2) abp = get_asn_int(abp,id_p);
+X    else abp = get_asn_text(abp+2, str, 20);
+X  }
+X
+X  return abp+2; /* skip 2 NULL's */
+}
+X
+/* get_asn_pdb_id() - parse a PDB id.  Elements are read until a zero
+X   byte: 0xa0 -> acc (20-byte buffer), 0xa1 -> an integer whose value
+X   becomes the one-character string chain, 0xa2 -> skipped up to and
+X   including its terminating zero plus 2 bytes; anything else is stepped
+X   over 2 bytes at a time.
+X
+X   Returns the position after the element (4 bytes are skipped when a
+X   leading ASN_SEQ header was seen). */
+unsigned char *
+get_asn_pdb_id(unsigned char *abp, char *acc, char *chain)
+{
+X  int ichain;
+X  int in_seq = 0;
+X
+X  if (*abp == ASN_SEQ) { abp += 2; in_seq = 1; }
+X
+X  /* the loop condition guarantees *abp != 0 inside the body */
+X  while (*abp) {
+X    if (*abp == 0xa0) {                 /* mol-id */
+X      abp = get_asn_text(abp+2, acc, 20);
+X    }
+X    else if (*abp == 0xa1) {
+X      abp = get_asn_int(abp+2, &ichain);
+X      chain[0] = ichain;
+X      chain[1] = '\0';
+X    }
+X    else if (*abp == 0xa2) {            /* ignore date - scan until NULL's */
+X      while (*abp++) {}
+X      abp += 2;                         /* skip the NULL's */
+X    }
+X    else {
+X      abp += 2;
+X    }
+X  }
+X
+X  if (in_seq) abp += 4;
+X  return abp;
+}
+X
+#define ASN_TYPE_MASK 31
+X
+/* get_asn_seqid() - parse one seq-id.
+X
+X   abp must point at an ASN_SEQ header; otherwise a message is printed
+X   and abp is returned unchanged (with *gi_p set to 0).  A leading
+X   element of type 11 (gi) is stored in *gi_p.  The type of the following
+X   element (low 5 bits of its tag; values above 17 become 0) is stored in
+X   *db and selects the parser that fills acc and/or name.
+X
+X   Returns the position after the seq-id; 4 bytes are skipped per
+X   ASN_SEQ header that was opened. */
+unsigned char
+*get_asn_seqid(unsigned char *abp,
+X               int *gi_p, int *db, char *acc, char *name) {
+X
+X  int db_type, itmp, seq_cnt=0;
+X
+X  *gi_p = 0;
+X
+X  if (*abp != ASN_SEQ) {
+X    fprintf(stderr, "seqid - missing SEQ 1: %2x %2x\n",abp[0], abp[1]);
+X    return abp;
+X  }
+X  else { abp += 2; seq_cnt++;}
+X
+X  db_type = (*abp & ASN_TYPE_MASK);
+X
+X  if (db_type == 11) {  /* gi */
+X    abp = get_asn_int(abp+2,gi_p);
+X  }
+X
+X  while (*abp == ASN_SEQ) {abp += 2; seq_cnt++;}
+X
+X  db_type = (*abp & ASN_TYPE_MASK);
+X  if (db_type > 17) {db_type = 0;}
+X  *db = db_type;
+X
+X  switch(db_type) {
+X  case 0:
+X    abp = get_asn_local_id(abp+2, acc);
+X    break;
+X  case 1:
+X  case 2:
+X    abp = get_asn_int(abp+2,&itmp);
+X    abp += 2;
+X    break;
+X  case 11:
+X    abp = get_asn_int(abp+2,&itmp);
+X    break;
+X  case 4:
+X  case 5:
+X  case 6:
+X  case 7:
+X  case 9:
+X  case 12:
+X  case 13:
+X  case 15:
+X  case 16:
+X  case 17:
+X    abp = get_asn_textseq_id(abp+2,name,acc);
+X    break;
+X  case 10:
+X    abp = get_asn_dbtag(abp+2,name,acc,&itmp);
+X    break;      /* was missing: a dbtag fell through into the PDB parser */
+X  case 14:
+X    abp = get_asn_pdb_id(abp+2,acc,name);
+X    break;
+X  default: abp += 2;
+X  }
+X
+X  while (seq_cnt-- > 0) { abp += 4;}
+X  return abp;   /* skip over 2 NULL's */
+}
+X
+#define ASN_FADL_TITLE 0xa0
+#define ASN_FADL_SEQID 0xa1
+#define ASN_FADL_TAXID 0xa2
+#define ASN_FADL_MEMBERS 0xa3
+#define ASN_FADL_LINKS 0xa4
+#define ASN_FADL_OTHER 0xa5
+X
+/* parse_fastadl_asn() - parse one definition from an ASN.1 header block.
+X
+X   asn_buff  where to start
+X   asn_max   end of the buffer (one past the last byte)
+X   gi_p, db, acc, name   filled from the ASN_FADL_SEQID element
+X   title, t_len          filled from the ASN_FADL_TITLE element
+X   taxid_p               filled from the ASN_FADL_TAXID element
+X
+X   Every output is reset first (strings empty, numbers 0), so an element
+X   that is absent never leaves stale or uninitialized data with the
+X   caller.  MEMBERS, LINKS and OTHER elements are skipped; unknown tags
+X   are stepped over 2 bytes at a time.  Parsing stops at a zero byte or
+X   at asn_max.
+X
+X   Returns the start of the next definition (zero bytes skipped), or
+X   NULL when the buffer is exhausted. */
+unsigned char *
+parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
+X                  int *gi_p, int *db, char *acc,
+X                  char *name, char *title, int t_len, int *taxid_p) {
+X  unsigned char *abp;
+X
+X  acc[0] = name[0] = title[0] = '\0';
+X  *db = 0;
+X  *gi_p = 0;            /* callers test gi > 0 even without a seq-id */
+X  *taxid_p = 0;
+X
+X  abp = asn_buff;
+X  while ( abp < asn_max && *abp) {
+X    if (*abp == ASN_SEQ) { abp += 2; }
+X    else if (*abp == ASN_FADL_TITLE) {
+X      abp = get_asn_text(abp+2, title, t_len);
+X    }
+X    else if (*abp == ASN_FADL_SEQID ) {
+X      abp = get_asn_seqid(abp+2, gi_p, db, acc, name);
+X      if (*db > 17) *db = 0;
+X    }
+X    else if (*abp == ASN_FADL_TAXID ) {
+X      abp = get_asn_int(abp+2, taxid_p);
+X    }
+X    else if (*abp == ASN_FADL_MEMBERS) {
+X      abp = get_asn_junk(abp+2);
+X    }
+X    else if (*abp == ASN_FADL_LINKS ) {
+X      abp = get_asn_junk(abp+2);
+X    }
+X    else if (*abp == ASN_FADL_OTHER ) {
+X      abp = get_asn_junk(abp+2);
+X    }
+X    else {
+X      /*      fprintf(stderr, " Error - missing ASN.1 %2x:%2x:%2x:%2x\n",
+X              abp[-2],abp[-1],abp[0],abp[1]); */
+X      abp += 2;
+X    }
+X  }
+X  while (abp < asn_max && *abp == '\0'  ) abp++;
+X  if (abp >= asn_max) return NULL;
+X  else return abp;
+}
+X
+X
+/* parse_pal() - read keyword lines from fd.
+X
+X   Lines starting with '#' are ignored.  The first word after the
+X   keyword is stored as follows:
+X     DBLIST  -> dname     (string)
+X     OIDLIST -> msk_name  (string)
+X     NSEQ    -> *oid_seqs (int)
+X     MAXOID  -> *max_oid  (int)
+X   Other lines are ignored; outputs whose keyword never appears are left
+X   untouched. */
+void
+parse_pal(char *dname, char *msk_name,
+X          int *oid_seqs, int *max_oid,
+X          FILE *fd) {
+X
+X  char line[MAX_STR];
+X
+X  while (fgets(line,sizeof(line),fd) != NULL) {
+X    if (line[0] == '#') continue;
+X
+X    /* the value starts one character past the keyword */
+X    if (!strncmp(line, "DBLIST", 6)) {
+X      sscanf(&line[7],"%s",dname);
+X      continue;
+X    }
+X    if (!strncmp(line, "OIDLIST", 7)) {
+X      sscanf(&line[8],"%s",msk_name);
+X      continue;
+X    }
+X    if (!strncmp(line, "NSEQ", 4)) {
+X      sscanf(&line[5],"%d",oid_seqs);
+X      continue;
+X    }
+X    if (!strncmp(line, "MAXOID", 6)) {
+X      sscanf(&line[7],"%d",max_oid);
+X    }
+X  }
+}
+SHAR_EOF
+chmod 0644 ncbl2_mlib.c ||
+echo 'restore of ncbl2_mlib.c failed'
+Wc_c="`wc -c < 'ncbl2_mlib.c'`"
+test 42930 -eq "$Wc_c" ||
+ echo 'ncbl2_mlib.c: original size 42930, current size' "$Wc_c"
+fi
+# ============= ncbl_head.h ==============
+# Extract ncbl_head.h unless it already exists (a first argument of -c
+# forces the overwrite).  sed strips the leading X that guards payload
+# lines; afterwards the file is made mode 0644 and its byte count is
+# compared with the recorded 1034.  A mismatch is only reported.
+if test -f 'ncbl_head.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping ncbl_head.h (File already exists)'
+else
+echo 'x - extracting ncbl_head.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ncbl_head.h' &&
+/* ncbl_head.h header files for blast1.3 format */
+X
+/* $Name: fa_34_26_5 $ - $Id: ncbl_head.h,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
+X
+#define AMINO_ACID_SEQTYPE 1
+#define AA_SEQTYPE AMINO_ACID_SEQTYPE
+#define NUCLEIC_ACID_SEQTYPE 2
+#define NT_SEQTYPE NUCLEIC_ACID_SEQTYPE
+X
+/* Filename extensions used by the two types of databases (a.a. and nt.) */
+#define AA_HEADER_EXT "ahd"
+#define AA_TABLE_EXT "atb"
+#define AA_SEARCHSEQ_EXT "bsq"
+#define NT_HEADER_EXT "nhd"
+#define NT_TABLE_EXT "ntb"
+#define NT_SEARCHSEQ_EXT "csq"
+X
+#define DB_TYPE_PRO 0x78857a4f /* Magic # for a protein sequence database */
+#define DB_TYPE_NUC 0x788325f8 /* Magic # for a nt. sequence database */
+X
+#define AAFORMAT 3 /* Latest a.a. database format ID number */
+#define NTFORMAT 6 /* Latest nt. database format ID number */
+X
+#define NULLB '\0' /* sentinel byte */
+#define NT_MAGIC_BYTE 0xfc /* Magic byte at end of compressed nt db */
+X
+#ifndef CHAR_BIT
+#define CHAR_BIT 8 /* these values should match blast */
+#endif
+X
+#define NBPN 2
+#define NSENTINELS 2
+SHAR_EOF
+chmod 0644 ncbl_head.h ||
+echo 'restore of ncbl_head.h failed'
+Wc_c="`wc -c < 'ncbl_head.h'`"
+test 1034 -eq "$Wc_c" ||
+ echo 'ncbl_head.h: original size 1034, current size' "$Wc_c"
+fi
+# ============= ncbl_lib.c ==============
+if test -f 'ncbl_lib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping ncbl_lib.c (File already exists)'
+else
+echo 'x - extracting ncbl_lib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ncbl_lib.c' &&
+/* ncbl_lib.c functions to read ncbi-blast format files from
+X setdb (blastp 1.3.2) format files
+X
+X copyright (c) 1992 William R. Pearson
+*/
+X
+/* $Name: fa_34_26_5 $ - $Id: ncbl_lib.c,v 1.9 2006/10/05 18:22:07 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#ifndef WIN32
+#define RBSTR "r"
+#else
+#define RBSTR "rb"
+#endif
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#include "ncbl_head.h"
+#include "mm_file.h"
+X
+int ncbl_getliba(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+int ncbl_getlibn(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+X
+void src_ulong_read();
+X
+#ifndef NCBL13_ONLY
+static void src_char_read();
+static void newname(char *, char *, char *, int);
+#else
+void src_char_read();
+void newname(char *, char *, char *, int);
+#endif
+X
+/* nt_btoa maps from blast 2bit format to ascii characters */
+static char nt_btoa[5] = {"ACGT"};
+X
+static char aa_btoa[27]= {"-ARNDCQEGHILKMFPSTWYVBZX*"};
+static int aa_btof[32]; /* maps to fasta alphabet */
+X
+static FILE *tfile=NULL, /* table of offsets, also DB info */
+X *hfile=NULL, /* description lines */
+X *sfile=NULL; /* binary sequence data */
+X
+static unsigned long lib_cnt, max_cnt, totlen, mxlen, dbline_len;
+static unsigned long *seq_beg, *hdr_beg;
+static unsigned char *ambiguity_ray;
+static long seq_format, dbtype, dbformat;
+static char dline[512];
+X
+#define NCBIBL13 11
+X
+/* ncbl_openlib() - open the table, header and sequence files of a
+X   library and load its index.
+X
+X   name     base name; the extension is replaced via newname()
+X   ldnaseq  1 selects the nucleotide files (NTFORMAT), anything else the
+X            amino-acid files (AAFORMAT)
+X
+X   The file handles and index arrays are kept in this file's static
+X   variables (tfile, hfile, sfile, seq_beg, hdr_beg, ambiguity_ray, ...).
+X
+X   Returns 1 on success and -1 on failure, as integer constants.
+X   NOTE(review): the declared return type is a pointer - confirm how
+X   callers interpret these values.
+*/
+struct lmf_str *
+ncbl_openlib(char *name, int ldnaseq)
+{
+X  char hname[256];
+X  char sname[256];
+X  char tname[256];
+X  long title_len;
+X  char *title_str;
+X  int rdtmp;
+X  int i;
+X  unsigned long line_len, c_len, clean_count;
+X
+X  if (ldnaseq!=1) {
+X    newname(tname,name,AA_TABLE_EXT,(int)sizeof(tname));
+X    if ((tfile = fopen(tname,RBSTR))==NULL) {
+X      /* report the amino-acid extension that was actually tried */
+X      fprintf(stderr," cannot open %s (%s.%s) table file\n",
+X              name,tname,AA_TABLE_EXT);
+X      return (-1);
+X    }
+X    seq_format = AAFORMAT;
+X  }
+X  else {
+X    newname(tname,name,NT_TABLE_EXT,(int)sizeof(tname));
+X    if ((tfile = fopen(tname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s (%s.%s) table file\n",
+X              name,tname,NT_TABLE_EXT);
+X      return (-1);
+X    }
+X    seq_format = NTFORMAT;
+X  }
+X
+X  src_ulong_read(tfile,&dbtype);
+X  src_ulong_read(tfile,&dbformat);
+X
+X  /* the table must carry the magic number and format of the chosen type */
+X  if (seq_format == AAFORMAT && (dbformat != seq_format || dbtype !=
+X                                 DB_TYPE_PRO)) {
+X    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
+X            tname,dbtype,DB_TYPE_PRO,dbformat,seq_format);
+X    return (-1);
+X  }
+X  else if (seq_format == NTFORMAT && (dbformat != seq_format || dbtype !=
+X                                      DB_TYPE_NUC)) {
+X    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
+X            tname,dbtype,DB_TYPE_NUC,dbformat,seq_format);
+X    return (-1);
+X  }
+X
+X  if (seq_format == AAFORMAT) {
+X    newname(hname,name,AA_HEADER_EXT,(int)sizeof(hname));
+X    if ((hfile = fopen(hname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s header file\n",hname);
+X      return (-1);
+X    }
+X    newname(sname,name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
+X    if ((sfile = fopen(sname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s sequence file\n",sname);
+X      return (-1);
+X    }
+X  }
+X  else {
+X    newname(hname,name,NT_HEADER_EXT,(int)sizeof(hname));
+X    if ((hfile = fopen(hname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s header file\n",hname);
+X      return (-1);
+X    }
+X    newname(sname,name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
+X    if ((sfile = fopen(sname,RBSTR))==NULL) {
+X      fprintf(stderr," cannot open %s sequence file\n",sname);
+X      return (-1);
+X    }
+X  }
+X
+/* all files should be open */
+X
+X  /* the title is padded to a multiple of 4 bytes; it is read only to get
+X     past it, so release the buffer straight away */
+X  src_ulong_read(tfile,&title_len);
+X  rdtmp = title_len + ((title_len%4 !=0 ) ? 4-(title_len%4) : 0);
+X  if ((title_str = calloc((size_t)rdtmp,sizeof(char)))==NULL) {
+X    fprintf(stderr," cannot allocate title string (%d)\n",rdtmp);
+X    return(-1);
+X  }
+X  fread(title_str,(size_t)1,(size_t)rdtmp,tfile);
+X  free(title_str);
+X
+X  lib_cnt = 0;
+X  if (seq_format == AAFORMAT) {
+X    src_ulong_read(tfile,&max_cnt);
+X    src_ulong_read(tfile,&totlen);
+X    src_ulong_read(tfile,&mxlen);
+X
+X    /* fprintf(stderr," max_cnt: %d, totlen: %d\n",max_cnt,totlen); */
+X
+X    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+X      fprintf(stderr," cannot allocate sequence pointers\n");
+X      return -1;
+X    }
+X    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+X      fprintf(stderr," cannot allocate header pointers\n");
+X      return -1;
+X    }
+X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
+X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
+X
+X    /* translation table: library residue code -> aascii[] code, with
+X       letters aascii[] does not accept (>= NA) mapped to 'X' */
+X    for (i=0; i<sizeof(aa_btoa); i++) {
+X      if ((rdtmp=aascii[aa_btoa[i]])<NA) aa_btof[i]=rdtmp;
+X      else aa_btof[i]=aascii['X'];
+X    }
+X  }
+X  else if (seq_format == NTFORMAT) {
+X    src_ulong_read(tfile,&dbline_len);  /* length of uncompress DB lines */
+X    src_ulong_read(tfile,&max_cnt);     /* number of entries */
+X    src_ulong_read(tfile,&mxlen);       /* maximum length sequence */
+X    src_ulong_read(tfile,&totlen);      /* total count */
+X    src_ulong_read(tfile,&c_len);       /* compressed db length */
+X    src_ulong_read(tfile,&clean_count); /* count of nt's cleaned */
+X
+X    fseek(tfile,(size_t)((clean_count)*4),1);
+X                                        /* seek over clean_count */
+X    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+X      fprintf(stderr," cannot allocate sequence pointers\n");
+X      return -1;
+X    }
+X    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
+X      fprintf(stderr," cannot allocate header pointers\n");
+X      return -1;
+X    }
+X    if ((ambiguity_ray=
+X         (unsigned char *)calloc((size_t)max_cnt/CHAR_BIT+1,sizeof(char)))==NULL) {
+X      fprintf(stderr," cannot allocate ambiguity_ray\n");
+X      return -1;
+X    }
+X
+X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
+X    fseek(tfile,(size_t)((max_cnt+1)*4),1);
+X                                        /* seek over seq_beg */
+X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
+X    for (i=0; i<max_cnt/CHAR_BIT+1; i++)
+X      src_char_read(tfile,&ambiguity_ray[i]);
+X  }
+X  return 1;
+}
+X
+/* ncbl_closelib() - close whichever of the table, header and sequence
+X   files are open and clear their handles, so a second call is harmless. */
+void ncbl_closelib()
+{
+X  if (tfile != NULL) fclose(tfile);
+X  tfile = NULL;
+X
+X  if (hfile != NULL) fclose(hfile);
+X  hfile = NULL;
+X
+X  if (sfile != NULL) fclose(sfile);
+X  sfile = NULL;
+}
+X
+/* ncbl_getliba() - read the next chunk of an amino-acid sequence.
+X
+X   The parameter list now matches the prototype at the top of this file
+X   (the old definition took lcont as a plain int yet dereferenced it, and
+X   lacked the last two parameters).  m_fd is not used; *l_off is set to
+X   1, as ncbl2_getlibn() does.
+X
+X   With *lcont == 0 a new entry is started: its length is taken from
+X   seq_beg[] and the leading NULLB byte is consumed.  Residues are read
+X   into seq, translated through aa_btof[], and followed by EOSEQ.  A
+X   trailing '*' is dropped from the count.  *lcont is reset to 0 when the
+X   entry is complete and incremented when more remains.
+X
+X   Returns the number of residues in seq, or -1 at the end of the
+X   library or on a read error.
+*/
+int
+ncbl_getliba(char *seq, int maxs,
+X             char *libstr, int n_libstr,
+X             fseek_t *libpos,
+X             int *lcont,
+X             struct lmf_str *m_fd,
+X             long *l_off)
+{
+X  register char *sptr;
+X  long seqcnt;
+X  long tmp;
+X  static long seq_len;
+X
+X  if (l_off != NULL) *l_off = 1;
+X
+X  *libpos = lib_cnt;
+X  if (*lcont==0) {
+X    if (lib_cnt >= max_cnt) return -1;
+X    seq_len = seq_beg[lib_cnt+1] - seq_beg[lib_cnt] -1;
+X    tmp=(long)fgetc(sfile);     /* skip the null byte */
+X    if (tmp!=NULLB)
+X      fprintf(stderr," phase error: %ld:%ld found\n",(long)lib_cnt,tmp);
+X    libstr[0]='\0';
+X  }
+X
+X  if (seq_len < maxs) {
+X    if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,sfile))!=(size_t)seq_len) {
+X      fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",
+X              (long)*libpos,tmp,seq_len);
+X      goto error;
+X    }
+X    /* index through unsigned char: plain char may be negative */
+X    if (seq_len > 0 &&
+X        aa_btoa[(unsigned char)seq[seq_len-1]]=='*') seqcnt = seq_len-1;
+X    else seqcnt=seq_len;
+X    lib_cnt++;
+X    *lcont = 0;
+X  }
+X  else {
+X    if (fread(seq,(size_t)1,(size_t)(maxs-1),sfile)!=(size_t)(maxs-1)) {
+X      fprintf(stderr," could not read sequence record: %ld %ld\n",
+X              (long)*libpos,seq_len);
+X      goto error;
+X    }
+X    (*lcont)++;
+X    seqcnt = maxs-1;
+X    seq_len -= seqcnt;
+X  }
+X  sptr = seq+seqcnt;
+X
+X  while (--sptr >= seq) *sptr = aa_btof[(unsigned char)*sptr];
+X
+X  seq[seqcnt]= EOSEQ;
+X  return (seqcnt);
+X
+X  /* libstr is a string: print it with %s, not %ld */
+error:  fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);
+X  fflush(stderr);
+X  return (-1);
+}
+X
+/* ncbl_getlibn() - read the next chunk of a 2-bit packed nucleotide
+X   sequence.
+X
+X   The parameter list now matches the prototype at the top of this file
+X   (the old definition lacked the last two parameters).  m_fd is not
+X   used; *l_off is set to 1, as ncbl2_getlibn() does.
+X
+X   With *lcont == 0 a new entry is started: its packed length comes from
+X   seq_beg[] and the leading NT_MAGIC_BYTE is consumed.  Packed bytes are
+X   read into seq and expanded in place, four values (1..4) per byte,
+X   followed by EOSEQ.  *lcont is reset to 0 when the entry is complete
+X   and incremented when more remains.
+X
+X   Returns the number of values in seq, or -1 at the end of the library
+X   or on a read/phase error.
+*/
+int
+ncbl_getlibn(char *seq, int maxs,
+X             char *libstr, int n_libstr,
+X             fseek_t *libpos, int *lcont,
+X             struct lmf_str *m_fd,
+X             long *l_off)
+{
+X  register char *sptr, *tptr;
+X  unsigned char stmp;
+X  long seqcnt;
+X  long tmp;
+X  static long seq_len;
+X  static int c_len,c_pad;
+X
+X  if (l_off != NULL) *l_off = 1;
+X
+X  *libpos = lib_cnt;
+X  if (*lcont==0) {
+X    if (lib_cnt >= max_cnt) return -1;
+X    c_len = seq_beg[lib_cnt+1]/(CHAR_BIT/NBPN)
+X      - seq_beg[lib_cnt]/(CHAR_BIT/NBPN);
+X    c_len -= NSENTINELS;
+X
+X    seq_len = c_len*(CHAR_BIT/NBPN);
+X    c_pad = seq_beg[lib_cnt] & ((CHAR_BIT/NBPN)-1);
+X    if (c_pad != 0) seq_len -= ((CHAR_BIT/NBPN) - c_pad);
+X
+X    tmp=fgetc(sfile);   /* skip the null byte */
+X    if (tmp!=NT_MAGIC_BYTE) {
+X      fprintf(stderr," phase error: %ld:%ld (%ld/%d) found\n",
+X              (long)lib_cnt,seq_len,tmp,NT_MAGIC_BYTE);
+X      goto error;
+X    }
+X    libstr[0]='\0';
+X  }
+X
+X  if (seq_len < maxs-3) {
+X    seqcnt=(seq_len+3)/4;
+X    if (seqcnt==0) seqcnt++;
+X    if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,sfile))
+X        !=(size_t)seqcnt) {
+X      fprintf(stderr,
+X              " could not read sequence record: %s %ld %ld != %ld: %d\n",
+X              libstr,(long)*libpos,tmp,seqcnt,*seq);
+X      goto error;
+X    }
+X    tmp=fgetc(sfile);   /* skip the null byte */
+X    if (tmp!=(unsigned char)NT_MAGIC_BYTE) {
+X      fprintf(stderr," phase2 error: %ld:%ld (%ld/%d) next ",
+X              (long)lib_cnt,seqcnt,tmp,NT_MAGIC_BYTE);
+X
+X      goto error;
+X    }
+X    *lcont = 0;
+X    lib_cnt++;
+X  }
+X  else {
+X    seqcnt = ((maxs+3)/4)-1;
+X    if (fread(seq,(size_t)1,(size_t)(seqcnt),sfile)!=(size_t)(seqcnt)) {
+X      fprintf(stderr," could not read sequence record: %s %ld %ld\n",
+X              libstr,(long)*libpos,seqcnt);
+X      goto error;
+X    }
+X    (*lcont)++;
+X  }
+X
+X  /* point to the last packed byte and to the end of the array
+X     seqcnt is the exact number of bytes read
+X     tptr points to the destination, use multiple of 4 to simplify math
+X     sptr points to the source, note that the last byte will be read 4 cycles
+X     before it is written
+X     */
+X
+X  sptr = seq + seqcnt;
+X  tptr = seq + 4*seqcnt;
+X  while (sptr>seq) {
+X    stmp = (unsigned char)*--sptr;
+X    *--tptr = (stmp&3) +1;
+X    *--tptr = ((stmp >>= 2)&3)+1;
+X    *--tptr = ((stmp >>= 2)&3)+1;
+X    *--tptr = ((stmp >>= 2)&3)+1;
+X  }
+X  /*
+X    for (sptr=seq; sptr < seq+seq_len; sptr++) {
+X    printf("%c",nt[*sptr]);
+X    if ((int)(sptr-seq) % 60 == 59) printf("\n");
+X    }
+X    printf("\n");
+X    */
+X  if (seqcnt*4 >= seq_len) {    /* there was enough room */
+X    seq[seq_len]= EOSEQ;
+X    /* printf("%d\n",seq_len); */
+X    return seq_len;
+X  }
+X  else {                        /* not enough room */
+X    seq[seqcnt*4]=EOSEQ;
+X    seq_len -= 4*seqcnt;
+X    return (4*seqcnt);
+X  }
+X
+X  /* libstr is a string: print it with %s, not %ld */
+error:  fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);
+X  fflush(stderr);
+X  return (-1);
+}
+X
+/* ncbl_ranlib() - build the description for entry libpos in str (cnt
+X   bytes, always NUL terminated) and position the sequence file just
+X   before that entry's data.
+X
+X   When the header has a '|' at offset 9 or 10, the text between the
+X   first '|' found from offset 3 and the next blank is used as the
+X   identifier, padded to 10 columns, followed by the rest of the line.
+X   Otherwise the header is copied as is.  For NTFORMAT libraries an entry
+X   flagged in ambiguity_ray[] gets a leading '*'.
+X
+X   Headers without the expected '|' or blank no longer dereference a
+X   NULL pointer, and the identifier can no longer overrun str.
+*/
+void
+ncbl_ranlib(str,cnt,libpos)
+X     char *str; int cnt;
+X     long libpos;
+{
+X  char hline[256], idfield[sizeof(hline)+4], *bp, *bp0, *rest;
+X  int llen, is_amb;
+X  size_t n_read;
+X  long spos;
+X
+X  lib_cnt = libpos;
+X  llen = hdr_beg[lib_cnt+1]-hdr_beg[lib_cnt];
+X  if (llen > sizeof(hline)) llen = sizeof(hline);
+X  if (llen < 1) llen = 1;       /* keep llen-1 from going negative */
+X  fseek(hfile,hdr_beg[lib_cnt]+1,0);
+X
+X  /* zero first: offsets 9 and 10 are examined even for short headers */
+X  memset(hline,0,sizeof(hline));
+X  n_read = fread(hline,(size_t)1,(size_t)(llen-1),hfile);
+X  hline[n_read]='\0';
+X
+X  is_amb = (dbformat == NTFORMAT &&
+X            (ambiguity_ray[lib_cnt/CHAR_BIT]&(1<<lib_cnt%CHAR_BIT)));
+X
+X  bp0 = NULL;
+X  if (hline[9]=='|' || hline[10]=='|') bp0 = strchr(hline+3,'|');
+X
+X  if (bp0 != NULL) {
+X    rest = "";
+X    if ((bp=strchr(bp0+1,' '))!=NULL) { *bp='\0'; rest = bp+1; }
+X
+X    /* idfield can hold any identifier taken from hline */
+X    if (is_amb) sprintf(idfield,"*%-9s ",bp0+1);
+X    else sprintf(idfield,"%-10s ",bp0+1);
+X
+X    strncpy(str,idfield,cnt-1);
+X    str[cnt-1]='\0';
+X    strncat(str,rest,cnt-1-strlen(str));
+X  }
+X  else {
+X    if (is_amb) {
+X      str[0]='*';
+X      strncpy(str+1,hline,cnt-1);
+X    }
+X    else strncpy(str,hline,cnt);
+X  }
+X  str[cnt-1]='\0';
+X
+X  if (dbformat == AAFORMAT)
+X    fseek(sfile,seq_beg[lib_cnt]-1,0);
+X  else {
+X    spos = (seq_beg[lib_cnt])/(CHAR_BIT/NBPN);
+X    fseek(sfile,spos-1,0);
+X  }
+}
+X
+/* src_ulong_read() - read a 4-byte, most-significant-byte-first unsigned
+X   value from fd into *val.
+X
+X   The bytes are combined arithmetically on every host.  The former
+X   IS_BIG_ENDIAN branch read 4 bytes straight into an unsigned long,
+X   which is wrong wherever long is wider than 4 bytes.  Bytes missing
+X   after a short read count as zero. */
+void src_ulong_read(fd, val)
+X     FILE *fd;
+X     unsigned long *val;
+{
+X  unsigned char b[4];
+X  unsigned long v = 0;
+X  int i;
+X
+X  b[0] = b[1] = b[2] = b[3] = 0;
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+X
+X  for (i = 0; i < 4; i++) {
+X    v = (v << 8) | (unsigned long)b[i];
+X  }
+X  *val = v;
+}
+X
+/* src_long_read() - read a 4-byte, most-significant-byte-first value
+X   from fd into *val.
+X
+X   The bytes are combined arithmetically on every host.  The former
+X   IS_BIG_ENDIAN branch read 4 bytes straight into a long, which is wrong
+X   wherever long is wider than 4 bytes.  Bytes missing after a short read
+X   count as zero. */
+void src_long_read(fd,val)
+X     FILE *fd;
+X     long *val;
+{
+X  unsigned char b[4];
+X  unsigned long v = 0;
+X  int i;
+X
+X  b[0] = b[1] = b[2] = b[3] = 0;
+X  (void)fread((char *)&b[0],(size_t)1,(size_t)4,fd);
+X
+X  /* accumulate unsigned, convert once at the end */
+X  for (i = 0; i < 4; i++) {
+X    v = (v << 8) | (unsigned long)b[i];
+X  }
+X  *val = (long)v;
+}
+X
+/* src_char_read() - read a single byte from fd into *val; the fread
+X   result is not examined.  File-local unless NCBL13_ONLY is defined. */
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+src_char_read(fd, val)
+X     FILE *fd;
+X     char *val;
+{
+X  const size_t one = 1;
+X
+X  fread(val, one, one, fd);
+}
+X
+/* src_fstr_read() - read one item of slen bytes from fd into val.  No
+X   terminator is added and the fread result is not examined.  File-local
+X   unless NCBL13_ONLY is defined. */
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+src_fstr_read(fd, val, slen)
+X     FILE *fd;
+X     char *val;
+X     long slen;
+{
+X  const size_t item_size = (size_t)slen;
+X
+X  fread(val, item_size, (size_t)1, fd);
+}
+X
+/* newname() - derive a file name in nname (maxn bytes) from oname by
+X   replacing its extension with suff.
+X
+X   A leading '@' in oname is dropped.  Leading '.' characters are
+X   skipped, then the name is cut at the next '.' (or at its end) and
+X   "." plus suff is appended.  The result is always NUL terminated and
+X   never longer than maxn-1 characters; whatever does not fit is left
+X   off.  Nothing is written when maxn <= 0.  File-local unless
+X   NCBL13_ONLY is defined. */
+#ifndef NCBL13_ONLY
+static void
+#else
+void
+#endif
+newname(char *nname, char *oname, char *suff, int maxn)
+{
+X  char *tptr;
+X
+X  if (maxn <= 0) return;
+X
+X  if (oname[0]=='@') oname++;
+X  strncpy(nname,oname,(size_t)(maxn-1));
+X  nname[maxn-1]='\0';   /* strncpy leaves a truncated copy unterminated */
+X
+X  for (tptr=nname; *tptr=='.'; tptr++);         /* skip leading '.'s */
+X  for (; *tptr!='.' && *tptr; tptr++);          /* get to '.' or EOS */
+X
+X  /* write the '.' only if it and the terminator still fit */
+X  if (tptr < nname + maxn - 1) { *tptr++='.'; }
+X  *tptr='\0';
+X
+X  /* strncat's count excludes the NUL it appends */
+X  strncat(nname,suff,(size_t)(maxn-1) - strlen(nname));
+}
+X
+SHAR_EOF
+chmod 0644 ncbl_lib.c ||
+echo 'restore of ncbl_lib.c failed'
+Wc_c="`wc -c < 'ncbl_lib.c'`"
+test 12694 -eq "$Wc_c" ||
+ echo 'ncbl_lib.c: original size 12694, current size' "$Wc_c"
+fi
+# ============= ngt.aa ==============
+# Extract ngt.aa unless it already exists; giving -c as the first argument
+# overwrites an existing file.  sed strips the leading X that marks
+# archived lines, up to the SHAR_EOF delimiter.
+if test -f 'ngt.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping ngt.aa (File already exists)'
+else
+echo 'x - extracting ngt.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ngt.aa' &&
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGYWN,
+DQYRMFEP,
+SRYIATP,
+KCLDAFP,
+EYTDS,
+SYDEKR,
+YTMGD,
+EKQKPEFL,
+VRGLTHP,
+TRMQLI,
+FKLGLDFP,
+NLPYLI,
+DGSHKIT,
+LRYLAR,
+KTIPEK,
+KRPWFA,
+ETEEERIR,
+GDKVTYVD,
+HWSNK
+SHAR_EOF
+chmod 0644 ngt.aa ||
+echo 'restore of ngt.aa failed'
+# Report when the extracted byte count differs from the archived size.
+Wc_c="`wc -c < 'ngt.aa'`"
+test 217 -eq "$Wc_c" ||
+ echo 'ngt.aa: original size 217, current size' "$Wc_c"
+fi
+# ============= ngts.aa ==============
+# Extract ngts.aa unless it already exists; giving -c as the first argument
+# overwrites an existing file.  sed strips the leading X that marks
+# archived lines, up to the SHAR_EOF delimiter.
+if test -f 'ngts.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping ngts.aa (File already exists)'
+else
+echo 'x - extracting ngts.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'ngts.aa' &&
+>GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
+ILGY*WN,
+EYTDS?,
+S?YDEKR,
+DQY*RMFEP,
+KCLDAFP,
+S*RY*IATP
+SHAR_EOF
+chmod 0644 ngts.aa ||
+echo 'restore of ngts.aa failed'
+# Report when the extracted byte count differs from the archived size.
+Wc_c="`wc -c < 'ngts.aa'`"
+test 111 -eq "$Wc_c" ||
+ echo 'ngts.aa: original size 111, current size' "$Wc_c"
+fi
+# ============= nmgetlib.c ==============
+if test -f 'nmgetlib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping nmgetlib.c (File already exists)'
+else
+echo 'x - extracting nmgetlib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'nmgetlib.c' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: nmgetlib.c,v 1.35 2007/01/08 15:38:46 wrp Exp $ */
+X
+/* May, June 1987 - modified for rapid read of database
+X
+X copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson
+X
+X revised (split) version of nmgetaa.c -> renamed nmgetlib.c
+X
+X This version seeks to be a thread safe, no global, library
+X reading program. While adjusting the routines in this file
+X should be relatively easy, ncbl2_mlib.c and mysql_lib.c may be
+X more difficult.
+X
+X nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
+X the same functions as nxgetaa.c if memory mapping is not used,
+X mmgetaa.c provides the database reading functions if memory
+X mapping is used. The decision to use memory mapping is made on
+X a file-by-file basis.
+X
+X June 2, 1987 - added TFASTA
+X March 30, 1988 - combined ffgetaa, fgetgb;
+X April 8, 1988 - added PIRLIB format for unix
+X Feb 4, 1989 - added universal subroutines for libraries
+X December, 1995 - added range option file.name:1-1000
+X September, 1999 - added option for mmap()ed files using ".xin" */
+X
+X
+/*
+X February 4, 1988 - this starts a major revision of the getaa
+X routines. The goal is to be able to search the following format
+X libraries:
+X
+X 0 - normal FASTA format
+X 1 - full Genbank tape format
+X 2 - NBRF/PIR CODATA format
+X 3 - EMBL/Swiss-prot format
+X 4 - IntelliGenetics format
+X 5 - NBRF/PIR VMS format
+X 6 - GCG 2bit format
+X
+X 11 - NCBI setdb/blastp (1.3.2) AA/NT
+X 12 - NCBI setdb/blastp (2.0) AA/NT
+X 16 - mySQL queries
+X
+X see file altlib.h to confirm numbers
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+X
+#include "defs.h"
+#include "structs.h"
+X
+#ifndef SFCHAR
+#define SFCHAR ':'
+#endif
+X
+#define EOSEQ 0
+X
+#include "uascii.h"
+/* #include "upam.h" */
+X
+#define LFCHAR '\015' /* for MWC 5.5 */
+X
+#include "altlib.h"
+X
+X
+#include <fcntl.h>
+#ifndef O_RAW
+#ifdef O_BINARY
+#define O_RAW O_BINARY
+#else
+#define O_RAW 0
+#endif /* O_BINARY */
+#endif /* O_RAW */
+X
+#ifdef WIN32
+#define RBSTR "rb" /* read file in binary mode */
+#else
+#define RBSTR "r"
+#endif
+X
+#include "mm_file.h"
+struct lmf_str *load_mmap(FILE *, char *, int, int, struct lmf_str *);
+struct lmf_str *ncbl2_reopen(struct lmf_str *);
+struct lmf_str *ncbl2_openlib(char *, int);
+X
+static struct lmf_str *last_m_fptr=NULL;
+X
+#ifdef MYSQL_DB
+struct lmf_str *mysql_openlib(char *, int, int *);
+struct lmf_str *mysql_reopen(struct lmf_str *);
+#endif
+X
+#ifdef PGSQL_DB
+struct lmf_str *pgsql_openlib(char *, int, int *);
+struct lmf_str *pgsql_reopen(struct lmf_str *);
+#endif
+X
+void closelib(struct lmf_str *m_fptr);
+extern void newname(char *nname, char *oname, char *suff, int maxn);
+X
+/* a file name for openlib may include a library type suffix */
+X
+/* openlib() - open a sequence library and return the handle used to read it.
+X
+X   lname    library name.  A ":text" suffix is cut off in place and saved
+X            as the handle's opt_text; a " <number>" suffix selects the
+X            library type; a leading '-' or '@' selects stdin (FASTA
+X            format only).
+X   ldnaseq  0 for a protein library (stored as lib_aa = (ldnaseq==0))
+X   sascii   character mapping table saved in the handle
+X   outtty   non-zero: prompt for another file name when the open fails
+X   om_fptr  an earlier handle, or NULL; a handle with mm_flg set is
+X            rewound (lpos = 0) and returned unchanged
+X
+X   Returns the handle, or NULL when the library cannot be opened, memory
+X   cannot be allocated, or a text library is empty.
+*/
+struct lmf_str *
+openlib(char *lname, int ldnaseq, int *sascii,
+X	int outtty, struct lmf_str *om_fptr)
+{
+X  char sname[MAX_FN], iname[MAX_FN], *bp;
+X  char opt_text[MAX_FN];	/* save text after ':' */
+X  int wcnt, opnflg;
+X  int libtype;
+X  size_t lname_max;		/* characters known to fit in lname */
+X  FILE *libi=NULL;
+X  FILE *libf=NULL;
+X  int use_stdin;
+X  struct lmf_str *m_fptr=NULL;
+X
+X  /* this is currently unavailable - later it can return a value somewhere */
+X  /*
+X    if (lname[0]=='#') {return -9;}
+X  */
+X
+X  if (om_fptr != NULL && om_fptr->mm_flg) {
+X    om_fptr->lpos = 0;
+X    return om_fptr;
+X  }
+X
+X  wcnt = 0;	/* number of times to ask for file name */
+X
+X  /* lname is a pointer, so its capacity is unknown here; the length of the
+X     name passed in is the only size that is certain to fit */
+X  lname_max = strlen(lname);
+X
+X  /* check to see if there is a file option ":1-100" */
+#ifndef WIN32
+X  if ((bp=strchr(lname,':'))!=NULL && *(bp+1)!='\0') {
+#else
+X  if ((bp=strchr(lname+3,':'))!=NULL && *(bp+1)!='\0') {
+#endif
+X    strncpy(opt_text,bp+1,sizeof(opt_text));
+X    opt_text[sizeof(opt_text)-1]='\0';
+X    *bp = '\0';
+X  }
+X  else opt_text[0]='\0';
+X
+X  if (lname[0] == '-' || lname[0] == '@') {
+X    use_stdin = 1;
+X  }
+X  else use_stdin=0;
+X
+X  strncpy(sname,lname,sizeof(sname));
+X  sname[sizeof(sname)-1]='\0';
+X  /* check for library type */
+X  if ((bp=strchr(sname,' '))!=NULL) {
+X    *bp='\0';
+X    /* a suffix that is not a number leaves libtype unset; use type 0 */
+X    if (sscanf(bp+1,"%d",&libtype)!=1) libtype=0;
+X    if (libtype<0 || libtype >= LASTLIB) {
+X      fprintf(stderr," invalid library type: %d (>%d)- resetting\n%s\n",
+X	      libtype,LASTLIB,lname);
+X      libtype=0;
+X    }
+X  }
+X  else libtype=0;
+X
+X  if (use_stdin && libtype !=0) {
+X    fprintf(stderr," @/- STDIN libraries must be in FASTA format\n");
+X    return NULL;
+X  }
+X
+X  /* check to see if file can be open()ed? */
+X
+X l1:
+X  opnflg = 0;	/* stays 0 when no compiled-in reader handles libtype */
+X  if (libtype<=LASTTXT) {
+X    if (!use_stdin) {
+X      opnflg=((libf=fopen(sname,RBSTR))!=NULL);
+X    }
+X    else {
+X      libf=stdin;
+X      strncpy(sname,"STDIN",sizeof(sname));
+X      sname[sizeof(sname)-1]='\0';
+X      opnflg=1;
+X    }
+X  }
+#ifdef NCBIBL13
+X  else if (libtype==NCBIBL13) opnflg=(ncbl_openlib(sname,ldnaseq)!= -1);
+#endif
+#ifdef NCBIBL20
+X  else if (libtype==NCBIBL20) {
+X    opnflg=((m_fptr=ncbl2_openlib(sname,ldnaseq))!=NULL);
+X  }
+#endif
+X
+#ifdef MYSQL_DB
+X  /* a mySQL filename contains mySQL commands, not sequences */
+X  else if (libtype==MYSQL_LIB) {
+X    opnflg=((m_fptr=mysql_openlib(sname,ldnaseq,sascii))!=NULL);
+X  }
+#endif
+#ifdef PGSQL_DB
+X  /* handled by pgsql_openlib() rather than read as a text library */
+X  else if (libtype==PGSQL_LIB) {
+X    opnflg=((m_fptr=pgsql_openlib(sname,ldnaseq,sascii))!=NULL);
+X  }
+#endif
+X
+X  if (!opnflg) {	/* here if open failed */
+X    if (outtty) {
+X      fprintf(stderr," cannot open %s library\n",sname);
+X      fprintf(stderr," enter new file name or <RET> to quit ");
+X      fflush(stderr);
+X      if (fgets(sname,sizeof(sname),stdin)==NULL) return NULL;
+X      if ((bp=strchr(sname,'\n'))!=0) *bp='\0';
+X      if (strlen(sname)==0) return NULL;
+X      if (++wcnt > 10) return NULL;
+X      /* hand the new name back to the caller only when it is certain to
+X	 fit; otherwise lname is left as it was */
+X      if (strlen(sname) <= lname_max) strcpy(lname,sname);
+X      goto l1;
+X    }
+X    else return NULL;
+X  }	/* !openflg */
+X
+X  if (libtype <= LASTTXT) {
+X    /* now allocate a buffer for the opened text file */
+X    if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
+X      fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+X	      (long)sizeof(struct lmf_str),sname);
+X      if (libf != stdin) fclose(libf);
+X      return NULL;
+X    }
+X    if ((m_fptr->lline = calloc(MAX_STR,sizeof(char)))==NULL) {
+X      fprintf(stderr," cannot allocate lline (%d) for %s\n",
+X	      MAX_STR,sname);
+X      free(m_fptr);
+X      if (libf != stdin) fclose(libf);
+X      return NULL;
+X    }
+X
+X    strncpy(m_fptr->lb_name,sname,MAX_FN);
+X    m_fptr->lb_name[MAX_FN-1]='\0';
+X    strncpy(m_fptr->opt_text,opt_text,MAX_FN);
+X    m_fptr->opt_text[MAX_FN-1]='\0';
+X    m_fptr->sascii = sascii;
+X
+X    m_fptr->libf = libf;
+X    m_fptr->lb_type = libtype;
+X    m_fptr->getlib = getliba[libtype];
+X    m_fptr->ranlib = ranliba[libtype];
+X    m_fptr->mm_flg = 0;
+X    m_fptr->tot_len = 0;
+X    m_fptr->max_len = 0;
+X    m_fptr->lib_aa = (ldnaseq==0);
+X  }
+X  last_m_fptr = m_fptr;
+X
+#ifdef USE_MMAP
+X  /* check for possible mmap()ed files */
+X  if (!use_stdin && (libtype <= LASTTXT) && (getlibam[libtype]!=NULL)) {
+X    /* this is a file we can mmap() */
+X    /* look for .xin file */
+X    newname(iname,sname,"xin",sizeof(iname));
+X    if ((libi=fopen(iname,"r"))!=NULL) { /* have a *.xin file, use mmap */
+X      if (load_mmap(libi,sname,libtype,ldnaseq,m_fptr)!=NULL) {
+X	fclose(libi);	/* close index file */
+X	m_fptr->lb_type = libtype;
+X	m_fptr->getlib = getlibam[libtype];
+X	m_fptr->ranlib = ranlibam[libtype];
+X	m_fptr->mm_flg = 1;
+X	return m_fptr;
+X      }
+X      fclose(libi);	/* memory mapping failed, but still must close file */
+X    }
+X  }
+#endif
+X
+X  if (libtype <= LASTTXT) {
+X    m_fptr->lpos = 0;
+X    /* NOTE(review): an empty library returns NULL here while libf and
+X       m_fptr stay allocated; m_fptr is still referenced by last_m_fptr,
+X       so it is not released in this function */
+X    if (fgets(m_fptr->lline,MAX_STR,libf)==NULL) return NULL;
+X  }
+X  return m_fptr;
+}
+X
+/* closelib() - release the file behind a library handle.
+X
+X   Closes m_fptr->libf (unless it is stdin) and clears the pointer, then
+X   calls the type-specific close routine for the library types compiled
+X   in.  A NULL handle is ignored.  The handle itself is not freed here.
+*/
+void
+closelib(struct lmf_str *m_fptr) {
+X
+X  if (m_fptr == NULL) return;
+X
+X  /* NOTE(review): the rest of this file selects memory mapping with
+X     USE_MMAP, so this section is only compiled when MMAP is defined -
+X     confirm which macro is intended.  The flag is spelled mm_flg, as it
+X     is everywhere else in this file. */
+#ifdef MMAP
+X  if (m_fptr->mm_flg) {
+/* don't close memory mapped files
+X    close_mmap(m_fptr);
+*/
+X    return;
+X  }
+#endif
+X
+X  if (m_fptr->libf!=NULL && m_fptr->libf != stdin) {
+X    fclose(m_fptr->libf);
+X    m_fptr->libf = NULL;
+X  }
+X
+#ifdef NCBIBL13
+X  if (m_fptr->lb_type == NCBIBL13) ncbl_closelib(m_fptr);
+#endif
+#ifdef NCBIBL20
+X  if (m_fptr->lb_type == NCBIBL20) ncbl2_closelib(m_fptr);
+#endif
+#ifdef MYSQL_DB
+X  if (m_fptr->lb_type == MYSQL_LIB) mysql_closelib(m_fptr);
+#endif
+}
+X
+/* re_openlib() - make a previously opened library readable again.
+X
+X   A memory mapped handle (mm_flg set) and, with MYSQL_DB, a mySQL handle
+X   are returned unchanged.  A text library whose libf is NULL is
+X   fopen()ed again from lb_name; the NCBI formats go through their own
+X   open routines.  outtty is not used.
+X
+X   Returns the handle to use (for NCBIBL20 this is the handle returned by
+X   ncbl2_openlib()), or NULL when the library cannot be re-opened.
+*/
+struct lmf_str *
+re_openlib(struct lmf_str *om_fptr, int outtty)
+{
+X  int opnflg;
+#ifdef NCBIBL20
+X  struct lmf_str *nm_fptr;
+#endif
+X
+X  /* if the file mmap()ed and has been opened - use it and return */
+X  if (om_fptr->mm_flg) {
+X    return om_fptr;
+X  }
+#ifdef MYSQL_DB
+X  /* if this is a mysql database - use it and return */
+X  else if (om_fptr->lb_type == MYSQL_LIB) {
+X    return om_fptr;
+X  }
+#endif
+X
+X  /* data is available, but file is closed or not memory mapped, open it */
+X  /* no longer check to memory map - because we could not do it before */
+X
+X  opnflg = 1;
+X  if (om_fptr->lb_type<=LASTTXT && om_fptr->libf==NULL)
+X    opnflg=((om_fptr->libf=fopen(om_fptr->lb_name,RBSTR))!=NULL);
+#ifdef NCBIBL13
+X  else if (om_fptr->lb_type==NCBIBL13)
+X    opnflg=(ncbl_openlib(om_fptr->lb_name,!om_fptr->lib_aa)!= -1);
+#endif
+#ifdef NCBIBL20
+X  else if (om_fptr->lb_type==NCBIBL20) {
+X    /* keep om_fptr until the open has succeeded: its lb_name is needed
+X       for the error message below */
+X    nm_fptr = ncbl2_openlib(om_fptr->lb_name,!om_fptr->lib_aa);
+X    if (nm_fptr != NULL) om_fptr = nm_fptr;
+X    else opnflg = 0;
+X  }
+#endif
+#ifdef MYSQL_DB
+X  /* a mySQL filename contains mySQL commands, not sequences */
+X  /* (mySQL handles have already been returned above) */
+X  else if (om_fptr->lb_type==MYSQL_LIB)
+X    opnflg=(mysql_reopen(om_fptr)!=NULL);
+#endif
+X
+X  if (!opnflg) {
+X    fprintf(stderr,"*** could not re_open %s\n",om_fptr->lb_name);
+X    return NULL;
+X  }
+X
+X  /* use the old buffer for the opened text file */
+X  om_fptr->mm_flg = 0;
+X  last_m_fptr = om_fptr;
+X
+X  return om_fptr;
+}
+X
+#ifdef SUPERFAMNUM
+static char tline[512];
+extern int nsfnum; /* number of superfamily numbers */
+extern int sfnum[10]; /* superfamily number from types 0 and 5 */
+extern int nsfnum_n;
+extern int sfnum_n[10];
+#endif
+X
+void sf_sort(int *, int);
+X
+/* agetlib() - read the next entry, or the next piece of a long entry, from
+X   a library whose header lines start with '>' or ';'.
+X
+X   seq, maxs         buffer for the converted sequence and its size; the
+X                     sequence is terminated with EOSEQ
+X   libstr, n_libstr  receive the header text (without its first character
+X                     and without CR/LF)
+X   libpos            receives lm_fd->lpos, the position saved for the header
+X   lcont             0 on entry to start a new entry; on return it is
+X                     incremented when the buffer filled up before the entry
+X                     ended and reset to 0 otherwise
+X   lm_fd             library handle (libf, lline, lpos, sascii are used)
+X   l_off             set to 1, or to the number following "@C:" in the header
+X
+X   Returns the number of sequence codes stored, or -1 when the end of the
+X   file is reached before a header line is found.
+*/
+int
+agetlib(unsigned char *seq, int maxs,
+X	char *libstr, int n_libstr,
+X	fseek_t *libpos,
+X	int *lcont,
+X	struct lmf_str *lm_fd,
+X	long *l_off)
+{
+X  int i;
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  unsigned char *seqm, *seqm1;
+X  /* int ic, l_start, l_stop, l_limit, rn; */
+X  char *bp, *bp1, *bpa, *tp;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = lm_fd->sascii;
+X
+X  if (*lcont==0) {
+X    *l_off = 1;
+X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+X      if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X    }
+#ifdef SUPERFAMNUM
+X    strncpy(tline,lm_fd->lline+1,sizeof(tline));
+X    tline[sizeof(tline)-1]='\0';
+X    sfnum[0]=nsfnum=0;
+X    if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
+X      if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';
+X      if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
+/*	fprintf(stderr," second %c missing: %s\n",SFCHAR,libstr); */
+X      }
+X      else {
+X	*bp1 = '\0';
+X	i = 0;
+X	if ((tp = strtok(bp+1," \t"))!=NULL) {
+X	  sfnum[i++] = atoi(tp);
+X	  while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
+X	    if (isdigit(*tp)) sfnum[i++] = atoi(tp);
+X	    if (i>=9) break;
+X	  }
+X	}
+X	sfnum[nsfnum=i]= 0;
+X	if (nsfnum>1) sf_sort(sfnum,nsfnum);
+X	else {
+X	  if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
+X	}
+X      }
+X    }
+X    else {
+X      sfnum[0] = nsfnum = 0;
+X    }
+#endif
+X
+X    if ((bp=strchr(lm_fd->lline,'@'))!=NULL && !strncmp(bp+1,"C:",2)) {
+X      sscanf(bp+3,"%ld",l_off);
+X    }
+X
+X    strncpy(libstr,lm_fd->lline+1,n_libstr-1);
+X    libstr[n_libstr-1]='\0';
+X    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
+X    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+X    if (n_libstr > MAX_UID) {
+X      /* blank out ^A and tab, starting with the first character */
+X      for (tp = libstr; *tp; tp++)
+X	if (*tp == '\001' || *tp== '\t') *tp = ' ';
+X    }
+X
+X    *libpos = lm_fd->lpos;
+X
+X    /* make certain we have the end of the line */
+X    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+X      if (strlen(lm_fd->lline)<MAX_STR/2)
+X	bp = &lm_fd->lline[strlen(lm_fd->lline)];
+X      else
+X	bp = &lm_fd->lline[MAX_STR/2];
+X      /* stop at end-of-file: a last header line without '\n' would
+X	 otherwise keep this loop running forever */
+X      if (fgets(bp,MAX_STR/2,lm_fd->libf)==NULL) break;
+X    }
+X    lm_fd->lline[MAX_STR-1]='\0';
+X  }
+X
+X  lm_fd->lline[0]='\0';
+X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
+X    if (*seqp=='>') goto new;
+X    if (*seqp==';') {
+X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
+X      continue;
+X    }
+X
+X    /* removed - used for @P:1-n
+X       if (l_limit) {
+X       for (cp=seqp; seqp<seqm1 && rn < l_stop && (ic=ap[*cp++])<EL; )
+X       if (ic < NA && ++rn > l_start) *seqp++ = (unsigned char)ic;
+X       if (rn > l_stop) goto finish;
+X       }
+X       else {
+X    */
+X    /* convert the line in place through sascii[]; codes below NA are
+X       kept, any other code is overwritten by the next character, and a
+X       code above NA ends the line */
+X    for (cp=seqp; seqp<seqm1; ) {
+X      if ((*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA) continue;
+X      if (*(--seqp)>NA) break;
+X    }
+X    if (*seqp==ES) goto done;
+X    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+X  }
+X  goto done;
+X new:
+X  /* the line just read is the next header: save it for the next call */
+X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+X  lm_fd->lline[MAX_STR-1]='\0';
+X  /* be certain to get complete line, if possible */
+X  if (strchr(lm_fd->lline,'\n')==NULL)
+X    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+X  lm_fd->lline[MAX_STR-1]='\0';
+X  if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL)
+X    lm_fd->lline[strlen(lm_fd->lline)-1]='\n';
+X  goto done;
+X
+X  /* removed - used for @P:1-n
+finish:
+X  while (lm_fd->lline[0]!='>' &&
+X	 fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
+X  }
+X  goto done;
+*/
+X cont:
+X  /* a ';' comment line longer than the buffer: read the rest of it and
+X     report a continuation */
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  seqm1 = seqp;
+X done:
+X  if (seqp>=seqm1) (*lcont)++;
+X  else {
+X    *lcont=0;
+X  }
+X
+X  *seqp = EOSEQ;
+X  /*   if ((int)(seqp-seq)==0) return 1; */
+X  return (int)(seqp-seq);
+}
+X
+/* aranlib() - fetch the header line stored at file position seek.
+X
+X   str receives at most cnt-1 characters of the header (without the
+X   leading '>' or ';' and without CR/LF), with ^A and tab changed to
+X   blanks.  str is set to "" when the library is stdin, when nothing can
+X   be read at seek, or when the line there is not a header.  libstr is
+X   not used.
+*/
+void
+aranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd)
+{
+X  char *bp;
+X
+X  str[0]='\0';
+X  if (lm_fd->libf == stdin) return;	/* cannot seek on stdin */
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  /* on a failed read lline would still hold an earlier line */
+X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return;
+X
+X  if (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') return;
+X
+X  strncpy(str,lm_fd->lline+1,cnt);
+X  str[cnt-1]='\0';
+X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X
+X  /* blank out ^A and tab, starting with the first character */
+X  for (bp = str; *bp; bp++)
+X    if (*bp=='\001' || *bp=='\t') *bp=' ';
+}
+X
+void lget_ann(struct lmf_str *, char *, int);
+X
+/* lgetlib() - read the next entry, or the next piece of a long entry, from
+X a library whose entries start with a LOCUS line, have their sequence
+X after an ORIGIN line and end with a line starting with '/'.
+X
+X seq/maxs receive the converted sequence, terminated with EOSEQ; libstr
+X receives 12 characters of the LOCUS line from column 12 when
+X n_libstr <= 21 and the text built by lget_ann() otherwise; *libpos
+X receives the position saved before the LOCUS line was read; *lcont is 0
+X on entry for a new entry and is incremented on return when the buffer
+X filled up (reset to 0 otherwise); *l_off is always set to 1.
+X
+X Returns the number of sequence codes stored, or -1 at end of file while
+X looking for LOCUS or ORIGIN. */
+int
+lgetlib(unsigned char *seq,
+X int maxs,
+X char *libstr,
+X int n_libstr,
+X fseek_t *libpos,
+X int *lcont,
+X struct lmf_str *lm_fd,
+X long *l_off)
+{
+X register unsigned char *cp, *seqp;
+X register int *ap;
+X unsigned char *seqm, *seqm1;
+X char *bp, *bp_gid;
+X
+X *l_off = 1;
+X
+X seqp = seq;
+X seqm = &seq[maxs-11];
+X seqm1 = seqm-1;
+X
+X ap = lm_fd->sascii;
+X
+X if (*lcont==0) {
+X while (lm_fd->lline[0]!='L' || lm_fd->lline[1]!='O' ||
+X strncmp(lm_fd->lline,"LOCUS",5)) { /* find LOCUS */
+X lm_fd->lpos = FTELL(lm_fd->libf);
+X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X if (lm_fd->lfflag) getc(lm_fd->libf);
+X }
+X *libpos= lm_fd->lpos;
+X
+X if (n_libstr <= 21) {
+X strncpy(libstr,&lm_fd->lline[12],12);
+X libstr[12]='\0';
+X }
+X else {
+X lget_ann(lm_fd,libstr,n_libstr);
+X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X }
+X
+X while (lm_fd->lline[0]!='O' || lm_fd->lline[1]!='R' ||
+X strncmp(lm_fd->lline,"ORIGIN",6)) { /* find ORIGIN */
+X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X if (lm_fd->lfflag) getc(lm_fd->libf);
+X }
+X }
+X else {
+X /* continuation: finish the line that was being converted when the
+X buffer filled up on the previous call */
+X for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+X if ((*seqp++=ap[*cp++])<NA) continue;
+X if (*(--seqp)>NA) break;
+X }
+X }
+X
+X lm_fd->lline[0]='\0';
+X while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X if (lm_fd->lfflag) getc(lm_fd->libf);
+X if (lm_fd->lline[0]=='/') goto new;
+X /* conversion starts at column 10 of each sequence line; codes below NA
+X are kept and a code above NA ends the line */
+X for (cp= (unsigned char *)&lm_fd->lline[10]; seqp<seqm1; ) {
+X if ((*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA &&
+X (*seqp++=ap[*cp++])<NA) continue;
+X if (*(--seqp)>NA) break;
+X }
+X }
+X goto done;
+new:
+X /* end of entry: save the position and read the line that follows */
+X lm_fd->lpos = FTELL(lm_fd->libf);
+X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+done:
+X if (seqp>=seqm1) {
+X /* buffer full: remember where to resume in lline */
+X lm_fd->cpsave = cp;
+X (*lcont)++;
+X }
+X else *lcont=0;
+X
+X *seqp = EOSEQ;
+X /* if ((int)(seqp-seq)==0) return 1; */
+X return (int)(seqp-seq);
+}
+X
+/* lget_ann_line() - read lines from fp into buf (size n_buf) until one
+X   starts with key.  Returns 1 with that line in buf, its '\n' removed.
+X   Returns 0 with buf set to "" at end of file or, when stop_origin is
+X   non-zero, when a line starting with ORIGIN is read first. */
+static int
+lget_ann_line(FILE *fp, char *buf, int n_buf, const char *key, int stop_origin)
+{
+X  size_t n_key = strlen(key);
+X  char *nl;
+X
+X  while (fgets(buf,n_buf,fp)!=NULL) {
+X    if (strncmp(buf,key,n_key)==0) {
+X      if ((nl = strchr(buf,'\n'))!=NULL) *nl='\0';
+X      return 1;
+X    }
+X    if (stop_origin && strncmp(buf,"ORIGIN",6)==0) break;
+X  }
+X  buf[0]='\0';
+X  return 0;
+}
+X
+/* lget_ann() - build a FASTA-style description in libstr (size n_libstr)
+X   for the entry whose LOCUS line is in lm_fd->lline, reading the
+X   DEFINITION, ACCESSION and VERSION lines that follow from lm_fd->libf.
+X
+X   The result is "gi|<gi>|gb|" when the VERSION line has a ':' field, then
+X   "<version>|" or, without a VERSION line, "<accession> ", then the locus
+X   name and the definition text.  It is always '\0'-terminated and
+X   truncated to n_libstr-1 characters.  Fields that are not found before
+X   end of file or before ORIGIN are left out.
+*/
+void
+lget_ann(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
+X  char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];
+X  char *desc_txt, *acc_txt, *ver_txt;
+X  int have_acc, have_ver;
+X
+X  if (n_libstr <= 0) return;
+X
+X  /* copy in locus from lm_fd->lline */
+X  strncpy(locus,&lm_fd->lline[12],sizeof(locus)-1);
+X  locus[sizeof(locus)-1]='\0';
+X  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
+X
+X  /* get description */
+X  desc_txt = "";
+X  if (lget_ann_line(lm_fd->libf,desc,(int)sizeof(desc),"DEFINITION",0) &&
+X      strlen(desc) > 11) desc_txt = &desc[11];
+X
+X  /* get accession */
+X  acc_txt = "";
+X  have_acc = lget_ann_line(lm_fd->libf,acc,(int)sizeof(acc),"ACCESSION",1);
+X  if (have_acc && strlen(acc) > 12) {
+X    acc_txt = &acc[12];
+X    if ((bp = strchr(acc_txt,' '))!=NULL) *bp='\0';
+X  }
+X
+X  /* get version - not attempted when the accession search already ran
+X     into ORIGIN or end of file, since reading on would take the VERSION
+X     line of a later entry */
+X  ver_txt = "";
+X  bp_gid = NULL;
+X  have_ver = have_acc &&
+X    lget_ann_line(lm_fd->libf,ver,(int)sizeof(ver),"VERSION",1);
+X  if (have_ver && strlen(ver) > 12) {
+X    ver_txt = &ver[12];
+X    /* extract gi:123456 from version line */
+X    bp_gid = strchr(ver_txt,':');
+X    if (bp_gid != NULL) {
+X      if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
+X      bp_gid++;
+X    }
+X    if ((bp = strchr(ver_txt,' '))!=NULL) *bp='\0';
+X  }
+X
+X  /* build up FASTA header line; every append is limited to the room
+X     that is left in libstr */
+X  libstr[0]='\0';
+X  if (bp_gid != NULL) {
+X    strncat(libstr,"gi|",n_libstr-1-strlen(libstr));
+X    strncat(libstr,bp_gid,n_libstr-1-strlen(libstr));
+X    strncat(libstr,"|gb|",n_libstr-1-strlen(libstr));
+X  }
+X
+X  /* if we have a version number, use it, otherwise accession,
+X     otherwise locus/description */
+X
+X  if (have_ver) {
+X    strncat(libstr,ver_txt,n_libstr-1-strlen(libstr));
+X    strncat(libstr,"|",n_libstr-1-strlen(libstr));
+X  }
+X  else if (have_acc) {
+X    strncat(libstr,acc_txt,n_libstr-1-strlen(libstr));
+X    strncat(libstr," ",n_libstr-1-strlen(libstr));
+X  }
+X
+X  strncat(libstr,locus,n_libstr-1-strlen(libstr));
+X  strncat(libstr,desc_txt,n_libstr-1-strlen(libstr));
+X  libstr[n_libstr-1]='\0';
+}
+X
+X
+/* this code seeks to provide both the various accession numbers
+X necessary to identify the sequence, and also some description.
+X
+X Unfortunately, the various contributors to Genbank use three
+X slightly different formats for including the accession number.
+X
+(1)LOCUS HSJ214M20 107422 bp DNA HTG 16-JUN-2000
+X DEFINITION Homo sapiens chromosome 6 clone RP1-214M20 map p12.1-12.3, ***
+X SEQUENCING IN PROGRESS ***, in unordered pieces.
+X ACCESSION AL121969
+X
+(2)LOCUS AL359201 117444 bp DNA HTG 15-JUN-2000
+X DEFINITION Homo sapiens chromosome 1 clone RP4-671C13 map p13.2-21.1, ***
+X SEQUENCING IN PROGRESS ***, in unordered pieces.
+X ACCESSION AL359201
+X
+(3)LOCUS BB067000 280 bp mRNA EST 19-JUN-2000
+X DEFINITION BB067000 RIKEN full-length enriched, 15 days embryo male testis Mus
+X musculus cDNA clone 8030456L01 3', mRNA sequence.
+X ACCESSION BB067000
+X
+This makes it more difficult to both provide the accession number in a
+standard location and to conserve definition space
+*/
+X
+/* lranlib() - fetch the description of the entry stored at file position
+X   seek.  The line at seek is read into lm_fd->lline, lget_ann() builds
+X   the description in str (size cnt), and the file is then positioned
+X   back at seek with that first line read again.  libstr is not used.
+*/
+void
+lranlib(char *str,
+X	int cnt,
+X	fseek_t seek,
+X	char *libstr,
+X	struct lmf_str *lm_fd)
+{
+X  FSEEK(lm_fd->libf, seek, 0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+X  lget_ann(lm_fd, str, cnt);
+X  str[cnt-1]='\0';
+X
+X  FSEEK(lm_fd->libf,seek,0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+X
+/* pgetlib() - read the next entry, or the next piece of a long entry, from
+X a library whose entries start with an ENTRY line, have their sequence
+X after a SEQUENCE line (plus one extra line) and end with a line starting
+X with '/'.
+X
+X seq/maxs receive the converted sequence, terminated with EOSEQ; libstr
+X receives 8 characters of the ENTRY line from column 16; *libpos receives
+X the position saved before the ENTRY line was read; *lcont is 0 on entry
+X for a new entry and is incremented on return when the buffer filled up
+X (reset to 0 otherwise); *l_off is always set to 1.  n_libstr is not used.
+X
+X Returns the number of sequence codes stored, or -1 at end of file while
+X looking for ENTRY or SEQUENCE. */
+int
+pgetlib(unsigned char *seq,
+X int maxs,
+X char *libstr,
+X int n_libstr,
+X fseek_t *libpos,
+X int *lcont,
+X struct lmf_str *lm_fd,
+X long *l_off)
+{
+X int ic;
+X register unsigned char *cp, *seqp;
+X register int *ap;
+X unsigned char *seqm, *seqm1;
+X
+X *l_off = 1;
+X
+X seqp = seq;
+X seqm = &seq[maxs-11];
+X seqm1 = seqm-1;
+X
+X ap = lm_fd->sascii;
+X
+X if (*lcont==0) {
+X while (lm_fd->lline[0]!='E' || lm_fd->lline[1]!='N' || strncmp(lm_fd->lline,"ENTRY",5))
+X { /* find ENTRY */
+X lm_fd->lpos = FTELL(lm_fd->libf);
+X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X }
+X strncpy(libstr,&lm_fd->lline[16],8);
+X libstr[8]='\0';
+X *libpos = lm_fd->lpos;
+X while (lm_fd->lline[2]!='Q' || lm_fd->lline[0]!='S' || strncmp(lm_fd->lline,"SEQUENCE",8))
+X { /* find SEQUENCE */
+X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X }
+X fgets(lm_fd->lline,MAX_STR,lm_fd->libf); /* get the extra line */
+X }
+X else {
+X /* continuation: finish the line that was being converted when the
+X buffer filled up on the previous call */
+X for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+X if ((*seqp++=ap[*cp++])<NA) continue;
+X if (*(--seqp)>NA) break;
+X }
+X if (*seqp==ES) goto done;
+X }
+X
+X lm_fd->lline[0]='\0';
+X while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X if (lm_fd->lline[0]=='/') goto new;
+X /* conversion starts at column 8 of each sequence line; codes below NA
+X are kept and a code above NA ends the line */
+X for (cp= (unsigned char *)&lm_fd->lline[8]; seqp<seqm1; ) {
+X if ((*seqp++=ap[*cp++])<NA) continue;
+X if (*(--seqp)>NA) break;
+X };
+X if (*seqp==ES) goto done;
+X }
+X goto done;
+new:
+X /* end of entry: save the position and read the line that follows */
+X lm_fd->lpos = FTELL(lm_fd->libf);
+X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X
+done:
+X if (seqp>=seqm1) {
+X /* buffer full: remember where to resume in lline */
+X lm_fd->cpsave = cp;
+X (*lcont)++;
+X }
+X else *lcont=0;
+X
+X *seqp = EOSEQ;
+X /* if ((int)(seqp-seq)==0) return 1; */
+X return (int)(seqp-seq);
+}
+X
+/* pranlib() - fetch the description of the entry stored at file position
+X   seek.
+X
+X   str (size cnt) receives the 8-character identifier taken from column 16
+X   of the line at seek, followed by the text from column 16 of the next
+X   TITLE line; the result is cut at position cnt-9 and at the first '\n'.
+X   When no TITLE line is found before end of file only the identifier is
+X   returned, and when cnt is below 10 str is set to "".  The file is left
+X   positioned after the line at seek, which is read again into
+X   lm_fd->lline.  libstr is not used.
+*/
+void
+pranlib(char *str,
+X	int cnt,
+X	fseek_t seek,
+X	char *libstr,
+X	struct lmf_str *lm_fd)
+{
+X  char *bp;
+X  int have_title;
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X
+X  if (cnt >= 10) {	/* room for the identifier and the offsets below */
+X    strncpy(str,&lm_fd->lline[16],8);
+X    str[8]='\0';
+X
+X    /* look for TITLE, giving up at end of file */
+X    have_title = 0;
+X    while (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X      if (strncmp(lm_fd->lline,"TITLE",5)==0) {
+X	have_title = 1;
+X	break;
+X      }
+X    }
+X    if (have_title) strncpy(&str[8],&lm_fd->lline[16],cnt-9);
+X    str[cnt-9]='\0';
+X    if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X  }
+X  else if (cnt > 0) str[0]='\0';
+X
+X  FSEEK(lm_fd->libf,seek,0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+}
+X
+/* egetlib() - read the next entry, or the next piece of a long entry, from
+X   a library whose entries start with an "ID" line, have their sequence
+X   after an "SQ" line and end with a line starting with '/'.
+X
+X   seq/maxs receive the converted sequence, terminated with EOSEQ; libstr
+X   receives the first word of the ID line padded/cut to 12 characters;
+X   *libpos receives the position saved before the ID line was read;
+X   *lcont is 0 on entry for a new entry and is incremented on return when
+X   the buffer filled up (reset to 0 otherwise); *l_off is always set to
+X   1.  n_libstr is not used.
+X
+X   Returns the number of sequence codes stored, or -1 at end of file
+X   while looking for ID or SQ.
+*/
+int
+egetlib(unsigned char *seq,
+X	int maxs,
+X	char *libstr,
+X	int n_libstr,
+X	fseek_t *libpos,
+X	int *lcont,
+X	struct lmf_str *lm_fd,
+X	long *l_off)
+{
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  unsigned char *seqm, *seqm1;
+X  char id[13];	/* Holds Identifier: the 12 characters printed below */
+X
+X  *l_off=1;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-11];
+X  seqm1 = seqm-1;
+X
+X  ap = lm_fd->sascii;
+X
+X  if (*lcont==0) {
+X    while (lm_fd->lline[0]!='I' || lm_fd->lline[1]!='D') { /* find ID */
+X      lm_fd->lpos = FTELL(lm_fd->libf);
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X      if (lm_fd->lfflag) getc(lm_fd->libf);
+X    }
+X    /* the field width keeps a long identifier from overrunning id[] */
+X    id[0]='\0';
+X    sscanf(&lm_fd->lline[5],"%12s",id);
+X    sprintf(libstr,"%-12.12s",id);
+X    libstr[12]='\0';
+X    *libpos = lm_fd->lpos;
+X    while (lm_fd->lline[0]!='S' || lm_fd->lline[1]!='Q') { /* find SQ */
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X      if (lm_fd->lfflag) getc(lm_fd->libf);
+X    }
+X    sscanf(&lm_fd->lline[14],"%ld",&lm_fd->gcg_len);
+X  }
+X  else {
+X    /* continuation: finish the line that was being converted when the
+X       buffer filled up on the previous call */
+X    for (cp= lm_fd->cpsave; seqp<seqm1; ) {
+X      if ((*seqp++=ap[*cp++])<NA) continue;
+X      if (*(--seqp)>NA) break;
+X    }
+X    if (*seqp==ES) goto done;
+X  }
+X
+X  lm_fd->lline[0]='\0';
+X  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X    if (lm_fd->lfflag) getc(lm_fd->libf);
+X    if (lm_fd->lline[0]=='/') goto new;
+X    lm_fd->lline[70]='\0';	/* ignore everything from column 70 on */
+X    for (cp= (unsigned char *)&lm_fd->lline[5]; seqp<seqm1; ) {
+X      if ((*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA) continue;
+X      if (*(--seqp)>NA) break;
+X    }
+X    if (*seqp==ES) goto done;
+X  }
+X  goto done;
+new:
+X  /* end of entry: save the position and read the line that follows */
+X  lm_fd->lpos = FTELL(lm_fd->libf);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+X  goto done;
+X
+done:
+X  if (seqp>=seqm1) {
+X    /* buffer full: remember where to resume in lline */
+X    lm_fd->cpsave = cp;
+X    (*lcont)++;
+X    lm_fd->gcg_len -= (long)(seqp-seq);
+X  }
+X  else *lcont=0;
+X
+X  *seqp = EOSEQ;
+X  /* if ((int)(seqp-seq)==0) return 1; */
+X  /*	if (*lcont==0 && (long)(seqp-seq)!=lm_fd->gcg_len)
+X	printf("%s read %d of %d\n",libstr,(int)(seqp-seq),lm_fd->gcg_len);
+X  */
+X  return (int)(seqp-seq);
+}
+X
+/* eranlib() - fetch the description of the entry stored at file position
+X   seek.
+X
+X   str (size cnt) receives the first word of the line at seek, padded/cut
+X   to 10 characters plus a blank, followed by the text from column 5 of
+X   the next line starting with "DE"; the result is cut at position cnt-11
+X   and at the first CR or LF.  When no DE line is found before end of
+X   file only the identifier part is returned, and when cnt is below 12
+X   str is set to "".  The file is left positioned after the line at seek,
+X   which is read again into lm_fd->lline.  libstr is not used.
+*/
+void
+eranlib(char *str,
+X	int cnt,
+X	fseek_t seek,
+X	char *libstr,
+X	struct lmf_str *lm_fd)
+{
+X  char *bp;
+X  char id[11];	/* Holds Identifier */
+X  int have_de;
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+X  if (cnt >= 12) {	/* room for the 11 identifier columns and '\0' */
+X    /* the field width keeps a long identifier from overrunning id[] */
+X    id[0]='\0';
+X    sscanf(&lm_fd->lline[5],"%10s",id);
+X    sprintf(str,"%-10.10s ",id);
+X
+X    /* look for the DE line, giving up at end of file */
+X    have_de = 0;
+X    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
+X      if (lm_fd->lfflag) getc(lm_fd->libf);
+X      have_de = 1;
+X      while (lm_fd->lline[0]!='D' || lm_fd->lline[1]!='E') {
+X	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+X	  have_de = 0;
+X	  break;
+X	}
+X      }
+X    }
+X    if (have_de) strncpy(&str[11],&lm_fd->lline[5],cnt-11);
+X    str[cnt-11]='\0';
+X    if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+X    if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X  }
+X  else if (cnt > 0) str[0]='\0';
+X
+X  FSEEK(lm_fd->libf,seek,0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+X
+/* igetlib() - read the next entry, or the next piece of a long entry, from
+X   a library whose entries start with one or more ';' comment lines
+X   followed by a name line and then the sequence.
+X
+X   seq/maxs receive the converted sequence, terminated with EOSEQ; libstr
+X   receives up to 12 characters of the name line, starting at its second
+X   character; *libpos receives the position saved before the first ';'
+X   line was read; *lcont is 0 on entry for a new entry and is incremented
+X   on return when the buffer filled up (reset to 0 otherwise); *l_off is
+X   always set to 1.  n_libstr is not used.
+X
+X   Returns the number of sequence codes stored, or -1 when the end of the
+X   file is reached while looking for the comment or the name line.
+*/
+int
+igetlib(unsigned char *seq,
+X	int maxs,
+X	char *libstr,
+X	int n_libstr,
+X	fseek_t *libpos,
+X	int *lcont,
+X	struct lmf_str *lm_fd,
+X	long *l_off)
+{
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  unsigned char *seqm, *seqm1;
+X  char *bp;
+X
+X  *l_off = 1;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = lm_fd->sascii;
+X
+X  if (*lcont==0) {
+X    while (lm_fd->lline[0]!=';') {
+X      lm_fd->lpos = FTELL(lm_fd->libf);
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X    }
+X    *libpos = lm_fd->lpos;
+X    /* skip the comment lines; a file that ends inside them has no entry
+X       (without this check the loop would never end at end-of-file) */
+X    while (lm_fd->lline[0]==';') {
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X    }
+X    strncpy(libstr,lm_fd->lline+1,12);
+X    libstr[12]='\0';
+X    if((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+X  }
+X
+X  lm_fd->lline[0]='\0';
+X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
+X    if (*seqp=='>') goto new;
+X    if (*seqp==';') {
+X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
+X      continue;
+X    }
+X    /* convert the line in place through sascii[]; codes below NA are
+X       kept and a code above NA ends the line */
+X    for (cp=seqp; seqp<seqm1; ) {
+X      if ((*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA) continue;
+X      if (*(--seqp)>NA) break;
+X    }
+X    if (*seqp==ES) goto done;
+X    lm_fd->lpos = FTELL(lm_fd->libf);
+X  }
+X  goto done;
+new:
+X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+X  lm_fd->lline[MAX_STR-1]='\0';
+X  /* NOTE(review): this reads the rest of the line over the start of
+X     lline, while agetlib() and vgetlib() append at
+X     &lline[strlen(lline)] - confirm which is intended */
+X  if (strchr((char *)seqp,'\n')==NULL)
+X    fgets(lm_fd->lline,MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+X  goto done;
+X
+cont:
+X  /* a ';' comment line longer than the buffer: read the rest of it and
+X     report a continuation */
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  seqm1 = seqp;
+X
+done:
+X  if (seqp>=seqm1) {
+X    (*lcont)++;
+X  }
+X  else {
+X    *lcont=0;
+X  }
+X
+X  *seqp = EOSEQ;
+X  /*	if ((int)(seqp-seq)==0) return 1; */
+X  return (int)(seqp-seq);
+}
+X
+/* iranlib() - fetch the description of the entry stored at file position
+X   seek.
+X
+X   str (size cnt) receives the first word of the first line that does not
+X   start with ';', then two blanks, then the text of the line at seek
+X   (without its first character) when that line starts with '>' or ';'.
+X   When the file ends inside the ';' lines the first part is empty.  The
+X   file is left positioned after the line at seek, which is read again
+X   into lm_fd->lline.  libstr is not used.
+*/
+void
+iranlib(char *str,
+X	int cnt,
+X	fseek_t seek,
+X	char *libstr,
+X	struct lmf_str *lm_fd)
+{
+X  char *bp;
+X  char tline[MAX_FN];
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X
+X  if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') {
+X    strncpy(tline,lm_fd->lline+1,sizeof(tline));
+X    tline[sizeof(tline)-1]='\0';
+X    if ((bp = strchr(tline,'\n'))!=NULL) *bp='\0';
+X  }
+X  else {
+X    tline[0]='\0';
+X  }
+X
+X  /* skip the comment lines; stop at end-of-file instead of testing the
+X     same ';' line forever */
+X  while (lm_fd->lline[0]==';') {
+X    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
+X      lm_fd->lline[0]='\0';
+X      break;
+X    }
+X  }
+X  if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp=0;
+X  if ((bp=strchr(lm_fd->lline,' '))!=NULL) *bp=0;
+X  strncpy(str,lm_fd->lline,cnt);
+X  str[cnt-1]='\0';
+X  strncat(str,"  ",cnt-strlen(str)-1);
+X  strncat(str,tline,cnt-strlen(str)-1);
+X
+X  FSEEK(lm_fd->libf,seek,0);
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+}
+X
+/* vgetlib() - read the next entry, or the next piece of a long entry, from
+X   a library whose entries have a '>' or ';' header line with the name at
+X   column 4, followed by a title line and then the sequence.
+X
+X   seq/maxs receive the converted sequence, terminated with EOSEQ; libstr
+X   receives up to 12 characters of the name and, when n_libstr > 21, a
+X   blank and the title line; *libpos receives the position saved before
+X   the header line was read; *lcont is 0 on entry for a new entry and is
+X   incremented on return when the buffer filled up (reset to 0
+X   otherwise); *l_off is always set to 1.
+X
+X   Returns the number of sequence codes stored, or -1 when the end of the
+X   file is reached before a header line is found.
+*/
+int
+vgetlib(unsigned char *seq,
+X	int maxs,
+X	char *libstr,
+X	int n_libstr,
+X	fseek_t *libpos,
+X	int *lcont,
+X	struct lmf_str *lm_fd,
+X	long *l_off)
+{
+X  int i, ich;
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  unsigned char *seqm, *seqm1;
+X  char *bp, *tp;
+X
+X  *l_off = 1;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = lm_fd->sascii;
+X
+X  if (*lcont==0) {
+X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+X      lm_fd->lpos = FTELL(lm_fd->libf);
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X      if (lm_fd->lfflag) getc(lm_fd->libf);
+X    }
+X
+#ifdef SUPERFAMNUM
+X    if ((bp=strchr(&lm_fd->lline[1],' ')) &&
+X	(bp=strchr(bp+1,SFCHAR))) {
+X      i=0;
+X      if ((tp = strtok(bp+1," \t\n"))!=NULL) sfnum[i++] = atoi(tp);
+X      while ((tp = strtok(NULL," \t")) != NULL) {
+X	sfnum[i++] = atoi(tp);
+X	/* sfnum[] has 10 elements and sfnum[i] gets the closing 0 below,
+X	   so at most 9 numbers can be kept */
+X	if (i>=9) break;
+X      }
+X      sfnum[nsfnum=i]= 0;
+X      if (nsfnum>1) sf_sort(sfnum,nsfnum);
+X      else {
+X	if (nsfnum < 1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
+X      }
+X    }
+X    else sfnum[0]=nsfnum=0;
+#endif
+X
+X    if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp='\0';
+X    strncpy(libstr,&lm_fd->lline[4],12);
+X    libstr[12]='\0';
+X    if ((bp=strchr(libstr,' '))!=NULL) *bp='\0';
+X    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+X
+X    /* the line after the header holds the title */
+X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X    if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+X    if (n_libstr > 21) {
+X      strcat(libstr," ");
+X      strncat(libstr,lm_fd->lline,n_libstr-1-strlen(libstr));
+X      if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
+X      libstr[n_libstr-1]='\0';
+X    }
+X    *libpos = lm_fd->lpos;
+X  }
+X
+X  lm_fd->lline[0]='\0';
+X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
+X    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+X    if (*seqp=='>') goto new;
+X    if (*seqp==';') {
+X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
+X      continue;
+X    }
+X    /* convert the line in place through sascii[]; codes below NA are
+X       kept and a code above NA ends the line */
+X    for (cp=seqp; seqp<seqm1; ) {
+X      if ((*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA &&
+X	  (*seqp++=ap[*cp++])<NA) continue;
+X      if (*(--seqp)>NA) break;
+X    }
+X    if (*seqp==ES) goto done;
+X    lm_fd->lpos = FTELL(lm_fd->libf);
+X  }
+X  goto done;
+new:
+X  /* the line just read is the next header: save it for the next call */
+X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
+X  lm_fd->lline[MAX_STR-1]='\0';
+X  if (strchr((char *)seqp,'\n')==NULL) {
+X    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
+X    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+X  }
+X  goto done;
+X
+cont:
+X  /* a ';' comment line longer than the buffer: read the rest of it and
+X     report a continuation */
+X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X  if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
+X  seqm1 = seqp;
+X
+done:
+X  if (seqp>=seqm1) {
+X    (*lcont)++;
+X  }
+X  else {
+X    *lcont=0;
+X  }
+X
+X  *seqp = EOSEQ;
+X  /*   if ((int)(seqp-seq)==0) return 1;*/
+X  return (int)(seqp-seq);
+}
+X
+/* vranlib: build the description of the entry whose header line starts
+X   at file offset `seek`.
+X
+X   str, cnt - output buffer and its size in bytes.  Receives the header
+X              text from offset 4 up to the first ':' or line end, then
+X              a blank and the following line (minus any leading part of
+X              at least 5 characters that repeats the start of str).
+X              Set to "" when the line at `seek` does not start with '>'
+X              and have ';' or '>' as its fourth character.
+X   seek     - offset handed to FSEEK
+X   libstr   - not used
+X   lm_fd    - library file state.  On return the stream has been moved
+X              back to `seek` and one line re-read into lm_fd->lline.
+X
+X   Nothing is ever written beyond str[cnt-1]; text that does not fit is
+X   dropped.  A failed read leaves lm_fd->lline empty instead of stale. */
+void
+vranlib(char *str,
+X        int cnt,
+X        fseek_t seek,
+X        char *libstr,
+X        struct lmf_str *lm_fd)
+{
+X  char *bp, *llp;
+X  size_t used;
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+X  if (cnt > 0) {
+X    /* lline[1] and lline[2] are tested first so that lline[3] is never
+X       read beyond the end of a short line */
+X    if (lm_fd->lline[0]=='>' &&
+X        lm_fd->lline[1]!='\0' && lm_fd->lline[2]!='\0' &&
+X        (lm_fd->lline[3]==';'||lm_fd->lline[3]=='>')) {
+X      strncpy(str,&lm_fd->lline[4],cnt-1);
+X      str[cnt-1]='\0';
+X
+X      if ((bp = strchr(str,':'))!=NULL) *bp='\0';
+X      if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
+X      else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X      if (lm_fd->lfflag) getc(lm_fd->libf);
+X
+X      /* skip over redundant stuff: a leading part of the second line
+X         that repeats str.  The comparison stops at the end of the line
+X         so it cannot run past both terminators. */
+X      for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp==*bp; llp++,bp++);
+X      if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
+X
+X      if ((bp=strchr(llp,'\r'))!=NULL) *bp=' ';
+X      if ((bp=strchr(llp,'\n'))!=NULL) *bp='\0';
+X
+X      /* append " " + title only while there is room for the blank and
+X         the terminator; cnt-used-1 can then never wrap around */
+X      used = strlen(str);
+X      if (used + 1 < (size_t)cnt) {
+X        str[used++] = ' ';
+X        str[used] = '\0';
+X        strncat(str,llp,(size_t)cnt-used-1);
+X      }
+X    }
+X    else {
+X      str[0]='\0';
+X    }
+X  }
+X
+X  /* leave the stream and the look-ahead line as a sequential reader
+X     positioned on this entry would expect them */
+X  FSEEK(lm_fd->libf,seek,0);
+X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X  if (lm_fd->lfflag) getc(lm_fd->libf);
+}
+X
+static int gcg_bton[4]={2,4,1,3};
+X
+/* gcg_getlib: read the next entry (or the next piece of a long entry)
+X   from lm_fd->libf into seq[] and return the number of residues stored.
+X
+X   The header line is parsed from offset 4 as: name, date, type, one
+X   ignored token, length.  A type beginning with '2' selects the packed
+X   form, where every byte holds four 2-bit codes that are expanded
+X   through gcg_bton[]; otherwise each byte is mapped through
+X   lm_fd->sascii.
+X
+X   seq, maxs        - destination buffer and its size; at most maxs-9
+X                      bytes are requested per call
+X   libstr, n_libstr - receives the entry name (cut to 12 characters when
+X                      n_libstr <= 21, otherwise followed by a blank and
+X                      the title line)
+X   libpos           - set to lm_fd->lpos when a new entry is started
+X   lcont            - in: 0 to start a new entry; out: 0 when the entry
+X                      is finished, incremented when more remains
+X   lm_fd            - library file state
+X   l_off            - always set to 1
+X
+X   Returns -1 if end of file is reached while looking for a header line. */
+int
+gcg_getlib(unsigned char *seq,
+X           int maxs,
+X           char *libstr,
+X           int n_libstr,
+X           fseek_t *libpos,
+X           int *lcont,
+X           struct lmf_str *lm_fd,
+X           long *l_off)
+{
+X  /* every token comes out of lm_fd->lline, which fgets() limits to
+X     MAX_STR-1 characters, so MAX_STR-byte buffers cannot overflow */
+X  char gcg_name[MAX_STR], gcg_date[MAX_STR], gcg_type[MAX_STR], dummy[MAX_STR];
+X  register unsigned char *cp, *seqp, stmp;
+X  register int *ap;
+X  unsigned char *seqm;
+X  long r_block, b_block;
+X  char *bp, *dst;
+X  size_t used;
+X  int n_fld;
+X
+X  *l_off = 1;
+X
+X  seqp = seq;
+X  seqm = &seq[maxs-9];
+X
+X  ap = lm_fd->sascii;
+X
+X  if (*lcont==0) {
+X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
+X      lm_fd->lpos = FTELL(lm_fd->libf);
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
+X    }
+X
+X    /* pre-set the fields that are examined even when the header is short */
+X    gcg_name[0] = gcg_type[0] = '\0';
+X    n_fld = sscanf(&lm_fd->lline[4],"%s %s %s %s %ld",
+X                   gcg_name,gcg_date,gcg_type,dummy,&(lm_fd->gcg_len));
+X    /* NOTE(review): when fewer than 5 fields are present gcg_len keeps
+X       its previous value, as it did before this change */
+X    if (n_fld >= 1 && n_libstr > 0) {
+X      strncpy(libstr,gcg_name,(size_t)n_libstr-1);
+X      libstr[n_libstr-1]='\0';
+X    }
+X
+X    lm_fd->gcg_binary = (gcg_type[0]=='2');
+X
+X    /* read the title line; a line longer than the buffer is consumed in
+X       MAX_STR/2 pieces that overwrite its tail.  Stop at end of file
+X       instead of looping forever on a line that never gets its '\n'. */
+X    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
+X      used = strlen(lm_fd->lline);
+X      if (used < MAX_STR/2) dst = &lm_fd->lline[used];
+X      else dst = &lm_fd->lline[used-MAX_STR/2];
+X      if (fgets(dst,MAX_STR/2,lm_fd->libf)==NULL) break;
+X    }
+X    lm_fd->lline[MAX_STR-1]='\0';
+X
+X    if (n_libstr <= 21) {
+X      if (n_libstr > 12) libstr[12]='\0';
+X    }
+X    else {
+X      /* append " " + title only while the blank and the terminator
+X         still fit, so the length below can never wrap around */
+X      used = strlen(libstr);
+X      if (used + 1 < (size_t)n_libstr) {
+X        libstr[used++] = ' ';
+X        libstr[used] = '\0';
+X        strncat(libstr,lm_fd->lline,(size_t)n_libstr-1-used);
+X      }
+X      if ((bp = strchr(libstr,'\n'))!=NULL) *bp='\0';
+X      libstr[n_libstr-1]='\0';
+X    }
+X    *libpos = lm_fd->lpos;
+X  }
+X
+X  lm_fd->lline[0]='\0';
+X
+X  /* b_block counts residues, r_block counts bytes to read */
+X  r_block = b_block = min((size_t)(seqm-seqp),lm_fd->gcg_len);
+X  if (lm_fd->gcg_binary) { r_block = (r_block+3)/4; }
+X
+X  /* NOTE(review): the fread() result is not checked, so a truncated
+X     file yields whatever the buffer already held */
+X  fread((char *)seqp,(size_t)r_block,(size_t)1,lm_fd->libf);
+X  if (!lm_fd->gcg_binary)
+X    for (cp=seqp; seqp<seq+r_block; ) *seqp++ = ap[*cp++];
+X  else {
+X    /* expand in place from the back so that packed bytes are read
+X       before the expanded codes overwrite them */
+X    seqp = seq + r_block;
+X    cp = seq + 4*r_block;
+X    while (seqp > seq) {
+X      stmp = *--seqp;
+X      *--cp = gcg_bton[stmp&3];
+X      *--cp = gcg_bton[(stmp >>= 2)&3];
+X      *--cp = gcg_bton[(stmp >>= 2)&3];
+X      *--cp = gcg_bton[(stmp >>= 2)&3];
+X    }
+X  }
+X  /* NOTE(review): this completion test multiplies by 4 for the ASCII
+X     form as well; kept exactly as before - confirm it is intended */
+X  if (4 * r_block >= lm_fd->gcg_len) {
+X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
+X    *lcont = 0;
+X  }
+X  else {
+X    if (lm_fd->gcg_binary) b_block = 4*r_block;
+X    lm_fd->gcg_len -= b_block;
+X    (*lcont)++;
+X  }
+X
+X  seq[b_block] = EOSEQ;
+X  /*   if (b_block==0) return 1; else */
+X  return b_block;
+}
+X
+/* gcg_ranlib: build the description of the entry whose header line
+X   starts at file offset `seek`.
+X
+X   str, cnt - output buffer and its size in bytes.  Receives the header
+X              text from offset 4 up to the first blank (or line end),
+X              then a blank and the following line (minus any leading
+X              part of at least 5 characters that repeats the start of
+X              str).  Set to "" when the line at `seek` does not start
+X              with '>' and have ';' or '>' as its fourth character.
+X   seek     - offset handed to FSEEK
+X   libstr   - not used
+X   lm_fd    - library file state.  On return the stream has been moved
+X              back to `seek` and one line re-read into lm_fd->lline.
+X
+X   Nothing is ever written beyond str[cnt-1]; text that does not fit is
+X   dropped.  A failed read leaves lm_fd->lline empty instead of stale. */
+void
+gcg_ranlib(char *str,
+X           int cnt,
+X           fseek_t seek,
+X           char *libstr,
+X           struct lmf_str *lm_fd)
+{
+X  char *bp, *bp1, *llp;
+X  size_t used;
+X
+X  FSEEK(lm_fd->libf, seek, 0);
+X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X
+X  if (cnt > 0) {
+X    /* lline[1] and lline[2] are tested first so that lline[3] is never
+X       read beyond the end of a short line */
+X    if (lm_fd->lline[0]=='>' &&
+X        lm_fd->lline[1]!='\0' && lm_fd->lline[2]!='\0' &&
+X        (lm_fd->lline[3]==';'||lm_fd->lline[3]=='>')) {
+X      strncpy(str,&lm_fd->lline[4],cnt-1);
+X      str[cnt-1]='\0';
+X      if ((bp = strchr(str,' '))!=NULL) *bp='\0';
+X      else if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
+X      else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X
+X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+X
+X      /* check whether the beginning of the line is a duplicate of str;
+X         the comparison stops at the end of the line so it cannot run
+X         past both terminators */
+X      for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp == *bp; llp++,bp++);
+X      if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
+X
+X      /* here we would like to skip over some species stuff */
+X      /*
+X      if ((bp1 = strchr(llp,';'))!=NULL && (int)(bp1-llp)<50) {
+X        if ((bp2 = strchr(bp1+1,';'))!=NULL && (int)(bp2-bp1)<50) {
+X          *(bp2+1)='\0'; bp1 = bp2+2;
+X        }
+X        else {bp1=llp;}
+X      }
+X      else if ((bp1=strchr(llp,'.'))!=NULL && *(bp1+1)==' ') {
+X        *(bp1+1) = '\0'; bp1 += 2;}
+X      else bp1 = llp;
+X      */
+X
+X      bp1 = llp;
+X      if ((bp=strchr(bp1,'\r'))!=NULL) *bp='\0';
+X      if ((bp=strchr(bp1,'\n'))!=NULL) *bp='\0';
+X
+X      /* strncat() always adds a terminator after the characters it
+X         copies, so the limit must leave one byte for it; append only
+X         while the blank and the terminator still fit */
+X      used = strlen(str);
+X      if (used + 1 < (size_t)cnt) {
+X        str[used++] = ' ';
+X        str[used] = '\0';
+X        strncat(str,bp1,(size_t)cnt-used-1);
+X        if (bp1!=llp) strncat(str,llp,(size_t)cnt-strlen(str)-1);
+X      }
+X    }
+X    else {
+X      str[0]='\0';
+X    }
+X  }
+X
+X  /* leave the stream and the look-ahead line as a sequential reader
+X     positioned on this entry would expect them */
+X  FSEEK(lm_fd->libf,seek,0);
+X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
+}
+X
+/* sf_sort: sort the n integers in s[] into ascending order, in place.
+X   An array that is already in order is left untouched; otherwise a
+X   Shell sort with gaps n/2, n/4, ... 1 is applied. */
+void
+sf_sort(s,n)
+X     int *s, n;
+{
+X  int step, hi, lo;
+X  int held;
+X  int in_order;
+X
+X  if (n == 1) return;
+X
+X  /* one pass to see whether any neighbouring pair is out of order */
+X  in_order = 1;
+X  for (hi=0; hi<n-1; hi++) {
+X    if (s[hi]>s[hi+1]) {
+X      in_order = 0;
+X      break;
+X    }
+X  }
+X  if (in_order) return;
+X
+X  for (step=n/2; step>0; step/=2) {
+X    for (hi=step; hi<n; hi++) {
+X      /* sink s[hi] towards the front through elements `step` apart */
+X      lo = hi-step;
+X      while (lo>=0 && s[lo] > s[lo+step]) {
+X        held = s[lo];
+X        s[lo]=s[lo+step];
+X        s[lo+step]=held;
+X        lo -= step;
+X      }
+X    }
+X  }
+}
+SHAR_EOF
+chmod 0644 nmgetlib.c ||
+echo 'restore of nmgetlib.c failed'
+Wc_c="`wc -c < 'nmgetlib.c'`"
+test 36301 -eq "$Wc_c" ||
+ echo 'nmgetlib.c: original size 36301, current size' "$Wc_c"
+fi
+# ============= nr_to_sql.pl ==============
+if test -f 'nr_to_sql.pl' -a X"$1" != X"-c"; then
+ echo 'x - skipping nr_to_sql.pl (File already exists)'
+else
+echo 'x - extracting nr_to_sql.pl (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'nr_to_sql.pl' &&
+#!/usr/bin/perl -w
+X
+# nr_to_sql.pl - load FASTA files into the seq_demo MySQL database.
+#
+# usage: nr_to_sql.pl file [file ...]
+#
+# Every record adds one row to `prot` (sequence text, residue codes packed
+# one per byte, length).  Its header line is split on \001 into entries of
+# the form "gi|<number>|<db>|<acc>|<name> <description>"; each entry adds
+# one row to `annot`, and entries whose db is "sp" also add a row to `sp`.
+# Any warning is fatal (see the __WARN__ handler below).
+X
+use DBI;
+X
+$SIG{__WARN__} = sub { die @_ };
+X
+my $mysql = DBI->connect("DBI:mysql:database=seq_demo;user=seq_demo;password=demo_pass")
+X    or die "cannot connect to seq_demo: $DBI::errstr\n";
+X
+$mysql->do(q{LOCK TABLES prot WRITE,
+X annot WRITE,
+X sp WRITE });
+X
+my $EL = 125;
+my $NA = 123;
+X
+# residue code for each ASCII character 0..127; 0, 10 and 13 map to $EL,
+# characters without a code map to $NA
+my @aatrans = ($EL,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$EL,$NA,$NA,$EL,$NA,$NA,
+X               $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
+X               $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA, 24,$NA,$NA,$NA,$NA,$NA,
+X               $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
+X               $NA, 1, 21, 5, 4, 7, 14, 8, 9, 10,$NA, 12, 11, 13, 3,$NA,
+X               15, 6, 2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA,
+X               $NA, 1, 21, 5, 4, 7, 14, 8, 9, 10,$NA, 12, 11, 13, 3,$NA,
+X               15, 6, 2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA
+X              );
+X
+my $ins_prot = $mysql->prepare(q{
+X INSERT INTO prot (seq,bin,len) VALUES (?, ?, ?)
+X });
+X
+my $ins_annot = $mysql->prepare(q{
+X INSERT INTO annot (gi, prot_id, db, descr) VALUES (?, ?, ?, ?)
+X });
+X
+my $ins_sp = $mysql->prepare(q{
+X INSERT INTO sp (gi, acc, name) VALUES (?, ?, ?)
+X });
+X
+use vars qw( $seq $bin $tot_seq $tot_annot $tot_sp );
+use vars qw( $gi $prot_id $db $desc $sp_acc $sp_name );
+use vars qw( $header @entries $db_acc $db_name );
+X
+$tot_seq = $tot_annot = $tot_sp = 0;
+X
+for my $db_file ( @ARGV ) {
+X    # three-argument open: the name is never interpreted as a mode or a
+X    # pipe, and a lexical handle avoids the reserved DATA handle
+X    open(my $fasta, '<', $db_file) or die "cannot open $db_file: $!\n";
+X    local $/ = "\n>";                    # one FASTA record per read
+X    while (<$fasta>) {
+X        chomp;                           # remove trailing "\n>" record header
+X        ($header, $seq) = $_ =~ m/^>?    # record separator (first entry)
+X                           ( [^\n]* ) \n # header line
+X                           ( .* )        # the sequence
+X                           /osx;         # compile once, . matches newline, commented
+X
+X        $seq =~ s/\W|\d//sg;
+X        $bin = pack('C*', map { $aatrans[unpack('C', $_)] } split(//, $seq));
+X        $ins_prot->execute($seq,$bin,length($seq));
+X        $prot_id = $ins_prot->{mysql_insertid};
+X
+X        $tot_seq++;
+X
+#       print STDERR "Inserted $prot_id: ". length($seq)."\n";
+X
+X        @entries = split(/\001/, $header);
+X
+X        for ( @entries ) {
+X            ($gi,$db,$db_acc,$db_name,$desc)=
+X                $_ =~ /^gi\|(\d+)\|([a-z]+)\|(\S*)\|(\S*) (.*)$/o;
+X            # stop before a row with undefined fields is inserted
+X            die "cannot parse header entry in $db_file: $_\n"
+X                unless defined $gi;
+#           print "$prot_id: $gi\t$db\t$db_acc\t$desc\n";
+X            $ins_annot->execute($gi,$prot_id,$db,$desc);
+X
+X            $tot_annot++;
+X
+X            if ($db eq "sp") {
+X                $ins_sp->execute($gi,$db_acc,$db_name);
+X                $tot_sp++;
+X            }
+X        }
+X    }
+X    close($fasta);
+}
+X
+print "Inserted $tot_seq sequences; $tot_annot annotations; $tot_sp swissprot\n";
+X
+X
+X
+SHAR_EOF
+chmod 0755 nr_to_sql.pl ||
+echo 'restore of nr_to_sql.pl failed'
+Wc_c="`wc -c < 'nr_to_sql.pl'`"
+test 2452 -eq "$Wc_c" ||
+ echo 'nr_to_sql.pl: original size 2452, current size' "$Wc_c"
+fi
+# ============= nrand.c ==============
+if test -f 'nrand.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping nrand.c (File already exists)'
+else
+echo 'x - extracting nrand.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'nrand.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: nrand.c,v 1.2 2005/09/23 16:27:25 wrp Exp $ */
+X
+#include <stdlib.h>
+#include <time.h>
+X
+/* irand: initialize the rand() random number generator.
+X   n - seed; 0 selects a seed derived from the clock (time(NULL) reduced
+X       modulo 16381 and bumped to the next value when even).
+X   Returns the seed handed to srand(), so that the declared int result
+X   is always defined. */
+int
+irand(int n)
+{
+X  if (n == 0) {
+X    n = time(NULL);
+X    n = n % 16381;
+X    if ((n % 2)==0) n++;
+X  }
+X  srand(n);
+X  return n;
+}
+X
+/* nrand: return a random number between 0 and n-1.
+X   With RAND32 defined only the bits above the low 16 of rand() are
+X   used, so n should stay within the range that leaves.
+X   n == 0 returns 0 instead of dividing by zero. */
+int
+nrand(int n)
+{
+X  long rn;
+X
+X  if (n == 0) return 0;
+X
+X  rn = rand();
+#ifdef RAND32
+X  rn = rn >> 16;
+#endif
+X  rn = rn % n;
+X  return (int)rn;
+}
+X
+X
+X
+X
+SHAR_EOF
+chmod 0644 nrand.c ||
+echo 'restore of nrand.c failed'
+Wc_c="`wc -c < 'nrand.c'`"
+test 566 -eq "$Wc_c" ||
+ echo 'nrand.c: original size 566, current size' "$Wc_c"
+fi
+# ============= nrand48.c ==============
+if test -f 'nrand48.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping nrand48.c (File already exists)'
+else
+echo 'x - extracting nrand48.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'nrand48.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: nrand48.c,v 1.4 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdlib.h>
+#include <time.h>
+X
+/* irand: seed the lrand48() generator.  A seed of 0 is replaced by one
+X   taken from the clock: time(NULL) modulo 16381, moved up by one when
+X   the result is even. */
+void
+irand(int n)
+{
+X  int seed;
+X
+X  seed = n;
+X  if (seed == 0) {
+X    seed = time(NULL);
+X    seed = seed % 16381;
+X    if ((seed % 2) == 0) seed = seed + 1;
+X  }
+X  srand48(seed);
+}
+X
+/* nrand: return a random number between 0 and n-1.
+X   Only the bits of lrand48() above the low 16 are used, which leaves
+X   values below 32768; larger n cannot be covered evenly.
+X   n == 0 returns 0 instead of dividing by zero. */
+int
+nrand(int n)
+{
+X  int rn;
+X
+X  if (n == 0) return 0;
+X
+X  rn = lrand48();
+X  rn = rn >> 16;
+X  rn = (rn % n);
+X  return rn;
+}
+X
+SHAR_EOF
+chmod 0644 nrand48.c ||
+echo 'restore of nrand48.c failed'
+Wc_c="`wc -c < 'nrand48.c'`"
+test 533 -eq "$Wc_c" ||
+ echo 'nrand48.c: original size 533, current size' "$Wc_c"
+fi
+# ============= nrandom.c ==============
+if test -f 'nrandom.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping nrandom.c (File already exists)'
+else
+echo 'x - extracting nrandom.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'nrandom.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: nrandom.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdlib.h>
+#include <time.h>
+X
+/* irand: seed the random() generator.  A seed of 0 is replaced by one
+X   taken from the clock: time(NULL) modulo 16381, moved up by one when
+X   the result is even. */
+void
+irand(n)
+X     int n;
+{
+X  int seed;
+X
+X  seed = n;
+X  if (seed == 0) {
+X    seed = time(NULL);
+X    seed = seed % 16381;
+X    if ((seed % 2) == 0) seed = seed + 1;
+X  }
+X  srandom(seed);
+}
+X
+/* nrand: return a random number between 0 and n-1, taken as
+X   random() modulo n (the original note asks for n < 2^24).
+X   n == 0 returns 0 instead of dividing by zero. */
+int
+nrand(n)
+X     int n;
+{
+X  int rn;
+X
+X  if (n == 0) return 0;
+X
+X  rn = random();
+X  rn = (rn % n);
+X  return rn;
+}
+X
+SHAR_EOF
+chmod 0644 nrandom.c ||
+echo 'restore of nrandom.c failed'
+Wc_c="`wc -c < 'nrandom.c'`"
+test 532 -eq "$Wc_c" ||
+ echo 'nrandom.c: original size 532, current size' "$Wc_c"
+fi
+# ============= oohu.aa ==============
+if test -f 'oohu.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping oohu.aa (File already exists)'
+else
+echo 'x - extracting oohu.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'oohu.aa' &&
+>OOHU | 1358 rhodopsin - human
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV
+HFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
+SNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
+SHAR_EOF
+chmod 0644 oohu.aa ||
+echo 'restore of oohu.aa failed'
+Wc_c="`wc -c < 'oohu.aa'`"
+test 385 -eq "$Wc_c" ||
+ echo 'oohu.aa: original size 385, current size' "$Wc_c"
+fi
+# ============= oohu.raa ==============
+if test -f 'oohu.raa' -a X"$1" != X"-c"; then
+ echo 'x - skipping oohu.raa (File already exists)'
+else
+echo 'x - extracting oohu.raa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'oohu.raa' &&
+>oohu.aa shuffled
+KLILINAIFT GLQNSGCTAQ PTPEFFVMAQ YLFAMVSNMG GVFFQTALLN SAGQGFYSWC
+IFIFMTYPGF MLFIQLTGAD FVTVNEGANL CMTFCTQVTA VEYAKPTPVN AAPSSYRILR
+VIGGPYQAIF HSIATVFINS PTTEELQFLR IVVIHIAFIV VAVPLTDPRA VKFNAGELTF
+GCIFYMQYYM VISLFAANPF YYAFIRVPFE VYCETELIMG PLCAKRYVLA AASNGAYLGW
+LKLLEVYSAF PSVKCLNMLR GHVFTTIPET QNAVMYKDVI SSTLFVLSEQ LSAWITSEYP
+VPGKCYWMPF GANTHKNINP DPFAEHEKEY ILVWMVCKFG LGMTVMAG
+SHAR_EOF
+chmod 0644 oohu.raa ||
+echo 'restore of oohu.raa failed'
+Wc_c="`wc -c < 'oohu.raa'`"
+test 401 -eq "$Wc_c" ||
+ echo 'oohu.raa: original size 401, current size' "$Wc_c"
+fi
+# ============= p2_complib.c ==============
+if test -f 'p2_complib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping p2_complib.c (File already exists)'
+else
+echo 'x - extracting p2_complib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'p2_complib.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: p2_complib.c,v 1.96 2007/01/12 20:15:16 wrp Exp $ */
+X
+/*
+X * pcomplib.c : Parallel library search
+X *
+X * #define FIRSTNODE 0/1 (in msg.h) can be used to reserve one node
+X * for collecting results
+X *
+X * Parallel specific options (from doinit.c):
+X * -J # jump to query #
+X * -I self-comparison, do (N choose 2) comparisons
+X * -T # number of workers
+X */
+X
+/* This version is modified to read all files, query and database,
+X through the manager process. Workers will now receive their
+X database from the manager, rather than reading it themselves. This
+X cuts down considerably on NFS traffic, simplifies searches of
+X multiple files, and allows use of clusters of slave nodes that do
+X not have NFS access
+*/
+X
+/* modified 5-November-2004 to ensure 15 byte (SEQ_PAD) NULL
+X padding
+X
+X modified 12-December-2006 to ensure n0>0 before SEQ_PAD padding.
+X */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+X
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+X
+#include <unistd.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/stat.h>
+X
+#ifdef PVM_SRC
+#include "pvm3.h"
+char *mp_verstr="34.26, January 12, 2007 PVM";
+#endif
+X
+#ifdef MPI_SRC
+#include "mpi.h"
+char *mp_verstr="34.26, January 12, 2007 MPI";
+#endif
+X
+#include "msg.h"
+#include "defs.h"
+#include "mm_file.h"
+X
+#include "structs.h"
+#include "param.h"
+#include "p_mw.h"
+X
+#define XTERNAL
+#include "uascii.h"
+X
+char pgmdir[MAX_FN];
+char workerpgm[MAX_FN];
+char managepgm[MAX_FN];
+X
+#define XTERNAL
+#include "upam.h"
+#undef XTERNAL
+X
+/********************************/
+/* global variable declarations */
+/********************************/
+char gstring2[MAX_STR]; /* string for label */
+char gstring3[MAX_STR]; /* string for label */
+char hstring1[MAX_STR];
+X
+int nsfnum; /* number of superfamily numbers */
+int sfnum[10]; /* superfamily number from types 0 and 5 */
+int nsfnum_n;
+int sfnum_n[10];
+X
+/********************************/
+/* extern variable declarations */
+/********************************/
+extern char *prog_func; /* function label */
+extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
+X
+/********************************/
+/*extern function declarations */
+/********************************/
+X
+void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
+void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
+X
+extern void closelib();
+/* check for DNA sequence (nxgetaa.c) */
+extern int scanseq(unsigned char *seq, int n, char *str);
+extern void re_ascii(int *qascii, int *sascii);
+extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
+X
+/* 1d to 2d pam (initxx.c) */
+extern void initpam2 (struct pstruct *ppst);
+/* initialize environment (doinit.c) */
+extern void h_init (struct pstruct *ppst, struct mngmsg *, char *);
+extern void s_abort (char *p, char *p1);
+extern void query_parm (struct mngmsg *m_msp, struct pstruct *ppst);
+extern void last_init (struct mngmsg *, struct pstruct *, int);
+X
+extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
+X struct pstruct *ppst, unsigned char **aa0);
+X
+/* print hist, summaries, timing information */
+void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, int nstats, struct db_str, char *);
+void printsum(FILE *);
+extern void ptime (FILE *, time_t);
+X
+/* reset parameters if DNA sequence (initxx.c) */
+extern void resetp (struct mngmsg *, struct pstruct *);
+X
+/* read a sequence (nmgetlib.c) */
+struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
+X
+#define QGETLIB (q_file_p->getlib)
+#define LGETLIB (l_file_p->getlib)
+X
+/* these functions are in scaleswn.c */
+extern int process_hist(struct stat_str *sptr, int nstat,
+X struct mngmsg m_msg, struct pstruct pst,
+X struct hist_str *hist, void **pstat_void, int);
+extern double zs_to_E(double zs, int n1, int isdna, long, struct db_str ntt);
+extern double (*find_zp)(int score, double escore, int length, double comp, void *);
+void addhistz(double zscore, struct hist_str *); /* scaleswn.c */
+void last_stats(const unsigned char *aa0, int n0,
+X struct stat_str *sptr, int nstats,
+X struct beststr **bestp_arr, int nbest,
+X struct mngmsg m_msg, struct pstruct pst,
+X struct hist_str *histp, void *rs);
+X
+void selectbestz(struct beststr **, int, int);
+void sortbest(struct beststr **, int, int);
+X
+void showbest (FILE *fp, struct beststr **bptr, int nbest,
+X int qlib, struct mngmsg *m_msg, struct pstruct pst,
+X struct db_str ntt, char *gstring2);
+X
+void showalign (FILE *fp,
+X struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,
+X struct pstruct pst, char *gstring2);
+X
+#ifdef PVM_SRC
+char worknode[120];
+int pinums[MAXNOD],hosttid;
+int narch;
+struct pvmhostinfo *hostp;
+#endif
+X
+FILE *outfd; /* Output file */
+X
+extern time_t s_time (); /* fetches time for timing */
+X
+/* this information is global for fsigint() */
+time_t tstart, tscan, tprev, tdone; /* Timing */
+time_t tdstart, tddone, time();
+int max_nodes, nnodes; /* number of nodes */
+int node_map[MAXWRKR], node_id[MAXWRKR];
+int tot_speed,h_speed;
+int qlib = 0; /* number of sequences scanned */
+struct db_str ntt, qtt;
+X
+extern int max_workers, worker_1, worker_n;
+int wlsn [MAXWRKR + 1]; /* number of library sequences in worker */
+int clsn [MAXWRKR + 1]; /* number of 1st library sequence in worker */
+X
+int max_buf_cnt;
+X
+#ifdef PVM_SRC
+#ifndef WORKERPGM
+#define WORKERPGM "c34.work"
+#endif
+#endif
+X
+main (int argc, char *argv[])
+{
+X unsigned char *aa00, *aa01, *aa0p0, *aa0p1;
+X unsigned char *aa1, *aa1ptr, *aa1prev;
+X int aa1i, *aa1i_arr; /* integer offset of sequence in buffer */
+X
+X int n1;
+X int *n1tot_ptr=NULL, *n1tot_cur;
+X int n1tot_cnt=0;
+X int n1tot_v;
+X
+X long l_off;
+X char nodefile[240];
+X struct pstruct pst;
+X int i_score;
+X struct lmf_str *q_file_p;
+X struct lmf_str *l_file_p;
+X
+X /* from manage code */
+X struct mngmsg m_msg0, m_msg1; /* Message from host to manager */
+X struct mngmsg *m_msp0, *m_msp1; /* alternating pointers */
+X struct qmng_str qm_msg0, qm_msg1; /* stuff updated for each query */
+X char q_sqnam[4];
+X int sstart, sstop;
+X
+X struct qmng_str *qm_msp0, *qm_msp1; /* pointer to stuff updated */
+X int last_msg_b[10]; /* last set of numbers */
+X long curtype = ONETYPE; /* current message type */
+X int nclib;
+X struct beststr *best, /* array of best scores */
+X **bptr; /* array of pointers */
+X struct comstr bestr[BFR+1]; /* temporary structure array */
+X struct comstr2 bestr2[BFR2+1]; /* temporary structure array */
+X struct a_struct *aln_d_base=NULL; /* alignment info for -m 9 */
+X int qres_bufsize; /* buffer size for results */
+X struct stat_str *stats=NULL, *qstats=NULL;
+X int best_flag = 1; /* bptr[] must be re-initialized */
+X int fast_flag = 0; /* send new sequences before old displayed */
+X int nstats, nqstats, kstats, jstats;
+X int nbest, nres; /* number of best scores */
+X double zbestcut = -BIGNUM; /* z-value cutoff */
+X int lcnt; /* counters */
+X int nopt;
+X int i, j, k, is, id, iw, ires, naa0 = 0;
+X
+X FILE *fdata=NULL; /* file for full results */
+X struct sql *desptr;
+X struct sql *ldes; /* descriptive lines for all lib sequences */
+X char *bline_buf, *bline_bufp;
+X char *bline_buf_mx; /* buffer for blines */
+X char q_bline[256];
+X char t_bline[256];
+X int max_bline_b, bline_inc;
+X int *n1_arr, *m_seqnm_arr;
+X unsigned char *aa1_buf;
+X
+X char tlibstr[11]; /* used only for fdata *.res files */
+X
+X int node, snode, zero; /* Number of nodes */
+X int bufid, numt, tid;
+X
+X int ave_seq_len;
+X int max_sql;
+X int ntbuff, nseq, m_seqnm;
+X int iln, ocont, maxt;
+X long loffset;
+X
+X int leng; /* leng is length of the descriptive line */
+X fseek_t qseek,lseek; /* seek into library of current sequence */
+X int qlcont,lcont; /* continued sequence */
+X int n_proc, n_tmp;
+X char errstr[120];
+X int stats_done =0; /* flag for z-value processing */
+X int tm_best, t_rbest, t_qrbest, t_best, t_n1;
+X double e_score, tm_escore, t_rescore, t_qrescore;
+X double zscore; /* tmp value */
+X double k_H, k_comp;
+X char tmp_str[MAX_FN];
+X char pgm_abbr[MAX_SSTR];
+X char *bp;
+#ifdef MPI_SRC
+X MPI_Status mpi_status;
+#endif
+X
+X void fsigint();
+X
+X signal(SIGHUP,SIG_IGN);
+X if (signal(SIGINT,SIG_IGN) != SIG_IGN) signal(SIGINT,fsigint);
+X if (signal(SIGQUIT,SIG_IGN) != SIG_IGN) signal(SIGQUIT,fsigint);
+/* if (signal(SIGSEGV,SIG_IGN) != SIG_IGN) signal(SIGSEGV,fsigint); */
+X
+X /* Initialization */
+X
+X
+#if defined(UNIX)
+X m_msg0.quiet = !isatty(1);
+#endif
+X
+X /* BFR must be %6 = 0 for TFASTA */
+X if ((BFR%6) != 0) {
+X fprintf(stderr," BFR size %d not %%6=0 - recompile\n",BFR);
+X exit(1);
+X }
+X
+#ifdef MPI_SRC
+X MPI_Init(&argc, &argv);
+X MPI_Comm_rank(MPI_COMM_WORLD,&tid);
+X if (tid > 0) {
+X workcomp(tid);
+X MPI_Finalize();
+X exit(0);
+X }
+#endif
+X
+X printf("#");
+X for (i=0; i<argc; i++) {
+X if (strchr(argv[i],' ')) printf(" \"%s\"",argv[i]);
+X else printf(" %s",argv[i]);
+X }
+X printf("\n");
+X
+#ifdef MPI_SRC
+X MPI_Comm_size(MPI_COMM_WORLD,&nnodes);
+X if (nnodes <= 1) {
+X fprintf(stderr," nnodes = %d; no workers available\n",nnodes);
+X exit(1);
+X }
+X else fprintf(stderr," have %d nodes\n",nnodes);
+X
+X tot_speed = nnodes*100;
+#endif
+X
+X h_init (&pst,&m_msg0, pgm_abbr);
+X
+X initenv (argc, argv, &m_msg0, &pst, &aa00);
+X
+#ifdef PVM_SRC
+X strncpy (workerpgm, WORKERPGM,sizeof(workerpgm)-1);
+X strncat(workerpgm, pgm_abbr, sizeof(workerpgm)-strlen(workerpgm)-1);
+X workerpgm[sizeof(workerpgm)-1] = '\0';
+#endif
+X
+X strncpy(q_sqnam,"aa",sizeof(q_sqnam));
+X m_msg0.quiet = 1;
+X if (m_msg0.qdnaseq != SEQT_UNK &&
+X (m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq == SEQT_RNA))
+X strncpy(q_sqnam,"nt",sizeof(q_sqnam));
+X
+X m_msg0.pstat_void = NULL;
+X m_msg0.hist.hist_a = NULL;
+X
+X fprintf (stderr, "Pcomp library processor\n");
+X fprintf (stderr, "Using %s\n", prog_func);
+X
+X tstart = tscan = s_time();
+X tdstart = time(NULL);
+X
+X
+#ifdef PVM_SRC
+X if ((hosttid=pvm_mytid())<0) {
+X pvm_perror("initialization");
+X fprintf(stderr,"can't initialize %s\n", argv[0]);
+X pvm_exit();
+X exit(1);
+X }
+X
+X pvm_config(&nnodes,&narch,&hostp);
+X fprintf(stderr,"nnodes: %d, narch: %d\n",nnodes, narch);
+X max_nodes = nnodes;
+X
+#ifdef DEBUG
+X pvm_catchout(stderr);
+#endif
+X
+/* if (nnodes < 2 ) nnodes = 4; */
+X if (max_workers > 0 && nnodes > max_workers) {
+X nnodes = max_workers+FIRSTNODE;
+X fprintf(stderr," workers reset from %d to %d\n",
+X max_nodes,nnodes-FIRSTNODE);
+X }
+X else max_workers = nnodes;
+X
+X strncpy(nodefile,pgmdir,sizeof(nodefile)-1);
+X strncat(nodefile,workerpgm,sizeof(nodefile)-strlen(nodefile)-1);
+X nodefile[sizeof(nodefile)-1] = '\0';
+X
+X if (worker_1 > 0) {
+X /* remap configuration to specific nodes */
+X for (i=FIRSTNODE, j=worker_1; i<nnodes && j<=worker_n; i++,j++)
+X node_id[i]=j;
+X nnodes = i;
+X max_workers = i-FIRSTNODE;
+X fprintf(stderr," workers remapped from %d to %d\n",
+X max_nodes,nnodes-FIRSTNODE);
+X max_nodes = nnodes;
+X }
+X else {
+X for (i=0; i< nnodes; i++) node_map[i]=node_id[i] = i;
+X }
+X
+X if (nnodes < max_nodes) {
+X hostp++; /* bump over host name for spawn */
+X rand_nodes(node_map,nnodes,max_nodes-1);
+X for (i=FIRSTNODE; i<nnodes; i++) {
+X numt+=pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_map[i]].hi_name,
+X 1,&pinums[i]);
+X }
+X }
+X else {
+X /* i counts through nodes (machines) */
+X /* j counts through processes (multiple processes/node) */
+X /* node map maps the process (virtual node) to a physical node (machine) */
+X
+X for (i=j=FIRSTNODE; i<nnodes && j < MAXWRKR; i++) {
+X n_proc = hostp[node_id[i]].hi_speed%100;
+X if (n_proc == 0) n_proc = 1;
+X if (n_proc > max_workers) n_proc = max_workers;
+X
+X n_tmp =pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_id[i]].hi_name,
+X n_proc,&pinums[j]);
+X if (n_tmp < n_proc)
+X fprintf(stderr," spawn problem: %d\n", pinums[j]);
+X if (n_tmp > 0) {
+X for (k=j; k < j+n_tmp; k++) node_map[k]=node_id[i];
+X j += n_tmp;
+X }
+X }
+X nnodes = numt = j;
+X }
+X
+X if (numt < nnodes) {
+X if (numt <= 0) {
+X pvm_perror("");
+X pvm_exit();
+X exit(1);
+X }
+X nnodes = numt;
+X }
+X
+X for (tot_speed=0,i=FIRSTNODE; i<nnodes; i++) {
+X if (pinums[i]<0) {
+X fprintf(stderr," tids %d %8o\n",i,pinums[i]);
+X pvm_perror("");
+X pvm_exit();
+X exit(1);
+X }
+X else {
+X h_speed = hostp[node_map[tidtonode(pinums[i])]].hi_speed;
+X if (h_speed <= 0) h_speed = 100;
+X fprintf(stderr," tids %d %8o %s %5d\n",i,pinums[i],
+X hostp[node_map[tidtonode(pinums[i])]].hi_name,
+X h_speed);
+X tot_speed +=(hostp[node_map[tidtonode(pinums[i])]].hi_speed);
+X }
+X }
+X
+X strncpy(worknode,nodefile,sizeof(worknode));
+X fprintf (stderr, "%3d worker programs loaded from %s\n",
+X nnodes-FIRSTNODE,worknode);
+#endif
+X
+X /* need to allocate two aa0 arrays so that the old is saved for alignments */
+X
+X /* Allocate space for the query sequence */
+X if ((aa00 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1)* sizeof (char))) == NULL)
+X s_abort ("Unable to allocate query sequence", "");
+X
+X if ((aa01 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1) * sizeof (char))) == NULL)
+X s_abort ("Unable to allocate query sequence", "");
+X
+X fputs(iprompt0,stdout);
+X fprintf(stdout," %s%s\n",verstr,refstr);
+X
+X /* Query library */
+X if (m_msg0.tname[0] == '\0') {
+X if (m_msg0.quiet == 1) s_abort("query sequence undefined","");
+X
+X fprintf(stderr, "Pvcomplib [%s]\n",mp_verstr);
+X l1: fputs (iprompt1, stdout);
+X fflush (stdout);
+X if (fgets (m_msg0.tname, 80, stdin) == NULL)
+X s_abort ("Unable to read query library name","");
+X if ((bp=strchr(m_msg0.tname,'\n'))!=NULL) *bp='\0';
+X if (m_msg0.tname[0] == '\0') goto l1;
+X }
+X
+X /* Open query library */
+X if ((q_file_p=
+X openlib(m_msg0.tname, m_msg0.qdnaseq,qascii,!m_msg0.quiet,NULL))==NULL) {
+X s_abort(" cannot open library ",m_msg0.tname);
+X }
+X /*
+X else {
+X printf ("searching %s library\n",m_msg0.tname);
+X }
+X */
+X
+X ntt.entries = qtt.entries = 0;
+X ntt.carry = qtt.carry = 0;
+X ntt.length = qtt.length = 0l;
+X
+X /* Fetch first sequence */
+X qlcont = 0;
+X while (qlib < m_msg0.ql_start) { /* skip through query sequences */
+X pst.n0 = qm_msg0.n0 = m_msg0.n0 =
+X QGETLIB (aa00, MAXTST, q_bline, sizeof(q_bline), &qseek, &qlcont,
+X q_file_p,&m_msg0.sq0off);
+X
+X strncpy(qm_msg0.libstr,q_bline,sizeof(qm_msg0.libstr)-20);
+X qm_msg0.libstr[sizeof(qm_msg0.libstr)-21]='\0';
+X if ((bp=strchr(qm_msg0.libstr,' '))!=NULL) *bp='\0';
+X
+X /* if annotations are included in sequence, remove them */
+X if (m_msg0.ann_flg) {
+X pst.n0 = qm_msg0.n0 = m_msg0.n0 =
+X ann_scan(aa00, m_msg0.n0, &m_msg0, m_msg0.qdnaseq);
+#ifdef DEBUG
+X fprintf(stderr,"m_msp0->/aa0a is: %o/%o\n",&m_msg0,m_msg0.aa0a);
+#endif
+X }
+X
+X if (m_msg0.term_code &&
+X !(m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq==SEQT_RNA) &&
+X aa00[m_msg0.n0-1]!='*') {
+X aa00[m_msg0.n0++]='*';
+X aa00[m_msg0.n0]=0;
+X pst.n0 = qm_msg0.n0 = m_msg0.n0;
+X }
+X
+X /* check for subset */
+X if (q_file_p->opt_text[0]!='\0') {
+X if (q_file_p->opt_text[0]=='-') {
+X sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
+X }
+X else {
+X sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
+X sstart--;
+X if (sstop <= 0 ) sstop = BIGNUM;
+X }
+X for (id=0,is=sstart; is<min(m_msg0.n0,sstop); ) aa00[id++]=aa00[is++];
+X aa00[id]=0;
+X pst.n0 = qm_msg0.n0 = m_msg0.n0 = min(m_msg0.n0,sstop)-sstart;
+X if (m_msg0.sq0off==1) m_msg0.sq0off = sstart+1;
+X }
+X
+X qlib++;
+X
+X if (m_msg0.n0 <= 0)
+X s_abort ("Unable to fetch sequence from library: ", m_msg0.tname);
+X }
+X qtt.entries=1;
+X qm_msg0.slist = 0;
+X
+X /* now have correct query sequence - check sequence type and reset */
+X if (m_msg0.qdnaseq == SEQT_UNK) { /* check for DNA sequence */
+X if (m_msg0.n0 > 20 &&
+X (float)scanseq(aa00,m_msg0.n0,"ACGTUNacgtun")/(float)m_msg0.n0>0.85) {
+X pascii = nascii;
+X m_msg0.qdnaseq = SEQT_DNA;
+X }
+X else { /* its protein */
+X pascii = aascii;
+X m_msg0.qdnaseq = SEQT_PROT;
+X }
+X
+X re_ascii(qascii,pascii);
+X init_ascii(pst.ext_sq_set,qascii,m_msg0.qdnaseq);
+X m_msg0.n0 = recode(aa00,m_msg0.n0,qascii,pst.nsqx);
+X }
+X
+X /* for ALTIVEC, must pad with 15 NULL's */
+X for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
+X
+X qtt.length = m_msg0.n0;
+X
+X if (qlib <= 0) {
+X fprintf(stderr," no sequences found in query library\n");
+X exit(1);
+X }
+X
+X resetp (&m_msg0, &pst);
+X
+X sprintf(tmp_str," %d %s", qm_msg0.n0, q_sqnam);
+X leng = strlen (qm_msg0.libstr);
+X if (leng + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
+X qm_msg0.libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
+X strncat(&qm_msg0.libstr[0],tmp_str,
+X sizeof(qm_msg0.libstr)-strlen(qm_msg0.libstr)-1);
+X qm_msg0.libstr[sizeof(qm_msg0.libstr)-1]='\0';
+X
+X qm_msg0.seqnm = qlib-1;
+X
+X /* Library */
+X
+X if (strlen (m_msg0.lname) == 0) {
+X if (m_msg0.quiet == 1) s_abort("library name undefined","");
+X libchoice(m_msg0.lname, sizeof(m_msg0.lname), &m_msg0);
+X }
+X
+X libselect(m_msg0.lname, &m_msg0);
+X
+X /* Get additional parameters here */
+X if (!m_msg0.quiet) query_parm (&m_msg0, &pst);
+X
+X last_init(&m_msg0, &pst,nnodes-FIRSTNODE);
+X memcpy(&m_msg1, &m_msg0, sizeof(m_msg0));
+X
+X /* m_msg0.maxn needs to be set to MAXLIB or MAXTRN, depending on the
+X function - max_tot has the MAXTST + (MAXLIB|MAXTRN) */
+X if (m_msg0.maxn <= 0) m_msg0.maxn = m_msg0.max_tot - MAXTST;
+X
+X if (m_msg0.maxn < 2 * m_msg0.dupn) m_msg0.maxn = 5*m_msg0.dupn;
+X pst.maxlen = m_msg0.maxn;
+X
+X m_msg0.loff = m_msg0.dupn;
+X m_msg0.maxt3 = m_msg0.maxn-m_msg0.loff;
+X
+X
+X /* ******************** */
+X /* initial manager code */
+X /* ******************** */
+X
+X outfd = stdout;
+X if (m_msg0.outfile[0]!='\0') {
+X if ((outfd = fopen(m_msg0.outfile,"w"))==NULL) {
+X fprintf(stderr, "cannot open %s for output\n", m_msg0.outfile);
+X outfd = stdout;
+X }
+X }
+X
+X /* Label the output */
+X printf("Query library %s vs %s library\n", m_msg0.tname, m_msg0.lname);
+X
+X /* Allocate space for saved scores */
+X if ((best =
+X (struct beststr *)malloc((MAXBEST+1)*sizeof(struct beststr)))==NULL)
+X s_abort ("Cannot allocate best struct","");
+X if ((bptr =
+X (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
+X s_abort ("Cannot allocate bptr","");
+X
+X /* Initialize bptr */
+X for (nbest = 0; nbest < MAXBEST+1; nbest++)
+X bptr[nbest] = &best[nbest];
+X
+X best++; bptr++;
+X best[-1].score[0]=best[-1].score[1]=best[-1].score[2]=INT_MAX;
+X best[-1].zscore = FLT_MAX;
+X best[-1].escore = FLT_MIN;
+X best_flag = 0;
+X
+X if ((stats =
+X (struct stat_str *)calloc((size_t)MAXSTATS,sizeof(struct stat_str)))
+X ==NULL)
+X s_abort ("Cannot allocate stats struct","");
+X nstats = 0;
+X
+X /* Now open the second library, divide it, send sequences to all workers */
+X /* Set up buffer for reading the library:
+X
+X We will start by using a 2 Mbyte buffer for each worker. For
+X proteins, that means 5,000 sequences of length 400 (average).
+X For DNA, that means 2,000 sequences of length 1000. At the moment,
+X those are good averages.
+X */
+X
+X if (max_buf_cnt <= 0) {
+X if (m_msg0.ldnaseq==SEQT_DNA) max_buf_cnt = MAX_NT_BUF;
+X else max_buf_cnt = MAX_AA_BUF;
+X }
+X
+X if (m_msg0.ldnaseq==SEQT_DNA) ave_seq_len = AVE_NT_LEN;
+X else ave_seq_len = AVE_AA_LEN;
+X
+X /* however - buffer sizes should be a function of the number of
+X workers so that all the workers are kept busy. Assuming a 10,000
+X entry library is the smallest we want to schedule, then
+X */
+X
+X if (max_buf_cnt > 10000/(nnodes-FIRSTNODE))
+X max_buf_cnt = 10000/(2*(nnodes-FIRSTNODE));
+X
+X /* allocate space for sequence buffers */
+X
+X m_msg0.pbuf_siz=max_buf_cnt*ave_seq_len;
+X if (m_msg0.pbuf_siz < 5*m_msg0.maxn)
+X m_msg0.pbuf_siz = 5*m_msg0.maxn;
+X
+#ifdef PVM_SRC
+#ifdef ROUTE_DIRECT
+X pvm_setopt(PvmRoute,PvmRouteDirect);
+#endif
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&nnodes,1,1);
+X pvm_pkint(pinums,nnodes,1);
+X pvm_pkbyte((char *)&m_msg0,(int)sizeof(m_msg0),1);
+X for (node = FIRSTNODE; node<nnodes; node++)
+X if (pvm_send(pinums[node],STARTTYPE0)<0) {
+X pvm_perror("pvm_send1");
+X pvm_exit();
+X exit(1);
+X }
+#endif
+#ifdef MPI_SRC
+X for (node = FIRSTNODE; node<nnodes; node++) {
+X MPI_Send(&m_msg0,(int)sizeof(m_msg0),MPI_BYTE,node,STARTTYPE0,
+X MPI_COMM_WORLD);
+X }
+#endif
+X
+X /* now send pst, sascii */
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)&pst,(int)sizeof(pst),1);
+X pvm_pkbyte((char *)pascii,(int)sizeof(aascii),1);
+X
+X for (node = FIRSTNODE; node< nnodes; node++)
+X pvm_send(pinums[node],STARTTYPE1);
+X
+X /* send pam12 */
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(pam12,m_msg0.pamd1*m_msg0.pamd2,1);
+X for (node = FIRSTNODE; node< nnodes; node++)
+X pvm_send(pinums[node],STARTTYPE2);
+X
+X /* send pam12x */
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(pam12x,m_msg0.pamd1*m_msg0.pamd2,1);
+X for (node = FIRSTNODE; node< nnodes; node++)
+X pvm_send(pinums[node],STARTTYPE3);
+X
+#endif
+#ifdef MPI_SRC
+X for (node=FIRSTNODE; node < nnodes; node++) {
+X MPI_Send(&pst,(int)sizeof(pst),MPI_BYTE,node,STARTTYPE1,
+X MPI_COMM_WORLD);
+X MPI_Send(pascii,(int)sizeof(aascii),MPI_BYTE,node,STARTTYPE1,
+X MPI_COMM_WORLD);
+X MPI_Send(pam12,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE2,
+X MPI_COMM_WORLD);
+X MPI_Send(pam12x,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE3,
+X MPI_COMM_WORLD);
+X }
+#endif
+X
+X if ((n1_arr =
+X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate n1_arr %d\n",max_buf_cnt+1);
+X s_abort(" cannot allocate n1_arr","");
+X exit(1);
+X }
+X
+X if ((aa1i_arr =
+X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate aa1i_arr %d\n",max_buf_cnt+1);
+X s_abort(" cannot allocate aa1i_arr","");
+X exit(1);
+X }
+X
+X if ((m_seqnm_arr=
+X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
+X ==NULL) {
+X fprintf(stderr," cannot allocate m_seqnm_arr %d\n",max_buf_cnt+1);
+X s_abort(" cannot allocate m_seqnm_arr","");
+X exit(1);
+X }
+X
+X if ((aa1_buf =
+X (unsigned char *)calloc((size_t)(m_msg0.pbuf_siz),sizeof(unsigned char)))
+X ==NULL) {
+X s_abort(" cannot allocate library buffer %d","");
+X exit(1);
+X }
+X
+X
+X /* also allocate space for descriptions. Assume max of 250,000 sequences/
+X worker for now
+X */
+X
+X /* max_sql is the maxinum number of library sequences that can be stored */
+X max_sql = MAXSQL;
+X
+X if ((ldes=(struct sql *)calloc(max_sql,sizeof(struct sql)))==NULL) {
+X fprintf(stderr," failure to allocate ldes(%d) %ld\n",
+X max_sql,max_sql*sizeof(struct sql));
+X s_abort("cannot allocate ldes","");
+X exit(1);
+X }
+X
+X max_bline_b = MAXSQL * (m_msg0.aln.llen+1)/4;
+X bline_inc = m_msg0.aln.llen;
+X if (m_msg0.markx & MX_M9SUMM) bline_inc += 40;
+X
+X i = 4;
+X while (i-- > 0) {
+X if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL) break;
+X max_bline_b /= 2;
+X bline_inc /= 2;
+X }
+X if (bline_buf == NULL) {
+X fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
+X max_sql,max_bline_b);
+X s_abort(" cannot allocate bline_buf","");
+X }
+X
+X bline_bufp = bline_buf;
+X bline_buf_mx = bline_buf+max_bline_b;
+X
+X /* the code for filling the buffers is copied from comp_thr.c */
+X /* the major differences reflect the fact that all library descriptions
+X will be kept in memory, indexed by sequence number.
+X
+X As a result, one buffer is filled by this loop -
+X ldes[] has the descriptive information for every sequence
+X this array could potentially be quite large
+X */
+X
+X /* now open the library and start reading */
+X /* get a buffer and fill it up */
+X
+X ntbuff = 0;
+X m_seqnm = 0; /* m_seqnm is the number of this library sequence */
+X nseq = 0;
+X
+X node = FIRSTNODE;
+X
+X /* sqs2_buf[0].aa1 = aa1_buf; */
+X aa1 = aa1_buf;
+X
+X /* iln counts through each library */
+X for (iln = 0; iln < m_msg0.nln; iln++) {
+X if ((l_file_p=
+X openlib(m_msg0.lbnames[iln], m_msg0.ldnaseq,lascii,!m_msg0.quiet,NULL))==NULL) {
+X fprintf(stderr," cannot open library %s\n",m_msg0.lbnames[iln]);
+X continue;
+X }
+X else {
+X printf ("searching %s library\n",m_msg0.lbnames[iln]);
+X }
+X
+X lcont = ocont = 0;
+X n1tot_v = n1tot_cnt = 0;
+X n1tot_ptr = n1tot_cur = NULL;
+X maxt = m_msg0.maxn;
+X loffset = 0l;
+X
+X /* read sequence directly into buffer */
+X aa1ptr = aa1; /* = sqs2_buf[0].aa1; */
+X
+X while ((n1= LGETLIB(aa1ptr,maxt,t_bline,sizeof(t_bline),&lseek,&lcont,
+X l_file_p,&l_off))>=0) {
+X
+X /* skip sequences outside range */
+X if (n1 < m_msg0.n1_low || n1 > m_msg0.n1_high) goto loop1;
+X
+X /* add termination code for proteins, if asked */
+X if (m_msg0.term_code && !lcont &&
+X m_msg0.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg0.term_code) {
+X aa1ptr[n1++]=m_msg0.term_code;
+X aa1ptr[n1]=0;
+X }
+X
+X /* check for a continued sequence and provide a pointer to
+X the n1_tot array if lcont || ocont */
+X n1tot_v += n1;
+X if (lcont && !ocont) { /* get a new pointer */
+X if (n1tot_cnt <= 0) {
+X if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
+X fprintf(stderr," cannot allocate n1tot_ptr\n");
+X exit(1);
+X }
+X else {n1tot_cnt=1000;}
+X }
+X n1tot_cnt--;
+X n1tot_cur = n1tot_ptr++;
+X }
+X
+X if (bline_bufp + bline_inc > bline_buf_mx) {
+X i = 4;
+X while (i-- > 0) {
+X if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL)
+X break;
+X fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
+X max_sql,max_bline_b);
+X max_bline_b /= 2;
+X bline_inc /= 2;
+X }
+X if (bline_buf != NULL) {
+X bline_bufp = bline_buf;
+X bline_buf_mx = bline_buf+max_bline_b;
+X }
+X else {
+X s_abort("cannot allocate bline_buf ","");
+X exit(1);
+X }
+X }
+X
+X if (bline_bufp+bline_inc < bline_buf_mx ) {
+X strncpy(bline_bufp,t_bline,bline_inc);
+X ldes[m_seqnm].bline = bline_bufp;
+X bline_bufp[bline_inc]= '\0';
+X bline_bufp += bline_inc+1;
+X }
+X else {
+X fprintf(stderr," bline_buf overrun\n");
+X }
+X
+X ntt.entries++; /* inc number of sequences */
+X ntt.length += n1; /* update total library length */
+X if (ntt.length > LONG_MAX) {ntt.length -= LONG_MAX; ntt.carry++;}
+X
+#ifdef DEBUG
+X /* This discovers most reasons for core dumps */
+X if (pst.debug_lib)
+X for (i=0; i<n1; i++)
+X if (aa1[i]>pst.nsq) {
+X fprintf(stderr,
+X "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
+X qm_msg0.libstr,i,n1,aa1[i],pst.nsq,lcont,ocont,aa1ptr+i);
+X aa1[i]=0;
+X n1=i-1;
+X break;
+X }
+#endif
+X
+X /* for ALTIVEC, must pad with 15 NULL's */
+X for (i=0; i<SEQ_PAD+1; i++) {aa1ptr[n1+i]=0;}
+X
+X /* don't count long sequences more than once */
+X if (aa1!=aa1ptr) {
+X n1 += m_msg0.loff; m_msg0.db.entries--; ntt.entries--;
+X }
+X
+X if (n1>1) {
+X
+X desptr = &ldes[m_seqnm];
+X
+X aa1i_arr[nseq] = (int)(aa1-aa1_buf);
+X m_seqnm_arr[nseq] = m_seqnm;
+X desptr->n1 = n1_arr[nseq] = n1;
+X desptr->n1tot_p = n1tot_cur;
+X desptr->lseek = lseek;
+X desptr->loffset = loffset+l_off;
+X desptr->cont = ocont;
+X desptr->wrkr = node;
+X desptr->nsfnum = nsfnum;
+#ifdef SUPERFAMNUM
+X if ((desptr->sfnum[0]=sfnum[0])>0 &&
+X (desptr->sfnum[1]=sfnum[1])>0 &&
+X (desptr->sfnum[2]=sfnum[2])>0 &&
+X (desptr->sfnum[3]=sfnum[3])>0 &&
+X (desptr->sfnum[4]=sfnum[4])>0 &&
+X (desptr->sfnum[5]=sfnum[5])>0 &&
+X (desptr->sfnum[6]=sfnum[6])>0 &&
+X (desptr->sfnum[7]=sfnum[7])>0 &&
+X (desptr->sfnum[8]=sfnum[8])>0 &&
+X (desptr->sfnum[9]=sfnum[9])>0) ;
+#endif
+X m_seqnm++;
+X nseq++;
+X
+X if (m_seqnm >= max_sql) {
+X max_sql += MAXSQL;
+X if ((ldes=(struct sql *)realloc(ldes,max_sql*sizeof(struct sql)))
+X ==NULL) {
+X fprintf(stderr," failure to realloc ldes(%d) %ld\n",
+X max_sql,max_sql*sizeof(struct sql));
+X s_abort("cannot allocate ldes","");
+X exit(1);
+X }
+X }
+X
+X /* increment ptrs */
+X aa1prev = aa1;
+X
+X aa1 += n1+1+SEQ_PAD;
+X ntbuff += n1+1+SEQ_PAD;
+X
+X /* if the buffer is filled */
+X if (nseq >= max_buf_cnt || ntbuff >= m_msg0.pbuf_siz - m_msg0.maxn) {
+X /* provide filled buffer to workers */
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&nseq,1,1);
+X pvm_pkint(&ntbuff,1,1);
+X pvm_pkint(n1_arr,nseq,1);
+X pvm_pkint(aa1i_arr,nseq,1);
+X pvm_pkint(m_seqnm_arr,nseq,1);
+X pvm_send(pinums[node],STARTTYPE4);
+X
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)aa1_buf,ntbuff,1);
+X pvm_send(pinums[node],STARTTYPE5);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X
+X MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
+#endif
+X nseq = 0;
+X
+X aa1 = aa1_buf;
+X ntbuff = 0;
+X if (++node >= nnodes) node = FIRSTNODE;
+X }
+X
+X loop1:
+X if (lcont) {
+X memcpy(aa1,&aa1prev[n1-m_msg0.loff],m_msg0.loff);
+X aa1ptr = &aa1[m_msg0.loff];
+X ocont = lcont;
+X maxt = m_msg0.maxt3;
+X loffset += n1 - m_msg0.loff;
+X }
+X else {
+X if (ocont) *n1tot_cur = n1tot_v;
+X n1tot_v = 0;
+X n1tot_cur = NULL;
+X
+X ocont = 0;
+X aa1ptr = aa1;
+X maxt = m_msg0.maxn;
+X loffset = 0l;
+X }
+X }
+X }
+X } /* for (iln < nln) */
+X
+X if (nseq > 0) {
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&nseq,1,1);
+X pvm_pkint(&ntbuff,1,1);
+X pvm_pkint(n1_arr,nseq,1);
+X pvm_pkint(aa1i_arr,nseq,1);
+X pvm_pkint(m_seqnm_arr,nseq,1);
+X pvm_send(pinums[node],STARTTYPE4);
+X
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)aa1_buf,ntbuff,1);
+X pvm_send(pinums[node],STARTTYPE5);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X
+X MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
+#endif
+X }
+X
+X /* fprintf(stderr," all sequences sent\n"); */
+X
+X if (ntt.entries <= 0) {
+X s_abort("no reference library sequences found\n","");
+X }
+X
+X zero = 0;
+X for (node=FIRSTNODE; node < nnodes; node++) {
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&zero,1,1);
+X pvm_pkint(&zero,1,1);
+X pvm_pkint(n1_arr,1,1);
+X pvm_pkint(aa1i_arr,1,1);
+X pvm_pkint(m_seqnm_arr,1,1);
+X pvm_send(pinums[node],STARTTYPE4);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(n1_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(aa1i_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+X MPI_Send(m_seqnm_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
+#endif
+X }
+X
+X for (node = FIRSTNODE; node < nnodes; node++) {
+#ifdef PVM_SRC
+X bufid = pvm_recv(-1,STARTTYPE0);
+X pvm_bufinfo(bufid,NULL,NULL,&tid);
+X snode = tidtonode(tid);
+X pvm_upkint(&lcnt,1,1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(&lcnt,1,MPI_INT,MPI_ANY_SOURCE,STARTTYPE0,
+X MPI_COMM_WORLD,&mpi_status);
+X snode= mpi_status.MPI_SOURCE;
+#endif
+X wlsn [snode-FIRSTNODE] = lcnt;
+X fprintf(stderr," %d sequences at %d\n",lcnt,snode);
+X }
+X
+X /* print out all descriptions */
+X /*
+X for (node = FIRSTNODE; node < nnodes; node++)
+X for (lcnt = 0; lcnt < wlsn[node-FIRSTNODE]; lcnt ++)
+X printf("%2d:%3d\t%s\n",node,lcnt,ldes[lcnt].bline);
+X */
+X
+X /* Calculate cumulative totals and send to workers for a self search */
+X
+X clsn [0] = nclib= 0;
+X for (node = FIRSTNODE; node < nnodes-1; node++) {
+X /* clsn[] is for the next node */
+X clsn[node-FIRSTNODE+1] = nclib += wlsn[node-FIRSTNODE];
+X }
+X
+X if (m_msg0.self)
+X for (node = FIRSTNODE; node < nnodes; node++) {
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&clsn[node-FIRSTNODE],1,1);
+X pvm_send(pinums[node],STARTTYPE1);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(&clsn[node-FIRSTNODE],1,MPI_INT,node,STARTTYPE1,MPI_COMM_WORLD);
+#endif
+X fprintf(stderr,"sending lend: %d to worker %d\n",clsn[node-FIRSTNODE],node);
+X }
+X
+X last_msg_b[0] = m_msg0.nbr_seq = m_msg1.nbr_seq = ntt.entries;
+X
+X qres_bufsize = BFR;
+X /* if BFR is too big for this library, reduce it */
+X while ( ntt.entries*(m_msg0.nitt1+1)/(2*nnodes) < qres_bufsize) {
+X qres_bufsize /= 2;
+X if ((qres_bufsize%(m_msg0.nitt1+1))!= 0) {
+X qres_bufsize *= (m_msg0.nitt1+1);
+X break;
+X }
+X if (qres_bufsize < 50) break;
+X }
+X last_msg_b[1] = qres_bufsize;
+X
+X fprintf(stderr," using BFR=%d/%d\n",qres_bufsize,BFR);
+X
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(last_msg_b,2,1);
+X for (node=FIRSTNODE; node < nnodes; node++)
+X pvm_send(pinums[node],STARTTYPE0);
+#endif
+#ifdef MPI_SRC
+X for (node=FIRSTNODE; node < nnodes; node++)
+X MPI_Send(last_msg_b,2,MPI_INT,node,STARTTYPE0,MPI_COMM_WORLD);
+#endif
+X
+X tscan = tprev = s_time();
+X
+/**************************************
+X The logic of this section has been simplified to allow multistage
+X comparison functions to be used and alignments to be generated.
+X
+X send 1st query to workers
+X get next query sequence from host (m_msp1)
+X L1: get results from next-1 search (m_msp0)
+X sort the results of the next-1 search
+X (possibly) do additional stages of search
+X (possibly produce alignments for search
+X send next query to workers (m_msp1)
+X display result of next-1 search (m_msp0)
+X get next query sequence from host (m_msp1)
+X goto L1;
+X
+As a result of the interleaving, there must be two qm_msg structures,
+one for the next-1 sequence (which is required for labeling the
+output), and one for the next sequence (which is sent to the workers
+while the results are being displayed. qm_msp0 and qm_msp1 alternate
+between these two structures.
+***************************************/
+X
+/*
+X qm_msp0 points to the older qm_msg
+X qm_msp1 points to the newer qm_msg
+X the assignment below goes with curtype==ONETYPE
+*/
+X m_msp0 = &m_msg0;
+X m_msp1 = &m_msg1;
+X
+X qm_msp0 = &qm_msg0;
+X qm_msp1 = &qm_msg1;
+X
+X aa0p0 = aa00; /* aa0p0 is the "old" sequence */
+X aa0p1 = aa01; /* aa0p1 is the "new" sequence */
+X
+X last_params(aa00,m_msp0->n0,m_msp0,&pst,qm_msp0);
+X
+X /* process_hist() is called here to get find_zp(), and some other
+X structures initialized that would otherwise not be initialized
+X because z-scores are not being calculated */
+X
+X if (m_msp0->escore_flg) {
+X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
+X &m_msp0->hist,&m_msp0->pstat_void,0);
+X stats_done=1;
+X }
+X
+X if (m_msp0->qshuffle && qstats==NULL) {
+X if ((qstats =
+X (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
+X s_abort ("Cannot allocate qstats struct","");
+X }
+X nqstats = 0;
+X
+/* Send first query sequence to each worker */
+X
+X if (m_msg0.dfile[0] && (fdata=fopen(m_msg0.dfile,"w"))!=NULL)
+X fprintf(fdata,"%3d>%-50s\n",qlib,qm_msp0->libstr);
+X
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)qm_msp0,sizeof(qm_msg0),1);
+X if (qm_msp0->n0 > 0) {
+X pvm_pkbyte((char *)aa0p0,qm_msp0->n0+1+SEQ_PAD,1);
+X if (m_msg0.ann_flg) pvm_pkbyte((char *)m_msp0->aa0a,qm_msp0->n0+1,1);
+X }
+X for (node = FIRSTNODE; node < nnodes; node++)
+X pvm_send(pinums[node],MSEQTYPE);
+#endif
+#ifdef MPI_SRC
+X for (node = FIRSTNODE; node < nnodes; node++) {
+X MPI_Send(qm_msp0,sizeof(qm_msg0),MPI_BYTE,node,MSEQTYPE,MPI_COMM_WORLD);
+X if (qm_msp0->n0 > 0) {
+X MPI_Send(aa0p0,qm_msp0->n0+1+SEQ_PAD,MPI_BYTE,node,
+X MSEQTYPE1,MPI_COMM_WORLD);
+X if (m_msg0.ann_flg) {
+X if (m_msp0->aa0a == NULL) {
+X fprintf(stderr," m_msp0: %o/%oaa0a is null\n",m_msp0,m_msp0->aa0a);
+X }
+X MPI_Send(m_msp0->aa0a,qm_msp0->n0+1,MPI_BYTE,node, MSEQTYPE2,MPI_COMM_WORLD);
+X }
+X }
+X }
+#endif
+X
+X /* Get second query sequence (additional query sequences are read in
+X the main loop */
+X
+X m_msp1->n0 = qm_msp1->n0 =
+X QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
+X strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
+X if ((bp=strchr(qm_msp1->libstr,' '))!=NULL) *bp='\0';
+X
+X /* if annotations are included in sequence, remove them */
+X if (m_msg0.ann_flg) {
+X m_msp1->n0 = qm_msp1->n0 =
+X ann_scan(aa0p1,qm_msp1->n0,m_msp1,m_msp1->qdnaseq);
+#ifdef DEBUG
+X fprintf(stderr,"m_msp1->/aa0a is: %o/%o\n",m_msp1,m_msp1->aa0a);
+#endif
+X }
+X
+X if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
+X m_msg0.qdnaseq == SEQT_PROT &&
+X aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
+X aa0p1[m_msp1->n0++]=m_msg0.term_code;
+X aa0p1[m_msp1->n0]=0;
+X qm_msp1->n0 = m_msp1->n0;
+X }
+X
+X /* for ALTIVEC, must pad with 15 NULL's */
+X if (m_msp1->n0 > 0) {
+X for (i=0; i<SEQ_PAD+1; i++) {aa0p1[m_msp1->n0+i]=0;}
+X }
+X
+X qm_msp1->slist = 0;
+X qm_msp1->seqnm = qlib;
+X
+X last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
+X
+X sprintf(tmp_str," - %d %s", qm_msp1->n0, q_sqnam);
+X if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
+X strncat(qm_msp1->libstr,tmp_str,
+X sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
+X
+X naa0 = 0; /* reset node counter */
+X
+X /* sit in loop and collect results */
+X nbest = nopt = 0;
+X zbestcut = -BIGNUM;
+X
+X
+X while (1) {
+X
+#ifdef PVM_SRC
+X bufid = pvm_recv(-1,curtype);
+X pvm_bufinfo(bufid,NULL,NULL,&tid);
+X pvm_upkbyte((char *)&bestr[0],sizeof(struct comstr)*(qres_bufsize+1),1);
+X snode = tidtonode(tid);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(bestr,sizeof(struct comstr)*(qres_bufsize+1),
+X MPI_BYTE,MPI_ANY_SOURCE,curtype,MPI_COMM_WORLD,&mpi_status);
+X snode = mpi_status.MPI_SOURCE;
+#endif
+X
+X nres = bestr[qres_bufsize].seqnm & ~FINISHED;
+X
+#ifdef DEBUG
+X fprintf(stderr,"%d results from %d\n",nres,snode);
+#endif
+X
+X if (bestr[qres_bufsize].seqnm&FINISHED) { /* a worker is finished */
+X naa0++;
+X
+X /* fast_flag == 1 => send new sequences immediately */
+X fast_flag = ((m_msp0->stages==1) && !(m_msp0->markx & MX_M9SUMM) &&
+X (m_msp0->ashow == 0) && (m_msp0->last_calc_flg==0));
+X /* send a new query sequence if no more processing required */
+X if (fast_flag) {
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
+X if (qm_msp1->n0 != -1) {
+X pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
+X if (m_msp1->ann_flg) pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
+X }
+X pvm_send(tid,MSEQTYPE);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,snode,MSEQTYPE,MPI_COMM_WORLD);
+X if (qm_msp1->n0 != -1) {
+X MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,snode,MSEQTYPE1,MPI_COMM_WORLD);
+X if (m_msp1->ann_flg)
+X MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
+X }
+#endif
+X }
+X }
+X
+#ifdef DEBUG
+X if (pst.debug_lib)
+X fprintf(stderr," unpacking %d from %d; nbest %d\n",nres,snode,nbest);
+#endif
+X
+X /* this section is now more complex because can get groups of
+X sequence results; e.g. forward and reverse frame */
+X
+X t_best = t_rbest = t_qrbest = -1;
+X tm_escore = t_rescore = t_qrescore = FLT_MAX;
+X for (ires = 0; ires < nres; ires++) {
+X desptr = &ldes[bestr[ires].m_seqnm];
+X
+X /* save raw results */
+X if (fdata) {
+X strncpy(tlibstr,desptr->bline,10);
+X if ((bp=strchr(tlibstr,' '))!=NULL) *bp='\0';
+X fprintf(fdata,"%-10s\t%4d\t%4d\t%d\t%4d\t%4d\t%4d\t%8ld\n",
+X tlibstr,desptr->sfnum[0],desptr->n1,bestr[ires].frame,
+X bestr[ires].score[0],bestr[ires].score[1],bestr[ires].score[2],
+X desptr->lseek);
+X }
+X
+X i_score = bestr[ires].score[pst.score_ix];
+X e_score = bestr[ires].escore;
+X k_comp = bestr[ires].comp;
+X k_H = bestr[ires].H;
+X
+X t_n1 = desptr->n1;
+X if (i_score > t_best) {tm_best = t_best = i_score;}
+X if (e_score < tm_escore) tm_escore = e_score;
+X
+X if (m_msp0->qshuffle) {
+X if (bestr[ires].qr_score > t_qrbest)
+X t_qrbest = bestr[ires].qr_score;
+X if (bestr[ires].qr_escore < t_qrescore)
+X t_qrescore = bestr[ires].qr_escore;
+X
+X if (bestr[ires].frame==m_msp0->nitt1 &&
+X nqstats < m_msp0->shuff_max &&
+X bestr[ires].qr_score >= 0) {
+X qstats[nqstats].n1 = t_n1; /* save the best score */
+X qstats[nqstats].comp = bestr[ires].comp;
+X qstats[nqstats].H = bestr[ires].H;
+X qstats[nqstats].escore = t_qrescore;
+X qstats[nqstats++].score = t_qrbest;
+X t_qrbest = -1; /* reset t_qrbest, t_qrescore */
+X t_qrescore = FLT_MAX;
+X }
+X }
+X
+X if (pst.zsflag >= 10 && bestr[ires].r_score > t_rbest) {
+X t_rbest = bestr[ires].r_score;
+X t_rescore = bestr[ires].r_escore;
+X }
+X
+X if (nstats < MAXSTATS) {
+X if (bestr[ires].frame == m_msg0.nitt1) {
+X stats[nstats].n1 = t_n1;
+X stats[nstats].comp = k_comp;
+X stats[nstats].H = k_H;
+X
+X if (pst.zsflag > 10) {
+X tm_best = t_rbest;
+X tm_escore = t_rescore;
+X t_rbest = -1;
+X t_rescore = FLT_MAX;
+X }
+X stats[nstats].escore = tm_escore;
+X stats[nstats++].score = tm_best;
+X tm_escore = FLT_MAX;
+X t_best = -1;
+X }
+X }
+X else if (pst.zsflag >=0) { /* nstats >= MAXSTATS, zsflag >=0 */
+X if (!stats_done ) {
+X pst.n0 = qm_msp0->n0;
+X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
+X &m_msp0->hist, &m_msp0->pstat_void,0);
+X stats_done = 1;
+X kstats = nstats;
+X for (i=0; i<nbest; i++) {
+X bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
+X bptr[i]->escore,bptr[i]->n1,
+X bptr[i]->comp, m_msp0->pstat_void);
+X }
+X }
+#ifdef SAMP_STATS
+X if (!m_msp0->escore_flg) {
+X jstats = nrand(kstats++);
+X if (jstats < MAXSTATS) {
+X stats[jstats].n1 = t_n1; /* save the best score */
+X stats[jstats].comp = k_comp;
+X stats[jstats].H = k_H;
+X if (pst.zsflag >=10) t_best = t_rbest;
+X stats[jstats].score = t_best;
+X }
+X }
+#endif
+X }
+X
+X if (stats_done) {
+X zscore=(*find_zp)(i_score,e_score,desptr->n1,k_comp,
+X m_msp0->pstat_void);
+X if (bestr[ires].frame == m_msg0.nitt1) {
+X addhistz((*find_zp)(tm_best,tm_escore,t_n1,k_comp,
+X m_msp0->pstat_void),
+X &(m_msp0->hist));
+X t_best = t_rbest = -1;
+X }
+X
+X }
+X else zscore = (double) i_score;
+X
+X if (zscore > zbestcut) {
+X if (nbest>=MAXBEST) {
+X selectbestz(bptr, nbest-MAXBEST/4-1, nbest);
+X nbest -= MAXBEST/4;
+X zbestcut = bptr[nbest-1]->zscore;
+X best_flag = 0;
+X }
+X /* if zbestcut == -BIGNUM, bptr[] has not been reinitialized */
+X else if (best_flag) bptr[nbest]=&best[nbest];
+X
+X bptr[nbest]->m_seqnm = bestr[ires].m_seqnm ;
+X bptr[nbest]->seqnm = bestr[ires].seqnm;
+X bptr[nbest]->score[0] = bestr[ires].score[0];
+X bptr[nbest]->score[1] = bestr[ires].score[1];
+X bptr[nbest]->score[2] = bestr[ires].score[2];
+X bptr[nbest]->escore = bestr[ires].escore;
+X bptr[nbest]->segnum = bestr[ires].segnum;
+X bptr[nbest]->seglen = bestr[ires].seglen;
+X bptr[nbest]->comp = bestr[ires].comp;
+X bptr[nbest]->H = bestr[ires].H;
+X bptr[nbest]->zscore = zscore;
+X bptr[nbest]->wrkr = snode;
+X bptr[nbest]->desptr = desptr;
+X bptr[nbest]->lseek = desptr->lseek; /* needed for identifying alternate
+X strand scores from same sequence */
+X bptr[nbest]->n1 = desptr->n1;
+X bptr[nbest]->frame = bestr[ires].frame;
+X
+X /* this was used when -m 9 info was calculated in 1st scan */
+X /*
+X bptr[nbest]->sw_score = bestr[ires].sw_score;
+X if (bestr[ires].sw_score > -1) {
+X nopt++;
+X bptr[nbest]->a_len = bestr[ires].a_len;
+X bptr[nbest]->percent = bestr[ires].percent;
+X bptr[nbest]->gpercent = bestr[ires].gpercent;
+X bptr[nbest]->min0 = bestr[ires].min0;
+X bptr[nbest]->min1 = bestr[ires].min1;
+X bptr[nbest]->max0 = bestr[ires].max0;
+X bptr[nbest]->max1 = bestr[ires].max1;
+X bptr[nbest]->ngap_q = bestr[ires].ngap_q;
+X bptr[nbest]->ngap_l = bestr[ires].ngap_l;
+X }
+X else {
+X bptr[nbest]->percent = -1.0;
+X bptr[nbest]->min0 = bptr[nbest]->min1 = bptr[nbest]->max0 =
+X bptr[nbest]->max1 = 0;
+X }
+X */
+X
+X nbest++;
+X }
+X } /* for loop */
+X if (naa0 < nnodes-FIRSTNODE) continue;
+X
+X gstring2[0]='\0';
+X
+X /* get gstring2,3 - algorithm/parameter description */
+#ifdef PVM_SRC
+X bufid = pvm_recv(pinums[FIRSTNODE],PARAMTYPE);
+X pvm_upkbyte(gstring2,sizeof(gstring2),1);
+X pvm_upkbyte(gstring3,sizeof(gstring3),1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(gstring2,sizeof(gstring2),MPI_BYTE,FIRSTNODE,PARAMTYPE,
+X MPI_COMM_WORLD,&mpi_status);
+X MPI_Recv(gstring3,sizeof(gstring3),MPI_BYTE,FIRSTNODE,PARAMTYPE,
+X MPI_COMM_WORLD,&mpi_status);
+#endif
+X
+/* ********************** */
+/* analyze the results */
+/* ********************** */
+X
+X if (!stats_done) {
+X if (nbest < 20 || pst.zsflag <= 0) {
+X pst.zsflag_f = -1;
+X }
+X else {
+X pst.n0 = qm_msp0->n0;
+X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
+X &m_msp0->hist, &m_msp0->pstat_void,stats_done);
+X
+X for (i=0; i<nbest; i++)
+X bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
+X bptr[i]->escore, bptr[i]->n1,
+X bptr[i]->comp, m_msp0->pstat_void);
+X }
+X }
+X
+X m_msp0->db.entries = ntt.entries;
+X m_msp0->db.length = ntt.length;
+X m_msp0->db.carry = ntt.carry;
+X
+X if (pst.zdb_size < 1) pst.zdb_size = ntt.entries;
+X
+X if (!qm_msp0->qshuffle) {
+X last_stats(aa0p0, m_msp0->n0,
+X stats,nstats, bptr,nbest, *m_msp0, pst,
+X &m_msp0->hist, &m_msp0->pstat_void);
+X }
+X else {
+X last_stats(aa0p0, m_msp0->n0,
+X qstats,nqstats, bptr,nbest, *m_msp0, pst,
+X &m_msp0->hist, &m_msp0->pstat_void);
+X }
+X
+X if (m_msp0->last_calc_flg) {
+X nbest = last_calc(bptr,nbest, *m_msp0, &pst,qm_msp0,
+X m_msp0->pstat_void);
+X }
+X
+X sortbeste(bptr,nbest);
+X scale_scores(bptr,nbest,m_msp0->db,pst,m_msp0->pstat_void);
+X
+X if (pst.zsflag >= 0 && bptr[0]->escore >= m_msg0.e_cut) goto no_results;
+X
+X /* else sortorder(bptr,nbest,wlsn,nnodes); */
+X
+/* if more than one stage or markx==9, calculate opt scores or do alignment */
+/* send results to workers as available */
+X
+X if (m_msg0.stages > 1 || m_msg0.markx & MX_M9SUMM) {
+X
+X /* to determine how many sequences to re-align (either for
+X do_opt() or calc_id() we need to modify m_msg.mshow to get
+X the correct number of alignments */
+X
+X if (m_msg0.mshow_flg != 1 && pst.zsflag >= 0) {
+X for (i=0; i<nbest && bptr[i]->escore< m_msg0.e_cut; i++) {}
+X m_msg0.mshow = i;
+X }
+X
+X /* allocate space for a_struct info */
+X if (m_msg0.markx & MX_M9SUMM && m_msg0.mshow > 0) {
+X if ((aln_d_base=(struct a_struct *)
+X calloc((size_t)m_msg0.mshow,sizeof(struct a_struct)))==NULL) {
+X fprintf(stderr," cannot allocate a_struct %d\n", m_msg0.mshow);
+X exit(1);
+X }
+X
+X for (is = 0; is < m_msg0.mshow; is++ ) {
+X bptr[is]->aln_d = &aln_d_base[is];
+X }
+X }
+X
+X do_stage2(bptr,m_msg0.mshow, *m_msp0, DO_OPT_FLG, qm_msp0);
+X }
+X
+X no_results:
+X tdone = s_time();
+X tddone = time(NULL);
+X
+X /* changed from >> to >>> because qm_msp0->libstr is missing '>' */
+X fprintf (outfd, "%3d>>>%s\n", qlib,qm_msp0->libstr);
+X
+X /* make certain that m_msp0->n0, libstr are current */
+X m_msp0->n0 = qm_msp0->n0;
+X /* strncpy(m_msp0->libstr,qm_msp0->libstr,sizeof(m_msg0.libstr)); */
+X
+X prhist (outfd,*m_msp0,pst,m_msp0->hist,nstats,m_msp0->db,gstring2);
+X
+X if (bptr[0]->escore < m_msg0.e_cut) {
+X
+X showbest (outfd, bptr, nbest, qlib, m_msp0,pst,ntt,gstring2);
+X
+X if (m_msg0.markx & MX_M9SUMM) {
+X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
+X m_msg0.tname,qlib,qm_msp0->libstr,
+X (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
+X m_msg0.lname);
+X }
+X else if (m_msg0.markx & MX_M10FORM) {
+X if ((bp=strchr(qm_msp0->libstr,' '))!=NULL) *bp = '\0';
+X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
+X m_msg0.tname,qlib,qm_msp0->libstr,
+X (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
+X m_msg0.lname);
+X if (bp!=NULL) *bp=' ';
+X fprintf(outfd,"; mp_name: %s\n",argv[0]);
+X fprintf(outfd,"; mp_ver: %s\n",mp_verstr);
+X fprintf(outfd,"; mp_argv:");
+X for (i=0; i<argc; i++)
+X fprintf(outfd," %s",argv[i]);
+X fputc('\n',outfd);
+X fputs(gstring3,outfd);
+X fputs(hstring1,outfd);
+X }
+X
+X /* ashow is -1 if not set, -d 0 indicates no alignments, > 0 if set */
+X /* if ashow is -1, m_msg.nshow (set by e_cut above) sets limit
+X in showalign */
+X
+X if (m_msp0->ashow != 0) {
+X /* showalign needs m_msp->qtitle, so fill it in */
+X strncpy(m_msp0->qtitle,qm_msp0->libstr,MAX_FN-1);
+X m_msp0->qtitle[MAX_FN-1]='\0';
+X showalign (outfd, bptr, nbest, qlib, *m_msp0, pst, gstring2);
+X }
+X }
+X else {
+X if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) {
+X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
+X m_msg0.tname,qlib,qm_msp0->libstr,(m_msg0.revcomp ? "-":"\0"), qm_msg0.n0, m_msg0.sqnam,
+X m_msg0.lname);
+X fprintf(outfd,">>>!!! No sequences with E() < %f\n",m_msg0.e_cut);
+X }
+X else fprintf(outfd,"!! No sequences with E() < %f\n",m_msg0.e_cut);
+X }
+X
+X if (! (m_msg0.markx & (MX_M9SUMM + MX_M10FORM))) {
+X fprintf(outfd,"/** search time: ");
+X ptime(outfd,tdone-tprev);
+X fprintf(outfd," **/\n");
+X tprev = tdone;
+X }
+X else if (m_msg0.markx & MX_M9SUMM) {
+X if (aln_d_base != NULL) {
+X free((void *)aln_d_base);
+X aln_d_base = NULL;
+X }
+X fprintf(outfd,">>>***\n");
+X fprintf(outfd,"/** %s **/\n",gstring2);
+X fprintf(outfd,"/** %s **/\n",m_msp0->hist.stat_info);
+X fprintf(outfd,">>><<<\n");
+X }
+X else if (m_msg0.markx & MX_M10FORM) {
+X fprintf(outfd,">>><<<\n");
+X }
+X fflush(outfd);
+X
+/* *********************** */
+/* end of analysis/display */
+/* *********************** */
+X
+X
+/* *********************** */
+/* start the next search */
+/* *********************** */
+X
+X if (fdata) { /* label the results file */
+X fprintf(fdata,"/** %s **/\n",gstring2);
+X fprintf(fdata,"%3d>%-50s\n",qlib-1,qm_msp1->libstr);
+X fflush(fdata);
+X }
+X
+X if (m_msp1->escore_flg) { /* re-initialize some stats stuff before search */
+X pst.zsflag_f = process_hist(stats,nstats,*m_msp1,pst,
+X &m_msp1->hist,&m_msp1->pstat_void,0);
+X stats_done=1;
+X }
+X else stats_done = 0;
+X
+X /* set up qstats if necessary - different queries have different qshuffle */
+X if (m_msp1->qshuffle && qstats==NULL) {
+X if ((qstats =
+X (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
+X s_abort ("Cannot allocate qstats struct","");
+X }
+X
+X nqstats = nstats = 0;
+X
+X /* send new qm_msp, sequence */
+X if (!fast_flag) {
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
+X if (qm_msp1->n0 != -1) {
+X pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
+X if (m_msp1->ann_flg) {
+X pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
+X }
+X }
+X for (node = FIRSTNODE; node < nnodes; node++)
+X pvm_send(pinums[node],MSEQTYPE);
+#endif
+#ifdef MPI_SRC
+X for (node=FIRSTNODE; node < nnodes; node++) {
+X MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,node,MSEQTYPE,
+X MPI_COMM_WORLD);
+X if (qm_msp1->n0 != -1) {
+X MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,node,MSEQTYPE1,MPI_COMM_WORLD);
+X if (m_msp1->ann_flg)
+X MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
+X }
+X }
+#endif
+X }
+X
+X qlib++;
+X if (qm_msp1->n0 != -1) {
+X qtt.entries++;
+X qtt.length += qm_msp1->n0;
+X }
+X else goto done;
+X
+/* ******************************** */
+/* flip m_msg, qm_msg, aa0 pointers */
+/* ******************************** */
+X
+X naa0 = 0;
+X best_flag = 1;
+X nbest = nopt = 0;
+X zbestcut = -BIGNUM;
+X if (curtype == ONETYPE) {
+X curtype = TWOTYPE;
+X qm_msp0 = &qm_msg1;
+X qm_msp1 = &qm_msg0;
+X m_msp0 = &m_msg1;
+X m_msp1 = &m_msg0;
+X aa0p0 = aa01;
+X aa0p1 = aa00;
+X }
+X else {
+X curtype = ONETYPE;
+X qm_msp0 = &qm_msg0;
+X qm_msp1 = &qm_msg1;
+X m_msp0 = &m_msg0;
+X m_msp1 = &m_msg1;
+X aa0p0 = aa00;
+X aa0p1 = aa01;
+X }
+X
+X
+/* **********************************************************/
+/* all library sequences are done get next library sequence */
+/* **********************************************************/
+X
+X m_msp1->n0 = qm_msp1->n0 =
+X QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
+X strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
+X
+X if ((qlib+1) >= m_msg0.ql_stop) { qm_msp1->n0 = m_msp1->n0 = -1;}
+X
+X if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
+X m_msg0.qdnaseq==SEQT_PROT &&
+X aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
+X aa0p1[m_msp1->n0++]=m_msg0.term_code;
+X aa0p1[m_msp1->n0]=0;
+X qm_msp1->n0 = m_msp1->n0;
+X }
+X
+X /* for ALTIVEC, must pad with 15 NULL's */
+X if (m_msg0.n0 > 0) {
+X for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
+X }
+X
+X qm_msp1->slist = 0;
+X /*
+X leng = strlen (qm_msp1->libstr);
+X sprintf (&(qm_msp1->libstr[leng]), " %d %s", qm_msp1->n0, q_sqnam);
+X */
+X sprintf(tmp_str," %d %s", qm_msp1->n0, q_sqnam);
+X if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
+X strncat(qm_msp1->libstr,tmp_str,
+X sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
+X qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
+X
+X qm_msp1->seqnm = qlib;
+X
+X last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
+X
+X } /* while loop */
+X
+X /* ******************** */
+X /* end of library while */
+X /* ******************** */
+X
+X done:
+X tdone = s_time();
+X if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) fputs(">>>///\n",outfd);
+X printsum(outfd);
+X if (outfd!=stdout) printsum(stdout);
+X printsum(stderr);
+#ifdef PVM_SRC
+X pvm_exit();
+#endif
+#ifdef MPI_SRC
+X MPI_Finalize();
+#endif
+X
+X exit(0);
+} /* End of main program */
+X
+/* printsum() - write the end-of-run summary to fd.
+X
+X   Reports the query totals (qtt) and library totals (ntt), the number
+X   of processors and workers, the start/finish time stamps (tdstart,
+X   tddone), the loading and scan times (from tstart, tscan, tdone) and
+X   the search function name and version.  All of these are file-level
+X   variables that are set elsewhere; this function only reads them.
+X
+X   fd - open stream that receives the summary text.
+*/
+void
+printsum(FILE *fd)
+{
+X  double db_tt;
+X  char tstr1[26], tstr2[26];
+X
+X  /* ctime() returns a 26-byte "... yyyy\n\0" string; overwriting
+X     index 24 drops the trailing newline so the stamps fit on one line */
+X  strncpy(tstr1,ctime(&tdstart),sizeof(tstr1));
+X  strncpy(tstr2,ctime(&tddone),sizeof(tstr2));
+X  tstr1[24]=tstr2[24]='\0';
+X
+X  /* Print timing to output file as well */
+X  if (qtt.carry==0) {
+X    fprintf(fd, "\n%ld residues in %d query sequences\n", qtt.length, qtt.entries);
+X  }
+X  else {
+X    /* total = carry*LONG_MAX + length, accumulated in a double;
+X       %.0f prints it as a whole number (%.0g would keep only one
+X       significant digit) */
+X    db_tt = (double)qtt.carry*(double)LONG_MAX + (double)qtt.length;
+X    fprintf(fd, "\n%.0f residues in %d query sequences\n", db_tt, qtt.entries);
+X  }
+X
+X  if (ntt.carry==0) {
+X    fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
+X  }
+X  else {
+X    /* same carry arithmetic for the library; a residue count has no
+X       fractional part, so print with %.0f */
+X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+X    fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt.entries);
+X  }
+X
+X  /* workers are nodes FIRSTNODE..nnodes-1; the first count is one more
+X     than the worker count */
+X  fprintf(fd," %d processors (%d workers) were used\n",
+X	  nnodes-FIRSTNODE+1,nnodes-FIRSTNODE);
+X  fprintf(fd," Pvcomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
+X  fprintf(fd," Loading time: ");
+X  ptime(fd, tscan - tstart);
+X  fprintf (fd," Scan time: ");
+X  ptime (fd, tdone - tscan);
+X  fprintf (fd,"\n");
+X  fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
+}
+X
+/* fsigint() - clean up after the run has been interrupted.
+X
+X   (Presumably installed as the SIGINT handler - confirm at the
+X   signal() call, which is not in this part of the file.)
+X
+X   Records the finish times, writes an "interrupted" marker to the
+X   results file (when results are not going to stdout) and to stderr,
+X   prints the run summary, shuts down the PVM or MPI layer, and exits
+X   with status 1.
+*/
+void fsigint()
+{
+X  int wrk;
+X  int to_file;
+X
+X  tdone = s_time();
+X  tddone = time(NULL);
+X
+X  /* results go to a separate file whenever outfd is not stdout */
+X  to_file = (outfd != stdout);
+X
+X  if (to_file) {
+X    fputs("/*** interrupted ***/\n",outfd);
+X  }
+X  fputs("/*** interrupted ***/\n",stderr);
+X
+X  printsum(stdout);
+X  if (to_file) {
+X    printsum(outfd);
+X  }
+X
+#ifdef PVM_SRC
+X  /* kill every worker task before leaving PVM */
+X  wrk = FIRSTNODE;
+X  while (wrk < nnodes) {
+X    pvm_kill(pinums[wrk]);
+X    wrk++;
+X  }
+X  pvm_exit();
+#endif
+#ifdef MPI_SRC
+X  MPI_Abort(MPI_COMM_WORLD,1);
+X  MPI_Finalize();
+#endif
+X  exit(1);
+}
+SHAR_EOF
+chmod 0644 p2_complib.c ||
+echo 'restore of p2_complib.c failed'
+Wc_c="`wc -c < 'p2_complib.c'`"
+test 55578 -eq "$Wc_c" ||
+ echo 'p2_complib.c: original size 55578, current size' "$Wc_c"
+fi
+# ============= p2_workcomp.c ==============
+if test -f 'p2_workcomp.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping p2_workcomp.c (File already exists)'
+else
+echo 'x - extracting p2_workcomp.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'p2_workcomp.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: p2_workcomp.c,v 1.49 2007/01/02 17:24:36 wrp Exp $ */
+X
+/* This version is modified to read all files, query and database,
+X through the manager process. Workers will now receive their
+X database from the manager, rather than reading it themselves. This
+X cuts down considerably on NFS traffic, simplifies searches of
+X multiple files, and allows use of clusters of slave nodes that do
+X not have NFS access */
+X
+/* September, 1994 - this version has been modified to do two kinds of
+X searches, a general library search, or list of library sequences search.
+X The latter would be used to generate optimized scores for fasta and
+X to produce alignments */
+X
+/* modified July, 2002, to provide query shuffle */
+X
+/* modified October, 2005, to support struct a_res_str a_res -
+X coordinates of alignment in aa0[], aa1[]. Future modifications
+X will cause do_walign to be run only once - subsequent calls for
+X seqc[0,1] can be filled using a_res, by adding a_res to the
+X struct sqs2 array.
+X
+X 19-March-2006 - modifications to call do_walign() only once, and
+X use the resulting a_res structure for subsequent calls to calc_id,
+X calcons, calcons_a, have been implemented. Also, the -V option is
+X now valid with the parallel programs.
+X
+X 31-May-2006 - some functions (e.g. dropfs and dropff) do not store
+X complete information in a_res - thus they cannot use this shortcut
+X (yet).
+X
+*/
+X
+X
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#ifdef PVM_SRC
+#include "pvm3.h"
+#endif
+X
+#ifdef MPI_SRC
+#include "mpi.h"
+#endif
+X
+/*
+#define PvmDataDefault 0
+#define PvmTaskDefault 0
+*/
+#include "msg.h"
+#include "defs.h"
+#include "param.h"
+#include "w_mw.h"
+#include "structs.h"
+X
+#ifdef MPI_SRC
+#define XTERNAL
+#endif
+#include "upam.h"
+#include "uascii.h"
+X
+#ifdef PVM_SRC
+int worker, mytid;
+int nnodes, pinums[MAXNOD];
+#endif
+X
+#include "drop_func.h"
+X
+extern void alloc_pam (int d1, int d2, struct pstruct *ppst); /* allocate ppst->pam12,pam12x */
+extern int **alloc_pam2p (int len, int nsq);
+extern void w_init ();
+extern void irand(int);
+extern void revcomp(unsigned char *, int, int *);
+X
+X
+X
+extern void initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz);
+extern void freeseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca);
+X
+void send_bestr(int, int, struct comstr *, int, int);
+void send_bestr2(int, struct comstr2 *, int);
+void send_code(int, char *, int);
+X
+extern void get_param (struct pstruct *ppst, char *pstring2, char *pstring3);
+extern void update_param(struct qmng_str *qm_msg, struct mngmsg *m_msg,
+X struct pstruct *ppst);
+extern int shuffle(unsigned char *, unsigned char *, int);
+extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
+X
+extern char err_str[];
+X
+/* local function declarations */
+void free_ares(struct sqs2 *, int itt, int *, int walign_cnt, int worker);
+X
+X
+X
+void w_abort (p, p1)
+char *p, *p1;
+{
+X fprintf (stderr, " %s %s\n", p, p1);
+#ifdef PVM_SRC
+X pvm_exit();
+X exit (1);
+#endif
+#ifdef MPI_SRC
+X MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+}
+X
+#ifdef PVM_SRC
+main ()
+#endif
+#ifdef MPI_SRC
+void
+workcomp(int worker)
+#endif
+{
+X unsigned char *aa0[6], *aa1s, *aa0s; /* Query and library sequences */
+X struct mngmsg m_msg; /* start message from manager to worker 1 */
+X struct qmng_str qm_msg; /* updated for each query */
+X int last_msg_b[10]; /* last set of numbers */
+X struct sqs2 *seqpt; /* sequence pointers for chunk */
+X int seqbuf_n,seqbuf_s; /* number of sequences, length of sequences */
+X int max_sql; /* maximum number of sequences/node */
+X int *n1_arr; /* array of sequence lengths in buffer */
+X int *m_seqnm_arr; /* array of sequence numbers in buffer */
+X int *aa1i_arr; /* array of offsets into the buffer */
+X unsigned char *seq_buf; /* space for sequence data */
+X int ntx;
+X int nsq; /* effective alphabet size */
+X long curtype = ONETYPE; /* current send message type */
+X int ieven=0; /* flag for window shuffle */
+X int cur_n0;
+X int n1, n1over; /* length of query, library sequences */
+X struct comstr bestr[BFR+1]; /* best structures */
+X struct comstr2 bestr2[BFR2+1]; /* best structures */
+X struct a_struct aln, *aln_dp;
+X int qres_bufsize; /* results buffer size */
+X int bestcnt = 0; /* how many best structures are full */
+X char gstring2[MAX_STR]; /* parameter string for manager */
+X char gstring3[MAX_STR]; /* parameter string for manager */
+X struct pstruct pst; /* parameter structure */
+X struct rstruct rst, qrst, rrst; /* results structure */
+X void *f_str[6], *qf_str;
+X int sw_score;
+X int lcnt, count, seqnm; /* counters */
+X int *walign_done[2], walign_cnt[2]; /* index of current valid a_res in seqpt */
+X int have_walign;
+X int *tres; /* allocated storage for seqpt[].a_res[].res */
+X int lend; /*global library sequence number information */
+X int lsn; /* library sequence number */
+X struct stage2_str *liblist=NULL; /* list of sequences to search */
+X int i, j; /* my turn to send sequence descriptions */
+X char libstr[21];
+X char errstr[128];
+X int itt=0;
+X int bufid;
+X char *seqc0, *seqc0a, *seqc1, *seqca;
+X char *seqc, *seqc_buff;
+X int seqc_buff_cnt, seqc_buff_len, seqc_flag;
+X int maxc, lc, nc, nident, ngap, aln_code_n;
+X float percent, gpercent;
+X int old_shuffle=0; /* did a qshuffle last time */
+X int hosttid=0;
+X char worker_str[5];
+X
+#ifdef MPI_SRC
+X MPI_Status mpi_status;
+#endif
+X
+#ifdef PVM_SRC
+X mytid = pvm_mytid();
+X hosttid = pvm_parent();
+#endif
+X
+X w_init(); /* sets up default sascii, hsq, sq */
+X
+X /* Allocate space for the query sequence */
+X if ((aa0[0] = (unsigned char *) malloc ((MAXTST+2+SEQ_PAD)*sizeof (char))) == NULL) {
+X w_abort ("Unable to allocate sequence array[0] - exiting!","");
+X }
+X *aa0[0]='\0';
+X aa0[0]++;
+X
+X /* initial messages set up various parameter structures:
+X
+X STARTTYPE0: &nnodes
+X pinums
+X &m_msg
+X
+X STARTTYPE1 &pst
+X
+X STARTTYPE2 pam12
+X STARTTYPE3 pam12x
+X */
+X
+#ifdef PVM_SRC
+#ifdef ROUTE_DIRECT
+X pvm_setopt(PvmRoute,PvmRouteDirect);
+#endif
+X /* get number of nodes, pinums */
+X bufid = pvm_recv(hosttid,STARTTYPE0);
+X pvm_upkint(&nnodes,1,1);
+X pvm_upkint(pinums,nnodes,1);
+X pvm_upkbyte((char *)&m_msg,(int)sizeof(m_msg),1);
+X worker = tidtonode(mytid);
+X pvm_freebuf(bufid);
+#endif
+X
+X sprintf(worker_str,"@%d",worker);
+X
+#ifdef MPI_SRC
+X MPI_Recv(&m_msg,sizeof(m_msg),MPI_BYTE,hosttid,STARTTYPE0,MPI_COMM_WORLD,
+X &mpi_status);
+#endif
+X
+X /* the aln structure needs some information from m_msg0.aln */
+X memcpy(&aln,&m_msg.aln,sizeof(struct a_struct));
+X
+X /*
+X fprintf(stderr,"d1: %d d2: %d\n",m_msg.pamd1,m_msg.pamd2);
+X */
+X
+X /* get pst params */
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,STARTTYPE1);
+X pvm_upkbyte((char *)&pst,(int)sizeof(pst),1);
+X /* 31t nsq = pst.nsq; */
+X pvm_upkbyte((char *)pascii,(int)sizeof(aascii),1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(&pst,(int)sizeof(pst),MPI_BYTE,hosttid,STARTTYPE1,MPI_COMM_WORLD,
+X &mpi_status);
+X
+X MPI_Recv(pascii,(int)sizeof(aascii)/sizeof(int),MPI_INT,hosttid,STARTTYPE1,MPI_COMM_WORLD,
+X &mpi_status);
+#endif
+X
+X if (pst.ext_sq_set) { nsq = pst.nsqx;}
+X else { nsq = pst.nsq;}
+X
+X aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
+X if (m_msg.qframe == 2) {
+X if ((aa0[1]=(unsigned char *)malloc((MAXTST+2)*sizeof (char)))==NULL)
+X w_abort ("Unable to allocate sequence[1] array - exiting!","");
+X *aa0[1]='\0';
+X aa0[1]++;
+X }
+X
+X if ((aa1s=(unsigned char *)malloc((m_msg.max_tot+1)*sizeof (char)))==NULL)
+X w_abort ("Unable to allocate shuffled library sequence", "");
+X *aa1s=0;
+X aa1s++;
+X
+X irand(0); /* necessary for shuffled sequences */
+X
+X /* this function allocates pam12, pam12x
+X assigns pst.pam[0][0]=pam12, pst.pam[1][0] = pam12x
+X and sets up the correct pst.pam[0][0][0] pointers */
+X
+X alloc_pam(m_msg.pamd1,m_msg.pamd2,&pst);
+X
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,STARTTYPE2);
+X pvm_upkint(pam12,m_msg.pamd1*m_msg.pamd2,1);
+X pvm_freebuf(bufid);
+X
+X bufid = pvm_recv(hosttid,STARTTYPE3);
+X pvm_upkint(pam12x,m_msg.pamd1*m_msg.pamd2,1);
+X pvm_freebuf(bufid);
+#endif
+X
+#ifdef DEBUG
+X if (worker==FIRSTNODE) {
+X fprintf(stderr,"ext?: %d\tnsq: %d\tnsqx: %d\n",pst.ext_sq_set,pst.nsq, pst.nsqx);
+X for (i=1; i<5; i++) {
+X for (j=1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sq[i],pst.sq[j],pst.pam2[0][i][j]);
+X fprintf(stderr,"\n");
+X }
+X for (i=pst.nsq+1; i<pst.nsq+5; i++) {
+X for (j=pst.nsq+1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[0][i][j]);
+X fprintf(stderr,"\n");
+X }
+X
+X for (i=1; i<5; i++) {
+X for (j=1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[1][i][j]);
+X fprintf(stderr,"\n");
+X }
+X for (i=pst.nsq+1; i<pst.nsq+5; i++) {
+X for (j=pst.nsq+1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[1][i][j]);
+X fprintf(stderr,"\n");
+X }
+X }
+#endif
+X
+#ifdef MPI_SRC
+X MPI_Recv(pam12,m_msg.pamd1*m_msg.pamd2,MPI_INT,hosttid,STARTTYPE2,
+X MPI_COMM_WORLD,&mpi_status);
+X
+X MPI_Recv(pam12x,m_msg.pamd1*m_msg.pamd2,MPI_INT,hosttid,STARTTYPE3,
+X MPI_COMM_WORLD,&mpi_status);
+#endif
+X
+/*
+X We have the PAM matrices - get the library sequences
+*/
+X
+X /* Allocate space for the sequences */
+X max_sql = MAXSQL/2;
+X
+X if ((seqpt=(struct sqs2 *)calloc(max_sql,sizeof(struct sqs2)))==NULL)
+X w_abort("cannot allocate seqpt(sqs2)","");
+X
+X if ((n1_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
+X w_abort("cannot allocate n1_arr","");
+X
+X if ((aa1i_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
+X w_abort("cannot allocate n1_arr","");
+X
+X if ((m_seqnm_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
+X w_abort("cannot allocate m_seqnm_arr","");
+X
+/*****************************************************************/
+/* This section gets all the database sequences from the manager */
+/*****************************************************************/
+X
+X lcnt = 0;
+X while (1) {
+#ifdef PVM_SRC
+X /* get the number of sequences, sequence lengths */
+X bufid = pvm_recv(hosttid,STARTTYPE4);
+X pvm_upkint(&seqbuf_n,1,1); /* number of sequences */
+X pvm_upkint(&seqbuf_s,1,1); /* size of sequence buffer */
+X pvm_upkint(n1_arr,seqbuf_n,1); /* length of each sequence in buffer */
+X pvm_upkint(aa1i_arr,seqbuf_n,1); /* indexes for each sequence */
+X pvm_upkint(m_seqnm_arr,seqbuf_n,1); /* number of each library sequence */
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(&seqbuf_n,1,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
+X &mpi_status);
+X MPI_Recv(&seqbuf_s,1,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
+X &mpi_status);
+X MPI_Recv(n1_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
+X &mpi_status);
+X MPI_Recv(aa1i_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
+X &mpi_status);
+X MPI_Recv(m_seqnm_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
+X &mpi_status);
+#endif
+X
+X if (seqbuf_n <= 0) break;
+#ifdef DEBUG
+X /*
+X fprintf(stderr,"[%d] seqbuf_n: %d seqbuf_s: %d\n",
+X worker,seqbuf_n,seqbuf_s);
+X fprintf(stderr,"[%d] lcnt: %d n1: %d seqnm %d\n",
+X worker,0,n1_arr[0],m_seqnm_arr[0]);
+X fprintf(stderr,"[%d] lcnt: %d n1: %d seqnm %d\n",
+X worker,1,n1_arr[1],m_seqnm_arr[1]);
+X */
+#endif
+X
+X /* allocate space for sequences */
+X if ((seq_buf = (unsigned char *)calloc((size_t)seqbuf_s+1,sizeof(char)))
+X ==NULL) {
+X w_abort("cannot allocate tmp_seq","");
+X }
+X seq_buf++; /* leave a '\0' at the start */
+X
+X /* get the sequence buffer */
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,STARTTYPE5);
+X pvm_upkbyte((char *)seq_buf,seqbuf_s,1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(seq_buf,seqbuf_s,MPI_BYTE,hosttid,STARTTYPE5,MPI_COMM_WORLD,
+X &mpi_status);
+#endif
+X
+X /* now we have everything - update the pointers */
+X if (lcnt+seqbuf_n >= max_sql) {
+X max_sql += max(MAXSQL/2,seqbuf_n);
+X if ((seqpt=(struct sqs2 *)realloc(seqpt,max_sql*sizeof(struct sqs2)))
+X ==NULL)
+X w_abort("cannot allocate seqpt(sqs2)","");
+X }
+X
+X /* convert from offsets to pointers into buffer */
+X /* ntx = 0; */
+X for (i=0; i<seqbuf_n; i++,lcnt++) {
+X seqpt[lcnt].n1 = n1_arr[i];
+X seqpt[lcnt].m_seqnm = m_seqnm_arr[i];
+X seqpt[lcnt].aa1 = &seq_buf[aa1i_arr[i]];
+X /* ntx += n1_arr[i]+1 + SEQ_PAD */
+X
+#ifdef DEBUG
+X /* must have null's at both ends of sequence */
+X if (seqpt[lcnt].aa1[-1]!= '\0') {
+X fprintf(stderr,"Missing null at start: %d %d\n",
+X lcnt,seqpt[lcnt].aa1[-1]);
+X seqpt[lcnt].aa1[-1]='\0';
+X }
+X if (seqpt[lcnt].aa1[seqpt[lcnt].n1]!= '\0') {
+X fprintf(stderr,"Missing null at end: %d %d\n",
+X lcnt,seqpt[lcnt].aa1[seqpt[lcnt].n1]);
+X seqpt[lcnt].aa1[seqpt[lcnt].n1]='\0';
+X }
+#endif
+X }
+X }
+X /* all done - lcnt has the total number of library sequences */
+X
+#ifdef DEBUG
+X if (lcnt > 0)
+X for (i=0; i<10; i++) {
+X for (j=0; j<10; j++) libstr[j]=pst.sq[seqpt[i].aa1[j]];
+X libstr[10]='\0';
+X fprintf(stderr,"[%d] n1: %d seqnm: %d aa1: %s\n",
+X worker,seqpt[i].n1,seqpt[i].m_seqnm,libstr);
+X }
+#endif
+X
+X /* send back the number of descriptions received */
+X
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&lcnt,1,1);
+X pvm_send(hosttid,STARTTYPE0);
+#endif
+#ifdef MPI_SRC
+/* p4_dprintf(" have %d descriptions to send\n",lcnt); */
+X MPI_Send(&lcnt,1,MPI_INT,hosttid,STARTTYPE0,MPI_COMM_WORLD);
+#endif
+X
+/*****************************************************************/
+/* Library reads are finished, get ready to do searches */
+/*****************************************************************/
+X
+X /* get last set of numbers */
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,STARTTYPE0);
+X pvm_upkint(last_msg_b,2,1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(last_msg_b, 2, MPI_INT, hosttid, STARTTYPE0, MPI_COMM_WORLD,
+X &mpi_status);
+#endif
+X m_msg.nbr_seq = last_msg_b[0];
+X qres_bufsize = last_msg_b[1];
+X
+#ifdef DEBUG
+#ifdef PVM_SRC
+X fprintf(stderr,"[%d] have nbr_seq %d qres_bufsize %d\n",worker,
+X m_msg.nbr_seq, qres_bufsize);
+#endif
+#ifdef MPI_SRC
+X /* p4_dprintf("[%d] have nbr_seq %d qres_bufsize %d\n",worker,
+X m_msg.nbr_seq, qres_bufsize);
+X */;
+#endif
+#endif
+X /* If self search, receive sequence numbering data */
+X if (m_msg.self) {
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,STARTTYPE1);
+X pvm_upkint(&lend,1,1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(&lend,1,MPI_INT,hosttid,STARTTYPE1,MPI_COMM_WORLD,&mpi_status);
+#endif
+X }
+X
+X /* allocate space for a_res flag array */
+X
+X if ((walign_done[0] = (int *)calloc(lcnt,sizeof(int)))==NULL) {
+X w_abort("cannot allocate walign_done");
+X }
+X walign_cnt[0]=0;
+X
+X if ((walign_done[1] = (int *)calloc(lcnt,sizeof(int)))==NULL) {
+X w_abort("cannot allocate walign_done");
+X }
+X walign_cnt[1]=0;
+X
+X /* was commented in for only FASTX/TFASTX, but do it always to
+X simplify */
+X aainit(pst.tr_type, pst.debug_lib);
+X pst.maxlen = m_msg.maxn;
+X
+/*****************************************************************/
+/* Main search loop, which calls do_work() repeatedly */
+/*****************************************************************/
+X
+X cur_n0 = 0;
+X while (1) {
+/*
+#ifdef DEBUG
+#ifdef PVM_SRC
+X fprintf(stderr," W: %d waiting MSEQTYPE\n",worker);
+#endif
+#ifdef MPI_SRC
+X p4_dprintf(" W: %d waiting MSEQTYPE\n",worker);
+#endif
+#endif
+*/
+X
+/*****************************************************************/
+/* Wait for a query sequence from the manager */
+/*****************************************************************/
+X
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,MSEQTYPE);
+X pvm_upkbyte((char *)&qm_msg,sizeof(qm_msg),1);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(&qm_msg,sizeof(struct mngmsg),MPI_BYTE,hosttid,MSEQTYPE,
+X MPI_COMM_WORLD,&mpi_status);
+#endif
+#ifdef DEBUG
+X fprintf(stderr,"[%d] have MSEQTYPE n0: %d s_func: %d slist: %d qf: %d\n",
+X worker,qm_msg.n0,qm_msg.s_func,qm_msg.slist,qm_msg.qshuffle);
+#endif
+X
+/*****************************************************************/
+/* New query sequence indicated by qm_msg.slist=0 */
+/*****************************************************************/
+X
+X if (qm_msg.n0 > 0 && qm_msg.slist == 0) {
+X
+X if (cur_n0 > 0) {
+X
+/*****************************************************************/
+/* free everything associated with previous search */
+/*****************************************************************/
+X
+X close_work (aa0[0], cur_n0, &pst, &f_str[0]);
+X free_ares(seqpt, 0, walign_done[0], walign_cnt[0], worker);
+X walign_cnt[0] = 0;
+X if (m_msg.ann_flg) free(m_msg.aa0a);
+X
+X
+X if (m_msg.qframe == 2) {
+X close_work(aa0[1], cur_n0, &pst, &f_str[1]);
+X free_ares(seqpt, 1, walign_done[1], walign_cnt[1], worker);
+X walign_cnt[1] = 0;
+X }
+X if (old_shuffle) {
+X close_work(aa0s,cur_n0, &pst, &qf_str);
+X aa0s--;
+X free(aa0s);
+X old_shuffle = 0;
+X }
+X if (pst.pam_pssm) {
+X free_pam2p(pst.pam2p[0]);
+X free_pam2p(pst.pam2p[1]);
+X }
+X }
+X
+/*****************************************************************/
+/* Start allocating things for the next search */
+/*****************************************************************/
+X
+X pst.pam_pssm = qm_msg.pam_pssm;
+X cur_n0 = qm_msg.n0;
+X if (m_msg.ann_flg) {
+X if ((m_msg.aa0a = calloc(qm_msg.n0+1,sizeof(char)))==NULL) {
+X w_abort(" cannot allocate aa0a");
+X }
+X }
+X
+/*****************************************************************/
+/* Get the next query sequence */
+/*****************************************************************/
+X
+#ifdef PVM_SRC
+X pvm_upkbyte((char *)aa0[0],qm_msg.n0+1+SEQ_PAD,1);
+X if (m_msg.ann_flg) {
+X pvm_upkbyte((char *)m_msg.aa0a,qm_msg.n0+1,1);
+X }
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(aa0[0],qm_msg.n0+1+SEQ_PAD,MPI_BYTE,hosttid,
+X MSEQTYPE1,MPI_COMM_WORLD, &mpi_status);
+X if (m_msg.ann_flg) {
+X MPI_Recv(m_msg.aa0a,qm_msg.n0+1,MPI_BYTE,hosttid,
+X MSEQTYPE2,MPI_COMM_WORLD, &mpi_status);
+X }
+#endif
+X
+#ifdef DEBUG
+X /* must have null's at both ends of sequence */
+X if (aa0[0][-1]!= '\0') {
+X fprintf(stderr,"Missing null at start: %s %d\n",
+X qm_msg.libstr,aa0[0][-1]);
+X aa0[0][-1]='\0';
+X }
+X if (aa0[0][qm_msg.n0]!= '\0') {
+X fprintf(stderr,"Missing null at end: %s %d\n",
+X qm_msg.libstr,aa0[0][qm_msg.n0]);
+X aa0[qm_msg.n0]='\0';
+X }
+X
+X /* This discovers most reasons for core dumps */
+X if (pst.debug_lib)
+X for (j=0; j<qm_msg.n0; j++)
+X if (aa0[0][j]>pst.nsq) {
+X fprintf(stderr,
+X "seq: %s residue[%d/%d] %d range (%d)\n",
+X qm_msg.libstr,j,qm_msg.n0,aa0[0][j],pst.nsq);
+X aa0[0][j]=0;
+X qm_msg.n0=j-1;
+X break;
+X }
+#endif
+X update_params(&qm_msg,&m_msg,&pst);
+X }
+X
+/*****************************************************************/
+/* End of free()'s/ initialization for new sequence */
+/*****************************************************************/
+X
+#ifdef PVM_SRC
+X pvm_freebuf(bufid);
+#endif
+X
+X if (qm_msg.n0 == -1) {
+X
+/*****************************************************************/
+/* All done with searches */
+/*****************************************************************/
+/* printf(" %d: got n0 == -1\n",worker); */
+X break;
+X }
+X
+X /* p4_dprintf(" W:%d n0:%d slist:%d s_func:%d (%d)\n",worker,qm_msg.n0,qm_msg.slist,qm_msg.s_func,qres_bufsize); */
+X
+/*****************************************************************/
+/* if qm_msg.slist > 0, search specific sequences, to be sent */
+/*****************************************************************/
+X
+X if (qm_msg.slist > 0) { /* list search, not library search */
+X if (liblist != NULL) free(liblist);
+X
+X /* get the list of sequences */
+X if ((liblist=(struct stage2_str *)
+X calloc(qm_msg.slist,sizeof(struct stage2_str)))==NULL) {
+X sprintf(errstr,"sequence list %d",qm_msg.slist);
+X w_abort (errstr, "");
+X }
+X
+#ifdef PVM_SRC
+X bufid = pvm_recv(hosttid,LISTTYPE);
+X pvm_upkbyte((char *)liblist,qm_msg.slist*sizeof(struct stage2_str),1);
+X pvm_freebuf(bufid);
+#endif
+#ifdef MPI_SRC
+X MPI_Recv(liblist,qm_msg.slist*sizeof(struct stage2_str),MPI_BYTE,
+X hosttid,LISTTYPE,MPI_COMM_WORLD, &mpi_status);
+#endif
+X }
+X
+/*****************************************************************/
+/* have list of sequences to be compared/aligned */
+/*****************************************************************/
+X
+X /* Initial stuff */
+X if (qm_msg.slist == 0) {
+/*****************************************************************/
+/* New query - set up matrices and init_work() */
+/*****************************************************************/
+#ifdef DEBUG
+/*
+X fprintf(stderr,"n1: %d\t",qm_msg.n0);
+X for (i=0; i<10; i++) fprintf(stderr,"%c",nt[aa0[0][i]]);
+X fprintf(stderr,"\n");
+*/
+#endif
+X if (pst.pam_pssm) {
+X pst.pam2p[0] = alloc_pam2p(qm_msg.n0,nsq);
+X pst.pam2p[1] = alloc_pam2p(qm_msg.n0,nsq);
+X }
+X
+X init_work (aa0[0], qm_msg.n0, &pst, &f_str[0]);
+X f_str[5]=f_str[4]=f_str[3]=f_str[2]=f_str[1]=f_str[0];
+X
+X if (qm_msg.qshuffle) {
+X if ((aa0s=(unsigned char *)malloc((qm_msg.n0+2)*sizeof (char)))==NULL)
+X w_abort ("Unable to allocate aa0s array - exiting!","");
+X *aa0s='\0';
+X aa0s++;
+X
+X memcpy(aa0s,aa0[0],qm_msg.n0+1);
+X qshuffle(aa0s,qm_msg.n0,qm_msg.nm0);
+#ifdef DEBUG
+X fprintf(stderr,"[%d] shuffle: %d\n",worker,qm_msg.n0);
+X fputs(" ",stderr);
+X for (i=0; i<5; i++) {fprintf(stderr,"%c",pst.sq[aa0s[i]]);}
+X fputc('\n',stderr);
+#endif
+X
+X init_work (aa0s, qm_msg.n0, &pst, &qf_str);
+X old_shuffle=1;
+X }
+X
+X if (m_msg.qframe == 2) {
+X memcpy(aa0[1],aa0[0],qm_msg.n0+1);
+X revcomp(aa0[1],qm_msg.n0,&pst.c_nt[0]);
+X init_work (aa0[1], qm_msg.n0, &pst, &f_str[1]);
+X }
+#ifdef DEBUG
+/*
+X fprintf(stderr,"[%d] init_work qf: %d nf: %d\n",worker,m_msg.qframe,m_msg.nframe);
+*/
+#endif
+X }
+X
+/*****************************************************************/
+/* Finished with initialization, */
+/* start doing comparisons or alignments */
+/*****************************************************************/
+X
+X bestcnt = 0;
+X if (qm_msg.slist == 0) { /* library search */
+X
+/*****************************************************************/
+/* Start library search */
+/*****************************************************************/
+X
+X for (count=0; count < lcnt; count++) {
+X
+X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
+X
+X rst.score[0] = rst.score[1] = rst.score[2] = 0;
+X if (m_msg.self) {
+X lsn = lend + count;
+X if ((qm_msg.seqnm > lsn) && (((qm_msg.seqnm + lsn) % 2) != 0)) {
+X do_work (aa0[itt], qm_msg.n0,seqpt[count].aa1, seqpt[count].n1,
+X itt, &pst, f_str[itt], 0, &rst);
+X }
+X else if ((qm_msg.seqnm <= lsn) && (((qm_msg.seqnm+lsn)%2) == 0)) {
+X do_work (aa0[itt], qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
+X itt, &pst, f_str[itt], 0, &rst);
+X }
+X else continue;
+X }
+X else {
+X do_work (aa0[itt], qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
+X itt, &pst, f_str[itt], 0, &rst);
+X if (qm_msg.qshuffle) {
+X do_work (aa0s, qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
+X itt, &pst, qf_str, 1, &qrst);
+X }
+X }
+#ifdef DEBUG
+/*
+X if (count < 10 || (count % 200 == 199)) {
+X fprintf(stderr,"[node %d] itt:%d/%d (%d) %3d %3d %3d - %d/%d\n",
+X worker,itt,m_msg.nitt1,count,
+X rst.score[0],rst.score[1],rst.score[2],
+X seqpt[count].m_seqnm,seqpt[count].n1);
+X }
+*/
+#endif
+X sw_score = -1;
+X
+X bestr[bestcnt].seqnm = count;
+X bestr[bestcnt].m_seqnm = seqpt[count].m_seqnm;
+X bestr[bestcnt].score[0] = rst.score[0];
+X bestr[bestcnt].score[1] = rst.score[1];
+X bestr[bestcnt].score[2] = rst.score[2];
+X bestr[bestcnt].escore = rst.escore;
+X bestr[bestcnt].segnum = rst.segnum;
+X bestr[bestcnt].seglen = rst.seglen;
+X bestr[bestcnt].frame = itt;
+X bestr[bestcnt].comp = rst.comp;
+X bestr[bestcnt].H = rst.H;
+X
+X bestr[bestcnt].qr_score = qrst.score[pst.score_ix];
+X bestr[bestcnt].qr_escore = qrst.escore;
+X
+X if (pst.zsflag >= 10) {
+X if (pst.zs_win > 0)
+X wshuffle(seqpt[count].aa1, aa1s,seqpt[count].n1,pst.zs_win,&ieven);
+X else
+X shuffle(seqpt[count].aa1, aa1s,seqpt[count].n1);
+X
+X do_work(aa0[itt],qm_msg.n0,aa1s,seqpt[count].n1,itt, &pst,
+X f_str[itt], 0, &rst);
+X bestr[bestcnt].r_score = rst.score[pst.score_ix];
+X }
+X
+X bestcnt++;
+X if (bestcnt >= qres_bufsize) {
+#ifdef DEBUG
+X fprintf(stderr," worker: %d sending %d results\n",worker,qres_bufsize);
+#endif
+X send_bestr(hosttid,curtype,bestr,qres_bufsize,bestcnt);
+X bestcnt = 0;
+X }
+X }
+X } /* END - for count loop */
+X send_bestr(hosttid, curtype, bestr,qres_bufsize, (bestcnt | FINISHED));
+X }
+X
+/*****************************************************************/
+/* End of library search section */
+/*****************************************************************/
+X
+/*****************************************************************/
+/* Do do_opt() from list s_func=DO_CALC_FLG */
+/*****************************************************************/
+X
+X else if (qm_msg.s_func== DO_CALC_FLG) { /* qm_msg.slist > 0 */
+X
+X bestcnt = 0;
+X for (count=0; count < qm_msg.slist; count++) {
+X rst.score[0] = rst.score[1] = rst.score[2] = 0;
+X itt = liblist[count].frame;
+X seqnm = bestr2[bestcnt].seqnm = liblist[count].seqnm;
+X bestr2[bestcnt].m_seqnm = seqpt[seqnm].m_seqnm;
+X
+X do_opt (aa0[itt], qm_msg.n0, seqpt[seqnm].aa1,
+X seqpt[seqnm].n1, itt,
+X &pst, f_str[itt], &rst);
+X
+X bestr2[bestcnt].score[0] = rst.score[0];
+X bestr2[bestcnt].score[1] = rst.score[1];
+X bestr2[bestcnt].score[2] = rst.score[2];
+X bestr2[bestcnt].escore = rst.escore;
+X bestr2[bestcnt].segnum = rst.segnum;
+X bestr2[bestcnt].seglen = rst.seglen;
+X bestr2[bestcnt].aln_code_n = 0;
+X bestcnt++;
+X
+X if (bestcnt >= BFR2) {
+X send_bestr2(hosttid,bestr2,bestcnt);
+X bestcnt = 0;
+X }
+X } /* END - for count loop */
+X
+X send_bestr2(hosttid,bestr2,(bestcnt|FINISHED));
+X }
+X
+/*****************************************************************/
+/* s_func=DO_OPT_FLG */
+/* */
+/* from list: */
+/* if (m_msg.stages > 1) do_opt() */
+/* do_walign() */
+/* calc_id or calc_code, no calcons */
+/*****************************************************************/
+X
+X /* s_func == 1 means do_opt if necessary */
+X else if (qm_msg.s_func== DO_OPT_FLG) { /* qm_msg.slist > 0 */
+#ifdef DEBUG
+X fprintf(stderr," [%d] starting s_func:1 slist: %d\n",
+X worker,qm_msg.slist);
+#endif
+X /* get the buffer once - re-use it for the entire slist */
+X if (m_msg.show_code == SHOW_CODE_ALIGN) {
+X seqc_buff_len = (BFR2+5)*256;
+X seqc = seqc_buff = (char *)calloc(seqc_buff_len,sizeof(char));
+X seqc_buff_cnt = 0;
+X if (seqc_buff == NULL) {
+X seqc_buff_cnt = seqc_buff_len = 0;
+X }
+X }
+X
+X bestcnt = 0;
+X for (count=0; count < qm_msg.slist; count++) {
+X rst.score[0] = rst.score[1] = rst.score[2] = 0;
+X itt = liblist[count].frame;
+X seqnm = liblist[count].seqnm;
+X
+X bestr2[bestcnt].seqnm = seqnm;
+X bestr2[bestcnt].m_seqnm = seqpt[seqnm].m_seqnm;
+X if (m_msg.stages > 1) {
+X do_opt (aa0[itt], qm_msg.n0, seqpt[seqnm].aa1,
+X seqpt[seqnm].n1, itt,
+X &pst, f_str[itt], &rst);
+X
+X bestr2[bestcnt].score[0] = rst.score[0];
+X bestr2[bestcnt].score[1] = rst.score[1];
+X bestr2[bestcnt].score[2] = rst.score[2];
+X }
+X
+X if (m_msg.markx & MX_M9SUMM) {
+#ifdef DEBUG
+X fprintf(stderr," [%d] starting do_walign seqnm: %d n1: %d\n",
+X worker,seqnm,seqpt[seqnm].n1);
+#endif
+X aln_dp = &bestr2[bestcnt].aln_d;
+X memcpy(aln_dp, &aln,sizeof(struct a_struct));
+X
+X sw_score = do_walign(aa0[itt], qm_msg.n0,
+X seqpt[seqnm].aa1, seqpt[seqnm].n1,
+X itt, &pst, f_str[itt],
+X &seqpt[seqnm].a_res[itt],
+X &have_walign);
+X seqpt[seqnm].sw_score[itt] = sw_score;
+X
+X /* the a_res[itt] provided by do_walign is re-used - so it
+X must be copied to a valid location */
+X
+X if (have_walign) {
+X if ((tres = calloc(seqpt[seqnm].a_res[itt].nres+1,sizeof(int)))==NULL) {
+X w_abort(" cannot allocate tres");
+X }
+X else {
+X memcpy(tres,seqpt[seqnm].a_res[itt].res,sizeof(int)*seqpt[seqnm].a_res[itt].nres);
+X seqpt[seqnm].a_res[itt].res = tres;
+X /*
+X fprintf(stderr, " [%d] saving %d:%d[%d]:%o\n", worker,
+X walign_cnt[itt],seqnm,itt, seqpt[seqnm].a_res[itt].res);
+X */
+X if (walign_cnt[itt] < lcnt) walign_done[itt][walign_cnt[itt]++] = seqnm;
+X else w_abort(" walign_cnt overrun");
+X seqpt[seqnm].walign_dflg[itt] = 1;
+X }
+X }
+X aln_func_vals(itt, aln_dp);
+X
+#ifdef DEBUG
+X fprintf(stderr," [%d] starting calc_id sw_score: %d\n",
+X worker,sw_score);
+X fprintf(stderr,"bi: %d seqc_buff_cnt: %d - seqc_buff_len: %d\n",
+X bestcnt, seqc_buff_cnt, seqc_buff_len);
+#endif
+X aln_code_n = 0; /* must be set in case no seqc_code */
+X if (m_msg.show_code == SHOW_CODE_ALIGN) {
+X if (seqc_buff_cnt < seqc_buff_len - 256) {
+X lc=calc_code(aa0[itt],qm_msg.n0,
+X seqpt[seqnm].aa1, seqpt[seqnm].n1,
+X aln_dp,seqpt[seqnm].a_res[itt],pst,
+X seqc,seqc_buff_len-seqc_buff_cnt-10,
+X f_str[itt]);
+X aln_code_n = strlen(seqc);
+X seqc_buff_cnt += aln_code_n + 1;
+/*
+X fprintf(stderr,"%d:%d:%d: %d/%d - [%d] %s\n",
+X worker,seqnm,bestcnt,aln_code_n,seqc_buff_cnt, seqc-seqc_buff,seqc);
+*/
+X seqc += aln_code_n;
+X *seqc++ = '\0';
+X }
+X }
+X else {
+X lc=calc_id(aa0[itt],qm_msg.n0,
+X seqpt[seqnm].aa1, seqpt[seqnm].n1,
+X aln_dp,seqpt[seqnm].a_res[itt],pst,f_str[itt]);
+X }
+X
+X nident = aln_dp->nident;
+X aln_dp->a_len = lc;
+X
+X if (lc > 0) percent = (100.0*(float)nident)/(float)lc;
+X else percent = 0.0;
+X
+X ngap = aln_dp->ngap_q + aln_dp->ngap_l;
+#ifndef SHOWSIM
+X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
+#else
+X if (lc > 0) gpercent =(100.0*(float)aln_dp->nsim)/(float)lc;
+#endif
+X else gpercent = -1.0;
+X
+X bestr2[bestcnt].sw_score = sw_score;
+X bestr2[bestcnt].percent = percent;
+X bestr2[bestcnt].gpercent = gpercent;
+X bestr2[bestcnt].aln_code_n = aln_code_n;
+X }
+X bestcnt++;
+X
+X if (bestcnt >= BFR2) {
+X send_bestr2(hosttid,bestr2,bestcnt);
+X if (m_msg.show_code == SHOW_CODE_ALIGN) {
+X send_code(hosttid,seqc_buff,seqc_buff_cnt);
+X memset(seqc_buff,0,seqc_buff_len);
+X seqc = seqc_buff;
+X seqc_buff_cnt = 0;
+X }
+X bestcnt = 0;
+X }
+X } /* END - for count loop */
+X
+X send_bestr2(hosttid,bestr2,(bestcnt|FINISHED));
+X if (m_msg.show_code == SHOW_CODE_ALIGN) {
+X send_code(hosttid,seqc_buff,seqc_buff_cnt);
+X if (seqc_buff) free(seqc_buff);
+X }
+X }
+X /* get alignments */
+X
+/*****************************************************************/
+/* s_list > */
+/* s_func=DO_ALIGN_FLG */
+/* */
+/* from list: */
+/* do_walign() if not done already */
+/* calcons() */
+/*****************************************************************/
+X
+X else if (qm_msg.s_func==DO_ALIGN_FLG) {
+X for (count=0; count < qm_msg.slist; count++) {
+X itt = liblist[count].frame;
+X seqnm = liblist[count].seqnm;
+/*
+X fprintf(stderr,"worker: %d; %s, frame: %d\n",worker,qm_msg.libstr,itt);
+*/
+X if (!seqpt[seqnm].walign_dflg[itt]) {
+X seqpt[seqnm].sw_score[itt] =
+X sw_score = do_walign (aa0[itt], qm_msg.n0,seqpt[seqnm].aa1,
+X seqpt[seqnm].n1, itt,
+X &pst, f_str[itt],
+X &seqpt[seqnm].a_res[itt],
+X &have_walign);
+X }
+X else {
+X sw_score = seqpt[seqnm].sw_score[itt];
+X pre_cons(seqpt[seqnm].aa1,seqpt[seqnm].n1,itt,f_str[itt]);
+X }
+X
+X aln_func_vals(itt, &aln);
+X
+X if (aln.showall==1)
+X maxc = seqpt[seqnm].a_res[itt].nres + max(seqpt[seqnm].a_res[itt].min0,seqpt[seqnm].a_res[itt].min1)+
+X max((qm_msg.n0-seqpt[seqnm].a_res[itt].max0),
+X (seqpt[seqnm].n1-seqpt[seqnm].a_res[itt].max1))+4;
+X else maxc = seqpt[seqnm].a_res[itt].nres + 4*aln.llen+4;
+X
+X initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
+X
+X if (!m_msg.ann_flg) {
+X nc=calcons(aa0[itt],qm_msg.n0,
+X seqpt[seqnm].aa1, seqpt[seqnm].n1,
+X &lc,&aln,seqpt[seqnm].a_res[itt],pst,
+X seqc0,seqc1,seqca,f_str[itt]);
+X memset(seqc0a,' ',nc);
+X seqc0a[nc]='\0';
+X }
+X else {
+X nc=calcons_a(aa0[itt],m_msg.aa0a,qm_msg.n0,
+X seqpt[seqnm].aa1, seqpt[seqnm].n1,
+X &lc,&aln,seqpt[seqnm].a_res[itt],pst,
+X seqc0,seqc0a,seqc1,seqca,
+X m_msg.ann_arr,f_str[itt]);
+X }
+X
+X /*
+X fprintf(stderr,"[%d] nident: %d nsim: %d lc: %d\n",aln.nident, aln.nsim, lc);
+X */
+X
+X maxc = max(strlen(seqc0),strlen(seqc1))+1;
+X nident = aln.nident;
+X percent = (100.0*(float)nident)/(float)lc;
+X ngap = aln.ngap_q+aln.ngap_l;
+#ifndef SHOWSIM
+X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
+#else
+X if (lc > 0) gpercent = (100.0*(float)aln.nsim)/(float)lc;
+#endif
+X else gpercent = -1.0;
+X
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkint(&nc,1,1);
+X pvm_pkint(&lc,1,1);
+X pvm_pkint(&maxc,1,1);
+X pvm_pkfloat(&percent,1,1);
+X pvm_pkfloat(&gpercent,1,1);
+X pvm_pkint(&sw_score,1,1);
+X pvm_pkbyte((char *)&aln,sizeof(struct a_struct),1);
+X pvm_send(hosttid,ALN1TYPE);
+#ifdef DEBUG
+X fprintf(stderr,"[%d] ALN1TYPE sent: %d\n",worker,qm_msg.n0);
+#endif
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte(seqc0,maxc,1);
+X if (m_msg.ann_flg) pvm_pkbyte(seqc0a,maxc,1);
+X pvm_pkbyte(seqc1,maxc,1);
+X pvm_pkbyte(seqca,maxc,1);
+X pvm_send(hosttid,ALN2TYPE);
+#endif
+#ifdef MPI_SRC
+X last_msg_b[0]=nc;
+X last_msg_b[1]=lc;
+X last_msg_b[2]=maxc;
+X last_msg_b[3]=sw_score;
+X MPI_Send(last_msg_b,4,MPI_INT,hosttid,ALN1TYPE,MPI_COMM_WORLD);
+X MPI_Send(&percent,1,MPI_FLOAT,hosttid,ALN2TYPE,MPI_COMM_WORLD);
+X MPI_Send(&gpercent,1,MPI_FLOAT,hosttid,ALN2TYPE,MPI_COMM_WORLD);
+X
+/* p4_dprintf("[%d] sending aln\n",worker); */
+X MPI_Send(&aln,sizeof(struct a_struct),MPI_BYTE,hosttid,
+X ALN3TYPE,MPI_COMM_WORLD);
+X
+X MPI_Send(seqc0,maxc,MPI_BYTE,hosttid,ALN2TYPE,MPI_COMM_WORLD);
+X if (m_msg.ann_flg) MPI_Send(seqc0a,maxc,MPI_BYTE,hosttid,ALN2TYPE,MPI_COMM_WORLD);
+X MPI_Send(seqc1,maxc,MPI_BYTE,hosttid,ALN3TYPE,MPI_COMM_WORLD);
+X MPI_Send(seqca,maxc,MPI_BYTE,hosttid,ALN3TYPE,MPI_COMM_WORLD);
+#endif
+X freeseq(&seqc0,&seqc0a,&seqc1,&seqca);
+X }
+X }
+X
+/* send back parameter settings */
+X if (worker==FIRSTWORK && qm_msg.slist==0) {
+X get_param(&pst, gstring2,gstring3);
+#ifdef PVM_SRC
+X pvm_initsend(PvmDataRaw);
+X pvm_pkbyte(gstring2,sizeof(gstring2),1);
+X pvm_pkbyte(gstring3,sizeof(gstring3),1);
+X pvm_send(hosttid,PARAMTYPE);
+#endif
+#ifdef MPI_SRC
+X MPI_Send(gstring2,sizeof(gstring2),MPI_BYTE,
+X hosttid,PARAMTYPE,MPI_COMM_WORLD);
+X MPI_Send(gstring3,sizeof(gstring3),MPI_BYTE,
+X hosttid,PARAMTYPE,MPI_COMM_WORLD);
+#endif
+X }
+X
+X if (qm_msg.slist==0) {
+X if (curtype == ONETYPE) curtype = TWOTYPE;
+X else curtype = ONETYPE;
+X }
+X } /* END - while (1) loop */
+#ifdef PVM_SRC
+X pvm_exit();
+#endif
+#ifdef MPI_SRC
+/* MPI_Finalize(); */
+#endif
+}
+X
+/* send_bestr() - ship one buffer of first-pass results to hosttid.
+X
+X   lastcnt is stored in the seqnm field of the extra slot
+X   bestr[buf_size]; the callers in this file pass either the number of
+X   filled entries or (count | FINISHED).  buf_size+1 entries are always
+X   transmitted, tagged with message type curtype.
+X
+X   NOTE(review): assumes bestr has room for buf_size+1 entries - the
+X   allocation is not visible here; confirm.
+*/
+void
+send_bestr(int hosttid, int curtype,
+X           struct comstr *bestr, int buf_size, int lastcnt)
+{
+X  /* the slot after the last result carries the count/flag word */
+X  (bestr + buf_size)->seqnm = lastcnt;
+X
+#ifdef PVM_SRC
+X  pvm_initsend(PvmDataRaw);
+X  pvm_pkbyte((char *)bestr, sizeof(*bestr)*(buf_size+1), 1);
+X  pvm_send(hosttid, curtype);
+#endif
+#ifdef MPI_SRC
+X  MPI_Send(bestr, sizeof(*bestr)*(buf_size+1), MPI_BYTE,
+X           hosttid, curtype, MPI_COMM_WORLD);
+#endif
+}
+X
+/* send_bestr2() - ship one buffer of list (second-pass) results.
+X
+X   lastcnt goes into the seqnm field of the extra slot bestr2[BFR2];
+X   callers in this file pass the entry count or (count | FINISHED).
+X   BFR2+1 entries are always sent, with message type LISTRTYPE.
+*/
+void
+send_bestr2(int hosttid, struct comstr2 *bestr2,
+X            int lastcnt)
+{
+X  /* trailing slot carries the count/flag word */
+X  (bestr2 + BFR2)->seqnm = lastcnt;
+
+#ifdef PVM_SRC
+X  pvm_initsend(PvmDataRaw);
+X  pvm_pkbyte((char *)bestr2, sizeof(*bestr2)*(BFR2+1), 1);
+X  pvm_send(hosttid, LISTRTYPE);
+#endif
+#ifdef MPI_SRC
+X  MPI_Send(bestr2, sizeof(*bestr2)*(BFR2+1), MPI_BYTE,
+X           hosttid, LISTRTYPE, MPI_COMM_WORLD);
+#endif
+}
+X
+/* send_code() - send the alignment-code text buffer to hosttid.
+X
+X   The byte count is always sent first (message type CODERTYPE); the
+X   buffer contents follow only when the count is positive.  Under PVM
+X   both parts travel in one message, under MPI as two messages.
+X   Callers in this file pass the number of bytes used (seqc_buff_cnt)
+X   as seqc_buff_len.
+*/
+void
+send_code(int hosttid, char *seqc_buff, int seqc_buff_len)
+{
+#ifdef PVM_SRC
+X  pvm_initsend(PvmDataRaw);
+X  pvm_pkint(&seqc_buff_len, 1, 1);
+X  if (seqc_buff_len > 0) {
+X    pvm_pkbyte(seqc_buff, seqc_buff_len, 1);
+X  }
+X  pvm_send(hosttid, CODERTYPE);
+#endif
+#ifdef MPI_SRC
+X  MPI_Send(&seqc_buff_len, 1, MPI_INT,
+X           hosttid, CODERTYPE, MPI_COMM_WORLD);
+X  if (seqc_buff_len > 0) {
+X    MPI_Send(seqc_buff, seqc_buff_len, MPI_BYTE,
+X             hosttid, CODERTYPE, MPI_COMM_WORLD);
+X  }
+#endif
+}
+X
+#ifdef PVM_SRC
+/* tidtonode() - map a task id to its index in pinums[].
+X   Searches indices FIRSTNODE .. nnodes-1; returns the matching index,
+X   or -1 (after a message on stderr) when tid is not present. */
+int tidtonode(int tid)
+{
+X  int node;
+X
+X  for (node = FIRSTNODE; node < nnodes; node++) {
+X    if (pinums[node] == tid) {
+X      return node;
+X    }
+X  }
+X
+X  fprintf(stderr," cannot find tid %d\n",tid);
+X  return -1;
+}
+#endif
+X
+/* free_ares() - release saved alignment results for frame itt.
+X
+X   walign_done[0..walign_cnt-1] lists the sequence numbers whose
+X   a_res[itt].res buffer was saved; every listed entry is reset to 0.
+X   For each sequence the walign_dflg[itt] flag must be set, otherwise
+X   w_abort() is called.  When nres > 0 the res buffer is freed and
+X   nres zeroed; the flag is cleared in every case.
+X
+X   worker is only referenced by the disabled debug output.
+X
+X   NOTE(review): a res buffer whose nres is 0 is not freed here -
+X   confirm that no allocation is saved with nres == 0.
+*/
+void
+free_ares(struct sqs2 *seqpt, int itt, int *walign_done, int walign_cnt, int worker)
+{
+X  int i, seqnm;
+X  struct sqs2 *sq_p;
+X
+X  for (i = 0; i < walign_cnt; i++) {
+X    seqnm = walign_done[i];
+X    walign_done[i] = 0;
+X    sq_p = &seqpt[seqnm];
+X
+X    if (!sq_p->walign_dflg[itt]) {
+X      w_abort(" have walign_done but no walign_dflag");
+X    }
+X    else if (sq_p->a_res[itt].nres > 0) {
+X      /*
+X      fprintf(stderr, "[%d] freeing %d:%d[%d]:%o\n",
+X              worker,i,seqnm,itt,seqpt[seqnm].a_res[itt].res);
+X      */
+X      sq_p->a_res[itt].nres = 0;
+X      free(sq_p->a_res[itt].res);
+X    }
+X
+X    sq_p->walign_dflg[itt] = 0;
+X  }
+}
+SHAR_EOF
+chmod 0644 p2_workcomp.c ||
+echo 'restore of p2_workcomp.c failed'
+Wc_c="`wc -c < 'p2_workcomp.c'`"
+test 37611 -eq "$Wc_c" ||
+ echo 'p2_workcomp.c: original size 37611, current size' "$Wc_c"
+fi
+# ============= p_mw.h ==============
+if test -f 'p_mw.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping p_mw.h (File already exists)'
+else
+echo 'x - extracting p_mw.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'p_mw.h' &&
+/* Concurrent read version */
+X
+/* $Name: fa_34_26_5 $ - $Id: p_mw.h,v 1.17 2006/04/12 18:00:02 wrp Exp $ */
+X
+#ifndef FSEEK_T_DEF
+#ifndef USE_FSEEKO
+typedef long fseek_t;
+#else
+typedef off_t fseek_t;
+#endif
+#endif
+X
+/* struct beststr - result record kept for one library sequence.
+X The worker code in p2_workcomp.c reports identically named values
+X (seqnm, m_seqnm, score[0..2], escore, segnum, seglen, frame, comp, H)
+X for each sequence it scores.
+X NOTE(review): how those values reach this struct is not visible in
+X this header - confirm against the code that fills struct beststr. */
+struct beststr {
+X /* NOTE(review): elsewhere in this archive n1 is passed as a sequence
+X length (seqpt[].n1 in p2_workcomp.c) - confirm "sequence number" */
+X int n1; /* sequence number */
+X int score[3]; /* score */
+X int rscore; /* score from shuffled sequence */
+X int sw_score; /* optimal score from alignment */
+X double comp; /* karlin 1/lambda comp.parameter */
+X double H; /* karlin H information content */
+X double zscore;
+X double escore;
+X double r_escore;
+X int segnum;
+X int seglen;
+X int lib;
+X /* in this header fseek_t is long, or off_t under USE_FSEEKO, unless
+X FSEEK_T_DEF was already defined before inclusion */
+X fseek_t lseek;
+X int cont;
+X int frame;
+X int m_seqnm;
+X int seqnm;
+X int wrkr;
+X struct sql *desptr;
+X struct a_struct *aln_d;
+X /* presumably the encoded alignment string and its length, as produced
+X by calc_code() on the worker - TODO confirm */
+X char *aln_code;
+X int aln_code_n;
+X /* presumably percent identity, plain and gap-adjusted, as computed by
+X the worker - TODO confirm */
+X float percent, gpercent;
+};
+X
+/* struct stat_str - one score plus the per-sequence values that
+X accompany it.  comp, H, escore, segnum and seglen have the same names
+X and types as the members of struct rstruct (param.h).
+X NOTE(review): presumably the input record for the statistical
+X estimation code - confirm against its users. */
+struct stat_str {
+X int score;
+X int n1;
+X double comp;
+X double H;
+X double escore;
+X int segnum;
+X int seglen;
+};
+X
+/* this structure passes library sequences to the worker threads
+X and returns scores */
+X
+#include "w_mw.h"
+X
+/*
+struct pbuf_head {
+X int buf_cnt;
+X unsigned char *start;
+X struct sqs2 *buf;
+};
+*/
+SHAR_EOF
+chmod 0644 p_mw.h ||
+echo 'restore of p_mw.h failed'
+Wc_c="`wc -c < 'p_mw.h'`"
+test 1096 -eq "$Wc_c" ||
+ echo 'p_mw.h: original size 1096, current size' "$Wc_c"
+fi
+# ============= pam120.mat ==============
+if test -f 'pam120.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping pam120.mat (File already exists)'
+else
+echo 'x - extracting pam120.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pam120.mat' &&
+#
+# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+#
+# PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
+#
+# Expected score = -1.64, Entropy = 0.979 bits
+#
+# Lowest score = -8, Highest score = 12
+#
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 3 -3 -1 0 -3 -1 0 1 -3 -1 -3 -2 -2 -4 1 1 1 -7 -4 0 0 -1 -1
+R -3 6 -1 -3 -4 1 -3 -4 1 -2 -4 2 -1 -5 -1 -1 -2 1 -5 -3 -2 -1 -2
+N -1 -1 4 2 -5 0 1 0 2 -2 -4 1 -3 -4 -2 1 0 -4 -2 -3 3 0 -1
+D 0 -3 2 5 -7 1 3 0 0 -3 -5 -1 -4 -7 -3 0 -1 -8 -5 -3 4 3 -2
+C -3 -4 -5 -7 9 -7 -7 -4 -4 -3 -7 -7 -6 -6 -4 0 -3 -8 -1 -3 -6 -7 -4
+Q -1 1 0 1 -7 6 2 -3 3 -3 -2 0 -1 -6 0 -2 -2 -6 -5 -3 0 4 -1
+E 0 -3 1 3 -7 2 5 -1 -1 -3 -4 -1 -3 -7 -2 -1 -2 -8 -5 -3 3 4 -1
+G 1 -4 0 0 -4 -3 -1 5 -4 -4 -5 -3 -4 -5 -2 1 -1 -8 -6 -2 0 -2 -2
+H -3 1 2 0 -4 3 -1 -4 7 -4 -3 -2 -4 -3 -1 -2 -3 -3 -1 -3 1 1 -2
+I -1 -2 -2 -3 -3 -3 -3 -4 -4 6 1 -3 1 0 -3 -2 0 -6 -2 3 -3 -3 -1
+L -3 -4 -4 -5 -7 -2 -4 -5 -3 1 5 -4 3 0 -3 -4 -3 -3 -2 1 -4 -3 -2
+K -2 2 1 -1 -7 0 -1 -3 -2 -3 -4 5 0 -7 -2 -1 -1 -5 -5 -4 0 -1 -2
+M -2 -1 -3 -4 -6 -1 -3 -4 -4 1 3 0 8 -1 -3 -2 -1 -6 -4 1 -4 -2 -2
+F -4 -5 -4 -7 -6 -6 -7 -5 -3 0 0 -7 -1 8 -5 -3 -4 -1 4 -3 -5 -6 -3
+P 1 -1 -2 -3 -4 0 -2 -2 -1 -3 -3 -2 -3 -5 6 1 -1 -7 -6 -2 -2 -1 -2
+S 1 -1 1 0 0 -2 -1 1 -2 -2 -4 -1 -2 -3 1 3 2 -2 -3 -2 0 -1 -1
+T 1 -2 0 -1 -3 -2 -2 -1 -3 0 -3 -1 -1 -4 -1 2 4 -6 -3 0 0 -2 -1
+W -7 1 -4 -8 -8 -6 -8 -8 -3 -6 -3 -5 -6 -1 -7 -2 -6 12 -2 -8 -6 -7 -5
+Y -4 -5 -2 -5 -1 -5 -5 -6 -1 -2 -2 -5 -4 4 -6 -3 -3 -2 8 -3 -3 -5 -3
+V 0 -3 -3 -3 -3 -3 -3 -2 -3 3 1 -4 1 -3 -2 -2 0 -8 -3 5 -3 -3 -1
+B 0 -2 3 4 -6 0 3 0 1 -3 -4 0 -4 -5 -2 0 0 -6 -3 -3 4 2 -1
+Z -1 -1 0 3 -7 4 4 -2 1 -3 -3 -1 -2 -6 -1 -1 -2 -7 -5 -3 2 4 -1
+XX -1 -2 -1 -2 -4 -1 -1 -2 -2 -1 -2 -2 -2 -3 -2 -1 -1 -5 -3 -1 -1 -1 -2
+X
+SHAR_EOF
+chmod 0644 pam120.mat ||
+echo 'restore of pam120.mat failed'
+Wc_c="`wc -c < 'pam120.mat'`"
+test 1922 -eq "$Wc_c" ||
+ echo 'pam120.mat: original size 1922, current size' "$Wc_c"
+fi
+# ============= pam250.mat ==============
+if test -f 'pam250.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping pam250.mat (File already exists)'
+else
+echo 'x - extracting pam250.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pam250.mat' &&
+#
+# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+#
+# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
+#
+# Expected score = -0.844, Entropy = 0.354 bits
+#
+# Lowest score = -8, Highest score = 17
+#
+X A R N D C Q E G H I L K M F P S T W Y V B Z X
+A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0
+R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1
+N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0
+D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1
+C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3
+Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1
+E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1
+G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1
+H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1
+I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1
+L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1
+K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1
+M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1
+F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2
+P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1
+S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0
+T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0
+W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4
+Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2
+V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1
+B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1
+Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1
+XX 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1
+X
+SHAR_EOF
+chmod 0644 pam250.mat ||
+echo 'restore of pam250.mat failed'
+Wc_c="`wc -c < 'pam250.mat'`"
+test 1923 -eq "$Wc_c" ||
+ echo 'pam250.mat: original size 1923, current size' "$Wc_c"
+fi
+# ============= param.h ==============
+if test -f 'param.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping param.h (File already exists)'
+else
+echo 'x - extracting param.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'param.h' &&
+/* $Name: fa_34_26_5 $ - $Id: param.h,v 1.41 2007/04/26 18:37:19 wrp Exp $ */
+X
+X
+#ifndef P_STRUCT
+#define P_STRUCT
+X
+#define MAXSQ 50
+X
+X
+/* Concurrent read version */
+X
+/* struct fastr - one arm ("fa") of the param_u union in struct pstruct
+X below; the other arm is struct prostr ("pr").
+X NOTE(review): the individual fields are not documented in this
+X header; presumably they are the FASTA heuristic settings (ktup word
+X size, gap and optimization cutoffs) - confirm against the code that
+X sets them. */
+struct fastr {
+X int ktup;
+X int cgap;
+X int pgap;
+X int pamfact;
+X int scfact;
+X int bestoff;
+X int bestscale;
+X int bkfact;
+X int bktup;
+X int bestmax;
+X int altflag;
+X int optflag;
+X int iniflag;
+X int optcut;
+X int optcut_set;
+X int optwid;
+};
+X
+/* struct prostr - the second arm ("pr") of the param_u union in
+X struct pstruct below; it shares storage with struct fastr ("fa").
+X NOTE(review): presumably gap-open, gap-extend and band width for a
+X profile-style search - confirm against the code that sets them. */
+struct prostr {
+X int gopen;
+X int gextend;
+X int width;
+};
+X
+/* struct pstruct - the search parameter block.  The worker code in
+X p2_workcomp.c hands a "pst" value (presumably of this type) to
+X do_work(), do_opt(), do_walign(), calc_id(), calc_code() and
+X calcons(), and reads zsflag, zs_win and score_ix from it. */
+struct pstruct /* parameters */
+{
+X int n0; /* length of query sequence, used for statistics */
+X int gdelval; /* value gap open (-10) */
+X int ggapval; /* value for additional residues in gap (-2) */
+X int gshift; /* frameshift for fastx, fasty */
+X int gsubs; /* nt substitution in fasty */
+X int p_d_mat; /* dna match penalty */
+X int p_d_mis; /* dna mismatch penalty */
+X int p_d_set; /* using match/mismatch */
+X /* the worker uses score_ix to pick one of rst.score[0..2] */
+X int score_ix; /* index to sorted score */
+X /* the worker rescores against a shuffled sequence when zsflag >= 10 */
+X int zsflag; /* use scalebest() */
+X /* NOTE(review): comment is identical to zsflag's; the purpose of the
+X _f variant is not shown here - confirm */
+X int zsflag_f; /* use scalebest() */
+X /* zs_win > 0 makes the worker call wshuffle() with this value
+X instead of shuffle() */
+X int zs_win;
+X int histint; /* histogram interval */
+X char sq[MAXSQ+1];
+X int hsq[MAXSQ+1];
+X int nsq; /* length of normal sq */
+X int ext_sq_set; /* flag for using extended alphabet */
+X /* NOTE(review): sqx is MAXSQ bytes while sq, hsq, hsqx and c_nt are
+X MAXSQ+1 - confirm this is intended (no room for a terminator at
+X full length) */
+X char sqx[MAXSQ];
+X int hsqx[MAXSQ+1];
+X int c_nt[MAXSQ+1];
+X int nsqx; /* length of extended sq */
+X int dnaseq; /* -1 = not set (protein); 0 = protein; 1 = DNA; 2 = other, 3 RNA */
+X int nt_align; /* DNA/RNA alignment = 1 */
+X int debug_lib;
+X int tr_type; /* codon table */
+X int sw_flag;
+X char pamfile[120]; /* pam file type */
+X char pgpfile[120];
+X int pgpfile_type;
+X float pamscale;
+X int pam_pssm;
+X int pam_set;
+X int have_pam2;
+X int **pam2[2];
+X int **pam2p[2];
+X int pamoff; /* offset for pam values */
+X int pam_l, pam_h, pam_xx, pam_xm; /* lowest, highest pam value */
+X int pam_x_set;
+X int pam_ms; /* use a Mass Spec pam matrix */
+X int maxlen;
+X long zdb_size; /* force database size */
+X int pgm_id;
+X /* algorithm-specific settings; fa and pr share the same storage */
+X union {
+X struct fastr fa;
+X struct prostr pr;
+X } param_u;
+X int pseudocts;
+X int shuff_node;
+};
+X
+/* Result structure - do not remove */
+/* Holds the scores and statistics values for one comparison.  It is
+X embedded as "rst" in struct buf_str below.  The worker in
+X p2_workcomp.c reads score[0..2], escore, segnum, seglen, comp and H
+X from its own "rst" after do_work()/do_opt() - presumably a value of
+X this type; confirm. */
+struct rstruct
+{
+X int score[3];
+X double comp;
+X double H;
+X double escore;
+X int segnum;
+X int seglen;
+};
+X
+#ifndef PCOMPLIB
+/* struct thr_str - only declared when PCOMPLIB is not defined.
+X NOTE(review): presumably the per-thread argument block (worker
+X number, query aa0/n0, shared parameter pointer ppst) for the threaded
+X programs - field usage is not visible in this header; confirm. */
+struct thr_str {
+X int worker;
+X void *status;
+X int max_work_buf;
+X int qframe;
+X struct pstruct *ppst;
+X int qshuffle;
+X unsigned char *aa0;
+X int n0;
+X int nm0;
+X int max_tot;
+};
+X
+#include <sys/types.h>
+X
+/* this structure passes library sequences to the worker threads
+X and returns scores */
+X
+/* struct buf_str - one library-sequence slot; only declared when
+X PCOMPLIB is not defined.  Results come back in the embedded rst
+X (struct rstruct) and the r_/qr_ score fields. */
+struct buf_str {
+X int n1;
+X int *n1tot_p;
+X unsigned char *aa1b;
+X /* same long / off_t choice under USE_FSEEKO as the fseek_t typedef
+X in p_mw.h, spelled out here rather than using fseek_t */
+#ifndef USE_FSEEKO
+X long lseek;
+#else
+X off_t lseek;
+#endif
+X struct lmf_str *m_file_p;
+X int cont;
+X int qframe;
+X int frame;
+X int nsfnum;
+X int sfnum[10];
+X char libstr[20]; /* set to MAX_UID */
+X struct rstruct rst;
+X /* NOTE(review): presumably scores against the shuffled library (r_)
+X and shuffled query (qr_) sequence - confirm */
+X int r_score, qr_score;
+X double r_escore, qr_escore;
+};
+X
+/* struct buf_head - header for a group of struct buf_str entries
+X (pointed to by buf); only declared when PCOMPLIB is not defined.
+X NOTE(review): presumably buf_cnt is the number of entries in use,
+X have_results marks a scored buffer and start is the base of the
+X shared sequence storage - confirm against the threaded code. */
+struct buf_head {
+X int buf_cnt;
+X int have_results;
+X unsigned char *start;
+X struct buf_str *buf;
+};
+X
+#endif
+X
+#endif /* P_STRUCT */
+X
+#include "aln_structs.h"
+SHAR_EOF
+chmod 0644 param.h ||
+echo 'restore of param.h failed'
+Wc_c="`wc -c < 'param.h'`"
+test 3002 -eq "$Wc_c" ||
+ echo 'param.h: original size 3002, current size' "$Wc_c"
+fi
+# ============= pgsql_lib.c ==============
+if test -f 'pgsql_lib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping pgsql_lib.c (File already exists)'
+else
+echo 'x - extracting pgsql_lib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pgsql_lib.c' &&
+X
+/* pgsql_lib.c copyright (c) 2004 William R. Pearson */
+X
+/* $Name: fa_34_26_5 $ - $Id: pgsql_lib.c,v 1.3 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* functions for opening, reading, seeking a pgsql database */
+X
+/*
+X For the moment, this interface assumes that the file to be searched will
+X be specified in a single, long, string with 4 parts:
+X
+X (1) a database open string. This string has four fields, separated by
+X whitespace (' \t'):
+X hostname:port dbname user password
+X
+X '--' dashes at the beginning of lines are ignored -
+X thus the first line could be:
+X -- hostname:port dbname user password
+X
+X (2) a database query string that will return a unique ID (not
+X necessarily numeric, but it must be < 12 characters as libstr[12]
+X is used) and a sequence string
+X
+X (2a) a series of pgsql commands that do not generate results
+X starting with 'DO', followed by a select() statement.
+X
+X (3) a database select string that will return a description
+X given a unique ID
+X
+X (4) a database select string that will return a sequence given a
+X unique ID
+X
+X Lines (3) and (4) are not required for pv34comp* libraries, but
+X line (2) must generate a complete description as well as a sequence.
+X
+X
+X 18-July-2001
+X Additional syntax has been added to support multiline SQL queries.
+X
+X If the host line begins with '+', then the SQL is opened on the same
+X connection as the previous SQL file.
+X
+X If the host line contains '-' just before the terminal ';', then
+X the file will not produce any output.
+X
+X This string can contain "\n". ";" are used to separate the four
+X functions, which must be specified in the order shown above.
+X The last (fourth) query must terminate with a ';'
+X
+X 19-July-2004
+X
+X This file is designed for PostgreSQL, which uses a different syntax
+X for getting rows of data. Specifically, a select statement must be
+X associated with a "cursor", so that one can fetch a single row.
+X
+X This can be simply done with the statement:
+X
+X DECLARE next_seq CURSOR FOR "select statement ..."
+X
+X The need for a CURSOR complicates the getlib()/ranlib() design, which
+X assumes that ranlib() can set something up that getlib() can read.
+X This can be avoided by setting up an otherwise unnecessary cursor for
+X the ranlib statement that gets a sequence.
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+X
+#include <libpq-fe.h>
+#define PGSQL_LIB 17
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#define EOSEQ 0
+/* #include "upam.h" */
+X
+#ifdef SUPERFAMNUM
+int sfnum[10], nsfnum;
+#endif
+X
+int pgsql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
+void pgsql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
+X
+#define PGSQL_BUF 4096
+X
+/* pgsql_openlib() - parse a pgSQL library specification and open it.
+X
+X   sname   - name of a file holding the specification or, when it
+X             starts with '%', the specification text itself
+X   ldnaseq - not used in this function
+X   sascii  - stored in the returned lmf_str (m_fptr->sascii)
+X
+X   The specification is split on ';' into: the connection fields
+X   (host[:port] dbname user password, "@" meaning empty), optional
+X   leading "DO" commands (executed immediately), the main query (which
+X   is prefixed with "DECLARE next_seq CURSOR FOR "), the
+X   get-description query and the get-sequence query.  "BEGIN;" and the
+X   DECLARE statement are then executed on the new connection.
+X
+X   Returns a newly allocated lmf_str, or NULL on any failure.  Every
+X   failure after the first allocation leaves through error_r, which
+X   closes the spec file and the connection and frees every buffer
+X   allocated here.  The parse buffer is also freed on success, since
+X   all retained strings are separate copies.
+*/
+struct lmf_str *
+pgsql_openlib(char *sname, int ldnaseq, int *sascii) {
+X  FILE *sql_file = NULL;
+X  PGconn *conn;
+X  PGresult *res;
+X  char *tmp_str = NULL, *ttmp_str;
+X  int tmp_str_len;
+X  char *bp, *bps, *bdp, *tp, tchar;
+X  int qs_len, qqs_len;
+X  char *sql_db = NULL, *sql_host, *sql_dbname, *sql_user, *sql_pass;
+X  char *sql_port;
+X  char *sql_do = NULL;
+X  int sql_do_cnt;
+X  struct lmf_str *m_fptr = NULL;
+X
+X  /*   if (sql_reopen) return NULL; - should not be called for re-open */
+X
+X  tmp_str_len = PGSQL_BUF;
+X  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
+X    fprintf(stderr,"cannot allocate %d for pgSQL buffer\n",tmp_str_len);
+X    return NULL;
+X  }
+X
+X  if (sname[0] == '%') {
+X    strncpy(tmp_str,sname+1,tmp_str_len);
+X    /* tmp_str is a pointer: sizeof(tmp_str) is the pointer size, not
+X       the buffer length, so terminate at the real end of the buffer */
+X    tmp_str[tmp_str_len-1]='\0';
+X  }
+X  else {
+X    if ((sql_file=fopen(sname,"r"))==NULL) {
+X      fprintf(stderr," cannot open pgSQL file: %s\n",sname);
+X      goto error_r;
+X    }
+X
+X    if ((qs_len=fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
+X      fprintf(stderr," cannot read pgSQL file: %s\n",sname);
+X      goto error_r;
+X    }
+X    else  {
+X      tmp_str[qs_len]='\0';
+X      qqs_len = qs_len;
+X      /* buffer was filled completely - grow it and keep reading */
+X      while (qqs_len >= tmp_str_len-1) {
+X        tmp_str_len += PGSQL_BUF;
+X        /* realloc into a temporary so the old buffer is not lost
+X           (and can still be freed) when the call fails */
+X        if ((ttmp_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
+X          fprintf(stderr,
+X                  " cannot reallocate %d for pgSQL buffer\n",tmp_str_len);
+X          goto error_r;
+X        }
+X        tmp_str = ttmp_str;
+X        ttmp_str = &tmp_str[qqs_len];
+X        if ((qs_len=fread(ttmp_str,sizeof(char),PGSQL_BUF,sql_file))<0) {
+X          fprintf(stderr," cannot read pgSQL file: %s\n",sname);
+X          goto error_r;
+X        }
+X        ttmp_str[qs_len]='\0';
+X        qqs_len += qs_len;
+X      }
+X    }
+X    fclose(sql_file);
+X    sql_file = NULL;
+X  }
+X
+X  bps = tmp_str;
+X  if ((bp=strchr(bps,';'))!=NULL) {
+X    *bp='\0';
+X    if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X              (int)strlen(bps),bps);
+X      goto error_r;
+X    }
+X    /* have database name, parse the fields */
+X    else {
+X      strcpy(sql_db,bps);       /* strcpy OK because allocated strlen(bps) */
+X      bps = bp+1;       /* points to next char after ';' */
+X      while (isspace(*bps)) bps++;
+X      *bp=';'; /* replace ; */
+X      bp = sql_db;
+X      while (*bp=='-') {*bp++ = ' ';}
+X      sql_host = strtok(bp," \t\n");
+X      sql_dbname = strtok(NULL," \t\n");
+X      sql_user = strtok(NULL," \t\n");
+X      sql_pass = strtok(NULL," \t\n");
+X      /* strtok() returns NULL for a missing field - do not dereference */
+X      if (sql_host==NULL || sql_dbname==NULL ||
+X          sql_user==NULL || sql_pass==NULL) {
+X        fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+X        goto error_r;
+X      }
+X      if (sql_host[0]=='@') sql_host="";
+X      if (sql_user[0]=='@') sql_user="";
+X      if (sql_pass[0]=='@') sql_pass="";
+X      if ((tp=strchr(sql_host,':'))!=NULL) {
+X        sql_port = tp+1;
+X        *tp='\0';
+X      }
+X      else sql_port = "";
+X    }
+X  }
+X  else {
+X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X
+X  /* we have all the info we need to open a database, allocate lmf_str */
+X  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
+X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
+X            (long)sizeof(struct lmf_str),sname);
+X    goto error_r;
+X  }
+X
+X  /* have our struct, initialize it */
+X
+X  strncpy(m_fptr->lb_name,sname,MAX_FN);
+X  m_fptr->lb_name[MAX_FN-1]='\0';
+X
+X  m_fptr->sascii = sascii;
+X
+X  m_fptr->sql_db = sql_db;
+X  m_fptr->getlib = pgsql_getlib;
+X  m_fptr->ranlib = pgsql_ranlib;
+X  m_fptr->mm_flg = 0;
+X  m_fptr->sql_reopen = 0;
+X  m_fptr->lb_type = PGSQL_LIB;
+X
+X  /* now open the database, if necessary */
+X  conn = PQsetdbLogin(sql_host,
+X                      sql_port,
+X                      NULL,
+X                      NULL,
+X                      sql_dbname,
+X                      sql_user,
+X                      sql_pass);
+X
+X  if (PQstatus(conn) != CONNECTION_OK) {
+X    fprintf(stderr, "Connection to database '%s' failed.\n", PQdb(conn));
+X    fprintf(stderr, "%s", PQerrorMessage(conn));
+X    /* m_fptr->pgsql_conn is still NULL, so error_r will not finish
+X       this connection a second time */
+X    PQfinish(conn);
+X    goto error_r;
+X  }
+X  else {
+X    m_fptr->pgsql_conn = conn;
+X    fprintf(stderr," Database %s opened on %s\n",sql_dbname,sql_host);
+X  }
+X
+X  /* check for 'DO' command - copy to 'DO' string */
+X  while (*bps == '-') { *bps++=' ';}
+X  if (isspace(bps[-1]) && toupper(bps[0])=='D' &&
+X      toupper(bps[1])=='O' && isspace(bps[2])) {
+X    /* have some 'DO' commands */
+X    /* check where the end of the last DO statement is */
+X
+X    sql_do_cnt = 1;     /* count up the number of 'DO' statements for later */
+X    bdp=bps+3;
+X    while ((bp=strchr(bdp,';'))!=NULL) {
+X      /* skip ;\n - but never step over the terminating NUL when the
+X         ';' is the last character of the buffer */
+X      tp = (bp[1] != '\0') ? bp+2 : bp+1;
+X      while (isspace(*tp) || *tp == '-') {*tp++ = ' ';}
+X      if (toupper(*tp)=='D' && toupper(tp[1])=='O' && isspace(tp[2])) {
+X        sql_do_cnt++;           /* count the DO statements */
+X        bdp = tp+3;             /* move to the next DO statement */
+X      }
+X      else break;
+X    }
+X    if (bp != NULL) {   /* end of the last DO, begin of select */
+X      tchar = *(bp+1);
+X      *(bp+1)='\0';             /* terminate DO strings */
+X      if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
+X        fprintf(stderr," cannot allocate %d for sql_do\n",(int)strlen(bps));
+X        goto error_r;
+X      }
+X      else {
+X        strcpy(sql_do,bps);
+X        *(bp+1)=tchar;  /* replace missing ';' */
+X      }
+X      bps = bp+1;
+X      while (isspace(*bps)) bps++;
+X    }
+X    else {
+X      fprintf(stderr," terminal ';' not found: %s\n",bps);
+X      goto error_r;
+X    }
+X    /* all the DO commands are in the local sql_do in the form:
+X       DO command1; DO command2; DO command3; */
+X    bdp = sql_do;
+X    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
+X      /* do the pgsql statement on bdp+3 */
+X      /* check for error */
+X      *bp='\0';
+X      res = PQexec(m_fptr->pgsql_conn,bdp+3);
+X      if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+X        fprintf(stderr,"*** Error %s - query failed:\n%s\n",
+X                PQerrorMessage(m_fptr->pgsql_conn), bdp+3);
+X        PQclear(res);
+X        goto error_r;
+X      }
+X      PQclear(res);
+X
+X      *bp=';';
+X      bdp = bp+1;
+X      while (isspace(*bdp)) bdp++;
+X    }
+X    /* the DO text is not kept anywhere once it has been executed */
+X    free(sql_do);
+X    sql_do = NULL;
+X  }
+X
+X  /* copy 1st query field */
+X  if ((bp=strchr(bps,';'))!=NULL) {
+X    *bp='\0';
+X    if ((m_fptr->sql_query=calloc(strlen(bps)+41,sizeof(char)))==NULL) {
+X      fprintf(stderr, " cannot allocate space for query string [%d], %s\n",
+X              (int)strlen(bps),bps);
+X      goto error_r;
+X    }
+X    /* have query, copy it */
+X    else {
+X      strncpy(m_fptr->sql_query,"DECLARE next_seq CURSOR FOR ",40);
+X      strcat(m_fptr->sql_query,bps);
+X      *bp=';'; /* replace ; */
+X      bps = bp+1;
+X      while(isspace(*bps)) bps++;
+X    }
+X  }
+X  else {
+X    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X
+X  /* copy get_desc field */
+X  if ((bp=strchr(bps,';'))!=NULL) {
+X    *bp='\0';
+X    if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X              (int)strlen(bps),bps);
+X      goto error_r;
+X    }
+X    /* have get_desc, copy it */
+X    else {
+X      strcpy(m_fptr->sql_getdesc,bps);
+X      *bp=';'; /* replace ; */
+X      bps = bp+1;
+X      while(isspace(*bps)) bps++;
+X    }
+X  }
+X  else {
+X    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
+X    goto error_r;
+X  }
+X
+X  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
+X
+X  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
+X            (int)strlen(bps),bps);
+X    goto error_r;
+X  }
+X
+X  if (strlen(bps) > 0) {
+X    strcpy(m_fptr->sql_getseq,bps);
+X  }
+X  else {
+X    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
+X    /* was a bare "return NULL", which leaked the connection and
+X       every buffer allocated so far */
+X    goto error_r;
+X  }
+X  if (bp!=NULL) *bp=';';
+X
+X  /* now do the fetch */
+X
+X  res = PQexec(m_fptr->pgsql_conn,"BEGIN;");
+X  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+X    fprintf(stderr,"*** Error %s - BEGIN failed:\n",
+X            PQerrorMessage(conn));
+X    PQclear(res);
+X    goto error_r;
+X  }
+X  PQclear(res);
+X
+X  res = PQexec(m_fptr->pgsql_conn, m_fptr->sql_query);
+X  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+X    fprintf(stderr,"*** Error %d:%s - query failed:\n%s\n",
+X            PQresultStatus(res),PQerrorMessage(conn), m_fptr->sql_query);
+X    PQclear(res);
+X    goto error_r;
+X  }
+X  PQclear(res);
+X  m_fptr->pgsql_res=NULL;
+X
+X  /* every string kept in m_fptr is its own copy, so the parse buffer
+X     is no longer needed */
+X  free(tmp_str);
+X  return m_fptr;
+X
+X error_r:
+X  if (sql_file != NULL) fclose(sql_file);
+X  if (m_fptr != NULL) {
+X    /* only set once the connection was opened successfully */
+X    if (m_fptr->pgsql_conn != NULL) PQfinish(m_fptr->pgsql_conn);
+X    free(m_fptr->sql_getseq);
+X    free(m_fptr->sql_getdesc);
+X    free(m_fptr->sql_query);
+X    free(m_fptr);
+X  }
+X  free(sql_do);
+X  free(sql_db);
+X  free(tmp_str);
+X  return NULL;
+}
+X
+/* pgsql_reopen() - flag an open PostgreSQL library handle for re-reading.
+X
+X   Only sets m_fptr->sql_reopen and hands the same pointer back; no
+X   database call is made here.  A NULL handle is returned unchanged,
+X   in line with the NULL guard in pgsql_closelib().
+*/
+struct lmf_str *
+pgsql_reopen(struct lmf_str *m_fptr) {
+X  if (m_fptr == NULL) return NULL;
+X  m_fptr->sql_reopen = 1;
+X  return m_fptr;
+}
+X
+/* pgsql_closelib() - release the pending result and the connection.
+X
+X   Clears any PGresult still held in m_fptr->pgsql_res, closes the
+X   connection with PQfinish(), and resets the sql_reopen flag.  Both
+X   pointers are set to NULL afterwards so that calling this function
+X   twice on the same handle does not free them a second time.
+X   The lmf_str structure itself is not freed here.
+*/
+void
+pgsql_closelib(struct lmf_str *m_fptr) {
+X
+X  if (m_fptr == NULL) return;
+X
+X  if (m_fptr->pgsql_res != NULL) {
+X    PQclear(m_fptr->pgsql_res);
+X    /* forget the freed result so it cannot be cleared again */
+X    m_fptr->pgsql_res = NULL;
+X  }
+X
+X  if (m_fptr->pgsql_conn != NULL) {
+X    PQfinish(m_fptr->pgsql_conn);
+X    /* PQfinish() frees the PGconn; do not keep the stale pointer */
+X    m_fptr->pgsql_conn = NULL;
+X  }
+X
+X  m_fptr->sql_reopen=0;
+}
+X
+/*
+static char *sql_seq = NULL, *sql_seqp;
+static int sql_seq_len;
+*/
+X
+/* pgsql_getlib() - copy the next chunk of library sequence into seq[].
+X
+X   When *lcont is 0 a new entry is started: the result already held in
+X   lm_fd->pgsql_res is used if there is one (pgsql_ranlib() leaves its
+X   sequence result there), otherwise one row is pulled with
+X   "FETCH next_seq".  Column 0 supplies *libpos and (normally) libstr,
+X   column 1 the residues, and an optional column 2 a description whose
+X   "@C:<n>" field sets *l_off.  When *lcont is non-zero, encoding
+X   resumes at lm_fd->sql_seqp.
+X
+X   Residues are mapped through lm_fd->sascii; characters that map to a
+X   code >= NA are dropped.  seq[] is terminated with EOSEQ.  *lcont is
+X   incremented when the buffer filled up, and reset to 0 (with the
+X   result cleared) otherwise.
+X
+X   Returns the number of residues stored, or -1 when the FETCH fails
+X   or returns no rows.
+*/
+int
+pgsql_getlib( unsigned char *seq,
+X              int maxs,
+X              char *libstr,
+X              int n_libstr,
+X              fseek_t *libpos,
+X              int *lcont,
+X              struct lmf_str *lm_fd,
+X              long *l_off)
+{
+X  register unsigned char *cp, *seqp;
+X  register int *ap;
+X  unsigned char *seqm, *seqm1;
+X  PGresult *res;
+X
+X  char *bp;
+X
+X  seqp = seq;
+X  /* the copy loop below is unrolled ten-fold, so stop short of the end */
+X  seqm = &seq[maxs-9];
+X  seqm1 = seqm-1;
+X
+X  ap = lm_fd->sascii;
+X
+#ifdef SUPERFAMNUM
+X  sfnum[0]=nsfnum = 0;
+#endif
+X
+X  if (*lcont==0) {
+X    /* get a row, with UID, sequence */
+X    *l_off = 1;
+X
+X    /* check to see if we already have a valid result */
+X    if (lm_fd->pgsql_res==NULL) {
+X      res = PQexec(lm_fd->pgsql_conn,"FETCH next_seq");
+X      if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+X        fprintf(stderr,"*** Error %s - getlib FETCH failed:\n%s\n",
+X                PQerrorMessage(lm_fd->pgsql_conn), lm_fd->sql_query);
+X        PQclear(res);
+X        lm_fd->pgsql_res = NULL;
+X        *lcont = 0;
+X        *seqp = EOSEQ;
+X        return -1;
+X      }
+X    }
+X    else {res = lm_fd->pgsql_res;}
+X
+X    if (PQntuples(res)>0) {
+X      lm_fd->pgsql_res = res;
+X      *libpos=(fseek_t)atol(PQgetvalue(res,0,0));
+X
+X      *l_off = 1;
+X      if (PQnfields(res) > 2 && (bp=strchr(PQgetvalue(res,0,2),'@'))!=NULL &&
+X          !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
+X
+X      lm_fd->sql_seqp = PQgetvalue(res,0,1);
+X
+X      /* because of changes in pgsql_ranlib(), it is essential that
+X         libstr return the unique identifier; thus we must use
+X         column 0, not column 2.  Using libstr as the UID allows
+X         one to use any UID, not just numeric ones.  *libpos is not
+X         used for pgsql libraries.
+X      */
+X
+X      if (n_libstr <= MAX_UID) {
+X        /* the normal case returns only GID/sequence */
+X        strncpy(libstr,PQgetvalue(res,0,0),MAX_UID-1);
+X        libstr[MAX_UID-1]='\0';
+X      }
+X      else {
+X        /* here we do not use the UID in libstr, because we are not
+X           going back into the db */
+X        /* the PVM case also returns a long description */
+X        if (PQnfields(res)>2) {
+X          strncpy(libstr,PQgetvalue(res,0,2),n_libstr-1);
+X        }
+X        else {
+X          strncpy(libstr,PQgetvalue(res,0,0),n_libstr-1);
+X        }
+X        libstr[n_libstr-1]='\0';
+X      }
+X    }
+X    else {
+X      /* no rows left.  Clear the result we actually hold: after a fresh
+X         FETCH lm_fd->pgsql_res is still NULL, so clearing that pointer
+X         would leak res. */
+X      PQclear(res);
+X      lm_fd->pgsql_res=NULL;
+X      *lcont = 0;
+X      *seqp = EOSEQ;
+X      return -1;
+X    }
+X  }
+X
+X  /* encode ten residues per pass; the chain stops at the first character
+X     whose code is >= NA, which is then discarded by --seqp */
+X  for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) {
+X    if ((*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA &&
+X        (*seqp++=ap[*cp++])<NA) continue;
+X    --seqp;
+X    if (*(cp-1)==0) break;
+X  }
+X  /* remember where to resume if the caller asks for a continuation */
+X  lm_fd->sql_seqp = (char *)cp;
+X
+X  if (seqp>=seqm1) (*lcont)++;
+X  else {
+X    *lcont=0;
+X    PQclear(lm_fd->pgsql_res);
+X    lm_fd->pgsql_res = NULL;
+X  }
+X
+X  *seqp = EOSEQ;
+X  /* if ((int)(seqp-seq)==0) return 1; */
+X  return (int)(seqp-seq);
+}
+X
+/* pgsql_ranlib() - look up one library entry by its identifier.
+X
+X   Two queries are built by substituting libstr for the '#' placeholder
+X   in lm_fd->sql_getdesc and lm_fd->sql_getseq.  The first result
+X   supplies the description copied to str (at most cnt-1 characters,
+X   cut at the first CR or LF).  The second result is left in
+X   lm_fd->pgsql_res so that the next pgsql_getlib() call reads the
+X   sequence from it.
+X
+X   libpos is stored in lm_fd->lpos and otherwise appears only in the
+X   error text written to str.  A failed sequence query is fatal: the
+X   program exits with status 1.
+*/
+void
+pgsql_ranlib(char *str,
+X             int cnt,
+X             fseek_t libpos,
+X             char *libstr,
+X             struct lmf_str *lm_fd
+X             )
+{
+X  char tmp_query[1024], msg[64];
+X  PGresult *res;
+X  char *bp;
+X
+X  str[0]='\0';
+X
+X  /* put the UID into the query string - cannot use sprintf because of
+X     "%' etc */
+X
+X  if ((bp=strchr(lm_fd->sql_getdesc,'#'))==NULL) {
+X    fprintf(stderr, "no KEY position in %s\n",lm_fd->sql_getdesc);
+X    goto next1;
+X  }
+X  else {
+X    /* split the template at '#' temporarily, then restore it */
+X    *bp = '\0';
+X    strncpy(tmp_query,lm_fd->sql_getdesc,sizeof(tmp_query));
+X    tmp_query[sizeof(tmp_query)-1]='\0';
+X    /* strncat() takes the room that is left, not the buffer size */
+X    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
+X    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
+X    *bp='#';
+X    lm_fd->lpos = libpos;
+X  }
+X
+X  /* fprintf(stderr," requesting: %s\n",tmp_query); */
+X
+X  if (lm_fd->pgsql_res !=NULL) {
+X    PQclear(lm_fd->pgsql_res);
+X    lm_fd->pgsql_res = NULL;
+X  }
+X
+X  res = PQexec(lm_fd->pgsql_conn,tmp_query);
+X  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+X    lm_fd->pgsql_res = NULL;
+X
+X    /* format into a local buffer first so that str is never overrun */
+X    sprintf(msg,"gi|%ld ***Error - query failed***",(long)libpos);
+X    strncpy(str,msg,cnt-1);
+X    str[cnt-1]='\0';
+X    fprintf(stderr,"*** Error %s - ranlib DESC failed:\n%s\n",
+X            PQerrorMessage(lm_fd->pgsql_conn), tmp_query);
+X    PQclear(res);
+X    goto next1;
+X  }
+X
+X  if (PQntuples(res)<=0) {
+X    sprintf(msg,"gi|%ld ***use result failed***",(long)libpos);
+X    strncpy(str,msg,cnt-1);
+X    str[cnt-1]='\0';
+X    goto next0;
+X  }
+X
+X  /* prefer the second column (the description) when the query has one;
+X     asking PQnfields() avoids probing an out-of-range column */
+X  if (PQnfields(res) > 1) strncpy(str,PQgetvalue(res,0,1),cnt-1);
+X  else strncpy(str,PQgetvalue(res,0,0),cnt-1);
+X  str[cnt-1]='\0';
+X  /* change this later to support multiple row returns */
+X
+X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
+X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
+X
+X next0:
+X  PQclear(res);
+X next1:
+X  lm_fd->pgsql_res = NULL;
+X
+X  /* get the sequence, set up for pgsql_getlib() */
+X  /* put the UID into the query string */
+X
+X  if ((bp=strchr(lm_fd->sql_getseq,'#'))==NULL) {
+X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
+X    return;
+X  }
+X  else {
+X    *bp = '\0';
+X    strncpy(tmp_query,lm_fd->sql_getseq,sizeof(tmp_query));
+X    tmp_query[sizeof(tmp_query)-1]='\0';
+X    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
+X    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
+X    *bp='#';
+X  }
+X
+X  res = PQexec(lm_fd->pgsql_conn,tmp_query);
+X  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
+X    PQclear(res);
+X    lm_fd->pgsql_res = NULL;
+X    fprintf(stderr,"*** Error - ranlib SEQ failed:\n%s\n%s\n",tmp_query,
+X            PQerrorMessage(lm_fd->pgsql_conn));
+X    exit(1);
+X  }
+X  else {
+X    /* keep the result; pgsql_getlib() consumes and clears it */
+X    lm_fd->pgsql_res = res;
+X  }
+}
+SHAR_EOF
+chmod 0644 pgsql_lib.c ||
+echo 'restore of pgsql_lib.c failed'
+Wc_c="`wc -c < 'pgsql_lib.c'`"
+test 16978 -eq "$Wc_c" ||
+ echo 'pgsql_lib.c: original size 16978, current size' "$Wc_c"
+fi
+# ============= pirpsd.sql ==============
+if test -f 'pirpsd.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping pirpsd.sql (File already exists)'
+else
+echo 'x - extracting pirpsd.sql (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pirpsd.sql' &&
+xdb.wrplab PIRPSD seq_demo demo_pass;
+SELECT PIRID, SEQUENCES, PIRID
+X FROM c_psdsequence;
+SELECT PIRID, concat(PIRID," ",TITLE) FROM c_psdmain
+X WHERE PIRID='#';
+SELECT PIRID, SEQUENCES, PIRID
+X FROM c_psdsequence
+X WHERE PIRID='#';
+SHAR_EOF
+chmod 0644 pirpsd.sql ||
+echo 'restore of pirpsd.sql failed'
+Wc_c="`wc -c < 'pirpsd.sql'`"
+test 230 -eq "$Wc_c" ||
+ echo 'pirpsd.sql: original size 230, current size' "$Wc_c"
+fi
+# ============= print_pssm.c ==============
+if test -f 'print_pssm.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping print_pssm.c (File already exists)'
+else
+echo 'x - extracting print_pssm.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'print_pssm.c' &&
+/* print_pssm.c - 21-Jan-2005
+X
+X copyright (c) 2005 - William R. Pearson and the University of Virginia
+X
+X read a binary PSSM checkpoint file from blastpgp, and produce an ascii
+X formatted file
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <math.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "mm_file.h"
+#include "param.h"
+X
+#include "uascii.h"
+#include "upam.h"
+X
+void initenv(int, char **, struct pstruct *, char *);
+void read_pssm();
+void alloc_pam();
+int **alloc_pam2p();
+void initpam2();
+void fill_pam();
+double get_lambda();
+X
+extern int optind;
+extern char *optarg;
+X
+/* main() - read a blastpgp binary checkpoint and print it as text.
+X
+X   Sets up the protein alphabet and a default BL50 matrix, parses the
+X   command line with initenv() (-P checkpoint file, -s scoring matrix,
+X   then the query file name), reads the query with getseq(), and hands
+X   the open checkpoint to read_pssm().
+X
+X   Returns 0 on success and 1 when no -P file was given, the PSSM
+X   arrays cannot be allocated, or the checkpoint cannot be opened.
+*/
+int
+main(int argc, char **argv) {
+X
+X  char *aa0;
+X  char libstr[MAX_FN];
+X  char qname[MAX_FN];
+X  int sq0off;
+X  int i, n0;
+X  int status = 0;
+X  FILE *fp;
+X  struct pstruct pst, *ppst;
+X
+X  /* pst lives on the stack: zero it so that flags such as pam_pssm,
+X     which are tested below but only ever set by options, start out
+X     with a defined value */
+X  memset(&pst, 0, sizeof(pst));
+X  /* initenv() leaves qname alone when no query file is named */
+X  qname[0] = '\0';
+X
+X  /* stuff from initfa.c/h_init() */
+X
+X  memcpy(qascii,aascii,sizeof(qascii));
+X
+X  /* initialize a pam matrix */
+X  ppst = &pst;
+X  strncpy(ppst->pamfile,"BL50",MAX_FN);
+X  standard_pam(ppst->pamfile,ppst,0,0);
+X
+X  /* this is always protein by default */
+X  ppst->nsq = naa;
+X  ppst->nsqx = naax;
+X  for (i=0; i<=ppst->nsqx; i++) {
+X    ppst->sq[i] = aa[i];
+X    ppst->hsq[i] = haa[i];
+X    ppst->sqx[i]=aax[i];        /* sq = aa */
+X    ppst->hsqx[i]=haax[i];      /* hsq = haa */
+X  }
+X  ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
+X
+X  if ((aa0 = calloc(MAXTST,sizeof(char)))==NULL) {
+X    fprintf(stderr,"Cannot allocate aa0\n");
+X    exit(1);
+X  }
+X
+X  initenv(argc, argv, &pst, qname);
+X  alloc_pam(pst.nsq+1,pst.nsq+1, &pst);
+X  initpam2(&pst);
+X
+X  n0 = getseq (qname, qascii, aa0, MAXTST, libstr,&sq0off);
+X
+X  if (!pst.pam_pssm) {
+X    fprintf(stderr," ** ERROR ** No -P PSSM provided\n");
+X    status = 1;
+X  }
+X  else {
+X    ppst->pam2p[0] = alloc_pam2p(n0,pst.nsq);
+X    ppst->pam2p[1] = alloc_pam2p(n0,pst.nsq);
+X    if (ppst->pam2p[0] == NULL || ppst->pam2p[1] == NULL) {
+X      /* alloc_pam2p() has already reported the failure */
+X      status = 1;
+X    }
+X    else if ((fp = fopen(pst.pgpfile,"rb"))!=NULL) {
+X      read_pssm(aa0, n0, pst.nsq, pst.pamscale,fp,ppst);
+X      fclose(fp);
+X    }
+X    else {
+X      fprintf(stderr," ** ERROR ** Cannot open PSSM file: %s\n",pst.pgpfile);
+X      status = 1;
+X    }
+X  }
+X
+X  free(aa0);
+X  return status;
+}
+X
+/* initenv() - parse the print_pssm command line.
+X
+X   -P file   checkpoint file name, copied to ppst->pgpfile; also sets
+X             ppst->pam_pssm
+X   -s name   scoring matrix, copied to ppst->pamfile and loaded with
+X             standard_pam(), falling back to initpam(); also sets
+X             ppst->pam_set
+X
+X   The first argument after the options, if any, is copied to qname
+X   (at most MAX_FN-1 characters).  qname is left untouched when no
+X   such argument is present.
+*/
+void
+initenv(int argc, char **argv, struct pstruct *ppst, char *qname) {
+X  /* getopt() returns an int; holding it in a char makes the end-of-
+X     options test fail forever where plain char is unsigned */
+X  int copt;
+X
+X  pascii = aascii;
+X
+X  while ((copt = getopt(argc, argv, "P:s:")) != -1) {
+X    switch (copt) {
+X    case 'P':
+X      strncpy(ppst->pgpfile,optarg,MAX_FN);
+X      ppst->pgpfile[MAX_FN-1]='\0';
+X      ppst->pam_pssm = 1;
+X      break;
+X
+X    case 's':
+X      strncpy (ppst->pamfile, optarg, 120);
+X      ppst->pamfile[120-1]='\0';
+X      if (!standard_pam(ppst->pamfile,ppst,0, 0)) {
+X        initpam (ppst->pamfile, ppst);
+X      }
+X      ppst->pam_set=1;
+X      break;
+X    }
+X  }
+X  /* step back one so that argv[optind+1] is the first non-option word */
+X  optind--;
+X
+X  if (argc - optind > 1) {
+X    strncpy(qname, argv[optind+1], MAX_FN);
+X    /* strncpy() does not terminate when the source fills the buffer */
+X    qname[MAX_FN-1]='\0';
+X  }
+}
+X
+X
+/*
+X *aa0 - query sequence
+X n0 - length
+X pamscale - scaling for pam matrix - provided by apam.c, either
+X 0.346574 = ln(2)/2 (P120, BL62) or
+X 0.231049 = ln(2)/3 (P250, BL50)
+*/
+X
+#define N_EFFECT 20
+X
+/* read_pssm() - convert a binary checkpoint into scores and print them.
+X
+X   Layout read from fp: one int (profile length), that many query
+X   characters, then N_EFFECT doubles per query position.
+X
+X   The length, the query, and each frequency (times 10) go to stdout.
+X   Frequencies above 1e-12 are turned into log-odds against
+X   rrcounts[]/rrtotal and divided by pamscale.  A scale factor is then
+X   searched for so that the lambda of the integer matrix written to
+X   ppst->pam2p[0] matches the lambda of ppst->pam2[0]; the final scale
+X   and matrix are reported on stderr.
+X
+X   The profile length must equal n0.  aa0 and nsq are accepted but not
+X   used in this function.  Any read or allocation failure ends the
+X   program with exit(1).
+*/
+void
+read_pssm(unsigned char *aa0, int n0, int nsq, double pamscale, FILE *fp, struct pstruct *ppst) {
+X  int i, len = 0;       /* len is printed even when the first read fails */
+X  int qi, rj;
+X  int **pam2p;
+X  int first, too_high = 0;
+X  char *query;
+X  double freq, **freq2d, lambda, new_lambda;
+X  double scale, scale_high = 1.0, scale_low = 1.0;
+X
+X  pam2p = ppst->pam2p[0];
+X
+X  if(1 != fread(&len, sizeof(int), 1, fp)) {
+X    fprintf(stderr, "error reading from checkpoint file: %d\n", len);
+X    exit(1);
+X  }
+X
+X  if(len != n0) {
+X    fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
+X            len,n0);
+X    exit(1);
+X  }
+X
+X  /* read over query sequence stored in BLAST profile; one extra zeroed
+X     byte keeps it NUL-terminated for the %s output below */
+X  if(NULL == (query = (char *) calloc(len+1, sizeof(char)))) {
+X    fprintf(stderr, "Couldn't allocate memory for query!\n");
+X    exit(1);
+X  }
+X
+X  if((size_t)len != fread(query, sizeof(char), len, fp)) {
+X    fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
+X    exit(1);
+X  }
+X
+X  printf("%d\n%s\n",len,query);
+X
+X  /* currently we don't do anything with query; ideally, we should
+X     check to see that it actually matches aa0 ... */
+X
+X  /* quick 2d array alloc: */
+X  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+X    exit(1);
+X  }
+X
+X  if((freq2d[0] = (double *) calloc(n0 * N_EFFECT, sizeof(double))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
+X    exit(1);
+X  }
+X
+X  /* a little pointer arithmetic to fill out 2d array: */
+X  for (qi = 1 ; qi < n0 ; qi++) {
+X    freq2d[qi] = freq2d[0] + (N_EFFECT * qi);
+X  }
+X
+X  for (qi = 0 ; qi < n0 ; qi++) {
+X    printf("%c",query[qi]);
+X    for (rj = 0 ; rj < N_EFFECT ; rj++) {
+X      if(1 != fread(&freq, sizeof(double), 1, fp)) {
+X        fprintf(stderr, "Error while reading frequencies!\n");
+X        exit(1);
+X      }
+X      printf(" %8.7g",freq*10.0);
+X
+X      if (freq > 1e-12) {
+X        freq = log(freq /((double) (rrcounts[rj+1])/(double) rrtotal));
+X        freq /= pamscale; /* this gets us close to original pam scores */
+X        freq2d[qi][rj] = freq;
+X      }
+X      else {freq2d[qi][rj] = freq;}
+X    }
+X    printf("\n");
+X  }
+X
+X
+X  /* now figure out the right scale */
+X  scale = 1.0;
+X  lambda = get_lambda(ppst->pam2[0], 20, 20, "\0ARNDCQEGHILKMFPSTWYV");
+X
+X  /* should be near 1.0 because of our initial scaling by ppst->pamscale */
+X  fprintf(stderr, "real_lambda: %g\n", lambda);
+X
+X  /* get initial high/low scale values: step away from 1.0 in the
+X     direction the first comparison indicates, doubling the distance
+X     until the comparison flips */
+X  first = 1;
+X  while (1) {
+X    fill_pam(pam2p, n0, 20, freq2d, scale);
+X    new_lambda = get_lambda(pam2p, n0, 20, query);
+X
+X    if (new_lambda > lambda) {
+X      if (first) {
+X        first = 0;
+X        scale = scale_high = 1.0 + 0.05;
+X        scale_low = 1.0;
+X        too_high = 1;
+X      } else {
+X        if (!too_high) break;
+X        scale = (scale_high += scale_high - 1.0);
+X      }
+X    } else if (new_lambda > 0) {
+X      if (first) {
+X        first = 0;
+X        scale_high = 1.0;
+X        scale = scale_low = 1.0 - 0.05;
+X        too_high = 0;
+X      } else {
+X        if (too_high) break;
+X        scale = (scale_low += scale_low - 1.0);
+X      }
+X    } else {
+X      fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
+X      exit(1);
+X    }
+X  }
+X
+X  /* now do binary search between low and high */
+X  for (i = 0 ; i < 10 ; i++) {
+X    scale = 0.5 * (scale_high + scale_low);
+X    fill_pam(pam2p, n0, 20, freq2d, scale);
+X    new_lambda = get_lambda(pam2p, n0, 20, query);
+X
+X    if (new_lambda > lambda) scale_low = scale;
+X    else scale_high = scale;
+X  }
+X
+X  scale = 0.5 * (scale_high + scale_low);
+X  fill_pam(pam2p, n0, 20, freq2d, scale);
+X
+X  fprintf(stderr, "final scale: %g\n", scale);
+X
+X  for (qi = 0 ; qi < n0 ; qi++) {
+X    fprintf(stderr, "%4d %c:  ", qi+1, query[qi]);
+X    for (rj = 1 ; rj <= 20 ; rj++) {
+X      fprintf(stderr, "%4d", pam2p[qi][rj]);
+X    }
+X    fprintf(stderr, "\n");
+X  }
+X
+X  free(freq2d[0]);
+X  free(freq2d);
+X
+X  free(query);
+}
+X
+/*
+X * alloc_pam(): builds the two d1 x d2 integer matrices ppst->pam2[0]
+X * and ppst->pam2[1].  Each is a table of d1 row pointers into one
+X * contiguous block of d1*d2 ints; the blocks are also kept in the
+X * globals pam12 and pam12x.  Exits with status 1 if malloc() fails.
+X */
+void
+alloc_pam (int d1, int d2, struct pstruct *ppst)
+{
+X  int row;
+X
+X  /* row-pointer tables */
+X  ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *));
+X  if (ppst->pam2[0] == NULL) {
+X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+X    exit(1);
+X  }
+X
+X  ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *));
+X  if (ppst->pam2[1] == NULL) {
+X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+X    exit(1);
+X  }
+X
+X  /* storage for the first matrix, then hook up its rows */
+X  pam12 = (int *) malloc (d1 * d2 * sizeof (int));
+X  if (pam12 == NULL) {
+X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
+X    exit(1);
+X  }
+X
+X  for (row = 0; row < d1; row++) {
+X    ppst->pam2[0][row] = pam12 + row * d2;
+X  }
+X
+X  /* storage for the second matrix, then hook up its rows */
+X  pam12x = (int *) malloc (d1 * d2 * sizeof (int));
+X  if (pam12x == NULL) {
+X    fprintf(stderr,"Cannot allocate 2d pam matrix: %d",d2);
+X    exit(1);
+X  }
+X
+X  for (row = 0; row < d1; row++) {
+X    ppst->pam2[1][row] = pam12x + row * d2;
+X  }
+}
+X
+/* fill_pam() - scale the log-odds table and round it into pam2p.
+X
+X   For each of the n0 rows, freq2d[row][0..nsq-1] is multiplied by
+X   scale, rounded half away from zero, and stored in
+X   pam2p[row][1..nsq].  Column 0 of pam2p is not written.
+*/
+void
+fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {
+X  int row, col;
+X  double scaled;
+X
+X  for (row = 0 ; row < n0 ; row++) {
+X    for (col = 1 ; col <= nsq ; col++) {
+X      scaled = scale * freq2d[row][col-1];
+X      /* add 0.5 away from zero so the cast rounds rather than truncates */
+X      pam2p[row][col] = (int)((scaled < 0.0) ? scaled - 0.5 : scaled + 0.5);
+X    }
+X  }
+}
+X
+/*
+X * initpam2(): expands the packed lower-triangle array pam[] into the
+X * symmetric matrix ppst->pam2[0], subtracting ppst->pamoff from every
+X * score and recording the lowest and highest score in ppst->pam_l and
+X * ppst->pam_h.  Row 0 and column 0 are set to -BIGNUM.  Also sets
+X * ppst->nt_align, lowers three pairings for RNA, and either applies
+X * (pam_x_set) or records the scores for 'X' and '*'.
+X */
+void initpam2 (struct pstruct *ppst)
+{
+X  int row, col, next, nsq;
+X  int sa_x, sa_t, tmp;
+X  int **m;
+X
+X  nsq = ppst->nsq;
+X  sa_x = pascii['X'];
+X  sa_t = pascii['*'];
+X  m = ppst->pam2[0];
+X
+X  m[0][0] = -BIGNUM;
+X  ppst->pam_h = -1;
+X  ppst->pam_l = 1;
+X
+X  /* pam[] is consumed in order: (1,1) (2,1) (2,2) (3,1) ... */
+X  next = 0;
+X  for (row = 1; row <= nsq; row++) {
+X    m[0][row] = m[row][0] = -BIGNUM;
+X    for (col = 1; col <= row; col++) {
+X      m[col][row] = m[row][col] = pam[next++] - ppst->pamoff;
+X      if (ppst->pam_l > m[row][col]) ppst->pam_l = m[row][col];
+X      if (ppst->pam_h < m[row][col]) ppst->pam_h = m[row][col];
+X    }
+X  }
+X
+X  ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
+X
+X  if (ppst->dnaseq == SEQT_RNA) {
+X    /* A:G, C:T and C:U score one less than G:G */
+X    tmp = m[nascii['G']][nascii['G']] - 1;
+X    m[nascii['A']][nascii['G']] =
+X      m[nascii['C']][nascii['T']] =
+X      m[nascii['C']][nascii['U']] = tmp;
+X  }
+X
+X  if (!ppst->pam_x_set) {
+X    /* no override given: remember what the matrix itself uses */
+X    ppst->pam_xx = m[sa_x][sa_x];
+X    ppst->pam_xm = m[1][sa_x];
+X  }
+X  else {
+X    for (row = 1; row <= nsq; row++) {
+X      m[sa_x][row] = m[row][sa_x] = ppst->pam_xm;
+X      m[sa_t][row] = m[row][sa_t] = ppst->pam_xm;
+X    }
+X    m[sa_x][sa_x] = ppst->pam_xx;
+X    m[sa_t][sa_t] = ppst->pam_xm;
+X  }
+}
+X
+/* get_lambda() - estimate lambda for a score matrix.
+X
+X   Builds the probability of every score between the smallest and
+X   largest entry of pam2p, weighting cell (i,j) by
+X   rrcounts[aascii[aa0[i]]] * rrcounts[j], normalises the table, and
+X   passes it to karlin().
+X
+X   If pam2p[0][1] is -BIGNUM, row 0 is skipped and rows 1..n0 are used
+X   instead of rows 0..n0-1; aa0 is indexed by the same row number.
+X
+X   Returns the lambda computed by karlin().  When karlin() reports
+X   failure a message is printed and the returned value is whatever
+X   karlin() left in lambda, or 0.0 if it stored nothing.
+*/
+double
+get_lambda(int **pam2p, int n0, int nsq, char *aa0) {
+X  /* defined starting values: karlin() may return without storing */
+X  double lambda = 0.0, H = 0.0;
+X  double *pr, tot, sum;
+X  int i, ioff, j, min, max;
+X
+X  /* get min and max scores */
+X  min = BIGNUM;
+X  max = -BIGNUM;
+X  if(pam2p[0][1] == -BIGNUM) {
+X    ioff = 1;
+X    n0++;
+X  } else {
+X    ioff = 0;
+X  }
+X
+X  for (i = ioff ; i < n0 ; i++) {
+X    for (j = 1; j <= nsq ; j++) {
+X      if (min > pam2p[i][j])
+X        min = pam2p[i][j];
+X      if (max < pam2p[i][j])
+X        max = pam2p[i][j];
+X    }
+X  }
+X
+X  /* fprintf(stderr, "min: %d\tmax:%d\n", min, max); */
+X
+X  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
+X    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
+X    exit(1);
+X  }
+X
+X  tot = (double) rrtotal * (double) rrtotal * (double) n0;
+X  for (i = ioff ; i < n0 ; i++) {
+X    for (j = 1; j <= nsq ; j++) {
+X      /* index through unsigned char: a plain char holding a byte
+X         >= 0x80 would otherwise give a negative subscript */
+X      pr[pam2p[i][j] - min] +=
+X        (double) ((double) rrcounts[aascii[(unsigned char)aa0[i]]] * (double) rrcounts[j]) / tot;
+X    }
+X  }
+X
+X  sum = 0.0;
+X  for(i = 0 ; i <= max-min ; i++) {
+X    sum += pr[i];
+X    /* fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
+X  }
+X  /* fprintf(stderr, "sum: %g\n", sum); */
+X
+X  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
+X
+X  if (!karlin(min, max, pr, &lambda, &H)) {
+X    fprintf(stderr, "Karlin lambda estimation failed\n");
+X  }
+X
+X  /* fprintf(stderr, "lambda: %g\n", lambda); */
+X  free(pr);
+X
+X  return lambda;
+}
+X
+/* alloc_pam2p() - allocate a zeroed len x (nsq+1) integer matrix.
+X
+X   The matrix is one contiguous block addressed through a table of
+X   len row pointers.  Returns the table, or NULL (after printing a
+X   message) when either allocation fails.  Release with free_pam2p().
+*/
+int **
+alloc_pam2p(int len, int nsq) {
+X  int row;
+X  int width;
+X  int **rows;
+X
+X  rows = (int **)calloc(len,sizeof(int *));
+X  if (rows == NULL) {
+X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+X    return NULL;
+X  }
+X
+X  width = nsq+1;
+X  rows[0] = (int *)calloc(width*len,sizeof(int));
+X  if (rows[0] == NULL) {
+X    fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", width*len);
+X    free(rows);
+X    return NULL;
+X  }
+X
+X  /* every later row starts one row-width after the previous one */
+X  for (row=1; row<len; row++) {
+X    rows[row] = rows[row-1] + width;
+X  }
+X
+X  return rows;
+}
+X
+/* free_pam2p() - release a matrix obtained from alloc_pam2p().
+X   The data block hangs off row 0, so it is freed before the row table.
+X   A NULL argument is ignored. */
+void free_pam2p(int **pam2p) {
+X  if (pam2p == NULL) return;
+X
+X  free(pam2p[0]);
+X  free(pam2p);
+}
+X
+SHAR_EOF
+chmod 0644 print_pssm.c ||
+echo 'restore of print_pssm.c failed'
+Wc_c="`wc -c < 'print_pssm.c'`"
+test 11147 -eq "$Wc_c" ||
+ echo 'print_pssm.c: original size 11147, current size' "$Wc_c"
+fi
+# ============= prio_atepa.aa ==============
+if test -f 'prio_atepa.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping prio_atepa.aa (File already exists)'
+else
+echo 'x - extracting prio_atepa.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'prio_atepa.aa' &&
+>PRIO_ATEPA | 90377 | MAJOR PRION PROTEIN PRECURSOR (PRP) (PRP27-30) (PRP33-35C).
+MANLGYWMLVLFVATWSDLGLCKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGWGQPHGGGWGQPHGGGWGQP
+HGGGWGQPHGGGWGQAGGTHNQWNKPSKPKTNMKHMAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYR
+ENMYRYPNQVYYRPVDQYNNQNNFVHDCVNITIKQHTVTTTTKGENLTETDVKMMERVVEQMCITQYERESQAYY
+QRGSSMVLFSSPPVILLISFLIFLIVG
+SHAR_EOF
+chmod 0644 prio_atepa.aa ||
+echo 'restore of prio_atepa.aa failed'
+Wc_c="`wc -c < 'prio_atepa.aa'`"
+test 340 -eq "$Wc_c" ||
+ echo 'prio_atepa.aa: original size 340, current size' "$Wc_c"
+fi
+# ============= prot_test.lib ==============
+if test -f 'prot_test.lib' -a X"$1" != X"-c"; then
+ echo 'x - skipping prot_test.lib (File already exists)'
+else
+echo 'x - extracting prot_test.lib (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'prot_test.lib' &&
+>HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
+VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
+AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
+R
+>K1HUAG | 1091 | Ig kappa chain V-I region (Ag) - Human
+DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPSRFSGSGFGTD
+FTFTISGLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR/
+>CCHU | 1 | Cytochrome c - Human
+MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLE
+NPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
+>N2KF1U | 1021 | Long neurotoxin 1 - Many-banded krait
+IVCHTTATIPSSAVTCPPGENLCYRKMWCDAFCSSRGKVVELGCAATCPSKKPYEEVTCCSTDKCNHPPK
+RQPG
+>TPHUCS | 1322 | Troponin C, skeletal muscle - Human
+DTQQAEARSYLSEEMIAEFKAAFDMFDADGGGDISVKELGTVMRMLGQTPTKEELDAIIEEVDEDGSGTI
+DFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIFRASGEHVTDEEIESLMKDGDK
+NNDGRIDFDEFLKMMEGVQ
+>FEPE | 25 | Ferredoxin - Peptostreptococcus asaccharolyticus
+AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED
+>RKMDS | 677 | Ribulose-bisphosphate carboxylase (EC 4.1.1.39) small chain - Cry
+MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKDPAAVMFEINA
+CRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSRNIRYTIQSYASTRPEGERY*
+X
+>K3HU | 1099 | Ig kappa chain C region - Human
+/TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSS
+TLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC
+>HMIVV | 2581 | Hemagglutinin precursor - Influenza A virus (2 strains)
+MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELVQSSSTGKICN
+NPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVPDYASLRSLVASSGTLEFINE
+GFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPVQNVTMPNNDNSDKLYIWGVHHPSTDKEQTN
+LYVQASGKVTVSTKRSQQTIIPNVGSRPWVRGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTG
+KSSI
+MRSDAPIGTCSSECITPNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAG
+FIENGWEGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVEGRIQ
+DLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMGNGCFKIYHKCDNAC
+IGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVVLLGFIMWACQKGNIRCN
+ICI
+>OKBO2C | 296 | Protein kinase (EC 2.7.1.37), cAMP-dependent, catalytic chain - B
+GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLVKHMETGNHYA
+MKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVMEYVPGGEMFSHLRRIGRFSE
+PHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYIQVTDFGFAKRVKGRTWTLCGTPEYLAPEII
+LSKGYNKAVDWWALGVLIYEMAAGYPPFFADQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKR
+FGNLKDGVNDIKNHKWFATTDWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
+>GT8.7 | 266 | transl. of pa875.con, 19 to 675
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
+YTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLI
+DGSHKITQSNAILRYLARKHHLDGETEEERIR
+ADIVENQVMDTRMQLIMLCYNPDFEKQKPEFL
+KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
+YDILDQYRMFEPKCLDAFPNLRDFLARFEGLK
+KISAYMKSSRYIATPIFSKMAHWSNK
+SHAR_EOF
+chmod 0644 prot_test.lib ||
+echo 'restore of prot_test.lib failed'
+Wc_c="`wc -c < 'prot_test.lib'`"
+test 2741 -eq "$Wc_c" ||
+ echo 'prot_test.lib: original size 2741, current size' "$Wc_c"
+fi
+# ============= prot_test.lseg ==============
+if test -f 'prot_test.lseg' -a X"$1" != X"-c"; then
+ echo 'x - skipping prot_test.lseg (File already exists)'
+else
+echo 'x - extracting prot_test.lseg (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'prot_test.lseg' &&
+>HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee @P:1-50
+VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+KVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA
+VHASLDKFLASVSTVLTSKYR
+X
+>K1HUAG | 1091 | Ig kappa chain V-I region (Ag) - Human @P:51-90
+DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPs
+rfsgsgfgtdftftisgLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR*
+X
+>CCHU | 1 | Cytochrome c - Human @P:25-85
+MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIW
+GEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
+X
+>N2KF1U | 1021 | Long neurotoxin 1 - Many-banded krait
+IVCHTTATIPSSAVTCPPGENLCYRKMWCDAFCSSRGKVVELGCAATCPSKKPYEEVTCC
+STDKCNHPPKRQPG
+X
+>TPHUCS | 1322 | Troponin C, skeletal muscle - Human @P:50-125
+DTQQAEARSYLSEEMIAEfkaafdmfdadgggdISVKELGTVMRMLGQTPTKEELDAIIE
+EVDEDGSGTIDFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIFR
+ASGEHVTDEEIESLMKDGDKNNDGRIDFDEFLKMMEGVQ
+X
+>FEPE | 25 | Ferredoxin - Peptostreptococcus asaccharolyticus
+AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED
+X
+>RKMDS | 677 | Ribulose-bisphosphate carboxylase (EC 4.1.1.39) small chain - Cry
+MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKD
+PAAVMFEINACRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSR
+NIRYTIQSYASTRPEGERY*
+X
+>K3HU | 1099 | Ig kappa chain C region - Human
+TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDS
+KDstyslsstltlsKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC
+>HMIVV | 2581 | Hemagglutinin precursor - Influenza A virus (2 strains)
+MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELV
+QSSSTGKICNNPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVP
+DYASLRSLVASSGTLEFINEGFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPV
+QNVTMPNNDNSDKLYIWGVHHPSTDKEQTNLYVQASGKVTVSTKRSQQTIIPNVGSRPWV
+RGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTGKSSIMRSDAPIGTCSSECIT
+PNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAGFIENGW
+EGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVE
+GRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMG
+NGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAIS
+CFLLCVVLLGFIMWACQKGNIRCNICI
+>OKBO2C | 296 | Protein kinase (EC 2.7.1.37), cAMP-dependent, catalytic chain - B
+GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLV
+KHMETGNHYAMKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVM
+EYVPGGEMFSHLRRIGRFSEPHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYI
+QVTDFGFAKRVKGRTWTLCGTPEYLAPEIILSKGYNKAVDWWALGVLIYEMAAGYPPFFA
+DQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKRFGNLKDGVNDIKNHKWFATT
+DWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
+>GT8.7 | 266 | transl. of pa875.con, 19 to 675 @P:21-180
+MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNL
+pylidgshkitqsnailrylarkhhldget
+EEERIRADIVENQVMDTRMQLIMLCYNPDF
+ekqkpeflktipekmklyseflgkrpwfag
+DKVTYVDFLAYDILDQYRMFEPKCLDAFPN
+LRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
+X
+SHAR_EOF
+chmod 0644 prot_test.lseg ||
+echo 'restore of prot_test.lseg failed'
+Wc_c="`wc -c < 'prot_test.lseg'`"
+test 2786 -eq "$Wc_c" ||
+ echo 'prot_test.lseg: original size 2786, current size' "$Wc_c"
+fi
+# ============= prss3.1 ==============
+if test -f 'prss3.1' -a X"$1" != X"-c"; then
+ echo 'x - skipping prss3.1 (File already exists)'
+else
+echo 'x - extracting prss3.1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'prss3.1' &&
+.TH PRSS3 1 local
+.SH NAME
+prss \- test a protein sequence similarity for significance
+.SH SYNOPSIS
+.B prss34
+\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -Z #
+.I -k # -v #
+]
+sequence-file-1 sequence-file-2
+[
+.I #-of-shuffles
+]
+X
+.B prfx34
+\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -z 1,3 -Z #
+.I -k # -v #
+]
+sequence-file-1 sequence-file-2
+[
+.I ktup
+]
+[
+.I #-of-shuffles
+]
+X
+.B prss34(_t)/prfx34(_t)
+[-AfghksvwzZ]
+\- interactive mode
+X
+.SH DESCRIPTION
+.B prss34
+and
+.B prfx34
+are used to evaluate the significance of a protein:protein, DNA:DNA
+(
+.B prss34
+), or translated-DNA:protein (
+.B prfx34
+) sequence similarity score
+by comparing two sequences and calculating optimal similarity scores,
+and then repeatedly shuffling the second sequence, and calculating
+optimal similarity scores using the Smith-Waterman algorithm. An
+extreme value distribution is then fit to the shuffled-sequence
+scores. The characteristic parameters of the extreme value
+distribution are then used to estimate the probability that each of
+the unshuffled sequence scores would be obtained by chance in one
+sequence, or in a number of sequences equal to the number of shuffles.
+This program is derived from
+.B rdf2\c
+\&, described by Pearson and Lipman, PNAS (1988) 85:2444-2448, and
+Pearson (Meth. Enz. 183:63-98). Use of the extreme value
+distribution for estimating the probabilities of similarity scores was
+described by Karlin and Altschul, PNAS (1990) 87:2264-2268. The
+'z-values' calculated by rdf2 are not as informative as the P-values
+and expectations calculated by prdf.
+.B prss34
+calculates optimal scores using the same rigorous Smith-Waterman
+algorithm (Smith and Waterman, J. Mol. Biol. (1981) 147:195-197) used by the
+.B ssearch34
+program.
+.B prfx34
+calculates scores using the FASTX algorithm (Pearson et al. (1997) Genomics 46:24-36).
+.PP
+.B prss34
+and
+.B prfx34
+also allow a more sophisticated shuffling method: residues can be shuffled
+within a local window, so that the order of residues 1-10, 11-20, etc,
+is destroyed but a residue in the first 10 is never swapped with a residue
+outside the first ten, and so on for each local window.
+.SH EXAMPLES
+.TP
+(1)
+.B prss34
+\& -v 10 musplfm.aa lcbo.aa
+.PP
+Compare the amino acid sequence in the file musplfm.aa with that
+in lcbo.aa, then shuffle lcbo.aa 200 times using a local shuffle with
+a window of 10. Report the significance of the
+unshuffled musplfm/lcbo comparison scores with respect to the shuffled
+scores.
+.TP
+(2)
+.B prss34
+musplfm.aa lcbo.aa 1000
+.PP
+Compare the amino acid sequence in the file musplfm.aa with the sequences
+in the file lcbo.aa, shuffling \fClcbo.aa\fP 1000 times. Shuffles can also be specified with the -k # option.
+.TP
+(3)
+.B prfx34
+mgstm1.esq xurt8c.aa 2 1000
+.PP
+Translate the DNA sequence in the \fCmgstm1.esq\fP file in all six
+frames and compare it to the amino acid sequence in the file
+\fCxurt8c.aa\fP, using ktup=2 and shuffling \fCxurt8c.aa\fP 1000
+times. Each comparison considers the best forward or reverse
+alignment with frameshifts, using the fastx algorithm (Pearson et al
+(1997) Genomics 46:24-36).
+.TP
+(4)
+.B prss34/prfx34
+.PP
+Run prss in interactive mode. The program will prompt for the file
+names of the two query sequence files and the number of shuffles to be
+used.
+.SH OPTIONS
+.PP
+.B prss34/prfx34
+can be directed to change the scoring matrix, gap penalties, and
+shuffle parameters by entering options on the command line (preceded
+by a `\-'). All of the options should precede the file names and number of
+shuffles.
+.TP
+\-A
+Show unshuffled alignment.
+.TP
+\-f #
+Penalty for opening a gap (-10 by default for proteins).
+.TP
+\-g #
+Penalty for additional residues in a gap (-2 by default for proteins).
+.TP
+\-H
+Do not display histogram of similarity scores.
+.TP
+\-k #
+Number of shuffles (200 is the default)
+.TP
+\-Q -q
+"quiet" - do not prompt for filename.
+.TP
+\-O filename
+send copy of results to "filename."
+.TP
+\-s str
+specify the scoring matrix. BLOSUM50 is used by default for proteins;
++5/-4 is used by default for DNA.
+.B prss34
+recognizes the same scoring matrices as fasta34, ssearch34, fastx34, etc;
+e.g. BL50, P250, BL62, BL80, MD10, MD20, and other matrices in BLAST1.4
+matrix format.
+.TP
+\-v #
+Use a local window shuffle with a window size of #.
+.TP
+\-z #
+Calculate statistical significance using the mean/variance
+(moments) approach used by fasta34/ssearch or from maximum likelihood
+estimates of lambda and K.
+.TP
+\-Z #
+Present statistical significance as if a '#' entry database had
+been searched (e.g. "-Z 50000" presents statistical significance as if
+50,000 sequences had been compared).
+.SH ENVIRONMENT VARIABLES
+.PP
+.B (SMATRIX)
+the filename of an alternative scoring matrix file. For protein
+sequences, BLOSUM50 is used by default; PAM250 can be used with the
+command line option
+.B -s P250\c
+(or with -s pam250.mat). BLOSUM62 (-s BL62) and PAM120 (-s P120) are also available.
+.SH "SEE ALSO"
+ssearch3(1), fasta3(1).
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+X
+SHAR_EOF
+chmod 0644 prss3.1 ||
+echo 'restore of prss3.1 failed'
+Wc_c="`wc -c < 'prss3.1'`"
+test 4969 -eq "$Wc_c" ||
+ echo 'prss3.1: original size 4969, current size' "$Wc_c"
+fi
+# ============= prss3.rsp ==============
+# Restore prss3.rsp.  An existing copy is kept unless the first script
+# argument is "-c"; the X"..." form keeps test(1) from seeing an empty
+# or option-like operand.
+if test -f 'prss3.rsp' -a X"$1" != X"-c"; then
+ echo 'x - skipping prss3.rsp (File already exists)'
+else
+echo 'x - extracting prss3.rsp (Text)'
+# sed removes the leading "X" guard from here-document lines; the quoted
+# delimiter keeps the shell from expanding anything in the payload.
+sed 's/^X//' << 'SHAR_EOF' > 'prss3.rsp' &&
+compacc.obj doinit.obj karlin.obj scaleswn.obj htime.obj apam.obj lib_sel.obj getopt.obj showrss.obj pssm_asn_subs.obj
+SHAR_EOF
+chmod 0644 prss3.rsp ||
+echo 'restore of prss3.rsp failed'
+# Report (but do not fail) when the extracted size differs from the size
+# recorded when the archive was built.
+Wc_c="`wc -c < 'prss3.rsp'`"
+test 119 -eq "$Wc_c" ||
+ echo 'prss3.rsp: original size 119, current size' "$Wc_c"
+fi
+# ============= psql_demo.sql ==============
+# Restore psql_demo.sql.  An existing copy is kept unless the first script
+# argument is "-c".
+if test -f 'psql_demo.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping psql_demo.sql (File already exists)'
+else
+echo 'x - extracting psql_demo.sql (Text)'
+# sed removes the leading "X" guard from here-document lines; the quoted
+# delimiter keeps the shell from expanding anything in the payload.
+sed 's/^X//' << 'SHAR_EOF' > 'psql_demo.sql' &&
+@ seqdb_demo seqdb_demo @;
+SELECT acc, protein.seq, sp_name
+X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp';
+SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+X WHERE annot.acc='#' AND db='sp';
+X
+SHAR_EOF
+chmod 0644 psql_demo.sql ||
+echo 'restore of psql_demo.sql failed'
+# Report (but do not fail) when the extracted size differs from the size
+# recorded when the archive was built.
+Wc_c="`wc -c < 'psql_demo.sql'`"
+test 317 -eq "$Wc_c" ||
+ echo 'psql_demo.sql: original size 317, current size' "$Wc_c"
+fi
+# ============= psql_demo1.sql ==============
+# Restore psql_demo1.sql.  An existing copy is kept unless the first script
+# argument is "-c".
+if test -f 'psql_demo1.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping psql_demo1.sql (File already exists)'
+else
+echo 'x - extracting psql_demo1.sql (Text)'
+# sed removes the leading "X" guard from here-document lines; the quoted
+# delimiter keeps the shell from expanding anything in the payload.
+sed 's/^X//' << 'SHAR_EOF' > 'psql_demo1.sql' &&
+xdb.wrplab seqdb_demo wrplab gstmu;
+SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
+X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+X WHERE annot.acc='#' AND db='sp';
+SHAR_EOF
+chmod 0644 psql_demo1.sql ||
+echo 'restore of psql_demo1.sql failed'
+# Report (but do not fail) when the extracted size differs from the size
+# recorded when the archive was built.
+Wc_c="`wc -c < 'psql_demo1.sql'`"
+test 366 -eq "$Wc_c" ||
+ echo 'psql_demo1.sql: original size 366, current size' "$Wc_c"
+fi
+# ============= psql_demo_pv.sql ==============
+# Restore psql_demo_pv.sql.  An existing copy is kept unless the first
+# script argument is "-c".
+if test -f 'psql_demo_pv.sql' -a X"$1" != X"-c"; then
+ echo 'x - skipping psql_demo_pv.sql (File already exists)'
+else
+echo 'x - extracting psql_demo_pv.sql (Text)'
+# sed removes the leading "X" guard from here-document lines; the quoted
+# delimiter keeps the shell from expanding anything in the payload.
+sed 's/^X//' << 'SHAR_EOF' > 'psql_demo_pv.sql' &&
+xdb.wrplab seqdb_demo wrplab gstmu;
+SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
+X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
+SELECT acc, descr FROM annot WHERE acc='#' AND db='sp';
+SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
+X WHERE annot.acc='#' AND db='sp';
+X
+SHAR_EOF
+chmod 0644 psql_demo_pv.sql ||
+echo 'restore of psql_demo_pv.sql failed'
+# Report (but do not fail) when the extracted size differs from the size
+# recorded when the archive was built.
+Wc_c="`wc -c < 'psql_demo_pv.sql'`"
+test 336 -eq "$Wc_c" ||
+ echo 'psql_demo_pv.sql: original size 336, current size' "$Wc_c"
+fi
+# ============= pssm_asn_subs.c ==============
+if test -f 'pssm_asn_subs.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping pssm_asn_subs.c (File already exists)'
+else
+echo 'x - extracting pssm_asn_subs.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pssm_asn_subs.c' &&
+/* pssm_asn_subs.c */
+X
+X
+/* $Name: fa_34_26_5 $ - $Id: pssm_asn_subs.c,v 1.15 2007/04/02 18:08:11 wrp Exp $ */
+X
+/* copyright (C) 2005 by William R. Pearson and the U. of Virginia */
+X
+/* this code is designed to parse the ASN.1 binary encoded scoremat
+X object produced by blastpgp -C file.ckpt_asn -u 2 */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+X
+int parse_pssm_asn();
+int parse_pssm2_asn();
+X
+int
+parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
+X unsigned char **query, double ***freqs,
+X char *matrix, int *gap_open, int *gap_extend,
+X double *lambda);
+X
+X
+X
+#define COMPO_NUM_TRUE_AA 20
+X
+/**positions of true characters in protein alphabet*/
+/*
+static int trueCharPositions[COMPO_NUM_TRUE_AA] = {
+X 1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22
+};
+*/
+X
+#define COMPO_LARGEST_ALPHABET 28
+X
+/*
+static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
+X
+static int alphaConvert[COMPO_LARGEST_ALPHABET] = {
+X (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,
+X 16, 19, 17, (-1), 18, (-1), (-1), (-1), (-1), (-1)
+};
+*/
+X
+int pssm_aa_order[20] = { 1, /*A*/
+X 16, /*R*/
+X 13, /*N*/
+X 4, /*D*/
+X 3, /*C*/
+X 15, /*Q*/
+X 5, /*E*/
+X 7, /*G*/
+X 8, /*H*/
+X 9, /*I*/
+X 11, /*L*/
+X 10, /*K*/
+X 12, /*M*/
+X 6, /*F*/
+X 14, /*P*/
+X 17, /*S*/
+X 18, /*T*/
+X 20, /*W*/
+X 22, /*Y*/
+X 19}; /*V*/
+X
+X
+#define ASN_SEQ 48
+#define ASN_SEQOF 49
+X
+#define ASN_PSSM_QUERY 166
+#define ASN_PSSM2_QUERY 162
+X
+#define ASN_PSSM_IS_PROT 160
+#define ASN_PSSM2_MATRIX 161
+#define ASN_PSSM_NROWS 162
+#define ASN_PSSM_NCOLS 163
+X
+#define ASN_PSSM2_NCOLS 163
+#define ASN_PSSM2_NROWS 164
+#define ASN_PSSM_BYCOL 165
+#define ASN_PSSM_INTERMED_DATA 167
+#define ASN_PSSM_FREQS 162
+#define ASN_PSSM2_FREQS 165
+#define ASN_PSSM2_LAMBDA 166
+X
+#define ASN_IS_STR 26
+#define ASN_IS_INT 2
+#define ASN_IS_BOOL 1
+#define ASN_IS_ENUM 10
+X
+/* asn_bstruct - read state for the buffered ASN.1 input used by the
+X parsing routines in this file; it is set up in parse_pssm_asn() and
+X refilled by chk_asn_buf() and read_asn_dest(). */
+struct asn_bstruct {
+X FILE *fd; /* stream the encoded data is read from */
+X unsigned char *buf; /* start of the read buffer (ASN_BUF bytes) */
+X unsigned char *abp; /* current parse position inside buf */
+X unsigned char *buf_max; /* one past the last byte loaded into buf */
+X int len; /* number of bytes in buf after the most recent refill */
+};
+X
+#define ASN_BUF 1024
+X
+/* chk_asn_buf - make sure at least v bytes are available at asnp->abp.
+X
+X If fewer than v bytes remain in the buffer, the unread bytes are moved
+X to the start of the buffer, the rest of the buffer is refilled from
+X asnp->fd, and asnp->abp / asnp->buf_max / asnp->len are updated.
+X
+X Returns asnp->abp (which has also been stored in the structure, so
+X callers may ignore the return value).  Calls exit(1) if v exceeds
+X ASN_BUF or if v bytes cannot be obtained from the file. */
+unsigned char *
+chk_asn_buf(struct asn_bstruct *asnp, int v) {
+X int new_buf;
+X
+X /* a single request can never be larger than the whole buffer */
+X if (v > ASN_BUF) {
+X fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n",
+X v, ASN_BUF);
+X exit(1);
+X }
+X
+X if (asnp->abp + v > asnp->buf_max) {
+X
+X /* move down the left over stuff */
+X asnp->len = asnp->buf_max - asnp->abp;
+X
+X memmove(asnp->buf, asnp->abp, asnp->len);
+X
+X asnp->abp = asnp->buf;
+X new_buf = ASN_BUF - asnp->len; /* free space after the carried-over bytes */
+X
+X /* new_buf is reused: on input the space available, on output the bytes read */
+X if (!feof(asnp->fd) &&
+X (new_buf=fread(asnp->buf + asnp->len, sizeof(char), new_buf, asnp->fd)) != 0) {
+X asnp->len += new_buf;
+X }
+X
+X asnp->buf_max = asnp->buf + asnp->len;
+X
+X /* still short after the refill: the file ended before v bytes arrived */
+X if (asnp->len < v) {
+X fprintf(stderr, " Unable to read %d bytes\n",v);
+X exit(1);
+X }
+X }
+X /* otherwise, v bytes are currently in the buffer */
+X
+X return asnp->abp;
+}
+X
+/* read_asn_dest - copy the next v bytes of ASN.1 data into oct_str.
+X
+X   Unlike chk_asn_buf(), this routine can deliver values longer than
+X   ASN_BUF (1024): whatever is left in the buffer is copied first and the
+X   buffer is then refilled from asnp->fd as often as necessary.
+X
+X   asnp    - buffered input state
+X   v       - number of bytes wanted
+X   oct_str - destination, must hold at least v bytes
+X   o_len   - capacity of oct_str
+X
+X   Returns the parse position just past the copied bytes.  When the data
+X   was already buffered, asnp->abp itself is NOT advanced (callers assign
+X   the return value); after a refill asnp->abp is updated as well.
+X   Calls exit(1) if v is negative, if v > o_len, or if the file ends
+X   before v bytes have been read. */
+unsigned char *
+read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) {
+X  int n_read;
+X  int n_wanted = v;
+X  unsigned char *oct_ptr;
+X
+X  /* a negative length would become a huge size_t in memmove() */
+X  if (v < 0 || v > o_len) {
+X    fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n",
+X	    v, o_len);
+X    exit(1);
+X  }
+X
+X  /* simple case: everything is already in the buffer */
+X  if (asnp->abp + v <= asnp->buf_max) {
+X    memmove(oct_str, asnp->abp, v);
+X    return asnp->abp+v;
+X  }
+X
+X  /* copy out the left over stuff, then start reading fresh buffers */
+X  asnp->len = asnp->buf_max - asnp->abp;
+X
+X  memmove(oct_str, asnp->abp, asnp->len);
+X  oct_ptr = oct_str+asnp->len;
+X  v -= asnp->len;
+X
+X  asnp->abp = asnp->buf;
+X
+X  while ((n_read=fread(asnp->buf, sizeof(char), ASN_BUF, asnp->fd)) != 0) {
+X    asnp->len = n_read;
+X    asnp->buf_max = asnp->buf + asnp->len;
+X
+X    if (v <= n_read) {	/* we have it all this time */
+X      memmove(oct_ptr, asnp->buf, v);
+X      asnp->len -= v;
+X      asnp->abp = asnp->buf + v;
+X      return asnp->abp;
+X    }
+X
+X    /* we need to read some more; advance the destination so the next
+X       chunk is appended instead of overwriting this one */
+X    memmove(oct_ptr, asnp->buf, n_read);
+X    oct_ptr += n_read;
+X    v -= n_read;
+X  }
+X
+X  /* the file ended before the value was complete */
+X  fprintf(stderr, " Unable to read %d bytes\n", n_wanted);
+X  exit(1);
+}
+X
+/* get_astr_bool - decode a BOOLEAN (tag ASN_IS_BOOL) at the parse position.
+X
+X   *val receives the value byte, or 0 when the tag is not found.  Two
+X   further bytes are always stepped over before returning.  Returns the
+X   updated parse position, which is also left in asnp->abp. */
+unsigned char *
+get_astr_bool(struct asn_bstruct *asnp, int *val) {
+X
+X  int n_bytes;
+X  int result = 0;
+X
+X  asnp->abp = chk_asn_buf(asnp,5);
+X
+X  if (*asnp->abp++ == ASN_IS_BOOL) {
+X    n_bytes = *asnp->abp++;
+X    if (n_bytes != 1) {
+X      fprintf(stderr, "boolean length != 1 : %d\n", n_bytes);
+X    }
+X    /* one value byte is consumed whether or not the length looked right */
+X    result = *asnp->abp++;
+X  }
+X  else {
+X    fprintf(stderr," bool missing\n");
+X  }
+X
+X  asnp->abp += 2;	/* skip over null's */
+X  *val = result;
+X  return asnp->abp;
+}
+X
+/* get_astr_int - decode an INTEGER (tag ASN_IS_INT) at the parse position.
+X
+X   The content bytes are accumulated most-significant byte first into
+X   *val; the two bytes following the value are then skipped.  When the
+X   tag is not found a message is printed, *val is set to 0 and only the
+X   tag byte has been consumed.  Returns the updated parse position,
+X   which is also left in asnp->abp. */
+unsigned char *
+get_astr_int(struct asn_bstruct *asnp,
+X	     int *val) {
+X
+X  int n_bytes;
+X  int total = 0;
+X
+X  asnp->abp = chk_asn_buf(asnp,8);
+X
+X  if (*asnp->abp++ == ASN_IS_INT) {
+X    for (n_bytes = *asnp->abp++; n_bytes > 0; n_bytes--) {
+X      total = (total * 256) + *asnp->abp++;
+X    }
+X    asnp->abp += 2;	/* skip over null's */
+X  }
+X  else {
+X    fprintf(stderr," int missing\n");
+X  }
+X
+X  *val = total;
+X  return asnp->abp;
+}
+X
+/* get_astr_enum - decode an ENUMERATED (tag ASN_IS_ENUM) at the parse
+X   position.
+X
+X   Works like get_astr_int(): content bytes are accumulated
+X   most-significant byte first into *val and the two following bytes are
+X   skipped.  When the tag is not found a message is printed and *val is
+X   set to 0.  Returns the updated parse position (also in asnp->abp). */
+unsigned char *
+get_astr_enum(struct asn_bstruct *asnp, int *val) {
+X
+X  int n_bytes;
+X  int total = 0;
+X
+X  asnp->abp = chk_asn_buf(asnp,5);
+X
+X  if (*asnp->abp++ == ASN_IS_ENUM) {
+X    for (n_bytes = *asnp->abp++; n_bytes > 0; n_bytes--) {
+X      total = (total * 256) + *asnp->abp++;
+X    }
+X    asnp->abp += 2;	/* skip over null's */
+X  }
+X  else {
+X    fprintf(stderr," enum missing\n");
+X  }
+X
+X  *val = total;
+X
+X  return asnp->abp;
+}
+X
+/* get_astr_packedfloat - decode a REAL (tag 9) stored as text.
+X
+X   The value is expected as a length byte, a leading 0 byte and then
+X   (length-1) characters that sscanf("%lg") can parse; the two-byte
+X   form 0,'0' is taken as 0.0.
+X
+X   *val receives the number; it is set to 0.0 when the tag or the value
+X   is missing or the text cannot be parsed, and to -1.0 when the leading
+X   byte is not 0.  Text longer than the 63 characters the scratch buffer
+X   can hold is reported and truncated rather than written past the end
+X   of the buffer.  Returns the updated parse position (also in
+X   asnp->abp). */
+unsigned char *
+get_astr_packedfloat(struct asn_bstruct *asnp, double *val) {
+X
+X  int v_len, n_chars;
+X  char tmp_str[64];
+X
+X  asnp->abp = chk_asn_buf(asnp,2);
+X
+X  if (*asnp->abp++ != 9) { /* check for packed float */
+X    fprintf(stderr," float missing\n");
+X    *val = 0;
+X    return asnp->abp;
+X  }
+X
+X  v_len = *asnp->abp++;
+X
+X  /* a zero-length value has no leading byte to test and would index
+X     tmp_str[-1] below */
+X  if (v_len < 1) {
+X    fprintf(stderr," packedfloat - empty value\n");
+X    *val = 0.0;
+X    return asnp->abp;
+X  }
+X
+X  if (v_len > 63) {
+X    fprintf(stderr," real string too long: %d\n",v_len);
+X  }
+X
+X  asnp->abp = chk_asn_buf(asnp,v_len);
+X
+X  if (v_len == 2 && *asnp->abp == '\0' && *(asnp->abp+1)=='0') {
+X    asnp->abp += 2;
+X    *val = 0.0;
+X    return asnp->abp;
+X  }
+X
+X  /* copy and scan it */
+X  if (*asnp->abp != '\0') {
+X    fprintf(stderr, " packedfloat - expected 0, got %d\n", *asnp->abp);
+X    *val = -1.0;
+X    return asnp->abp;
+X  }
+X  asnp->abp++;
+X
+X  /* copy only bytes chk_asn_buf() has guaranteed, and never more than
+X     tmp_str can hold */
+X  n_chars = v_len - 1;
+X  if (n_chars > (int)sizeof(tmp_str) - 1) {
+X    n_chars = (int)sizeof(tmp_str) - 1;
+X  }
+X  memcpy(tmp_str, asnp->abp, n_chars);
+X  tmp_str[n_chars] = '\0';
+X
+X  *val = 0.0;		/* defined result if the text does not parse */
+X  sscanf(tmp_str,"%lg",val);
+X
+X  asnp->abp += v_len-1;
+X  return asnp->abp;
+}
+X
+/* get_astr_str - decode a string (tag ASN_IS_STR) into text.
+X
+X   asnp  - buffered input state
+X   text  - destination buffer
+X   t_len - capacity of text in bytes, including the terminating NUL
+X
+X   The length may be in short form (one byte) or long form (a byte
+X   greater than 128 whose low 7 bits give the number of length bytes
+X   that follow).  The string is copied with read_asn_dest(), NUL
+X   terminated, and the two bytes following it are skipped.  When the tag
+X   is not found a message is printed and text is left empty.
+X
+X   Returns the updated parse position (also in asnp->abp).  Calls
+X   exit(1) if the string does not fit in text. */
+unsigned char *
+get_astr_str(struct asn_bstruct *asnp, char *text, int t_len) {
+X
+X  int v_len, n_len_bytes;
+X
+X  asnp->abp = chk_asn_buf(asnp,2);
+X
+X  text[0] = '\0';
+X  if (*asnp->abp++ != ASN_IS_STR) { /* check for str */
+X    fprintf(stderr," str missing\n");
+X    return asnp->abp;
+X  }
+X
+X  v_len = *asnp->abp++;
+X  if (v_len > 128) { /* need to read the length from the next bytes */
+X    /* use a separate counter: t_len is still needed below as the
+X       capacity of text */
+X    n_len_bytes = v_len & 0x7f;
+X
+X    asnp->abp = chk_asn_buf(asnp,n_len_bytes);
+X
+X    for (v_len = 0; n_len_bytes > 0; n_len_bytes--) {
+X      v_len = (v_len << 8) + *asnp->abp++;
+X    }
+X  }
+X
+X  /* one byte of text is reserved for the terminating NUL */
+X  if (v_len < 0 || v_len >= t_len) {
+X    fprintf(stderr, " get_astr_str - cannot read %d byte string into %d buffer\n",
+X	    v_len, t_len);
+X    exit(1);
+X  }
+X
+X  /* read v_len bytes */
+X  asnp->abp = read_asn_dest(asnp,v_len, (unsigned char *)text, t_len - 1);
+X  text[v_len] = '\0';
+X
+X  asnp->abp += 2; /* skip over last nulls */
+X
+X  return asnp->abp;
+}
+X
+#define ASN_BIOSEQ_SEQ 160
+#define ASN_BIOSEQ_ID 160
+#define ASN_BIOSEQ_ID_VAL 160
+X
+#define ASN_BIOSEQ_ID_LOCAL 161
+#define ASN_BIOSEQ_ID_GIBBSQ 162
+#define ASN_BIOSEQ_ID_GIBBMT 163
+#define ASN_BIOSEQ_ID_GB 164
+#define ASN_BIOSEQ_ID_EMBL 165
+#define ASN_BIOSEQ_ID_PIR 166
+#define ASN_BIOSEQ_ID_SP 167
+#define ASN_BIOSEQ_ID_PATENT 168
+#define ASN_BIOSEQ_ID_OTHER 169
+#define ASN_BIOSEQ_ID_GEN 170
+#define ASN_BIOSEQ_ID_GI 171
+X
+#define ASN_BIOSEQ_TEXTID_NAME 160
+#define ASN_BIOSEQ_TEXTID_ACC 161
+#define ASN_BIOSEQ_TEXTID_REL 162
+#define ASN_BIOSEQ_TEXTID_VER 163
+X
+#define ASN_BIOSEQ_DESCR 161
+#define ASN_BIOSEQ_INST 162
+#define ASN_BIOSEQ_TITLE 164
+#define ASN_BIOSEQ_INST_REPR 160
+#define ASN_BIOSEQ_INST_MOL 161
+#define ASN_BIOSEQ_INST_LEN 162
+#define ASN_BIOSEQ_INST_TOPOL 166
+#define ASN_BIOSEQ_INST_SEQD 167
+#define ASN_OCTET_STR 65
+#define ASN_NCBIeaa 65
+X
+/* get_astr_seqdescr - read the title string of a sequence description.
+X
+X Expects an ASN_SEQOF tag followed by an ASN_BIOSEQ_TITLE tag and a
+X string, which is stored in descr (read with a limit of MAX_STR).  A
+X missing tag is reported on stderr but parsing continues.  Returns the
+X updated parse position (also in asnp->abp). */
+unsigned char *
+get_astr_seqdescr(struct asn_bstruct *asnp,
+X char *descr) {
+X
+X int end_seq=0; /* incremented when the SEQOF tag is seen; not read afterwards */
+X
+X /* get seqof '1' */
+X /* get 164/128 - title */
+X /* get string */
+X /* pop nulls */
+X
+X asnp->abp = chk_asn_buf(asnp,6);
+X
+X if (*asnp->abp == ASN_SEQOF) {
+X end_seq++;
+X asnp->abp += 2;
+X }
+X else {
+X fprintf(stderr, " missing ASN_SEQOF '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X }
+X
+X if (*asnp->abp == ASN_BIOSEQ_TITLE) {
+X asnp->abp+=2; /* step over the tag and the byte after it */
+X asnp->abp = get_astr_str(asnp, descr, MAX_STR);
+X }
+X else {
+X fprintf(stderr, " missing ASN_BIOSEQ_TITLE '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X }
+X
+X /* make sure the two trailing bytes are buffered before stepping over them */
+X asnp->abp = chk_asn_buf(asnp,2);
+X
+X asnp->abp += 2; /* skip over nulls */
+X
+X return asnp->abp;
+}
+X
+/* get_astr_octstr - read sequence data tagged ASN_NCBIeaa into oct_str.
+X
+X asnp - buffered input state
+X oct_str - destination buffer; it is NUL terminated at index
+X min(q_len,o_len), so it must hold at least o_len+1 bytes
+X o_len - maximum number of data bytes to accept
+X
+X The length is read in short form (one byte) or long form (a byte
+X greater than 128 whose low 7 bits give the number of length bytes).
+X If the tag does not match, nothing is copied and no message is
+X printed, but the tag byte has still been consumed.  Returns the
+X updated parse position (also in asnp->abp). */
+unsigned char *
+get_astr_octstr(struct asn_bstruct *asnp,
+X unsigned char *oct_str,
+X int o_len) {
+X
+X int q_len, v_len;
+X
+X asnp->abp = chk_asn_buf(asnp,2);
+X
+X if (*asnp->abp++ == ASN_NCBIeaa) {
+X /* get length of length */
+X if (*asnp->abp > 128) {
+X v_len = *asnp->abp++ & 0x7f;
+X
+X asnp->abp = chk_asn_buf(asnp,v_len);
+X
+X /* assemble the length, most significant byte first */
+X q_len = 0;
+X while (v_len-- > 0) {
+X q_len *= 256;
+X q_len += *asnp->abp++;
+X }
+X }
+X else {
+X q_len = *asnp->abp++ & 0x7f;
+X }
+X
+X /* read_asn_dest() exits if q_len is larger than o_len */
+X asnp->abp = read_asn_dest(asnp, q_len, oct_str, o_len);
+X
+X oct_str[min(q_len,o_len)]='\0';
+X
+X asnp->abp += 2; /* skip characters and NULL's */
+X }
+X return asnp->abp;
+}
+X
+/* get_astr_seqinst - read the sequence instance (length and residues).
+X
+X   asnp  - buffered input state
+X   query - receives a newly calloc()'ed buffer of *nq+1 bytes holding
+X           the NUL terminated sequence data
+X   nq    - receives the sequence length read from the file
+X
+X   Expected layout (tag/128 pairs):
+X     sequence '0'
+X     160/128/10/len/val - repr enum raw val
+X     161/128/10/len/val - mol enum aa val
+X     162/128/02/len/val - length int val
+X     166/128            - topology (empty)
+X     167/128            - seq-data
+X     65/len+128/len/octet_string
+X     trailing nulls
+X
+X   Missing repr/mol entries are only reported; a missing length or
+X   seq-data entry ends parsing of the instance early.  Returns the
+X   updated parse position (also in asnp->abp).  Calls exit(1) if the
+X   length is negative or the query buffer cannot be allocated. */
+unsigned char *
+get_astr_seqinst(struct asn_bstruct *asnp,
+X		unsigned char **query,
+X		int *nq) {
+X
+X  int tmp;	/* repr/mol values are read but not used */
+X
+X  asnp->abp = chk_asn_buf(asnp,12);
+X
+X  if (*asnp->abp == ASN_SEQ) {
+X    asnp->abp += 2;
+X  }
+X  else {
+X    fprintf(stderr, " missing ASN_SEQ '0': %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X  }
+X
+X  if (*asnp->abp == ASN_BIOSEQ_INST_REPR && *(asnp->abp+1) == 128) {
+X    asnp->abp+=2;
+X    asnp->abp = get_astr_enum(asnp, &tmp);
+X  }
+X  else {
+X    fprintf(stderr, " missing ASN_BIOSEQ_INST_REPR 160: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X  }
+X
+X  if (*asnp->abp == ASN_BIOSEQ_INST_MOL && *(asnp->abp+1) == 128) {
+X    asnp->abp+=2;
+X    asnp->abp = get_astr_enum(asnp, &tmp);
+X  }
+X  else {
+X    fprintf(stderr, " missing ASN_BIOSEQ_INST_MOL 161: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X  }
+X
+X  if (*asnp->abp == ASN_BIOSEQ_INST_LEN) {
+X    asnp->abp+=2;
+X    asnp->abp = get_astr_int(asnp, nq);
+X  }
+X  else {
+X    fprintf(stderr, " missing ASN_BIOSEQ_INST_LEN 162: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X    return asnp->abp;
+X  }
+X
+X  /* the buffer is written to below, so an allocation failure (or a
+X     nonsensical length) cannot simply be reported and ignored */
+X  if (*nq < 0 ||
+X      (*query = (unsigned char *)calloc(*nq + 1, sizeof(char)))==NULL) {
+X    fprintf(stderr, " cannot read %d char query\n", *nq+1);
+X    exit(1);
+X  }
+X
+X  /* topology is optional and carries no data */
+X  if (*asnp->abp == ASN_BIOSEQ_INST_TOPOL && *(asnp->abp+1) == 128 ) {
+X    asnp->abp += 2;
+X  }
+X
+X  if (*asnp->abp == ASN_BIOSEQ_INST_SEQD) {
+X    asnp->abp+=2;
+X    asnp->abp = get_astr_octstr(asnp, *query, *nq );
+X  }
+X  else {
+X    fprintf(stderr, " missing ASN_BIOSEQ_INST_SEQD 167: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X    return asnp->abp;
+X  }
+X
+X  asnp->abp += 4; /* skip over nulls */
+X
+X  return asnp->abp;
+}
+X
+X
+/* get_astr_textid - read the fields of a text sequence identifier.
+X
+X   asnp - buffered input state
+X   name - receives the name string (up to MAX_SSTR), or "" if absent
+X   acc  - receives the accession string (up to MAX_SSTR), or "" if absent
+X
+X   Fields are read until a 0 byte is found.  The release string and the
+X   version number are parsed so that the position stays in step, but
+X   their values are discarded.  An unrecognised field tag is reported
+X   and ends the scan; without that the loop could never advance past
+X   it.  Returns the updated parse position (also in asnp->abp). */
+unsigned char *
+get_astr_textid( struct asn_bstruct *asnp,
+X		char *name,
+X		char *acc) {
+X  int end_seq = 0;
+X  int ver;
+X  char rel[MAX_SSTR];	/* scratch space for the unused release field */
+X
+X  chk_asn_buf(asnp,16);
+X
+X  if (*asnp->abp != ASN_SEQ) {
+X    fprintf(stderr, " Expected ASN_SEQ: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X  }
+X  else {asnp->abp += 2; end_seq++;}
+X
+X  name[0] = acc[0] = '\0';
+X
+X  while (*asnp->abp != '\0') {
+X    if (*asnp->abp == ASN_BIOSEQ_TEXTID_NAME) {
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
+X    }
+X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_ACC) {
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_str(asnp, acc, MAX_SSTR);
+X    }
+X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_REL) {
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_str(asnp, rel, MAX_SSTR);
+X    }
+X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_VER) {
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_int(asnp, &ver);
+X    }
+X    else {
+X      /* nothing here consumes this tag, so looping again cannot help */
+X      fprintf(stderr, " unexpected text-id tag: %0x %0x\n",*asnp->abp, asnp->abp[1]);
+X      break;
+X    }
+X  }
+X  asnp->abp += 4;
+X  while (end_seq-- > 0) { asnp->abp += 4; }
+X  return asnp->abp;
+}
+X
+/* get_astr_query - read the query sequence entry of a PSSM file.
+X
+X asnp - buffered input state
+X gi - receives the integer identifier; set to 0 for a local id
+X name - receives the sequence name (read with a limit of MAX_SSTR)
+X acc - receives the accession; set to "" for a local id
+X descr - receives the description, or "" when none is present
+X query - receives the allocated sequence (see get_astr_seqinst())
+X nq - receives the sequence length
+X
+X The entry is parsed in order: enclosing tags, identifier, optional
+X description, sequence instance.  A missing mandatory tag is reported
+X on stderr and parsing of the entry stops at that point, leaving the
+X remaining outputs untouched.  Returns the updated parse position
+X (also in asnp->abp). */
+unsigned char *
+get_astr_query(struct asn_bstruct *asnp,
+X int *gi,
+X char *name,
+X char *acc,
+X char *descr,
+X unsigned char **query,
+X int *nq
+X ) {
+X
+X int end_seq = 0; /* incremented below but not read afterwards */
+X
+X asnp->abp = chk_asn_buf(asnp,32);
+X
+X if (*asnp->abp != ASN_BIOSEQ_SEQ) {
+X fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else { asnp->abp += 2;}
+X
+X /* either a SEQ or a SEQOF tag is accepted here */
+X if (*asnp->abp != ASN_SEQ && *asnp->abp != ASN_SEQOF ) {
+X fprintf(stderr, "Bioseq - missing SEQUENCE tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X end_seq++;
+X asnp->abp += 2;
+X }
+X
+X if (*asnp->abp != ASN_BIOSEQ_ID) {
+X fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X asnp->abp += 2;
+X if (*asnp->abp != ASN_SEQOF) {
+X fprintf(stderr, "missing bioseq/id/SEQOF tag: %d\n",*asnp->abp);
+X return asnp->abp;
+X }
+X else {
+X asnp->abp += 2;
+X /* optional wrapper around the identifier value */
+X if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
+X
+X if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
+X asnp->abp+=2;
+X asnp->abp = get_astr_int(asnp, gi);
+X }
+X
+X /* a local id supplies only a name; database ids supply name and accession */
+X if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
+X *gi = 0;
+X acc[0] = '\0';
+X
+X asnp->abp+=2;
+X asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
+X asnp->abp += 2;
+X }
+X else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
+X *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
+X *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
+X
+X asnp->abp+=2;
+X asnp->abp = get_astr_textid(asnp, name, acc);
+X }
+X }
+X }
+X
+X /* step over zero byte pairs left after the identifier */
+X while (*asnp->abp == 0) asnp->abp += 2;
+X
+X if (*asnp->abp == ASN_BIOSEQ_DESCR) {
+X asnp->abp+=2;
+X asnp->abp = get_astr_seqdescr(asnp, descr);
+X asnp->abp += 2; /* skip nulls */
+X }
+X else { descr[0] = '\0';}
+X
+X /* NOTE(review): this test is for ASN_BIOSEQ_INST, but the message
+X below still says "missing ID tag" */
+X if (*asnp->abp != ASN_BIOSEQ_INST) {
+X fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X asnp->abp += 2;
+X asnp->abp = get_astr_seqinst(asnp, query, nq);
+X asnp->abp += 2; /* skip nulls */
+X }
+X return asnp->abp;
+}
+X
+/* get_astr_query2 - read the query sequence entry of a version 2 PSSM
+X file (called from parse_pssm2_asn()).
+X
+X Parameters and results are the same as for get_astr_query().  The
+X visible differences are that the second tag must be ASN_SEQOF (ASN_SEQ
+X is not accepted) and that the ASN_SEQOF tag after the id tag is
+X optional rather than required.  Returns the updated parse position
+X (also in asnp->abp). */
+unsigned char *
+get_astr_query2(struct asn_bstruct *asnp,
+X int *gi,
+X char *name,
+X char *acc,
+X char *descr,
+X unsigned char **query,
+X int *nq
+X ) {
+X
+X int end_seq = 0; /* incremented below but not read afterwards */
+X
+X asnp->abp = chk_asn_buf(asnp,32);
+X
+X if (*asnp->abp != ASN_BIOSEQ_SEQ) {
+X fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else { asnp->abp += 2;}
+X
+X if (*asnp->abp != ASN_SEQOF ) {
+X fprintf(stderr, "Bioseq2 - missing SEQOF tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X end_seq++;
+X asnp->abp += 2;
+X }
+X
+X if (*asnp->abp != ASN_BIOSEQ_ID) {
+X fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X asnp->abp += 2;
+X /* the SEQOF tag is skipped if present, not required */
+X if (*asnp->abp == ASN_SEQOF) {
+X asnp->abp += 2;
+X }
+X
+X /* optional wrapper around the identifier value */
+X if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
+X
+X if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
+X asnp->abp+=2;
+X asnp->abp = get_astr_int(asnp, gi);
+X }
+X
+X /* a local id supplies only a name; database ids supply name and accession */
+X if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
+X *gi = 0;
+X acc[0] = '\0';
+X
+X asnp->abp+=2;
+X asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
+X asnp->abp += 2;
+X }
+X else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
+X *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
+X *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
+X
+X asnp->abp+=2;
+X asnp->abp = get_astr_textid(asnp, name, acc);
+X }
+X }
+X
+X /* step over zero byte pairs left after the identifier */
+X while (*asnp->abp == 0) asnp->abp += 2;
+X
+X if (*asnp->abp == ASN_BIOSEQ_DESCR) {
+X asnp->abp+=2;
+X asnp->abp = get_astr_seqdescr(asnp, descr);
+X asnp->abp += 2; /* skip nulls */
+X }
+X else { descr[0] = '\0';}
+X
+X /* NOTE(review): this test is for ASN_BIOSEQ_INST, but the message
+X below still says "missing ID tag" */
+X if (*asnp->abp != ASN_BIOSEQ_INST) {
+X fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
+X return asnp->abp;
+X }
+X else {
+X asnp->abp += 2;
+X asnp->abp = get_astr_seqinst(asnp, query, nq);
+X asnp->abp += 2; /* skip nulls */
+X }
+X return asnp->abp;
+}
+X
+/* get_pssm_freqs - read n_rows * n_cols frequency values into freqs.
+X
+X   asnp   - buffered input state
+X   freqs  - destination, indexed freqs[column][row]
+X   n_rows - number of rows (values per column)
+X   n_cols - number of columns
+X   by_row - 0: the file lists all rows of column 0, then column 1, ...
+X            non-zero: the file lists all columns of row 0, then row 1, ...
+X
+X   An optional leading ASN_SEQ tag is stepped over together with its
+X   two closing bytes; two further bytes are always skipped at the end.
+X   Returns the updated parse position (also in asnp->abp). */
+unsigned char *
+get_pssm_freqs(struct asn_bstruct *asnp,
+X	       double **freqs,
+X	       int n_rows,
+X	       int n_cols,
+X	       int by_row) {
+X
+X  int row, col;
+X  int wrapped;
+X  double cell;
+X
+X  asnp->abp = chk_asn_buf(asnp,4);
+X
+X  wrapped = (*asnp->abp == ASN_SEQ);
+X  if (wrapped) { asnp->abp += 2; }
+X
+X  if (by_row) {
+X    for (row = 0; row < n_rows; row++) {
+X      for (col = 0; col < n_cols; col++) {
+X	asnp->abp = get_astr_packedfloat(asnp, &cell);
+X	freqs[col][row] = cell;
+X      }
+X    }
+X  }
+X  else {
+X    for (col = 0; col < n_cols; col++) {
+X      for (row = 0; row < n_rows; row++) {
+X	asnp->abp = get_astr_packedfloat(asnp, &cell);
+X	freqs[col][row] = cell;
+X      }
+X    }
+X  }
+X
+X  /* closing bytes of the wrapper (if any), then of the enclosing entry */
+X  asnp->abp += (wrapped ? 4 : 2);
+X  return asnp->abp;
+}
+X
+/* get_pssm_intermed - read the intermediate-data entry of a PSSM.
+X
+X   If the entry starts with ASN_SEQ and contains an ASN_PSSM_FREQS
+X   field, the frequencies are read into freqs with get_pssm_freqs();
+X   the arguments n_rows, n_cols and by_row are passed through
+X   unchanged.  Returns the updated parse position (also in asnp->abp). */
+unsigned char *
+get_pssm_intermed(struct asn_bstruct *asnp,
+X		  double **freqs,
+X		  int n_rows,
+X		  int n_cols,
+X		  int by_row) {
+X
+X  asnp->abp = chk_asn_buf(asnp,4);
+X
+X  /* nothing to parse: just step over two bytes */
+X  if (*asnp->abp != ASN_SEQ) {
+X    asnp->abp += 2;
+X    return asnp->abp;
+X  }
+X
+X  asnp->abp += 2;
+X
+X  if (*asnp->abp == ASN_PSSM_FREQS) {
+X    asnp->abp += 2;
+X    asnp->abp = get_pssm_freqs(asnp, freqs, n_rows, n_cols, by_row);
+X  }
+X
+X  /* nulls closing the sequence, then the entry itself */
+X  asnp->abp += 4;
+X  return asnp->abp;
+}
+X
+X
+#define ASN_PSSM_PARAMS 161
+#define ASN_PSSM_PARAMS_PSEUDOCNT 160
+#define ASN_PSSM_PARAMS_RPSPARAMS 161
+#define ASN_PSSM_RPSPARAMS_MATRIX 160
+#define ASN_PSSM_RPSPARAMS_GAPOPEN 161
+#define ASN_PSSM_RPSPARAMS_GAPEXT 162
+X
+/* get_pssm_rpsparams - read the matrix name and gap penalties.
+X
+X   asnp     - buffered input state
+X   matrix   - receives the matrix name (read with a limit of MAX_SSTR)
+X   gap_open - receives the gap-open value
+X   gap_ext  - receives the gap-extension value
+X
+X   Each field is read only if its tag is present; outputs for absent
+X   fields are left untouched.  Returns the updated parse position (also
+X   in asnp->abp). */
+unsigned char *
+get_pssm_rpsparams(struct asn_bstruct *asnp,
+X		   char *matrix,
+X		   int *gap_open,
+X		   int *gap_ext) {
+X
+X  int in_sequence;
+X
+X  asnp->abp = chk_asn_buf(asnp,4);
+X
+X  in_sequence = (*asnp->abp == ASN_SEQ);
+X  if (in_sequence) { asnp->abp += 2; }
+X
+X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_MATRIX) {
+X    asnp->abp += 2;
+X    asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
+X  }
+X
+X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPOPEN) {
+X    asnp->abp += 2;
+X    asnp->abp = get_astr_int(asnp, gap_open);
+X  }
+X
+X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPEXT) {
+X    asnp->abp += 2;
+X    asnp->abp = get_astr_int(asnp, gap_ext);
+X  }
+X
+X  /* step over the two bytes that close the sequence, after making
+X     sure they are in the buffer */
+X  if (in_sequence) {
+X    chk_asn_buf(asnp, 2);
+X    asnp->abp += 2;
+X  }
+X  return asnp->abp;
+}
+X
+/* get_pssm_params - read the parameter entry of a PSSM.
+X
+X   asnp        - buffered input state
+X   pseudo_cnts - receives the pseudo-count value, if present
+X   matrix, gap_open, gap_ext - filled by get_pssm_rpsparams(), if that
+X                 entry is present
+X
+X   Outputs for absent fields are left untouched.  Returns the updated
+X   parse position (also in asnp->abp). */
+unsigned char *
+get_pssm_params(struct asn_bstruct *asnp,
+X		int *pseudo_cnts,
+X		char *matrix,
+X		int *gap_open,
+X		int *gap_ext) {
+X
+X  int n_open = 0;	/* number of enclosing sequences to close */
+X
+X  asnp->abp = chk_asn_buf(asnp,6);
+X
+X  if (*asnp->abp == ASN_SEQ) {
+X    n_open = 1;
+X    asnp->abp += 2;
+X  }
+X
+X  if (*asnp->abp == ASN_PSSM_PARAMS_PSEUDOCNT) {
+X    asnp->abp += 2;
+X    asnp->abp = get_astr_int(asnp, pseudo_cnts);
+X  }
+X
+X  if (*asnp->abp == ASN_PSSM_PARAMS_RPSPARAMS) {
+X    asnp->abp += 2;
+X    asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open, gap_ext);
+X    asnp->abp += 2;
+X  }
+X
+X  /* two closing bytes per enclosing sequence */
+X  asnp->abp += 2 * n_open;
+X  return asnp->abp;
+}
+X
+X
+/* get_pssm2_intermed - allocate the frequency matrix for a version 2
+X   PSSM and fill it from the file.
+X
+X   asnp   - buffered input state
+X   freqs  - receives the new matrix, indexed (*freqs)[column][row]
+X   n_rows - number of rows (values per column)
+X   n_cols - number of columns
+X
+X   The matrix is one array of n_cols row pointers plus one contiguous
+X   block of n_cols * n_rows doubles.  Values are read with
+X   get_pssm_freqs() with by_row == 0.  Returns the updated parse
+X   position.  Calls exit(1) if either allocation fails. */
+unsigned char *
+get_pssm2_intermed(struct asn_bstruct *asnp,
+X		   double ***freqs,
+X		   int n_rows,
+X		   int n_cols) {
+X
+X  int col;
+X  double **col_ptrs;
+X  double *cells;
+X
+X  col_ptrs = (double **) calloc(n_cols, sizeof(double *));
+X  if (col_ptrs == NULL) {
+X    fprintf(stderr, " cannot allocate freq cols - %d\n", n_cols);
+X    exit(1);
+X  }
+X
+X  cells = (double *) calloc(n_cols * n_rows, sizeof(double));
+X  col_ptrs[0] = cells;
+X  if (cells == NULL) {
+X    fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", n_rows, n_cols);
+X    exit(1);
+X  }
+X
+X  /* each column starts n_rows doubles after the previous one */
+X  for (col = 1; col < n_cols; col++) {
+X    col_ptrs[col] = cells + col * n_rows;
+X  }
+X
+X  *freqs = col_ptrs;
+X
+X  chk_asn_buf(asnp, 8);
+X
+X  return get_pssm_freqs(asnp, col_ptrs, n_rows, n_cols, 0);
+}
+X
+/* parse_pssm2_asn - parse the body of a version 2 PSSM file.
+X
+X   Called by parse_pssm_asn() after the file header and version number
+X   have been read.
+X
+X   asnp          - buffered input state, positioned after the version
+X   gi, name, acc, descr, query, nq
+X                 - filled from the query sequence entry, if present
+X   n_rows, n_cols - receive the matrix dimensions
+X   freqs         - receives the allocated frequency matrix
+X   pseudo_cnts   - not written by this routine
+X   matrix        - receives the matrix name (limit MAX_SSTR)
+X   lambda_p      - receives the lambda value
+X
+X   Returns 1 on success, and -1 if the expected start bytes are not
+X   found or if frequency data appears before both dimensions have been
+X   read.  Calls exit(1) on a non-positive row or column count. */
+int
+parse_pssm2_asn(struct asn_bstruct *asnp,
+X		int *gi,
+X		char *name,
+X		char *acc,
+X		char *descr,
+X		unsigned char **query,
+X		int *nq,
+X		int *n_rows,
+X		int *n_cols,
+X		double ***freqs,
+X		int *pseudo_cnts,
+X		char *matrix,
+X		double *lambda_p) {
+X
+X  int is_protein;	/* read to keep the position in step; not used */
+X  int have_rows = 0, have_cols = 0;
+X
+X  chk_asn_buf(asnp, 32);
+X
+X  if (memcmp(asnp->abp, "\241\2000\200",4) != 0) {
+X    fprintf(stderr, "improper PSSM2 start\n");
+X    return -1;
+X  }
+X  asnp->abp+=4;
+X
+X  if (*asnp->abp == ASN_BIOSEQ_SEQ ) {
+X    asnp->abp = get_astr_query2(asnp, gi, name, acc, descr, query, nq);
+X  }
+X
+X  /* finish up the nulls */
+X  while (*asnp->abp == '\0') { asnp->abp += 2;}
+X
+X  /* NOTE(review): as written this only rejects input whose tag IS
+X     ASN_PSSM2_QUERY but is not followed by ASN_SEQ; any other tag is
+X     stepped over.  Left unchanged - confirm whether "||" with
+X     "!= ASN_PSSM2_QUERY" was intended. */
+X  if (*asnp->abp == ASN_PSSM2_QUERY &&
+X      asnp->abp[2] != ASN_SEQ ) {
+X    fprintf(stderr, "improper PSSM2 start\n");
+X    return -1;
+X  }
+X  asnp->abp += 4;
+X
+X  while (*asnp->abp != '\0' ) {
+X
+X    switch (*asnp->abp) {
+X    case ASN_PSSM_IS_PROT :
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_bool(asnp, &is_protein);
+X      break;
+X
+X    case ASN_PSSM2_MATRIX :
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
+X      break;
+X
+X    case ASN_PSSM2_NROWS :
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_int(asnp, n_rows);
+X
+X      if (*n_rows > 0) { have_rows = 1; }
+X      else {
+X	fprintf(stderr, " bad n_row count\n");
+X	exit(1);
+X      }
+X      break;
+X
+X    case ASN_PSSM2_NCOLS :
+X      asnp->abp+=2;
+X      asnp->abp = get_astr_int(asnp, n_cols);
+X      if (*n_cols > 0) {
+X	have_cols = 1;
+X      }
+X      else {
+X	fprintf(stderr, " bad n_col count\n");
+X	exit(1);
+X      }
+X      break;
+X
+X    case ASN_PSSM2_FREQS :
+X      asnp->abp += 4;
+X      if (*asnp->abp == '\0') { asnp->abp += 4;}
+X      break;
+X
+X    case ASN_PSSM2_LAMBDA :
+X      asnp->abp += 2;
+X      asnp->abp = get_astr_packedfloat(asnp,lambda_p);
+X      asnp->abp +=2;	/* skip over end of ASN_PSSM2_LAMBDA */
+X      break;
+X
+X    case ASN_PSSM_INTERMED_DATA :
+X      /* the matrix is sized from *n_rows and *n_cols, so both must
+X	 already have been read from the file */
+X      if (!have_rows || !have_cols) {
+X	fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",
+X		have_rows, have_cols);
+X	return -1;
+X      }
+X      asnp->abp += 2;
+X      asnp->abp = get_pssm2_intermed(asnp, freqs, *n_rows, *n_cols);
+X      asnp->abp += 4;
+X      break;
+X
+X    default: asnp->abp += 2;
+X    }
+X  }
+X
+X
+X  return 1;
+}
+X
+/* parse_pssm_asn - parse a binary ASN.1 PSSM file.
+X
+X   afd           - open file to read from
+X   gi, name, acc, descr, query, nq
+X                 - filled from the query sequence entry, if present
+X   n_rows, n_cols - receive the matrix dimensions
+X   freqs         - receives a newly allocated frequency matrix, indexed
+X                   (*freqs)[column][row]
+X   pseudo_cnts, matrix, gap_open, gap_ext
+X                 - filled from the parameter entry, if present
+X   lambda_p      - only written for version 2 files
+X
+X   A file whose header is followed by an integer is handed to
+X   parse_pssm2_asn() (the integer must be 2); *gap_open and *gap_ext
+X   are then set to 0.
+X
+X   Returns 1 on success and -1 on a bad header, a version mismatch,
+X   missing dimensions or a failed matrix allocation.  Calls exit(1) if
+X   the read buffer cannot be allocated or a dimension is not positive.
+X   The internal read buffer is released before returning. */
+int
+parse_pssm_asn(FILE *afd,
+X	       int *gi,
+X	       char *name,
+X	       char *acc,
+X	       char *descr,
+X	       unsigned char **query,
+X	       int *nq,
+X	       int *n_rows,
+X	       int *n_cols,
+X	       double ***freqs,
+X	       int *pseudo_cnts,
+X	       char *matrix,
+X	       int *gap_open,
+X	       int *gap_ext,
+X	       double *lambda_p) {
+X
+X  int is_protein, pssm_version;
+X  int i, ret_val;
+X  /* these are tested below even when their tags never appeared, so
+X     they must start from a known value */
+X  int have_rows = 0, have_cols = 0, by_col = 0;
+X  double **my_freqs;
+X
+X  struct asn_bstruct asn_str;
+X
+X  if ((asn_str.buf = (unsigned char *)calloc(ASN_BUF, sizeof(char))) == NULL ) {
+X    fprintf(stderr, " cannot allocate asn_buf (%d)\n",ASN_BUF);
+X    exit(1);
+X  }
+X
+X  /* start with the position at the end of an "exhausted" buffer so
+X     that the first chk_asn_buf() call fills it from the file */
+X  asn_str.fd = afd;
+X  asn_str.len = ASN_BUF;
+X  asn_str.abp = asn_str.buf_max = asn_str.buf + ASN_BUF;
+X
+X  chk_asn_buf(&asn_str, 32);
+X
+X  if (memcmp(asn_str.abp, "0\200\240\200",4) != 0) {
+X    fprintf(stderr, "improper PSSM header -");
+X    free(asn_str.buf);
+X    return -1;
+X  }
+X  asn_str.abp+=4;
+X
+X  /* an integer here is the version number of the newer file layout */
+X  if (*asn_str.abp == ASN_IS_INT) {
+X    asn_str.abp = get_astr_int(&asn_str, &pssm_version);
+X    if (pssm_version != 2) {
+X      fprintf(stderr, "PSSM2 version mismatch: %d\n",pssm_version);
+X      free(asn_str.buf);
+X      return -1;
+X    }
+X    *gap_open = *gap_ext = 0;
+X    ret_val = parse_pssm2_asn(&asn_str, gi, name, acc, descr,
+X			      query, nq,
+X			      n_rows, n_cols, freqs,
+X			      pseudo_cnts, matrix,
+X			      lambda_p);
+X    free(asn_str.buf);
+X    return ret_val;
+X  }
+X
+X  if (*asn_str.abp == ASN_SEQ) { asn_str.abp += 2; }
+X
+X  if (*asn_str.abp == ASN_PSSM_IS_PROT ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_astr_bool(&asn_str, &is_protein);
+X  }
+X
+X  if (*asn_str.abp == ASN_PSSM_NROWS ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_astr_int(&asn_str, n_rows);
+X
+X    if (*n_rows > 0) { have_rows = 1; }
+X    else {
+X      fprintf(stderr, " bad n_row count\n");
+X      exit(1);
+X    }
+X  }
+X
+X  if (*asn_str.abp == ASN_PSSM_NCOLS ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_astr_int(&asn_str, n_cols);
+X    if (*n_cols > 0) {
+X      have_cols = 1;
+X    }
+X    else {
+X      fprintf(stderr, " bad n_col count\n");
+X      exit(1);
+X    }
+X  }
+X
+X  if (*asn_str.abp == ASN_PSSM_BYCOL ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_astr_bool(&asn_str, &by_col);
+X  }
+X
+X  /* we have read everything up to the query
+X
+X     n_cols gives us the query length, which we can allocate;
+X  */
+X
+X  if (*asn_str.abp == ASN_PSSM_QUERY ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_astr_query(&asn_str, gi, name, acc, descr, query, nq);
+X    *nq = *n_cols;
+X  }
+X
+X  /* finish up the nulls */
+X  while (*asn_str.abp == '\0') { asn_str.abp += 2;}
+X
+X  if (*asn_str.abp == ASN_PSSM_INTERMED_DATA) {
+X
+X    if (!have_rows || !have_cols) {
+X      fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",
+X	      have_rows, have_cols);
+X      free(asn_str.buf);
+X      return -1;
+X    }
+X
+X    if ((my_freqs = (double **) calloc(*n_cols, sizeof(double *)))==NULL) {
+X      fprintf(stderr, " cannot allocate freq cols - %d\n", *n_cols);
+X      free(asn_str.buf);
+X      return -1;
+X    }
+X
+X    if ((my_freqs[0] = (double *) calloc(*n_cols * *n_rows, sizeof(double)))==NULL) {
+X      fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", *n_rows, *n_cols);
+X      free(my_freqs);
+X      free(asn_str.buf);
+X      return -1;
+X    }
+X    /* the columns share one block; each starts *n_rows doubles after
+X       the previous one */
+X    for (i=1; i < *n_cols; i++) {
+X      my_freqs[i] = my_freqs[i-1] + *n_rows;
+X    }
+X
+X    *freqs = my_freqs;
+X
+X    asn_str.abp+=2;
+X    asn_str.abp = get_pssm_intermed(&asn_str, my_freqs, *n_rows, *n_cols, by_col);
+X    asn_str.abp += 4;
+X  }
+X
+X  if (*asn_str.abp == ASN_PSSM_PARAMS ) {
+X    asn_str.abp+=2;
+X    asn_str.abp = get_pssm_params(&asn_str, pseudo_cnts, matrix, gap_open, gap_ext);
+X  }
+X  else if (*asn_str.abp == 0) {asn_str.abp+=2;}
+X
+X  free(asn_str.buf);
+X  return 1;
+}
+X
+/* parse_pssm_asn_fa - read a PSSM file and reorder its frequencies.
+X
+X   fd           - open file to read from
+X   n_rows_p, n_cols_p - receive the matrix dimensions
+X   query        - receives the allocated query sequence
+X   freq2d       - receives the frequency matrix, (*freq2d)[column][row]
+X   matrix       - receives the scoring matrix name
+X   gap_open_p, gap_extend_p - receive the gap penalties
+X   lambda_p     - receives lambda (version 2 files only)
+X
+X   After parsing, the first COMPO_NUM_TRUE_AA entries of every column
+X   are rewritten so that entry rj holds the value that was stored at
+X   index pssm_aa_order[rj].
+X
+X   Returns 1 on success, the (<= 0) result of parse_pssm_asn() if
+X   parsing failed, and -1 if the row count cannot be reordered safely.
+X
+X   NOTE(review): parse_pssm_asn() can return 1 without having stored a
+X   matrix in *freq2d (file without frequency data); this routine cannot
+X   detect that case. */
+int
+parse_pssm_asn_fa( FILE *fd,
+X		   int *n_rows_p, int *n_cols_p,
+X		   unsigned char **query,
+X		   double ***freq2d,
+X		   char *matrix,
+X		   int *gap_open_p,
+X		   int *gap_extend_p,
+X		   double *lambda_p) {
+X
+X  int qi, rj;
+X  int gi;
+X  double tmp_freqs[COMPO_LARGEST_ALPHABET];
+X  char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR];
+X  int nq;
+X  int pseudo_cnts, ret_val;
+X
+X  /* parse the file */
+X
+X  ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq,
+X			   n_rows_p, n_cols_p, freq2d,
+X			   &pseudo_cnts, matrix, gap_open_p, gap_extend_p,
+X			   lambda_p);
+X
+X  if (ret_val <=0) return ret_val;
+X
+X  /* each column is copied into tmp_freqs[] and COMPO_NUM_TRUE_AA
+X     entries are written back, so the row count must fit both */
+X  if (*n_rows_p < COMPO_NUM_TRUE_AA || *n_rows_p > COMPO_LARGEST_ALPHABET) {
+X    fprintf(stderr, " unexpected PSSM row count: %d (expected %d - %d)\n",
+X	    *n_rows_p, COMPO_NUM_TRUE_AA, COMPO_LARGEST_ALPHABET);
+X    return -1;
+X  }
+X
+X  /* transform the frequencies */
+X
+X  for (qi = 0; qi < *n_cols_p; qi++) {
+X    /* rows the file did not supply read as 0.0, not stack garbage */
+X    for (rj = 0; rj < COMPO_LARGEST_ALPHABET; rj++) {
+X      tmp_freqs[rj] = (rj < *n_rows_p) ? (*freq2d)[qi][rj] : 0.0;
+X    }
+X
+X    for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) {
+X      (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]];
+X    }
+X  }
+X  return 1;
+}
+SHAR_EOF
+chmod 0644 pssm_asn_subs.c ||
+echo 'restore of pssm_asn_subs.c failed'
+Wc_c="`wc -c < 'pssm_asn_subs.c'`"
+test 26268 -eq "$Wc_c" ||
+ echo 'pssm_asn_subs.c: original size 26268, current size' "$Wc_c"
+fi
+# ============= pthr_subs.h ==============
+if test -f 'pthr_subs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping pthr_subs.h (File already exists)'
+else
+echo 'x - extracting pthr_subs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pthr_subs.h' &&
+X
+X
+/* $Name: fa_34_26_5 $ - $Id: pthr_subs.h,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
+X
+X
+#include <pthread.h>
+X
+/* error macro for thread calls */
+X
+#define check(status,string) \
+X if (status != 0) {fprintf(stderr,string); \
+X fprintf(stderr,"%s\n",strerror(status)); } /* error macro */
+X
+/*
+#define check(status,string) \
+X if (status == -1) perror(string) */ /* error macro for thread calls */
+X
+X
+/* Thread bookkeeping objects.  A file that includes this header without
+X   XTERNAL defined gets the definitions; a file that defines XTERNAL
+X   first gets matching extern declarations instead. */
+#ifndef XTERNAL
+pthread_t threads[MAX_WORKERS];
+X
+/* mutex stuff */
+X
+pthread_mutex_t reader_mutex;  /* empty buffer pointer structure lock */
+pthread_mutex_t worker_mutex;  /* full buffer pointer structure lock */
+X
+/* condition variable stuff */
+X
+pthread_cond_t reader_cond_var; /* condition variable for reader */
+pthread_cond_t worker_cond_var; /* condition variable for workers */
+X
+pthread_mutex_t start_mutex;   /* start-up synchronisation lock */
+pthread_cond_t start_cond_var; /* start-up synchronisation condition variable */
+X
+#else
+X
+extern pthread_t threads[];
+X
+/* mutex stuff */
+X
+extern pthread_mutex_t reader_mutex;
+extern pthread_mutex_t worker_mutex;
+X
+/* condition variable stuff */
+X
+extern pthread_cond_t reader_cond_var;
+extern pthread_cond_t worker_cond_var;
+X
+extern pthread_mutex_t start_mutex;
+extern pthread_cond_t start_cond_var;
+X
+#endif
+X
+/* declared in both cases; this header does not define it */
+extern int start_thread;
+SHAR_EOF
+chmod 0644 pthr_subs.h ||
+echo 'restore of pthr_subs.h failed'
+Wc_c="`wc -c < 'pthr_subs.h'`"
+test 1301 -eq "$Wc_c" ||
+ echo 'pthr_subs.h: original size 1301, current size' "$Wc_c"
+fi
+# ============= pthr_subs2.c ==============
+if test -f 'pthr_subs2.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping pthr_subs2.c (File already exists)'
+else
+echo 'x - extracting pthr_subs2.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pthr_subs2.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* modified to do more initialization of work_info here, rather than in main() */
+X
+/* $Name: fa_34_26_5 $ - $Id: pthr_subs2.c,v 1.9 2006/06/22 02:35:05 wrp Exp $ */
+X
+/* this file isolates the pthreads calls from the main program */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+X
+#include "defs.h"
+#include "structs.h" /* mngmsg, libstruct */
+#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
+X
+#include <pthread.h>
+#define XTERNAL
+#include "thr.h"
+#undef XTERNAL
+#include "pthr_subs.h"
+X
+extern void work_thread (struct thr_str *);
+X
+/* init_thr: prepare and start the worker threads.
+X
+X  - copies n0, nm0, qframe, qshuffle and max_tot from m_msg, plus ppst,
+X    aa0, max_work_buf and the worker index, into work_info[0..nthreads-1]
+X  - initialises the reader/worker/start mutexes and condition variables
+X  - creates nthreads threads, each running work_thread(&work_info[i])
+X
+X  Exits the process if nthreads is larger than MAX_WORKERS.  A failing
+X  pthread call is reported on stderr by check(); initialisation then
+X  continues. */
+X
+void init_thr(int nthreads, struct thr_str *work_info,
+X              struct mngmsg m_msg, struct pstruct *ppst,
+X              unsigned char *aa0, int max_work_buf)
+{
+X  int status, i;
+X  pthread_attr_t thread_attr;
+X
+X  if (nthreads > MAX_WORKERS) {
+X    fprintf ( stderr," cannot start %d threads, max: %d\n",
+X              nthreads, MAX_WORKERS);
+X    exit(1);
+X  }
+X
+X  /* set up work_info[] structure, set parameters */
+X
+X  for (i=0; i<nthreads; i++) {
+X    work_info[i].n0 = m_msg.n0;
+X    work_info[i].nm0 = m_msg.nm0;
+X    work_info[i].qframe = m_msg.qframe;
+X    work_info[i].qshuffle = m_msg.qshuffle;
+X    work_info[i].ppst = ppst;
+X    work_info[i].aa0 = aa0;
+X    work_info[i].max_work_buf=max_work_buf;
+X    work_info[i].worker=i;
+X    work_info[i].max_tot=m_msg.max_tot;
+X  }
+X
+X  /* mutex and condition variable initialisation */
+X
+X  status = pthread_mutex_init(&reader_mutex, NULL);
+X  check(status,"Reader_mutex init bad status\n");
+X
+X  status = pthread_mutex_init(&worker_mutex, NULL);
+X  check(status,"Worker_mutex init bad status\n");
+X
+X  status = pthread_cond_init(&reader_cond_var, NULL);
+X  check(status,"Reader_cond_var init bad status\n");
+X
+X  status = pthread_cond_init(&worker_cond_var, NULL);
+X  check(status,"Worker_cond_var init bad status\n");
+X
+X  status = pthread_mutex_init(&start_mutex, NULL);
+X  check(status,"Start_mutex init bad status\n");
+X
+X  status = pthread_cond_init(&start_cond_var, NULL);
+X  check(status,"Start_cond_var init bad status\n");
+X
+X  /* thread attributes shared by every worker */
+X
+X  status = pthread_attr_init( &thread_attr );
+X  check(status,"attribute create bad status\n");
+X
+#ifdef IRIX
+X  /* NOTE(review): 2 is a platform-specific scope value - confirm which
+X     constant it stands for.  pthread_attr_setscope() returns an int
+X     error number, so compare against 0 rather than NULL. */
+X  if (pthread_attr_setscope( &thread_attr, 2) != 0)
+X    status = pthread_attr_setscope( &thread_attr,PTHREAD_SCOPE_PROCESS);
+X  check(status,"set scope on IRIX bad status\n");
+#endif
+X
+#ifdef FASTA_setscope
+X  status = pthread_attr_setscope( &thread_attr, PTHREAD_SCOPE_SYSTEM);
+X  check(status,"set scope bad status\n");
+#endif
+X
+X  /* start the worker threads */
+X
+X  for (i=0; i < nthreads; i++) {
+X    /* work_thread takes a struct thr_str *, hence the cast to the
+X       start-routine type expected by pthread_create() */
+X    status=pthread_create(&threads[i],&thread_attr,
+X                          (void *(*)(void *))&work_thread,&work_info[i]);
+X    check(status,"Pthread_create failed\n");
+X  }
+X
+X  /* the attribute object is only needed while threads are being created;
+X     release it so an initialised attribute object is not left behind */
+X  status = pthread_attr_destroy( &thread_attr );
+X  check(status,"attribute destroy bad status\n");
+}
+X
+/* start_mutex/start_cond_var provide exclusive access to
+X  extern int start_thread */
+X
+/* start_thr: release the threads blocked in wait_thr() by clearing
+X  start_thread while holding start_mutex and broadcasting on
+X  start_cond_var.  Errors from the pthread calls are reported on stderr
+X  by check(). */
+X
+void start_thr()
+{
+X  int status;
+X
+X  /* tell threads to proceed */
+X
+X  status = pthread_mutex_lock(&start_mutex);
+X  check(status,"Start_mutex lock bad status in main\n");
+X
+X  start_thread = 0; /* lower predicate */
+X
+X  /* report a failed broadcast instead of letting the unlock status
+X     overwrite it */
+X  status = pthread_cond_broadcast(&start_cond_var);
+X  check(status,"Start_cond_var broadcast bad status in main\n");
+X
+X  status = pthread_mutex_unlock(&start_mutex);
+X  check(status,"Start_mutex unlock bad status in main\n");
+}
+X
+/* get_rbuf: take the next buffer off the reader list.
+X
+X  With reader_mutex held, waits on reader_cond_var until num_reader_bufs
+X  is non-zero, stores reader_buf[reader_buf_readp] in *cur_buf, advances
+X  reader_buf_readp modulo max_work_buf and decrements num_reader_bufs.
+X  Errors from the pthread calls are reported on stderr by check(). */
+X
+void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+X  int status;
+X
+X  status = pthread_mutex_lock(&reader_mutex); /* lock reader_buf structure */
+X  check(status,"Reader_mutex lock in master bad status\n");
+X
+X  /* no reader bufs: wait for signal to proceed; the count is re-tested
+X     after every wake-up */
+X  while (num_reader_bufs == 0) {
+X    status = pthread_cond_wait(&reader_cond_var,&reader_mutex);
+X    check(status,"Reader_cond_wait in master bad status\n");
+X  }
+X
+X  *cur_buf = reader_buf[reader_buf_readp]; /* get the buffer address */
+X  reader_buf_readp = (reader_buf_readp+1)%(max_work_buf); /* increment index */
+X  num_reader_bufs--;
+X
+X  status = pthread_mutex_unlock(&reader_mutex); /* unlock structure */
+X  check(status,"Reader_mutex unlock in master bad status\n");
+}
+X
+/* put_rbuf: hand a filled buffer to the workers.
+X
+X  With worker_mutex held, stores cur_buf at worker_buf[worker_buf_readp],
+X  advances worker_buf_readp modulo max_work_buf, increments
+X  num_worker_bufs and signals worker_cond_var.  Errors from the pthread
+X  calls are reported on stderr by check(). */
+X
+void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+X  int status;
+X
+X  /* give the buffer to a thread, and wait for more */
+X  status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
+X  check(status,"Worker_mutex lock in master bad status\n");
+X
+X  /* Put buffer onto available for workers list */
+X  worker_buf[worker_buf_readp] = cur_buf;
+X  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+X  num_worker_bufs++; /* increment number of buffers available to workers */
+X
+X  /* Signal one worker to wake and start work; report a failure here
+X     rather than letting the unlock status overwrite it */
+X  status = pthread_cond_signal(&worker_cond_var);
+X  check(status,"Worker_cond_var signal in master bad status\n");
+X
+X  status = pthread_mutex_unlock(&worker_mutex);
+X  check(status,"Worker_mutex unlock in master bad status\n");
+}
+X
+/* put_rbuf_done: hand over the last buffer and shut the workers down.
+X
+X  With worker_mutex held, queues cur_buf exactly as put_rbuf() does, sets
+X  reader_done and broadcasts worker_cond_var.  It then joins threads[0]
+X  to threads[nthreads-1], so it returns only after every one of those
+X  threads has exited.  Errors from the pthread calls are reported on
+X  stderr by check(). */
+X
+void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf)
+{
+X  int status, i;
+X  void *exit_value;
+X
+X  /* give the buffer to a thread */
+X  status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
+X  check(status,"Worker_mutex lock in master bad status\n");
+X
+X  /* Put buffer onto available for workers list */
+X  worker_buf[worker_buf_readp] = cur_buf;
+X  worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
+X  num_worker_bufs++; /* increment number of buffers available to workers */
+X
+X  /* Wake every waiting worker: one takes the last buffer, the others
+X     see reader_done in get_wbuf() */
+X
+X  reader_done = 1;
+X  status = pthread_cond_broadcast(&worker_cond_var);
+X  check(status,"Worker_cond_var broadcast in master bad status\n");
+X
+X  status = pthread_mutex_unlock(&worker_mutex);
+X  check(status,"Worker_mutex unlock in master bad status\n");
+X
+X  /* wait for all worker threads to finish */
+X
+X  for (i=0; i < nthreads; i++) {
+X    status = pthread_join( threads[i], &exit_value);
+X    check(status,"Pthread_join bad status\n");
+X  }
+}
+X
+/* wait_thr: block the calling thread until extern int start_thread
+X  is 0.  The flag is only examined while start_mutex is held. */
+X
+void wait_thr()
+{
+X  int rc;
+X
+X  /* Wait on master to give start signal */
+X  rc = pthread_mutex_lock(&start_mutex);
+X  check(rc,"Start_mutex lock bad status in worker\n");
+X
+X  for (;;) {
+X    if (!start_thread) break;   /* predicate lowered: stop waiting */
+X    rc = pthread_cond_wait(&start_cond_var, &start_mutex);
+X    check(rc,"Start_cond_wait bad status in worker\n");
+X  }
+X
+X  rc = pthread_mutex_unlock(&start_mutex);
+X  check(rc,"Start_mutex unlock bad status in worker\n");
+}
+X
+/* get_wbuf: take the next buffer off the worker list.
+X
+X  With worker_mutex held, waits on worker_cond_var while num_worker_bufs
+X  is 0.  Returns 0 (leaving *cur_buf untouched) if the list is empty and
+X  reader_done is set.  Otherwise stores worker_buf[worker_buf_workp] in
+X  *cur_buf, advances worker_buf_workp modulo max_work_buf, decrements
+X  num_worker_bufs and returns 1.  Errors from the pthread calls are
+X  reported on stderr by check(). */
+X
+int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
+{
+X  int status;
+X
+X  /* get a buffer to work on */
+X  status = pthread_mutex_lock(&worker_mutex);
+X  check(status,"First worker_mutex lock in worker bad status\n");
+X
+X  /* No worker_bufs available: wait for reader to produce some */
+X  while (num_worker_bufs == 0) {
+X    /* Exit if reader has finished */
+X    if (reader_done) {
+X      status = pthread_mutex_unlock(&worker_mutex);
+X      check(status,"Worker_mutex unlock at reader_done in worker bad status\n");
+X      return 0;
+X    }
+X    status = pthread_cond_wait(&worker_cond_var,&worker_mutex);
+X    check(status,"Worker_cond_wait in worker bad status\n");
+X  } /* end while */
+X
+X  /* Get the buffer from list */
+X  *cur_buf = worker_buf[worker_buf_workp];
+X  worker_buf_workp = (worker_buf_workp+1)%(max_work_buf);
+X  num_worker_bufs--;
+X
+X  status = pthread_mutex_unlock(&worker_mutex);
+X  check(status,"First worker_mutex unlock in worker bad status\n");
+X  return 1;
+}
+X
+/* put_wbuf: return a processed buffer to the reader list.
+X
+X  With reader_mutex held, stores cur_buf at reader_buf[reader_buf_workp],
+X  advances reader_buf_workp modulo max_work_buf and increments
+X  num_reader_bufs.  reader_cond_var is signalled only when the count
+X  becomes 1, i.e. when the list was empty before this call.  Errors from
+X  the pthread calls are reported on stderr by check(). */
+X
+void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
+{
+X  int status;
+X
+X  /* put buffer back on list for reader */
+X  status = pthread_mutex_lock(&reader_mutex);
+X  check(status,"Reader_mutex lock in worker bad status\n");
+X
+X  reader_buf[reader_buf_workp] = cur_buf;
+X  reader_buf_workp = (reader_buf_workp+1)%(max_work_buf);
+X  num_reader_bufs++;
+X
+X  /* The list was empty until now: wake reader */
+X  if (num_reader_bufs == 1) {
+X    status = pthread_cond_signal(&reader_cond_var);
+X    check(status,"Reader_cond_var signal in worker bad status\n");
+X  }
+X
+X  status = pthread_mutex_unlock(&reader_mutex);
+X  check(status,"Reader_mutex unlock in worker bad status\n");
+}
+SHAR_EOF
+chmod 0644 pthr_subs2.c ||
+echo 'restore of pthr_subs2.c failed'
+Wc_c="`wc -c < 'pthr_subs2.c'`"
+test 7689 -eq "$Wc_c" ||
+ echo 'pthr_subs2.c: original size 7689, current size' "$Wc_c"
+fi
+# ============= pvcomp.1 ==============
+if test -f 'pvcomp.1' -a X"$1" != X"-c"; then
+ echo 'x - skipping pvcomp.1 (File already exists)'
+else
+echo 'x - extracting pvcomp.1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'pvcomp.1' &&
+.TH PVCOMPFA/PVCOMPSW/v3.4 1 "January, 2003"
+.SH NAME
+.B pv34compfa
+\- scan a protein or DNA sequence library for similar
+sequences using the FASTA algorithm in parallel on a network of
+machines running pvm3.
+X
+.B pv34compsw
+\- scan a protein or DNA sequence library for similar
+sequences using the Smith-Waterman algorithm in parallel on a network
+of machines running pvm3.
+X
+.B ps34compfa
+\- evaluate sequence comparison parameters using the FASTA
+algorithm and super-family-annotated libraries.
+X
+.B ps34compsw
+\- evaluate sequence comparison parameters using the
+Smith-Waterman algorithm and super-family-annotated libraries.
+X
+.SH SYNOPSIS
+.B pv34compfa
+[-Q|q -B -b # -d # -E # -f # -g # -H -i J # -n -o -p #
+\& -R
+.I STATFILE
+\& -r "+n/-m" \& -S -s
+.I SMATRIX
+\& -w # -1 ] query-library reference-library [
+.I ktup
+]
+.B pv34compfa
+[\-QBbcefgHiJnopRrSsw1] \- interactive mode
+X
+.B pv34compsw
+[-Q|q -B -b # -e -f delval -g gapval -i
+\& -n -p # -R -R
+.I STATFILE
+\& -r "+n/-m" \& -S -s
+\& -s
+.I SMATRIX
+X ] query-library reference-library [
+.I ktup
+]
+X
+.B pv34compsw
+[\-QBbefgnpRrsS] \- interactive mode
+X
+.SH DESCRIPTION
+.B pv34compfa
+and
+.B pv34compsw
+compare all of the sequences in one DNA or protein sequence library
+(the query library) with all of the entries in a reference sequence
+library using the FASTA (pv34compfa) or Smith-Waterman (pv34compsw)
+algorithms. For example,
+.B pv34compfa
+can compare a library of protein sequences to all of the sequences in
+the NBRF PIR protein sequence database.
+.B pv34compfa
+and
+.B pv34compsw
+are designed to run in parallel on networks of unix workstations using
+the PVM parallel programming system. (For more information on PVM,
+send email to "netlib@ornl.gov" with the message "send index for pvm3").
+.PP
+.B pv34compfa
+uses the rapid sequence comparison algorithm
+described in Pearson and Lipman, Proc. Natl. Acad. Sci. USA, (1988) 85:2444.
+The program can be invoked either with command line arguments or in
+interactive mode. The optional third argument,
+.I ktup
+sets the sensitivity and speed of the search. If
+.I ktup=2,
+similar regions in the two sequences being compared are found by
+looking at pairs of aligned residues; if
+.I ktup=1,
+single aligned amino acids are examined.
+.I ktup
+can be set to 2 or 1 for protein sequences, or from 1 to 6 for DNA sequences.
+The default if
+.I
+ktup
+is not specified is 2 for proteins and 6 for DNA.
+.PP
+.B pv34compfa
+compares a library of query sequences (there need be only one) to a
+reference sequence library. Normally
+.B pv34compfa
+sorts the output by the
+.I initn
+score. By using the
+.I \-1
+option, sequences are ranked by their
+.B init1
+score. Alternatively, the
+.I \-o
+option causes optimized scores to be calculated for every sequence
+greater than a threshold and the output to be sorted by the optimized
+scores.
+.PP
+.B pv34compsw
+uses the rigorous Smith-Waterman algorithm to compare protein or
+DNA sequences. The gap penalties and scoring matrices can be
+modified with the
+.I -f\c
+\&,
+.I -g\c
+\&, and
+.I -s
+options.
+.PP
+.B pv34compfa
+(and
+.B pv34compsw\c
+\&) will automatically decide whether the query sequence is DNA or
+protein by reading the query sequence as protein and determining
+whether the `amino-acid composition' is more than 85% A+C+G+T.
+.PP
+.B ps34compfa
+and
+.B ps34compsw
+are versions of
+.B pv34compfa
+and
+.B pv34compsw
+that evaluate the quality of a search by reporting how many
+high-scoring related sequences and low-scoring unrelated sequences
+were found. These programs require that both the query library and
+the reference library be annotated with superfamily numbers for every
+sequence in the library.
+.SH OPTIONS
+.LP
+.B Pv34compfa
+and
+.B pv34compsw
+now support all the options of the fasta3(_t) programs.
+.TP
+\-B
+Report z-score, rather than bit-score, in list of best hits.
+.TP
+\-b #
+The number of similarity scores to be shown (10 by default).
+.TP
+\-E #
+Expectation value limit for displaying best scores.
+.TP
+\-d #
+The number of alignments to be shown.
+.TP
+\-f #
+(delval) penalty for the first residue in a gap. -12 by default for proteins.
+.TP
+\-g #
+(gapval) penalty for additional residues in a gap after the first. -2
+by default for proteins.
+.TP
+\-H #
+turn on histogram display (off by default).
+.TP
+\-i
+invert (reverse complement) DNA sequence.
+.TP
+\-J M:N
+start at the M-th sequence in the query library and continue to the
+"N-th". By default, J=1 and the search begins with the first sequence
+and ends with the last, but sometimes it makes sense to start in the
+middle of the query library if a run partially completed, and to
+finish "early" if the analysis will be run on several parallel
+clusters.
+.TP
+\-n
+Force the program to use DNA sequence parameters.
+.TP
+\-p #
+Number of "slave" processors to use. Typically, one less than
+the number of processors available with
+.B pv34compfa
+so that one processor can be used to collate results. With
+.B pv34compsw\c
+\&, it is more efficient to use every processor as a slave and
+not use this option.
+.TP
+\-Q \-q
+Quiet option. The programs will not prompt for input.
+.TP
+\-R file
+(STATFILE) Causes
+.B pv34compfa
+and
+.B pv34compsw
+to write out the sequence identifier, superfamily number (if available),
+and similarity scores to
+.I STATFILE
+for every sequence in the library. These results are not sorted.
+.TP
+\-r
+specify DNA match/mismatch ratio as "+3/-2". Default is "+5/-4".
+The "+" and "-" are required.
+.TP
+\-S
+Treat lower case residues as low complexity regions.
+.TP
+\-s file
+the filename of an alternative scoring matrix file.
+.LP
+.B
+pv34compfa
+only
+.TP
+\-1
+sort similarity scores by
+.I init1
+scores instead of
+.I initn
+scores.
+.TP
+\-c #
+(OPTCUT) the threshold for optimization with the
+.B -o
+option.
+.TP
+\-o
+(no-optimize); causes
+.B pv34compfa
+not to perform the default optimization on all of the sequences in the library
+with
+.B initn
+scores greater than
+.B OPTCUT\c
+\&.
+.TP
+\-y #
+Width for limited optimization (32 by default).
+.SH FILES
+.LP
+Query library files must be in Pearson/FASTA format, e.g.
+.in +0.5i
+.nf
+>seq-id | sfnum descriptive line
+tmlyrghi... (sequence)
+X
+.fi
+.in -0.5i
+.PP
+.B pv34compfa
+and
+.B pv34compsw
+recognize the following library formats: 0 - Pearson/FASTA; 1 - Genbank tape;
+2 - NBRF/PIR Codata; 3 - EMBL/SWISS-PROT; 5 - NBRF/PIR VMS.
+.PP
+.I Scoring matrices \-
+These programs use a different format for the scoring (PAM) matrix
+file from FASTA; they use the PAM matrix file that is used by BLASTP
+and produced by Altschul's "pam.c" program in the BLAST package.
+.SH BUGS
+The program has been tested extensively only with type 0 and type 5
+files. This documentation file may not be up to date.
+.SH AUTHOR
+Bill Pearson
+.br
+wrp@virginia.EDU
+SHAR_EOF
+chmod 0644 pvcomp.1 ||
+echo 'restore of pvcomp.1 failed'
+Wc_c="`wc -c < 'pvcomp.1'`"
+test 6657 -eq "$Wc_c" ||
+ echo 'pvcomp.1: original size 6657, current size' "$Wc_c"
+fi
+# ============= qrhuld.aa ==============
+if test -f 'qrhuld.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping qrhuld.aa (File already exists)'
+else
+echo 'x - extracting qrhuld.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'qrhuld.aa' &&
+>QRHULD LDL receptor precursor - Human
+MGPWGWKLRWTVALLLAAAGTAVGDRCERNEFQCQDGKCISYKWVCDGSAECQDGSDESQETCLSVTCKS
+GDFSCGGRVNRCIPQFWRCDGQVDCDNGSDEQGCPPKTCSQDEFRCHDGKCISRQFVCDSDRDCLDGSDE
+ASCPVLTCGPASFQCNSSTCIPQLWACDNDPDCEDGSDEWPQRCRGLYVFQGDSSPCSAFEFHCLSGECI
+HSSWRCDGGPDCKDKSDEENCAVATCRPDEFQCSDGNCIHGSRQCDREYDCKDMSDEVGCVNVTLCEGPN
+KFKCHSGECITLDKVCNMARDCRDWSDEPIKECGTNECLDNNGGCSHVCNDLKIGYECLCPDGFQLVAQR
+RCEDIDECQDPDTCSQLCVNLEGGYKCQCEEGFQLDPHTKACKAVGSIAYLFFTNRHEVRKMTLDRSEYT
+SLIPNLRNVVA
+LDTEVASNRIYWSDLSQRMICSTQLDRAHGVSSYDTVISRDIQAPDGLAVDWIHSNIYWTDSVLGTVSVA
+DTKGVKRKTLFRENGSKPRAIVVDPVHGFMYWTDWGTPAKIKKGGLNGVDIYSLVTENIQWPNGITLDLL
+SGRLYWVDSKLHSISSIDVNGGNRKTILEDEKRLAHPFSLAVFEDKVFWTDIINEAIFSANRLTGSDVNL
+LAENLLSPEDMVLFHNLTQPRGVNWCERTTLSNGGCQYLCLPAPQINPHSPKFTCACPDGMLLARDMRSC
+LTEAEAAVATQETSTVRLKVSSTAVRTQHTTTRPVPDTSRLPGATPGLTTVEIVTMSHQALGDVAGRGNE
+KKPSSVRALSIVLPIVLLVFLCLGVFLLWKNWRLKNINSINFDNPVYQKTTEDEVHICHNQDGYSYPSRQ
+MVSLEDDVA
+SHAR_EOF
+chmod 0644 qrhuld.aa ||
+echo 'restore of qrhuld.aa failed'
+Wc_c="`wc -c < 'qrhuld.aa'`"
+test 914 -eq "$Wc_c" ||
+ echo 'qrhuld.aa: original size 914, current size' "$Wc_c"
+fi
+# ============= randtest.c ==============
+if test -f 'randtest.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping randtest.c (File already exists)'
+else
+echo 'x - extracting randtest.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'randtest.c' &&
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+X
+/* randtest: seed random() from the microsecond field of the current
+X  time, print the seed, then print n values of random()%100, one per
+X  line.  n is taken from argv[1] with atoi(), or is 10 when no argument
+X  is given. */
+int
+main(int argc, char **argv)
+{
+X  int i, n;
+X  struct timeval t;
+X
+X  if (argc < 2) n = 10;
+X  else n = atoi(argv[1]);
+X
+X  gettimeofday(&t,NULL);
+X  /* tv_usec is not guaranteed to be an int: convert explicitly so the
+X     argument matches the conversion specifier */
+X  printf(" seed: %ld\n",(long)t.tv_usec);
+X  srandom((unsigned int)t.tv_usec);
+X
+X  /* random() returns long, so print it with %ld */
+X  for (i=0; i< n; i++)
+X    printf("%3ld\n",random()%100);
+X
+X  return 0;
+}
+SHAR_EOF
+chmod 0644 randtest.c ||
+echo 'restore of randtest.c failed'
+Wc_c="`wc -c < 'randtest.c'`"
+test 339 -eq "$Wc_c" ||
+ echo 'randtest.c: original size 339, current size' "$Wc_c"
+fi
+# ============= re_getlib.c ==============
+if test -f 're_getlib.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping re_getlib.c (File already exists)'
+else
+echo 'x - extracting re_getlib.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 're_getlib.c' &&
+/* re_getlib.c - re-acquire a sequence given lseek, lcont */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "mm_file.h"
+X
+#define GETLIB (m_fptr->getlib)
+X
+/* re_getlib: call the library's getlib function (m_fptr->getlib) up to
+X  lcont times, reading into aa1, and return the length n1 of the last
+X  chunk read (including the loff carried-over residues when that chunk
+X  was stored after an overlap).  Returns -1 if lcont <= 0, because the
+X  loop body then never runs.
+X
+X  *loffset is reset to 0 and advanced by (n1 - loff) each time getlib
+X  reports a continuation (icont != 0).  l_off_p and m_fptr are passed
+X  through to getlib.
+X
+X  NOTE(review): the meaning of getlib's return value and of icont is
+X  taken from how they are used here - confirm against the getlib
+X  implementations. */
+int
+re_getlib(unsigned char *aa1,
+X int maxn, /* longest aa1 */
+X int maxt3, /* alternate maxn */
+X int loff, /* overlap */
+X int lcont,
+X int term_code,
+X long *loffset, /* offset from real start of sequence */
+X long *l_off_p, /* coordinate of sequence start */
+X struct lmf_str *m_fptr) {
+X
+X unsigned char *aa1ptr;
+X int icont, maxt, ccont, n1;
+X char libstr[20];
+X fseek_t lmark;
+X
+X aa1ptr = aa1;
+X icont=0;
+X
+X *loffset = 0l;
+X maxt = maxn;
+X /* value returned when the loop below does not execute */
+X n1 = -1;
+X for (ccont=0; ccont<=lcont-1; ccont++) {
+X
+X n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
+X
+X /* append term_code (and a 0 after it) when the library is flagged
+X lib_aa and the chunk does not already end with term_code.
+X NOTE(review): aa1ptr[n1-1] assumes n1 > 0 - confirm getlib cannot
+X return 0 or a negative value here */
+X if (term_code && m_fptr->lib_aa && aa1ptr[n1-1]!=term_code) {
+X aa1ptr[n1++]=term_code;
+X aa1ptr[n1]=0;
+X }
+X
+X /* chunk was read behind loff carried-over residues: count them too */
+X if (aa1ptr!=aa1) n1 += loff;
+X
+X if (icont>lcont-1) break;
+X
+X if (icont) {
+X /* continuation: move the last loff residues to the front of aa1 and
+X read the next chunk directly after them, using the maxt3 limit */
+X maxt = maxt3;
+X memcpy(aa1,&aa1[n1-loff],loff);
+X aa1ptr= &aa1[loff];
+X *loffset += n1 - loff;
+X }
+X else {
+X /* no continuation: the next read starts at the beginning of aa1 */
+X maxt = maxn;
+X aa1ptr=aa1;
+X }
+X }
+X return n1;
+}
+SHAR_EOF
+chmod 0644 re_getlib.c ||
+echo 'restore of re_getlib.c failed'
+Wc_c="`wc -c < 're_getlib.c'`"
+test 1184 -eq "$Wc_c" ||
+ echo 're_getlib.c: original size 1184, current size' "$Wc_c"
+fi
+# ============= readme.mpi_3.3 ==============
+if test -f 'readme.mpi_3.3' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.mpi_3.3 (File already exists)'
+else
+echo 'x - extracting readme.mpi_3.3 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.mpi_3.3' &&
+X
+X $Name: fa_34_26_5 $ - $Id: readme.mpi_3.3,v 1.4 2001/08/20 21:18:47 wrp Exp $
+X
+20-August-2001
+X
+This file is obsolete - see readme.v34t0, readme.v33t0, and
+readme.pvm_3.4 for more up-to-date information. With version 3.4, the
+MPI programs are mp34comp*, mu34comp*, etc.
+X
+================
+X
+20 January 2000
+X
+This distribution includes the first full-function MPI implementation of
+the library-vs-library comparison programs. The following programs are
+available:
+X
+Programs to produce conventional scores and alignments:
+X
+mp3compfa protein vs protein, DNA vs DNA
+mp3compsw protein vs protein, DNA vs DNA
+mp3compfx/ DNA vs protein
+mp3comptfx/y protein vs DNA
+X
+Programs to summarize the effectiveness of a search (require
+super-family-labeled databases):
+X
+ms3compfa protein vs protein, DNA vs DNA
+ms3compsw protein vs protein, DNA vs DNA
+ms3compfx/ DNA vs protein
+ms3comptfx/y protein vs DNA
+X
+Programs to report the scores and alignments of the highest scoring
+unrelated sequence (require super-family-labeled databases). These
+programs are used to evaluate the super-family labeling.
+X
+mu3compfa protein vs protein, DNA vs DNA
+mu3compsw protein vs protein, DNA vs DNA
+mucompfx/ DNA vs protein
+mu3comptfx/y protein vs DNA
+X
+Note that the current parallel implementations distribute the second
+database among 'N' parallel workers by approximately dividing the
+database into 'N' parts by seeking into the middle of the database and
+finding the next entry. This strategy fails when the database is a
+single long sequence (the first worker gets the entire database, the
+others get nothing).
+X
+This version has been tested using the MPICH implementation of MPI,
+which is available from:
+X
+X ftp://ftp.mcs.anl.gov/mpi
+X
+See readme.pvm_3.3 for other information about the development of
+these programs. Both the PVM (pv3compfa, etc.) and MPI (mp3compfa,
+etc.) sets of programs use the same sets of source files; differences
+in the two implementations are specified with #define PVM_SRC and
+#define MPI_SRC.
+X
+SHAR_EOF
+chmod 0644 readme.mpi_3.3 ||
+echo 'restore of readme.mpi_3.3 failed'
+Wc_c="`wc -c < 'readme.mpi_3.3'`"
+test 1994 -eq "$Wc_c" ||
+ echo 'readme.mpi_3.3: original size 1994, current size' "$Wc_c"
+fi
+# ============= readme.pvm_3.2 ==============
+if test -f 'readme.pvm_3.2' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.pvm_3.2 (File already exists)'
+else
+echo 'x - extracting readme.pvm_3.2 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.2' &&
+--> August, 1999
+X
+Corrected problem with opt_cut initialization that only appeared
+with p?compfa programs.
+X
+--> v3.26 July, 1999
+X
+pvcomp* programs now use the same method for working with forward and
+reverse strands as the standard fast*3(_t) programs. Thus, statistics
+for DNA sequences should be very similar for pvcompfa and fasta3 or
+fasta3_t.
+X
+X February, 1999
+X
+With release fasta32t02 of the FASTA package, the alignment
+routines for pvcompfa, pvcompsw, etc now work properly
+again.
+X
+The PVM versions of the FASTA and Smith-Waterman search programs
+should now be functionally identical to the multithreaded (fasta3_t,
+ssearch3_t) and non-threaded (fasta3, ssearch3) versions.
+X
+The programs have also been updated to provide similar -m 10
+information to the non-pvm versions. There are some slight
+differences, because the pvcomp* versions are designed to work with
+multiple sequences. But, in general, a script that looks for /^>>>/
+to start an alignment set and /^>>><<</ to end the set will work
+properly.
+X
+--> v3.23 March, 1999
+X
+Modified Makefile.pvm, showsum.c so that showsum.c is used by
+both the complib/_thr and pvcomplib (pvm parallel) versions.
+X
+Corrected bug in reading first query for DNA sequences.
+X
+--> v3.25 May, 1999
+X
+Fixed pvm_showalign.c so that FIRSTNODE (in msg.h) can be 1, rather
+than 0. #define FIRSTNODE 1 is recommended when the virtual machine
+has 8 or more nodes.
+X
+SHAR_EOF
+chmod 0644 readme.pvm_3.2 ||
+echo 'restore of readme.pvm_3.2 failed'
+Wc_c="`wc -c < 'readme.pvm_3.2'`"
+test 1404 -eq "$Wc_c" ||
+ echo 'readme.pvm_3.2: original size 1404, current size' "$Wc_c"
+fi
+# ============= readme.pvm_3.3 ==============
+if test -f 'readme.pvm_3.3' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.pvm_3.3 (File already exists)'
+else
+echo 'x - extracting readme.pvm_3.3 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.3' &&
+X
+X $Name: fa_34_26_5 $ - $Id: readme.pvm_3.3,v 1.13 2000/08/04 18:45:15 wrp Exp $
+X
+================
+pvcomp* - FAQ's, November, 1999
+X
+(The comments below apply to the pv3comp* programs. This problem has
+been addressed in the pv4comp* programs, by dramatically changing
+the way databases are distributed.)
+X
+I believe that the number one reason why the pvcomp* programs do not
+work properly is that the second library must be fully specified.
+If you simply type:
+X
+X pv3compfa query.lib database.lib
+X
+The program will not be able to find database.lib on the worker machines.
+You need to use:
+X
+X pv3compfa query.lib /home/user/lib/database.lib
+X
+and /home/user/lib/database.lib must be accessible to all of the worker
+nodes.
+X
+To find error messages from the workers, look at /tmp/pvml.uid, where
+uid is your unix uid.
+X
+================
+Program summary:
+X
+Programs to produce conventional scores and alignments:
+X
+pv3compfa protein vs protein, DNA vs DNA
+pv3compsw protein vs protein, DNA vs DNA
+pv3compfx/ DNA vs protein
+pv3comptfx/y protein vs DNA
+X
+Programs to summarize the effectiveness of a search (require
+super-family-labeled databases):
+X
+ps3compfa protein vs protein, DNA vs DNA
+ps3compsw protein vs protein, DNA vs DNA
+ps3compfx/ DNA vs protein
+ps3comptfx/y protein vs DNA
+X
+Programs to report the scores and alignments of the highest scoring
+unrelated sequence (require super-family-labeled databases). These
+programs are used to evaluate the super-family labeling.
+X
+pu3compfa protein vs protein, DNA vs DNA
+pu3compsw protein vs protein, DNA vs DNA
+pucompfx/ DNA vs protein
+pu3comptfx/y protein vs DNA
+X
+Note that the current parallel implementations distribute the second
+database among 'N' parallel workers by approximately dividing the
+database into 'N' parts by seeking into the middle of the database and
+finding the next entry. This strategy fails when the database is a
+single long sequence (the first worker gets the entire database, the
+others get nothing).
+X
+================
+Release notes:
+X
+--> July 18, 2000
+X
+Increase SQSZ in pxgetaa.c to 200000 for long Genbank entries. This
+may still not be long enough. This increase may allow overlaps to
+occur.
+X
+--> July 10, 2000
+X
+Corrections to the code for breaking up very long sequences. The last
+portion of a long sequence did not have the correct offset.
+X
+--> July 1, 2000
+X
+Modified pxgetaa.c to read Genbank flatfiles.
+X
+Additional pieces of a long sequence no longer have a '+' at the
+beginning.
+X
+--> June 12, 2000
+X
+Restructured p_complib.c, p_workcomp.c to make the -m 9 display more
+consistent with the fast33(_t) set of programs. The alignment (%_id,
+swscore, boundary) information is now calculated at the do_opt() stage
+of the calculation. This rearrangement uncovered a problem with the
+do_opt() stage (s_func=1) that has been fixed. This has not yet been
+tested with the MPI implementation.
+X
+Many changes were made to allow k_H, k_comp information to be passed
+back so that the -z 6 scaleswn.c (proc_hist_mle2) function could be
+used.
+X
+--> February 6, 2000
+X
+Corrected some problems with proc_hist_ml() to correctly reinitialize
+hist_db_size and num_db_entries.
+X
+--> January 20, 2000
+X
+X The structure of the p[vsu]comp* programs has not changed, but the
+code has been modified to accommodate both PVM and MPI versions of
+the programs from the same source code. Thus, all of the PVM-specific
+code is now surrounded by #ifdef PVM_SRC/#endif. The source files
+pvcomplib.c and pvworkcomp.c have been replaced by p_complib.c and
+p_workcomp.c, respectively. Additional changes were made to ensure
+that "FIRSTNODE" is used appropriately. In general, FIRSTNODE=0 for
+PVM programs (although with > 8 nodes, FIRSTNODE=1 may be more
+effective), but FIRSTNODE=1 for MPI programs.
+X
+X Modest changes were made to reduce warning messages during
+compilation.
+X
+--> January, 2000
+X
+X Modification to hxgetaa.c, pxgetaa.c to handle library sequences,
+such as those from NCBI/NR, with very long comment lines. Additional
+modifications to correct problems with long comments, long DNA
+sequences with pv3comptfx/tfy.
+X
+--> v3.33 December, 1999
+X
+Substantial updates to pvcomplib.c/pvworkcomp.c to improve efficiency
+and to provide pv3compf[xy] and pv3comptf[xy]. Previous versions of
+pvcomplib.c/pvworkcomp.c passed the entire struct mngmsg (structs.h)
+each time a new query was initiated or alignments were required. This
+version sends struct mngmsg only once and sends struct qmng_str
+(w_msg.h), which is much smaller, for the queries and alignments. In
+addition, the buffer size for results is now variable (but can be as
+large as 1200, vs 600 previously), which may improve performance when
+large numbers of workers are available. The maximum number of library
+sequences per worker has been raised to 200,000 from 50,000.
+Nevertheless, very large databases (est_human) may have too many
+entries to be examined by 4 workers.
+X
+It is likely that pv3comptf[xy] may have problems with very long
+sequences. pv3compf[xy]/tf[xy] have not been tested extensively.
+X
+--> v3.32 December, 1999
+X
+Substantial corrections to showsum.c (showbest()) for the case of DNA
+queries, where two scores are calculated for each query. As a result
+of the changes, bptr[] no longer mapped exactly to best[], which
+caused a bug that was very difficult to track down. To ensure that
+bptr[]=best[], bptr[] is now re-initialized for each query.
+X
+The output format has changed significantly as well. Lots of
+redundant /** **/ comments have been removed. An E() value has been
+added to the "equ num:" line in showsum.c.
+X
+The organization of the inner while() loop in pvcomplib.c has been
+modified so that new query sequences can be sent to workers
+immediately as soon as a worker is available, rather than waiting for
+all to finish and the statistical analysis.
+X
+--> v3.30 October, 1999
+X
+The p*comp*/c.work* programs have been renamed to pv3compfa,
+ps3compfa, etc. and c3.work* so that the older version 3.2 programs
+can co-exist with this version.
+X
+Corrected problem with "-n" option that prevented it from functioning
+properly. Include "ACGTCN" in check for DNA query library.
+X
+(from readme.pvm_3.2)
+X
+--> August, 1999
+X
+Corrected problem with opt_cut initialization that only appeared
+with p?compfa programs.
+X
+--> v3.26 July, 1999
+X
+pvcomp* programs now use the same method for working with forward and
+reverse strands as the standard fast*3(_t) programs. Thus, statistics
+for DNA sequences should be very similar for pvcompfa and fasta3 or
+fasta3_t.
+X
+X February, 1999
+X
+With release fasta32t02 of the FASTA package, the alignment
+routines for pvcompfa, pvcompsw, etc now work properly
+again.
+X
+The PVM versions of the FASTA and Smith-Waterman search programs
+should now be functionally identical to the multithreaded (fasta3_t,
+ssearch3_t) and non-threaded (fasta3, ssearch3) versions.
+X
+The programs have also been updated to provide similar -m 10
+information to the non-pvm versions. There are some slight
+differences, because the pvcomp* versions are designed to work with
+multiple sequences. But, in general, a script that looks for /^>>>/
+to start an alignment set and /^>>><<</ to end the set will work
+properly.
+X
+--> v3.23 March, 1999
+X
+Modified Makefile.pvm, showsum.c so that showsum.c is used by
+both the complib/_thr and pvcomplib (pvm parallel) versions.
+X
+Corrected bug in reading first query for DNA sequences.
+X
+--> v3.25 May, 1999
+X
+Fixed pvm_showalign.c so that FIRSTNODE (in msg.h) can be 1, rather
+than 0. #define FIRSTNODE 1 is recommended when the virtual machine
+has 8 or more nodes.
+X
+SHAR_EOF
+chmod 0644 readme.pvm_3.3 ||
+echo 'restore of readme.pvm_3.3 failed'
+Wc_c="`wc -c < 'readme.pvm_3.3'`"
+test 7535 -eq "$Wc_c" ||
+ echo 'readme.pvm_3.3: original size 7535, current size' "$Wc_c"
+fi
+# ============= readme.pvm_3.4 ==============
+if test -f 'readme.pvm_3.4' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.pvm_3.4 (File already exists)'
+else
+echo 'x - extracting readme.pvm_3.4 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.4' &&
+X
+X $Name: fa_34_26_5 $ - $Id: readme.pvm_3.4,v 1.3 2001/09/17 21:18:19 wrp Exp $
+X
+X
+20-August-2001
+X
+The pvm/mpi complib programs have been substantially updated with
+release 3.4. See readme.v34t0 for more information. With version
+3.4, the MPI programs are mp34comp*, mu34comp*, etc.
+X
+A major effect of this change is to disable automatic sequence type
+(protein/DNA) recognition with pv34compfa/mp34compfa. By default,
+protein libraries are assumed. Thus, pv34compfa/mp34compfa require
+the "-n" command line option when running pv34compfa/mp34compfa on DNA
+sequence libraries. This issue does not occur with the other
+programs, which will recognize the appropriate sequence type, because
+it is determined by the program (e.g. pv34compfx requires
+DNA:protein).
+X
+================
+pv4comp* - July, August, 2000
+X
+As noted in readme.pvm_3.3 - the major problem that users have had
+with the PVM/MPI version of the programs is in reading database files
+on the nodes. All previous versions of the program (pvcompfa,
+pv3compfa, etc) had the nodes read the databases in parallel. Thus,
+the database file had to be visible to the nodes, typically through
+NFS on modern clusters of workstations.
+X
+This strategy caused some problems. It did not work on beowulf-type
+systems, where most of the nodes are in an isolated local network and
+do not have NFS access to the outside world. And it made it
+complicated to read more than one database file. Because specialized
+functions were used, the nodes could not read the full set of library
+file formats available to the other fasta programs.
+X
+These problems have been addressed by significantly changing the
+way the pv4comp*/mp4comp* programs read the second "reference"
+library. With these versions, both databases, but specifically the
+reference library, are read by a manager process. The manager process
+then sends the sequences to the workers. This solves problems with
+NFS reads from the workers (they don't do any), and uses exactly the
+same functions as the other fasta programs, so the full set of
+database formats can be read. In addition, the FASTLIBS database
+abbreviations are available. This should also solve problems with
+searches of very long sequences (bacterial genomes); they can now be
+broken up into smaller pieces with the -N ##### option, as with
+fasta33/tfastx33.
+X
+Thus, you are encouraged to use the pv4comp*/mp4comp* versions of the
+programs, which should run more like fasta33.
+X
+================
+Program summary:
+X
+Programs to produce conventional scores and alignments:
+X
+pv4compfa protein vs protein, DNA vs DNA
+pv4compsw protein vs protein, DNA vs DNA
+pv4compfx/ DNA vs protein
+pv4comptfx/y protein vs DNA
+X
+Programs to summarize the effectiveness of a search (require
+super-family-labeled databases):
+X
+ps4compfa protein vs protein, DNA vs DNA
+ps4compsw protein vs protein, DNA vs DNA
+ps4compfx/ DNA vs protein
+ps4comptfx/y protein vs DNA
+X
+Programs to report the scores and alignments of the highest scoring
+unrelated sequence (require super-family-labeled databases). These
+programs are used to evaluate the super-family labeling.
+X
+pu4compfa protein vs protein, DNA vs DNA
+pu4compsw protein vs protein, DNA vs DNA
+pu4compfx/ DNA vs protein
+pu4comptfx/y protein vs DNA
+X
+================
+Release notes:
+X
+--> Aug. 4, 2000
+X
+Compiled and tested mp4compfa/mp4compsw programs.
+X
+--> July 22, 2000
+X
+First release of restructured p2_complib.c/p2_workcomp.c, which use
+the manager program to read both sequence databases and send the
+"reference database" to the workers.
+X
+SHAR_EOF
+chmod 0644 readme.pvm_3.4 ||
+echo 'restore of readme.pvm_3.4 failed'
+Wc_c="`wc -c < 'readme.pvm_3.4'`"
+test 3539 -eq "$Wc_c" ||
+ echo 'readme.pvm_3.4: original size 3539, current size' "$Wc_c"
+fi
+# ============= readme.v30 ==============
+if test -f 'readme.v30' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v30 (File already exists)'
+else
+echo 'x - extracting readme.v30 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v30' &&
+X
+Because of interdependencies in the Makefile, sometimes you must
+type "make" a second time to get everything built.
+X
+June 12, 1996 - fasta30t1
+X
+X Fixed bug in reading blast-format DNA sequence files.
+X Fixed core-dump for some large libraries on some machines.
+X
+June 19, 1996 - fasta30t2
+X
+X Fixed a serious bug in the Smith-Waterman alignment routines used
+X by both fasta3 (dropnfa.c) and ssearch3 (dropgsw.c) that caused
+X the amount of memory required to depend on the library sequence
+X size, rather than the query sequence size.
+X
+X Fixed some memory-overwrite errors in showalign.c
+X
+June 27, 1996 - fasta30t3
+X
+X Found and fixed bugs in comp_thr.c and nxgetaa.c that caused core
+X dumps when reading DNA libraries with long sequences in fasta
+X format.
+X
+July 6, 1996 - fasta30t4
+X
+X ibm_pthread_subs.c available, Makefile.ibm for multiprocessor
+X IBM RS/6000 AIX systems.
+X
+X Finally (?) fixed the previous bug that caused core dumps when
+X reading DNA libraries in fasta format.
+X
+X Corrections to the fastx algorithm.
+X
+July 10, 1996
+X
+X Fixed reading of compressed GCG DNA format.
+X
+SHAR_EOF
+chmod 0644 readme.v30 ||
+echo 'restore of readme.v30 failed'
+Wc_c="`wc -c < 'readme.v30'`"
+test 1070 -eq "$Wc_c" ||
+ echo 'readme.v30: original size 1070, current size' "$Wc_c"
+fi
+# ============= readme.v30t6 ==============
+if test -f 'readme.v30t6' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v30t6 (File already exists)'
+else
+echo 'x - extracting readme.v30t6 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v30t6' &&
+X
+>>August 24, 1996
+X
+New programs - tfastx3, tfastx3_t, compare a protein sequence to
+forward and reverse translations of a DNA sequence database. An excellent
+replacement for tfasta3.
+X
+Sun multiprocessing - change in thr_create() to use all CPU's if available.
+X
+GCG formats - now can search with simple GCG-format query sequences and
+results with GCG format Swissprot and Genpept are more readable.
+X
+>>August 26, 1996
+X
+Fixed bugs in tfastx3(_t) and fastx3(_t) including an ancient problem
+with aatran(). Less redundancy in gcg_ranlib().
+X
+X
+>>August 31, 1996
+X
+Included support for BLOSUM62 (-s BL62) as per documentation.
+X
+Rearranged Makefile's so that they would make everything in one pass.
+X
+>>September 6, 1996
+X
+Corrected yet another problem with the fastx/tfastx code.
+X
+Noticed that searching without optimized scores gave no optimized
+scores on the final list of scores - fixed this.
+X
+The pvm version now does alignments - not thoroughly tested.
+X
+>>September 13, 1996
+X
+Fixed display of best scores to stdout.
+X
+Fixed problem with alignments when -o flag used.
+X
+pvcompfa/pvcompsw have now been tested on DEC Alpha, Solaris X86, and
+SGI PVM implementations. Several bugs were corrected.
+X
+>>September 18, 1996
+X
+Fixed bug in selectbestz() that caused core dumps in pvcomplib.c
+(changes to pvcomplib.c, comp_thr.c, complib.c).
+X
+>>September 23, 1996
+X
+Corrected showalign.c/pvm_showalign.c addressing bug found and fixed
+by Erik Wallin. (erikw@biokemi.su.se).
+X
+>>October 15, 1996
+X
+Corrected bug so alternative scoring matrices are used.
+X
+>>October 22, 1996
+X
+Remove singularities from regression routine.
+X
+-z 0 now means no statistics (same as -z -1).
+X
+No longer show alignment for 0 score.
+X
+>>October 26, 1996
+X
+Fix problem with -b, -d when Z-values disabled.
+X
+>>November 1, 1996
+X
+Altschul-Gish statistical estimates (-z 3) now work properly.
+X
+Fix problem with mean_var==0.0.
+X
+SHAR_EOF
+chmod 0644 readme.v30t6 ||
+echo 'restore of readme.v30t6 failed'
+Wc_c="`wc -c < 'readme.v30t6'`"
+test 1871 -eq "$Wc_c" ||
+ echo 'readme.v30t6: original size 1871, current size' "$Wc_c"
+fi
+# ============= readme.v30t7 ==============
+if test -f 'readme.v30t7' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v30t7 (File already exists)'
+else
+echo 'x - extracting readme.v30t7 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v30t7' &&
+>> October 30, 1996
+X
+A new program, sc_to_e, can be used to calculate expectation values
+from the regression coefficients reported from a search. The
+expectation value is based on similarity score, sequence length, and
+database size.
+X
+>> November 8, 1996
+X
+fasta30t7 differs from fasta30t6 in the amount of information provided
+with the -m 10 option.
+X
+(1) The query and library sequence identifiers are no longer abbreviated.
+X
+(2) New information about the program and program version are provided:
+X
+The new information provided is:
+X
+X mp_name: program name (actually argv[0])
+X mp_ver: main program version (can be different from function version)
+X mp_argv: command line arguments (duplicates argv[0])
+X
+X Some statistical information is provided as well:
+X mp_extrap: XXXX YYY - statistics extrapolated from XXX to YYY
+X mp_stats: indicates type of statistics used for E() value
+X mp_KS: Kolmogorov-Smirnoff statistic
+X
+The "mp_" (main program) information is function independent, while the "pg_"
+information is produced by a particular comparison function (ssearch,
+fastx, fasta, etc). "pg_" should probably be called "fn_", and "mp_"
+called "pg_", but I remain backwards compatible.
+X
+(3) The end of the "parseable" records is denoted with:
+X
+X >>><<<
+X
+(4) There is now a compile-time option, -DM10_CONS, that allows you to
+display a final alignment summary:
+X
+;al_cons:
+X .::.:- .:: .. :. .:.---: : .--.:. :
+.. .--- ..: :: ... :..: .::.:. . .---. . .:
+X : . . . : .. . :..: .--. . : .:. .. : .
+X .:.::: ..:. :
+X
+or, if M10_CONS_L is defined (in addition to M10_CONS), the output is:
+;al_cons:
+X p==p=-mmmp==mpzmm=pmmmmz=p---=mmm=mmp--p=zm=m
+pzmmp---mmzp=m==mzzzm=zp=mz==z=pmzmmz---pmmpmmmp=m
+m=mzmmzmpm=mmmmppmmmpmmmm=pp=mp--pmpm=mp=pmzzm=mmp
+mp=z===mmpz=zm=
+X
+where '=' indicates identical residues, '-' a gap in one or the other
+sequence, 'p' indicates a positive pam value, 'm' indicates a negative
+pam value, and 'z' indicates a zero pam value.
+X
+A typical run now looks like:
+X
+>>>gtm1_mouse.aa, 217 aa vs s library
+; mp_name: fasta3_t
+; mp_ver: version 3.0t7 November, 1996
+; mp_argv: fasta3_t -q -m 10 gtm1_mouse.aa s
+; pg_name: FASTA
+; pg_ver: 3.06 Sept, 1996
+; pg_matrix: BL50
+; pg_gap-pen: -12 -2
+; pg_ktup: 2
+; pg_optcut: 24
+; pg_cgap: 36
+; mp_extrap: 50000 51933
+; mp_stats: Expectation fit: rho(ln(x))= 5.8855+/-0.000527; mu= 1.5386+/- 0.029; mean_var=73.0398+/-15.283
+; mp_KS: 0.0133 (N=29) at 42
+>>GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU).
+; fa_initn: 1490
+; fa_init1: 1490
+; fa_opt: 1490
+; fa_z-score: 1754.6
+; fa_expect: 0
+; sw_score: 1490
+; sw_ident: 1.000
+; sw_overlap: 217
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
+; fa_initn: 1406
+; fa_init1: 1406
+; fa_opt: 1406
+; fa_z-score: 1656.3
+; fa_expect: 0
+; sw_score: 1406
+; sw_ident: 0.931
+; sw_overlap: 217
+>GTM1_MOUSE ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
+KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
+NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
+RYIATPIFSKMAHWSNK
+>GTM1_RAT ..
+; sq_len: 217
+; sq_type: p
+; al_start: 1
+; al_stop: 217
+; al_display_start: 1
+PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKF
+KLGLDFPNLPYLIDGSRKITQSNAIMRYLARKHHLCGETEEERIRADIVE
+NQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
+KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSS
+RYLSTPIFSKLAQWSNK
+;al_cons:
+:::::::::::::::::.:::::::::.::::.::::::.::::::::::
+::::::::::::::::.::::::::.::::::::: ::::::::::::::
+:::::.::::::::::::::::::::::::::::::::::::::::::::
+::::::::::::::::..::::::::::::.:::::::::::::::::::
+::..::::::.:.::::
+>>><<<
+X
+X
+217 residues in 1 query sequences
+18531385 residues in 52205 library sequences
+X Tcomplib (4 proc)[version 3.0t7 November, 1996]
+X start: Fri Nov 8 18:20:26 1996 done: Fri Nov 8 18:20:41 1996
+X Scan time: 38.434 Display time: 2.166
+X
+Function used was FASTA
+X
+================================================================
+X
+>> November 11, 1996
+X
+X --> v30t71
+X
+Made changes to complib.c, comp_thr.c, nxgetaa.c to allow scoring
+matrix to be modified in fastx3, fastx3_t.
+X
+================================================================
+X
+>> November 15, 1996
+X
+X --> v30t72
+X
+nxgetaa.c now accepts query sequences from "stdin" by using "-" as the
+input file name. If DNA sequences are read in this mode, the "-n"
+option must be used.
+X
+>> November 23, 1996
+X
+Included code in nxgetaa.c and Makefile.sgi to get around a bug in SGI's
+sscanf() that prevented compressed GCG databases from being read properly.
+X
+SHAR_EOF
+chmod 0644 readme.v30t7 ||
+echo 'restore of readme.v30t7 failed'
+Wc_c="`wc -c < 'readme.v30t7'`"
+test 5283 -eq "$Wc_c" ||
+ echo 'readme.v30t7: original size 5283, current size' "$Wc_c"
+fi
+# ============= readme.v31t0 ==============
+if test -f 'readme.v31t0' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v31t0 (File already exists)'
+else
+echo 'x - extracting readme.v31t0 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v31t0' &&
+X
+>>November 1, 1997
+X
+X --> v31t0
+X
+version 31t of the fasta program package uses a more modular
+structure for comparison functions. In addition to modular functions
+to initialize, calculate and align sequences, v31 provides a modular
+function for creating the alignment display. This was required for
+fasty and fastf, which have very different alignment strategies from
+the other search programs.
+X
+>>February 13, 1998
+X
+modified nascii[] so that 0, 1, 2 are no longer end of sequence
+characters.
+X
+prss3 added. Unlike prss, prss3 uses -d # to specify the number of
+shuffles.
+X
+>>March 18, 1998
+X
+First public release. Corrected problems with dropfz.c (which is
+used in fasty3, tfasty3). Makefile is well tested, but other Makefile's
+are not. PVM versions not tested.
+X
+>>March 19, 1998
+X
+Problem with unthreaded tfastx3, tfasty3 caused by bug in complib.c
+fixed. All Makefiles (Makefile.alpha, Makefile.sun, Makefile.sgi,
+Makefile.linux) have been tested and work properly. Threaded versions
+do not work on linux (yet). Function labeling problems with fasty3,
+tfasty3 corrected.
+X
+>>March 20, 1998
+X
+X --> v31t02
+X
+Fixed problem with inconsistent openlib() calls that broke BLAST databases
+on some platforms.
+X
+>>March 27, 1998
+X
+X --> v31t04
+X
+Fixed a long standing problem with fastx/tfastx and fasty/tfasty that
+caused various memory allocation problems and core dumps.
+X
+The PVM version works again, but cannot produce alignments. The
+change in the location of the modular display functions will require
+significant changes in the pvm display functions. For the moment,
+showalign() has been commented out.
+X
+Code tested on Macintosh without changes.
+X
+Added some additional information in the results file.
+X
+X
+Please report bugs to wrp@virginia.edu
+X
+>>April 3, 1998
+X
+Removed some debugging code in faatran.c now that fastx/fasty bugs
+seem corrected.
+X
+X FASTA --> v3.14
+X
+Corrected uninitialized array elements in dropnfa.c.
+X
+>>April 10, 1998
+X
+Added facility for specifying SRCH_URL (the URL string that will be
+used to re-search the database) and REF_URL (the URL string that
+will be used to look up the sequence) in url_subs.c. This allows perl
+scripts to provide different databases for re-searching dynamically.
+X
+>>April 16, 1998
+X
+X --> v31t05
+X
+Corrected problem with ignoring ','s in databases (','s are found in
+PIR).
+X
+>>April 18, 1998
+X
+Corrected some problems with sequence names for Entrez lookups and
+re-searching databases.
+X
+Made minor modifications to nxgetaa.c and compacc.c for compatibility
+with Borland 'C' compiler for Win32 systems. Included makefile.tc,
+fasta.rsp, prss.rsp, and test.bat for Borland 'C'/win32.
+X
+>>April 24, 1998
+X
+X --> v31t06
+X
+Fixed another bug in fasty3/tfasty3 alignment routines.
+X
+Added additional information to the do_url1() (url_subs.c) function.
+The re-search URL can now reference the start, stop, and length of the
+library sequence to be re-searched with. For DNA library sequences,
+these values are always in nucleotides, even with tfasta/x/y.
+X
+X
+>>May 12, 1998
+X
+(no version change as v31t06 was not released prior to this)
+X
+Correct nxgetaa.c GETLIB to deal correctly with BLAST NR database
+sequences with exceptionally long title lines.
+X
+Fix bug with long -O results files.
+X
+>>May 18, 1998
+X
+X --> v31t07
+X
+Corrected some bugs in information string lengths (e.g. gstring1,
+stat_str), disabling statistics with -z 0, translation of 'X' by
+saatran() (faatran.c) that caused problems with FASTX.
+X
+A serious bug has been fixed in the FASTX alignment routines.
+For some pathological sequences, % identity increases from < 10%
+to 40%. The version number of the main program has not changed,
+but the version number of the fastx function has changed to 3.2.
+X
+>>June 19, 1998
+X
+X --> v31t08
+X
+Corrected some problems with alignments with -m 10.
+X
+Added -Z db_size option to modify apparent database size for
+expectation value calculation (used only for protein/protein FASTA and
+SSEARCH, FASTX, FASTY, TFASTX, and TFASTY).
+X
+>>July 1, 1998
+X
+X (no version change)
+X
+Corrected size of lbnames[], lb_size[] in structs.h to accommodate MAX_LF
+files.
+X
+>>July 13, 1998
+X
+X --> v31t09
+X
+Corrected problem in nxgetaa.c encountered when reading long sequences
+(that must be split) in fasta format.
+X
+Corrected problem in statistics calculation encountered with a small number
+of very long DNA sequences.
+X
+>>July 17, 1998
+X
+X (no version change, date change for ssearch3)
+X
+Corrected default expectation cutoff (it was 10, now it is 2.0) for
+DNA with ssearch3.
+X
+SHAR_EOF
+chmod 0644 readme.v31t0 ||
+echo 'restore of readme.v31t0 failed'
+Wc_c="`wc -c < 'readme.v31t0'`"
+test 4461 -eq "$Wc_c" ||
+ echo 'readme.v31t0: original size 4461, current size' "$Wc_c"
+fi
+# ============= readme.v31t1 ==============
+if test -f 'readme.v31t1' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v31t1 (File already exists)'
+else
+echo 'x - extracting readme.v31t1 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v31t1' &&
+>>July 22, 1998
+X
+X --> v31t10
+X
+Corrected problem with histogram when unscaled statistics used (e.g. prss3).
+X
+Corrected problems with prss3 shuffled sequence prompt. Provided option
+to enter number of shuffles, window size, for prss3. Number of shuffles
+for prss3 can be entered as an option (-d #) or as the third argument
+on the command line (prss3 query lib 1000).
+X
+Modified nrand.c, nrand48.c to use time to set random number.
+X
+Corrected problems reading GCG formatted files with prss3.
+X
+Corrected various problems with pvcomp* programs, but they still do
+not produce alignments with version 3.1.
+X
+Two new programs, fastf3(_t) and tfastf3(_t) are available. These
+programs compare a set of mixed peptide sequences from an Edman
+sequencer to a protein (fastf3) or DNA (tfastf3) database, using
+the database sequences to de-convolve the peptide mixture.
+X
+See fastf3.1
+X
+>>August 11, 1998
+X
+(no version change)
+X
+Modified initfa.c so that using '-n' on the fastx/fasty command line
+would not cause problems.
+X
+Changed labeling of query sequence length for fastx/fasty from 'aa' to 'nt'.
+X
+>>August 18, 1998
+X
+(no version change)
+X
+Modified complib.c, comp_thr.c scaleswn.c, to report E()-value for only
+one related sequence if -z 3 is used.
+X
+>>August 23, 1998
+X
+X -->v31t11
+X
+Some serious problems with prss3 have been corrected:
+X
+(1) use dropnsw.c rather than dropgsw.c for more accurate low scores
+X
+(2) modify estimation program; use scaleswe.c rather than scaleswn.c.
+X scaleswe.c has some improvements for estimation by moments and can
+X use MLE as well as mu/var (-z 3).
+X
+(3) add p() estimate.
+X
+(4) correct bugs in nrand48, which caused bad sequences for llgetaa.c
+X
+(5) -Z number works properly for prss3 and other programs (fixed histogram).
+X
+(6) a new program, ssearch3e, is available that uses the same scaling
+X routines as prss3 (scaleswe.c). prss3 will save the random
+X sequences it generates when the -r file option is given; the
+X sequences are in file_rlib. ssearch3e (or ssearch3 or fasta) can
+X then do a search on exactly the same sequences that were used by prss3.
+X
+A bug reading GCG format compressed DNA databases was fixed.
+X
+Fixed a bug that caused query sequence not to be displayed with -m 10.
+X
+Simple optimization in dropnfa.c improves performance 10%.
+X
+>>Sept. 1, 1998
+X
+(no version change)
+X
+Modified nxgetaa.c to recognize "ACGTX" as nucleotides.
+X
+>>Sept. 7, 1998
+X
+X --> v31t12
+X
+Added -z 11 - 15, which use shuffled sequences, rather than real
+sequences to calculate statistical estimates. Because a shuffled
+sequence score is calculated for each sequence score, the search
+process takes twice as long. In this first version, codons are not
+preserved during shuffles, so tfasta/x/y shuffles may not be as
+informative as they should be.
+X
+Also fix a problem with prss3 shuffles.
+X
+>>Sept. 14, 1998
+X
+X (no version change; previous version not released)
+X
+Corrected bugs in tfastx3/tfasty3 caused by using the -3 option with
+or without -i. With the bug fixes; "-3" and "-3 -i" work as expected;
+"-3" gives the forward three frames, while "-3 -i" gives the reverse
+three frames.
+X
+In addition, tfasta3/tfasta3_t was upgraded to perform the same way
+that tfastx/y3 does - i.e. a search with "-i -3" searches only frames
+4,5, and 6, while "-3" searches only frames 1, 2, and 3.
+X
+>>Sept. 29, 1998
+X
+X --> v31t13
+X
+Corrected bugs in dropfx.c that were corrected in fasta30 last May,
+but lingered in fasta31. Also included code to ensure that tfastx/y
+alignments against long introns would not overrun the alignment
+buffer. Instead of overrunning the buffer, the message: ***aligment
+truncated *** is displayed.
+X
+SHAR_EOF
+chmod 0644 readme.v31t1 ||
+echo 'restore of readme.v31t1 failed'
+Wc_c="`wc -c < 'readme.v31t1'`"
+test 3632 -eq "$Wc_c" ||
+ echo 'readme.v31t1: original size 3632, current size' "$Wc_c"
+fi
+# ============= readme.v32t0 ==============
+if test -f 'readme.v32t0' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v32t0 (File already exists)'
+else
+echo 'x - extracting readme.v32t0 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v32t0' &&
+X
+FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
+search both the forward and reverse strands by default.
+X
+The documentation in fasta3x.me/fasta3x.doc has been substantially
+revised.
+X
+>>October 9, 1999
+X --> v32t08 (no version number change)
+X
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences. If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search. Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length. This should be particularly useful for fasts3
+and fastf3. This limit applies only to protein sequences.
+X
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+X
+>>October 20, 1999
+(no version change)
+X
+Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
+X
+>>October 9, 1999
+X --> v32t08 (no version number change)
+X
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences. If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search. Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length. This should be particularly useful for fasts3
+and fastf3. -M -500 searches library sequences < 500; -M 200 -
+searches sequences > 200. This limit applies only to protein
+sequences.
+X
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+X
+>>October 2, 1999
+X --> v32t08
+X
+Many changes:
+X
+(1) memory mapped (mmap()ed) database reading - other database reading fixes
+(2) BLAST2 databases supported
+(3) true maximum likelihood estimates for Lambda, K
+(4) Misc. minor fixes
+X
+(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
+It is now possible to use mmap()ed access to FASTA format databases,
+if the "map_db" program has been used to produce an ".xin" file. If
+USE_MMAP is defined at compile time and a ".xin" file is present, the
+".xin" will be used to access sequences directly after the file is
+mmap()ed. On my 4-processor Alpha, this can reduce elapsed time by
+50%. It is not quite as efficient as BLAST2 format, but it is close.
+X
+Currently, memory mapping is supported for type 0 (FASTA), 5
+(PIR/GCG ascii), and 6 (GCG binary). Memory mapping is used if a
+".xin" file is present. ".xin" files are created by the new program
+"map_db". The syntax for "map_db" is:
+X
+X map_db [-n] "/dir/database.fa"
+X
+which creates the file /dir/database.fa.xin. Library types can be
+included in the filename; thus:
+X
+X map_db -n "/gcggenbank/gb_om.seq 6"
+X
+would be used for a type 6 GCG binary file.
+X
+The ".xin" file must be updated each time the database file changes.
+map_db writes the size of the database file into the ".xin" file, so
+that if the database file changes, making the ".xin" offset
+information invalid, the ".xin" file is not used. "list_db" is
+provided to print out the offset information in the ".xin" file.
+X
+(Oct 2, 1999) The memory mapping routines have been changed to
+allow several files to be memory mapped simultaneously. Indeed, once a
+database has been memory mapped, it will not be unmap()ed until the
+program finishes. This fixes a problem under Digital Unix, and should
+make re-access to mmap()ed files (as when displaying high scores and
+alignments) much more efficient. If no more memory is available for
+mmap()ing, the file will be read using conventional fread/fgets.
+X
+(Oct 2, 1999) The names of the database reading functions have been
+changed to allow both Blast1.4 and Blast2.0 databases to be read. In
+addition, Makefile.common now includes an option to link both
+ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
+However, Blast1.4 support has not been tested.
+X
+The Makefile structure has been improved. Each architecture specific
+Makefile (Makefile.alpha, Makefile.linux, etc) now includes
+Makefile.common. Thus, changes to the program structure should be
+correct for all platforms. "map_db" and "list_db" are not made with
+"make all".
+X
+The database reading functions in nxgetaa.c can now return a database
+length of 0, which indicates that no residues were read. Previously,
+0-length sequences returned a length of 1, which were ignored.
+Complib.c and comp_thr.c have changed to accommodate this
+modification. This change was made to ensure that each residue,
+including the last, of each sequence is read.
+X
+Corrected bug in nxgetaa.c with FASTA format files with very long
+(>512 char) definition lines.
+X
+(2) (September 20, 1999) BLAST2 format databases supported
+X
+This release supports NCBI Blast2.0 format databases, using either
+conventional file reading or memory mapped files. The Blast2.0 format
+can be read very efficiently, so there is only a modest improvement in
+performance with memory mapping. The decision to use mmap()'ed files
+is made at compile time, by defining USE_MMAP. My thanks to Eamonn
+O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
+providing mmap()'ed modifications to fasta3. On my machines, Blast2.0
+format reduces search time by about 30%. At the moment, ambiguous DNA
+sequences are not decoded properly.
+X
+(3) (September 30, 1999) A new statistical estimation option is
+available. -z 2 has been changed from ln()-scaling, which never
+should have been used, to scaling using Maximum Likelihood Estimates
+(MLEs) of Lambda and K. The MLE estimation routines were written by
+Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
+written by Sean Eddy. The MLE estimation examines the middle 95% of
+scores, if there are fewer than 10000 sequences in the database;
+otherwise it excludes (censors) the top 250 scores and the bottom 250
+scores. This approach seems to effectively prevent related sequences
+from contaminating the estimation process. As with -z 1, -z 12 causes
+the program to generate a shuffled sequence score for each of the
+library sequences; in this case, no censoring is done. If the
+estimation process is reliable, Lambda and K should not vary much with
+different queries or query lengths. Lambda appears not to vary much
+with the comparison algorithm, although K does.
+X
+(4) Minor changes include fixes to some of the alignment display routines,
+individual copies of the pstruct structure for each thread, and some
+changes to ensure that every last residue in a library is available
+for matching (sometimes the last residue could be ignored). This
+version has undergone extensive testing with high-throughput sequences
+to confirm that long sequences are read properly. Problems with
+fastf3/fasts3 alignment display have also been addressed.
+X
+>>August 26, 1999 (no version change - not released)
+X
+Corrected problem in "apam.c" that prevented scoring matrices from
+being imported for [t]fasts3/[t]fastf3.
+X
+>>August 17, 1999
+X --> v32t07
+X
+Corrected problem with opt_cut initialization that only appeared
+with pvcomp* programs.
+X
+Improved calculation of FASTA optcut threshold for DNA sequence
+comparison for match scores much less than +5 (e.g. +3). The previous
+optcut threshold was too high when the match penalty was < 4 and
+ktup=6; it is now scaled more appropriately.
+X
+Optcut thresholds have also been raised slightly for
+fastx/y3/tfastx/y3. This should improve performance with minimal
+effects on sensitivity.
+X
+>>July 29, 1999
+(no version change - date change)
+X
+Corrected various uninitialized variables and buffer overruns
+detected.
+X
+>>July 26, 1999 - new distribution
+(no version change - v32t06, previous version not released)
+X
+Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
+programs.
+X
+Statistical calculations for tfasta/x/y in unthreaded version
+corrected. Statistical estimates for threaded and unthreaded versions
+of the tfasta/x/y/s/f programs should be much more consistent.
+X
+Substantial modifications in alignment coordinate calculation/
+presentation. Minor error in fastx/y/tfastx/y end of alignment
+corrected. Major problems with tfasta alignment coordinates
+corrected. tfasta and tfastx/y coordinates should now be consistent.
+X
+Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
+with long query sequences.
+X
+Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
+to try to avoid "cannot allocate diagonal arrays" error message.
+Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
+so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
+I am still getting this message, so it has not been completely
+successful. Makefile.linux now uses -DALLOCN0 to avoid this problem,
+at some cost in speed.
+X
+The pvcomp* programs have been updated to work properly with
+forward/reverse DNA searches. See readme.pvm_3.2.
+X
+>>July 7, 1999 - not released
+X --> v32t06
+X
+Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
+dumps with "-o" option.
+X
+Corrected a subtle bug in fastx/y/tfastx/y alignment display.
+X
+>>June 30, 1999 - new distribution
+(no version change)
+X
+Corrected doinit.c to allow DNA substitution matrices with -s matrix
+option.
+X
+Changed ".gbl" files to ".h" files.
+X
+>>June 2 - 9, 1999 - new distribution
+(no version change)
+X
+Added additional DNA lambda/K/H to alt_param.h. Corrected some
+other problems with those table. for the case where (inf,inf)
+gap penalties were not included.
+X
+Fixed complib.c/comp_thr.c error message to properly report filename
+when library file is not found.
+X
+Included approximate Lambda/K/H for BL80 in alt_parms.h.
+BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
+X
+Included some additional perl files for searchfa.cgi, searchnn.cgi
+in the distribution (my-cgi.pl, cgi-lib.pl).
+X
+>>May 30, 1999, June 2, 1999 - new distribution
+(no version number change)
+X
+Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h. Changed
+zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
+when only one sequence is compared and -z 3 is used.
+X
+>>May 27, 1999
+(no version number change)
+X
+Corrected bug in alignment numbering on the % identity line
+X 27.4% identity in 234 aa (101-234:110-243)
+for reverse complements with offset coordinates (test.aa:101-250)
+X
+>>May 23, 1999
+(no version number change)
+X
+Correction to Makefile.linux (tgetaa.o : failed to -DTFAST).
+X
+>>May 19, 1999
+(no version number change)
+X
+Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
+Changes to showsum.c to change off-end reporting. (Neither of these
+changes is likely to affect anyone outside my research group.)
+X
+>>May 12, 1999
+X --> v32t05
+X
+Fixed a serious bug in the fastx3/tfastx3 alignment display which
+caused t/fastx3 to produce incorrect alignments (and incorrectly low
+percent identities). The scores were correct, but the alignment
+percent identities were too low and the alignments were wrong.
+X
+Numbering errors were also corrected in fastx3/tfastx3 and
+fasty3/tfasty3 and when partial query sequences were used.
+X
+>>May 7, 1999
+X
+Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
+incorrect Smith-Waterman scores after do_walign() had been called.
+This affected only pvcompsw searches with the "-m 9" option.
+X
+>>May 5, 1999
+X
+Modified showalign.c to provide improved alignment information that
+includes explicitly the boundaries of the alignment. Default
+alignments now say:
+X
+Smith-Waterman score: 175; 24.645% identity in 211 aa overlap (5:207-7:207)
+X
+>>May 3, 1999
+X
+Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
+"not" superfamily annotation for the query sequence only. The
+goal is to be able to specify that certain superfamily numbers be
+ignored in some of the search summaries. Thus, a description line
+of the form:
+X
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+X
+says that GT8.7 belongs to superfamily 40001, but any library
+sequences with superfamily number 90043 should be ignored in any
+listing or summary of best scores.
+X
+In addition, it is now possible to make a fasta3r/prcompfa, which is
+the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
+unrelated sequences in a search using the superfamily annotation.
+fasta3r shows only the scores of related sequences. This might be
+used in combination with the -F e_val option to show the scores
+obtained by the most distantly related members of a family.
+X
+>>April 25, 1999
+X
+X -->v32t04 (not distributed)
+X
+Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
+(necessary for a more rational Makefile structure). No code changes.
+X
+>>April 19, 1999
+X
+Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
+(no version number change).
+X
+>>April 17, 1999
+X
+X --> v32t03
+X
+A serious bug in DNA alignments when the sequence has been broken into
+multiple segments that was introduced in version fasta32 has been
+fixed. In addition, several minor problems with -z 3 statistics on
+DNA sequences were fixed.
+X
+Added -m 9 option, which unfortunately does different things in
+pvcompfa/sw and fasta3/ssearch3. In both programs, -m 9 provides the
+id's of the two sequences, length, E(), %_ident, and start and end of
+the alignment in both sequences. pvcompfa/sw provides this
+information with the list of high scoring sequences. fasta3/ssearch3
+provides the information in lieu of an alignment.
+X
+>>March 18, 1999
+X
+X --> v32t02
+X
+Added information on the algorithm/parameter description line to
+report the range of the pam matrices. Useful for matrices like
+MD_10, _20, and _40 which require much higher gap penalties.
+X
+>>March 13, 1999 (not distributed)
+X
+X --> v32t01
+X
+X -r results.file has been changed to -R results.file to accomodate
+X DNA match/mismatch penalties of the form: -r "+1/-3".
+X
+>>February 10, 1999
+X
+Modify functions in scalesw*.c to prevent underflow after exp() on
+Alpha Linux machines. The Alpha/LINUX gcc compiler is buggy and
+doesn't behave properly with "denormalized" numbers, so "gcc -g -m
+ieee" is recommended.
+X
+Add "Display alignments also (y/n)[n] "
+X
+pvcomplib.c again provides alignments!! In addition, there is a
+new "-m 9" option, which reports alignments as:
+X
+>>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
+HS5 30 HS5 30 1.873e-11 1.000 30 1 30 1 30
+HS5 30 HS2249 40 1.061e-07 0.774 31 1 30 7 37
+HS5 30 HS2221 38 1.207e-07 0.833 30 1 30 7 35
+HS5 30 HS2283 40 1.455e-07 0.774 31 1 30 7 37
+HS5 30 HS2239 38 1.939e-07 0.800 30 1 30 7 35
+X
+where the columns are:
+X
+query-name q-len lib-name lib-len E() %id align-len q-start q-end l-start l-end
+X
+>>February 9, 1999
+X
+Corrected bug in showalign.c that offset reverse complement alignments
+by one.
+X
+>>Febrary 2, 1999
+X
+Changed the formatting slightly in showbest.c to have columns line up better.
+X
+>>January 11, 1999
+X
+Corrected some bugs introduced into fastf3(_t) in the previous version.
+X
+>>December 28, 1998
+X
+Corrected various problems in dropfz.c affecting alignment scores
+and coordinates.
+X
+Introduced a new program, fasts3(_t), for searching with peptide
+sequences.
+X
+>>November 11, 1998
+X
+X --> v32t0
+X
+Added code to correct problems with coordinate number in long library
+sequences with tfastx/tfasty. With this release, sequences should be
+numbered properly, and sequence numbers count down with reverse
+complement library sequences.
+X
+In addition, with this release, fastx/y and tfastx/y translated
+protein alignments are numbered as nucleotides (increasing by 3,
+labels every 30 nucleotides) rather than codons.
+X
+SHAR_EOF
+chmod 0644 readme.v32t0 ||
+echo 'restore of readme.v32t0 failed'
+Wc_c="`wc -c < 'readme.v32t0'`"
+test 15841 -eq "$Wc_c" ||
+ echo 'readme.v32t0: original size 15841, current size' "$Wc_c"
+fi
+# ============= readme.v33t0 ==============
+if test -f 'readme.v33t0' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v33t0 (File already exists)'
+else
+echo 'x - extracting readme.v33t0 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v33t0' &&
+X
+X $Name: fa_34_26_5 $ - $Id: readme.v33t0,v 1.45 2001/07/10 18:03:42 wrp Exp $
+X
+================ readme.v33t0 ================
+X
+This release includes an MPI implementation of the parallel
+library-vs-library comparison code. See readme.mpi_3.3 and
+readme.pvm_3.3 for more information.
+X
+=====
+>>July 9, 2001
+X
+Considerable changes to support no-global library functions.
+X
+(1) Separate ascii/sequence mapping arrays are used by the
+X query-reading (qascii), library-reading (lascii), and sequence
+X comparison function (pascii) routines. As a result, there is no
+X longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
+X functions.
+X
+(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
+X from complib.c/comp_thr.c/p2_complib.c. We no longer need
+X tcomp_thr.o, comp_thrx.o, etc. We still have a variety of
+X p2_complib.o variations to support the different c34.work* files.
+X
+(3) Because non-global openlib/getlib functions are available, exactly
+X the same open/get functions are available for reading both the
+X query and reference libraries in pv34comp* programs. The
+X host-specific openlib/getlib functions in hxgetaa.c are now
+X provided by nmgetlib.c, etc. This has two effect:
+X
+X (a) it is now possible to compare a query database generated by an
+X SQL query to a library database generated by a different SQL
+X query.
+X
+X (b) pv34comp* has lost (at least in this version) the ability to
+X automatically detect the query sequence type. To search with a
+X DNA query, you MUST use "-n".
+X
+(4) the resetp() function is now responsible for almost all of the
+X function specific (TFAST/FASTX/etc) initializations. All of the
+X function specific code has been removed from complib.c/comp_thr.c
+X and most of it has been moved to initfa.c/resetp().
+X
+(5) manageacc.c has been merged into compacc.c (mostly prhist()).
+X
+(6) Although it may reflect a subtle bug in my code, it is not
+X possible to reliably run threaded/memory mapped versions of the
+X fasta34_t code. I have spent considerable time tracking down the
+X problem, and have determined that, in threaded code, something
+X happens during the thread initialization to corrupt the
+X description offset information used when files are memory mapped.
+X This never occurs when the unthreaded versions of the code are
+X used. And it does not occur under MacOSX, Compaq Tru64Unix, Sun
+X Solaris/Sparc, or SGI IRIX.
+X
+X Thus, I cannot recommend using the threaded code versions (_t)
+X under Linux (RH6.2 or 7.1).
+X
+=====
+>>June 1, 2001
+X
+Many changes to accomodate a new - no global variable - strategy for
+reading sequence databases. Every time a file is opened, a struct
+lmf_str is allocated which can be used for memory mapped files, ncbl2,
+files, and mysql files.
+X
+In addition, an open'ed file has a default sequence type: DNA or
+protein, or one can open a file in a mode that will allow the sequence
+type to be changed.
+X
+=====
+>>May 18, 2001 CVS: fa33t09d0
+X
+A new compile time parameter - -DGAP_OPEN, is available to change the
+definition of the "-f gap-open" parameter from the penalty for the
+first residue in a gap to a true gap-open penalty, as is used in BLAST
+and many other comparison algorithms. This will probably become the
+default for fasta in version 3.4.
+X
+Fixes to conflicts between "-S" and "-s matrix". When a scoring
+matrix file was specified, lower-case alignments were not displayed
+with -S (although the scores were calculated properly).
+X
+More extensive testting of mysql_lib.c (mySQL query-libraries) with
+the pv4comp* and mp4comp* programs.
+X
+=====
+>>April 5, 2001 CVS: fa33t08d4b3
+X
+Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
+descriptions for PCOMPLIB (pv4/mp3comp*). Also fix p2_complib.c to
+request DNA library for translated comparisons.
+X
+Fix for prss33(_t) to read both sequences from stdin.
+X
+=====
+>>March 27, 2001 CVS: fa33t08d4
+X --> fa33t08d4
+X
+Problems in ncbl2_mlib.c found searching NCBI non-redundant nucleotide
+database "nt" were fixed. Testing revealed a minor memory leak, which
+was fixed by modifying showbest.c, showalign.c, comp_thr.c, complib.c,
+and p2_complib.c to remember the last opened database file more
+effectively.
+X
+Modifications to allow 64-bit fseek/ftell on machines like Sun,
+Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGE_FILE_SOURCE
+off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO. Machines
+with 64-bit long's do not need this option. Machines with 32-bit
+longs that allow files >2 Gb can do so with 64-bit file access
+functions, including fseeko() and ftello(), which work with off_t file
+offsets instead of long's.
+X
+=====
+>>March 3, 2001 CVS: fa33t08d2
+X
+Corrected problems in nmgetaa.c and mysql_lib.c with parallel
+programs, and one serious problem with alternate DNA scoring matrices
+(initfa.c, initsw.c) not being set properly. A subtle problem with
+the merge of scaleswn.c and scaleswg.c is fixed.
+X
+>>February 17, 2001
+X
+Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
+position of the GID. This change was made because sprintf() cannot be
+used reliably to generate an SQL string, as '"' and '%' are used in
+such strings.
+X
+=====
+>>January 17, 2001
+(no version change, date change)
+X
+Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
+properly. "-n -s dna.mat" is required for the sequence/matrix to be
+recognized as DNA.
+X
+>>January 16, 2001
+-->v34t00
+X
+Merge of the main CVS trunk - fa33t06 with the latest release branch,
+fa33t08.
+X
+In addition, PCOMPLIB mods have been made to mysql_lib.c. Because
+p2_complib.c gets sequence description information during the first
+read of the database, the mysql_query must be changed to return:
+result[0]=GID, result[1]=description, result[2]=sequence. In the
+PCOMPLIB case, the other SQL queries (for GID description, sequence)
+are not necessary but must still be provided.
+X
+=====
+>>January 16, 2001
+(no version change, previous version not released)
+X
+changes to p2_complib.c to correct openlib() incompatibility.
+X
+changes to nmgetaa.c, ncbl2_lib.c to incorporate PCOMPLIB. nxgetaa.c
+removed.
+X
+=====
+>>January 12, 2001
+(no version change, previous version not released)
+X
+Change to initfa.c to move ktup check from query_parm() to last_init().
+X
+=====
+>>January 10, 2001
+--> v33t08
+X
+Fixes to complib.c, comp_thr.c to deal properly with long query
+protein sequences when a short library chunk (e.g. -N 5000) was given.
+In the case where the chunk size is too short, it will be reset to a
+length which allows the search to proceed, by including an amount of
+new sequence that is equal to the amount of overlap sequence.
+X
+scaleswn.c and scaleswg.c have been merged.
+X
+v33t08 includes the initial implementation for mySQL described below
+for v33t07x.
+X
+======
+>>Dec. 20, 2000
+--> v33t07x
+X
+Initial implementation of a syntax for mySQL database queries. A new
+file, mysql_lib.c has been added, and changes have been made to
+nmgetaa.c (which should now replace nxgetaa.c) and altlib.h. A mySQL
+database search needs a file with 4 parts:
+X
+(1) description of the database, user, password
+(2) a select statement that generates the set of protein sequences
+X as: UID, sequence
+(3) a select statement that generates a UID, description given a UID
+(4) a select statement that generats a single UID, sequence given a UID
+X
+Each of the four parts should be separated by ';'. For example, in
+the database that we are using for testing, a file "demo.sql" that
+contains:
+X
+================
+localhost taxonomy username secret;
+SELECT proteins.gid, proteins.sequence FROM proteins,swissprot WHERE proteins.gid=swissprot.gid AND swissprot.spid IS NOT NULL;
+select proteins.gid, concat(swissprot.spid," ",proteins.description) from proteins,swissprot where proteins.gid=%ld AND swissprot.gid=proteins.gid;
+select gid, sequence from proteins where gid=%ld;
+================
+X
+will find all the proteins in the BLAST "nr" database that also have
+SwissProt ID's when given the command line:
+X
+X fasta33 -q query.aa "demo.sql 16"
+X
+At least for simple queries, there is surprisingly little overhead for the
+search. For more complex queries involving several tables, the overhead
+can be significant.
+X
+At the moment, libraries that need the functions in mysql_lib.c will
+use library type 16. We may also use file type 17 for SQL queries
+that return binary sequences.
+X
+This implementation of mysql_lib.c was written to require a minimal
+amount of change to the other programs. Only nmgetaa.c and altlib.h
+needed to be changed to incorporate this new capability. One result
+of this limitation is that one cannot mix mySQL databases queries with
+other databases in the same search. Eventually, I would like to make
+a mySQL database like any other, so that several mysql database
+queries could be searched in the same run, and mysql databases could
+be mixed with other (flat file) databases, but this will require some
+changes in the function calls throughout the code. (Right now, the
+various programs do not distinguish between an openlib() that is made
+before searching a large database, and one before retrieving a single
+sequence. This must be changed for a database query like mySQL to
+behave like other databases.
+X
+Several mySQL demo files have been provided: mysql_demo*.sql.
+X
+(10 January 2001) The mySQL code has been tested on Intel Linux and
+Compaq/Alpha/Tru64 Unix.
+X
+>>Dec. 9, 2000
+X
+Changes to apam.c that to tie different default gap penalties to
+alternate scoring matrices. In addition, changes to apam.c, to deal
+with user-specified matrices with or without '*'.
+X
+>>Nov. 5, 2000 (date updated)
+X
+pst.dnaseq can now have 3 values, -1, or 0-> protein, 1->DNA, and 2->other.
+This becomes important for thing like init_karlin_a, which needs a
+background frequency of residues.
+X
+>>Nov. 1, 2000
+X
+Significant bug fixes for the -z 6/-z 16 option. An uninitialized
+variable was fixed in karlin.c, and comp_thr.c did not pass the
+correct composition argument type in find_zp(). The -z 6/16 option
+has now been tested and works correctly on Alphas, Linux x86, SGI, Sun
+and Mac OSX. Another problem was fixed in scaleswn.c (simplex()) that
+prevented the code from being reused by the pv4/mp4 complib programs.
+X
+>>Oct. 9, 2000
+X
+Several changes made to accomodate Mac OSX. Longer lists of superfamily
+numbers now supported in p[su]4comp/m[su]4comp programs.
+X
+>>Sept 25, 2000
+X
+All global variables have been removed from scaleswn.c. The last to
+go, db_struct db, required many edits, because until now, the fasta
+programs have kept two versions of the db_struct data (entries,
+length). One version was kept by the main program, which updated entry
+number and db length as sequences were read; a second copy of this
+information was kept by the statistical estimation routines. Now
+there is only one copy, which means that the E() values will be a
+function of the complete database, not the database with some high
+scoring sequences removed.
+X
+>>Sept 23, 2000
+X
+Continued removal of global variables from scaleswn.c. Only one
+global is left, db_struct db, which contains the number of entries in
+the database and the number of residues. It will be the next to go
+(changing all the zs_to_*() functions) and scaleswn. will be free
+of globals. scaleswg.c is gone - scaleswn.c compiles to scaleswg.c
+with -DNORMAL_DIST.
+X
+>>Sept 20, 2000
+X
+Removal of histogram globals required changes in p2_complib.c as well.
+p_complib.c has not been updated. scaleswg.c has been modified to
+reflect the new histogram strategy.
+X
+>>Sept 19, 2000
+X
+Substantial changes to remove globals for printing histogram. m_msg
+now contains a hist_str, which keeps histogram information.
+X
+>>Sept. 19, 2000
+(no version change, previous version not released)
+X
+Correct bug introduced into scaleswn.c (inithist()) by changing
+score2_sums[], score_sums[] from int to double.
+X
+Reporting of version numbers is more consistent between fasta33,
+fasta33_t, and pv4compfa/mp4compfa. The programs now report the same
+numbers/dates in similar places.
+X
+>>Sept. 15, 2000
+--> v33t07
+X
+Changes to fix problems with statistical estimates when a large
+fraction (but not all) of the database is related. Several users
+reported problems when searching with rRNA genes with version 33t06.
+In some cases, a 100% identitical match over 1500 nt would not be
+statistically significant against a search of the bacterial division
+of Genbank. This problem was not seen with some releases of v33t05.
+X
+The cause of the problem was a change between v33t05 and v33t06 to
+allow scoring matrices with unusual scaling to be used. In v33t05,
+there was a line that excluded all scores > 300 from the statistical
+estimation procedure. While 300 is a high score with any "normal"
+scoring matrix, some investigators were using matrices scaled 10X, so
+that a score of 300 was really a score of 30 with a conventional
+matrix, and should not be excluded. Unfortunately, removing the test
+to exclude scores > 300 meant that when a rRNA sequence was used to
+search the bacterial division, tens of thousands of high scoring
+related sequences were treated as if they were unrelated, with the
+result that the variance estimates were much too high, and thus high
+real scores had low z-scores, and thus were not statistically
+significant. (There appear to be more than 20,000 rRNA sequences in
+the bacterial division of Genbank, almost 25% of all sequences).
+X
+The solution to the problem is a substantial enhancement in the
+strategies used to exclude high-scoring, related sequences, the -z 1,
+4, and 5 parameter estimation strategies. The programs now estimate
+the expected high scoring sequence by calculating an ungapped Lambda
+and K, and then use a relatively conservative threshold for excluding
+scores that are higher than would be expected 0.01 times by chance.
+By calculating Lambda and K, we can scale the cutoff thresholds to
+allow scoring matrices with unusual scales. For "normal" searches,
+there should be little change, but there should be an improvement for
+searches with large numbers of related sequences in the database.
+X
+As a result of testing for this change, a bug in the karlin() function
+used with -z 6 was found and corrected.
+X
+=======
+>>Sept. 9, 2000
+X
+Changes to manshowbest.c to include correct display coordinates.
+X
+Significant changes to structs.h, param.h, p2_complib.c,
+p2_workcomp.c, to store and use a reliable a_struct for alignment
+coordinates.
+X
+Other cosmetic changes.
+X
+>>Sept. 7, 2000
+X
+Minor changes to complib.c, showrss.c, so that prss33 -q uses 200
+shuffles and prss33 provides bit scores, rather than z-scores.
+(no version number change).
+X
+Modifications to p2_complib.c to include superfamily numbers for
+ps4comp* ms4comp*.
+X
+>>Aug 22, 2000
+X
+Changes to mmgetaa.c, ncbl2_mlib.c, dropfs.c to accomodate AIX.
+00README.1st updated to reflect the current version and correct
+outdated information on threads.
+X
+>>Aug. 3, 2000
+X
+Modifications to initpam2() in initsw.c to correct a problem with pam_x
+when the -S option is used.
+X
+Modifications to compacc.c, scaleswn.c to ensure that residue numbers
+are calculated properly when more than 2 Gb of sequence is searched.
+X
+>>July 12, 2000
+X
+Modifications to dropnfa.c so that DNA matches to 'N' will be included
+in the "ungapped %identity". Thus, a sequence that is 100% identical
+for 100 nt on either side of a 100 nt region that has been masked to
+'NNNNN' will be reported as: "67% identical (100% ungapped)". This
+has been added to deal with masked BAC-end databases. It would be
+better if masking changed the letters to lowercase, but the mouse
+BAC-end sequences at TIGR use 'NNNNN'. This is currently available
+only for the fasta function, not [t]fast[x/y], etc, and only for DNA
+sequences.
+X
+mk_n_pam() in apam.c modified to ensure that mismatch scores of -1
+remain -1.
+X
+>>June 25, 2000
+X
+Modification to nxgetaa.c, nmgetaa.c, mmgetaa.c to return Genbank Accession
+number as part of the descriptive string.
+X
+>>June 11, 2000
+X
+(no version change - not yet released)
+X
+Modifications to calcons(), calc_id(), showbest(), p_workcomp.c to
+provide ngap_q (number of alignment gaps in query) , ngap_l (number
+of gaps in library) information for -m 9 output.
+X
+>>June 6, 2000
+X
+(no version change - not yet released)
+X
+Modified scaleswn.c to provide better support for unconventional
+scoring scoring matrices, in particular, scoring matrices where every
+value is 50-times higher. Previous versions of the MLE estimator (-z
+2) started with lambda = 0.2, which is too high for a scoring matrix
+going from -500:+1500. The initial estimate for lambda is now
+calculated using the formula: lambda = pi/sqrt(6*variance). For the
+default -z 1, a restriction to limit scores to a maximum of 300 for
+the statistical analysis was removed.
+X
+>>June 3, 2000
+X
+Modified aligment output, and -m 9 and -m10, to report an "ungapped"
+identity as well as the traditional "gapped" identity. The
+traditional "gapped" identity reports the number of identities divided
+by the overall length of the alignment, including gaps. The
+"ungapped" identity does not include gaps in the length of the
+alignment. This new value is included for alignments that include
+introns; thus, a tfastx33 search might find the 100% identical genomic
+sequence but report the gapped percent identity if a short intron were
+included in the alignment (the alignment probably would not span a
+long exon) as 66%. The "ungapped" identity would remain 100%. The
+ungapped identity value is also shown in the "-m 9" output line after
+the "gapped" fraction identical.
+X
+>>June 1, 2000
+X
+Modified -m 9 output to provide fraction identical, alignment boundary
+information with the initial list of high scoring sequences, just as
+the pv3comp and mp_comp versions do. The -m 9 option now shows the
+same alignment display as -m 0, but the width of the alignment is
+increased by 40. Thus, by default, -m 9 will show the list of best
+hits, with percent identity, Smith-Waterman score, and alignment
+boundaries initially, and then show alignments standard (-m 0)
+alignments with 100 residues/line.
+X
+>>May 29, 2000
+X
+Correct some problems with reading data files with <CR>'s under unix.
+X
+nmgetaa.c/nxgetaa.c/mmgetaa.c have been modified to convert <TAB>
+('\t') to <SPC> (' ') in descriptive lines.
+X
+=======
+X
+>>May 3, 2000
+X
+X Corrected problem with very low mean_var in fit_llen() in scaleswn.c.
+X
+>>May 2, 2000
+X (no version number change - previous version not released)
+X
+X Merged fasta33t05d2 with fasta33t06. Also removed restriction on
+"-M size-range" to proteins - the size range now can be applied to DNA
+as well.
+X
+>>May 1, 2000
+X (changes to v33t05d merged into v33t06)
+X
+Introduced changes to include '*' as a valid sequence character, which
+indicates termination. Thus, 'TGA', 'TAG', and 'TAA' are now
+tranlated to '*' rather than 'X', and the protein PAM matrices have
+been modified to provide a match score of approximately 1/2 the max
+identity score for a '*:*' match. Otherise, '*' is the same as 'X'.
+This change only affects query sequences that include a '*' to
+indicate an end of sequence, the '*' is not there by default.
+X
+The inclusion of '*' broke some things in tfasts33, tfastf33, fasty33,
+and tfasty33, which were fixed today.
+X
+>>March 28, 2000/April 24, 2000
+X --> v33t06
+X
+(a) -z 6 statistics that factor in composition
+(b) -smatrix-offset pam-offset parameter
+X
+(a) This release provides a new statistics option, -z 6, which
+provides a more sophisticated model that accounts for sequence
+composition. When -z 6 is used (only for fasta33(_t) and
+ssearch33(_t)), the program calculates a composition parameter
+comp=1/lambda using a modified version of the Karlin-Altschul karlin()
+function. As a result, every sequence in the database has an
+associated length (n1) and composition (comp).
+X
+The length n1 and composition comp are used in the maximum likelihood
+estimation described by Mott (1992) Bull. Math. Biol. 54:59-75. Four
+parameters are estimated, a0, a1, a2, and b1, and the probability of
+obtaining a score is then:
+X
+p(s >= x) = 1-exp(-exp(-( a0 + a1*comp + a2*comp*log(n0*n1) + x)/(b1*comp)))
+X
+The maximum likelihood estimates of a0, a1, a2, and b1 are calculated
+using the Nelder-Mead simplex search strategy.
+X
+The average Lambda is reported for the search using Lambda =
+1/(b1*ave_comp). Where ave_comp is the geometric mean of the comp values
+calculated during the statistical estimates.
+X
+The "lambda/comp" calculation can fail for sequences with very biased
+amino acid composition. When this occurs, 'comp' is set to -1.0 (as
+is 'H', the information content parameter) and the 'ave_comp' value is
+used to calculate statistical significance. (But obviously 'ave_comp'
+is not really appropriate, since if the sequence had an average 'comp'
+value, it would have been calculated.) When -z 6 is used, the
+alignment display shows the 'comp' and 'H' values for that library
+sequence.
+X
+(b) Scoring matrix offsets - The main reason that the "lambda/comp"
+calculation fails is that, for the particular query/library sequence
+pair, the expected score is not < 0, instead, Sum {p_ij S_ij} >= 0.0.
+This problem is reported to 'stderr' when it occurs. The simplest
+solution to the problem is to provide an offset to the scoring matrix;
+for example, to use Blosum62 - 1, which ranges from +10 to -5, rather
+than the standard +11 to -4. This option used to be available with
+the -S offset option, but -S is now used to specify a lower-case
+seg-ed database. The offset can now be specified as part of the
+scoring matrix name. Thus, "-s BL62-1" uses Blosum62 reduced by 1 at
+each entry. The '-' character is used to indicate an offset, so
+scoring matrix files must not have a '-' in their name.
+Alternatively, "-s BL80+1" or "-s BL80--1" would add one to each value.
+X
+nxgetaa.c, nmgetaa.c, and mmgetaa.c have been edited to avoid string
+run-off problems after strncpy().
+X
+Fixed problem where positive gap extension penalties in ssearch33
+were not converted to negative values.
+X
+>>April 8, 2000
+X
+Fixed problem in calculating corrected sequence lengths for
+Altschul-Gish probabilities.
+X
+>>March 30, 2000
+X (no version change, date updated to March 30, 2000)
+X
+Corrected problem with -m 9 option.
+X
+The '*' character is now available to allow translated alignments to
+extend through the termination codon. Thus, if a protein sequence ends
+with a '*', and matches in to a translated termination codon, the
+score will be increased. The *:* match score is set to 1/2 the max
+positive score for the matrix (see upam.h). This strategy can also be
+used to upweight a match that extends all the way to the end of a
+full-length sequence by putting '*' at the end of both the query and
+library protein sequences. Recognition of '*' will probably become a
+command line option.
+X
+>>March 21, 2000
+X (no version change, previous version not distributed)
+X
+Changes to map_db.c, list_db.c, and mmgetaa.c to accomodate large
+sequence files. Long (64-bit on some systems) variables are now used
+to specify file and memory position for the memory mapped functions.
+As a result, there are now two *.xin (memory mapped index) file
+formats: MP0, which uses 32-bit longs, and MP1, which uses 64-bit
+longs. On 64-bit machines, MP0 32-bit indices are read properly, but
+limit the database size to 2 or 4 Gb; MP1 64-bit indices allow very
+large databases. Blast2.0 formatdb databases are still limited to
+4Gb. To compile map_db.c to generate 64-bit index files, include the
+compile time option -DBIG_LIB64 in the Makefile. (Currently this
+option has been tested only on the DEC Alpha and SGI platforms, and
+will work only with Unix versions that provide 64-bit longs and 64-bit
+ftell()'s.)
+X
+The -R results file now uses sfn_cmp() to report a matching
+superfamily number, if one exists, and '0' otherwise.
+X
+>>March 12, 2000
+X (no version change, previous version not distributed)
+X
+Provide new strategy for specifying library abbreviations. In
+addition to:
+X
+X fasta33 query.aa %anr
+X
+one can also specify:
+X
+X fasta33 query.aa %pir1+sp+nr
+or
+X fasta33 query.aa +pir1+sp+nr
+or
+X fasta33 query.aa %+pir1+sp+nr
+X
+where the + anywhere in the library name string indicates that
+variable length library names, separated by '+', are being used (the
+last '+' is optional). The FASTLIBS file then becomes:
+X
+================
+PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/blast/pir1.lseg
+NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
+NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
+NCBI/Blast non-redundant proteins$0+nr+/slib2/blast/nr.lseg
+NCBI/Blast Swissprot$0+sp+/slib2/blast/swissprot.lseg
+================
+X
+The two abbreviation types, single letter and +word+, cannot be
+intermixed, and at least initially, +word+ specifiers are
+case-sensitive (single letter abbreviations are not) and will not be
+available interactively, only on the command line.
+X
+Removed 'K' estimate for Expectation_n, Expectation_i fits to the
+distribution of unrelated similarity scores. 'K' cannot be calculated
+from the data available. 'Lambda' can be calculated, it is
+1.28255/sqrt(mean_var), and is still available.
+X
+>>March 3, 2000
+X (no version change)
+X
+changed Makefile33.common, Makefile.common, to incorporate $(NRAND)
+rather than "rand48". Provide nrandom.c which uses random(), as
+replacement for nrand.c, which uses rand48().
+X
+>>February 8, 2000
+X --> v33t05
+X
+Fixes to scaleswn.c (proc_hist_ml) to set num_db_entries properly.
+Scaleswn.c also provides Lambda estimates for -z 1/11 (Expectation_n),
+and -z 1/14 (Expectation_i) statistical estimates.
+X
+Modifications to calc_id() to correct bug in counting identities.
+Modified showalign() to use calc_id() with -m 9, for simpler
+debugging.
+X
+Additional modifications to dropfa*.c files to deal properly with 'n's
+and 'x's.
+X
+Added new option: -x #, which allows one to override the penalty for a
+match against 'x' (or 'N') provided by the scoring matrix. This
+option is particularly useful in fast[x/y] searches, where out of
+frame low complexity regions can generate high scores.
+X
+The old function of '-x' - to specify an alternate coordinate system,
+is now available as '-X # #'.
+X
+Updated scaleswn.c to provide window shuffle information for -z 12.
+X
+Updated compacc.c, workacc.c, to fix serious bug in wshuffle()
+that destroyed aa1[n1]=0.
+X
+>>January 25, 2000
+X --> v33t04
+X
+X A serious bug in all of the fasta related programs has been
+corrected. The new code in fasta33 which ignores certain residues
+failed to initialize one of the arrays properly. As a result, in
+pathological situations, a very strong match could be missed.
+X
+X Corrected minor bug in initsw.c that caused misplaced "ktup" command
+line argument, which should be ignored by ssearch, to be read as -d
+ktup.
+X
+X Improved error message for 0 length query sequence.
+X
+>>January 17, 2000
+X --> no external version number change
+X
+Modified mmgetaa.c, map_db.c, and nmgetaa.c to provide memory mapping
+of genbank flatfile (format=1) files. This format could be read much
+more efficiently, however.
+X
+>>January 12, 2000
+X --> no external version number change
+X
+Changed the behavior of the options that set the number of high scores
+(-b) and alignments (-d) that are displayed. Previously, fasta33 -E
+10.0 -d 10 would show 50 best scores, rather than all the scores with
+E() < 10.0. To get the -E threshold to limit, -E 10.0 -b 10000 -d 10
+was required. This is now fixed. Setting "-d 10" does not affect the
+number of best scores shown.
+X
+Minor change in mw.h to remove unused defines.
+X
+fasta3x.me (fasta3x.doc) updated.
+X
+>>January 6, 2000
+X --> v33t03
+X
+Corrected bug in memory mapped reads of gcg_binary format files
+that potentially caused the last 63 residues to be read improperly.
+X
+Changes to comp_thr.c, pthr_subs.c, uthr_subs.c, ibm_pthr_subs.c to
+ensure that each thread has its own work_info structure. This solves
+some minor race conditions that sometimes caused some parameters
+not to be reported properly.
+X
+Changes to most of the drop*.c files to correct some minor problems
+with sequence alphabets. Code in mmgetaa.c (memory mapped code for
+FASTA, GCG compressed files) reordered to prevent files from being
+memory mapped if appropriate index files are not available.
+X
+See readme.pvm_3.3 for updates to the pvm programs.
+X
+>>December 10, 1999
+X (no version change - modifications largely affect ps3comp*)
+X
+Modifications to showsum.c to deal with 2 scores/sequence. Modifications
+to mmgetaa.c for superfamily numbers.
+X
+>>December 7, 1999
+X (no version change, previous version not released)
+X
+Corrected problem in mmgetaa.c that caused searches on a memory mapped
+single long sequence (e.g. Chr22) to fail. Corrected bug in map_db.c
+that caused it to crash on some architectures if a filename was not
+specified. Corrected off-by-three error in fasty/tfasty. Corrected
+indexing error in dropfz2.c.
+X
+>>December 5, 1999
+X --> v33t02
+X
+corrected some bugs in inifa.c/initsw.c/doinit.c that caused
+abbreviated function names to be lost.
+X
+modify showbest.c, showalign.c to include information on position in
+library sequence (bbp->cont) to distinguish subsegment of very long
+sequences. Currently, the new label is available only with -m 6.
+X
+>>November 29, 1999
+X [t]fastz33 uses v33t02 of fasty function.
+X
+Replace dropfz.c with dropfz2.c. Dropfz2.c interprets any codons,
+that include the nucleotide 'N' as the amino 'X'. Previously, 'N' was
+treated as 'A', so 'NNN' ended up 'K'. This modification, together
+with the -S option and lower-case pseg'ed databases, should ensure
+that DNA queries with large numbers of 'N's do not match low
+complexity regions.
+X
+>>November 20, 1999
+X (no version change, previous version not released)
+X
+Modify initfa.c to disply initn, init1 scores for [t]fast[fs].
+Include "-B" option to show previous z-scores.
+X
+>>November 17, 1999
+X (no version change, previous version not released)
+X
+Modify dropfx.c to use saatran(), rather than aatran(). saatran
+translates any 'N' containing codon as 'X'. aatran() treats 'N' as
+an 'A'. Although more steps are required for translation, the program
+appears to run just as fast.
+X
+>>November 7, 1999
+X --> v33t01
+X
+Substantial changes to the output format in showbest.c (the list of
+high scoring sequences) and showalign.c (the alignments). The classic
+list of best scores:
+X
+The best scores are: initn init1 opt z-sc E(82014)
+gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIO ( 218) 1497 1497 1497 1761.1 2.3e-91
+gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE ( 218) 1413 1413 1413 1662.9 6.7e-86
+X
+has been replaced by:
+X
+The best scores are: opt bits E(82138)
+gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIONE S-TRAN ( 218) 1497 354 7.6e-98
+gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE S-TRANSF ( 218) 1413 335 5.3e-92
+X
+This display provides more information and removes the outdated initn
+and init1 scores, which are no longer used. The "bit" score is
+comparable to the blast2 bit score. It is calculated as: (lambda*S -
+ln K)/ln 2, where S is the raw similarity score, lambda and K are
+statistical parameters estimated from the distribution of unrelated
+sequence similarity scores. All of the similarity scores, including
+init1, initn, and z-scores are reported with the alignment data.
+Z-scores are displayed instead of bit scores in the list of high
+scores if the command line option "-B" is specified.
+X
+In addition, the alignment score line has changed from:
+X
+>>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER (220 aa)
+X initn: 954 init1: 954 opt: 958 Z-score: 1130.9 expect() 1.1e-56
+Smith-Waterman score: 958; 61.927% identity in 218 aa overlap (1-218:1-218)
+X
+to:
+X
+>>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER (220 aa)
+X initn: 954 init1: 954 opt: 958 Z-score: 1130.9 bits: 216.4 E(): 2.8e-56
+Smith-Waterman score: 958; 61.927% identity in 218 aa overlap (1-218:1-218)
+X
+In addition to the addition of the "bits:" score, the "expect()" label
+has changed to "E()" to save some space.
+X
+>>November 4,12, 1999
+(no version change)
+X
+Fixed serious bug in -z 2 lambda/K calculation in scaleswn.c
+X
+Fixed bugs in llgetaa.c (openlib()) and definition of superfamily
+numbers.
+X
+>>October 21, 1999
+(no version change)
+X
+Begin using CVS for version control. Correct faulty error message in
+dropfs.c. Corrected bad "goto loopl;" in dropfz.c. Corrected prss3.rsp
+for Makefile.tc (Win32 version).
+X
+>>October 18, 1999
+X --> v33t0
+X
+Corrected some serious bugs with the various fasta/x/y programs when
+the -DALLOCN0 was used to save memory. Improvements to fasta3x.me/.doc
+documentation.
+X
+>>October 12, 1999
+X --> v33tx
+X
+For this initial release of version 33 of the FASTA programs, the
+Makefile's have been modified to make "fasta33(_t)", "fastx33(_t)",
+etc, so that you can test fasta33 while retaining fasta3 (from release
+v32t08). The FASTA33 programs are somewhat slower than previous
+releases, but I believe the ability to handle low complexity regions
+without 'X'ing them out outweighs the slowdown. By (temporarily)
+changing the names of the programs slightly, it will be easier for you
+to judge the relative cost and benefit. To "make" the programs as
+"fasta3(_t)", etc, simply replace "Makefile33.common" with
+"Makefile.common" in the "Makefile" that you use.
+X
+>>September 30, 1999
+X
+ssearch3/fasta3/fastx3/fasty3 have been modified to search databases
+containing both upper and lower case letters, where lower case letters
+indicate low-complexity regions. With the modified programs, lower
+case letters are treated as 'X's' in the initial scan, but are then
+treated normally in the final alignment. In addition, alignments can
+contain lower case letters. Lower case letters are treated as
+low-complexity regions during the seach phase of the program, but as
+"conventional" residues during the alignment phase, with the "-S"
+option. Currently, lower case letters are mapped to 'X's during the
+scan of the entire library. In the future, alternate weights will be
+available. This is a substantial improvement for very large scale
+comparison, where one seeks both accurate statistical estimates and
+accurate %identities and alignments, and for translated DNA:protein
+comparisons, like "fastx3" and "fasty3", where out-of-frame
+translations tend to match low complexity regions (see Pearson et
+al. (1997) Genomics 46:24-36).
+X
+Protein databases (and query sequences) can be generated in the
+appropriate format using John Wooton's "pseg" program, available from
+ftp://ncbi.nlm.nih.gov/pub/seg/pseg. Once you have compiled the "pseg"
+program, use the command:
+X
+X pseg database.fasta -z 1 -q > database.lc_seg
+X
+Once you have database.lc_seg, run the command "map_db" to generate
+a ".xin" file that can be used to efficiently memory map the database.
+X
+You can then search database.lc_seg with or without the "-S" option.
+Without "-S", the database is treated as any other FASTA format file -
+all the residues are present. With "-S", lower case residues will be
+treated as 'x's' during the initial scan but as normal residues when
+final alignments are displayed.
+X
+When the -S option is used, the matrix information line is changed
+from: "BL50 matrix (15:-5)" to "BL50 matrix (15:-5)xS". The "-S"
+option is no longer available to provide a scoring matrix offset.
+X
+Unfortunately, Blast2.0 format files cannot contain lower case
+letters. We have addressed this problem by providing efficient memory
+mapped access to Fasta and GCG/PIR, and GCG/compressed-binary files in
+the last release of fasta32t08. The memory mapped file I/O
+improvements are provided in fasta33 as well.
+X
+================ readme.v32 ================
+X
+FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
+search both the forward and reverse strands by default.
+X
+The documentation in fasta3x.me/fasta3x.doc has been substantially
+revised.
+X
+>>October 20, 1999
+(no version change)
+X
+Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
+X
+>>October 9, 1999
+X --> v32t08 (no version number change)
+X
+Added "-M low-high" option, where low and high are inclusion limits
+for library sequences. If a library sequence is shorter than "low" or
+longer than "high", it will not be considered in the search. Thus,
+"-M 200-250" limits the database search to proteins between 200 and
+250 residues in length. This should be particularly useful for fasts3
+and fastf3. -M -500 searches library sequences < 500; -M 200 -
+searches sequences > 200. This limit applies only to protein
+sequences.
+X
+Modified scaleswn.c to fall back to maximum likelihood estimates of
+lambda, K rather than mean/variance estimates. (This allows MLE
+estimation to be used instead of proc_hist_n when a limited range of
+scores is examined.)
+X
+>>October 2, 1999
+X --> v32t08
+X
+Many changes:
+X
+(1) memory mapped (mmap()ed) database reading - other database reading fixes
+(2) BLAST2 databases supported
+(3) true maximum likelihood estimates for Lambda, K
+(4) Misc. minor fixes
+X
+(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
+It is now possible to use mmap()ed access to FASTA format databases,
+if the "map_db" program has been used to produce an ".xin" file. If
+USE_MMAP is defined at compile time and a ".xin" file is present, the
+".xin" will be used to access sequences directly after the file is
+mmap()ed. On my 4-processor Alpha, this can reduce elapsed time by
+50%. It is not quite as efficient as BLAST2 format, but it is close.
+X
+Currently, memory mapping is supported for type 0 (FASTA), 5
+(PIR/GCG ascii), and 6 (GCG binary). Memory mapping is used if a
+".xin" file is present. ".xin" files are created by the new program
+"map_db". The syntax for "map_db" is:
+X
+X map_db [-n] "/dir/database.fa"
+X
+which creates the file /dir/database.fa.xin. Library types can be
+included in the filename; thus:
+X
+X map_db -n "/gcggenbank/gb_om.seq 6"
+X
+would be used for a type 6 GCG binary file.
+X
+The ".xin" file must be updated each time the database file changes.
+map_db writes the size of the database file into the ".xin" file, so
+that if the database file changes, making the ".xin" offset
+information invalid, the ".xin" file is not used. "list_db" is
+provided to print out the offset information in the ".xin" file.
+X
+(Oct 2, 1999) The memory mapping routines have been changed to
+allow several files to be memory mapped simultaneously. Indeed, once a
+database has been memory mapped, it will not be unmap()ed until the
+program finishes. This fixes a problem under Digital Unix, and should
+make re-access to mmap()ed files (as when displaying high scores and
+alignments) much more efficient. If no more memory is available for
+mmap()ing, the file will be read using conventional fread/fgets.
+X
+(Oct 2, 1999) The names of the database reading functions has been
+changed to allow both Blast1.4 and Blast2.0 databases to be read. In
+addition, Makefile.common now includes an option to link both
+ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
+However, Blast1.4 support has not been tested.
+X
+The Makefile structure has been improved. Each architecture specific
+Makefile (Makefile.alpha, Makefile.linux, etc) now includes
+Makefile.common. Thus, changes to the program structure should be
+correct for all platforms. "map_db" and "list_db" are not made with
+"make all".
+X
+The database reading functions in nxgetaa.c can now return a database
+length of 0, which indicates that no residues were read. Previously,
+0-length sequences returned a length of 1, which were ignored.
+Complib.c and comp_thr.c have changed to accommodate this
+modification. This change was made to ensure that each residue,
+including the last, of each sequence is read.
+X
+Corrected bug in nxgetaa.c with FASTA format files with very long
+(>512 char) definition lines.
+X
+(2) (September 20, 1999) BLAST2 format databases supported
+X
+This release supports NCBI Blast2.0 format databases, using either
+conventional file reading or memory mapped files. The Blast2.0 format
+can be read very efficiently, so there is only a modest improvement in
+performance with memory mapping. The decision to use mmap()'ed files
+is made at compile time, by defining USE_MMAP. My thanks to Eamonn
+O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
+providing mmap()'ed modifications to fasta3. On my machines, Blast2.0
+format reduces search time by about 30%. At the moment, ambiguous DNA
+sequences are not decoded properly.
+X
+(3) (September 30, 1999) A new statistical estimation option is
+available. -z 2 has been changed from ln()-scaling, which never
+should have been used, to scaling using Maximum Likelihood Estimates
+(MLEs) of Lambda and K. The MLE estimation routines were written by
+Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
+written by Sean Eddy. The MLE estimation examines the middle 95% of
+scores, if there are fewer than 10000 sequences in the database;
+otherwise it excludes (censors) the top 250 scores and the bottom 250
+scores. This approach seems to effectively prevent related sequences
+from contaminating the estimation process. As with -z 1, -z 12 causes
+the program to generate a shuffled sequence score for each of the
+library sequences; in this case, no censoring is done. If the
+estimation process is reliable, Lambda and K should not vary much with
+different queries or query lengths. Lambda appears not to vary much
+with the comparison algorithm, although K does.
+X
+(4) Minor changes include fixes to some of the alignment display routines,
+individual copies of the pstruct structure for each thread, and some
+changes to ensure that every last residue in a library is available
+for matching (sometime the last residue could be ignored). This
+version has undergone extensive testing with high-throughput sequences
+to confirm that long sequences are read properly. Problems with
+fastf3/fasts3 alignment display have also been addressed.
+X
+>>August 26, 1999 (no version change - not released)
+X
+Corrected problem in "apam.c" that prevented scoring matrices from
+being imported for [t]fasts3/[t]fastf3.
+X
+>>August 17, 1999
+X --> v32t07
+X
+Corrected problem with opt_cut initialization that only appeared
+with pvcomp* programs.
+X
+Improved calculation of FASTA optcut threshold for DNA sequence
+comparison for match scores much less than +5 (e.g. +3). The previous
+optcut theshold was too high when the match penalty was < 4 and
+ktup=6; it is now scaled more appropriately.
+X
+Optcut thresholds have also been raised slightly for
+fastx/y3/tfastx/y3. This should improve performance with minimal
+effects on sensitivity.
+X
+>>July 29, 1999
+(no version change - date change)
+X
+Corrected various uninitialized variables and buffer overruns
+detected.
+X
+>>July 26, 1999 - new distribution
+(no version change - v32t06, previous version not released)
+X
+Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
+programs.
+X
+Statistical calculations for tfasta/x/y in unthreaded version
+corrected. Statistical estimates for threaded and unthreaded versions
+of the tfasta/x/y/s/f programs should be much more consistent.
+X
+Substantial modifications in alignment coordinate calculation/
+presentation. Minor error in fastx/y/tfastx/y end of alignment
+corrected. Major problems with tfasta alignment coordinates
+corrected. tfasta and tfastx/y coordinates should now be consistent.
+X
+Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
+with long query sequences.
+X
+Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
+to try to avoid "cannot allocate diagonal arrays" error message.
+Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
+so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
+I am still getting this message, so it has not been completely
+successful. Makefile.linux now uses -DALLOCN0 to avoid this problem,
+at some cost in speed.
+X
+The pvcomp* programs have been updated to work properly with
+forward/reverse DNA searches. See readme.pvm_3.2.
+X
+>>July 7, 1999 - not released
+X --> v32t06
+X
+Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
+dumps with "-o" option.
+X
+Corrected a subtle bug in fastx/y/tfastx/y alignment display.
+X
+>>June 30, 1999 - new distribution
+(no version change)
+X
+Corrected doinit.c to allow DNA substitution matrices with -s matrix
+option.
+X
+Changed ".gbl" files to ".h" files.
+X
+>>June 2 - 9, 1999 - new distribution
+(no version change)
+X
+Added additional DNA lambda/K/H to alt_parms.h. Corrected some
+other problems with those tables for the case where (inf,inf)
+gap penalties were not included.
+X
+Fixed complib.c/comp_thr.c error message to properly report filename
+when library file is not found.
+X
+Included approximate Lambda/K/H for BL80 in alt_parms.h.
+BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
+X
+Included some additional perl files for searchfa.cgi, searchnn.cgi
+in the distribution (my-cgi.pl, cgi-lib.pl).
+X
+>>May 30, 1999, June 2, 1999 - new distribution
+(no version number change)
+X
+Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h. Changed
+zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
+when only one sequence is compared and -z 3 is used.
+X
+>>May 27, 1999
+(no version number change)
+X
+Corrected bug in alignment numbering on the % identity line
+X 27.4% identity in 234 aa (101-234:110-243)
+for reverse complements with offset coordinates (test.aa:101-250)
+X
+>>May 23, 1999
+(no version number change)
+X
+Correction to Makefile.linux (tgetaa.o : failed to -DTFAST).
+X
+>>May 19, 1999
+(no version number change)
+X
+Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
+Changes to showsum.c to change off-end reporting. (Neither of these
+changes is likely to affect anyone outside my research group.)
+X
+>>May 12, 1999
+X --> v32t05
+X
+Fixed a serious bug in the fastx3/tfastx3 alignment display which
+caused t/fastx3 to produce incorrect alignments (and incorrectly low
+percent identities). The scores were correct, but the alignment
+percent identities were too low and the alignments were wrong.
+X
+Numbering errors were also corrected in fastx3/tfastx3 and
+fasty3/tfasty3 and when partial query sequences were used.
+X
+>>May 7, 1999
+X
+Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
+incorrect Smith-Waterman scores after do_walign() had been called.
+This affected only pvcompsw searches with the "-m 9" option.
+X
+>>May 5, 1999
+X
+Modified showalign.c to provide improved alignment information that
+includes explicitly the boundaries of the alignment. Default
+alignments now say:
+X
+Smith-Waterman score: 175; 24.645% identity in 211 aa overlap (5:207-7:207)
+X
+>>May 3, 1999
+X
+Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
+"not" superfamily annotation for the query sequence only. The
+goal is to be able to specify that certain superfamily numbers be
+ignored in some of the search summaries. Thus, a description line
+of the form:
+X
+>GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
+X
+says that GT8.7 belongs to superfamily 40001, but any library
+sequences with superfamily number 90043 should be ignored in any
+listing or summary of best scores.
+X
+In addition, it is now possible to make a fasta3r/prcompfa, which is
+the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
+unrelated sequences in a search using the superfamily annotation.
+fasta3r shows only the scores of related sequences. This might be
+used in combination with the -F e_val option to show the scores
+obtained by the most distantly related members of a family.
+X
+>>April 25, 1999
+X
+X -->v32t04 (not distributed)
+X
+Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
+(necessary for a more rational Makefile structure). No code changes.
+X
+>>April 19, 1999
+X
+Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
+(no version number change).
+X
+>>April 17, 1999
+X
+X --> v32t03
+X
+A serious bug in DNA alignments when the sequence has been broken into
+multiple segments that was introduced in version fasta32 has been
+fixed. In addition, several minor problems with -z 3 statistics on
+DNA sequences were fixed.
+X
+Added -m 9 option, which unfortunately does different things in
+pvcompfa/sw and fasta3/ssearch3. In both programs, -m 9 provides the
+id's of the two sequences, length, E(), %_ident, and start and end of
+the alignment in both sequences. pvcompfa/sw provides this
+information with the list of high scoring sequences. fasta3/ssearch3
+provides the information in lieu of an alignment.
+X
+>>March 18, 1999
+X
+X --> v32t02
+X
+Added information on the algorithm/parameter description line to
+report the range of the pam matrices. Useful for matrices like
+MD_10, _20, and _40 which require much higher gap penalties.
+X
+>>March 13, 1999 (not distributed)
+X
+X --> v32t01
+X
+X -r results.file has been changed to -R results.file to accomodate
+X DNA match/mismatch penalties of the form: -r "+1/-3".
+X
+>>February 10, 1999
+X
+Modify functions in scalesw*.c to prevent underflow after exp() on
+Alpha Linux machines. The Alpha/LINUX gcc compiler is buggy and
+doesn't behave properly with "denormalized" numbers, so "gcc -g -m
+ieee" is recommended.
+X
+Add "Display alignments also (y/n)[n] "
+X
+pvcomplib.c again provides alignments!! In addition, there is a
+new "-m 9" option, which reports alignments as:
+X
+>>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
+HS5 30 HS5 30 1.873e-11 1.000 30 1 30 1 30
+HS5 30 HS2249 40 1.061e-07 0.774 31 1 30 7 37
+HS5 30 HS2221 38 1.207e-07 0.833 30 1 30 7 35
+HS5 30 HS2283 40 1.455e-07 0.774 31 1 30 7 37
+HS5 30 HS2239 38 1.939e-07 0.800 30 1 30 7 35
+X
+where the columns are:
+X
+query-name q-len lib-name lib-len E() %id align-len q-start q-end l-start l-end
+X
+>>February 9, 1999
+X
+Corrected bug in showalign.c that offset reverse complement alignments
+by one.
+X
+>>Febrary 2, 1999
+X
+Changed the formatting slightly in showbest.c to have columns line up better.
+X
+>>January 11, 1999
+X
+Corrected some bugs introduced into fastf3(_t) in the previous version.
+X
+>>December 28, 1998
+X
+Corrected various problems in dropfz.c affecting alignment scores
+and coordinates.
+X
+Introduced a new program, fasts3(_t), for searching with peptide
+sequences.
+X
+>>November 11, 1998
+X
+X --> v32t0
+X
+Added code to correct problems with coordinate number in long library
+sequences with tfastx/tfasty. With this release, sequences should be
+numbered properly, and sequence numbers count down with reverse
+complement library sequences.
+X
+In addition, with this release, fastx/y and tfastx/y translated
+protein alignments are numbered as nucleotides (increasing by 3,
+labels every 30 nucleotides) rather than codons.
+X
+SHAR_EOF
+chmod 0644 readme.v33t0 ||
+echo 'restore of readme.v33t0 failed'
+Wc_c="`wc -c < 'readme.v33t0'`"
+test 50697 -eq "$Wc_c" ||
+ echo 'readme.v33t0: original size 50697, current size' "$Wc_c"
+fi
+# ============= readme.v34t0 ==============
+if test -f 'readme.v34t0' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.v34t0 (File already exists)'
+else
+echo 'x - extracting readme.v34t0 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.v34t0' &&
+X
+X $Name: fa_34_26_5 $ - $Id: readme.v34t0,v 1.167 2007/04/26 18:42:43 wrp Exp $
+X
+>>April 26, 2007
+X
+Modify scaleswn.c to prevent mle_cen() from hanging when it fails to
+converge. Also, free() more arrays in work_thr.c; initialize
+m_msg.hist.entries=0 in comp_lib.c, and various clean-ups for a_res
+encoded alignments.
+X
+>>March 22, 2007
+X
+Update faatran.c genetic codes (and documentation on -t option). Update
+ncbl2_mlib.c to parse non-NCBI format 12 databases better.
+X
+>>March 21, 2007 fasta-34_26_2
+X
+Fix conflict between "-S" "-s matrix.file".
+X
+>>February 26, 2007 fasta-34_26_2
+X
+Fix problem with dropfs2.c (curv.start = lpos before initialized).
+X
+>>January 12, 2007
+X
+Fix a problem with pssm_asn_subs.c reading strings (sequences) longer
+than 1024 bytes.
+X
+Remove searchfa.cgi, searchnn.cgi, cgi-lib.pl, my-cgi.pl - this code
+was used for an ancient FASTA WWW implementation and has been replaced
+by the FASTA_WWW package.
+X
+FASTA Version numbers are being modified to make releases easier to
+track, thus fa34t26b5 has become fasta-34_26_1. I would prefer to use
+decimal versions, but CVS does not allow '.' in tags.
+X
+>>January 4, 2007 fasta-34_26_1
+X
+Include scripts for building Mac OS X Universal binaries on a PPC
+machine. Programs are compiled first with Makefile.os_x (gcc-3.3 for
+PPC) and then installed into ./ppc/. Programs are next compiled with
+Makefile.os_x86 for i386, and the resulting executables installed into
+./i386/. Finally, the "make_osx_univ.sh" script is run to build the
+universal binaries from the two executables using "lipo".
+X
+>>December 12, 2006
+X
+Fix some problems with p2_workcomp.c: (1) no longer initialize pad
+characters for non-existent sequences. (2) deal with small libraries
+consistently with the serial versions.
+X
+>>November 17, 2006 fa34t26b5
+X
+Fixed a problem reading ASN.1 format 2 PSSM's. It is now possible to
+download a PSI-BLAST PSSM RID and search properly. Next, the query
+sequence from the PSSM should be used instead of the provided query
+sequence, so that the query sequence is ignored.
+X
+>>October 19, 2006 fa34t26b4
+X
+Fixed problem with SSE2 code when PSSM's are used.
+X
+>>October 6, 2006 fa34t26b3
+X
+A new set of WIN32 programs is now available that use the Intel C++
+9.1 compiler, rather than the much older Borland Turbo-C compiler. All
+of the unthreaded programs that are part of the Unix and MacOSX FASTA
+distributions are now available. Threaded (multiprocessor) versions
+of the program are available as well, as are sse2 accelerated versions
+of ssearch34 (ssearch34sse2.exe, ssearch34sse2_t.exe).
+X
+Th new WIN32 code also uses Microsoft's "nmake" program to build the
+programs, which allows much greater consistency between the Unix and
+Windows versions.
+X
+X
+>>September 18, 2006
+X
+Static global alignment variables removed from dropnfa.c, dropfx.c,
+dropfz2.c. dropnfa.c, dropfx.c and dropfz2.c should be thread safe.
+Together with the earlier changes, all the FASTA functions should now
+be thread safe during the alignment process.
+X
+>>August 17, 2006
+X
+Begin removal of static variables from Smith-Waterman alignment
+functions. These variables kept the functions from being thread-safe.
+Now dropgsw.c and dropnsw.c are thread-safe.
+X
+>>August 15, 2006 fa34t26b2
+X
+Fixed a problem with pv34compfx/mp34compfx (and fy) producing
+improperly labeled alignments and de-allocating memory for the reverse
+complement.
+X
+>>July 18, 2006
+X
+The library file name parsing programs now provide the option for
+environment variable substitions. For example, SLIB2=/slib2 as an
+environment variable (e.g. export SLIB2=/slib2 for ksh and bash), then
+X
+X fasta34 -q query.aa '${SLIB2}/swissprot.fa' expands as expected.
+X
+While this is not important for command lines, where the Unix shell
+would expand things anyway, it is very helpful for various
+configuration files, such as files of file names, where:
+X
+X <${SLIB2}/blast
+X swissprot.fa
+X
+now expands properly, and in FASTLIBS files the line:
+X
+X NCBI/Blast Swissprot$0S${SLIB2}/blast/swissprot.fa
+X
+expands properly. Currently, environment variable expansion only
+takes place for library file names, and the <directory in a file of
+file names.
+X
+>>July 14, 2006 fa34t26b1
+X
+Updated Farrar smith_waterman_sse2.c code to address possible bug
+(code from Michael Farrar). Include <sunmedia_intrin.h> for
+compilation with Sun compiler with Makefile.sun_x86.
+X
+>>July 2, 2006 fa34t26b0
+X
+This release provides an extremely efficient SSE2 implementation of
+the Smith-Waterman algorithm for the SSE2 vector instructions written
+by Michael Farrar (farrar.michael@gmail.com). The SSE code speeds up
+Smith-Waterman 8 - 10-fold in my tests, making it comparable to Erik
+Lindahl's Altivec code for the Apple/IBM G4/G5 architecture.
+X
+The Farrar code is largely confined to smith_waterman_sse2.c and
+smith_waterman_sse2.h, which are copyright (2006) by Michael Farrar,
+and cannot be redistributed without his permission. Mr. Farrar has
+agreed to provide his code under the same policy used by FASTA -
+e.g. the code can be used without permission, but not redistributed.
+X
+The Farrar code uses GCC version 4.0 SSE2 intrinsic functions to avoid
+assembly language code. Unfortunately, in my hands, "gcc -O3" causes
+"out of memory" errors, and other problems, so "gcc -O" is used instead.
+X
+>>June 23, 2006 fa34t25d10
+X
+Modifications to comp_lib.c, compacc.c, and other files to ensure that
+function-specific MAXTOT values are used properly. MAXTOT is now
+available as m_msg.max_tot, which is set in initfa.c (m_msg.max_tot =
+MAXTOT) to ensure that functions that need very large MAXTOT values
+(e.g. TFASTX) can get them. tfastx can now search successfully with
+titin, a 27,000 residue protein.
+X
+Other changes have been made to accommodate long query sequences.
+X
+A serious bug was found in fastx34(_t) that caused alignment
+coordinates to be calculated improperly when the DNA sequence was much
+longer than the protein sequence.
+X
+>>May 31, 2006 fa34t25d9
+X
+Fixed some problems with fasts/fastf alignments when -m 9 options were
+used. Unlike the other algorithms, the a_res structure does not
+capture all the information to re-produce an alignment, so do_walign
+now sets bptr->have_ares to indicate whether the a_res structure is
+valid.
+X
+Various problems with bad library names, and short query titles were
+also fixed.
+X
+Updated version number/date on all drop*.c functions.
+X
+>>May 24, 2006 fa34t25d8
+X
+Revised code for NCBI *.pal/*.nal databases has been tested on all
+architectures, including Windows.
+X
+In addition, support for ASN.1 PSSM:2 files provided by the NCBI
+PSI-BLAST WWW site is included. This code will not work with
+iteration 0 PSSM's (which have no PSSM information). For ASN.1
+PSSM's, which provide the matrix name (and in some cases the gap
+penalties), the scoring matrix and gap penalties are set appropriately
+if they were not specified on the command line. ASN.1 PSSM's are type 2:
+X ssearch34 -P "pssm.asn1 2" .....
+X
+>>May 18, 2006
+X
+Support for NCBI Blast formatdb databases has been expanded. The
+FASTA programs can now read some NCBI *.pal and *.nal files, which are
+used to specify subsets of databases. Specifically, the
+swissprot.00.pal and pdbaa.00.pal files are supported. FASTA supports
+files that refer to *.msk files (i.e. swissprot.00.pal refers to
+swissprot.00.msk); it does not currently support .pal files that
+simply list other .pal or database files (e.g. FASTA does not support
+nr.pal or swissprot.pal).
+X
+In the process of providing this support, the routines used to read
+ASN.1 binary formatdb files were substantially improved. It is now
+possible to see multiple description lines for a single sequence.
+X
+IS_BIG_ENDIAN has been removed from all of the Makefiles. The code
+now looks for the definition of __BIG_ENDIAN__ or _BIG_ENDIAN to
+decide whether the architecture IS_BIG_ENDIAN. If, for some reason,
+one of these macros is not defined on a BIG_ENDIAN architecture, then
+-DIS_BIG_ENDIAN is required.
+X
+>>May 12, 2006 CVS fa34t25d7
+X
+Corrected serious problem with coordinate display calculation for
+fasta34 and ssearch34 - in some cases the coordinates and alignment
+symbols were off by the length of the context (typically 30 residues).
+X
+Added capability to read ASN.1 binary PSSM information. This
+information is provided (in an encoded form) from the NCBI PSI-BLAST
+WWW site. (What is actually provided from the WWW site is a bzip2-ed
+binary file that is converted to ASCII HEX. The ASCII HEX file must
+be converted to binary, and then bunzip'ed. This bunzip-ed file is
+binary ASN.1.) These files can also be generated by
+X
+X blastpgp -J T -C pssm.asn1_bin -u 2
+X
+I am parsing the ASN.1 binary manually, not using the NCBI toolkit, so
+there may be some files that are not parsed properly - if so, let me
+know.
+X
+(May 12, 2006 - The NCBI changed the format of the psi-blast ASN.1
+PSSM - and has not yet provided documentation of the new structure, so
+this code does not work. It does work with blastpgp v 2.2.13, but not
+with the web site version 2.2.14. A fix was provided 24-May-2006)
+X
+>>April 18, 2006
+X
+Small modification in mshowbest.c to provide more consistent display
+widths with -m 9i in list of best hits.
+X
+>>April 11, 2006 CVS fa34t25d6
+X
+Corrected a problem introduced with the new, more efficient method for
+displaying alignments. For the tfast* programs, which must translate
+the library sequence, translations were not done when alignments were
+re-displayed.
+X
+Corrected an older problem with tfastx34 against very long sequence
+databases - the code to more efficiently do the display alignment did
+not use the correct sequence coordinates.
+X
+Modifications to dropfs2.c to ensure that exact peptide matches are
+captured more frequently.
+X
+>>March 16, 2006 CVS fa34t25d5
+X
+Change to initfa.c to allow lower case DNA libraries using the
+-DDNALIB_LC compile time option.
+X
+Modify p2_complib.c, p2_worklib.c (and doinit.c, msg.h) to allow the
+-V annotation option for the parallel programs. Also modify to allow
+specification of the query range (but only for the first query, like
+fasta34) for the parallel programs.
+X
+Modification of p2_workcomp.c to correct some problems presenting
+percent similarity. Also correct unreleased bugs in the alignment
+routines that allow more efficient alignment re-calculation.
+X
+>>Nov 20, 2005
+X
+Changes to support asymmetric matrices - a scoring matrix read in from
+a file can be asymmetric. Default matrices are all symmetric.
+X
+>>Oct 24, 2005
+X
+Modifications extended to p2_complib.c/p2_workcomp.c. Incorporation
+of drop_func.h into p2_workcomp.c greatly simplifies things. No
+changes in communication - struct a_res_str is internal to
+p2_workcomp.c.
+X
+Additional changes to do_walign() so that aln_func_vals() must be
+called to set llfact, qlfact, etc in a_struct aln before or after
+do_walign is called. do_walign produces a_res_str a_res, which has
+all the information necessary to produce a calcons() or calc_code()
+alignment.
+X
+>>Oct 19, 2005 CVS fa34t26b0
+X
+Modifications to drop*.c and c_dispn.c to separate (and simplify) some
+of the alignment coordinate calculations. Before, the "a_struct" had
+the coordinates of the alignment used in the display (seqc0, seqc1)
+AND in the original sequences (aa0, aa1), as well as other information
+used to calculate alignment coordinates. In the new version, a_struct
+coordinates always refer to seqc0,1, while a new structure, a_res_str,
+has coordinates for aa0, aa1 as well as the alignment encoding in res[nres].
+Eventually, this should make it possible to display multiple local
+alignments from the same two sequences.
+X
+In addition, the file "drop_func.h" has been added to the project, and
+is included by many of the files (all the drop*.c functions,
+mshowbest.c, mshowalign.c) to ensure that the various functions are
+declared and used consistently.
+X
+>>Sept 19, 2005 CVS fa34t25d4
+X
+Changes to support Mac OS 10.4 - Tiger (include sys/types.h in more
+files). Documentation update for prss34/prfx34. Modifications to
+comp_lib.c to support prss34_t/prfx34_t. Shuffle numbers for
+prss/prfx can now be specified by "-k #".
+X
+>>Sept 2, 2005
+X
+The prss34 program has been modified to use the same display routines
+as the other search programs. To be more consistent with the other
+programs, the old "-w shuffle-window-size" is now "-v window-size".
+X
+prss34/prfx34 will also show the optimal alignment for which the
+significance is calculated by using the "-A" option.
+X
+Since the new program reports results exactly like other
+fasta/ssearch/fastxy34 programs, parsing for statistical significance
+is considerably different. The old format program can be made using
+"make prss34o".
+X
+>>Aug 26, 2005
+X
+Modifications to save_best() in comp_lib.c to support prss34_t. It
+did not work before.
+X
+>>July 25, 2005
+X
+Modify mshowbest.c to suppress gi|12345 in HTML mode.
+X
+>>July 18, 2005 CVS fa34t25d3
+X
+Modifications to Makefile.tc to support NCBI formatdb formats under
+Windows.
+X
+>>May 19, 2005 CVS fa34t25d2
+X
+Modifications to dropfs2.c to fix an obscure bug that occurred when
+correctly ordered peptides aligned one residue apart.
+X
+>>May 5, 2005 CVS fa34t25d1
+X
+Modification to the -x option, so that both an "X:X" match score and
+an "X:not-X" mismatch score can be specified. (This score is also used
+to give a positive score to a "*:*" match - the end of a reading frame,
+while giving a negative score to "*:not-*".)
+X
+>>March 14, 2005 CVS fa34t25b4
+X
+Fixed some problems caused by padding characters required for
+Smith-Waterman ALTIVEC in the parallel (p2_complib.c, p2_workcomp.c)
+versions.
+X
+>>Feb 24, 2005 CVS fa34t25b3
+X
+Changes to comp_lib.c (and Makefile.pcom) to support prss34_t.
+X
+>>Feb 12, 2005
+X
+Modify dropfs.c to dynamically allocate space for alignments, so that
+queries with a large number of fragments can still place all the
+fragments on the alignment. Also fix a problem produced by removing
+-DBIGMEM from most of the Makefile's, but not fixing defs.h to use
+BIGMEM sizes by default.
+X
+>>Jan 24, 2005
+X
+Include a new program, "print_pssm", which reads a blastpgp binary
+checkpoint file and writes out the frequency values as text. These
+values can be used with a new option with ssearch34(_t) and prss34,
+which provides the ability to read a text PSSM file. To specify a
+text PSSM, use the option -P "query.ckpt 1" where the "1" indicates a
+text, rather than a binary checkpoint file. "initfa.c" has also been
+modified to work with PSSM files with zero's in the frequency
+table. Presumably these positions (at the ends) do not provide
+information. (Jan 26, 2005) blastpgp actually uses BLOSUM62 values
+when zero frequencies are provided, so read_pssm() has been modified
+to use scoring matrix values for zero frequencies as well.
+X
+>>Jan 13, 2005
+X
+Change to initfa.c to have fasts34 do a protein comparison by default,
+rather than an unknown sequence type. Automatic checking for fasts34
+does not work reliably, because queries can be very short. Likewise
+for fastm34. [Jan 26, 2005] Undo this change, which broke DNA
+comparison when "-n" was specified.
+X
+>>Jan 7, 2005
+X
+Changes to tatstats.h, dropfs2.c to allow larger numbers of peptides
+to match when fasts is used to show coverage on a proteomics
+experiment. Previously fasts could match no more than 30 peptides,
+that has been increased to 50. In addition, ktup=2 can be used
+to increase the likelihood that short exact matches trump longer
+mismatched regions.
+X
+>>Nov 11, 2004 CVS fa34t25
+X
+Finished merge of earlier fa34t24 branch with HEAD. Correct
+labeling of TFASTM.
+X
+>>Nov 4-8, 2004
+X
+Incorporation of Erik Lindahl "anti-diagonal" Altivec code for
+Smith-Waterman, only. Altivec SSEARCH is now faster than FASTA for
+query sequences < 250 amino acids.
+X
+Small modifications to output score display to ensure that the correct
+scores are shown, and that they are correctly labeled.
+X
+>>Aug 25,26, 2004 CVS fa34t24b3
+X
+Small change in output format for p34comp* programs in
+">>>query_file#1 string" line before alignments. This line is not present
+in the non-parallel versions - it would be better for them to be consistent.
+X
+Change in last_stats.c to properly label fasts statistics with -z != 1.
+X
+Change in dropfs2.c to ensure that tatprobs are not precalculated with -z 4.
+X
+Modify -m 9i output option to show in HTML output.
+X
+Add "#ifdef NOOVERHANG" to dropfs2.c that causes overlapping
+alignments to score a 0, rather than the partial overlap score.
+Useful for SAGE alignments, because "fasts" requires global alignments
+(except for overhangs, unless NOOVERHANG is defined).
+X
+>>Aug 23, 2004
+X
+Fix problem with very long definition lines with formatdb version4
+ASN databases. Fix mshowalign.c to re-enable "-L" option.
+X
+>>July 28, 2004
+X
+Fix to re-enable -w window shuffle for PRSS. Modify comp_lib.c
+for PRSS to ensure that the unshuffled score and probability
+are shown, even for very high probability alignments.
+X
+>>July 21, 2004
+X
+Modifications to support PostgreSQL databases with the same commands
+as MySQL databases. MySQL database libraries are type 16, PostgreSQL
+are type 17. Makefile.linux_sql and Makefile.pvm4_sql support both
+database types simultaneously.
+X
+>>June 23, 2004 CVS fa34t24b2
+X
+Additional fixes to enable -n or -p with fasts34 and
+fastm34. Makefile.pcom was fixed for fastm34_t. A new file,
+mgstm1.nts, of DNA fragments from mgstm1.seq, is included for testing
+fasts34 and fastm34.
+X
+>>May 4, 2004
+X
+Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM. This change
+introduced a bug that broke FASTS completely, but was fixed June 18,
+2004 (and retagged fa34t24b2).
+X
+>>April 23, 2004 CVS fa34t24b1
+X
+Fix bug in initfa.c that caused tfasts/tfastf not to examine all six
+frames.
+X
+>>May 4, 2004
+X
+Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM.
+X
+>>March 19, 2004 CVS fa34t24b0
+X
+Modify all the drop*.c files, plus mshowbest.c and mshowalign.c, to
+display percent similarity, rather than percent ungapped. An
+alignment is counted as similar if the score is greater than or equal
+to zero (the same criterion used for placing "."). To disable this
+change, remove -DSHOWSIM from the appropriate Makefile.*.
+X
+>>March 18, 2004 CVS fa34t23b8
+X
+Fix bug in initfa.c tables that caused prss to generally compare
+proteins.
+X
+>>March 15, 2004
+X
+Fix bug in calls to revcomp(); make revcomp() guarantee NULL termination.
+X
+>>March 2, 2004 CVS fa34t23b7
+X
+Fix a very embarrassing and surprising bug that caused insertions
+in fasta alignments to appear in the wrong sequence.
+X
+>>Feb 7, 2004 CVS fa34t23b6
+X
+Change initfa.c to allow "-i" (reverse complement) and "-i -3" with
+"fastx34" and "prfx34". In addition, "prfx34" now examines both query
+DNA strands in calculating the shuffled statistical significance.
+X
+>>Feb 5, 2004
+X
+Reverse assignments for G:U base pairing in initfa.c.
+X
+Fix memory allocation error caused by doubling DNA alignment width.
+X
+>>Jan 7, 2004 CVS fa34t23b5
+X
+Change in do_walign() in dropnfa.c to make final DNA alignments use a
+band that is 2X as large as the search band width.
+X
+>>Dec 22, 2003 CVS fa34t23b4
+X
+Fix typo in p2_complib.c that prevented compilation. Fix problem
+with karlin.c for asymmetrical matrices, such as used with -U.
+X
+>>Dec 10, 2003 CVS fa34t23b3
+X
+Fix problem in resetp()/initfa.c that disabled banded Smith-Waterman
+DNA alignments.
+X
+Allow spam() to do extended alignments for DNA if one of the sequences
+is < 50 nt.
+X
+Cause default ktup to drop for short sequences. For protein < 50, ktup=1;
+for DNA < 20, 50, 100 ktup = 1, 2, 3, respectively.
+X
+>>Dec 7, 2003
+X
+A new option, "-U" is available for RNA sequence comparison. "-U"
+functions like "-n", indicating that the query is an RNA sequence. In
+addition, to account for "G:U" base pairs, "-U" modifies the scoring
+matrices so that a "G:A" match has the same score as a "G:G" match,
+and "T:C" match has the same score as a "T:T" match. The asymmetric
+matrix required changes in dropnfa.c that were similar to the changes
+in dropgsw.c required for profiles. In addition, m_msg.qdnaseq and pst.dnaseq
+X can now be SEQT_DNA, SEQT_RNA, SEQT_PROT, SEQT_UNK, or SEQT_OTHER.
+m_msg.ldnaseq does not use SEQT_RNA, only SEQT_DNA. A new member of
+struct pstruct: int nt_align, is used to indicate nucleotide
+alignments.
+X
+>>Nov 19, 2003
+X
+Changes to Makefile's to distinguish between tatstats_fs.o and
+tatstats_ff.o.
+X
+>>Nov 2, 2003
+X
+Substantial changes to comp_lib.c, p2_complib.c, mshowbest.c, and
+mshowalign.c to support more sophisticated display options.
+Previously, one could have only one "-m #" option, even though several
+of the options were orthogonal (-m 9c is independent of -m 1 and -m2,
+which is independent of -m 6 (HTML)). The programs now use a bitmask
+that allows independent options to be combined. In particular -m 9c
+can be combined with -m 6, which can be very helpful for runs that
+need HTML output but can also exploit the encoding provided by -m 9c.
+X
+The "-m 9" option now also allows "-m 9i", which shows the standard
+best score information, plus percent identity and alignment length.
+X
+>>Oct 26, 2003 CVS fa34t23b1
+X
+Additional fixes to Makefiles to enable tfastf34(_t). Changes to
+support ossearch34 (a non-Phil Green optimized Smith-Waterman).
+X
+>>Oct 8, 2003 CVS fa34t23b0
+X
+Fixes to get DNA queries working in both directions, and to fix PCOMPLIB
+programs for "-V" option. Currently, the parallel programs cannot use
+the "-V" option.
+X
+>>Sept 25, 2003
+X
+A new option is available for annotating alignments. -V '@#?!'
+can be used to annotate sites in a sequence, e.g:
+X >GTM1_HUMAN ...
+X PMILGYWDIRGLAHAIRLLLEYTDS@S?YEEKKYT@MG
+X DAPDYDRS@QWLNEKFKLGLDFPNLPYLIDGAHKIT
+might mark known and expected (S,T) phosphorylation sites. These
+symbols are then displayed on the query coordinate line:
+X
+X 10 20 @? 30 @ 40 @ 50 60
+GTM1_H PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+X ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+gtm1_h PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
+X 10 20 30 40 50 60
+X
+This annotation is mostly designed to display post-translational
+modifications detected by MassSpec with FASTS, but is also available
+with FASTA and SSEARCH.
+X
+>>Sept 22, 2003 CVS fa34t22b5
+X
+The Altivec Smith-Waterman code has been removed.
+X
+>>Sept 17, 2003 CVS fa34t22b4
+X
+A variety of different bugs have been fixed. (1) All the functions in
+the old initsw.c are now in initfa.c; initsw.c will be removed.
+Specifically, the Profile/PSSM code is now in initfa.c. initfa.c is
+now fully table driven. (2) various problems with prss34 and prfx34
+have been fixed in initfa.c. (3) An additional ncbl2_mlib.c buffer
+overrun has been fixed. (4) fastf34 is now available in this package.
+Its performance is very similar to, but not identical to, fastf33. I
+am tracking down the differences. In general, the raw scores
+calculated by both programs are the same, but the statistical analysis
+seems to be slightly different.
+X
+>>July 30, 2003 CVS fa34t22b3
+X
+Fix bug in ncbl2_mlib.c that caused buffer overrun with blast/formatdb
+v3 description lines.
+X
+>>July 28, 2003
+X
+The initfa.c file has been substantially re-structured to use a
+table-driven approach to parameter setting, rather than the previous
+confusing combinations of #ifdef's. Two tables of parameters are
+used, pgm_def_arr[] and msg_def_arr[], which specify values like the
+program name, reference, scoring matrix, default gap penalties, etc.
+msg_def_arr[] has the sequence types for the query, library, and
+algorithm, as well as other parameters (qframe, nframe, nrelv, etc),
+which greatly simplifies the sequence recognition logic. ppst->pgm_id
+can be used to identify the program that is running. Eventually,
+almost all of the program specific #ifdef's will be removed from
+initfa.c. initfa.c now provides initsw.c functionality, so that
+initsw.c is no longer needed.
+X
+>>July 25, 2003
+X
+A new file is included - fasta.defaults - that lists the scoring
+matrix, gap penalty, and other defaults for all of the fasta34
+programs. This file will be used soon to simplify parameter setting
+for the FASTA programs, and should also be used by Javascript WWW
+interfaces to the FASTA programs.
+X
+>>July 22, 2003 CVS fa34t22b2
+X
+Fixes to dropfs2.c, tatprobs.c to ensure that negative probabilities
+cannot occur. Negative probabilities were never seen with standard
+matrices, but did occur with BL50. Another optimization in dropfs.c
+considerably improves fasts34 performance in some cases.
+X
+Fix a problem with formatdb v4 ASN.1 format files.
+X
+>>July 12, 2003
+X
+Fix a bug that prevented "-L" (long sequence descriptions) from
+working.
+X
+>>July 9, 2003
+X
+Fix reverse complement (M:K) error. Fix off-by-one error for FASTA
+DNA alignments that caused the first aligned residue pair to be
+missed.
+X
+>>July 4 - 8, 2003
+X
+Incorporate blast-def-line ASN.1 parsing so that NCBI formatdb version
+4 files can be read.
+X
+>>June 26, 2003
+X
+The strategy for displaying the match/mismatch line (" .:" for -m 0)
+has been changed dramatically to accommodate more sophisticated
+strategies for indicating conservative replacements, e.g. because of
+PSSM's. In addition to seqc0 and seqc1, which hold the aligned
+sequences for display, there is also seqca, which holds the alignment
+symbol. calcons(), do_show(), and discons() have all changed to
+include seqca. calcons() is somewhat more complex; discons() is much
+simpler. (June 29, 2003 - dropgsw.c calcons() now displays profile
+similarity accurately - it is very very illuminating.)
+X
+>>June 16, 2003 version: fasta34t22
+X
+ssearch34 now supports PSI-BLAST PSSM/profiles. Currently, it only
+supports the "checkpoint" file produced by blastall, and only on
+certain architectures where byte-reordering is unnecessary. It has not
+been tested extensively with the -S option.
+X
+X ssearch34 -P blast.ckpt -f -11 -g -1 -s BL62 query.aa library
+X
+Will use the frequency information in the blast.ckpt file to do a
+position specific scoring matrix (PSSM) search using the
+Smith-Waterman algorithm. Because ssearch34 calculates scores for
+each of the sequences in the database, we anticipate that PSSM
+ssearch34 statistics will be more reliable than PSI-Blast statistics.
+X
+The Blast checkpoint file is mostly double precision frequency
+numbers, which are represented in a machine specific way. Thus, you
+must generate the checkpoint file on the same machine that you run
+ssearch34 or prss34 -P query.ckpt. To generate a checkpoint file,
+run:
+X
+blastpgp -j 2 -h 1e-6 -i query.fa -d swissprot -C query.ckpt -o /dev/null
+X
+(This searches swissprot for 2 iterations ("-j 2") using an E()
+threshold of 1e-6, saving the resulting position specific frequencies in
+query.ckpt. Note that the original query.fa and query.ckpt must
+match.)
+X
+>>June 5, 2003
+X
+Fix to mshowbest.c to get -m 9 coordinates correct on reverse strand
+with pv34comp*. Some additional fixes for prfx34.
+X
+>>May 22, 2003
+X
+Changes to llgetaa.c, getseq.c, comp_lib.c to provide a different
+library residue lookup table (sascii) for queries and libraries. This
+allows one to make a prfx34 (like prss34, but using the fastx
+algorithm). prfx34 is now available.
+X
+>>May 13,14 2003
+X
+Fixes to most of the drop*.c files, and mshowbest.c, to ensure that
+coordinates displayed with -m 9(c) and the final alignment are
+consistent. They were consistent for fasta34/ssearch34/fasts34, but
+not for fastx34/fasty34. The alignment coordinate system has
+been revised for consistency in all the drop*.c programs (coordinates
+used to be off-by-one for some, but not other functions).
+X
+Fixes to -m 9c for fasty34/pv34compfy. In addition, a problem was
+fixed with fastx34/fasty34 that appeared when a protein sequence was
+considerably longer than the DNA query, e.g. an EST vs titin (26K
+residues). This problem only appeared on pv34compfx/fy on Xserve's
+under OS_X; but it should improve fastx34/fasty34 performance with
+very long protein sequences on all platforms.
+X
+>>May 7,8 2003
+X
+Changes to p2_workcomp.c, compacc.c, and p_mw.h to fix persistent
+bugs in the -m 9c display. Previous pv34comp* programs would not
+return the correct coded alignment if more than 100 alignments came
+from the same node, or if an encoding was longer than 127 chars.
+X
+Also, fixes to p2_complib.c, comp_lib.c, to allow long query sequences
+to be segmented. Previously, only the first 20,000 residues were
+used. The segmented queries are not overlapped; segmented library
+sequences are.
+X
+>>May 5, 2003
+X
+Changes to last_tat.c, scaleswt.c to ensure that all fasts alignments
+that are likely to have significant scores are displayed. In previous
+implementations, if the query had more than 10 fragments, only the 100
+best scores were shown. Now, we rescore up to 2500 alignments. The
+new approach allows large mixtures to be used for searches, where some
+of the fragments from the mixture match too many proteins
+(e.g. actins). Some differences between the fasts34 and pv34compfs
+implementations have been fixed. The two programs typically will not
+give exactly the same results, because of small differences in the
+sampling procedures, but the results are essentially equivalent.
+X
+>>Apr 11, 2003 CVS fa34t21b3
+X
+Fixes for "-E" and "-F" with ssearch34, which were inadvertently disabled.
+X
+A new option, "-t t", is available to specify that all the protein
+sequences have implicit termination codons "*" at the end. Thus, all
+protein sequences are one residue longer, and full length matches are
+extended one extra residue and get a higher score. For
+fastx34/tfastx34, this helps extend alignments to the very end in
+cases where there may be a mismatch at the C-terminal residues.
+X
+-m 9c has also been modified to indicate locations of termination
+codons ( *1).
+X
+>>Mar 17, 2003 CVS fa34t21b2
+X
+A new option on scoring matrices "-MS" (e.g. "BL50-MS") can be used to
+turn the I/L, K/Q identities on or off. Thus, to make "fastm34" use
+the isobaric identities, use "-s M20-MS". To turn them off for "fasts34",
+use "-s M20".
+X
+More fixes for correct alignment coordinates. There was a conflict between
+-m 9 and -m 9c and subsequent alignment displays.
+X
+>>Mar 13, 2003
+X
+Various fixes to produce correct fastm34 alignments. Changes to all
+functions to correct potential problem with -m 9 alignment coordinates
+when both -m 9 and actual alignments are shown.
+X
+>>Feb 25,27, 2003
+X
+Modifications to re-activate showsum.c, which included corrections to
+the showbest() call in p2_complib.c.
+X
+>>Feb 13, 2003 CVS fa34t21b1
+X
+Modifications to dropfx.c to dramatically improve alignment speed for
+cases where the DNA sequence is considerably longer than the protein
+sequence. Previously, a 200 aa vs 5000 nt comparison would do a full
+200 x 5000 Smith-Waterman alignment; with this modification, no more
+than a 200 x 1200 (2x3x200) alignment is done. This optimization has
+not (yet) been applied to dropfz2.c (fasty/tfasty).
+X
+>>Feb 11, 2003
+X
+Small modifications to comp_lib.c, p2_complib.c, and nmgetlib.c to
+pass openlib() a possibly old lmf_str. This allows openlib() to
+re-use memory mapped files. closelib() no longer releases memory
+mapped file buffers. Under Linux, memory mapped file buffers were not
+really released, so when comparing a set of sequences against nr, the
+program could not mmap() the database after several searches. This
+will also speed up memory mapped multiple sequence searches.
+X
+>>Jan 28-31, 2003 CVS fa34t21b0
+X
+Fix another bug (all of v34t20) involved with overlapping long
+sequences. And another bug that occurred when using sampled
+statistics, but appeared only on the SGI platform - thanks to Dmitri
+Mikhailov. Several other issues have been addressed based on more
+instrumented runtime testing.
+X
+Fix an old (all v34) bug that caused problems with -z 11-16 (shuffled
+sequence array was not allocated properly). Fixed another bug with -z
+6/16 when using threaded (_t) searches in fasta34_t.
+X
+Restructure statistical analysis functions (scaleswn.c, scaleswt.c) to
+return the "final" statistical estimation routine done in pst.zsflag_f.
+This allows the program to cope with searches against a single sequence
+correctly.
+X
+Corrected an error for DNA sequences needing Altschul-Gish statistics.
+X
+>>Jan 25, 2003
+X
+Add option "-J start:stop" to pv34comp*/mp34comp*. "-J x" used to
+allow one to start at query sequence "x"; now both start and stop can
+be specified.
+X
+>>Jan 14, 2003
+X
+Changes to apam.c to provide an error message on stderr when a scoring
+matrix cannot be found.
+X
+Changes to dropfs2.c, initsw.c, initfa.c to provide -m9c information
+for fasts34 searches. Modify the alignment algorithm to use
+probabilistic scores properly.
+X
+>>Dec 22, 2002
+X
+Change to compacc.c (sortbeste()) to do a second sort on zscore when
+several sequences have E() == 0.
+X
+>>Nov 27, 2002
+X
+Change FSEEK_T to fseek_t to keep Borland BCC5 happy.
+X
+>>Nov 14-22, 2002 CVS fa34t20b6
+X
+Include compile-time define (-DPGM_DOC) that causes all the fasta
+programs to provide the same command line echo that is provided by the
+PVM and MPI parallel programs. Thus, if you run the program:
+X
+X fasta34_t -q -S gtt1_drome.aa /slib/swissprot 12
+X
+the first lines of output from FASTA will be:
+X
+X # fasta34_t -q gtt1_drome.aa /slib/swissprot
+X FASTA searches a protein or DNA sequence data bank
+X version 3.4t20 Nov 10, 2002
+X Please cite:
+X W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+X
+This has been turned on by default in most FASTA Makefiles.
+X
+Fix p2_complib.c so that qstats[] is always allocated before it is used.
+X
+Fix serious bug in non-threaded comp_lib.c that caused some high
+scoring sequences to be missed by fasts34. New tests are included in
+test.sh to detect this problem in the future.
+X
+The shell sort algorithm in sortbeste(), sortbestz(), and sortbesto()
+has been modified to use an improved algorithm that will not go
+quadratic in pathological cases.
+X
+nmgetlib.c and mmgetaa.c have been modified to remove "^A" in libstr
+when used with p2_complib.c.
+X
+Fix problem with MAXSEG in tatstats.h with IBM/AIX.
+X
+Changes to most Makefiles to use -DSAMP_STATS; fixes to p2_complib.c
+for SAMP_STATS.
+X
+>>Oct 22, Nov 3, Nov 9, 2002 CVS tag fa34t20b5
+X
+Fix problem in comp_lib.c that caused the query sequence length to be
+counted twice.
+X
+Fixed problem with prss34 (updated find_zp in showrss.c).
+X
+Correct shuffling function in several places.
+X
+Add jitter back to addhistz() - improves appearance with prss34.
+X
+Changes to fix problems with aln_code using -m 9c.
+X
+Fix to serious bug in scaleswt.c (fasts34, etc) that caused sorts on
+the high scores to take much too long. The program is now 10X faster,
+and scales well on PVM/MPI.
+X
+Fix to llgetaa.c to work with new getseq() API with automatic alphabet
+recognition.
+X
+>>Oct 12, 2002 CVS tag fa34t20b4
+X
+Several very obscure (and sometimes old) bugs that appeared in certain
+MPI environments have been fixed. This occurred because the pst.sq[]
+array did not always have a '\0' at the end. In addition,
+mshowalign.c/p2_workcomp.c sometimes failed to put the '\0' at the end
+of seqc0/seqc1. Correct bug introduced in fa34t20b3 for fasts34(_t).
+X
+>>Oct 9, 2002 CVS tag fa34t20b3
+X
+Fix to apam.c build_xascii() to not zero-out qascii[0]. Fix
+Makefile.pvm4. Fix problem with -m 9c with compacc.c.
+X
+>>Sept 28, 2002
+X
+Additional fixes to -m 9c in p2_complib.c/compacc.c/mshowbest.c.
+Remove restriction in fasts34(_t) to less than 30 peptides (though no
+more than 30 peptides can be aligned currently).
+X
+>>Sept 24, 2002
+X
+Fix p2_workcomp.c so that e_scores are delivered correctly when
+last_calc flag is set, and -m 9c provides alignments when only one
+best hit is present.
+X
+Fix comp_lib.c to use different maxn and overlap for each different
+query sequence. fasta34 and fasta34_t now have identical results when
+a long sequence is searched.
+X
+Add '@C:101' support to memory mapped FASTA format files.
+X
+Fix mshowalign.c so that coordinates returned by cal_coord() use
+loffset+l_off.
+X
+>>Sept 14, 2002 CVS tag fa34t20b2
+X
+Changes to p2_complib.c, compacc.c to fix statistics problems with
+pv34compfs on query sequences with more than 10 fragments.
+X
+>>Aug 27, 2002
+X
+Modifications to mshowbest.c and drop*.c (and p2_workcomp.c,
+compacc.c, doinit.c, etc.) to provide more information about the
+alignment with the -m 9 option. There is now a "-m 9c" option, which
+displays an encoded alignment after the -m 9 alignment information.
+The encoding is a string of the form: "=#mat+#ins=#mat-#del=#mat".
+Thus, an alignment over 218 amino acids with no gaps (not necessarily
+100% identical) would be =218. The alignment:
+X
+X 10 20 30 40 50 60 70
+GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
+X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
+XXURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
+X 20 30 40 50 60
+X
+would be encoded: "=23+9=13-2=10-1=3+1=5". The alignment encoding is
+with respect to the beginning of the alignment, not the beginning of
+either sequence. The beginning of the alignment in either sequence is
+given by the an0/an1 values. This capability is particularly useful
+for [t]fast[xy], where it can be used to indicate frameshift positions
+"/#\#" compactly. If "-m 9c" is used, the "The best scores" title
+line includes "aln_code".
+X
+>>Aug 14, 2002 CVS tag fa34t20
+X
+Changes to nmgetlib.c to allow multiple query searches coming from
+STDIN, either through pipes or input redirection. Thus, the command
+X
+X cat prot_test.lseg | fasta34 -q -S @ /seqlib/swissprot
+X
+produces 11 searches. If you use the multiple query functions, the
+query subset applies only to the first sequence.
+X
+Unfortunately, it is not possible to search against a STDIN library,
+because the FASTA programs do not keep the entire library in memory
+and need to be able to re-read high-scoring library sequences. Since
+it is not possible to fseek() against STDIN, searching against a STDIN
+library is not possible.
+X
+>>Aug 5, 2002
+X
+fasts34(_t) and fastm34(_t) have been modified to allow searches with
+DNA sequences. This gives a new capability to search for DNA motifs,
+or to search for ordered or unordered DNA sequences spaced at
+arbitrary distances.
+X
+>>Aug 4, 2002
+X
+comp_lib.c has been modified to provide comp_mlib.c function.
+comp_mlib.c is no longer used. comp_lib.c with the "mlib" function
+can now recognize protein or DNA sequences automatically, and reads
+from stdin can now detect DNA/protein sequence types automatically.
+Changes to compacc.c, getseq.c, doinit.c initfa.c, initsw.c, and
+nmgetlib.c to support automatic sequence type detection.
+X
+>>July 28-31, 2002
+X
+(1) The various Makefile's have been "normalized". The fast*34[_t]
+X (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
+X Makefile.mpi4[_sql] make files all use a common set of filenames,
+X described in Makefile.fcom. This greatly simplifies adding
+X programs, but requires that all *.o files be deleted when moving
+X from fast*34* to pv34comp* to mp34comp*.
+X
+(2) showalign.c/p_showalign.c have been merged into mshowalign.c
+X showbest.c/manshowbest.c have been merged into mshowbest.c. Some
+X of the related files (showun.c, manshowun.c, have not been merged
+X or tested).
+X
+(3) Code for ranking scores with valid e_value's incorporated.
+X
+(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
+X provide identical statistics.
+X
+>>July 26, 2002
+X
+Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
+by providing the worker program name from the h_init() function in the
+initfa.c/initsw.c files.
+X
+>>July 24, 2002
+X
+Substantial modifications to param.h, structs.h to ensure that no
+sequence specific information is kept in struct pstruct. This
+structure now holds the pam[] matrix, and other scoring parameters,
+but nothing that is dependent on aa0. The aa0 dependent stuff (nm0,
+Lambda, K, etc) is now stored in struct mngmsg. This was mostly done
+to support the pv34comp* programs, which have separate mngmsg
+structures but the same pstructs.
+X
+The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
+successfully.
+X
+>>July 19, 2002
+X
+Fix an old bug in the calculation of E()-values in DNA databases
+longer than 2147483647 residues on machines with 32-bit longs.
+X
+X
+>>July 28-31, 2002
+X
+(1) The various Makefile's have been "normalized". The fast*34[_t]
+X (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
+X Makefile.mpi4[_sql] make files all use a common set of filenames,
+X described in Makefile.fcom. This greatly simplifies adding
+X programs, but requires that all *.o files be deleted when moving
+X from fast*34* to pv34comp* to mp34comp*.
+X
+(2) showalign.c/p_showalign.c have been merged into mshowalign.c
+X showbest.c/manshowbest.c have been merged into mshowbest.c. Some
+X of the related files (showun.c, manshowun.c, have not been merged
+X or tested).
+X
+(3) Code for ranking scores with valid e_value's incorporated.
+X
+(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
+X provide identical statistics.
+X
+>>July 26, 2002
+X
+Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
+by providing the worker program name from the h_init() function in the
+initfa.c/initsw.c files.
+X
+>>July 24, 2002
+X
+Substantial modifications to param.h, structs.h to ensure that no
+sequence specific information is kept in struct pstruct. This
+structure now holds the pam[] matrix, and other scoring parameters,
+but nothing that is dependent on aa0. The aa0 dependent stuff (nm0,
+Lambda, K, etc) is now stored in struct mngmsg. This was mostly done
+to support the pv34comp* programs, which have separate mngmsg
+structures but the same pstructs.
+X
+The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
+successfully.
+X
+>>July 8, 2002
+X
+Modifications to comp_lib.c, initfa.c and new scaleswt.c, tatstats.c
+to support FASTS with Tatusov statistics.
+X
+last_params() has been introduced to allow aa0 dependent changes in m_msg/pstr.
+X
+sortbest() has been moved into initfa.c/initsw.c to make it function specific.
+X
+find_z() takes an additional parameter, escore.
+X
+The do_work() results structure, beststr, and stat_str all accommodate
+escores as well as integer scores (stat_str also saves segn and segl
+but doesn't need them).
+X
+In scaleswt.c, process_hist() now knows much more about Tatusov statistics.
+X
+last_stats() provided to accommodate rank-based statistical corrections.
+X
+scale_scores() is the last function to modify the beststr scores
+(final calculation of E-value).
+X
+Some sortbest*() calls and some bptr[i]->zscore=find_zp() loops have
+been moved into scale_scores();
+X
+>>July 3,5, 2002
+X
+Modifications to allow mySQL comments (--) in "library.sql 16" files.
+Thus, a first line of:
+X
+X --host seqdb user password;
+X
+is read by FASTA as the login information to a mySQL server, but is
+ignored by mySQL. "DO" commands in FASTA mySQL files can also be
+rendered invisible to mySQL in this way. See "do.sql".
+X
+Modifications to mysql_lib.c to allow very long SQL statements. The
+buffer is now dynamically reallocated in 4Kb chunks.
+X
+The fasta3.1 man page has been updated and re-organized.
+X
+>>June 26, 2002
+X
+Minor modifications to nmgetaa.c (openlib()) to use the same arguments
+for searching and PRSS. PRSS needs access to all of m_msg, but
+searches do not. Other small fixes to comp_mlib.c, towards the goal
+of merging comp_mlib.c and comp_lib.c.
+X
+>>June 25, 2002
+X
+Modify the statistical estimation strategy to sample all the sequences
+in the database, not just the first 60,000. The histogram is still
+based only on the first 60,000 scores and lengths, though all scores
+and lengths are shown. The fit to the data may be better than the
+histogram indicates, but it should not be worse.
+X
+Currently, this modification is available only if the -DSAMPLE_STATS
+option is defined.
+X
+>>June 23, 2002 CVS fa34t11d4
+X
+Fix a very long-standing bug in fasty/tfasty that caused 'NNN' to be
+translated as 'S', rather than 'X'. fastx/tfastx has done this
+correctly for many years, but the fasty/tfasty code that I received
+from Zheng Zhang was not implemented correctly (my fault, his code was
+fine).
+X
+>>June 19, 2002
+X
+Added "-C #" option, where 6 <= # <= MAX_UID (20), to specify the
+length of the sequence name display on the alignment labels. Until
+now, only 6 characters were ever displayed. Now, up to MAX_UID
+characters are available.
+X
+>>May 30, 2002 CVS fa34t11d3
+X
+Fixed problem with programs using the default -E cutoff when -b was
+provided. With this implementation, -E can override -b, but -b
+overrides the default -E.
+X
+Fixed problem with 64-bit file offsets in param.h (change USE_FSEEK0
+-> USE_FSEEKO, include -D_LARGEFILE_SOURCE and -D_LARGEFILE64_SOURCE
+in Makefile.linux_sql). Put limits on alignment display length (200
+chars). More checks for null returns from SQL queries.
+X
+>>Apr 17, 2002 CVS fa34t11d2
+X
+Fixed bug in mm_file.h/ncbl2_mlib.c that caused the SGI version to be
+unable to read blast2 format files.
+X
+Changed "mp_*" tags to "pg_*" for -m 10 option.
+X
+>>Mar 30, 2002
+X
+Fix embarrassing bug in revcomp() (getseq.c) that failed to complement
+the central nucleotide in a sequence with an odd number of residues.
+X
+Small changes to dropfs.c for more segments.
+X
+>>Mar 16, 2002
+X
+Added create_seq_demo.sql, nt_to_sql.pl to show how to build an SQL
+protein sequence database that can be used with the mySQL
+versions of the fasta34 programs. Once the mySQL seq_demo database
+has been installed, it can be searched using the command:
+X
+X fasta34 -q mgstm1.aa "seq_demo.sql 16"
+X
+mysql_lib.c has been modified to remove the restriction that mySQL
+protein sequence unique identifiers be integers. This allows the
+program to be used with the PIRPSD database. The RANLIB() function
+call has been changed to include "libstr", to support SQL text keys.
+Due to the size of libstr[], unique ID's must be < MAX_UID (20)
+characters.
+X
+A "pirpsd.sql" file is available for searching the mySQL distribution
+of the PIRPSD database. PIRPSD is available from
+ftp://nbrfa.georgetown.edu/pir_databases/psd/mysql.
+X
+>>Mar 6, 2002
+X
+Fix showbest.c showbest() to report pst.zdb_size as database size.
+Fix dropnfa.c spam() to address off-by-one on end of run, and double
+counting on backwards scan. Fix dropnfa.c do_fasta() to fix another
+problem introduced by -S. Changes to comp_lib.c to ensure that both
+the beginning and end of the query and library sequence have '\0'
+present. Changes to initfa.c, initsw.c to ensure that a match to a
+lower-case letter with -S gets exactly the same score as a match to an
+'X'. Changes to mmgetlib.c to work with 64-bit longs in *.xin files.
+X
+>>Feb 26, 2002
+X
+Fixes to doinit.c, initfa.c, initsw.c to allow DNA matrices using the
+"-s dna.mat" option. A new matrix, "d50ry.mat" is available that
+scores +5 for a match, -2 for a transition, and -5 for a
+transversion. "d50ry.mat" corresponds to DNA PAM50 with transitions
+twice as common as transversions. When "-s dna.mat" is used, "-n"
+MUST be used as well.
+X
+Query sequence names ("aa", "nt") should be more accurate.
+X
+>>Feb 22, 2002
+X
+Fix to getseq.c to allow "plain" sequence files.
+X
+>>Feb 12, 2002
+X
+Minor fix to res_stats.c.
+X
+>>Jan 28, 2002
+X
+Fixes to resurrect res_stats.c. res_stats (cc -o res_stats
+res_stats.c scaleswn.c -lm) takes the output from a current "-R
+file.res" file and calculates statistical significance - this allows
+one to take exactly the same set of scores (and lengths) and calculate
+statistical estimates using different strategies.
+X
+>>Jan 24, 2002
+X
+modifications to mmgetlib.c, ncbl2_mlib.c to more robustly read memory
+mapped files (*.xin, map_db) on machines lacking "native" 64-bit
+longs. If the machine provides some definition for a 64-bit long
+(e.g. "long long", "int64_t"), things should work. 64-bit offsets into
+memory mapped files work properly on Alpha, SGI, i386 Linux, and
+MacOSX. The current implementation depends either on 64 bit longs
+(Compaq Alpha's pre 4.0G) or the <sys/inttype.h> file. Makefile,
+Makefile.alpha, and Makefile.linux have been modified.
+X
+Modifications to nmgetlib.c, mmgetlib.c to provide GI numbers and
+Accession versions for Genbank searches. If the GI:123456 number is
+available, it will be used and the description line will be formatted:
+X
+X gi|123456|gb|ACC1234.1|LOCUS description
+X
+This should help FAST_PAN runs, where the version of a sequence
+changes frequently.
+X
+>>Jan 10, 2002
+X
+Modifications to p2_complib.c, p2_workcomp.c to more reliably allocate
+space for library sequence descriptions on the master and workers.
+X
+>>Jan 2-3, 2002 CVS fa34t10c/fa34t10d3
+X
+Fixes to comp_lib.c to support Macintosh and Windows/Turbo-C
+compilation. New Makefile.tc. Macintosh version supports both
+"Classic" and "Carbon" environments.
+X
+"<values.h>" has been replaced with the more modern "<limits.h>"
+X
+Fixes to p2_complib.c to support n_libstr (libstr length) in GETLIB().
+X
+comp_thr.c, complib.c removed.
+X
+>>Dec 16, 2001
+X
+Complete integration of comp_mlib.c with both the unthreaded and
+threaded programs. Comp_mlib allows fasta34 and fasta34_t to compare
+a database with a second database, just as pv34compfa does. Using
+multiple queries with fasta34_t is not as efficient as pv34compfa (and
+it cannot use networks of Unix workstations), but it is much easier to
+use and install.
+X
+With the comp_mlib.c option, fasta34 cannot automatically recognize
+DNA sequences, just as pv34compfa no longer recognizes DNA sequences.
+You must use the "-n" option to search with DNA sequences. The other
+programs (fastx34, tfastx34, etc) "know" the type of the query and
+database sequences, so "-n" is only required for fasta34(_t).
+X
+>>Dec 14, 2001 CVS tag fa34t10b
+X
+Fix problems reading DNA databases in blast2 format.
+X
+>>Dec 11, 2001
+X
+Changes to spam() in dropnfa.c so that, for DNA sequences, the
+previous behavior for finding the boundaries of a local alignment
+region use the same algorithm as previous versions of fasta. For
+protein sequences, the algorithm will extend the local region beyond
+the "ktup" boundaries if a better score can be found. For DNA
+sequences, this raises the noise rather than increasing sensitivity,
+so it is turned off and "ktup" boundaries are respected. The old,
+"ktup" boundary algorithm is available with -DNOSPAM_EXT.
+X
+This version also includes a working res_stats.c, which can be used to
+test various statistical estimates on exactly the same set of scores.
+X
+Fixed problems with -m 9 percent identity for fastx/fasty/tfastx/tfasty.
+These errors have been present since -m 9 was implemented.
+X
+>>Dec 10, 2001
+X
+Fix to map_db.c to work correctly with files > 2 Gb when 64-bit longs
+are available. It is not yet designed to work with ftello() and other
+offset types.
+X
+>>Nov 11,21, 2001 CVS tag fa34t10a, fa34t10d1
+X
+Substantial changes to revcomp(), getseq(), and other functions to
+correct problems with -S on DNA sequences. Sequences with lower case
+nucleotides were not recognized or reverse complemented properly.
+X
+Fix to dropnfa.c (v34t07, Nov 21, 2001) bg_align() to re-initialize
+static globals - this fixes a problem encountered with pv34compfa. A
+new main program, comp_mlib.c has been added to the CVS archive,
+although it is not referenced in any of the Makefiles. comp_mlib.c
+works like p2_complib.c and compares a library against another
+library.
+X
+>>Nov 4, 2001
+X
+Change to dropnfa.c spam () while(1) -> while(lpos <= dmax->stop).
+This fixes a problem with ktup=1 on Suns only, so far.
+X
+>>Oct 4, 2001 CVS tag fa34t10
+X
+Add comp_lib.c file, which merges complib.c (unthreaded) and
+comp_thr.c (threaded) code into one file.
+X
+Modifications to nmgetlib.c, mmgetaa.c to allow Genbank flatfile
+format without DESCRIPTION or ACCESSION lines.
+X
+Additional fix for -S with ktup=1.
+X
+>>Sept. 24, 2001
+X
+Fix to have correct gap-penalties for short scoring matrices with
+tfastx/fastx.
+X
+>>Sept. 10, 2001 CVS tag fa34t05d6
+X
+Fix a bug introduced by -S fix in fa34t05d5. Also, try to remove
+changes in p34compfa compared to pv4compfa output.
+X
+>>Sept. 6, 2001 CVS tag fa34t05d5
+X
+Fix the -S dropnfa/fx/fz2 bug that was not actually fixed in
+fa34t05d4. Incorporate the correct scaleswn.c referred to in
+fa34t05d4.
+X
+>>Sept. 5, 2001 CVS tag fa34t05d4
+X
+Fix problem with m_msg.quiet that prevented interactive prompts for
+ktup, file name, etc with threaded programs.
+X
+Fix serious bug in dropnfa.c/dropfx.c/dropfz2.c that caused -S to work
+improperly on sequences with effective length of 3 or less.
+X
+Change to scaleswn.c to make mle_cen(), mle_cen2() more robust to cases
+where the top and bottom scores are the same.
+X
+Change p2_complib.c to avoid compiler complaints with (void *)wstage2p=NULL
+on some platforms.
+X
+>>Aug. 30, 2001 CVS tag fa34t05d3
+X
+Fixed problem with uthr_subs.c for Suns, but changed Makefile.sun to
+use pthreads rather than Sun Unix threads. Removed SQL stuff from
+Makefile.mpi4/pvm4 and added Makefile.mpi4_sql/pvm4_sql.
+X
+fa34t05d2 - fix to map_db.c to provide *sascii.
+X
+fa34t05d1 - fixes to ibm_pthr_subs.c and Makefile.ibm from IBM.
+X
+>>Aug. 20, 2001 CVS tag fa34t05d0
+X
+The pvm/mpi complib programs have been substantially updated with
+release 3.4. See readme.v34t0 for more information. With version
+3.4, the MPI programs are mp34comp*, mu34comp*, etc.
+X
+A major effect of this change is to disable automatic sequence type
+(protein/DNA) recognition with pv34compfa/mp34compfa. By default,
+protein libraries are assumed. Thus, pv34compfa/mp34compfa require
+the "-n" command line option when running pv34compfa/mp34compfa on DNA
+sequence libraries. This issue does not occur with the other
+programs, which will recognize the appropriate sequence type, because
+it is determined by the program (e.g. pv34compfx requires
+DNA:protein).
+X
+Fixed substantial problem with 64-bit file offsets for Linux in
+complib.c/comp_thr.c, p2_complib.c. This problem, solved by Doug
+Blair, was preventing the threaded versions from working properly in
+memory mapped mode.
+X
+In all earlier versions of fasta, when very long sequences were
+searched, the sequence length reported was that of the "chunk" that
+was actually searched (typically 80,000-query_length) rather than the
+actual library sequence length. This peculiar behavior has now changed,
+and the full length of the library sequence, not the sequence chunk,
+is reported as the library sequence length. Note that chunks are
+still used, however, which can cause the same alignment to be shown
+twice. In addition, the "-m 9" output format has changed to report
+the coordinates of the query and library sequence (see below), which
+may be different from 1-sequence_length because the query and
+library sequences may have been extracted from larger sequences. Four
+additional fields have been added, "pn0", "px0","pn1", "px1" that are
+the positions for the beginning (pn0/1) and end (px0/1) of the
+query/library sequence. pn0/1 would typically be changed with the
+"@C:#" directive, described below.
+X
+Changes to doinit.c/initfa.c/initsw.c to provide a new function -
+f_lastenv() - that allows function-specific adjustments to parameters
+after the command line options have been read but before the first
+sequence is read. This change solved problems with "mp/pv34compfx -S".
+X
+fasts34/tfasts34 now recognize that 'I/L' are the same, as are 'Q/K'
+(which are apparently indistinguishable by Mass-Spec). The latter
+identity is on by default, but can be turned off with "-h 0".
+X
+The MPI/PVM versions of the programs have been tested extensively with
+compfa, compfx, and comptfx. Makefile.mpi4 now works properly.
+Changes to p2complib.c to support the PVM option "-T 1-4", which
+allows one to run on nodes 1-4 of a (presumably larger) PVM virtual
+machine. This option has no effect on the mp34comp* programs. The
+old "-T 4" to run on 4 nodes, is also available. If each node has 2
+cpu's, as indicated in the "pvmd hostfile", both CPU's will be used
+for a total, in this example, of 8 processes. This allows one to
+specify a large PVM machine and use separate parts of it
+independently.
+X
+Changes to nmgetlib.c to fix problems with longer dates in GCG files
+(Y2K). Fixes to faatran.c for extended alphabets and 'X's. Various
+code clean-ups to make "gcc -Wall" a little bit (not much) happier.
+X
+This is the first distributed fasta34 version.
+X
+================
+>>Aug 9, 2001 CVS tag fa34t05
+X
+Corrections to initfa.c to allow -S to work with tfastx/y.
+Fix to manshowbest.c for query position with -m 9.
+X
+>>July 18, 2001 CVS tag fa34t04
+X
+Various changes to complib.c, comp_thr.c, p2_complib.c, showbest.c,
+showalign.c to deal with overlapping alignments in long sequences that
+have been segmented. When long sequences are segmented (lcont>0), the
+eventual total length (n1tot_v) is saved at beststr->n1tot_p. If
+there was no lcont, then beststr->n1tot_p = NULL, and beststr->n1
+should be used as the sequence length. This has the advantage of
+requiring space only when long sequences are encountered, and
+requiring only one integer for several segments.
+X
+m_msg.noshow has been removed.
+X
+The -m 9 format has been changed - 5 fields have been added, 4
+(pmn0/pmx0/pmn1/pmx1) provide the beginning and end coordinates of the
+query and library sequence; the last (fs) reports the number of
+frameshifts. The names of the alignment boundaries have been changed
+from min0/max0/min1/max1 to amn0/amx0/amn1/amx1 (Alignment miN/maX).
+X
+The SQL format has been extended to provide for statements that do
+things but do not generate results, such as creating and selecting into a temporary table, e.g.:
+================
+X do
+X create temporary table seq_pos (
+X id int unsigned not null auto_increment primary key,
+X prot_id int unsigned not null default 0,
+X start int unsigned not null default 0,
+X length int unsigned not null default 0,
+X )
+X ;
+X do
+X insert into seq_pos (prot_id, start, length)
+X select id, 11, len-10
+X from protein, annot
+X where len > 100
+X and annot.protein_id = protein.id
+X and annot.pref=1
+X ;
+X select seq_pos.id,
+X substring(protein.seq, start, length),
+X concat("@C:", start, " ", descr)
+X from protein, seq_pos, annot
+X where protein.id = annot.protein_id
+X and protein.id = seq_pos.prot_id
+X and annot.pref = 1
+X ;
+X select prot_id,
+X concat("@C:", start, " ", descr)
+X from seq_pos, annot
+X where annot.protein_id = seq_pos.prot_id
+X and seq_pos.id = #
+X and annot.pref = 1
+X ;
+================
+X
+X In the current implementation, these statements must start with "DO"
+as the first two characters on the line, and come immediately after a
+line ending with ';'. The text from "DO" to the next ";", excluding
+the "DO", is executed when the database connection is made.
+X
+===== >>July 12, 2001
+X
+The allocation of the work_info data structure used to send
+information to the worker threads has been changed. The old method
+worked, possibly by accident.
+X
+A bug in p2_complib.c that caused E()-values to be calculated
+improperly for the first query sequence has been fixed.
+X
+>>July 11, 2001 --> fa34t02
+X
+It is now possible to specify output coordinates in library sequences
+by including the string: "@C:number" on the description line, e.g.
+X
+X >gtm1_human gi|12345 human glutathione transferase M1 @C:21
+X
+would label the first residue in the library sequence "21" rather than
+"1". This capability has been included to provide accurate
+coordinates for searches done against subsequences generated by an SQL
+query. For example, one could use a query of the form:
+X
+X SELECT protein.id, substring(protein.seq,11,length(protein.seq)-20),
+X concat(protein.name," @C:11 ",protein.descr)
+X FROM protein;
+X
+to generate a sequence set with each sequence starting with residue
+11. Without the "@C:11" option on the description line, the program
+would number the alignment positions starting at 1, even though the
+first residue of the sequence really started at 11. "@C:11" allows
+one to correct the coordinate system.
+X
+Currently, "@C:offset" is available only with library type 1 (fasta
+format) and 16 (mySQL).
+X
+The SQL-generated database with "@C:offset" can be used with both the
+fast*34(_t) programs and with pv34comp*. However, the SQL syntax is
+used differently in the fasta34 and pv34compfa programs. fast*34(_t)
+requires three SQL statements during a search: (1) a statement to
+generate a large set of library sequences; (2) a statement to generate
+a description of a single sequence, given a unique identifier provided
+by (1); and (3) a statement to generate a single sequence given a
+unique identifier provided by (1). For fast*34 searches, the third
+(3) SQL statement must provide the "@C:offset" information in the
+third results field for the offset to be used. It is optional in (1)
+and (2).
+X
+The pv34comp* programs only require one SQL statement, statement (1)
+above, which must provide three fields, a unique identifier, the
+sequence, and a complete description that must include "@C:offset" if
+substrings are used. If SQL queries (2) and (3) are provided, they
+are ignored. Thus, the same files can be used by both programs, but
+the "@C:offset" is required in different SQL queries by the fast*34
+and pv34comp* programs.
+X
+Other changes:
+X
+Re-incorporation of GAP_OPEN option; fix to Altschul-Gish stats when
+GAP_OPEN is used.
+X
+Re-incorporation of A. Mackey's spam() improvement in dropnfa.
+X
+Fixes to include file ordering to allow fast*34(_t) pv34comp* programs
+to compile.
+X
+Fix to lascii[] for SQL database queries.
+X
+Fix to an old bug in comp_thr.c to send individual worker_info
+structures to threads (does not fix LINUX threads problems, however).
+X
+=====
+>>July 9, 2001
+X
+Considerable changes to support no-global library functions.
+X
+(1) Separate ascii/sequence mapping arrays are used by the
+X query-reading (qascii), library-reading (lascii), and sequence
+X comparison function (pascii) routines. As a result, there is no
+X longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
+X functions.
+X
+(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
+X from complib.c/comp_thr.c/p2_complib.c. We no longer need
+X tcomp_thr.o, comp_thrx.o, etc. We still have a variety of
+X p2_complib.o variations to support the different c34.work* files.
+X
+(3) Because non-global openlib/getlib functions are available, exactly
+X the same open/get functions are available for reading both the
+X query and reference libraries in pv34comp* programs. The
+X host-specific openlib/getlib functions in hxgetaa.c are now
+X provided by nmgetlib.c, etc. This has two effect:
+X
+X (a) it is now possible to compare a query database generated by an
+X SQL query to a library database generated by a different SQL
+X query.
+X
+X (b) pv34comp* has lost (at least in this version) the ability to
+X automatically detect the query sequence type. To search with a
+X DNA query, you MUST use "-n".
+X
+(4) the resetp() function is now responsible for almost all of the
+X function sepcific (TFAST/FASTX/etc) initializations. All of the
+X function specific code has been removed from complib.c/comp_thr.c
+X and most of it has been moved to initfa.c/resetp().
+X
+(5) manageacc.c has been merged into compacc.c (mostly prhist()).
+X
+=====
+>>June 1, 2001
+X
+Many changes to accommodate a new - no global variable - strategy for
+reading sequence databases. Every time a file is opened, a struct
+lmf_str is allocated which can be used for memory mapped files, ncbl2
+files, and mysql files.
+X
+In addition, an open'ed file has a default sequence type: DNA or
+protein, or one can open a file in a mode that will allow the sequence
+type to be changed.
+X
+=====
+>>May 18, 2001 CVS: fa33t09d0
+X
+A new compile time parameter - -DGAP_OPEN, is available to change the
+definition of the "-f gap-open" parameter from the penalty for the
+first residue in a gap to a true gap-open penalty, as is used in BLAST
+and many other comparison algorithms. This will probably become the
+default for fasta in version 3.4.
+X
+Fixes to conflicts between "-S" and "-s matrix". When a scoring
+matrix file was specified, lower-case alignments were not displayed
+with -S (although the scores were calculated properly).
+X
+More extensive testing of mysql_lib.c (mySQL query-libraries) with
+the pv4comp* and mp4comp* programs.
+X
+=====
+>>April 5, 2001 CVS: fa33t08d4b3
+X
+Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
+descriptions for PCOMPLIB (pv4/mp3comp*). Also fix p2_complib.c to
+request DNA library for translated comparisons.
+X
+Fix for prss33(_t) to read both sequences from stdin.
+X
+=====
+>>March 27, 2001 CVS: fa33t08d4
+X
+Modifications to allow 64-bit fseek/ftell on machines like Sun,
+Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGEFILE_SOURCE
+off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO. Machines
+with 64-bit long's do not need this option. Machines with 32-bit
+longs that allow files >2 Gb can do so with 64-bit file access
+functions, including fseeko() and ftello(), which work with off_t file
+offsets instead of long's.
+X
+=====
+>>March 3, 2001 CVS: fa33t08d2
+X
+Corrected problems in nmgetaa.c and mysql_lib.c with parallel
+programs, and one serious problem with alternate DNA scoring matrices
+(initfa.c, initsw.c) not being set properly. A subtle problem with
+the merge of scaleswn.c and scaleswg.c is fixed.
+X
+>>February 17, 2001
+X
+Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
+position of the GID. This change was made because sprintf() cannot be
+used reliably to generate an SQL string, as '"' and '%' are used in
+such strings.
+X
+=====
+>>January 17, 2001
+(no version change, date change)
+X
+Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
+properly. "-n -s dna.mat" is required for the sequence/matrix to be
+recognized as DNA.
+X
+>>January 16, 2001
+-->v34t00
+X
+Merge of the main CVS trunk - fa33t06 with the latest release branch,
+fa33t08.
+X
+In addition, PCOMPLIB mods have been made to mysql_lib.c. Because
+p2_complib.c gets sequence description information during the first
+read of the database, the mysql_query must be changed to return:
+result[0]=GID, result[1]=description, result[2]=sequence. In the
+PCOMPLIB case, the other SQL queries (for GID description, sequence)
+are not necessary but must still be provided.
+SHAR_EOF
+chmod 0644 readme.v34t0 ||
+echo 'restore of readme.v34t0 failed'
+Wc_c="`wc -c < 'readme.v34t0'`"
+test 66121 -eq "$Wc_c" ||
+ echo 'readme.v34t0: original size 66121, current size' "$Wc_c"
+fi
+# ============= readme.w32 ==============
+if test -f 'readme.w32' -a X"$1" != X"-c"; then
+ echo 'x - skipping readme.w32 (File already exists)'
+else
+echo 'x - extracting readme.w32 (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'readme.w32' &&
+October 6, 2006
+X
+The FASTA programs for Windows32 environments (WindowsNT, 2000, XP)
+have undergone a major upgrade, so that now all the programs in the
+Unix/MacOSX distribution are available to Windows users. Moreover,
+Windows users with modern (SSE2 compatible) processors can run greatly
+accelerated versions of the Smith-Waterman ssearch program.
+X
+Moreover, these programs work both with FASTA formatted files, and
+NCBI BLAST formatted files.
+X
+The following programs are available:
+X
+X fasta34.exe protein-protein or DNA-DNA database searches
+X fastf34.exe
+X fastm34.exe
+X fasts34.exe
+X fastx34.exe compare DNA query to protein library with frameshifts
+X fasty34.exe compare DNA query to protein library with frameshifts
+X prfx34.exe
+X prss34.exe evaluate statistical significance using shuffles
+X prss34sse2.exe
+X ssearch34.exe Smith-Waterman for prot-prot or DNA-DNA searches
+X ssearch34sse2.exe Smith-Waterman, accelerated with SSE2 extensions
+X tfastf34.exe
+X tfastm34.exe
+X tfasts34.exe
+X tfastx34.exe compare protein to DNA library with frameshifts
+X tfasty34.exe compare protein to DNA library with frameshifts
+X
+Each of these programs also has a "threaded" version, which can run on
+multiple processors (or dual cores) if they are available. However,
+they are built using the Unix pthreads API, so to use these programs,
+you must download the pthreadVC2.dll from:
+X
+ftp://sources.redhat.com/pub/pthreads-win32/dll-latest/lib/pthreadVC2.dll
+X
+see also http://sourceware.org/pthreads-win32/
+X
+X fasta34_t.exe
+X fastf34_t.exe
+X fastm34_t.exe
+X fasts34_t.exe
+X fastx34_t.exe
+X fasty34_t.exe
+X prfx34_t.exe
+X prss34_t.exe
+X prss34sse2_t.exe
+X ssearch34_t.exe
+X ssearch34sse2_t.exe
+X tfastf34_t.exe
+X tfasts34_t.exe
+X tfastx34_t.exe
+X tfasty34_t.exe
+X
+Without that DLL, the threaded programs will not run at all. The
+current compilation supports two threads, and speeds up searches about
+2-fold on dual-core processors.
+X
+The programs have been tested with protein and DNA databases in FASTA
+format, PIR/GCG-text format, and Genbank flatfile format. The program
+does not work properly with GCG binary format databases, but it seems
+unlikely that Windows users would need these.
+X
+Be certain to use a program that can work with long file names when
+unpacking the program source files.
+X
+Please report bugs to:
+X
+X wrp@virginia.edu
+SHAR_EOF
+chmod 0644 readme.w32 ||
+echo 'restore of readme.w32 failed'
+Wc_c="`wc -c < 'readme.w32'`"
+test 2402 -eq "$Wc_c" ||
+ echo 'readme.w32: original size 2402, current size' "$Wc_c"
+fi
+# ============= res_stats.c ==============
+if test -f 'res_stats.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping res_stats.c (File already exists)'
+else
+echo 'x - extracting res_stats.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'res_stats.c' &&
+/* calculate stats from results file using scalesws.c */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include <limits.h>
+#include <math.h>
+X
+#define MAX_LLEN 200
+X
+#define LN_FACT 10.0
+X
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+X
+struct beststr {
+X int score; /* smith-waterman score */
+X int sscore; /* duplicate for compatibility with fasta */
+X double comp;
+X double H;
+X double zscore;
+X double escore;
+X int n1;
+#ifndef USE_FTELLO
+X long lseek; /* position in library file */
+#else
+X off_t lseek;
+#endif
+X int cont; /* offset into sequence */
+X int frame;
+X int lib;
+X char libstr[13];
+} *bbp, *bestptr, **bptr, *best;
+X
+struct stat_str {
+X int score;
+X int n1;
+X double comp;
+X double H;
+};
+X
+static struct db_str qtt = {0l, 0l, 0};
+X
+char gstring2[MAX_STR]; /* string for label */
+char gstring3[MAX_STR];
+char hstring1[MAX_STR];
+X
+FILE *outfd;
+X
+int nbest; /* number of sequences better than bestcut in best */
+int bestcut=1; /* cut off for getting into MAXBEST */
+int bestfull;
+X
+int dohist = 0;
+int zsflag = 1;
+int outtty=1;
+int llen=40;
+X
+/* statistics functions */
+extern void
+process_hist(struct stat_str *sptr, int nstat, struct pstruct pst,
+X struct hist_str *hist, void **);
+extern void addhistz(double, struct hist_str *); /* scaleswn.c */
+void selectbestz(struct beststr **, int, int );
+X
+extern double zs_to_E(double, int, int, long, struct db_str);
+extern double zs_to_Ec(double zs, long entries);
+X
+extern double (*find_zp)(int score, int length, double comp, void *);
+X
+void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str,
+X int, struct db_str, char *);
+X
+int nshow=20, mshow=50, ashow= -1;
+double e_cut=10.0;
+X
+/* res_stats - read a results file of library scores, fit the score
+X statistics, and print the score histogram and the best scores.
+X
+X options: -c col (score column, 1-3), -r bin_file, -z zsflag,
+X -n (DNA), -s ndup (lines per library entry), -q (non-interactive);
+X the first argument that is not one of these is the results file.
+*/
+main(argc, argv)
+X int argc; char **argv;
+{
+X FILE *fin;
+X char line[512];
+X int max, icol, iarg, i, qsfnum, lsfnum, n0, n1, s[3], frame;
+X double comp, H;
+X int idup, ndup, max_s;
+X char libstr[20], *bp;
+X char bin_file[80];
+X FILE *bout=NULL;
+X struct mngmsg m_msg; /* Message from host to manager */
+X struct pstruct pst;
+X struct stat_str *stats;
+X int nstats;
+X double zscor, mu, var;
+X
+#if defined(UNIX)
+X outtty = isatty(1);
+#else
+X outtty = 1;
+#endif
+X
+X if (argc < 2 ) {
+X fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
+X exit(1);
+X }
+X
+X m_msg.db.length = qtt.length = 0l;
+X m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
+X m_msg.pstat_void = NULL;
+X m_msg.hist.hist_a = NULL;
+X m_msg.nohist = 0;
+X m_msg.markx = 0;
+X
+X pst.n0 = 200; /* sensible dummy value */
+X pst.zsflag = 1;
+X pst.dnaseq = 0;
+X pst.histint = 2;
+X
+X bin_file[0]='\0';
+X icol = 1;
+X iarg = 1;
+X ndup = 1;
+X while (iarg < argc) { /* never read past the last argument */
+X /* options that take a value must be followed by one */
+X if (argv[iarg][0]=='-' && argv[iarg][1]!='\0' &&
+X strchr("crzs",argv[iarg][1])!=NULL && iarg+1 >= argc) {
+X fprintf(stderr," option %s requires a value\n",argv[iarg]);
+X exit(1);
+X }
+X if (argv[iarg][0]=='-' && argv[iarg][1]=='c') {
+X sscanf(argv[iarg+1],"%d",&icol);
+X iarg += 2;
+X }
+X else if (argv[iarg][0]=='-' && argv[iarg][1]=='r') {
+X strncpy(bin_file,argv[iarg+1],sizeof(bin_file)-1);
+X bin_file[sizeof(bin_file)-1]='\0'; /* strncpy does not terminate a full buffer */
+X iarg += 2;
+X }
+X else if (argv[iarg][0]=='-' && argv[iarg][1]=='z') {
+X sscanf(argv[iarg+1],"%d",&pst.zsflag);
+X iarg += 2;
+X }
+X else if (argv[iarg][0]=='-' && argv[iarg][1]=='n') {
+X pst.dnaseq = 1;
+X iarg += 1;
+X }
+X else if (argv[iarg][0]=='-' && argv[iarg][1]=='s') {
+X sscanf(argv[iarg+1],"%d",&ndup);
+X iarg += 2;
+X }
+X else if (argv[iarg][0]=='-' && argv[iarg][1]=='q') {
+X outtty = 0;
+X iarg += 1;
+X }
+X else break;
+X }
+X
+X if (iarg >= argc) { /* only options were given: no results file */
+X fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
+X exit(1);
+X }
+X
+X icol--;
+X if (icol < 0 || icol > 2) { /* s[] holds exactly three score columns */
+X fprintf(stderr," score column (-c) must be 1, 2, or 3\n");
+X exit(1);
+X }
+X
+X if ((fin=fopen(argv[iarg],"r"))==NULL) {
+X fprintf(stderr," cannot open %s\n",argv[iarg]);
+X exit(1);
+X }
+X
+X if (bin_file[0]!='\0' && ((bout=fopen(bin_file,"w"))==NULL)) {
+X fprintf(stderr,"cannot open %s for output\n",bin_file);
+X }
+X
+X if ((stats =
+X (struct stat_str *)malloc((MAXSTATS)*sizeof(struct stat_str)))==NULL)
+X s_abort ("Cannot allocate stats struct","");
+X nstats = 0;
+X
+X initbest(MAXBEST+1); /* +1 required for select() */
+X
+X for (nbest=0; nbest<MAXBEST+1; nbest++)
+X bptr[nbest] = &best[nbest];
+X bptr++; best++;
+X best[-1].score= BIGNUM;
+X best[-1].zscore = HUGE_VAL; /* selectz() scans zscore, so the sentinel must stop that scan too */
+X
+X nbest = 0;
+X
+X pst.Lambda=0.232;
+X pst.K = 0.11;
+X pst.H = 0.34;
+X
+X /* read the best scores from the results file */
+X
+X max_s = -1;
+X idup = 0;
+X
+X /* get first line with sequence length */
+X n0 = 0;
+X if (fgets(line,sizeof(line),fin)!=NULL) sscanf(line,"%d",&n0);
+X if (n0 > 0) pst.n0 = n0;
+X
+X while (fgets(line,sizeof(line),fin)!=NULL) {
+X if (line[0]=='/' && line[1]=='*') {
+X fputs(line,stdout);
+X strncpy(gstring2,line,sizeof(gstring2)-1);
+X gstring2[sizeof(gstring2)-1]='\0';
+X if ((bp=strchr(gstring2,'\n'))!=NULL) *bp = '\0';
+X break;
+X }
+X if (line[0]==';') {
+X if ((bp=strchr(line,'|'))!=NULL) qsfnum = atoi(bp+1);
+X else continue;
+X if ((bp=strchr(line,'('))!=NULL) {
+X n0 = atoi(bp+1);
+X pst.n0 = n0;
+X }
+X else {
+X fprintf(stderr, "cannot find n0:\n %s\n",line);
+X continue;
+X }
+X }
+X else {
+X /* %19s bounds the copy into libstr[20]; 7+icol fields are needed
+X before s[icol] holds a value read from this line */
+X if (sscanf(line,"%19s %d %d %d %lf %lf %d %d %d",
+X libstr,&lsfnum,&n1,&frame,&comp, &H, &s[0],&s[1],&s[2]) < 7+icol) {
+X fputs(line,stderr);
+X continue;
+X }
+X if (lsfnum==0 && n1==0) {
+X fputs(line,stderr);
+X continue;
+X }
+X if (n1 < 10 || s[icol]<=0) fputs(line,stderr);
+X idup++;
+X
+X if (s[icol] > max_s) max_s = s[icol];
+X if (idup < ndup) continue;
+X
+X m_msg.db.entries++;
+X m_msg.db.length += n1;
+X
+X if (dohist) addhistz(zscor=(*find_zp)(max_s,n1,comp,m_msg.pstat_void),
+X &m_msg.hist);
+X else zscor = (double)max_s;
+X
+X if (nstats < MAXSTATS) {
+X stats[nstats].n1 = n1;
+X stats[nstats].comp = comp;
+X stats[nstats].H = H;
+X stats[nstats++].score = max_s;
+X }
+X
+X else if (!dohist) {
+X /* do_bout(bout,stats,nstats); */
+X process_hist(stats,nstats,pst,&m_msg.hist, &m_msg.pstat_void);
+X for (i=0; i<nbest; i++)
+X bptr[i]->zscore =
+X (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,
+X m_msg.pstat_void);
+X dohist = 1;
+X }
+X
+X if (dohist) {
+X zscor =(*find_zp)(max_s,n1,comp,m_msg.pstat_void);
+X addhistz(zscor,&m_msg.hist);
+X }
+X else zscor = (double)max_s;
+X
+X if (nbest >= MAXBEST) {
+X bestfull = nbest-MAXBEST/4;
+X selectz(bestfull-1,nbest);
+X bestcut = (int)(bptr[bestfull-1]->zscore+0.5);
+X nbest = bestfull;
+X }
+X bestptr = bptr[nbest];
+X bestptr->score = max_s;
+X bestptr->sscore = max_s;
+X bestptr->n1 = n1;
+X bestptr->comp = comp;
+X bestptr->H = H;
+X bestptr->lib = lsfnum;
+X bestptr->zscore = zscor;
+X strncpy(bestptr->libstr,libstr,12);
+X bestptr->libstr[12]='\0';
+X nbest++;
+X
+X max_s = -1;
+X idup = 0;
+X }
+X } /* done with reading results */
+X
+X if (!dohist) {
+X if (nbest < 20) {
+X zsflag = 0;
+X }
+X else {
+X /* do_bout(bout,stats,nstats); */
+X process_hist(stats,nstats,pst,&m_msg.hist,&m_msg.pstat_void);
+X for (i=0; i<nbest; i++)
+X bptr[i]->zscore =
+X (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,m_msg.pstat_void);
+X dohist = 1;
+X }
+X }
+X
+X printf(" using n0: %d\n",pst.n0);
+X
+X /* print histogram, statistics */
+X
+X m_msg.nbr_seq = m_msg.db.entries;
+X pst.zdb_size = m_msg.db.entries;
+X /* get_param(&pst, gstring2,gstring3); */
+X
+X prhist(stdout,m_msg,pst,m_msg.hist,nstats,m_msg.db,gstring2);
+X
+X if (!zsflag) sortbest();
+X else {
+X sortbestz(bptr,nbest);
+X for (i=0; i<nbest; i++)
+X bptr[i]->escore = zs_to_E(bptr[i]->zscore,bptr[i]->n1,pst.dnaseq,
+X pst.zdb_size, m_msg.db);
+X }
+X
+X outfd = stdout;
+X showbest(m_msg.db); /* display best matches */
+}
+X
+initbest(nbest) /* allocate arrays for best sort: the global best[] gets
+X nbest zero-filled beststr records and the global bptr[] gets nbest
+X pointer slots; exits with status 1 if either calloc() fails */
+X int nbest;
+{
+X
+X if ((best=(struct beststr *)calloc((size_t)nbest,sizeof(struct beststr)))
+X == NULL) {fprintf(stderr,"cannot allocate best struct\n"); exit(1);}
+X if ((bptr=(struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *)))
+X == NULL) {fprintf(stderr,"cannot allocate bptr\n"); exit(1);}
+}
+X
+/* prhist - print the score histogram (observed '=' bars, expected '*'
+X marks), the library size, and the statistics summary to fd.
+X
+X When pst.zsflag < 0 or nstats <= 10 only the library size and
+X gstring2 are printed. When MX_M10FORM is set in m_msg.markx the
+X summary is also formatted into the global hstring1.
+*/
+void
+prhist(FILE *fd, struct mngmsg m_msg,
+X struct pstruct pst,
+X struct hist_str hist,
+X int nstats,
+X struct db_str ntt,
+X char *gstring2)
+{
+X int i,j,hl,hll, el, ell, ev;
+X char hline[80], pch, *bp;
+X int mh1, mht;
+X int maxval, maxvalt, dotsiz, ddotsiz,doinset;
+X double cur_e, prev_e, f_int;
+X double max_dev, x_tmp;
+X double db_tt;
+X /* these are read after the histogram branch even when that branch is
+X skipped (or no bin qualifies), so they need defined values */
+X int n_chi_sq = 0, cum_hl = 0, max_i = 0;
+X
+X
+X fprintf(fd,"\n");
+X
+X if (pst.zsflag < 0 || nstats <= 10) {
+X fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);
+X fprintf(fd,"\n%s\n",gstring2);
+X return;
+X }
+X
+X max_dev = 0.0;
+X mh1 = hist.maxh-1;
+X mht = (3*hist.maxh-3)/4 - 1;
+X
+X if (!m_msg.nohist && mh1 > 0) {
+X for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
+X if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
+X if (i >= mht && hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
+X }
+X n_chi_sq = 0;
+X cum_hl = -hist.hist_a[0];
+X dotsiz = (maxval-1)/60+1;
+X ddotsiz = (maxvalt-1)/50+1;
+X doinset = (ddotsiz < dotsiz && dotsiz > 2);
+X
+X if (pst.zsflag>=0)
+X fprintf(fd," opt E()\n");
+X else
+X fprintf(fd," opt\n");
+X
+X prev_e = zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);
+X for (i=0; i<=mh1; i++) {
+X pch = (i==mh1) ? '>' : ' ';
+X pch = (i==0) ? '<' : pch;
+X hll = hl = hist.hist_a[i];
+X if (pst.zsflag>=0) {
+X cum_hl += hl;
+X f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
+X cur_e = (double)zs_to_Ec(f_int,hist.entries);
+X ev = el = ell = (int)(cur_e - prev_e + 0.5);
+X if (hl > 0 && i > 5 && i < (90-hist.min_hist)/hist.histint) {
+X x_tmp = fabs(cum_hl - cur_e);
+X if ( x_tmp > max_dev) {
+X max_dev = x_tmp;
+X max_i = i;
+X }
+X n_chi_sq++;
+X }
+X if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
+X if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
+X fprintf(fd,"%c%3d %5d %5d:",
+X pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+X mh1*hist.histint+hist.min_hist,hl,ev);
+X }
+X else fprintf(fd,"%c%3d %5d :",
+X pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
+X mh1*hist.histint+hist.min_hist,hl);
+X
+X if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
+X if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
+X for (j=0; j<hl; j++) hline[j]='=';
+X if (pst.zsflag>=0) {
+X if (el <= hl ) {
+X if (el > 0) hline[el-1]='*';
+X hline[hl]='\0';
+X }
+X else {
+X for (j = hl; j < el; j++) hline[j]=' ';
+X hline[el-1]='*';
+X hline[hl=el]='\0';
+X }
+X }
+X else hline[hl] = 0;
+X if (i==1) {
+X for (j=hl; j<10; j++) hline[j]=' ';
+X sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
+X }
+X if (doinset && i == mht-2) {
+X for (j = hl; j < 10; j++) hline[j]=' ';
+X sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
+X }
+X if (i >= mht&& doinset ) {
+X for (j = hl; j < 10; j++) hline[j]=' ';
+X hline[10]=':';
+X for (j = 11; j<11+hll; j++) hline[j]='=';
+X hline[11+hll]='\0';
+X if (pst.zsflag>=0) {
+X if (ell <= hll) hline[10+ell]='*';
+X else {
+X for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
+X hline[10+ell] = '*';
+X hline[11+ell] = '\0';
+X }
+X }
+X }
+X
+X fprintf(fd,"%s\n",hline);
+X prev_e = cur_e;
+X }
+X }
+X
+X if (ntt.carry==0) {
+X fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length, ntt.entries);
+X }
+X else {
+X db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
+X fprintf(fd, "%.0f residues in %5ld library sequences\n", db_tt, ntt.entries);
+X }
+X
+X if (pst.zsflag>=0) {
+X if (MAXSTATS < hist.entries)
+X fprintf(fd," statistics extrapolated from %d to %ld sequences\n",
+X MAXSTATS,hist.entries);
+X /* summ_stats(stat_info); */
+X fprintf(fd," %s\n",hist.stat_info);
+X if (!m_msg.nohist && cum_hl > 0)
+X fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%d) at %3d\n",
+X max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+X if (m_msg.markx & MX_M10FORM) {
+X while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
+X if (cum_hl <= 0) cum_hl = -1;
+X sprintf(hstring1,"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
+X MAXSTATS,hist.entries,hist.stat_info,max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
+X }
+X }
+X fprintf(fd,"\n%s\n",gstring2);
+X fflush(fd);
+}
+X
+/* showbest - list the best-scoring entries from the global bptr[].
+X
+X On a terminal (outtty) the user is asked how many scores to show and
+X whether to show more. Otherwise up to 20 are shown, extended ten at
+X a time while the last E() shown is below e_cut, and never beyond
+X nbest. Output goes to outfd and is echoed to stdout when they differ.
+*/
+showbest(struct db_str ntt)
+X {
+X int ib, istart, istop;
+X char bline[200], fmt[40], pad[200];
+X char rline[20];
+X int ntmp;
+X
+X sprintf(fmt,"%%-%ds (%%3d)",llen-10);
+X
+X nshow = min(20,nbest);
+X mshow = min(20,nbest);
+X
+X if (outtty) {
+X printf(" How many scores would you like to see? [%d] ",nshow);
+X fflush(stdout);
+X if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
+X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
+X if (nshow<=0) nshow = min(20,nbest);
+X }
+X else nshow=mshow;
+X
+X memset(pad,' ',llen-10);
+X pad[llen-31]='\0';
+X if (zsflag)
+X fprintf(outfd,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
+X else
+X fprintf(outfd,"The best scores are:%s s-w\n",pad);
+X
+X if (outfd != stdout) {
+X if (zsflag)
+X fprintf(stdout,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
+X else
+X fprintf(stdout,"The best scores are:%s s-w\n",pad);
+X }
+X
+X istart = 0;
+X l1: istop = min(nbest,nshow);
+X for (ib=istart; ib<istop; ib++) {
+X bbp = bptr[ib];
+X
+X if (!outtty && zsflag && bbp->escore > e_cut) {
+X nshow = ib;
+X goto done;
+X }
+X
+X sprintf(bline,"%-12s %d",bbp->libstr,bbp->lib);
+X bline[13]='\0';
+X
+X fprintf(outfd,fmt,bline,bbp->n1);
+X
+X if (zsflag)
+X fprintf(outfd,"%4d %4.1f %6.2g\n",
+X bbp->score,bbp->zscore,
+X bbp->escore);
+X else
+X fprintf(outfd,"%4d\n",bbp->score);
+X
+X if (outfd!=stdout) {
+X fprintf(stdout,fmt,bline,bbp->n1);
+X if (zsflag)
+X printf("%4d %4.1f %6.2g\n",
+X bbp->score,bbp->zscore,
+X bbp->escore);
+X else
+X printf("%4d\n",bbp->score);
+X }
+X }
+X
+X fflush(outfd); if (outfd!=stdout) fflush(stdout);
+X
+X if (outtty) {
+X printf(" More scores? [0] ");
+X fflush(stdout);
+X if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
+X ntmp = 0;
+X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
+X if (ntmp<=0) ntmp = 0;
+X if (ntmp>0) {
+X istart = istop;
+X nshow += ntmp;
+X mshow += ntmp;
+X goto l1;
+X }
+X }
+X /* istop < nbest: stop once every entry has been shown (otherwise this
+X would loop forever), and never dereference bbp when nothing was shown */
+X else if (zsflag && istop < nbest && bbp->escore < e_cut) {
+X istart=istop;
+X nshow += 10;
+X goto l1;
+X }
+X
+X done:
+X if (outfd!=stdout) fprintf(outfd,"\n");
+}
+X
+selectz(k,n) /* k is rank in array; partially orders the global
+X bptr[0..n-1] by descending zscore, repeatedly partitioning
+X around bptr[r] and narrowing [l,r] toward index k */
+X int k,n;
+{
+X int t, i, j, l, r;
+X double v;
+X struct beststr *tmptr;
+X
+X l=0; r=n-1;
+X
+X while ( r > l ) {
+X i = l-1;
+X j = r;
+X v = bptr[r]->zscore; /* pivot value: last element of the current range */
+X do {
+X while (bptr[++i]->zscore > v ) ; /* skip entries scoring above the pivot */
+X while (bptr[--j]->zscore < v ) ; /* no lower bound on j: NOTE(review) relies on bptr[l-1] (bptr[-1] when l==0) not being < v - confirm the caller's sentinel */
+X tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
+X } while (j > i);
+X bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr; /* undo the last swap and move the pivot into slot i */
+X if (i>=k) r = i-1; /* continue in the part left of i */
+X if (i<=k) l = i+1; /* continue in the part right of i */
+X }
+}
+X
+/* sortbest - sort the global bptr[0..nbest-1] with cmps, i.e. by
+X descending raw score */
+sortbest()
+{
+X int cmps(); /* only comparator used here; unused declarations dropped */
+X ksort(bptr,nbest,cmps);
+}
+X
+sortbeste() /* sort the global bptr[0..nbest-1] with cmpe (ascending escore) */
+{
+X int cmpe();
+X ksort(bptr,nbest,cmpe);
+}
+X
+sortbestz() /* sort the global bptr[0..nbest-1] with cmpz (descending zscore); any arguments a caller passes are ignored */
+{
+X int cmpz();
+X ksort(bptr,nbest,cmpz);
+}
+X
+/* cmps - comparator for ksort(): orders by descending score
+X (returns 1 when ptr1 scores lower, -1 when higher, 0 when equal) */
+cmps(ptr1,ptr2)
+X struct beststr *ptr1, *ptr2;
+{
+X int first = ptr1->score;
+X int second = ptr2->score;
+X
+X return (first < second) - (first > second);
+}
+X
+/* cmpe - comparator for ksort(): orders by ascending escore
+X (returns -1 when ptr1 is smaller, 1 when larger, 0 otherwise) */
+cmpe(ptr1,ptr2)
+X struct beststr *ptr1, *ptr2;
+{
+X double first = ptr1->escore;
+X double second = ptr2->escore;
+X
+X return (first > second) - (first < second);
+}
+X
+/* cmpz - comparator for ksort(): orders by descending zscore
+X (returns 1 when ptr1 is smaller, -1 when larger, 0 otherwise) */
+cmpz(ptr1,ptr2)
+X struct beststr *ptr1, *ptr2;
+{
+X double first = ptr1->zscore;
+X double second = ptr2->zscore;
+X
+X return (first < second) - (first > second);
+}
+X
+ksort(v,n,comp) /* Shell sort of the n pointers in v[]; a pair is swapped when comp(earlier,later) > 0 */
+X char *v[]; int n, (*comp)();
+{
+X int gap, i, j;
+X char *tmp;
+X
+X for (gap=n/2; gap>0; gap/=2) /* halve the comparison distance each pass */
+X for (i=gap; i<n; i++)
+X for (j=i-gap; j>=0; j -= gap) {
+X if ((*comp)(v[j],v[j+gap]) <=0) /* already in order at this gap */
+X break;
+X tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
+X }
+}
+X
+/*
+do_bout(FILE *bout,struct stat_str **bptr, int nbest)
+{
+X int i, min_hist, max_hist;
+X double mu, var;
+X
+X if (bout==NULL) return;
+X
+X inithist();
+X for (i = 0; i<nbest; i++)
+X addhist(bptr[i]->score,bptr[i]->n1);
+X
+X for (i=0; i<MAX_LLEN; i++)
+X if (llen_hist[i]>0) {
+X min_hist=i;
+X break;
+X }
+X
+X for (i=MAX_LLEN-1; i>=0; i--)
+X if (llen_hist[i]>0) {
+X max_hist=i;
+X break;
+X }
+X
+X for (i=min_hist; i<=max_hist; i++) {
+X mu=(double)score_sums[i]/(double)llen_hist[i];
+X if (llen_hist[i]>1) {
+X var = ((double)score2_sums[i]-(double)llen_hist[i]*mu*mu)/
+X (double)(llen_hist[i]-1);
+X
+X fprintf(bout,"%d\t%d\t%.1f\t%.1f\t%.1f\t%.4f\t%.4f\n",
+X i,llen_hist[i],exp(((double)(i))/LN_FACT),
+X score_sums[i],score2_sums[i],mu,var);
+X }
+X }
+X free_hist();
+X fclose(bout);
+}
+*/
+X
+s_abort()
+{
+X exit(1);
+}
+SHAR_EOF
+chmod 0644 res_stats.c ||
+echo 'restore of res_stats.c failed'
+Wc_c="`wc -c < 'res_stats.c'`"
+test 16277 -eq "$Wc_c" ||
+ echo 'res_stats.c: original size 16277, current size' "$Wc_c"
+fi
+# ============= rna.mat ==============
+if test -f 'rna.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping rna.mat (File already exists)'
+else
+echo 'x - extracting rna.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'rna.mat' &&
+# Sample rna matrix with +2 for G:A, TU:C
+X A C G T U R Y M W S K D H V B N X
+A 5 -4 2 -4 -4 2 -1 2 2 -1 -1 1 1 1 -2 -1 -1
+C -4 5 -4 2 2 -1 2 2 -1 2 -1 -2 1 1 1 -1 -1
+G 2 -4 5 -4 -4 2 -1 -1 -1 2 2 1 -2 1 1 -1 -1
+T -4 2 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
+U -4 2 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
+R 2 -1 2 -1 -1 2 -2 -1 1 1 1 1 -1 1 -1 -1 -1
+Y -1 2 -1 2 2 -2 2 -1 1 1 1 -1 1 -1 1 -1 -1
+M 2 2 -1 -1 -1 -1 -1 2 1 1 -1 -1 1 1 -1 -1 -1
+W 2 -1 -1 2 2 1 1 1 2 -1 1 1 1 -1 -1 -1 -1
+S -1 2 2 -1 -1 1 1 1 -1 2 1 -1 -1 1 1 -1 -1
+K -1 -1 2 2 2 1 1 -1 1 1 2 1 -1 -1 1 -1 -1
+D 1 -2 1 1 1 1 -1 -1 1 -1 1 1 -1 -1 -1 -1 -1
+H 1 1 -2 1 1 -1 1 1 1 -1 -1 -1 1 -1 -1 -1 -1
+V 1 1 1 -2 -2 1 -1 1 -1 1 -1 -1 -1 1 -1 -1 -1
+B -2 1 1 1 1 -1 1 -1 -1 1 1 -1 -1 -1 1 -1 -1
+N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+SHAR_EOF
+chmod 0644 rna.mat ||
+echo 'restore of rna.mat failed'
+Wc_c="`wc -c < 'rna.mat'`"
+test 998 -eq "$Wc_c" ||
+ echo 'rna.mat: original size 998, current size' "$Wc_c"
+fi
+# ============= sc_to_e.c ==============
+if test -f 'sc_to_e.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping sc_to_e.c (File already exists)'
+else
+echo 'x - extracting sc_to_e.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'sc_to_e.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: sc_to_e.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* sc_to_e uses statistical parameters from search and
+X score, length, and database size to calculate E()
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+X
+double mean_var, mu, rho;
+X
+/* sc_to_e - read rho, mu and mean_var (from three arguments or from
+X stdin), then repeatedly read "score length [db_size]" lines and
+X print E() for each; an empty line or end of input exits.
+X db_size defaults to 50000 until a positive value has been entered. */
+main(argc, argv)
+X int argc; char **argv;
+{
+X char line[128];
+X int score, length, db_size;
+X double z_val, s_to_zv(), zv_to_E();
+X
+X if (argc == 4) {
+X sscanf(argv[1],"%lf",&rho);
+X sscanf(argv[2],"%lf",&mu);
+X sscanf(argv[3],"%lf",&mean_var);
+X }
+X else {
+X fprintf(stderr," enter rho mu mean_var: ");
+X if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+X sscanf(line,"%lf %lf %lf",&rho, &mu, &mean_var);
+X }
+X
+X /* s_to_zv() divides by sqrt(mean_var) */
+X if (mean_var <= 0.0) {
+X fprintf(stderr," mean_var must be > 0.0\n");
+X exit(1);
+X }
+X
+X db_size = 0; /* defined value for the "< 1" default test below */
+X while (1) {
+X fprintf(stderr," enter score length db_size: ");
+X if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
+X if (line[0]=='\n') exit(0);
+X /* score and length are required; s_to_zv() takes log(length) */
+X if (sscanf(line,"%d %d %d",&score, &length, &db_size) < 2 || length < 1) {
+X fprintf(stderr," need score and length (length > 0)\n");
+X continue;
+X }
+X if (db_size < 1) db_size = 50000;
+X
+X z_val = s_to_zv(score, length);
+X
+X printf(" s: %d (%d) E(%d): %4.2g\n",score,length,db_size,zv_to_E(z_val,db_size));
+X }
+}
+X
+double s_to_zv(int score, int length) /* length-corrected z-value from the globals rho, mu, mean_var */
+{
+X return ((double)score - rho * log((double)length) - mu)/sqrt(mean_var); /* (score - rho*ln(length) - mu) / sqrt(mean_var) */
+}
+X
+/* zv_to_E - expected number of sequences, in a database of db_size
+X entries, with a z-value >= zv:
+X e = exp(-(pi/sqrt(6))*zv - gamma); E = db_size * (1 - exp(-e)),
+X using E = db_size * e when e <= 0.01.
+X The first constant was previously mistyped as 1.282554983. */
+double zv_to_E(double zv, int db_size)
+{
+X double e;
+X
+X /* pi/sqrt(6) = 1.28254983..., Euler's gamma = 0.57721566... */
+X e = exp(-1.28254983016186409554 * zv - 0.57721566490153286060);
+X return (double)db_size * (e > .01 ? 1.0 - exp(-e) : e);
+}
+SHAR_EOF
+chmod 0644 sc_to_e.c ||
+echo 'restore of sc_to_e.c failed'
+Wc_c="`wc -c < 'sc_to_e.c'`"
+test 1427 -eq "$Wc_c" ||
+ echo 'sc_to_e.c: original size 1427, current size' "$Wc_c"
+fi
+# ============= scaleswn.c ==============
+if test -f 'scaleswn.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping scaleswn.c (File already exists)'
+else
+echo 'x - extracting scaleswn.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'scaleswn.c' &&
+/* scaleswn.c */
+X
+/* $Name: fa_34_26_5 $ - $Id: scaleswn.c,v 1.60 2007/04/26 18:32:48 wrp Exp $ */
+X
+/* as of 24 Sept, 2000 - scaleswn uses no global variables */
+X
+/*
+X Provide statistical estimates using an extreme value distribution
+X
+X copyright (c) 1995, 1996, 2000 William R. Pearson
+X
+X This code provides multiple methods for scaling sequence
+X similarity scores to correct for length effects.
+X
+X Currently, the following methods are available:
+X
+X pst.zsflag = 0 - no scaling (AVE_STATS)
+X pst.zsflag = 1 - regression-scaled scores (REG_STATS)
+X pst.zsflag = 2 - (revised) MLE Lambda/K scaled scores (MLE_STATS)
+X pst.zsflag = 3 - scaling using Altschul's parameters (AG_STATS)
+X pst.zsflag = 4 - regression-scaled with iterative outlier removal (REGI_STATS)
+X pst.zsflag = 5 = like 1, but length scaled variance (REG2_STATS)
+X pst.zsflag = 6 = like 2, but uses lambda composition/scale (MLE2_STATS)
+X pst.zsflag = 11 = 10 + 1 - use random shuffles, method 1
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+X
+#include <limits.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#define MAXHIST 50
+#define MAX_LLEN 200
+#define LHISTC 5
+#define VHISTC 5
+#define MAX_SSCORE 300
+X
+#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
+X
+#define LN_FACT 10.0
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+#define EULER_G 0.57721566490153286060
+#define PI_SQRT6 1.28254983016186409554
+X
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237
+#endif
+#define LN200 5.2983173666
+#define ZS_MAX 400.0 /* used to prevent underflow on some machines */
+#define TOLERANCE 1.0e-12
+#define TINY 1.0e-6
+X
+/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
+struct rstat_str {
+X double rho, rho_e, mu, mu_e, mean_var, var_e; /* ?_e:std. error of ? */
+/* used by REG2_STATS */
+X double rho2, mu2, var_cutoff;
+X int n_trimmed; /* excluded because of high z-score */
+X int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
+};
+X
+/* used by AG_STATS, MLE_STATS */
+struct ag_stat_str {
+X double K, Lambda, H, a_n0f, a_n0;
+};
+X
+/* used by MLE2_STATS */
+struct mle2_stat_str {
+X double a_n0;
+X double mle2_a0, mle2_a1, mle2_a2, mle2_b1;
+X double ave_comp, max_comp, ave_H;
+};
+X
+struct pstat_str {
+X double ngLambda, ngK, ngH;
+X union {
+X struct rstat_str rg;
+X struct ag_stat_str ag;
+X struct mle2_stat_str m2;
+X } r_u;
+};
+X
+#define AVE_STATS 0 /* no length effect, only mean/variance */
+double find_zn(int score, double escore, int len, double comp, struct pstat_str *);
+X
+int proc_hist_n(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+X
+#define REG_STATS 1 /* length-regression scaled */
+#define REGI_STATS 4 /* length regression, iterative */
+double find_zr(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_r(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp,
+X int do_trim, struct pstat_str *pu);
+X
+#define MLE_STATS 2 /* MLE for lambda, K */
+double find_ze(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_ml(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+X
+#define AG_STATS 3 /* Altschul-Gish parameters */
+double find_za(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_a(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+X
+#define REG2_STATS 5 /* length regression on mean + variance */
+double find_zr2(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_r2(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+X
+#define MLE2_STATS 6 /* MLE stats using comp(lambda) */
+double find_ze2(int score, double escore, int length, double comp, struct pstat_str *);
+int proc_hist_ml2(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+X
+#ifdef USE_LNSTATS
+#define LN_STATS 2
+double find_zl(int score, double escore, int len, double comp, struct pstat_str *);
+int proc_hist_ln(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct pstat_str *);
+#endif
+X
+/* scaleswn.c local variables that belong in their own structure */
+X
+double (*find_zp)(int score, double escore, int len, double comp, struct pstat_str *) = &find_zr;
+X
+/* void s_sort (double **ptr, int nbest); */
+void ss_sort ( int *sptr, int n);
+X
+struct llen_str {
+X int min, max;
+X int max_score, min_score;
+X int *hist;
+X double *score_sums, *score2_sums;
+X double *score_var;
+X int max_length, min_length, zero_s;
+X int fit_flag;
+};
+X
+static void inithist(struct llen_str *, struct pstruct, int);
+static void free_hist( struct llen_str *);
+static void addhist(struct llen_str *, int, int, int);
+static void prune_hist(struct llen_str *, int, int, int, long *);
+void inithistz(int, struct hist_str *histp);
+void addhistz(double zs, struct hist_str *histp);
+void addhistzp(double zs, struct hist_str *histp);
+X
+static void fit_llen(struct llen_str *, struct rstat_str *);
+static void fit_llen2(struct llen_str *, struct rstat_str *);
+static void fit_llens(struct llen_str *, struct rstat_str *);
+X
+extern void sortbeste(struct beststr **bptr, int nbest);
+X
+/* void set_db_size(int, struct db_str *, struct hist_str *); */
+X
+#ifdef DEBUG
+FILE *tmpf;
+#endif
+X
+/* process_hist - fit the statistical scaling selected by pst.zsflag to
+X the nstats sampled scores in sptr[].
+X
+X *ps_sp is allocated on first use (zeroed on reuse) and receives the
+X fitted parameters; the global find_zp is set to the matching z-score
+X function. Fewer than 10 samples forces AG_STATS. A zsflag >= 10
+X selects the same method as zsflag-10 with trimming disabled.
+X When do_hist is 0, *histp is rebuilt from the samples.
+X
+X Returns pst.zsflag unchanged when it is negative (*ps_sp is set to
+X NULL), otherwise the value returned by the chosen proc_hist_*().
+X
+X NOTE(review): the LN_STATS branch is guarded by USE_LNSCALE while
+X LN_STATS/find_zl/proc_hist_ln are declared under USE_LNSTATS, and the
+X call passes no pst argument - confirm before enabling either macro.
+*/
+int
+process_hist(struct stat_str *sptr, int nstats,
+X struct mngmsg m_msg,
+X struct pstruct pst,
+X struct hist_str *histp,
+X struct pstat_str **ps_sp,
+X int do_hist)
+{
+X int zsflag, do_trim, i;
+X struct pstat_str *ps_s;
+X
+X if (pst.zsflag < 0) {
+X *ps_sp = NULL;
+X return pst.zsflag;
+X }
+X
+X if (*ps_sp == NULL) {
+X if ((ps_s=(struct pstat_str *)calloc(1,sizeof(struct pstat_str)))==NULL) {
+X /* sizeof yields size_t; cast so the argument matches %ld */
+X fprintf(stderr," cannot allocate pstat_union: %ld\n",(long)sizeof(struct pstat_str));
+X exit(1);
+X }
+X else *ps_sp = ps_s;
+X }
+X else {
+X ps_s = *ps_sp;
+X memset(ps_s,0,sizeof(struct pstat_str));
+X }
+X
+X ps_s->ngLambda = m_msg.Lambda;
+X ps_s->ngK = m_msg.K;
+X ps_s->ngH = m_msg.H;
+X
+X if (nstats < 10) pst.zsflag = AG_STATS;
+X
+X zsflag = pst.zsflag;
+X
+/*
+#ifdef DEBUG
+X if (pst.debug_lib) {
+X tmpf=fopen("tmp_stats.res","w+");
+X for (i=0; i<nstats; i++) fprintf(tmpf,"%d\t%d\n",sptr[i].score,sptr[i].n1);
+X fclose(tmpf);
+X }
+#endif
+*/
+X
+X if (zsflag >= 10) {
+X zsflag -= 10;
+X do_trim = 0;
+X }
+X else do_trim = 1;
+X
+#ifdef USE_LNSCALE
+X if (zsflag==LN_STATS) {
+X find_zp = &find_zl;
+X pst.zsflag = proc_hist_ln(sptr, nstats, histp, do_trim, ps_s);
+X }
+#else
+X if (zsflag==MLE_STATS) {
+X find_zp = &find_ze;
+X pst.zsflag = proc_hist_ml(sptr, nstats, pst, histp, do_trim, ps_s);
+X }
+#endif
+X else if (zsflag==REG_STATS) {
+X find_zp = &find_zr;
+X pst.zsflag = proc_hist_r(sptr, nstats,pst, histp, do_trim, ps_s);
+X }
+X else if (zsflag==AG_STATS) {
+X find_zp = &find_za;
+X pst.zsflag = proc_hist_a(sptr, nstats, pst, histp, do_trim, ps_s);
+X }
+X else if (zsflag==REGI_STATS) {
+X find_zp = &find_zr;
+X pst.zsflag = proc_hist_r2(sptr,nstats, pst, histp, do_trim, ps_s);
+X }
+X else if (zsflag==REG2_STATS) {
+X find_zp = &find_zr2;
+X pst.zsflag = proc_hist_r(sptr,nstats,pst, histp, do_trim, ps_s);
+X }
+#if !defined(TFAST) && !defined(FASTX)
+X else if (zsflag == MLE2_STATS) {
+X find_zp = &find_ze2;
+X pst.zsflag = proc_hist_ml2(sptr, nstats, pst, histp, do_trim, ps_s);
+X }
+#endif
+X else { /* AVE_STATS */
+X find_zp = &find_zn;
+X pst.zsflag = proc_hist_n(sptr,nstats, pst, histp, do_trim, ps_s);
+X }
+X
+X if (!do_hist) {
+X histp->entries = nstats; /* db->entries = 0; */
+X inithistz(MAXHIST, histp);
+X for (i = 0; i < nstats; i++) {
+X if (sptr[i].n1 < 0) sptr[i].n1 = -sptr[i].n1;
+X addhistz(find_zp(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp,ps_s),
+X histp);
+X }
+X }
+X return pst.zsflag;
+}
+X
+int
+calc_thresh(struct pstruct pst, int nstats,
+X double Lambda, double K, double H, double *zstrim)
+{
+X int max_hscore;
+X double ave_n1, tmp_score, z, l_fact;
+X
+X if (pst.dnaseq == SEQT_DNA || pst.dnaseq == SEQT_RNA) {
+X ave_n1 = 5000.0;
+X l_fact = 1.0;
+X }
+X else {
+X ave_n1 = 400.0;
+X l_fact = 0.7;
+X }
+X
+/* max_hscore = MAX_SSCORE; */
+/* mean expected for pst.n0 * 400 for protein, 5000 for DNA */
+/* we want a number of offsets that is appropriate for the database size so
+X far (nstats)
+*/
+X
+/*
+X the calculation below sets a high-score threshold using an
+X ungapped lambda, but errs towards the high-score side by using
+X E()=0.001 and calculating with 0.70*lambda, which is the correction for
+X going from ungapped to -12/-2 gapped lambda with BLOSUM50
+X
+X NOTE(review): the code below uses 0.01, not 0.001, and applies the
+X 0.7 factor (l_fact) only when the search is not DNA/RNA - confirm
+X which value is intended.
+X
+X The function returns the high-score threshold (max_hscore) and stores
+X the z-score trimming limit, 10*z+50, in *zstrim. H is not used.
+*/
+X
+#ifndef NORMAL_DIST
+X tmp_score = 0.01/((double)nstats*K*(double)pst.n0*ave_n1);
+X tmp_score = -log(tmp_score)/(Lambda*l_fact);
+X max_hscore = (int)(tmp_score+0.5);
+X
+X z = 1.0/(double)nstats;
+X z = (log(z)+EULER_G)/(- PI_SQRT6);
+#else
+X max_hscore = 100;
+X z = 5.0;
+#endif
+X *zstrim = 10.0*z+50.0;
+X return max_hscore;
+}
+X
+/*
+X proc_hist_r() - length-regression scaling of the sampled scores.
+X
+X sptr/nstats sampled scores (score, escore, n1, comp are read)
+X pst search parameters (zsflag, zs_win are read here)
+X histp histogram record; stat_info receives the summary text and
+X entries is passed to prune_hist()
+X do_trim non-zero: prune scores whose z-value is outside 20..ztrim
+X pu statistics record; the r_u.rg fit parameters are filled in
+X
+X Returns REG_STATS, or whatever proc_hist_n()/proc_hist_ml() return
+X when this routine falls back to them (find_zp is re-pointed to the
+X matching z-score function in that case).
+*/
+int
+proc_hist_r(struct stat_str *sptr, int nstats,
+X struct pstruct pst, struct hist_str *histp,
+X int do_trim, struct pstat_str *pu)
+{
+X int i, max_hscore;
+X double zs, ztrim;
+X char s_string[128];
+X struct llen_str llen;
+X char *f_string;
+X llen.fit_flag=1;
+X llen.hist=NULL;
+X
+X /* score cutoff and upper z-trim limit for this sample size */
+X max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
+X pu->ngK, pu->ngH, &ztrim);
+X
+X inithist(&llen,pst,max_hscore);
+X
+X f_string = &(histp->stat_info[0]);
+X
+X /* accumulate every sampled score into the length-binned histogram */
+X for (i = 0; i<nstats; i++)
+X addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
+X
+X /* fewer than 10 score units between min and max: no regression,
+X use the unscaled mean/variance statistics instead */
+X if ((llen.max_score - llen.min_score) < 10) {
+X free_hist(&llen);
+X llen.fit_flag = 0;
+X find_zp = &find_zn;
+X return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
+X }
+X
+X fit_llen(&llen, &(pu->r_u.rg)); /* now we have rho, mu, rho2, mu2, mean_var
+X to set the parameters for the histogram */
+X
+X if (!llen.fit_flag) { /* the fit failed, fall back to proc_hist_ml */
+X free_hist(&llen);
+X find_zp = &find_ze;
+X return proc_hist_ml(sptr,nstats, pst, histp, do_trim, pu);
+X }
+X
+X pu->r_u.rg.n_trimmed= pu->r_u.rg.n1_trimmed = pu->r_u.rg.nb_trimmed = 0;
+X
+X if (do_trim) {
+X /* remove scores whose z-value lies outside 20.0 .. ztrim */
+X if (llen.fit_flag) {
+X for (i = 0; i < nstats; i++) {
+X zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
+X if (zs < 20.0 || zs > ztrim) {
+X pu->r_u.rg.n_trimmed++;
+X prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
+X &(histp->entries));
+X }
+X }
+X }
+X
+X /* fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", pu->r_u.rg.n_trimmed); */
+X
+X /* second fit on the pruned histogram */
+X if (llen.fit_flag) fit_llens(&llen, &(pu->r_u.rg));
+X
+X /* fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", pu->r_u.rg.n1_trimmed,pu->r_u.rg.nb_trimmed); */
+X }
+X
+X /* releases the histogram arrays only; scalar members such as
+X llen.zero_s are still read below */
+X free_hist(&llen);
+X
+X /* put all the scores in the histogram */
+X
+X /* label prefix: empty unless zsflag >= 10, then "(shuffled...)" */
+X if (pst.zsflag < 10) s_string[0]='\0';
+X else if (pst.zs_win > 0)
+X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X else strncpy(s_string,"(shuffled)",sizeof(s_string));
+X
+X /* summary text; the REG2 variant reports rho2/mu2, the other reports
+X mean_var and the Lambda derived from it */
+X if (pst.zsflag == REG2_STATS || pst.zsflag == 10+REG2_STATS)
+X sprintf(f_string,"%s Expectation_v fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n rho2=%6.2f; mu2= %6.2f, 0's: %d Z-trim: %d B-trim: %d in %d/%d",
+X s_string, pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),
+X pu->r_u.rg.rho2,pu->r_u.rg.mu2,llen.zero_s,
+X pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot);
+X else
+X sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d B-trim: %d in %d/%d\n Lambda= %8.6f",
+X s_string,
+X pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e), pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),
+X llen.zero_s, pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot,
+X PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
+X return REG_STATS;
+}
+X
+X
+/*
+X  proc_hist_r2() - iterative length-regression scaling.
+X
+X  Builds the length histogram, then (when do_trim is set) repeats up
+X  to five times: fit with fit_llen2(), prune every still-active score
+X  whose z-value is outside 20..ztrim, and stop early once fewer than
+X  LHISTC scores were pruned in a pass.  Pruned entries are marked by
+X  negating sptr[].n1.  A final fit_llen() supplies the reported
+X  parameters.  The summary goes to histp->stat_info.
+X
+X  Returns REGI_STATS.
+*/
+int
+proc_hist_r2(struct stat_str *sptr, int nstats,
+X	     struct pstruct pst, struct hist_str *histp,
+X	     int do_trim, struct pstat_str *pu)
+{
+X  struct llen_str llen;
+X  char s_string[128];
+X  char *f_string;
+X  double z_sc, ztrim;
+X  int k, n_cut, nit, max_hscore;
+X
+X  llen.hist = NULL;
+X  llen.fit_flag = 1;
+X
+X  max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
+X			   pu->ngK, pu->ngH, &ztrim);
+X
+X  inithist(&llen, pst, max_hscore);
+X  f_string = &(histp->stat_info[0]);
+X
+X  k = 0;
+X  while (k < nstats) {
+X    addhist(&llen, sptr[k].score, sptr[k].n1, max_hscore);
+X    k++;
+X  }
+X
+X  pu->r_u.rg.nb_trimmed = 0;
+X  pu->r_u.rg.n1_trimmed = 0;
+X  pu->r_u.rg.n_trimmed = 0;
+X
+X  /* nit keeps counting down; its final value is what gets printed */
+X  nit = do_trim ? 5 : 0;
+X
+X  for (;;) {
+X    if (nit-- <= 0) break;
+X
+X    n_cut = 0;
+X    fit_llen2(&llen, &(pu->r_u.rg));
+X
+X    for (k = 0; k < nstats; k++) {
+X      if (sptr[k].n1 >= 0) {
+X	z_sc = find_zr(sptr[k].score, sptr[k].escore, sptr[k].n1,
+X		       sptr[k].comp, pu);
+X	if (z_sc < 20.0 || z_sc > ztrim) {
+X	  n_cut++;
+X	  pu->r_u.rg.n_trimmed++;
+X	  prune_hist(&llen, sptr[k].score, sptr[k].n1, max_hscore,
+X		     &(histp->entries));
+X	  /* a negative length marks the entry as already pruned */
+X	  sptr[k].n1 = -sptr[k].n1;
+X	}
+X      }
+X    }
+X
+X    if (n_cut < LHISTC) break;
+X  }
+X
+X  fit_llen(&llen, &(pu->r_u.rg));
+X
+X  free_hist(&llen);
+X
+X  if (pst.zsflag >= 10) {
+X    if (pst.zs_win > 0)
+X      sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X    else
+X      strncpy(s_string,"(shuffled)",sizeof(s_string));
+X  }
+X  else {
+X    s_string[0]='\0';
+X  }
+X
+X  sprintf(f_string,"%s Expectation_i fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n mean_var=%6.4f+/-%6.3f 0's: %d Z-trim: %d N-it: %d\n Lambda= %8.6f",
+X	  s_string,
+X	  pu->r_u.rg.rho*LN_FACT,
+X	  sqrt(pu->r_u.rg.rho_e),
+X	  pu->r_u.rg.mu,
+X	  sqrt(pu->r_u.rg.mu_e),
+X	  pu->r_u.rg.mean_var,
+X	  sqrt(pu->r_u.rg.var_e),
+X	  llen.zero_s,
+X	  pu->r_u.rg.n_trimmed,
+X	  nit,
+X	  PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
+X
+X  return REGI_STATS;
+}
+X
+/* this procedure implements Altschul's pre-calculated values for lambda, K */
+X
+#include "alt_parms.h"
+X
+int
+look_p(struct alt_p parm[], int gap, int ext,
+X double *K, double *Lambda, double *H);
+X
+/*
+X  proc_hist_a() - use pre-calculated Altschul/Gish Lambda, K, H.
+X
+X  sptr/nstats  sampled scores; only forwarded to proc_hist_r() on fallback
+X  pst          search parameters (pamfile, gdelval, ggapval, n0 are read)
+X  histp        histogram record; stat_info receives the summary text
+X  do_trim      forwarded to proc_hist_r() on fallback
+X  pu           statistics record; r_u.ag is filled in
+X
+X  Returns AG_STATS when tabulated parameters exist for the scoring
+X  matrix and gap penalties.  Otherwise a message is written to stderr,
+X  find_zp is pointed at find_zr and the result of proc_hist_r() is
+X  returned.
+*/
+int
+proc_hist_a(struct stat_str *sptr, int nstats,
+X	    struct pstruct pst, struct hist_str *histp,
+X	    int do_trim, struct pstat_str *pu)
+{
+X  /* zero-initialised: look_p() leaves these untouched when it has no
+X     entry, and they are copied into pu below in every case */
+X  double Lambda = 0.0, K = 0.0, H = 0.0;
+X  char *f_string;
+X  int r_v;
+X  int t_gdelval, t_ggapval;
+X
+#ifdef OLD_FASTA_GAP
+X  t_gdelval = pst.gdelval;
+X  t_ggapval = pst.ggapval;
+#else
+X  /* the tables are indexed by the cost of the first gap residue */
+X  t_gdelval = pst.gdelval+pst.ggapval;
+X  t_ggapval = pst.ggapval;
+#endif
+X
+X  f_string = &(histp->stat_info[0]);
+X
+X  if (strcmp(pst.pamfile,"BL50")==0 || strcmp(pst.pamfile,"BLOSUM50")==0)
+X    r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"BL62")==0 || strcmp(pst.pamfile,"BLOSUM62")==0)
+X    r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"BL80")==0 || strcmp(pst.pamfile,"BLOSUM80")==0)
+X    r_v = look_p(bl80_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"P250")==0)
+X    r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"P120")==0)
+X    r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"MD_10")==0)
+X    r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"MD_20")==0)
+X    r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"MD_40")==0)
+X    r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"DNA")==0 || strcmp(pst.pamfile,"+5/-4")==0)
+X    r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"+3/-2")==0)
+X    r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pst.pamfile,"+1/-3")==0)
+X    r_v = look_p(nt13_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
+X  else r_v = 0;
+X
+X  pu->r_u.ag.Lambda = Lambda;
+X  pu->r_u.ag.K = K;
+X  pu->r_u.ag.H = H;
+X
+X  if (r_v == 0) {
+X    fprintf(stderr,"Parameters not available for: %s: %d/%d\n",
+X	    pst.pamfile,t_gdelval,t_ggapval);
+X
+X    /* no table entry: estimate the parameters from the scores instead */
+X    find_zp = &find_zr;
+X    return proc_hist_r(sptr, nstats,pst, histp, do_trim, pu);
+X  }
+X
+X  /*
+X    fprintf(stderr," the parameters are: Lambda: %5.3f K: %5.3f H: %5.3f\n",
+X	    Lambda, K, H);
+X  */
+X
+X  pu->r_u.ag.a_n0 = (double)pst.n0;
+X  pu->r_u.ag.a_n0f = log (K * pu->r_u.ag.a_n0)/H;
+X
+X  sprintf(f_string,"Altschul/Gish params: n0: %d Lambda: %5.3f K: %5.3f H: %5.3f",
+X	  pst.n0,Lambda, K, H);
+X  return AG_STATS;
+}
+X
+/*
+X  ag_parm() - look up Altschul/Gish K, Lambda, H for a scoring matrix
+X  and gap penalties, and store them in pu->r_u.ag.
+X
+X  pamfile   scoring matrix name; the same names proc_hist_a() accepts
+X  gdelval   gap penalty passed to look_p() as the "gap" value
+X  ggapval   gap penalty passed to look_p() as the "ext" value
+X  pu        statistics record; r_u.ag.K/Lambda/H are always written
+X            (all 0.0 when no parameters are available)
+X
+X  Returns 1 when parameters were found.  Otherwise returns 0 after
+X  writing a message to stderr.
+*/
+int
+ag_parm(char *pamfile, int gdelval, int ggapval, struct pstat_str *pu)
+{
+X  /* zero-initialised so a failed lookup never stores indeterminate
+X     values into pu */
+X  double Lambda = 0.0, K = 0.0, H = 0.0;
+X  int r_v;
+X
+X  if (strcmp(pamfile,"BL50")==0 || strcmp(pamfile,"BLOSUM50")==0)
+X    r_v = look_p(bl50_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"BL62")==0 || strcmp(pamfile,"BLOSUM62")==0)
+X    r_v = look_p(bl62_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"BL80")==0 || strcmp(pamfile,"BLOSUM80")==0)
+X    r_v = look_p(bl80_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"P250")==0)
+X    r_v = look_p(p250_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"P120")==0)
+X    r_v = look_p(p120_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"MD_10")==0)
+X    r_v = look_p(md10_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"MD_20")==0)
+X    r_v = look_p(md20_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"MD_40")==0)
+X    r_v = look_p(md40_p,gdelval,ggapval,&K,&Lambda,&H);
+X  else if (strcmp(pamfile,"DNA")==0 || strcmp(pamfile,"+5/-4")==0)
+X    r_v = look_p(nt54_p,gdelval,ggapval, &K,&Lambda,&H);
+X  else if (strcmp(pamfile,"+3/-2")==0)
+X    r_v = look_p(nt32_p,gdelval,ggapval, &K,&Lambda,&H);
+X  else if (strcmp(pamfile,"+1/-3")==0)
+X    r_v = look_p(nt13_p,gdelval,ggapval, &K,&Lambda,&H);
+X  else r_v = 0;
+X
+X  pu->r_u.ag.K = K;
+X  pu->r_u.ag.Lambda = Lambda;
+X  pu->r_u.ag.H = H;
+X
+X  if (r_v == 0) {
+X    fprintf(stderr,"Parameters not available for: %s: %d/%d\n",
+X	    pamfile,gdelval,ggapval);
+X  }
+X  return r_v;
+}
+X
+/*
+X  look_p() - find the table row for a gap/extension penalty pair.
+X
+X  parm[]   parameter table; row 0 is returned whenever the requested
+X           gap penalty exceeds parm[1].gap, rows 1.. are scanned until
+X           a row with gap <= 0 is reached
+X  gap,ext  penalties as negative numbers; they are negated before
+X           being compared with the table
+X  K, Lambda, H   out: written only when a row is selected
+X
+X  Returns 1 when a row was selected, 0 otherwise.  The scan gives up
+X  at the first row whose gap (or, for an equal gap, ext) is smaller
+X  than the requested value.
+*/
+int
+look_p(struct alt_p parm[], int gap, int ext,
+X       double *K, double *Lambda, double *H)
+{
+X  struct alt_p *row, *hit;
+X  int want_gap, want_ext;
+X
+X  want_gap = -gap;
+X  want_ext = -ext;
+X  hit = NULL;
+X
+X  if (want_gap > parm[1].gap) {
+X    hit = &parm[0];
+X  }
+X  else {
+X    for (row = &parm[1]; row->gap > 0; row++) {
+X      if (row->gap > want_gap) continue;	/* not there yet */
+X      if (row->gap != want_gap) break;	/* went past the gap value */
+X      if (row->ext > want_ext) continue;
+X      if (row->ext == want_ext) hit = row;
+X      break;				/* match, or went past ext */
+X    }
+X  }
+X
+X  if (hit == NULL) return 0;
+X
+X  *K = hit->K;
+X  *Lambda = hit->Lambda;
+X  *H = hit->H;
+X  return 1;
+}
+X
+/* uncensored and censored maximum likelihood estimates developed
+X by Aaron Mackey based on a preprint from Sean Eddy */
+X
+int mle_cen (struct stat_str *, int, int, double, double *, double *);
+X
+/*
+X  proc_hist_ml() - maximum-likelihood estimate of Lambda and K.
+X
+X  Without do_trim the uncensored estimate is used (censor fraction
+X  0.0).  With do_trim the fraction is 0.05, or 1000/nstats once
+X  nstats/20 exceeds 1000.  The results are stored in pu->r_u.ag and a
+X  summary is written to histp->stat_info.
+X
+X  Returns MLE_STATS.  If mle_cen() reports failure (-1), find_zp is
+X  pointed at find_zn and the result of proc_hist_n() is returned.
+*/
+int
+proc_hist_ml(struct stat_str *sptr, int nstats,
+X	     struct pstruct pst, struct hist_str *histp,
+X	     int do_trim, struct pstat_str *pu)
+{
+X  char s_string[128];
+X  char *f_string;
+X  double cen_frac;
+X
+X  f_string = &(histp->stat_info[0]);
+X  pu->r_u.ag.a_n0 = (double)pst.n0;
+X
+X  if (pst.zsflag >= 10) {
+X    if (pst.zs_win > 0)
+X      sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X    else
+X      strncpy(s_string,"(shuffled)",sizeof(s_string));
+X  }
+X  else {
+X    s_string[0]='\0';
+X  }
+X
+X  /* fraction of the sample to censor (split between both tails) */
+X  cen_frac = 0.0;
+X  if (do_trim) {
+X    cen_frac = (nstats/20 > 1000) ? 1000.0/(double)nstats : 0.05;
+X  }
+X
+X  if (mle_cen(sptr, nstats, pst.n0, cen_frac,
+X	      &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1) {
+X    /* estimate failed: use the unscaled statistics */
+X    find_zp = &find_zn;
+X    return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
+X  }
+X
+X  if (do_trim) {
+X    sprintf(f_string,"MLE_cen statistics: Lambda= %6.4f; K=%6.4g (cen=%d)",
+X	    pu->r_u.ag.Lambda,pu->r_u.ag.K,(int)((double)nstats*cen_frac));
+X  }
+X  else {
+X    sprintf(f_string,"%s MLE statistics: Lambda= %6.4f; K=%6.4g",
+X	    s_string,pu->r_u.ag.Lambda,pu->r_u.ag.K);
+X  }
+X
+X  return MLE_STATS;
+}
+X
+int
+mle_cen2 (struct stat_str *, int, int, double, double *, double *, double *, double *);
+X
+X
+/*
+X  proc_hist_ml2() - composition-dependent maximum-likelihood fit.
+X
+X  sptr/nstats  sampled scores; non-positive sptr[].comp values are
+X               overwritten with the geometric mean composition
+X  pst          search parameters (n0, zsflag, zs_win are read)
+X  histp        histogram record; stat_info receives the summary text
+X  do_trim      non-zero: censored fit, fraction 0.05 or 500/nstats
+X  pu           statistics record; r_u.m2 is filled in
+X
+X  Returns MLE2_STATS.  When no score has a positive composition, or
+X  mle_cen2() reports failure (-1), find_zp is pointed at find_zn and
+X  the result of proc_hist_n() is returned.
+*/
+int
+proc_hist_ml2(struct stat_str *sptr, int nstats,
+X	      struct pstruct pst, struct hist_str *histp,
+X	      int do_trim, struct pstat_str *pu)
+{
+X  int i, ns;
+X  double f_cen, ave_lambda;
+X  char s_string[128];
+X  char *f_string;
+X
+X  f_string = &(histp->stat_info[0]);
+X  pu->r_u.m2.a_n0 = (double)pst.n0;
+X
+X  if (pst.zsflag < 10) s_string[0]='\0';
+X  else if (pst.zs_win > 0)
+X    sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X  else strncpy(s_string,"(shuffled)",sizeof(s_string));
+X
+X  pu->r_u.m2.ave_comp = 0.0;
+X  pu->r_u.m2.max_comp = -1.0;
+X
+X  /* geometric mean of the positive compositions */
+X  ns = 0;
+X  for (i=0; i<nstats; i++) {
+X    if (sptr[i].comp > pu->r_u.m2.max_comp) pu->r_u.m2.max_comp = sptr[i].comp;
+X    if (sptr[i].comp > 0.0) {
+X      pu->r_u.m2.ave_comp += log(sptr[i].comp);
+X      ns++;
+X    }
+X  }
+X
+X  /* without a single positive composition the mean would be 0/0 (NaN),
+X     and a NaN objective never satisfies the simplex convergence test */
+X  if (ns == 0) goto bad_mle2;
+X
+X  pu->r_u.m2.ave_comp /= (double)ns;
+X  pu->r_u.m2.ave_comp = exp(pu->r_u.m2.ave_comp);
+X
+X  /* every entry left out of the mean gets the mean; this includes an
+X     exact 0.0, which mle2_func() would otherwise use as a divisor */
+X  for (i=0; i<nstats; i++) if (sptr[i].comp <= 0.0) {
+X    sptr[i].comp = pu->r_u.m2.ave_comp;
+X  }
+X
+X  if (!do_trim) {
+X    if (mle_cen2(sptr, nstats, pst.n0, 0.0,
+X		 &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1,
+X		 &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1) == -1) goto bad_mle2;
+X    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
+X
+X    sprintf(f_string,"%s MLE-2 statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f\n ave Lamdba: %6.4f",
+X	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1,ave_lambda);
+X  }
+X  else {
+X    if (nstats/20 > 500) f_cen = 500.0/(double)nstats;
+X    else f_cen = 0.05;
+X    if (mle_cen2(sptr, nstats, pst.n0, f_cen, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1)== -1) goto bad_mle2;
+X
+X    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
+X
+X    sprintf(f_string,"%s MLE-2-cen statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f (cen=%d)\n ave Lambda:%6.4f",
+X	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1, (int)((double)nstats*f_cen),ave_lambda);
+X  }
+X
+X  return MLE2_STATS;
+X bad_mle2:
+X  find_zp = &find_zn;
+X  return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
+}
+X
+double first_deriv_cen(double lambda, struct stat_str *sptr,
+X int start, int stop,
+X double sumlenL, double cenL,
+X double sumlenH, double cenH);
+X
+double second_deriv_cen(double lambda, struct stat_str *sptr,
+X int start, int stop,
+X double sumlenL, double cenL,
+X double sumlenH, double cenH);
+X
+/*
+X  st_sort() - shell sort of v[0..n-1] into ascending score order.
+X
+X  Whole records are exchanged, so every member of a stat_str (score,
+X  n1, comp, escore, ...) stays with the score it belongs to.  Callers
+X  read sptr[].comp and sptr[].escore after sorting, so exchanging only
+X  some members would pair them with the wrong scores.
+*/
+static void
+st_sort (struct stat_str *v, int n) {
+X  int gap, i, j;
+X  struct stat_str tmp;
+X
+X  /* largest increment of the 1, 4, 13, 40, ... series below n/3 */
+X  for (gap = 1; gap < n/3; gap = 3*gap +1) ;
+X
+X  for (; gap > 0; gap = (gap-1)/3)
+X    for (i = gap; i < n; i++)
+X      for (j = i - gap; j >= 0; j -= gap) {
+X	if (v[j].score <= v[j + gap].score) break;
+X
+X	tmp = v[j];
+X	v[j] = v[j + gap];
+X	v[j + gap] = tmp;
+X      }
+}
+X
+/* sptr[].score, sptr[].n1; sptr[] is sorted by score inside mle_cen()
+X int n = total number of samples
+X int M = length of query
+X double fc = fraction of scores to be censored; fc/2.0 from top, bottom
+X double *Lambda = Lambda estimate
+X double *K = K estimate
+*/
+X
+#define MAX_NIT 100
+X
+/*
+X  mle_cen() - censored maximum-likelihood estimate of Lambda and K.
+X
+X  sptr/n   sampled scores; sorted in place by score
+X  M        query length
+X  fc       fraction of scores censored, fc/2.0 at each end
+X  Lambda   out: estimate found by Newton-Raphson iteration
+X  K        out: estimate derived from Lambda
+X
+X  Returns 0 on success.  Returns -1, leaving the outputs untouched,
+X  when fewer than two scores are available, when the censoring limits
+X  are not increasing, or when the iteration has not converged within
+X  MAX_NIT steps.
+*/
+int
+mle_cen(struct stat_str *sptr, int n, int M, double fc,
+X	double *Lambda, double *K) {
+X
+X  double sumlenL, sumlenH, cenL, cenH;
+X  double sum2_s, var_s, dtmp;
+X  int start, stop;
+X  int i, nf;
+X  int nit = 0;
+X  double deriv, deriv2, lambda, old_lambda, sum = 0.0;
+X  /*
+X   sumlenL, sumlenH = summed lengths of the censored low (Left) and
+X                      high (right) sequences
+X   cenL, cenH = censoring score low, high
+X  */
+X
+X  nf = (fc/2.0) * n;
+X  start = nf;
+X  stop = n - nf;
+X
+X  /* the estimate divides by (stop-start-1) and reads sptr[start];
+X     with fewer than two usable scores neither is defined */
+X  if (n < 2 || stop - start < 2) return -1;
+X
+X  st_sort(sptr,n);
+X
+X  /* NOTE(review): var_s is the sum of squared scores over (count-1);
+X     no mean is subtracted.  It only seeds the iteration below. */
+X  sum2_s = 0.0;
+X  dtmp = (double)(stop-start);
+X  for (i=start; i<stop; i++) {
+X    sum2_s += sptr[i].score * sptr[i].score;
+X  }
+X  var_s = sum2_s/(dtmp-1.0);
+X
+X  sumlenL = sumlenH = 0.0;
+X  for (i=0; i<start; i++) sumlenL += (double)sptr[i].n1;
+X  for (i=stop; i<n; i++) sumlenH += (double)sptr[i].n1;
+X
+X  if (nf > 0) {
+X    cenL = (double)sptr[start].score;
+X    cenH = (double)sptr[stop].score;
+X  }
+X  else {
+X    /* nothing is censored: sumlenL and sumlenH are 0.0, so these
+X       limits only take part in the consistency check below */
+X    cenL = (double)sptr[start].score/2.0;
+X    cenH = (double)sptr[start].score*2.0;
+X  }
+X
+X  if (cenL >= cenH) return -1;
+X
+X  /* initial guess for lambda is 0.2 - this does not work for matrices
+X     with very different scales */
+X  /* lambda = 0.2; */
+X  lambda = PI_SQRT6/sqrt(var_s);
+X  if (lambda > 1.0) {
+X    fprintf(stderr," Lambda initial estimate error: lambda: %6.4g; var_s: %6.4g\n",lambda,var_s);
+X    lambda = 0.2;
+X  }
+X
+X  do {
+X    deriv = first_deriv_cen(lambda, sptr, start, stop,
+X			    sumlenL, cenL, sumlenH, cenH);
+X    deriv2 = second_deriv_cen(lambda, sptr, start, stop,
+X			      sumlenL, cenL, sumlenH, cenH);
+X
+X    old_lambda = lambda;
+X    /* Newton step; halve lambda instead if the step would leave the
+X       positive range */
+X    if (lambda - deriv/deriv2 > 0.0) lambda = lambda - deriv/deriv2;
+X    else lambda = lambda/2.0;
+X    nit++;
+X  } while (fabs((lambda - old_lambda)/lambda) > TINY && nit < MAX_NIT);
+X
+X  /*  fprintf(stderr," mle_cen nit: %d\n",nit); */
+X
+X  if (nit >= MAX_NIT) return -1;
+X
+X  for(i = start; i < stop ; i++) {
+X    sum += (double) sptr[i].n1 * exp(- lambda * (double)sptr[i].score);
+X  }
+X
+X  *Lambda = lambda;
+X  /*
+X  *K = (double)(stop-start)/((double)M*sum);
+X  */
+X  *K = (double)n/((double)M*
+X		  (sum+sumlenL*exp(-lambda*cenL)-sumlenH*exp(-lambda*cenH)));
+X  return 0;
+}
+X
+/*
+double
+first_deriv(double lambda, struct stat_str *sptr, int n) {
+X
+X int i;
+X double sum = 0.0, sum1 = 0.0, sum2 = 0.0;
+X double s, l, es;
+X
+X for(i = 0 ; i < n ; i++) {
+X s = (double)sptr[i].score;
+X l = (double)sptr[i].n1;
+X es = exp(-lambda * s );
+X sum += s;
+X sum2 += l * es;
+X sum1 += s * l * es;
+X }
+X
+X return (1.0/lambda) - (sum/(double)n) + (sum1/sum2);
+}
+*/
+X
+/*
+double
+second_deriv(double lambda, struct stat_str *sptr, int n) {
+X double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
+X double s, l, es;
+X int i;
+X
+X for(i = 0 ; i < n ; i++) {
+X l = (double)sptr[i].n1;
+X s = (double)sptr[i].score;
+X es = exp(-lambda * s);
+X sum2 += l * es;
+X sum1 += l * s * es;
+X sum3 += l * s * s * es;
+X }
+X
+X return ((sum1*sum1)/(sum2*sum2)) - (sum3/sum2) - (1.0/(lambda*lambda));
+}
+*/
+X
+/*
+X  first_deriv_cen() - first derivative, with respect to lambda, of the
+X  censored log-likelihood evaluated by mle_cen().
+X
+X  sptr[start..stop-1] are the uncensored scores.  sumlenL/cenL and
+X  sumlenH/cenH are the summed lengths and the censoring scores of the
+X  low and high tails.
+*/
+double
+first_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
+X		double sumlenL, double cenL, double sumlenH, double cenH) {
+X  struct stat_str *cur, *end;
+X  double s_tot, w_tot, sw_tot;
+X  double sc, len, w;
+X  double tail_lo, tail_hi;
+X
+X  s_tot = w_tot = sw_tot = 0.0;
+X  end = sptr + stop;
+X
+X  for (cur = sptr + start; cur < end; cur++) {
+X    sc = (double)cur->score;
+X    len = (double)cur->n1;
+X    w = exp(-lambda * sc );
+X    s_tot += sc;
+X    w_tot += len * w;
+X    sw_tot += sc * len * w;
+X  }
+X
+X  /* contributions of the two censored tails */
+X  tail_lo = exp(-lambda*cenL);
+X  tail_hi = exp(-lambda*cenH);
+X  sw_tot += sumlenL*cenL*tail_lo - sumlenH*cenH*tail_hi;
+X  w_tot += sumlenL*tail_lo - sumlenH*tail_hi;
+X
+X  return (1.0 / lambda) - (s_tot /(double)(stop-start)) + (sw_tot / w_tot);
+}
+X
+/*
+X  second_deriv_cen() - second derivative, with respect to lambda, of
+X  the censored log-likelihood evaluated by mle_cen().  Arguments are
+X  the same as for first_deriv_cen().
+*/
+double
+second_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
+X		 double sumlenL, double cenL, double sumlenH, double cenH) {
+X
+X  struct stat_str *cur, *end;
+X  double w_tot, sw_tot, ssw_tot;
+X  double sc, len, w;
+X  double tail_lo, tail_hi;
+X
+X  w_tot = sw_tot = ssw_tot = 0.0;
+X  end = sptr + stop;
+X
+X  for (cur = sptr + start; cur < end; cur++) {
+X    sc = (double)cur->score;
+X    len = (double)cur->n1;
+X    w = exp(-lambda * sc);
+X    w_tot += len * w;
+X    sw_tot += len * sc * w;
+X    ssw_tot += len * sc * sc * w;
+X  }
+X
+X  /* contributions of the two censored tails */
+X  tail_lo = exp(-lambda * cenL);
+X  tail_hi = exp(-lambda * cenH);
+X  sw_tot += sumlenL*cenL*tail_lo - sumlenH*cenH*tail_hi;
+X  w_tot += sumlenL*tail_lo - sumlenH*tail_hi;
+X  ssw_tot += sumlenL*cenL*cenL * tail_lo -
+X    sumlenH*cenH*cenH * tail_hi;
+X
+X  return ((sw_tot * sw_tot) / (w_tot * w_tot)) - (ssw_tot / w_tot)
+X    - (1.0 / (lambda * lambda));
+}
+X
+double mle2_func(double *params,
+X double *consts,
+X struct stat_str *values,
+X int n, int start, int stop);
+X
+void simplex(double *fitparams,
+X double *lambda,
+X int nparam,
+X double (*minfunc) (double *tryparams, double *consts,
+X struct stat_str *data, int ndata,
+X int start, int stop),
+X double *consts,
+X void *data,
+X int ndata, int start, int stop
+X );
+X
+/*
+X  mle_cen2() - four-parameter (a0, a1, a2, b1) censored fit by simplex
+X  minimisation of mle2_func().
+X
+X  sptr/n   sampled scores; sorted in place by score
+X  M        query length
+X  fc       fraction of scores censored, fc/2.0 at each end
+X  a0..b1   out: fitted parameters
+X
+X  Returns 0 after the fit, or -1 (outputs untouched) when scores are
+X  censored and the low censoring score is not below the high one.
+X
+X  The censored tails are summarised by their arithmetic mean length
+X  and composition.
+*/
+int
+mle_cen2(struct stat_str *sptr, int n, int M, double fc,
+X	 double *a0, double *a1, double *a2, double *b1) {
+X
+X  double fit[4], step[4], fixed[9];
+X  double len_lo, len_hi, comp_lo, comp_hi, cut_lo, cut_hi;
+X  int n_cen, lo, hi, k;
+X
+X  n_cen = (fc/2.0) * n;
+X  lo = n_cen;
+X  hi = n - n_cen;
+X
+X  st_sort(sptr,n);
+X
+X  /* defaults when nothing is censored */
+X  len_lo = len_hi = cut_lo = cut_hi = 0.0;
+X  comp_lo = comp_hi = 1.0;
+X
+X  if (n_cen > 0) {
+X    cut_lo = (double)sptr[lo].score;
+X    cut_hi = (double)sptr[hi].score;
+X    if (cut_lo >= cut_hi) return -1;
+X
+X    /* low tail: entries 0 .. lo-1 */
+X    comp_lo = 0.0;
+X    for (k = 0; k < lo; k++) {
+X      len_lo += (double)sptr[k].n1;
+X      comp_lo += (double)sptr[k].comp;
+X    }
+X    len_lo /= (double) lo;
+X    comp_lo /= (double) lo;
+X
+X    /* high tail: entries hi .. n-1 */
+X    comp_hi = 0.0;
+X    for (k = hi; k < n; k++) {
+X      len_hi += (double)sptr[k].n1;
+X      comp_hi += (double)sptr[k].comp;
+X    }
+X    len_hi /= (double) (n - hi);
+X    comp_hi /= (double) (n - hi);
+X  }
+X
+X  /* starting point and initial step sizes for the simplex */
+X  fit[0] = 10.0;   step[0] = 1.0;
+X  fit[1] = -10.0;  step[1] = 0.5;
+X  fit[2] = 1.0;    step[2] = 0.1;
+X  fit[3] = 1.0;    step[3] = 0.01;
+X
+X  /* fixed values handed through to mle2_func() */
+X  fixed[0] = M;
+X  fixed[1] = (double) lo;
+X  fixed[2] = (double) hi;
+X  fixed[3] = cut_lo;
+X  fixed[4] = cut_hi;
+X  fixed[5] = len_lo;
+X  fixed[6] = len_hi;
+X  fixed[7] = comp_lo;
+X  fixed[8] = comp_hi;
+X
+X  simplex(fit, step, 4,
+X	  (double (*) (double *, double *, struct stat_str *, int, int, int) )mle2_func,
+X	  fixed, sptr, n, lo, hi);
+X
+X  *a0 = fit[0];
+X  *a1 = fit[1];
+X  *a2 = fit[2];
+X  *b1 = fit[3];
+X
+X  return 0;
+}
+X
+/*
+X  mle2_func() - objective function minimised by simplex() for the
+X  four-parameter fit.
+X
+X  params   a0, a1, a2, b1
+X  consts   [0] query length M, [3]/[4] low/high censoring scores,
+X           [5]/[6] mean tail lengths, [7]/[8] mean tail compositions
+X           ([1] and [2] are not read; start/stop arrive as arguments)
+X  values   scores; entries start..stop-1 are the uncensored ones
+X  n        total number of scores
+X
+X  Each score contributes -y + exp(y) + log(b1*comp), with
+X  y = -(score - (a0 + a1*comp + a2*comp*log(M*length)))/(b1*comp).
+X  The low tail adds start*exp(y) and the high tail subtracts
+X  (n-stop)*exp(y), both evaluated at the tail means.
+*/
+double mle2_func(double *params,
+X		 double *consts,
+X		 struct stat_str *values,
+X		 int n, int start, int stop
+X		 ) {
+X
+X  double a0 = params[0];
+X  double a1 = params[1];
+X  double a2 = params[2];
+X  double b1 = params[3];
+X  double M = consts[0];
+X  double cut_lo = consts[3], cut_hi = consts[4];
+X  double len_lo = consts[5], len_hi = consts[6];
+X  double comp_lo = consts[7], comp_hi = consts[8];
+X  double sc, len, cmp;
+X  double total, y;
+X  struct stat_str *cur, *end;
+X
+X  total = 0;
+X  y = 0;
+X
+X  /* censored low tail */
+X  if (start > 0) {
+X    y = -(cut_lo - (a0 + a1*comp_lo +a2*comp_lo*log(M*len_lo)))/(b1*comp_lo);
+X    total += (double) start * exp(y);
+X  }
+X
+X  /* uncensored scores */
+X  end = values + stop;
+X  for (cur = values + start; cur < end; cur++) {
+X    sc = (double) cur->score;
+X    len = (double) cur->n1;
+X    cmp = (double) cur->comp;
+X
+X    y = - (sc - (a0 + a1*cmp + a2 * cmp * log(M*len))) / (b1*cmp);
+X
+X    total += -y + exp(y) + log(b1 * cmp);
+X  }
+X
+X  /* censored high tail */
+X  if (stop < n) {
+X    y = -(cut_hi -(a0 + a1*comp_hi + a2*comp_hi*log(M*len_hi)))/(b1*comp_hi);
+X    total -= (double) (n - stop) * exp(y);
+X  }
+X
+X  return total;
+}
+X
+/* Begin Nelder-Mead simplex code: */
+X
+double evalfunc(double **param,
+X double *vals,
+X double *psums,
+X double *ptry,
+X int nparam,
+X double (*minfunc) (double *params, double *consts,
+X struct stat_str *data, int ndata,
+X int start, int stop),
+X double *consts,
+X void *data,
+X int ndata, int start, int stop,
+X int ihi,
+X double factor);
+X
+void simplex(double *fitparams,
+X double *lambda,
+X int nparam,
+X double (*minfunc) (double *tryparams, double *consts,
+X struct stat_str *data, int ndata,
+X int start, int stop),
+X double *consts,
+X void *data,
+X int ndata,
+X int start,
+X int stop
+X )
+{
+X
+X int i, j, ilo, ihi, inhi;
+X double rtol, sum, tmp, ysave, ytry;
+X double *psum, *vals, *ptry, **param;
+X
+X
+X psum = (double *) calloc(nparam, sizeof(double));
+X ptry = (double *) calloc(nparam, sizeof(double));
+X
+X vals = (double *) calloc(nparam + 1, sizeof(double));
+X
+X param = (double **) calloc(nparam + 1, sizeof(double *));
+X param[0] = (double *) calloc((nparam + 1) * nparam, sizeof(double));
+X for( i = 1 ; i < (nparam + 1) ; i++ ) {
+X param[i] = param[0] + i * nparam;
+X }
+X
+X /* Get our N+1 initial parameter values for the simplex */
+X
+X for( i = 0 ; i < nparam ; i++ ) {
+X param[0][i] = fitparams[i];
+X }
+X
+X for( i = 1 ; i < (nparam + 1) ; i++ ) {
+X for( j = 0 ; j < nparam ; j++ ) {
+X param[i][j] = fitparams[j] + lambda[j] * ( (i - 1) == j ? 1 : 0 );
+X }
+X }
+X
+X /* calculate initial values at the simplex nodes */
+X
+X for( i = 0 ; i < (nparam + 1) ; i++ ) {
+X vals[i] = minfunc(param[i], consts, data, ndata, start, stop);
+X }
+X
+X /* Begin Nelder-Mead simplex algorithm from Numerical Recipes in C */
+X
+X for( j = 0 ; j < nparam ; j++ ) {
+X for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
+X sum += param[i][j];
+X }
+X psum[j] = sum;
+X }
+X
+X
+X while( 1 ) {
+/*
+X determine which point is highest (ihi), next highest (inhi) and
+X lowest (ilo) by looping over the points in the simplex
+*/
+X ilo = 0;
+X
+/* ihi = vals[0] > vals[1] ? (inhi = 1, 0) : (inhi = 0, 1); */
+X if(vals[0] > vals[1]) { ihi = 0; inhi = 1; }
+X else { ihi = 1; inhi = 0; }
+X
+X for( i = 0 ; i < nparam + 1 ; i++) {
+X if( vals[i] <= vals[ilo] ) ilo = i;
+X if( vals[i] > vals[ihi] ) {
+X inhi = ihi;
+X ihi = i;
+X } else if ( vals[i] > vals[inhi] && i != ihi ) inhi = i;
+X }
+X
+X /* Are we finished? */
+X
+X rtol = 2.0 * fabs(vals[ihi] - vals[ilo]) /
+X (fabs(vals[ihi]) + fabs(vals[ilo]) + TINY);
+X
+X if( rtol < TOLERANCE ) {
+X
+/* put the best value and best parameters into the first index */
+X
+X tmp = vals[0];
+X vals[0] = vals[ilo];
+X vals[ilo] = tmp;
+X
+X for( i = 0 ; i < nparam ; i++ ) {
+X tmp = param[0][i];
+X param[0][i] = param[ilo][i];
+X param[ilo][i] = tmp;
+X }
+X
+X /* et voila, c'est finis */
+X break;
+X }
+X
+X /* Begin a new iteration */
+X
+X /* first, extrapolate by -1 through the face of the simplex across from ihi */
+X
+X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
+X data, ndata, start, stop, ihi, -1.0);
+X
+X if( ytry <= vals[ilo] ) {
+X
+X /* Good result, try additional extrapolation by 2 */
+X
+X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
+X data, ndata, start, stop, ihi, 2.0);
+X
+X } else if ( ytry >= vals[inhi] ) {
+X
+X /* no good, look for an intermediate lower point by contracting */
+X
+X ysave = vals[ihi];
+X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
+X data, ndata, start, stop, ihi, 0.5);
+X
+X if( ytry >= ysave ) {
+X
+X /* Still no good. Contract around lowest (best) point. */
+X
+X for( i = 0 ; i < nparam + 1 ; i++ ) {
+X if( i != ilo ) {
+X for ( j = 0 ; j < nparam ; j++ ) {
+X param[i][j] = psum[j] = 0.5 * (param[i][j] + param[ilo][j]);
+X }
+X vals[i] = minfunc(psum, consts, data, ndata, start, stop);
+X }
+X }
+X
+X
+X for( j = 0 ; j < nparam ; j++ ) {
+X for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
+X sum += param[i][j];
+X }
+X psum[j] = sum;
+X }
+X
+X }
+X }
+X }
+X
+X for( i = 0 ; i < nparam ; i++ ) {
+X fitparams[i] = param[0][i];
+X }
+X
+X if (ptry!=NULL) {
+X free(ptry);
+X ptry=NULL;
+X }
+X free(param[0]);
+X free(param);
+X free(vals);
+X free(psum);
+}
+X
+X
+/*
+X  evalfunc() - try moving the worst simplex vertex.
+X
+X  The trial point is psum[j]*fac1 - param[ihi][j]*fac2 with
+X  fac1 = (1 - factor)/nparam and fac2 = fac1 - factor.  If the
+X  function value there is lower than vals[ihi], the trial point
+X  replaces vertex ihi and psum/vals are updated to match.
+X
+X  Returns the function value at the trial point in either case.
+*/
+double evalfunc(double **param,
+X		double *vals,
+X		double *psum,
+X		double *ptry,
+X		int nparam,
+X		double (*minfunc)(double *tryparam, double *consts,
+X				  struct stat_str *data, int ndata,
+X				  int start, int stop),
+X		double *consts,
+X		void *data,
+X		int ndata, int start, int stop,
+X		int ihi,
+X		double factor) {
+X
+X  double *worst = param[ihi];
+X  double w_sum, w_old, y_new;
+X  int k;
+X
+X  w_sum = (1.0 - factor) / nparam;
+X  w_old = w_sum - factor;
+X
+X  k = 0;
+X  while (k < nparam) {
+X    ptry[k] = psum[k] * w_sum - worst[k] * w_old;
+X    k++;
+X  }
+X
+X  y_new = minfunc(ptry, consts, data, ndata, start, stop);
+X
+X  /* keep the old vertex unless the trial point is strictly better */
+X  if (!(y_new < vals[ihi])) return y_new;
+X
+X  vals[ihi] = y_new;
+X  for (k = 0; k < nparam; k++) {
+X    psum[k] += ptry[k] - worst[k];
+X    worst[k] = ptry[k];
+X  }
+X
+X  return y_new;
+}
+X
+/* end of Nelder-Mead simplex code */
+X
+/*
+X  proc_hist_n() - unscaled statistics: mean and variance of the scores.
+X
+X  sptr/nstats  sampled scores; trimmed entries are marked by negating
+X               sptr[].n1
+X  pst          search parameters (zsflag, zs_win are read here)
+X  histp        histogram record; entries is decremented for every
+X               trimmed score and stat_info receives the summary text
+X  do_trim      not consulted here; trimming is always performed
+X  pu           statistics record; r_u.rg.mu, mean_var and n_trimmed
+X               are filled in
+X
+X  Only scores with 0 < score <= max_hscore enter the sums.  Up to five
+X  passes then trim scores whose z-value (find_zn) lies outside
+X  20..ztrim, stopping once a pass trims fewer than LHISTC scores.
+X
+X  Returns AVE_STATS.
+*/
+int
+proc_hist_n(struct stat_str *sptr, int nstats,
+X	    struct pstruct pst, struct hist_str *histp,
+X	    int do_trim, struct pstat_str *pu)
+{
+X  int i, j;
+X  double s_score, s2_score, ssd, ztrim;
+X  int nit, max_hscore;
+X  char s_string[128];
+X  char *f_string;
+X
+X  f_string = &(histp->stat_info[0]);
+X
+X  max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
+X			   pu->ngK, pu->ngH, &ztrim);
+X
+X  s_score = s2_score = 0.0;
+X
+X  /* j counts the scores that are part of the running sums */
+X  for ( j = 0, i = 0; i < nstats; i++) {
+X    if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
+X      s_score += (ssd=(double)sptr[i].score);
+X      s2_score += ssd * ssd;
+X      j++;
+X    }
+X  }
+X
+X  if (j > 1 ) {
+X    pu->r_u.rg.mu = s_score/(double)j;
+X    pu->r_u.rg.mean_var = s2_score - (double)j * pu->r_u.rg.mu * pu->r_u.rg.mu;
+X    pu->r_u.rg.mean_var /= (double)(j-1);
+X  }
+X  else {
+X    pu->r_u.rg.mu = 50.0;
+X    pu->r_u.rg.mean_var = 10.0;
+X  }
+X
+X  /* guard against a vanishing variance */
+X  if (pu->r_u.rg.mean_var < 0.01) {
+X    pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;
+X  }
+X
+X  /* now remove some scores */
+X
+X  nit = 5;
+X  while (nit-- > 0) {
+X    pu->r_u.rg.n_trimmed = 0;
+X
+X    for (i=0; i< nstats; i++) {
+X      if (sptr[i].n1 < 0) continue;	/* trimmed in an earlier pass */
+X      ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
+X      if (ssd > ztrim || ssd < 20.0) {
+X	/* fprintf(stderr,"removing %3d %3d %4.1f\n",
+X	   sptr[i].score, sptr[i].n1,ssd); */
+X
+X	/* back the score out of the sums only if it was added above;
+X	   scores <= 0 or > max_hscore never contributed, and
+X	   subtracting them would corrupt mu and mean_var */
+X	if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
+X	  ssd = sptr[i].score;
+X	  s_score -= ssd;
+X	  s2_score -= ssd*ssd;
+X	  j--;
+X	}
+X	pu->r_u.rg.n_trimmed++;
+X	histp->entries--;
+X	sptr[i].n1 = -sptr[i].n1;
+X      }
+X    }
+X
+X    if (j > 1 ) {
+X      pu->r_u.rg.mu = s_score/(double)j;
+X      pu->r_u.rg.mean_var = s2_score - (double)j * pu->r_u.rg.mu * pu->r_u.rg.mu;
+X      pu->r_u.rg.mean_var /= (double)(j-1);
+X    }
+X    else {
+X      pu->r_u.rg.mu = 50.0;
+X      pu->r_u.rg.mean_var = 10.0;
+X    }
+X
+X    if (pu->r_u.rg.mean_var < 0.01) {
+X      pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;
+X    }
+X
+X    if (pu->r_u.rg.n_trimmed < LHISTC) {
+X      /*
+X	fprintf(stderr,"nprune %d at %d\n",nprune,nit);
+X      */
+X      break;
+X    }
+X  }
+X
+X  if (pst.zsflag < 10) s_string[0]='\0';
+X  else if (pst.zs_win > 0)
+X    sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X  else strncpy(s_string,"(shuffled)",sizeof(s_string));
+X
+X  sprintf(f_string,"%s unscaled statistics: mu= %6.4f var=%6.4f; Lambda= %6.4f",
+X	  s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var,PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
+X  return AVE_STATS;
+}
+X
+/*
+This routine calculates the maximum likelihood estimates for the
+extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
+X
+X <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
+X <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
+X
+X The <a> parameter can be transformed into and K
+X of the formula: 1 - exp ( - K m n exp ( - lambda S ))
+X using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
+X 1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
+X
+X a = log(K m n) / lambda
+X a lambda = log (K m n)
+X exp(a lambda) = K m n
+X but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
+X so: K m n = (1/n sum{ exp( -x[i] *lambda)})
+X K = sum{}/(nlib m n )
+X
+*/
+X
+/*
+X  alloc_hist() - make sure the per-length arrays of llen exist and
+X  are cleared.
+X
+X  The four arrays (hist, score_sums, score2_sums, score_var) are
+X  allocated with llen->max + 1 entries when llen->hist is NULL.
+X  Whether or not they were just allocated, every entry is then reset
+X  to zero.  Allocation results are not checked.
+*/
+void
+alloc_hist(struct llen_str *llen)
+{
+X  int n_bins, b;
+X
+X  n_bins = llen->max + 1;
+X
+X  if (llen->hist == NULL) {
+X    llen->hist = (int *)calloc((size_t)n_bins,sizeof(int));
+X    llen->score_sums = (double *)calloc((size_t)n_bins,sizeof(double));
+X    llen->score2_sums =(double *)calloc((size_t)n_bins,sizeof(double));
+X    llen->score_var = (double *)calloc((size_t)n_bins,sizeof(double));
+X  }
+X
+X  b = 0;
+X  while (b < n_bins) {
+X    llen->hist[b] = 0;
+X    llen->score2_sums[b] = 0.0;
+X    llen->score_sums[b] = 0.0;
+X    llen->score_var[b] = 0.0;
+X    b++;
+X  }
+}
+X
+/*
+X  free_hist() - release the arrays obtained by alloc_hist().
+X
+X  Does nothing when llen->hist is NULL.  Afterwards llen->hist is set
+X  to NULL, so a second call is harmless; the other three pointers are
+X  left as they were.
+*/
+void
+free_hist(struct llen_str *llen)
+{
+X  if (llen->hist == NULL) return;
+X
+X  free(llen->score_var);
+X  free(llen->score2_sums);
+X  free(llen->score_sums);
+X  free(llen->hist);
+X
+X  llen->hist = NULL;
+}
+X
+/*
+X  inithist() - reset llen for a new set of scores.
+X
+X  Sets the number of length bins to MAX_LLEN, allocates or clears the
+X  arrays through alloc_hist(), and puts the score/length extremes and
+X  the zero-score counter back to their starting values.  pst and
+X  max_hscore are accepted but not used.
+*/
+void
+inithist(struct llen_str *llen, struct pstruct pst, int max_hscore)
+{
+X  /* alloc_hist() sizes the arrays from llen->max, so set it first */
+X  llen->max = MAX_LLEN;
+X
+X  llen->min_score = 10000;
+X  llen->max_score = -1;
+X
+X  alloc_hist(llen);
+X
+X  llen->min_length = 10000;
+X  llen->max_length = 0;
+X  llen->zero_s = 0;
+}
+X
+/*
+X  addhist() - add one score to the length-binned sums.
+X
+X  A score <= 0, or a length below LENGTH_CUTOFF, is only counted in
+X  llen->zero_s (and forces llen->min_score to 0).  Otherwise the
+X  score/length extremes are updated, the score is capped at
+X  max_hscore, and the capped score is added to the bin
+X  (int)(LN_FACT*log(length)+0.5), clamped to 0..llen->max.
+*/
+void
+addhist(struct llen_str *llen, int score, int length, int max_hscore)
+{
+X  int bin;
+X  double val;
+X
+X  if (!( score<=0 || length < LENGTH_CUTOFF)) {
+X    /* extremes are recorded before the score is capped */
+X    if (score < llen->min_score) llen->min_score = score;
+X    if (score > llen->max_score) llen->max_score = score;
+X
+X    if (length > llen->max_length) llen->max_length = length;
+X    if (length < llen->min_length) llen->min_length = length;
+X
+X    if (score > max_hscore) score = max_hscore;
+X
+X    bin = (int)(LN_FACT*log((double)length)+0.5);
+X    if (bin < 0 ) bin = 0;
+X    if (bin > llen->max) bin = llen->max;
+X
+X    val = (double)score;
+X    llen->hist[bin]++;
+X    llen->score_sums[bin] += val;
+X    llen->score2_sums[bin] += val * val;
+X    return;
+X  }
+X
+X  llen->min_score = 0;
+X  llen->zero_s++;
+}
+X
+/* histogram covers z-scores of 20 .. 120 (mean 50, 10 units per s.d.) */
+X
+/* inithistz(): set up the z-score histogram in *histp.
+X
+X  mh is the requested number of histogram lines; the bin width
+X  (histint) and the number of bins (maxh) are derived from it for the
+X  fixed range min_hist=20 .. max_hist=120.  hist_a is allocated when it
+X  is NULL (histflg records whether that worked); an existing hist_a is
+X  simply cleared.  z_calls and entries are reset to 0.
+*/
+void
+inithistz(int mh, struct hist_str *histp )
+{
+X  int i;
+X  double span;
+X
+X  histp->z_calls = 0;
+X  histp->entries = 0;
+X
+X  histp->min_hist = 20;
+X  histp->max_hist = 120;
+X
+X  span = (double)(histp->max_hist - histp->min_hist + 2);
+X  histp->histint = (int)(span/(double)mh+0.5);
+X  histp->maxh = (int)(span/(double)histp->histint+0.5);
+X
+X  if (histp->hist_a != NULL) {
+X    /* re-use the existing array */
+X    for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;
+X    return;
+X  }
+X
+X  histp->hist_a = (int *)calloc((size_t)histp->maxh,sizeof(int));
+X  if (histp->hist_a == NULL) {
+X    fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
+X    histp->histflg = 0;
+X  }
+X  else histp->histflg = 1;
+}
+X
+/* nrv[]: fixed table of 100 small offsets; addhistz() adds one of them
+X  (cycling through the table) to each z-score before rounding it to a
+X  histogram bin.  All 100 entries must be separated by commas: a
+X  missing comma silently merges two values into one expression and
+X  leaves the last slot zero. */
+static double nrv[100]={
+X 0.3098900570,-0.0313400923, 0.1131975903,-0.2832547606, 0.0073672659,
+X 0.2914489107, 0.4209306311,-0.4630181404, 0.3326537896, 0.0050140359,
+X -0.1117435426,-0.2835630301, 0.2302997065,-0.3102716394, 0.0819894916,
+X -0.1676455701,-0.3782225018,-0.3204509938,-0.3594969187,-0.0308950398,
+X 0.2922813812, 0.1337170751, 0.4666577031,-0.2917784349,-0.2438179916,
+X 0.3002301394, 0.0231147123, 0.5687927366,-0.2318208709,-0.1476839273,
+X -0.0385043851,-0.1213476523, 0.1486341995, 0.1027917167, 0.1409192644,
+X -0.3280652579, 0.4232041455, 0.0775993309, 0.1159071787, 0.2769424442,
+X 0.3197284751, 0.1507346903, 0.0028580909, 0.4825103412,-0.0496843610,
+X -0.2754357656, 0.6021881753,-0.0816123956,-0.0899148991, 0.4847183201,
+X 0.2151621865,-0.4542246220, 0.0690709102, 0.2461894193, 0.2126042295,
+X -0.0767060668, 0.4819746149, 0.3323031326, 0.0177600676, 0.1143185210,
+X 0.2653977455, 0.0921872958,-0.1330986718, 0.0412287716,-0.1691604748,
+X -0.0529679078,-0.0194157955,-0.6117493924, 0.1199067932, 0.0210243193,
+X -0.5832259838,-0.1685528664, 0.0008591271,-0.1120347822, 0.0839125069,
+X -0.2787486831,-0.1937017962,-0.1915733940,-0.7888453635,-0.3316745163,
+X 0.1180885226,-0.3347001067,-0.2477492636,-0.2445697600, 0.0001342482,
+X -0.0015759812,-0.1516473992,-0.5202267615, 0.2136975210, 0.2500423188,
+X -0.2402926401,-0.1094186280,-0.0618869933,-0.0815221188, 0.2623337275,
+X 0.0219427302,-0.1774469919, 0.0828245026,-0.3271952808,-0.0632898028};
+X
+/* addhistz(): add one z-score to the histogram in *histp.
+X
+X  The z-score is offset by the next entry of nrv[] and rounded to an
+X  integer zi.  histp->entries is incremented when 0 <= zi <= 120; zi
+X  is then clamped to [min_hist, max_hist] and the bin
+X  (zi - min_hist)/histint is incremented.
+X
+X  Nothing is binned when the histogram array is missing (inithistz()
+X  leaves hist_a NULL when its allocation fails) or when histint is not
+X  positive.  The bin index is also clamped to maxh-1: for some bin
+X  widths (e.g. histint 5 or 10) the top z-score would otherwise index
+X  one element past the end of hist_a.
+*/
+void
+addhistz(double zs, struct hist_str *histp)
+{
+X  int ih, zi;
+X  double rv;
+X
+X  rv = nrv[histp->z_calls++ % 100];
+X  zi = (int)(zs + 0.5+rv );
+X
+X  if ((zi >= 0) && (zi <= 120)) histp->entries++;
+X
+X  if (histp->hist_a == NULL || histp->histint <= 0) return;
+X
+X  if (zi < histp->min_hist) zi = histp->min_hist;
+X  if (zi > histp->max_hist) zi = histp->max_hist;
+X
+X  ih = (zi - histp->min_hist)/histp->histint;
+X  if (ih >= histp->maxh) ih = histp->maxh - 1;
+X  if (ih < 0) return;
+X
+X  histp->hist_a[ih]++;
+}
+X
+/* addhistzp() does not increase histp->entries since addhistz() did it already */
+/*
+void
+addhistzp(double zs, struct hist_str *histp)
+{
+X int ih, zi;
+X double rv;
+X
+X rv = nrv[histp->z_calls++ %100];
+X zi = (int)(zs + 0.5 + rv);
+X
+X if (zi < histp->min_hist) zi = histp->min_hist;
+X if (zi > histp->max_hist) zi = histp->max_hist;
+X
+X ih = (zi - histp->min_hist)/histp->histint;
+X
+X histp->hist_a[ih]++;
+}
+*/
+X
+/* prune_hist(): remove one (score, length) observation from the
+X  length-binned sums in *llen; the inverse of the binning step of
+X  addhist().  Observations that addhist() would not have binned
+X  (score <= 0 or length < LENGTH_CUTOFF) are ignored.  The observed
+X  score/length extremes are not adjusted, and *entries is not touched.
+*/
+void
+prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
+X           long *entries)
+{
+X  int bin;
+X  double s;
+X
+X  if (length < LENGTH_CUTOFF || score <= 0) return;
+X
+X  s = (double)((score > max_hscore) ? max_hscore : score);
+X
+X  bin = (int)(LN_FACT*log((double)length)+0.5);
+X  if (bin < 0 ) bin = 0;
+X  if (bin > llen->max) bin = llen->max;
+X
+X  llen->hist[bin]--;
+X  llen->score_sums[bin] -= s;
+X  llen->score2_sums[bin] -= s * s;
+X
+/*  (*entries)--; histp->entries is not yet initialized */
+}
+X
+/* fit_llen: no trimming
+X   (1) regress scores vs log(n) using weighted variance
+X   (2) calculate mean variance after length regression
+X
+X   Inputs are the per-bin counts/sums in *llen (bin j stands for
+X   x = j + 0.5 in units of LN_FACT*ln(length)).  Only bins with more
+X   than one entry are used.
+X
+X   Results in *pr: rho/mu (slope/intercept of score on x) with rho_e,
+X   mu_e; nb_tot (number of usable bins); mean_var (mean squared residual
+X   about the line); var_e; rho2/mu2 (regression of the squared residuals
+X   on x) and var_cutoff.
+X
+X   When fewer than 5 bins are usable, or the normal-equation determinant
+X   is not > 0.001, llen->fit_flag is cleared, rho is set to 0, mu to the
+X   weighted mean score, and the function returns without touching the
+X   remaining fields.
+*/
+X
+void
+fit_llen(struct llen_str *llen, struct rstat_str *pr)
+{
+X  int j;
+X  int n;
+X  int n_size;
+X  double x, y2, u, z;
+X  double mean_x, mean_y, var_x, var_y, covar_xy;
+X  double mean_y2, covar_xy2, var_y2, dllj;
+X
+X  double sum_x, sum_y, sum_x2, sum_xy, det, n_w;
+X
+/* now fit scores to best linear function of log(n), using
+X   simple linear regression */
+X
+X  /* start one bin below the first occupied bin, but never below bin 0:
+X     the loops below read llen->hist[llen->min] */
+X  for (llen->min=0; llen->min < llen->max; llen->min++)
+X    if (llen->hist[llen->min]) break;
+X  if (llen->min > 0) llen->min--;
+X
+X  /* variance of the mean score in each usable bin (floor 0.1) */
+X  for (n_size=0,j = llen->min; j < llen->max; j++) {
+X    if (llen->hist[j] > 1) {
+X      dllj = (double)llen->hist[j];
+X      llen->score_var[j] = llen->score2_sums[j]/dllj
+X        - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+X      llen->score_var[j] /= (double)(llen->hist[j]-1);
+X      if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;
+X      n_size++;
+X    }
+X  }
+X
+X  pr->nb_tot = n_size;
+X
+X  /* weighted sums for the normal equations; weight = 1/score_var[j] */
+X  n_w = 0.0;
+X  sum_x = sum_y = sum_x2 = sum_xy = 0;
+X  for (j = llen->min; j < llen->max; j++)
+X    if (llen->hist[j] > 1) {
+X      x = j + 0.5;
+X      dllj = (double)llen->hist[j];
+X      n_w += dllj/llen->score_var[j];
+X      sum_x += dllj * x / llen->score_var[j] ;
+X      sum_y += llen->score_sums[j] / llen->score_var[j];
+X      sum_x2 += dllj * x * x /llen->score_var[j];
+X      sum_xy += x * llen->score_sums[j]/llen->score_var[j];
+X    }
+X
+X  if (n_size < 5 ) {
+X    llen->fit_flag=0;
+X    pr->rho = 0;
+X    pr->mu = sum_y/n_w;
+X    return;
+X  }
+X  else {
+X    det = n_w * sum_x2 - sum_x * sum_x;
+X    if (det > 0.001) {
+X      pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
+X      pr->rho_e = n_w/det;
+X      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+X      pr->mu_e = sum_x2/det;
+X    }
+X    else {
+X      llen->fit_flag = 0;
+X      pr->rho = 0;
+X      pr->mu = sum_y/n_w;
+X      return;
+X    }
+X  }
+X
+X  /* unweighted moments of x and score over the usable bins
+X     (note: this pass includes bin llen->max) */
+X  n = 0;
+X  mean_x = mean_y = mean_y2 = 0.0;
+X  var_x = var_y = 0.0;
+X  covar_xy = covar_xy2 = 0.0;
+X
+X  for (j = llen->min; j <= llen->max; j++)
+X    if (llen->hist[j] > 1 ) {
+X      n += llen->hist[j];
+X      x = (double)j + 0.5;
+X      mean_x += (double)llen->hist[j] * x;
+X      mean_y += llen->score_sums[j];
+X      var_x += (double)llen->hist[j] * x * x;
+X      var_y += llen->score2_sums[j];
+X      covar_xy += x * llen->score_sums[j];
+X    }
+X  mean_x /= n; mean_y /= n;
+X  var_x = var_x / n - mean_x * mean_x;
+X  var_y = var_y / n - mean_y * mean_y;
+X
+X  covar_xy = covar_xy / n - mean_x * mean_y;
+/*
+X  pr->rho = covar_xy / var_x;
+X  pr->mu = mean_y - pr->rho * mean_x;
+*/
+X  /* y2 = sum over the bin of (score - predicted)^2 */
+X  mean_y2 = covar_xy2 = var_y2 = 0.0;
+X  for (j = llen->min; j <= llen->max; j++)
+X    if (llen->hist[j] > 1) {
+X      x = (double)j + 0.5;
+X      u = pr->rho * x + pr->mu;
+X      y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;
+X      mean_y2 += y2;
+X      var_y2 += y2 * y2;
+X      covar_xy2 += x * y2;
+X    }
+X
+X  pr->mean_var = mean_y2 /= (double)n;
+X  covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
+X
+X  if (pr->mean_var <= 0.01) {
+X    llen->fit_flag = 0;
+X    pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
+X  }
+X
+X  /*
+X  fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
+X          pr->rho*LN_FACT,pr->mu,pr->mean_var);
+X  */
+X  if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
+X  else pr->var_e = 0.0;
+X
+X  if (llen->fit_flag) {
+X    pr->rho2 = covar_xy2 / var_x;
+X    pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
+X  }
+X  else {
+X    pr->rho2 = 0;
+X    pr->mu2 = pr->mean_var;
+X  }
+X
+X  /* z: length at which the predicted variance is evaluated for
+X     var_cutoff; never less than 2*LENGTH_CUTOFF */
+X  if (pr->rho2 < 0.0 )
+X    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+X  else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+X  if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+X
+X  pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;
+}
+X
+/* fit_llens: trim high variance bins
+X   (1) regress scores vs log(n) using weighted variance
+X   (2) regress residuals vs log(n)
+X   (3) remove high variance bins
+X   (4) calculate mean variance after length regression
+X
+X   Bins whose excess residual (stored temporarily in score_var[]) is
+X   more than 3 times the root-mean value are emptied (hist[j] = 0) and
+X   counted in pr->nb_trimmed / pr->n1_trimmed, which are only
+X   incremented here, never reset.  The final parameters come from
+X   fit_llen() run on the remaining bins.
+*/
+X
+void
+fit_llens(struct llen_str *llen, struct rstat_str *pr)
+{
+X  int j;
+X  int n, n_u2;
+X  double x, y, y2, u, u2, v, z;
+X  double mean_x, mean_y, var_x, var_y, covar_xy;
+X  double mean_y2, covar_xy2;
+X  double mean_u2, mean_3u2, dllj;
+X  double sum_x, sum_y, sum_x2, sum_xy, det, n_w;
+X
+/* now fit scores to best linear function of log(n), using
+X   simple linear regression */
+X
+X  /* start one bin below the first occupied bin, but never below bin 0:
+X     the loops below read llen->hist[llen->min] */
+X  for (llen->min=0; llen->min < llen->max; llen->min++)
+X    if (llen->hist[llen->min]) break;
+X  if (llen->min > 0) llen->min--;
+X
+X  /* variance of the mean score in each usable bin (floor 1.0) */
+X  for (j = llen->min; j < llen->max; j++) {
+X    if (llen->hist[j] > 1) {
+X      dllj = (double)llen->hist[j];
+X      llen->score_var[j] = (double)llen->score2_sums[j]/dllj
+X        - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+X      llen->score_var[j] /= (double)(llen->hist[j]-1);
+X      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
+X    }
+X  }
+X
+X  /* weighted least squares of score on x */
+X  n_w = 0.0;
+X  sum_x = sum_y = sum_x2 = sum_xy = 0;
+X  for (j = llen->min; j < llen->max; j++)
+X    if (llen->hist[j] > 1) {
+X      x = j + 0.5;
+X      dllj = (double)llen->hist[j];
+X      n_w += dllj/llen->score_var[j];
+X      sum_x += dllj * x / llen->score_var[j] ;
+X      sum_y += llen->score_sums[j] / llen->score_var[j];
+X      sum_x2 += dllj * x * x /llen->score_var[j];
+X      sum_xy += x * llen->score_sums[j]/llen->score_var[j];
+X    }
+X
+X  det = n_w * sum_x2 - sum_x * sum_x;
+X  pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
+X  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+X
+/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
+X
+X  n = 0;
+X  mean_x = mean_y = mean_y2 = 0.0;
+X  var_x = var_y = 0.0;
+X  covar_xy = covar_xy2 = 0.0;
+X
+X  for (j = llen->min; j <= llen->max; j++)
+X    if (llen->hist[j] > 1 ) {
+X      n += llen->hist[j];
+X      x = (double)j + 0.5;
+X      dllj = (double)llen->hist[j];
+X      mean_x += dllj * x;
+X      mean_y += llen->score_sums[j];
+X      var_x += dllj * x * x;
+X      var_y += llen->score2_sums[j];
+X      covar_xy += x * llen->score_sums[j];
+X    }
+X  mean_x /= n; mean_y /= n;
+X  var_x = var_x / n - mean_x * mean_x;
+X  var_y = var_y / n - mean_y * mean_y;
+X
+X  covar_xy = covar_xy / n - mean_x * mean_y;
+/*  pr->rho = covar_xy / var_x;
+X  pr->mu = mean_y - pr->rho * mean_x;
+*/
+X
+X  /* regress the squared residuals on x -> rho2, mu2 */
+X  mean_y2 = covar_xy2 = 0.0;
+X  for (j = llen->min; j <= llen->max; j++)
+X    if (llen->hist[j] > 1) {
+X      x = (double)j + 0.5;
+X      u = pr->rho * x + pr->mu;
+X      y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u;
+X      mean_y2 += y2;
+X      covar_xy2 += x * y2;
+X    }
+X
+X  mean_y2 /= n;
+X  covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
+X  pr->rho2 = covar_xy2 / var_x;
+X  pr->mu2 = mean_y2 - pr->rho2 * mean_x;
+X
+X  if (pr->rho2 < 0.0 )
+X    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+X  else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+X  if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+X
+X  pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
+X
+/* fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
+X   pr->var_cutoff,z);
+*/
+X  /* score_var[j] is re-used here: it now holds the squared excess of
+X     the bin's residual sum over its predicted variance, or -1.0 for
+X     bins that are not usable */
+X  mean_u2 = 0.0;
+X  n_u2 = 0;
+X  for ( j = llen->min; j < llen->max; j++) {
+X    y = j+0.5;
+X    dllj = (double)llen->hist[j];
+X    x = pr->rho * y + pr->mu;
+X    v = pr->rho2 * y + pr->mu2;
+X    if (v < pr->var_cutoff) v = pr->var_cutoff;
+X    if (llen->hist[j]> 1) {
+X      u2 = (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj;
+X      mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1);
+X      n_u2++;
+X    }
+X    else llen->score_var[j] = -1.0;
+X  }
+X
+X  mean_u2 = sqrt(mean_u2/(double)n_u2);
+X  /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
+X
+X  mean_3u2 = mean_u2*3.0;
+X
+X  for (j = llen->min; j < llen->max; j++) {
+X    if (llen->hist[j] <= 1) continue;
+X    if (sqrt(llen->score_var[j]) > mean_3u2) {
+X      pr->nb_trimmed++;
+X      pr->n1_trimmed += llen->hist[j];
+X      llen->hist[j] = 0;
+X    }
+X  }
+X  fit_llen(llen, pr);
+}
+X
+struct s2str {double s; int n;};
+void s2_sort ( struct s2str *sptr, int n);
+X
+/* fit_llen2(): weighted regression of score on binned log(length),
+X   with a trimmed estimate of the residual variance.
+X
+X   (1) shrink [llen->min, llen->max] to the occupied bins
+X   (2) weighted least squares for pr->rho / pr->mu; when fewer than 5
+X       bins are usable or the determinant is not > 0.001,
+X       llen->fit_flag is cleared and rho = 0, mu = weighted mean score
+X   (3) per-bin residual sums (bins with more than VHISTC entries) are
+X       sorted with s2_sort(); about 5% of the bins at each end of the
+X       sorted order are counted as trimmed and pr->mean_var is
+X       recomputed from the rest
+X
+X   The fallback values of step (2) are kept: recomputing rho/mu from
+X   the determinant after the fallback would discard it and divide by a
+X   near-zero determinant.
+X
+X   pr->var_e is set to 0.0.  If the work array cannot be allocated a
+X   message is printed and the function returns after step (2).
+*/
+void
+fit_llen2(struct llen_str *llen, struct rstat_str *pr)
+{
+X  int j;
+X  int n_y2, llen_delta, llen_del05;
+X  int n_size;
+X  double x, y2, u;
+X  double mean_y2;
+X  struct s2str *ss2;
+X
+X  double sum_x, sum_y, sum_x2, sum_xy, det, n_w;
+X
+/* now fit scores to best linear function of log(n), using
+X   simple linear regression */
+X
+X  for (llen->min=0; llen->min < llen->max; llen->min++)
+X    if (llen->hist[llen->min]) break;
+X
+X  for ( ; llen->max > llen->min; llen->max--)
+X    if (llen->hist[llen->max]) break;
+X
+X  /* variance of the mean score in each usable bin (floor 1.0) */
+X  for (n_size=0,j = llen->min; j < llen->max; j++) {
+X    if (llen->hist[j] > 1) {
+X      llen->score_var[j] = llen->score2_sums[j]/(double)llen->hist[j]
+X        - (llen->score_sums[j]/(double)llen->hist[j])
+X        * (llen->score_sums[j]/(double)llen->hist[j]);
+X      llen->score_var[j] /= (double)(llen->hist[j]-1);
+X      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
+X      n_size++;
+X    }
+X  }
+X
+X  n_w = 0.0;
+X  sum_x = sum_y = sum_x2 = sum_xy = 0;
+X  for (j = llen->min; j < llen->max; j++)
+X    if (llen->hist[j] > 1) {
+X      x = j + 0.5;
+X      n_w += (double)llen->hist[j]/llen->score_var[j];
+X      sum_x += (double)llen->hist[j] * x / llen->score_var[j] ;
+X      sum_y += llen->score_sums[j] / llen->score_var[j];
+X      sum_x2 += (double)llen->hist[j] * x * x /llen->score_var[j];
+X      sum_xy += x * llen->score_sums[j]/llen->score_var[j];
+X    }
+X
+X  if (n_size < 5 ) {
+X    llen->fit_flag=0;
+X    pr->rho = 0;
+X    pr->mu = sum_y/n_w;
+X  }
+X  else {
+X    det = n_w * sum_x2 - sum_x * sum_x;
+X    if (det > 0.001) {
+X      pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
+X      pr->rho_e = n_w/det;
+X      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
+X      pr->mu_e = sum_x2/det;
+X    }
+X    else {
+X      llen->fit_flag = 0;
+X      pr->rho = 0;
+X      pr->mu = sum_y/n_w;
+X    }
+X  }
+X
+/* fprintf(stderr," rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
+X
+X  if ((ss2=(struct s2str *)calloc(llen->max+1,sizeof(struct s2str)))==NULL) {
+X    fprintf(stderr," cannot allocate ss2\n");
+X    return;
+X  }
+X
+X  /* residual sum of squares about the fitted line, per bin */
+X  mean_y2 = 0.0;
+X  n_y2 = 0;
+X  for (j = llen->min; j <= llen->max; j++)
+X    if (llen->hist[j] > VHISTC) {
+X      n_y2 += ss2[j].n = llen->hist[j];
+X      x = (double)j + 0.5;
+X      u = pr->rho * x + pr->mu;
+X      ss2[j].s = y2 = llen->score2_sums[j] - 2*llen->score_sums[j]*u + llen->hist[j]*u*u;
+X      mean_y2 += y2;
+X    }
+X  pr->mean_var = mean_y2/(double)n_y2;
+X
+X  s2_sort(ss2+llen->min,llen->max-llen->min+1);
+X
+X  /* llen_delta: number of sorted entries that hold data */
+X  llen_delta = 0;
+X  for (j=llen->min; j<=llen->max; j++) {
+X    if (ss2[j].n > 1) {
+X      llen_delta++;
+X    }
+X  }
+X
+X  llen_del05 = llen_delta/20;
+X  mean_y2 = 0.0;
+X  n_y2 = 0;
+X  for (j = llen->min; j<llen->min+llen_del05; j++) {
+X    pr->n1_trimmed += ss2[j].n;
+X    pr->nb_trimmed++;
+X  }
+X  for (j = llen->min+llen_del05; j <= llen->min+llen_delta-llen_del05; j++)
+X    if (ss2[j].n > 1) {
+X      mean_y2 += ss2[j].s;
+X      n_y2 += ss2[j].n;
+X    }
+X  for (j = llen->min+llen_delta-llen_del05+1; j< llen->max; j++) {
+X    pr->n1_trimmed += ss2[j].n;
+X    pr->nb_trimmed++;
+X  }
+X
+X  free(ss2);
+X  if (n_y2 > 1) pr->mean_var = mean_y2/(double)n_y2;
+X
+X  /* fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var: %.4f\n",
+X     pr->rho*LN_FACT,pr->mu,pr->mean_var); */
+X
+X  pr->var_e = 0.0;
+}
+X
+/* REG_STATS - Z() from rho/mu/mean_var
+X  Returns 50 + 10*z, where z is the score's deviation from the
+X  length-regression line in units of sqrt(mean_var).  Returns 0 for
+X  score <= 0 or length < LENGTH_CUTOFF.  escore and comp are unused.
+*/
+double find_zr(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double lnx, resid;
+X
+X  if (score <= 0 || length < LENGTH_CUTOFF) return 0;
+X
+X  lnx = LN_FACT*log((double)(length));
+X
+X  resid = (double)score - pu->r_u.rg.rho * lnx - pu->r_u.rg.mu;
+X  resid = resid / sqrt(pu->r_u.rg.mean_var);
+X
+X  return (50.0 + resid*10.0);
+}
+X
+/* REG2_STATS Z() from rho/mu, rho2/mu2
+X  Like find_zr(), but the variance is predicted from the length
+X  (rho2*x + mu2); when the prediction falls below var_cutoff, mean_var
+X  is used instead.  Returns 0 for length < LENGTH_CUTOFF; there is no
+X  check on score.  escore and comp are unused.
+*/
+double find_zr2(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double lnx, pred_var, resid;
+X
+X  if ( length < LENGTH_CUTOFF) return 0;
+X
+X  lnx = LN_FACT*log((double)(length));
+X
+X  pred_var = pu->r_u.rg.rho2 * lnx + pu->r_u.rg.mu2;
+X  if (pred_var < pu->r_u.rg.var_cutoff) pred_var = pu->r_u.rg.mean_var;
+X
+X  resid = (double)score - pu->r_u.rg.rho * lnx - pu->r_u.rg.mu;
+X  resid = resid / sqrt(pred_var);
+X
+X  return (50.0 + resid*10.0);
+}
+X
+#ifdef USE_LNSTATS
+/* LN_STATS - ln()-scaled mu, mean_var */
+double find_zl(int score, int length, double comp, struct pstat_str *pu)
+{
+X  double scaled, dev;
+X
+X  /* score rescaled by LN200/ln(length) */
+X  scaled = (double)score*LN200/log((double)length);
+X
+X  dev = (scaled - pu->r_u.rg.mu) / sqrt(pu->r_u.rg.mean_var);
+X
+X  return (50.0 + dev*10.0);
+}
+#endif
+X
+/* MLE_STATS - Z() from MLE for lambda, K
+X  Uses Lambda, K and a_n0 from pu->r_u.ag; both lengths are taken as at
+X  least 1.  The result is reported on the 50 + 10*z scale.  escore and
+X  comp are unused.
+*/
+double
+find_ze(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double z, mp, np;
+X
+X  np = (double)length;
+X  mp = pu->r_u.ag.a_n0;
+X
+X  if (np < 1.0) np = 1.0;
+X  if (mp < 1.0) mp = 1.0;
+X
+X  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * np * mp);
+X
+X  z = -z + EULER_G;
+X  z /= - PI_SQRT6;
+X
+X  return (50.0 + z*10.0);
+}
+X
+/* MLE2_STATS - Z() from MLE for mle_a0..2, mle_b1, length, comp
+X  A comp value <= 0 is replaced by pu->r_u.m2.ave_comp; both lengths are
+X  taken as at least 1.  The result is reported on the 50 + 10*z scale.
+X  escore is unused.
+*/
+double
+find_ze2(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double z, mp, np;
+X
+X  if (comp <= 0.0) comp = pu->r_u.m2.ave_comp;
+X
+X  /* avoid very biased comp estimates */
+X  /* comp = exp((4.0*log(comp)+log(pu->r_u.m2.ave_comp))/5.0); */
+X
+X  np = (double)length;
+X  mp = pu->r_u.m2.a_n0;
+X
+X  if (np < 1.0) np = 1.0;
+X  if (mp < 1.0) mp = 1.0;
+X
+X  z = (-(pu->r_u.m2.mle2_a0 + pu->r_u.m2.mle2_a1 * comp + pu->r_u.m2.mle2_a2 * comp * log(np * mp)) + score) / (pu->r_u.m2.mle2_b1 * comp);
+X
+X  z = -z + EULER_G;
+X  z /= - PI_SQRT6;
+X
+X  return (50.0 + z*10.0);
+}
+X
+/* AG_STATS - Altschul-Gish Lambda, K
+X  Both lengths are reduced by a_n0f and by ln(length)/H before use and
+X  are taken as at least 1.  The result is reported on the 50 + 10*z
+X  scale.  escore and comp are unused.
+*/
+double
+find_za(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double z, mp, np, len1, len1_corr;
+X
+X  len1 = (double)length;
+X  len1_corr = log(len1)/pu->r_u.ag.H;
+X
+X  mp = pu->r_u.ag.a_n0 - pu->r_u.ag.a_n0f - len1_corr;
+X  np = len1 - pu->r_u.ag.a_n0f - len1_corr;
+X
+X  if (np < 1.0) np = 1.0;
+X  if (mp < 1.0) mp = 1.0;
+X
+X  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * np * mp);
+X
+X  z = -z + EULER_G;
+X  z /= - PI_SQRT6;
+X
+X  return (50.0 + z*10.0);
+}
+X
+/* find_zn(): Z() from mu/mean_var only, with no length correction.
+X  escore, length and comp are unused. */
+double find_zn(int score, double escore, int length, double comp, struct pstat_str *pu)
+{
+X  double dev;
+X
+X  dev = ((double)score - pu->r_u.rg.mu) / sqrt(pu->r_u.rg.mean_var);
+X
+X  return (50.0 + dev*10.0);
+}
+X
+/* computes E value for a given z value, assuming extreme value distribution
+X  (normal distribution when NORMAL_DIST is defined).  zs is used as
+X  given, without the (zs-50)/10 rescaling.  The count is entries, or
+X  db.entries when entries < 1.  Returns 0 for zs > ZS_MAX.
+*/
+double
+z_to_E(double zs, long entries, struct db_str db)
+{
+X  double e, n;
+X
+X  /* if (db->entries < 5) return (double)db.entries; */
+X  if (entries >= 1) { n = entries;}
+X  else { n = db.entries;}
+X
+X  if (zs > ZS_MAX) return 0.0;
+X
+#ifndef NORMAL_DIST
+X  e = exp(- PI_SQRT6 * zs - .577216);
+X  /* for small e, 1 - exp(-e) is replaced by e */
+X  return n * (e > .01 ? 1.0 - exp(-e) : e);
+#else
+X  return n * erfc(zs/M_SQRT2)/2.0;
+#endif
+}
+X
+/* zs_to_p(): probability of a z-score at least as large as zs.
+X  zs is on the 50 + 10*z scale and is converted back to z first.
+X  Returns 0 for z > ZS_MAX.  Without NORMAL_DIST the extreme value
+X  distribution is used (for small e, 1 - exp(-e) is replaced by e);
+X  with NORMAL_DIST the normal upper tail erfc(z/sqrt(2))/2 is returned.
+X  Both branches work on the standardized z, not on the raw zs.
+*/
+double
+zs_to_p(double zs)
+{
+X  double e, z;
+X
+X  /* if (db.entries < 5) return 0.0; */
+X
+X  z = (zs - 50.0)/10.0;
+X
+X  if (z > ZS_MAX) return 0.0;
+X
+#ifndef NORMAL_DIST
+X  e = exp(- PI_SQRT6 * z - EULER_G);
+X  return (e > .01 ? 1.0 - exp(-e) : e);
+#else
+X  return erfc(z/M_SQRT2)/2.0;
+#endif
+}
+X
+/* zs_to_bit(): convert a z-score (50 + 10*z scale) to a bit score for
+X  sequences of length n0 and n1:
+X  (PI_SQRT6*z + EULER_G + ln(n0*n1)) / ln(2) */
+double
+zs_to_bit(double zs, int n0, int n1)
+{
+X  double z, len0, len1;
+X
+X  len0 = (double)n0;
+X  len1 = (double)n1;
+X  z = (zs - 50.0)/10.0;
+X
+X  return (PI_SQRT6 * z + EULER_G + log(len0*len1))/M_LN2 ;
+}
+X
+/* computes E-value for a given z value, assuming extreme value distribution
+X  (normal distribution when NORMAL_DIST is defined).
+X
+X  zs is on the 50 + 10*z scale.  The multiplier k is the number of
+X  entries (db.entries when entries < 1), or, for DNA/RNA searches, the
+X  database length (db.length plus db.carry*LONG_MAX) divided by n1;
+X  k is never less than 1.  Returns 0 for z > ZS_MAX.
+*/
+double
+zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
+{
+X  double e, z, k;
+X
+X  /* if (db->entries < 5) return 0.0; */
+X
+X  z = (zs - 50.0)/10.0;
+X
+X  if (z > ZS_MAX ) return 0.0;
+X
+X  if (entries < 1) entries = db.entries;
+X
+X  if (dnaseq != SEQT_DNA && dnaseq != SEQT_RNA) {
+X    k = (double)entries;
+X  }
+X  else {
+X    k = (double)db.length /(double)n1;
+X    if (db.carry > 0) {
+X      k += ((double)db.carry * (double)LONG_MAX)/(double)n1;
+X    }
+X  }
+X
+X  if (k < 1.0) k = 1.0;
+X
+#ifndef NORMAL_DIST
+X  z *= PI_SQRT6;
+X  z += EULER_G;
+X  e = exp(-z);
+X  return k * (e > .01 ? 1.0 - exp(-e) : e);
+#else
+X  return k * erfc(z/M_SQRT2)/2.0;
+#endif
+}
+X
+#ifdef NORMAL_DIST
+double np_to_z(double, int *);
+#endif
+X
+/* computes the z-score (50 + 10*z scale) that corresponds to a given
+X   E-value E for a database of `entries` sequences, assuming extreme
+X   value distribution.  With NORMAL_DIST, np_to_z() is used instead and
+X   0 is returned when it reports a fault. */
+double
+E_to_zs(double E, long entries)
+{
+X  double p, z;
+X  int error;
+X
+X  p = E/(double)entries;
+X
+#ifndef NORMAL_DIST
+X  z = (log(p)+EULER_G)/(- PI_SQRT6);
+X  return z*10.0+50.0;
+#else
+X  z = np_to_z(1.0-p,&error);
+X
+X  if (error) return 0.0;
+X  return z*10.0+50.0;
+#endif
+}
+X
+/* computes 1.0 - E value for a given z value, assuming extreme value
+X   distribution: entries * (1 - P(z)), where P(z) is the probability
+X   used by zs_to_E().  zs is on the 50 + 10*z scale.
+X
+X   Returns 0 when entries < 5 and 1.0 when z > ZS_MAX.
+X
+X   With NORMAL_DIST, P(z) = erfc(z/sqrt(2))/2, so its complement is
+X   (1 + erf(z/sqrt(2)))/2.
+*/
+double
+zs_to_Ec(double zs, long entries)
+{
+X  double e, z;
+X
+X  if (entries < 5) return 0.0;
+X
+X  z = (zs - 50.0)/10.0;
+X
+X  if (z > ZS_MAX) return 1.0;
+X
+#ifndef NORMAL_DIST
+X  e = exp(- PI_SQRT6 * z - EULER_G);
+X  /* for small e, exp(-e) is replaced by 1 - e */
+X  return (double)entries * (e > .01 ? exp(-e) : 1.0 - e);
+#else
+X  return (double)entries*(1.0 + erf(z/M_SQRT2))/2.0;
+#endif
+}
+X
+/* calculate a threshold score, given an E() value and Lambda,K,H
+X  n0 and n1 are the two sequence lengths; both are reduced by
+X  ln(K*n0*n1)/H and taken as at least 1.  The result is rounded to the
+X  nearest integer and is never negative. */
+X
+int
+E1_to_s(double e_val, int n0, int n1, struct pstat_str *pu) {
+X  double mp, np, len0, len1, len_corr;
+X  int thresh;
+X
+X  len0 = (double)n0;
+X  len1 = (double)n1;
+X  len_corr = log(pu->r_u.ag.K * len0 * len1)/pu->r_u.ag.H;
+X
+X  mp = len0 - len_corr;
+X  np = len1 - len_corr;
+X
+X  if (np < 1.0) np = 1.0;
+X  if (mp < 1.0) mp = 1.0;
+X
+X  thresh = (int)((log( pu->r_u.ag.K * mp * np) - log(e_val))/pu->r_u.ag.Lambda +0.5);
+X
+X  return (thresh < 0) ? 0 : thresh;
+}
+X
+/* no longer used; stat_str returned by process_hist
+void
+summ_stats(char *s_str, struct pstat_str *pu)
+{
+X strcpy(s_str,f_string);
+}
+*/
+X
+/* vsort(): Shell sort of v[0..n-1] into decreasing order; the entries
+X  of s[] are permuted together with v[].
+X
+X  Defined with a prototype, like the other functions in this file,
+X  rather than as an old-style (K&R) definition.
+*/
+void
+vsort(double *v, int *s, int n)
+{
+X  int gap, i, j;
+X  double tmp;
+X  int itmp;
+X
+X  for (gap=n/2; gap>0; gap/=2)
+X    for (i=gap; i<n; i++)
+X      for (j=i-gap; j>=0; j -= gap) {
+X        if (v[j] >= v[j+gap]) break;
+X        tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
+X        itmp = s[j]; s[j]=s[j+gap]; s[j+gap]=itmp;
+X      }
+}
+X
+/*
+void s_sort (double **ptr, int nbest)
+{
+X int gap, i, j;
+X double *tmp;
+X
+X for (gap = nbest/2; gap > 0; gap /= 2)
+X for (i = gap; i < nbest; i++)
+X for (j = i - gap; j >= 0; j-= gap) {
+X if (*ptr[j] >= *ptr[j + gap]) break;
+X tmp = ptr[j];
+X ptr[j] = ptr[j + gap];
+X ptr[j + gap] = tmp;
+X }
+}
+*/
+X
+/* ss_sort(): Shell sort of ptr[0..n-1] into decreasing order. */
+void ss_sort (int *ptr, int n)
+{
+X  int step, hi, lo;
+X  int held;
+X
+X  for (step = n/2; step > 0; step /= 2) {
+X    for (hi = step; hi < n; hi++) {
+X      lo = hi - step;
+X      /* bubble the larger value towards the front */
+X      while (lo >= 0 && ptr[lo] < ptr[lo + step]) {
+X        held = ptr[lo];
+X        ptr[lo] = ptr[lo + step];
+X        ptr[lo + step] = held;
+X        lo -= step;
+X      }
+X    }
+X  }
+}
+X
+X
+/* s2_sort(): Shell sort of ptr[0..n-1] into decreasing order of the .s
+X  member; the .n member travels with it. */
+void s2_sort (struct s2str *ptr, int n)
+{
+X  int step, hi, lo;
+X  struct s2str held;
+X
+X  for (step = n/2; step > 0; step /= 2) {
+X    for (hi = step; hi < n; hi++) {
+X      /* "!(a >= b)" rather than "a < b": keeps the same behaviour for
+X         unordered (NaN) values */
+X      for (lo = hi - step;
+X           lo >= 0 && !(ptr[lo].s >= ptr[lo + step].s);
+X           lo -= step) {
+X        held = ptr[lo];
+X        ptr[lo] = ptr[lo + step];
+X        ptr[lo + step] = held;
+X      }
+X    }
+X  }
+}
+X
+/* last_stats(): intentionally empty; nothing is done in this file. */
+void last_stats() {}
+X
+/* scale_scores(): recompute the z-score and E-value of each of the
+X  nbest entries of bptr[] with the current find_zp() function, then
+X  re-sort them with sortbeste().  Does nothing when pst.zsflag or
+X  pst.zsflag_f is negative.
+*/
+void
+scale_scores(struct beststr **bptr, int nbest, struct db_str db,
+X             struct pstruct pst, struct pstat_str *rs)
+{
+X  int k;
+X  struct beststr *bp;
+X
+X  if (pst.zsflag < 0 || pst.zsflag_f < 0) return;
+X
+X  for (k=0; k<nbest; k++) {
+X    bp = bptr[k];
+X    /* find_zp() is given the E-value from before this update */
+X    bp->zscore = find_zp(bp->score[pst.score_ix], bp->escore,
+X                         bp->n1,bp->comp,rs);
+X    bp->escore = zs_to_E(bp->zscore,bp->n1,pst.dnaseq, pst.zdb_size,db);
+X  }
+X  sortbeste(bptr,nbest);
+}
+X
+#ifdef NORMAL_DIST
+/* ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3
+X
+X Produces the normal deviate Z corresponding to a given lower
+X tail area of P; Z is accurate to about 1 part in 10**16.
+X
+X The hash sums below are the sums of the mantissas of the
+X coefficients. They are included for use in checking
+X transcription.
+*/
+X
+/* np_to_z(): normal deviate for lower-tail area p.
+X  *fault is set to 0 on success; it is set to 1 (and 0.0 is returned)
+X  when p or 1-p is not greater than zero in the tail branch.
+X  Three rational approximations are used, selected by |p - 0.5| and by
+X  sqrt(-ln(min(p,1-p))). */
+double np_to_z(double p, int *fault) {
+X
+X double q, r, ppnd16;
+X
+X double zero = 0.0, one = 1.0, half = 0.5;
+X double split1 = 0.425, split2 = 5.0;
+X double const1 = 0.180625, const2 = 1.6;
+X
+/* Coefficients for P close to 0.5 */
+X
+X double a0 = 3.3871328727963666080e0;
+X double a1 = 1.3314166789178437745e+2;
+X double a2 = 1.9715909503065514427e+3;
+X double a3 = 1.3731693765509461125e+4;
+X double a4 = 4.5921953931549871457e+4;
+X double a5 = 6.7265770927008700853e+4;
+X double a6 = 3.3430575583588128105e+4;
+X double a7 = 2.5090809287301226727e+3;
+X double b1 = 4.2313330701600911252e+1;
+X double b2 = 6.8718700749205790830e+2;
+X double b3 = 5.3941960214247511077e+3;
+X double b4 = 2.1213794301586595867e+4;
+X double b5 = 3.9307895800092710610e+4;
+X double b6 = 2.8729085735721942674e+4;
+X double b7 = 5.2264952788528545610e+3;
+X
+X /* sum_ab, sum_cd, sum_ef and sum_tmp are referenced only by the
+X    commented-out transcription check below */
+X double sum_ab= 55.8831928806149014439;
+/*
+X Coefficients for P not close to 0, 0.5 or 1.
+*/
+X
+X double c0 = 1.42343711074968357734;
+X double c1 = 4.63033784615654529590;
+X double c2 = 5.76949722146069140550;
+X double c3 = 3.64784832476320460504;
+X double c4 = 1.27045825245236838258;
+X double c5 = 2.41780725177450611770e-1;
+X double c6 = 2.27238449892691845833e-2;
+X double c7 = 7.74545014278341407640e-4;
+X double d1 = 2.05319162663775882187;
+X double d2 = 1.67638483018380384940;
+X double d3 = 6.89767334985100004550e-1;
+X double d4 = 1.48103976427480074590e-1;
+X double d5 = 1.51986665636164571966e-2;
+X double d6 = 5.47593808499534494600e-4;
+X double d7 = 1.05075007164441684324e-9;
+X
+X double sum_cd=49.33206503301610289036;
+/*
+X Coefficients for P near 0 or 1.
+*/
+X double e0 = 6.65790464350110377720e0;
+X double e1 = 5.46378491116411436990e0;
+X double e2 = 1.78482653991729133580e0;
+X double e3 = 2.96560571828504891230e-1;
+X double e4 = 2.65321895265761230930e-2;
+X double e5 = 1.24266094738807843860e-3;
+X double e6 = 2.71155556874348757815e-5;
+X double e7 = 2.01033439929228813265e-7;
+X double f1 = 5.99832206555887937690e-1;
+X double f2 = 1.36929880922735805310e-1;
+X double f3 = 1.48753612908506148525e-2;
+X double f4 = 7.86869131145613259100e-4;
+X double f5 = 1.84631831751005468180e-5;
+X double f6 = 1.42151175831644588870e-7;
+X double f7 = 2.04426310338993978564e-15;
+X
+X double sum_ef=47.52583317549289671629;
+X
+X double sum_tmp = 0.0;
+X
+X /*
+X sum_tmp = a0+a1+a2+a3+a4+a5+a6+a7+b1+b2+b3+b4+b5+b6+b7;
+X if (fabs(sum_tmp - sum_ab) > 1e-12) {
+X fprintf (stderr," sum_ab error: %lg %lg\n",sum_tmp,sum_ab);
+X *fault = 1;
+X return zero;
+X }
+X
+X sum_tmp = c0+c1+c2+c3+c4+c5+c6+c7+d1+d2+d3+d4+d5+d6+d7;
+X if (fabs(sum_tmp - sum_cd) > 1e-12) {
+X fprintf (stderr," sum_cd error: %lg %lg\n",sum_tmp,sum_cd);
+X *fault = 1;
+X return zero;
+X }
+X sum_tmp = e0+e1+e2+e3+e4+e5+e6+e7+f1+f2+f3+f4+f5+f6+f7;
+X if (fabs(sum_tmp - sum_ef) > 1e-12) {
+X fprintf (stderr," sum_ef error: %lg %lg\n",sum_tmp,sum_ef);
+X *fault = 1;
+X return zero;
+X }
+X */
+X
+X *fault = 0;
+X q = p - half;
+X /* central region: |p - 0.5| <= 0.425 */
+X if (fabs(q) <= split1) {
+X r = const1 - q * q;
+X return q * (((((((a7 * r + a6) * r + a5) * r + a4) * r + a3)
+X * r + a2) * r + a1) * r + a0) /
+X (((((((b7 * r + b6) * r + b5) * r + b4) * r + b3)
+X * r + b2) * r + b1) * r + one);
+X }
+X else {
+X /* tails: r is the smaller of p and 1-p */
+X r = (q < zero) ? p : one - p;
+X if (r <= zero) {
+X *fault = 1;
+X return zero;
+X }
+X r = sqrt(-log(r));
+X if (r <= split2) {
+X r -= const2;
+X ppnd16 = (((((((c7 * r + c6) * r + c5) * r + c4) * r + c3)
+X * r + c2) * r + c1) * r + c0) /
+X (((((((d7 * r + d6) * r + d5) * r + d4) * r + d3)
+X * r + d2) * r + d1) * r + one);
+X }
+X else {
+X r -= split2;
+X ppnd16 = (((((((e7 * r + e6) * r + e5) * r + e4) * r + e3)
+X * r + e2) * r + e1) * r + e0) /
+X (((((((f7 * r + f6) * r + f5) * r + f4) * r + f3)
+X * r + f2) * r + f1) * r + one);
+X }
+X /* the approximation gives the magnitude; the sign follows p - 0.5 */
+X if (q < zero) return -ppnd16;
+X else return ppnd16;
+X }
+}
+#endif
+SHAR_EOF
+chmod 0644 scaleswn.c ||
+echo 'restore of scaleswn.c failed'
+Wc_c="`wc -c < 'scaleswn.c'`"
+test 69722 -eq "$Wc_c" ||
+ echo 'scaleswn.c: original size 69722, current size' "$Wc_c"
+fi
+# ============= scaleswt.c ==============
+if test -f 'scaleswt.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping scaleswt.c (File already exists)'
+else
+echo 'x - extracting scaleswt.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'scaleswt.c' &&
+/* scaleswt.c */
+X
+/* $Name: fa_34_26_5 $ - $Id: scaleswt.c,v 1.21 2006/04/12 18:50:01 wrp Exp $ */
+/* as of 24 Sept, 2000 - scaleswn uses no global variables */
+X
+/*
+X copyright (c) 1995, 1996, 2000, 2002 William R. Pearson
+X
+X This version is designed for fasts/f, which used Tatusov
+X probabilities for statistical estimates, but still needs a
+X quick-and-dirty linear regression fit to rank things
+X
+X For comparisons that obey tatusov statistics, we try whenever
+X possible to provide accurate e_scores, rather than raw scores. As a
+X result, no lambda/K fitting is required; and process_hist() can be
+X called at the very beginning of the search to initialize some of the
+X statistics structures and find_zp().
+X
+X find_zp() must still return a valid z_score surrogate, as
+X comp_lib.c/p2_complib.c continue to use z_score's to rank hits, save
+X the best, etc.
+X
+X If e_score's cannot be calculated, the process_hist() provides
+X linear regression fitting for conventional z_score estimates.
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+X
+#include <limits.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "structs.h"
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#define MAXHIST 50
+#define MAX_LLEN 200
+#define LHISTC 5
+#define VHISTC 5
+#define MAX_SSCORE 300
+X
+#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
+X
+#define LN_FACT 10.0
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+X
+#define EULER_G 0.57721566490153286060
+#define PI_SQRT6 1.28254983016186409554
+X
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237
+#endif
+#define LN200 5.2983173666
+#define ZS_MAX 400.0 /* used to prevent underflow on some machines */
+#define TOLERANCE 1.0e-12
+#define TINY 1.0e-6
+X
+/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
+/* Regression/statistics parameters for this file's find_z*() and
+   proc_hist_*() functions (they take a struct rstat_str *).
+   NOTE(review): the code that fills these fields lies outside the part
+   of the file reviewed here; the notes marked "presumably" are
+   assumptions based on names - confirm against that code. */
+struct rstat_str {
+X double ngLambda, ngK, ngH; /* presumably Lambda, K, H parameters - confirm */
+X double rho, rho_e, mu, mu_e, mean_var, var_e; /* ?_e:std. error of ? */
+/* used by REG2_STATS */
+X double rho2, mu2, var_cutoff;
+X int n_trimmed; /* excluded because of high z-score */
+X int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
+X /* presumably parameters of the Tatusov-statistics fit mentioned in the
+X file header, with have_tat flagging their availability - confirm */
+X double tat_a, tat_b, tat_c, spacefactor;
+X int have_tat;
+X int tie_j;
+};
+X
+#define AVE_STATS 0 /* no length effect, only mean/variance */
+double find_zt(int score, double escore, int len, double comp, struct rstat_str *);
+X
+double find_zn(int score, double escore, int len, double comp, struct rstat_str *);
+X
+double power(double, int);
+X
+void sortbesto(double *, int );
+extern void sortbeste(struct beststr **bptr, int nbest);
+X
+int proc_hist_n(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp, int do_trim,
+X struct rstat_str *);
+X
+#define REG_STATS 1 /* length-regression scaled */
+double find_zr(int score, double escore, int len, double comp, struct rstat_str *);
+X
+int proc_hist_r(struct stat_str *sptr, int n,
+X struct pstruct pst, struct hist_str *histp,
+X int do_trim, struct rstat_str *rs);
+X
+double (*find_zp)(int score, double escore, int len, double comp,
+X struct rstat_str *) = &find_zr;
+X
+struct llen_str {
+X int min, max; /* range of log-length bins scanned by the fits; max is set to MAX_LLEN by inithist() */
+X int max_score, min_score; /* extreme scores seen by addhist() */
+X int *hist; /* hist[b]: number of scores in log-length bin b */
+X double *score_sums, *score2_sums; /* per-bin sum of scores and sum of squared scores */
+X double *score_var; /* per-bin variance term, filled in by fit_llen()/fit_llens() */
+X int max_length, min_length, zero_s; /* extreme lengths seen; zero_s counts entries rejected by addhist() */
+X int fit_flag; /* cleared when the length regression cannot be used */
+};
+X
+static void inithist(struct llen_str *, struct pstruct, int);
+static void free_hist( struct llen_str *);
+static void addhist(struct llen_str *, int, int, int);
+static void prune_hist(struct llen_str *, int, int, int, long *);
+void inithistz(int, struct hist_str *histp);
+void addhistz(double zs, struct hist_str *histp);
+X
+static void fit_llen(struct llen_str *, struct rstat_str *);
+static void fit_llens(struct llen_str *, struct rstat_str *);
+X
+void linreg(double *lny, double *x, double *lnx, int n,
+X double *a, double *b, double *c, int start);
+X
+double calc_spacefactor(const unsigned char *, int, int, int);
+X
+double det(double a11, double a12, double a13,
+X double a21, double a22, double a23,
+X double a31, double a32, double a33);
+X
+double factorial (int a, int b);
+X
+/* void set_db_size(int, struct db_str *, struct hist_str *); */
+X
+#ifdef DEBUG
+FILE *tmpf;
+#endif
+X
+/* process_hist() - set up the statistics for one search.
+X
+X Allocates (or clears) the rstat_str referenced by *rs_sp, selects the
+X find_zp() implementation and, when e_scores are not available, fits
+X the sampled scores with proc_hist_r().
+X
+X Returns pst.zsflag unchanged when it is negative (*rs_sp is set to
+X NULL), 1 when m_msg.escore_flg is set, -1 when fewer than 20 scores
+X were sampled (*rs_sp is freed and set to NULL), otherwise the value
+X returned by proc_hist_r(). do_hist is accepted but not used here.
+*/
+int
+process_hist(struct stat_str *sptr, int nstats,
+X struct mngmsg m_msg,
+X struct pstruct pst,
+X struct hist_str *histp,
+X struct rstat_str **rs_sp,
+X int do_hist
+X )
+{
+X int do_trim;
+X struct rstat_str *rs_s;
+X
+X if (pst.zsflag < 0) {
+X *rs_sp = NULL;
+X return pst.zsflag;
+X }
+X
+X if (*rs_sp == NULL) {
+X if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
+X /* sizeof yields a size_t; cast it so that it matches %ld */
+X fprintf(stderr," cannot allocate rs_s: %ld\n",(long)sizeof(struct rstat_str));
+X exit(1);
+X }
+X else *rs_sp = rs_s;
+X }
+X else {
+X rs_s = *rs_sp;
+X memset(rs_s,0,sizeof(struct rstat_str));
+X }
+X
+X /* e_scores are available: no fitting is needed, only the histogram */
+X if (m_msg.escore_flg) {
+X find_zp = &find_zt;
+X inithistz(MAXHIST,histp);
+X return 1;
+X }
+X
+X if (nstats < 20) {
+X fprintf(stderr," too few sequences for sampling: %d\n",nstats);
+X free(rs_s);
+X *rs_sp = NULL;
+X return -1;
+X }
+X
+X rs_s->ngLambda = m_msg.Lambda;
+X rs_s->ngK = m_msg.K;
+X rs_s->ngH = m_msg.H;
+X
+X /* zsflag values of 10 or more turn trimming off */
+X do_trim = (pst.zsflag >= 10) ? 0 : 1;
+X
+X find_zp = &find_zr;
+X return proc_hist_r(sptr, nstats,pst, histp, do_trim, rs_s);
+}
+X
+/* calc_thresh() - choose the cap applied to raw scores and the z-score
+X trimming limit for a sample of nstats scores.
+X
+X The cap is derived from the ungapped Lambda/K: it is the score whose
+X expectation is 0.01 for nstats comparisons of a pst.n0 query against
+X sequences of a nominal length (5000 for DNA/RNA, 400 otherwise). For
+X non-nucleotide searches Lambda is scaled by 0.7, which errs towards a
+X higher cap. H is accepted but not used.
+X
+X Returns the score cap; stores the trimming z-score (scaled to mean 50,
+X s.d. 10) in *zstrim.
+*/
+int
+calc_thresh(struct pstruct pst, int nstats,
+X double Lambda, double K, double H, double *zstrim)
+{
+X int score_cap, is_nucleic;
+X double nominal_len, lambda_scale, e_arg, z_lim;
+X
+X is_nucleic = (pst.dnaseq == SEQT_DNA || pst.dnaseq == SEQT_RNA);
+X nominal_len = is_nucleic ? 5000.0 : 400.0;
+X lambda_scale = is_nucleic ? 1.0 : 0.7;
+X
+#ifndef NORMAL_DIST
+X e_arg = 0.01/((double)nstats*K*(double)pst.n0*nominal_len);
+X e_arg = -log(e_arg)/(Lambda*lambda_scale);
+X score_cap = (int)(e_arg+0.5);
+X
+X /* extreme-value z for a probability of 1/nstats */
+X z_lim = 1.0/(double)nstats;
+X z_lim = (log(z_lim)+EULER_G)/(-PI_SQRT6);
+#else
+X score_cap = 100;
+X z_lim = 5.0;
+#endif
+X *zstrim = 10.0*z_lim+50.0;
+X return score_cap;
+}
+X
+int /* proc_hist_r(): length-regression (REG_STATS) fit of the nstats sampled
+X scores in sptr[]. Falls back to proc_hist_n() (and returns its value)
+X when the score range is below 10 or fit_llen() clears fit_flag;
+X otherwise writes the summary line into histp->stat_info and returns
+X REG_STATS. */
+proc_hist_r(struct stat_str *sptr, int nstats,
+X struct pstruct pst, struct hist_str *histp,
+X int do_trim, struct rstat_str *rs)
+{
+X int i, max_hscore;
+X double zs, ztrim;
+X char s_string[128];
+X struct llen_str llen;
+X char *f_string;
+X llen.fit_flag=1;
+X llen.hist=NULL; /* tells alloc_hist() (via inithist()) that the arrays must be allocated */
+X
+X max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
+X rs->ngK, rs->ngH, &ztrim);
+X
+X inithist(&llen,pst,max_hscore);
+X f_string = &(histp->stat_info[0]);
+X
+X for (i = 0; i<nstats; i++)
+X addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
+X histp->entries = nstats - llen.zero_s;
+X
+X if ((llen.max_score - llen.min_score) < 10) { /* too little score spread to regress on length */
+X free_hist(&llen);
+X llen.fit_flag = 0;
+X find_zp = &find_zn;
+X return proc_hist_n(sptr, nstats, pst, histp, do_trim, rs);
+X }
+X
+X fit_llen(&llen, rs); /* now we have rho, mu, rho2, mu2, mean_var
+X to set the parameters for the histogram */
+X
+X if (!llen.fit_flag) { /* the fit failed, fall back to proc_hist_n */
+X free_hist(&llen);
+X find_zp = &find_zn;
+X return proc_hist_n(sptr,nstats, pst, histp, do_trim, rs);
+X }
+X
+X rs->n_trimmed= rs->n1_trimmed = rs->nb_trimmed = 0;
+X
+X if (do_trim) {
+X if (llen.fit_flag) { /* always true here: the !fit_flag case returned above */
+X for (i = 0; i < nstats; i++) {
+X zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
+X if (zs < 20.0 || zs > ztrim) { /* remove scores whose z-score is outside 20..ztrim */
+X rs->n_trimmed++;
+X prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
+X &(histp->entries));
+X }
+X }
+X }
+X
+X /* fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", rs->n_trimmed); */
+X
+X if (llen.fit_flag) fit_llens(&llen, rs); /* refit on the pruned histogram, also dropping high-variance bins */
+X
+X /* fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", rs->n1_trimmed,rs->nb_trimmed); */
+X }
+X
+X
+X free_hist(&llen); /* only the arrays are released; llen.zero_s is still read below */
+X
+X /* rst all the scores in the histogram */
+X
+X if (pst.zsflag < 10) s_string[0]='\0';
+X else if (pst.zs_win > 0)
+X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X else strncpy(s_string,"(shuffled)",sizeof(s_string));
+X
+X inithistz(MAXHIST, histp);
+X
+X sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d B-trim: %d in %d/%d\n Lambda= %6.4f",
+X s_string,
+X rs->rho*LN_FACT,sqrt(rs->rho_e),rs->mu,sqrt(rs->mu_e), rs->mean_var,sqrt(rs->var_e),
+X llen.zero_s, rs->n_trimmed, rs->n1_trimmed, rs->nb_trimmed, rs->nb_tot,
+X PI_SQRT6/sqrt(rs->mean_var));
+X return REG_STATS;
+}
+X
+X
+/* ave_from_sums() - helper for proc_hist_n(): set rs->mu and rs->mean_var
+X from the running sum and sum of squares of n scores. With fewer than
+X two scores the defaults mu=50, mean_var=10 are used; a variance below
+X 0.01 is replaced by max(mu, 1.0). */
+static void
+ave_from_sums(struct rstat_str *rs, double s_score, double s2_score, int n)
+{
+X if (n > 1) {
+X rs->mu = s_score/(double)n;
+X rs->mean_var = s2_score - (double)n * rs->mu * rs->mu;
+X rs->mean_var /= (double)(n-1);
+X }
+X else {
+X rs->mu = 50.0;
+X rs->mean_var = 10.0;
+X }
+X
+X if (rs->mean_var < 0.01) {
+X rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
+X }
+}
+X
+/* proc_hist_n() - unscaled (AVE_STATS) statistics: mean and variance of
+X the sampled scores, with no length correction.
+X
+X Scores in the range 1..max_hscore (from calc_thresh()) are averaged;
+X then, for up to 5 rounds, scores whose find_zn() z-score is below 20
+X or above the trim limit are removed and the mean/variance recomputed,
+X stopping once a round removes fewer than LHISTC scores. Removed
+X entries are marked by negating sptr[i].n1. The summary line is written
+X to histp->stat_info. do_trim is accepted but not consulted.
+X
+X Returns AVE_STATS.
+*/
+int
+proc_hist_n(struct stat_str *sptr, int nstats,
+X struct pstruct pst, struct hist_str *histp,
+X int do_trim, struct rstat_str *rs)
+{
+X int i, j;
+X double s_score, s2_score, ssd;
+X double ztrim;
+X int nit, max_hscore;
+X char s_string[128];
+X char *f_string;
+X
+X f_string = &(histp->stat_info[0]);
+X
+X max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
+X rs->ngK, rs->ngH, &ztrim);
+X
+X s_score = s2_score = 0.0;
+X
+X histp->entries = 0;
+X
+X for ( j = 0, i = 0; i < nstats; i++) {
+X if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
+X s_score += (ssd=(double)sptr[i].score);
+X s2_score += ssd * ssd;
+X histp->entries++;
+X j++;
+X }
+X }
+X
+X ave_from_sums(rs, s_score, s2_score, j);
+X
+X /* now remove some scores */
+X
+X nit = 5;
+X while (nit-- > 0) {
+X rs->n_trimmed = 0;
+X
+X for (i=0; i< nstats; i++) {
+X if (sptr[i].n1 < 0) continue; /* already removed in an earlier round */
+X /* only scores that were added to the sums above may be taken out
+X again; subtracting one that was never counted would corrupt
+X the sums and the counts */
+X if (sptr[i].score <= 0 || sptr[i].score > max_hscore) continue;
+X ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
+X if (ssd > ztrim || ssd < 20.0) {
+X ssd = sptr[i].score;
+X s_score -= ssd;
+X s2_score -= ssd*ssd;
+X j--;
+X rs->n_trimmed++;
+X histp->entries--;
+X sptr[i].n1 = -sptr[i].n1;
+X }
+X }
+X
+X ave_from_sums(rs, s_score, s2_score, j);
+X
+X if (rs->n_trimmed < LHISTC) {
+X break;
+X }
+X }
+X
+X if (pst.zsflag < 10) s_string[0]='\0';
+X else if (pst.zs_win > 0)
+X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
+X else strncpy(s_string,"(shuffled)",sizeof(s_string));
+X
+X sprintf(f_string,"%s unscaled statistics: mu= %6.4f var=%6.4f; Lambda= %6.4f",
+X s_string, rs->mu,rs->mean_var,PI_SQRT6/sqrt(rs->mean_var));
+X return AVE_STATS;
+}
+X
+X
+/*
+This routine calculates the maximum likelihood estimates for the
+extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
+X
+X <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
+X <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
+X
+X The <a> parameter can be transformed into the K
+X of the formula: 1 - exp ( - K m n exp ( - lambda S ))
+X using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
+X 1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
+X
+X a = log(K m n) / lambda
+X a lambda = log (K m n)
+X exp(a lambda) = K m n
+X but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
+X so: K m n = (1/n sum{ exp( -x[i] *lambda)})
+X K = sum{}/(nlib m n )
+X
+*/
+X
+/* alloc_hist() - make sure the per-bin arrays of *llen exist and are zeroed.
+X
+X llen->max must already be set; each array gets llen->max+1 entries.
+X The arrays are allocated only when llen->hist is NULL, so a histogram
+X that was set up before is simply cleared. An allocation failure is
+X reported on stderr and the program exits, as elsewhere in this file.
+*/
+void
+alloc_hist(struct llen_str *llen)
+{
+X int max_llen, i;
+X
+X max_llen = llen->max;
+X
+X if (llen->hist == NULL) {
+X llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
+X llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+X llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
+X llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
+X
+X /* the arrays are written to immediately below, so a failed
+X allocation must not be allowed through */
+X if (llen->hist == NULL || llen->score_sums == NULL ||
+X llen->score2_sums == NULL || llen->score_var == NULL) {
+X fprintf(stderr," cannot allocate length histogram: %d bins\n",max_llen+1);
+X exit(1);
+X }
+X }
+X
+X for (i=0; i< max_llen+1; i++) {
+X llen->hist[i] = 0;
+X llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
+X }
+}
+X
+/* free_hist() - release the four per-bin arrays of *llen.
+X Does nothing when llen->hist is NULL; afterwards llen->hist is NULL so
+X that a second call is harmless. */
+void
+free_hist(struct llen_str *llen)
+{
+X if (llen->hist == NULL) return;
+X
+X free(llen->score_var);
+X free(llen->score2_sums);
+X free(llen->score_sums);
+X free(llen->hist);
+X
+X llen->hist = NULL;
+}
+X
+/* inithist() - prepare *llen for a new set of scores: MAX_LLEN+1 zeroed
+X bins, and running minima/maxima set so that the first addhist() call
+X replaces them. pst and max_hscore are accepted but not used. */
+void
+inithist(struct llen_str *llen, struct pstruct pst, int max_hscore)
+{
+X /* alloc_hist() sizes the arrays from llen->max */
+X llen->max = MAX_LLEN;
+X alloc_hist(llen);
+X
+X llen->min_score = 10000;
+X llen->max_score = -1;
+X
+X llen->min_length = 10000;
+X llen->max_length = 0;
+X
+X llen->zero_s = 0;
+}
+X
+/* addhist() - add one (score, length) observation to the log-length
+X histogram.
+X
+X Observations with score <= 0 or length < LENGTH_CUTOFF are only
+X counted in zero_s (and force min_score to 0). Otherwise the running
+X score/length extremes are updated with the raw score, the score is
+X capped at max_hscore, and the capped score is accumulated in bin
+X (int)(LN_FACT*ln(length)+0.5), clamped to 0..llen->max. */
+void
+addhist(struct llen_str *llen, int score, int length, int max_hscore)
+{
+X int bin;
+X double capped;
+X
+X if (!(score > 0 && length >= LENGTH_CUTOFF)) {
+X llen->zero_s++;
+X llen->min_score = 0;
+X return;
+X }
+X
+X /* extremes are tracked before the score is capped */
+X if (llen->min_score > score) llen->min_score = score;
+X if (llen->max_score < score) llen->max_score = score;
+X
+X if (llen->max_length < length) llen->max_length = length;
+X if (llen->min_length > length) llen->min_length = length;
+X
+X capped = (double)((score > max_hscore) ? max_hscore : score);
+X
+X bin = (int)(LN_FACT*log((double)length)+0.5);
+X if (bin < 0) bin = 0;
+X if (bin > llen->max) bin = llen->max;
+X
+X llen->hist[bin]++;
+X llen->score_sums[bin] += capped;
+X llen->score2_sums[bin] += capped * capped;
+}
+X
+/* histogram will go from z-scores of 20 .. 120 with mean 50 and z=10 */
+X
+X
+/* inithistz() - set up the z-score histogram in *histp.
+X
+X The z-score range is fixed at 20..120; histint is the bin width that
+X gives about mh bins and maxh the resulting number of bins. hist_a is
+X allocated on first use (histflg records whether that worked) and is
+X cleared on later calls. */
+void
+inithistz(int mh, struct hist_str *histp )
+{
+X int i;
+X
+X histp->min_hist = 20;
+X histp->max_hist = 120;
+X
+X histp->histint = (int)
+X ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5);
+X histp->maxh = (int)
+X ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5);
+X
+X if (histp->hist_a != NULL) {
+X /* already allocated by an earlier call: just clear it */
+X for (i = 0; i < histp->maxh; i++) histp->hist_a[i] = 0;
+X return;
+X }
+X
+X histp->hist_a = (int *)calloc((size_t)histp->maxh,sizeof(int));
+X if (histp->hist_a == NULL) {
+X fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
+X histp->histflg = 0;
+X }
+X else {
+X histp->histflg = 1;
+X }
+}
+X
+/* fasts/f will not show any histogram */
+void
+addhistz(double zs, struct hist_str *histp)
+{ /* deliberately empty: zs and histp are ignored, nothing is added to the histogram */
+}
+X
+/* prune_hist() - undo one addhist() call: take the (score, length)
+X observation back out of its log-length bin and decrement *entries.
+X
+X Observations that addhist() would have rejected (score <= 0 or
+X length < LENGTH_CUTOFF) are ignored. The score is capped at
+X max_hscore and the bin is computed exactly as in addhist(). */
+void
+prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
+X long *entries)
+{
+X int bin;
+X double capped;
+X
+X if (!(score > 0 && length >= LENGTH_CUTOFF)) return;
+X
+X capped = (double)((score > max_hscore) ? max_hscore : score);
+X
+X bin = (int)(LN_FACT*log((double)length)+0.5);
+X if (bin < 0) bin = 0;
+X if (bin > llen->max) bin = llen->max;
+X
+X llen->hist[bin]--;
+X llen->score_sums[bin] -= capped;
+X llen->score2_sums[bin] -= capped * capped;
+X
+X (*entries)--;
+}
+X
+/* fit_llen: no trimming
+X (1) regress scores vs log(n) using weighted variance
+X (2) calculate mean variance after length regression
+*/
+X
+void /* fit_llen(): weighted linear fit of the binned scores against the bin
+X index (bin = LN_FACT*ln(length)). Stores the slope/intercept rho/mu
+X with rho_e/mu_e, mean_var, var_e, rho2/mu2 and var_cutoff in *pr, and
+X nb_tot = number of bins holding more than one score. Clears
+X llen->fit_flag when fewer than 5 such bins exist, when the
+X normal-equation determinant is <= 0.001, or when mean_var <= 0.01. */
+fit_llen(struct llen_str *llen, struct rstat_str *pr)
+{
+X int j;
+X int n;
+X int n_size;
+X double x, y2, u, z;
+X double mean_x, mean_y, var_x, var_y, covar_xy;
+X double mean_y2, covar_xy2, var_y2, dllj;
+X
+X double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
+X
+/* now fit scores to best linear function of log(n), using
+X simple linear regression */
+X /* find the first non-empty bin, then step back one; the scans below start there */
+X for (llen->min=0; llen->min < llen->max; llen->min++)
+X if (llen->hist[llen->min]) break;
+X llen->min--;
+X /* per-bin weight term: (mean of squares - squared mean)/(count-1), floored at 0.1 */
+X for (n_size=0,j = llen->min; j < llen->max; j++) {
+X if (llen->hist[j] > 1) {
+X dllj = (double)llen->hist[j];
+X llen->score_var[j] = llen->score2_sums[j]/dllj
+X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+X llen->score_var[j] /= (double)(llen->hist[j]-1);
+X if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;
+X n_size++;
+X }
+X }
+X
+X pr->nb_tot = n_size;
+X /* weighted sums for the regression; each bin is weighted by 1/score_var[j] */
+X n_w = 0.0;
+X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+X for (j = llen->min; j < llen->max; j++)
+X if (llen->hist[j] > 1) {
+X x = j + 0.5;
+X dllj = (double)llen->hist[j];
+X n_w += dllj/llen->score_var[j];
+X sum_x += dllj * x / llen->score_var[j] ;
+X sum_y += llen->score_sums[j] / llen->score_var[j];
+X sum_x2 += dllj * x * x /llen->score_var[j];
+X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
+X }
+X /* too few usable bins: no slope, mu is the weighted mean (NOTE(review): n_w is 0 when no bin qualified) */
+X if (n_size < 5 ) {
+X llen->fit_flag=0;
+X pr->rho = 0;
+X pr->mu = sum_y/n_w;
+X return;
+X }
+X else {
+X delta = n_w * sum_x2 - sum_x * sum_x;
+X if (delta > 0.001) {
+X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
+X pr->rho_e = n_w/delta;
+X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+X pr->mu_e = sum_x2/delta;
+X }
+X else {
+X llen->fit_flag = 0;
+X pr->rho = 0;
+X pr->mu = sum_y/n_w;
+X return;
+X }
+X }
+X /* the next three statements recompute the delta, rho and mu already set above */
+X delta = n_w * sum_x2 - sum_x * sum_x;
+X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
+X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+X /* unweighted moments over the same bins (these scans include bin llen->max) */
+X n = 0;
+X mean_x = mean_y = mean_y2 = 0.0;
+X var_x = var_y = 0.0;
+X covar_xy = covar_xy2 = 0.0;
+X
+X for (j = llen->min; j <= llen->max; j++)
+X if (llen->hist[j] > 1 ) {
+X n += llen->hist[j];
+X x = (double)j + 0.5;
+X mean_x += (double)llen->hist[j] * x;
+X mean_y += llen->score_sums[j];
+X var_x += (double)llen->hist[j] * x * x;
+X var_y += llen->score2_sums[j];
+X covar_xy += x * llen->score_sums[j];
+X }
+X mean_x /= n; mean_y /= n;
+X var_x = var_x / n - mean_x * mean_x;
+X var_y = var_y / n - mean_y * mean_y;
+X
+X covar_xy = covar_xy / n - mean_x * mean_y;
+/*
+X pr->rho = covar_xy / var_x;
+X pr->mu = mean_y - pr->rho * mean_x;
+*/
+X mean_y2 = covar_xy2 = var_y2 = 0.0;
+X for (j = llen->min; j <= llen->max; j++)
+X if (llen->hist[j] > 1) {
+X x = (double)j + 0.5;
+X u = pr->rho * x + pr->mu; /* score predicted for this bin */
+X y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u; /* sum over the bin of (score - u)^2 */
+/*
+X dllj = (double)llen->hist[j];
+X fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],
+X llen->score_sums[j]/dllj,y2/dllj);
+*/
+X mean_y2 += y2;
+X var_y2 += y2 * y2;
+X covar_xy2 += x * y2;
+X /* fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",
+X x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */
+X }
+X /* mean squared residual about the fitted line */
+X pr->mean_var = mean_y2 /= (double)n;
+X covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
+X
+X if (pr->mean_var <= 0.01) {
+X llen->fit_flag = 0;
+X pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
+X }
+X
+X /*
+X fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
+X pr->rho*LN_FACT,pr->mu,pr->mean_var);
+X */
+X if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
+X else pr->var_e = 0.0;
+X /* rho2/mu2: linear fit of the squared residuals against the bin index */
+X if (llen->fit_flag) {
+X pr->rho2 = covar_xy2 / var_x;
+X pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
+X }
+X else {
+X pr->rho2 = 0;
+X pr->mu2 = pr->mean_var;
+X }
+X /* z: the length at which the rho2/mu2 line is evaluated to give var_cutoff; never below 2*LENGTH_CUTOFF */
+X if (pr->rho2 < 0.0 )
+X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+X if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+X
+X pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;
+}
+X
+/* fit_llens: trim high variance bins
+X (1) regress scores vs log(n) using weighted variance
+X (2) regress residuals vs log(n)
+X (3) remove high variance bins
+X (4) calculate mean variance after length regression
+*/
+X
+void /* fit_llens(): like fit_llen(), but afterwards empties every bin whose
+X variance deviation exceeds three times the root-mean-square deviation
+X (counting them in pr->nb_trimmed / pr->n1_trimmed) and then calls
+X fit_llen() on what is left.
+X NOTE(review): delta, n and n_u2 are used as divisors with no zero check. */
+fit_llens(struct llen_str *llen, struct rstat_str *pr)
+{
+X int j;
+X int n, n_u2;
+X double x, y, y2, u, u2, v, z;
+X double mean_x, mean_y, var_x, var_y, covar_xy;
+X double mean_y2, covar_xy2;
+X double mean_u2, mean_3u2, dllj;
+X double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
+X
+/* now fit scores to best linear function of log(n), using
+X simple linear regression */
+X /* find the first non-empty bin, then step back one; the scans below start there */
+X for (llen->min=0; llen->min < llen->max; llen->min++)
+X if (llen->hist[llen->min]) break;
+X llen->min--;
+X /* per-bin weight term, as in fit_llen() but floored at 1.0 instead of 0.1 */
+X for (j = llen->min; j < llen->max; j++) {
+X if (llen->hist[j] > 1) {
+X dllj = (double)llen->hist[j];
+X llen->score_var[j] = (double)llen->score2_sums[j]/dllj
+X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
+X llen->score_var[j] /= (double)(llen->hist[j]-1);
+X if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
+X }
+X }
+X /* weighted sums for the regression; each bin is weighted by 1/score_var[j] */
+X n_w = 0.0;
+X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
+X for (j = llen->min; j < llen->max; j++)
+X if (llen->hist[j] > 1) {
+X x = j + 0.5;
+X dllj = (double)llen->hist[j];
+X n_w += dllj/llen->score_var[j];
+X sum_x += dllj * x / llen->score_var[j] ;
+X sum_y += llen->score_sums[j] / llen->score_var[j];
+X sum_x2 += dllj * x * x /llen->score_var[j];
+X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
+X }
+X
+X delta = n_w * sum_x2 - sum_x * sum_x;
+X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
+X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
+X
+/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
+X /* unweighted moments over the same bins (these scans include bin llen->max) */
+X n = 0;
+X mean_x = mean_y = mean_y2 = 0.0;
+X var_x = var_y = 0.0;
+X covar_xy = covar_xy2 = 0.0;
+X
+X for (j = llen->min; j <= llen->max; j++)
+X if (llen->hist[j] > 1 ) {
+X n += llen->hist[j];
+X x = (double)j + 0.5;
+X dllj = (double)llen->hist[j];
+X mean_x += dllj * x;
+X mean_y += llen->score_sums[j];
+X var_x += dllj * x * x;
+X var_y += llen->score2_sums[j];
+X covar_xy += x * llen->score_sums[j];
+X }
+X mean_x /= n; mean_y /= n;
+X var_x = var_x / n - mean_x * mean_x;
+X var_y = var_y / n - mean_y * mean_y;
+X
+X covar_xy = covar_xy / n - mean_x * mean_y;
+/* pr->rho = covar_xy / var_x;
+X pr->mu = mean_y - pr->rho * mean_x;
+*/
+X
+X mean_y2 = covar_xy2 = 0.0;
+X for (j = llen->min; j <= llen->max; j++)
+X if (llen->hist[j] > 1) {
+X x = (double)j + 0.5;
+X u = pr->rho * x + pr->mu; /* score predicted for this bin */
+X y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u; /* sum over the bin of (score - u)^2 */
+X mean_y2 += y2;
+X covar_xy2 += x * y2;
+X }
+X /* rho2/mu2: linear fit of the squared residuals against the bin index */
+X mean_y2 /= n;
+X covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
+X pr->rho2 = covar_xy2 / var_x;
+X pr->mu2 = mean_y2 - pr->rho2 * mean_x;
+X /* z: the length at which the rho2/mu2 line is evaluated to give var_cutoff; never below 2*LENGTH_CUTOFF */
+X if (pr->rho2 < 0.0 )
+X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
+X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
+X if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
+X
+X pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
+X
+/* fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
+X pr->var_cutoff,z);
+*/
+X mean_u2 = 0.0;
+X n_u2 = 0;
+X for ( j = llen->min; j < llen->max; j++) {
+X y = j+0.5;
+X dllj = (double)llen->hist[j];
+X x = pr->rho * y + pr->mu; /* predicted score for this bin */
+X v = pr->rho2 * y + pr->mu2; /* predicted variance for this bin, floored at var_cutoff */
+X if (v < pr->var_cutoff) v = pr->var_cutoff;
+X if (llen->hist[j]> 1) {
+X u2 = (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj; /* observed minus predicted sum of squared residuals */
+X mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1); /* score_var[] is reused here to hold the squared deviation */
+X n_u2++;
+X /* fprintf(stderr," %d (%d) u2: %.2f v*ll: %.2f %.2f\n",
+X j,llen->hist[j],u2,v*dllj,sqrt(llen->score_var[j])); */
+X }
+X else llen->score_var[j] = -1.0;
+X }
+X
+X mean_u2 = sqrt(mean_u2/(double)n_u2);
+X /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
+X
+X mean_3u2 = mean_u2*3.0;
+X /* empty the bins that deviate by more than three times the r.m.s. deviation */
+X for (j = llen->min; j < llen->max; j++) {
+X if (llen->hist[j] <= 1) continue;
+X if (sqrt(llen->score_var[j]) > mean_3u2) {
+X /* fprintf(stderr," removing %d %d %.2f\n",
+X j, (int)(exp((double)j/LN_FACT)-0.5),
+X sqrt(llen->score_var[j]));
+X */
+X pr->nb_trimmed++;
+X pr->n1_trimmed += llen->hist[j];
+X llen->hist[j] = 0;
+X }
+X }
+X fit_llen(llen, pr); /* final fit on the remaining bins */
+}
+X
+X
+/* REG_STATS - Z() from rho/mu/mean_var
+X
+X Returns 0.0 for score <= 0 or length < LENGTH_CUTOFF. Otherwise the
+X score predicted for this length (rho * LN_FACT*ln(length) + mu) is
+X subtracted, the residual is divided by sqrt(mean_var), and the result
+X is rescaled to mean 50, s.d. 10. escore and comp are not used. */
+double find_zr(int score, double escore, int length, double comp,
+X struct rstat_str *rs)
+{
+X double scaled_ln_len, z_raw;
+X
+X if (score <= 0 || length < LENGTH_CUTOFF) return 0.0;
+X
+X scaled_ln_len = LN_FACT*log((double)(length));
+X
+X z_raw = ((double)score - rs->rho * scaled_ln_len - rs->mu) / sqrt(rs->mean_var);
+X
+X return (50.0 + z_raw*10.0);
+}
+X
+/* find_zt() - z-score surrogate from an e_score: -log2(escore).
+X A non-positive escore gives the fixed ceiling 744.440071/ln(2).
+X score, length, comp and rs are not used. */
+double find_zt(int score, double escore, int length, double comp,
+X struct rstat_str *rs)
+{
+X if (!(escore > 0.0)) return 744.440071/M_LN2;
+X
+X return -log(escore)/M_LN2;
+}
+X
+/* find_zn() - AVE_STATS z-score: (score - mu)/sqrt(mean_var), rescaled
+X to mean 50, s.d. 10. No length correction; escore, length and comp
+X are not used. */
+double find_zn(int score, double escore, int length, double comp,
+X struct rstat_str *rs)
+{
+X double z_raw = ((double)score - rs->mu) / sqrt(rs->mean_var);
+X
+X return (50.0 + z_raw*10.0);
+}
+X
+/* computes E value for a given z value, assuming extreme value distribution */
+/* z_to_E() - expectation for z value zs under the extreme value
+X distribution, scaled by the number of sequences: entries when it is
+X at least 1, otherwise db.entries. Returns 0.0 for zs > ZS_MAX. */
+double
+z_to_E(double zs, long entries, struct db_str db)
+{
+X double tail, n_seqs;
+X
+X if (entries >= 1) { n_seqs = entries; }
+X else { n_seqs = db.entries; }
+X
+X if (zs > ZS_MAX) return 0.0;
+X
+X tail = exp(-PI_SQRT6 * zs - EULER_G);
+X
+X /* very small values are used directly instead of 1 - exp(-tail) */
+X if (tail > .01) return n_seqs * (1.0 - exp(-tail));
+X return n_seqs * tail;
+}
+X
+double /* zs_to_p(): returns zs unchanged */
+zs_to_p(double zs)
+{
+X return zs;
+}
+X
+/* this version assumes the probability is in the ->zscore variable,
+X which is provided by this file after last_scale()
+*/
+X
+/* zs_to_bit() - returns zs + log2(n0*n1).
+X The product is formed in double: n0*n1 as an int overflows once the
+X two lengths multiply to more than INT_MAX. */
+double
+zs_to_bit(double zs, int n0, int n1)
+{
+X return zs+log((double)n0*(double)n1)/M_LN2 ;
+}
+X
+/* computes E-value for a given z value, assuming extreme value distribution */
+/* zs_to_E() - returns k * 2^(-zs).
+X
+X k is the search-space size: for DNA/RNA it is db.length/n1
+X (multiplied by db.carry*LONG_MAX when db.carry > 0), otherwise the
+X number of entries (db.entries when entries < 1); k is never taken
+X below 1. Returns 0.0 when zs > ZS_MAX or zs*ln(2) > 100. */
+double
+zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
+{
+X double e, k;
+X
+X if (zs > ZS_MAX ) return 0.0;
+X
+X if (entries < 1) entries = db.entries;
+X
+X if (dnaseq != SEQT_DNA && dnaseq != SEQT_RNA) {
+X k = (double)entries;
+X }
+X else {
+X k = (double)db.length /(double)n1;
+X if (db.carry > 0) { k *= (double)db.carry * (double)LONG_MAX;}
+X }
+X
+X if (k < 1.0) k = 1.0;
+X
+X zs *= M_LN2;
+X e = ( zs > 100.0) ? 0.0 : exp(-zs);
+X return k * e;
+}
+X
+/* computes the scaled z value (mean 50, s.d. 10) that corresponds to
+X expectation E in a search of 'entries' sequences; uses the extreme
+X value distribution unless NORMAL_DIST is defined */
+double
+E_to_zs(double E, long entries)
+{
+X double p_one, z_raw;
+X int error;
+X
+X p_one = E/(double)entries;
+X
+#ifndef NORMAL_DIST
+X z_raw = (log(p_one)+EULER_G)/(-PI_SQRT6);
+X return z_raw*10.0+50.0;
+#else
+X z_raw = np_to_z(1.0-p_one,&error);
+X
+X if (error) return 0.0;
+X return z_raw*10.0+50.0;
+#endif
+}
+X
+/* computes 1.0 - E value for a given z value, assuming extreme value
+X distribution; zs is on the mean 50, s.d. 10 scale. Returns 0.0 when
+X entries < 5 and 1.0 when the unscaled z exceeds ZS_MAX. */
+double
+zs_to_Ec(double zs, long entries)
+{
+X double tail, z_raw;
+X
+X if (entries < 5) return 0.0;
+X
+X z_raw = (zs - 50.0)/10.0;
+X if (z_raw > ZS_MAX) return 1.0;
+X
+X tail = exp(-PI_SQRT6 * z_raw - EULER_G);
+X
+X if (tail > .01) return (double)entries * exp(-tail);
+X return (double)entries * (1.0 - tail);
+}
+X
+/* vsort() - shell sort of v[0..n-1] into descending order; the
+X companion array s[] is permuted in step with v[].
+X Defined with a prototype, like every other function in this file. */
+void
+vsort(double *v, int *s, int n)
+{
+X int gap, i, j;
+X double tmp;
+X int itmp;
+X
+X for (gap=n/2; gap>0; gap/=2)
+X for (i=gap; i<n; i++)
+X for (j=i-gap; j>=0; j -= gap) {
+X if (v[j] >= v[j+gap]) break;
+X tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
+X itmp = s[j]; s[j]=s[j+gap]; s[j+gap]=itmp;
+X }
+}
+X
+/* sort_escore() - shell sort of v[0..n-1] into ascending order, in place */
+void
+sort_escore(double *v, int n)
+{
+X int step, hi, lo;
+X double held;
+X
+X for (step = n/2; step > 0; step /= 2) {
+X for (hi = step; hi < n; hi++) {
+X /* sift v[hi] down through the elements 'step' apart */
+X lo = hi - step;
+X while (lo >= 0 && v[lo] > v[lo+step]) {
+X held = v[lo+step];
+X v[lo+step] = v[lo];
+X v[lo] = held;
+X lo -= step;
+X }
+X }
+X }
+}
+X
+/* scale_tat - compute 'a', 'b', 'c' coefficients for scaling fasts/f
+X escores
+X 5-May-2003 - also calculate index for high ties
+X
+X escore[0..nstats-1] must already be sorted (the sort call below is
+X commented out). Leading negative escores are skipped. For
+X each run of equal escores, ln(escore) is paired with the rank
+X fraction at the middle of the run, and linreg() fits
+X ln(escore) = a*ln(x) + b*x + c, ignoring the first 5% of the points
+X (at most 500) when do_trim is set. The result is stored in
+X rs->tat_a/tat_b/tat_c and rs->tie_j is set to 0.005 * db_entries.
+X
+X If no non-negative escore is left, only tie_j is set and the
+X coefficients are left as they were.
+*/
+void
+scale_tat(double *escore, int nstats,
+X long db_entries, int do_trim,
+X struct rstat_str *rs)
+{
+X int i, j, k, start;
+X double *x, *lnx, *lny;
+X double ln_e, rank, ln_rank;
+X
+X /* sort_escore(escore, nstats); */
+X
+X /* skip the negative escores, but never walk past the end of the array */
+X while (nstats > 0 && *escore<0.0) {escore++; nstats--; }
+X
+X /* calculate tie_j - the index below which all scores are considered
+X positional ties */
+X rs->tie_j = 0.005 * db_entries;
+X
+X if (nstats <= 0) return; /* nothing to fit */
+X
+X x = (double *) calloc(nstats, sizeof(double));
+X if(x == NULL) {
+X fprintf(stderr, "Couldn't calloc tatE/x\n");
+X exit(1);
+X }
+X
+X lnx = (double *) calloc(nstats,sizeof(double));
+X if(lnx == NULL) {
+X fprintf(stderr, "Couldn't calloc tatE/lnx\n");
+X exit(1);
+X }
+X
+X lny = (double *) calloc(nstats,sizeof(double));
+X if(lny == NULL) {
+X fprintf(stderr, "Couldn't calloc tatE/lny\n");
+X exit(1);
+X }
+X
+X for(i = 0 ; i < nstats ; i = j) {
+X
+X /* [i, j) is a run of tied escores */
+X for(j = i+1 ; j < nstats ; j++) {
+X if(escore[j] != escore[i]) break;
+X }
+X
+X ln_e = log(escore[i]);
+X rank = ((((double)i + (double)(j - i - 1)/2.0)*(double)nstats/(double)db_entries)+1.0)/(double)nstats;
+X ln_rank = log(rank);
+X
+X /* every member of the run gets the same point */
+X for(k = i ; k < j ; k++) {
+X lny[k] = ln_e;
+X x[k] = rank;
+X lnx[k] = ln_rank;
+X }
+X }
+X
+X if (!do_trim) {
+X start = 0;
+X } else {
+X start = (int)(0.05 * (double) nstats);
+X start = start > 500 ? 500 : start;
+X }
+X
+X linreg(lny, x, lnx, nstats, &rs->tat_a, &rs->tat_b, &rs->tat_c, start);
+X
+X /* I have the coefficients I need - a, b, c; free arrays */
+X
+X free(lny);
+X free(lnx);
+X free(x);
+}
+X
+/* linreg() - least-squares fit of lny[i] = a*lnx[i] + b*x[i] + c over
+X the points start..n-1. The 3x3 normal equations are solved by
+X Cramer's rule with det(); no check is made for a zero determinant. */
+void
+linreg(double *lny, double *x, double *lnx, int n,
+X double *a, double *b, double *c, int start) {
+X
+X /* right-hand side: sums of y*ln(x), y*x, y */
+X double y_l = 0.0, y_x = 0.0, y_1 = 0.0;
+X /* normal matrix: sums of ln(x)^2, ln(x)*x, ln(x), x^2, x, 1 */
+X double l_l = 0.0, l_x = 0.0, l_1 = 0.0;
+X double x_x = 0.0, x_1 = 0.0, one_1 = 0.0;
+X double denom;
+X int idx;
+X
+X idx = start;
+X while (idx < n) {
+X y_l += lny[idx] * lnx[idx];
+X y_x += lny[idx] * x[idx];
+X y_1 += lny[idx];
+X
+X l_l += lnx[idx] * lnx[idx];
+X l_x += lnx[idx] * x[idx];
+X l_1 += lnx[idx];
+X
+X x_x += x[idx] * x[idx];
+X x_1 += x[idx];
+X
+X one_1 += 1.0;
+X idx++;
+X }
+X
+X denom = det(l_l, l_x, l_1, l_x, x_x, x_1, l_1, x_1, one_1);
+X
+X /* replace one column at a time with the right-hand side */
+X *a = det(y_l, l_x, l_1, y_x, x_x, x_1, y_1, x_1, one_1) / denom;
+X *b = det(l_l, y_l, l_1, l_x, y_x, x_1, l_1, y_1, one_1) / denom;
+X *c = det(l_l, l_x, y_l, l_x, x_x, y_x, l_1, x_1, y_1) / denom;
+X
+}
+X
+/* det() - determinant of the 3x3 matrix (a11 .. a33), expanded along
+X the first row */
+double det(double a11, double a12, double a13,
+X double a21, double a22, double a23,
+X double a31, double a32, double a33)
+{
+X const double minor1 = a22 * a33 - a32 * a23;
+X const double minor2 = a21 * a33 - a31 * a23;
+X const double minor3 = a21 * a32 - a31 * a22;
+X
+X return a11 * minor1 - a12 * minor2 + a13 * minor3;
+}
+X
+/* last_stats() - final statistics pass, run once the search is complete.
+X
+X Makes sure *rs_sp points to an rstat_str (allocating one if needed),
+X sorts bestp_arr[] with sortbeste(), and stores calc_spacefactor() in
+X spacefactor. When pst.zsflag is 1..4 the escores that are <= 1.0 are
+X collected (from bestp_arr[] when m_msg.escore_flg is set, otherwise
+X from sptr[]) and sorted; if there are more than 100 of them
+X scale_tat() fits tat_a/b/c and have_tat is set to 1. In every other
+X case have_tat is 0. histp->stat_info receives the summary line and
+X histp->entries is reset to 0.
+*/
+void
+last_stats(const unsigned char *aa0, int n0,
+X struct stat_str *sptr, int nstats,
+X struct beststr **bestp_arr, int nbest,
+X struct mngmsg m_msg, struct pstruct pst,
+X struct hist_str *histp, struct rstat_str **rs_sp)
+{
+X double *obs_escore;
+X int i, nobs, n_alloc, do_trim;
+X long db_entries;
+X struct rstat_str *rs_s;
+X
+X if (*rs_sp == NULL) {
+X if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
+X /* sizeof yields a size_t; cast it so that it matches %ld */
+X fprintf(stderr," cannot allocate rs_s: %ld\n",(long)sizeof(struct rstat_str));
+X exit(1);
+X }
+X else *rs_sp = rs_s;
+X }
+X else rs_s = *rs_sp;
+X
+X histp->entries = 0;
+X
+X sortbeste(bestp_arr,nbest);
+X
+X rs_s->spacefactor =
+X calc_spacefactor(aa0, n0, m_msg.nm0,pst.nsq);
+X
+X if (pst.zsflag >= 1 && pst.zsflag <= 4) {
+X if (m_msg.escore_flg) {
+X n_alloc = nbest;
+X do_trim = 1;
+X }
+X else {
+X n_alloc = nstats;
+X do_trim = 0;
+X }
+X
+X /* ask for at least one element: calloc(0, ...) may legitimately
+X return NULL, which must not be mistaken for a failure */
+X if ((obs_escore = (double *)calloc((size_t)(n_alloc > 0 ? n_alloc : 1),
+X sizeof(double)))==NULL) {
+X fprintf(stderr," cannot allocate obs_escore[%d]\n",n_alloc);
+X exit(1);
+X }
+X
+X if (m_msg.escore_flg) {
+X for (i=nobs=0; i<nbest; i++) {
+X if (bestp_arr[i]->escore<= 1.00)
+X obs_escore[nobs++]=bestp_arr[i]->escore;
+X }
+X db_entries = m_msg.db.entries;
+X }
+X else {
+X for (i=nobs=0; i<nstats; i++) {
+X if (sptr[i].escore <= 1.00 ) obs_escore[nobs++]=sptr[i].escore;
+X }
+X db_entries = nobs;
+X }
+X
+X sortbesto(obs_escore,nobs);
+X if (nobs > 100) {
+X scale_tat(obs_escore,nobs,db_entries,do_trim,rs_s);
+X rs_s->have_tat=1;
+X sprintf(histp->stat_info,"scaled Tatusov statistics (%d): tat_a: %6.4f tat_b: %6.4f tat_c: %6.4f",
+X nobs,rs_s->tat_a, rs_s->tat_b, rs_s->tat_c);
+X }
+X else {
+X rs_s->have_tat=0;
+X sprintf(histp->stat_info,"Space_factor %.4g scaled statistics",
+X rs_s->spacefactor);
+X }
+X free(obs_escore);
+X }
+X else {
+X rs_s->have_tat=0;
+X histp->stat_info[0] = '\0';
+X }
+}
+X
+/* scale_scores() takes the best (real) scores and re-scales them;
+X beststr bptr[] must be sorted */
+X
+void /* scale_scores(): rescale the escore of each of the nbest entries in
+X bptr[] (sorted here with sortbeste()), then convert it: escores above
+X 0.01 become 1 - exp(-escore), zscore is set to -log2(escore), and
+X escore is finally multiplied by pst.zdb_size. */
+scale_scores(struct beststr **bptr, int nbest, struct db_str db,
+X struct pstruct pst, struct rstat_str *rs)
+{
+X int i, j, k;
+X double obs, r_a, r_b, r_c;
+X
+X /* this scale function absolutely requires that the results be sorted
+X before it is used */
+X
+X sortbeste(bptr,nbest);
+X
+X if (!rs->have_tat) { /* no fitted coefficients: plain spacefactor scaling */
+X for (i=0; i<nbest; i++) {
+X bptr[i]->escore *= rs->spacefactor;
+X }
+X }
+X else {
+X
+X /* here when have_tat is set; last_stats() sets it only when it
+X collected more than 100 escores */
+X
+X r_a = rs->tat_a; r_b = rs->tat_b; r_c = rs->tat_c;
+X
+X /* the problem with scaletat is that the E() value is related to
+X ones position in the list of top scores - thus, knowing the score
+X is not enough - one must know the rank */
+X
+X for(i = 0 ; i < nbest ; ) {
+X /* take the bottom 0.5%, and the ties, and treat them all the same */
+X j = i + 1;
+X while (j< nbest &&
+X (j <= (0.005 * db.entries) || bptr[j]->escore == bptr[i]->escore)
+X ) {
+X j++;
+X }
+X
+X /* observed frequency */
+X obs = ((double)i + ((double)(j - i - 1)/ 2.0) + 1.0)/(double)db.entries; /* NOTE(review): db.entries is used as a divisor with no zero check */
+X
+X /* make certain ties all have the same correction */
+X for (k = i ; k < j ; k++) {
+X bptr[k]->escore *= obs/exp(r_a*log(obs) + r_b*obs + r_c);
+X }
+X i = k; /* k == j here: continue with the next group */
+X }
+X }
+X
+X for (i=0; i<nbest; i++) {
+X if(bptr[i]->escore > 0.01)
+X bptr[i]->escore = 1.0 - exp(-bptr[i]->escore);
+X if (bptr[i]->escore > 0.0)
+X bptr[i]->zscore = -log(bptr[i]->escore)/M_LN2;
+X else
+X bptr[i]->zscore = 744.440071/M_LN2; /* same fixed ceiling as find_zt() uses for a non-positive escore */
+X bptr[i]->escore *= pst.zdb_size;
+X }
+}
+X
+/* scale_one_score() returns escore re-scaled for a hit at list position
+X   ipos.  Without a fit (rs->have_tat == 0) the result is
+X   escore * rs->spacefactor; otherwise the correction is computed from
+X   the observed frequency (ipos+1)/db.entries and rs->tat_a/b/c.
+X   Positions below rs->tie_j are all treated as position rs->tie_j/2. */
+double scale_one_score (int ipos, double escore,
+X                        struct db_str db,
+X                        struct rstat_str *rs) {
+X  double freq;
+X
+X  if (rs->have_tat) {
+X    if (ipos < rs->tie_j) ipos = rs->tie_j/2;
+X
+X    freq = ((double)ipos + 1.0)/(double)db.entries;
+X
+X    return escore * (freq/exp(rs->tat_a*log(freq) + rs->tat_b*freq + rs->tat_c));
+X  }
+X
+X  return escore * rs->spacefactor;
+}
+X
+/* calc_spacefactor() returns the factor later used to scale E()-values
+X   (see rs->spacefactor).
+X
+X   Built without FASTF: returns 2^nm0 - 1; aa0, n0 and nsq are unused.
+X
+X   Built with FASTF: aa0 is read as nm0 rows of stride nmoff, of which
+X   the first nmoff-1 entries of each row are used, where
+X   nmoff = (n0 - nm0 + 1)/nm0 + 1.  For every column j the number of
+X   arrangements of each length 0..nm0 that can be drawn from the
+X   residues seen in that column is computed (factors[l][j]); the
+X   result is the sum over l = 1..nm0 of the product over columns of
+X   factors[l][j], divided by factorial(l, 1).
+X
+X   nsq must exceed every residue code stored in aa0, since the codes
+X   index counts[].  min() and factorial() are defined elsewhere.
+X   The process exits with status 1 if a calloc() fails.
+*/
+double calc_spacefactor(const unsigned char *aa0, int n0,
+X                        int nm0, int nsq) {
+X
+#if !defined(FASTF)
+X  return pow(2.0, (double) nm0) - 1.0;
+#else
+X
+X  int i, j, n, l, nr, bin, k;
+X  int nmoff;
+X  int **counts;
+X  int **factors;
+X  double tmp, result = 0.0;
+X
+X  nmoff = (n0 - nm0 + 1)/nm0+1;
+X
+X  /* counts[residue][column]: one contiguous slab plus a row index */
+X  counts = (int **) calloc(nsq, sizeof(int *));
+X  if(counts == NULL) {
+X    fprintf(stderr, "couldn't calloc counts array!\n");
+X    exit(1);
+X  }
+X
+X  counts[0] = (int *) calloc(nsq * (nmoff - 1), sizeof(int));
+X  if(counts[0] == NULL) {
+X    fprintf(stderr, "couldn't calloc counts array!\n");
+X    exit(1);
+X  }
+X
+X  for(i = 0 ; i < nsq ; i++) {
+X    counts[i] = counts[0] + (i * (nmoff - 1));
+X  }
+X
+X  for(i = 0 ; i < nm0 ; i++) {
+X    for(j = 0 ; j < (nmoff - 1) ; j++) {
+X      counts[ aa0[nmoff * i + j] ] [ j ] ++;
+X    }
+X  }
+X
+X  /* factors[length][column], lengths 0..nm0, same slab layout */
+X  factors = (int **) calloc(nm0 + 1, sizeof(int *));
+X  if(factors == NULL) {
+X    fprintf(stderr, "Couldn't calloc factors array!\n");
+X    exit(1);
+X  }
+X
+X  factors[0] = (int *) calloc((nm0 + 1) * (nmoff - 1), sizeof(int));
+X  if(factors[0] == NULL) {
+X    fprintf(stderr, "Couldn't calloc factors array!\n");
+X    exit(1);
+X  }
+X
+X  for(i = 0 ; i <= nm0 ; i++) {
+X    factors[i] = factors[0] + (i * (nmoff - 1));
+X  }
+X
+X  /*
+X    this algorithm was adapted from the GAP4 library's NrArrangement function:
+X    The GAP Group, GAP --- Groups, Algorithms, and Programming,
+X    Version 4.1; Aachen, St Andrews, 1999.
+X    (http://www-gap.dcs.st-and.ac.uk/ gap)
+X  */
+X
+X  /* calculate K factors for each column in query: */
+X  for(j = 0 ; j < (nmoff - 1) ; j++) {
+X
+X    /* only one way to select 0 elements */
+X    factors[0][j] = 1;
+X
+X    /* for each of the possible elements in this column */
+X    for(n = 0 ; n < nsq ; n++) {
+X
+X      /* if there aren't any of these, skip it */
+X      if(counts[n][j] == 0) { continue; }
+X
+X      /* loop over the possible lengths of the arrangement: K..0;
+X         going downwards means factors[l-i][j] (i > 0) still holds
+X         the value from before element <n> was added */
+X      for(l = nm0 ; l >= 0 ; l--) {
+X        nr = 0;
+X        bin = 1;
+X
+X        /*
+X          compute the number of arrangements of length <l>
+X          using only the first <n> elements of <mset>
+X        */
+X        for(i = 0, k = min(counts[n][j], l); i <= k ; i++) {
+X
+X          /*
+X            add the number of arrangements of length <l>
+X            that consist of <l>-<i> of the first <n>-1 elements
+X            and <i> copies of the <n>th element
+X          */
+X          nr += bin * factors[l-i][j];
+X
+X          /* bin steps from C(l,i) to C(l,i+1).  The arithmetic is
+X             done in double, not float: a float mantissa holds only
+X             24 bits, so larger coefficients were rounded and could
+X             truncate to the wrong integer. */
+X          bin = (int) ((double) bin * (double) (l - i) / (double) (i + 1));
+X        }
+X
+X        factors[l][j] = nr;
+X      }
+X    }
+X  }
+X
+X  result = 0.0;
+X  for(i = 1 ; i <= nm0 ; i++) {
+X    tmp = 1.0;
+X    for(j = 0 ; j < (nmoff - 1) ; j++) {
+X      tmp *= (double) factors[i][j];
+X    }
+X    tmp /= factorial(i, 1);
+X    result += tmp;
+X  }
+X
+X  free(counts[0]);
+X  free(counts);
+X  free(factors[0]);
+X  free(factors);
+X
+X  return result;
+#endif
+}
+X
+/* sortbesto() sorts obs[0..nobs-1] into ascending order, in place, with
+X   a Shell sort that steps through a fixed table of 16 decreasing gaps
+X   ending in 1. */
+void sortbesto (double *obs, int nobs)
+{
+X  int pass, stride, pos, hole;
+X  double held;
+X  int gaps[16] = { 1391376, 463792, 198768, 86961, 33936,
+X                   13776, 4592, 1968, 861, 336,
+X                   112, 48, 21, 7, 3, 1 };
+X
+X  for (pass = 0; pass < 16; pass++) {
+X    stride = gaps[pass];
+X    /* gapped insertion sort for this stride */
+X    for (pos = stride; pos < nobs; pos++) {
+X      held = obs[pos];
+X      for (hole = pos; hole >= stride && obs[hole-stride] > held; hole -= stride) {
+X        obs[hole] = obs[hole - stride];
+X      }
+X      obs[hole] = held;
+X    }
+X  }
+}
+SHAR_EOF
+chmod 0644 scaleswt.c ||
+echo 'restore of scaleswt.c failed'
+Wc_c="`wc -c < 'scaleswt.c'`"
+test 37581 -eq "$Wc_c" ||
+ echo 'scaleswt.c: original size 37581, current size' "$Wc_c"
+fi
+# ============= search.html ==============
+if test -f 'search.html' -a X"$1" != X"-c"; then
+ echo 'x - skipping search.html (File already exists)'
+else
+echo 'x - extracting search.html (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'search.html' &&
+<html>
+<head>
+<title>FASTA Sequence Comparison Engine</title></head>
+<body bgcolor="white" >
+X
+<h1 align=center>Search with FASTA</h1>
+X
+<form action="http://fasta.bioch.virginia.edu/fasta/cgi/searchnn.cgi" method=post>
+X
+<b>Choose program and database(s) to query:</b><br>
+<b>Program:</b>
+<select name = "program">
+X <option> FASTA
+X <option> FASTX
+X <option> FASTY
+X <option> FASTF
+X <option> FASTS
+X <option> TFASTX
+X <option> TFASTY
+X <option> TFASTF
+X <option> TFASTS
+X <option> SSEARCH
+</select><br><br>
+X
+<b>Databases:</b> <font color="blue">Blue databases</font> and possibly DNA databases can be re-searched<br>
+<table align=center cellspacing=10>
+<tr>
+<td><b>Protein</b><br>
+X <input type=checkbox name = "libpa" value="a">NBRF Annotated Protein Database (rel. 53)<br>
+X <input type=checkbox name = "libpp" value="p">NBRF Protein Database (complete)<br>
+X <input type=checkbox name = "libpd" value="d">NRL_3d structure database<br>
+X <input type=checkbox name = "libpn" value="n"><font color="blue">NCBI/Blast NR protein database</font><br>
+X <input type=checkbox name = "libpk" value="k"><font color="blue">NCBI/Blast NR protein database (seg)</font><br>
+X <input type=checkbox name = "libps" value="q"><font color="blue">NCBI/Blast Swiss-Prot</font><br>
+X <input type=checkbox name = "libpr" value="r"><font color="blue">NCBI/BLAST Swiss-Prot (seg)</font><br>
+X <input type=checkbox name = "libpo" value="o">OWL Nonredundant database<br>
+X <input type=checkbox name = "libpy" value="y">Yeast Proteins<br>
+</td>
+X
+<td><b>DNA</b><br>
+X <input type=checkbox name = "libnp" value="p">Primate<br>
+X <input type=checkbox name = "libnr" value="r">Rodent<br>
+X <input type=checkbox name = "libnm" value="m">Other Mammals<br>
+X <input type=checkbox name = "libnb" value="b">Vertebrates<br>
+X <input type=checkbox name = "libnh" value="h">High Throughput Genomics<br>
+X <input type=checkbox name = "libni" value="i">Invertebrates<br>
+X <input type=checkbox name = "libnl" value="l">Plants<br>
+X <input type=checkbox name = "libnt" value="t">Bacteria<br>
+</td>
+X
+<td valign=top><br>
+X <input type=checkbox name = "libns" value="s">Structural RNA<br>
+X <input type=checkbox name = "libnv" value="v">Viral<br>
+X <input type=checkbox name = "libng" value="g">Phage<br>
+X <input type=checkbox name = "libnz" value="z">Synthetics<br>
+X <input type=checkbox name = "libne" value="e">EST sequences<br>
+X <input type=checkbox name = "libnf" value="f"><font color="blue">BLAST human ESTs</font><br>
+X <input type=checkbox name = "libnc" value="c"><font color="blue">BLAST mouse ESTs</font><br>
+</td>
+</tr>
+</table>
+<p>
+<b>Sequence type:</b><br>
+<input type=radio name="seqtype" value=1 checked>Protein
+<input type=radio name="seqtype" value=2>DNA (both strands)
+<input type=radio name="seqtype" value=3>DNA (forward only)
+<input type=radio name="seqtype" value=4>DNA (rev-comp only)
+X
+<p>
+<b>Enter query sequence: </b><select name="in_seq"><option>FASTA format<option>Accession/GI number</select> <b>Subset range:</b>
+<input type=text name="ssr" maxlength=20 size=10></input>
+<table>
+<tr>
+<td>
+<textarea name="sequence" rows=6 cols=60 wrap=hard align=left></textarea>
+<td valign=top>
+<a href="http://www.ncbi.nlm.nih.gov/Entrez/protein.html" target="entrez_window">Entrez protein sequence browser</A><br><br>
+<a href="http://www.ncbi.nlm.nih.gov/Entrez/nucleotide.html" target="entrez_window">Entrez DNA sequence browser</A>
+<br><br>
+<input type=submit name="input" value="Submit Query">
+</table>
+<br><br>
+X
+<b>Other options:</b><br>
+<table>
+<tr>
+<td>
+<b>Ktup:</b><br>
+<input type=text name="ktup" maxlength=3 size=3></input>
+<td>
+<b>Protein matrix:</b><br>
+<select name = "pmatrix">
+X <option> Default
+X <option> Blosum50
+X <option> Blosum62
+X <option> Blosum80
+X <option> Pam250
+X <option> Pam120
+X <option> MD20
+X <option> MD10
+</select>
+<td>
+X <b>DNA matrix:</b><br>
+<select name = "dmatrix">
+X <option> Default
+X <option> +4/-3
+X <option> blastn2
+X <option> +4/-4
+X <option> +4/-8
+</select>
+<td>
+X <b>gap:</b><br>
+<input type=text name="gap" maxlength=4 size=3></input>
+<td>
+X <b>ext:</b><br>
+<input type=text name="ext" maxlength=4 size=3></input>
+<td>
+<b>misc:</b><br>
+<input type=text name="out_opt" maxlength=10 size=5></input>
+</tr>
+</table>
+<br>
+X
+<b>Output limits:</b><br>
+<b>E():</b><input type=text name="eval" maxlength=6 size=4></input>
+<b>Highest E():</b><input type=text name="etop" maxlength=6 size=4></input>
+<b>scores:</b><input type=text name="best" maxlength=3 size=3></input>
+<b>alignments:</b><input type=text name="align" maxlength=3 size=3></input>
+</form>
+<br>
+X
+<hr>
+<CENTER>
+<a href="http://fasta.bioch.virginia.edu/">FASTA Home</a> | <a href="search.html">Search FASTA</a> |
+<a href="ftp://ftp.virginia.edu/pub/fasta/"> Get FASTA</a> |
+<a href="http://www.people.virginia.edu/~wrp/pearson.html">About the Author </a>
+<hr>
+X
+<br>
+X
+<font size=-1><i><br>
+Copyright 1988, 1991, 1992, 1993, 1994, 1995, 1997, 1999 by
+William R. Pearson and the University of Virginia. All rights
+reserved. The FASTA program and documentation may not be sold or
+incorporated into a commercial product, in whole or in part, without
+written consent of William R. Pearson and the University of Virginia.
+X
+</i></font>
+</center>
+X
+</body>
+</html>
+SHAR_EOF
+chmod 0644 search.html ||
+echo 'restore of search.html failed'
+Wc_c="`wc -c < 'search.html'`"
+test 5247 -eq "$Wc_c" ||
+ echo 'search.html: original size 5247, current size' "$Wc_c"
+fi
+# ============= showrss.c ==============
+if test -f 'showrss.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping showrss.c (File already exists)'
+else
+echo 'x - extracting showrss.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'showrss.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: showrss.c,v 1.12 2006/04/12 18:00:02 wrp Exp $ */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#include "structs.h"
+#include "param.h"
+X
+extern double
+zs_to_E(double zs, int n1, int isdna, long entries,struct db_str db);
+extern double zs_to_bit(double zs, int n0, int n1);
+extern double zs_to_p(double zs);
+X
+extern double (*find_zp)(int score, double escore, int length, double comp, void *);
+X
+extern char *prog_func;
+X
+/* showbest() (showrss.c variant): writes a short summary of a shuffle
+X run to fp - the program name, the shuffle count and kind, and the
+X score, bit score, p() and expectation of the first entry bptr[0].
+X
+X Only fp, bptr[0], m_msg (label, shuff_max, shuff_wid, n0),
+X pst.zdb_size and db are referenced; aa0, aa1, maxn, nbest, qlib,
+X gstring2 and f_str are not used in this body.
+X
+X NOTE(review): bptr[0] is read without checking nbest - confirm that
+X callers never pass an empty list. */
+void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1, int maxn,
+X struct beststr **bptr, int nbest, int qlib, struct mngmsg *m_msg,
+X struct pstruct pst, struct db_str db,
+X char *gstring2, void **f_str)
+{
+X double zs; /* not used in this body */
+X int score; /* not used in this body */
+X char *rlabel;
+X struct beststr *bbp;
+X
+X /* rlabel points at the last blank in m_msg->label (so the text it
+X yields starts with that blank), or at the whole label if it
+X contains no blank */
+X if ((rlabel=strrchr(m_msg->label,' '))==NULL) rlabel = m_msg->label;
+X
+X fprintf(fp,"\n %s - %d shuffles; ",prog_func,m_msg->shuff_max);
+X /* a positive shuff_wid selects the "window shuffle" wording */
+X if (m_msg->shuff_wid > 0)
+X fprintf(fp," window shuffle, window size: %d\n",m_msg->shuff_wid);
+X else
+X fprintf(fp," uniform shuffle\n");
+X
+X /* everything below describes the first entry of the list only */
+X bbp = bptr[0];
+X
+X fprintf(fp," unshuffled %s score: %d; bits(s=%d|n_l=%d): %4.1f p(%d) < %g\n",
+X rlabel,bbp->score[0],bbp->score[0], bbp->n1,
+X zs_to_bit(bbp->zscore,m_msg->n0,bbp->n1),bbp->score[0],zs_to_p(bbp->zscore));
+X
+X /* zs_to_E() is called with isdna == 0 and pst.zdb_size as the number
+X of entries */
+X fprintf(fp,"For %ld sequences, a score >= %d is expected %4.4g times\n\n",
+X pst.zdb_size,bbp->score[0],zs_to_E(bbp->zscore,bbp->n1,0l,pst.zdb_size,db));
+}
+X
+/* showalign() (showrss.c variant): intentionally empty - this file
+X provides the symbol but produces no alignment output. */
+void showalign (FILE *fp, unsigned char *aa0, unsigned char *aa1, int maxn,
+X struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,
+X struct pstruct pst, void *f_str, char *gstring2)
+{
+}
+X
+/* aancpy() copies at most count characters from from[] to to[], stopping
+X   early at a NUL in from[], and always NUL-terminates to[].  A source
+X   value <= pst.nsq is translated through pst.sq[]; any other value is
+X   copied unchanged.  to[] must have room for count+1 characters. */
+void
+aancpy(char *to, char *from, int count,
+X       struct pstruct pst)
+{
+X  char *out = to;
+X
+X  for ( ; count-- && *from; from++) {
+X    if (*from <= pst.nsq) {
+X      *out++ = pst.sq[*from];
+X    }
+X    else {
+X      *out++ = *from;
+X    }
+X  }
+X  *out = '\0';
+}
+SHAR_EOF
+chmod 0644 showrss.c ||
+echo 'restore of showrss.c failed'
+Wc_c="`wc -c < 'showrss.c'`"
+test 2033 -eq "$Wc_c" ||
+ echo 'showrss.c: original size 2033, current size' "$Wc_c"
+fi
+# ============= showsum.c ==============
+if test -f 'showsum.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping showsum.c (File already exists)'
+else
+echo 'x - extracting showsum.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'showsum.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: showsum.c,v 1.21 2006/06/22 15:00:51 wrp Exp $ */
+X
+/* 10 December 1999 --
+X
+X code modified to reflect the fact that there may be two scores for
+X each sequence - e.g. forward and reverse strand - and only one of them
+X - presumably the best - is a related score.
+*/
+X
+/* showsum.c should report statistics for success -
+X
+X given the sorted results
+X
+X (1) find the highest scoring unrelated sequence: unf_score0
+X find the number of related sequences missed: relm_num0
+X (2) find the 0.5% highest scoring unrelated sequence: unf_score05
+X find the number of related sequences missed: relm_num05
+X (3) find the score where the number of related sequences
+X missed and the number of unrelated sequences found
+X matches; report the score and the number: equ_score, equ_num;
+X
+The query sequence library number will be put in qsfnum.
+X
+*/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+X
+#include "defs.h"
+#include "param.h"
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+#include "structs.h"
+X
+#ifndef SFCHAR
+#define SFCHAR ':'
+#define NSFCHAR '!'
+#endif
+X
+#ifdef PCOMPLIB
+#define BSFNUM(i) bptr[i]->desptr->sfnum
+#define QSFNUM qsfnum
+#define NQSFNUM qsfnum_n
+#else
+#define BSFNUM(i) bptr[i]->sfnum
+#define QSFNUM m_msg->qsfnum
+#define NQSFNUM m_msg->qsfnum_n
+#endif
+X
+#define MAX_BLINE 200
+X
+double E_to_zs(double, long);
+double zs_to_E(double,int,int,long,struct db_str db);
+double zs_to_bit(double,int,int);
+#ifdef PVM_SRC
+void sf_sort(int *s, int n);
+#endif
+void lnum_sort(struct beststr **s, int n);
+X
+/* showbest() (showsum.c variant): for one query, report to fp how well
+X   the scored library sequences in bptr[0..nbest-1] separate sequences
+X   that share a superfamily with the query from those that do not.
+X
+X   Arguments used:
+X     fp       - output stream for the summary
+X     bptr     - result pointers; may be re-ordered and compacted here
+X     nbest    - number of entries in bptr (reduced locally if several
+X                scores for one library sequence are merged)
+X     qlib     - query number, printed on the first summary line
+X     m_msg    - qtitle, qframe, nframe, nbr_seq, n0, ashow are read
+X                (plus qsfnum/qsfnum_n unless PCOMPLIB); nshow is set
+X     pst      - zsflag, score_ix, dnaseq, zdb_size, debug_lib are read
+X     db       - db.entries is used for the E() cutoffs
+X   aa0, aa1, maxn, gstring2 and f_str are not used in this body.
+X
+X   sfn_cmp(), sortbest() and sortbeste() are defined elsewhere; this
+X   code treats a non-zero sfn_cmp() result as "shares a superfamily".
+X
+X   Returns early, with a message on stderr and without setting
+X   m_msg->nshow, if (PCOMPLIB) the query title carries no superfamily
+X   number, or if no library sequence is unrelated to the query.
+*/
+void showbest (FILE *fp,
+#ifndef PCOMPLIB
+X               unsigned char **aa0, unsigned char *aa1, int maxn,
+#endif
+X               struct beststr **bptr,int nbest,
+X               int qlib, struct mngmsg *m_msg, struct pstruct pst,
+X               struct db_str db,
+X               char *gstring2
+#ifndef PCOMPLIB
+X               ,void *f_str
+#endif
+X               )
+{
+X  int i, j, k, rel_tot;
+X  int irelv;
+X
+X  int unf_num0, relm_num0;
+X  int unf_num01,relm_num01;
+X  int unf_num02, relm_num02;
+X  int unf_num05, relm_num05;
+X  int unf_num100, relm_num100;
+X  int equ_num, rel_3_num, rel_1_num;
+X
+X  double unf_score0, unf_score01, unf_score02 ,unf_score05;
+X  double unf_score100, equ_score, rel_3_score, rel_1_score;
+X  double unf_score0_b, unf_score01_b, unf_score02_b ,unf_score05_b;
+X  double unf_score100_b, equ_score_b, rel_3_score_b, rel_1_score_b;
+X  char *bp;
+X
+#ifdef PCOMPLIB
+X  int qsfnum[10],qsfnum_n[10],isf,nsf,nsf_n;
+X  char *bp1, *bpn, *tp;
+X  char sfstr[MAX_FN];
+#endif
+X
+#ifdef PCOMPLIB
+X  /* not done here because done in pvcomplib.c */
+X  if ((bp=strchr(m_msg->qtitle,SFCHAR))!=NULL) {
+X    strncpy(sfstr,bp+1,sizeof(sfstr));
+X    sfstr[sizeof(sfstr)-1]='\0';
+X    if ((bp1=strchr(sfstr,SFCHAR)) != NULL) { /* look for second SFCHAR */
+X      if ((bpn=strchr(sfstr,NSFCHAR))!=NULL) *bpn = '\0';
+X      *bp1='\0';
+X
+X      /* positive superfamilies: at most 9 numbers, so that the 0
+X         terminator still fits in qsfnum[10]; an empty list is
+X         accepted (strtok() may return NULL on the first call) */
+X      isf = 0;
+X      for (tp=strtok(sfstr," \t"); tp!=NULL; tp=strtok(NULL," \t")) {
+X        if (isf >= 9) {
+X          fprintf(stderr," error - too many superfamilies: %d\n %s\n",
+X                  isf+1,m_msg->qtitle);
+X          break;
+X        }
+X        qsfnum[isf++] = atoi(tp);
+X      }
+X      qsfnum[nsf=isf]=0;
+X      sf_sort(qsfnum,nsf);
+X
+X      /* now get negatives */
+X      qsfnum_n[0]= nsf_n = 0;
+X      if (bpn != NULL) {
+X        isf = 0;
+X        for (tp=strtok(bpn+1," \t"); tp!=NULL; tp=strtok(NULL," \t")) {
+X          if (isf >= 9) {
+X            fprintf(stderr,
+X                    " error - too many negative superfamilies: %d\n %s\n",
+X                    isf+1,m_msg->qtitle);
+X            break;
+X          }
+X          qsfnum_n[isf++] = atoi(tp);
+X        }
+X        /* terminate the negative list itself (the positive list was
+X           being overwritten here before) */
+X        qsfnum_n[nsf_n=isf]=0;
+X        sf_sort(qsfnum_n,nsf_n);
+X      }
+X    }
+X    else {      /* only one sfnum */
+X      qsfnum[0]=0;      /* defined value even if sscanf() matches nothing */
+X      sscanf(bp+1,"%d",qsfnum);
+X      qsfnum[1]=0;
+X      qsfnum_n[0]= nsf_n = 0;
+X    }
+X  }
+X  else {
+X    fprintf(stderr," no query superfamily number\n %s\n",m_msg->qtitle);
+X    return;
+X  }
+#endif
+X
+X  if (m_msg->qframe > 1 || m_msg->nframe > 1) {
+X
+X    /* this code is included for cases where there are several scores -
+X       forward and reverse, or six in the case of tfastf33s, for each
+X       sequence
+X
+X       lnum_sort sorts the library by lseek position, which will be
+X       the same for the same sequence
+X    */
+X
+X    lnum_sort(bptr,nbest);
+X
+X    /* merge, saving the best score */
+X    i = j = 0;
+X
+X    /* i has the source position we are currently examining
+X       k has the adjacent alternative scores ( k > i)
+X       j has the destination
+X    */
+X
+X    while (i<nbest) {
+X      for (k=i+1; k < nbest && bptr[i]->lseek == bptr[k]->lseek; k++) {
+X        if (bptr[i]->zscore < bptr[k]->zscore) bptr[i] = bptr[k];
+X      }
+X      bptr[j++]=bptr[i];
+X      i = k;
+X    }
+X
+X    if (j != m_msg->nbr_seq) {
+X      fprintf(stderr,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
+X              j,nbest,m_msg->nbr_seq);
+X      fprintf(stdout,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
+X              j,nbest,m_msg->nbr_seq);
+X    }
+X    nbest = j;
+X
+X    if (pst.zsflag >=0) sortbeste(bptr, nbest);
+X    else sortbest(bptr,nbest,pst.score_ix);
+X  }
+X
+/* fprintf(stderr," %1d label is %s (%s)\n",irelv,labptr,label); */
+X
+/* get the query superfamily */
+X
+X  /* first entry that matches neither the positive nor the negative
+X     superfamily list: the highest placed unrelated sequence */
+X  for (i=0; i<nbest; i++) {
+X    /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)==0 && sfn_cmp(BSFNUM(i),NQSFNUM)==0) {
+X      unf_num0=i;
+X      unf_score0=bptr[i]->zscore;
+X      unf_score0_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
+X      break;
+X    }
+X  }
+X
+X  if (i>=nbest) {
+X    fprintf(stderr," %s: %d\n error - no unrelated sequences\n",
+X            m_msg->qtitle,QSFNUM[0]);
+X    return;
+X  }
+X
+X  /* from here on nbest >= 1, because an unrelated entry was found */
+X
+X  for (i=rel_tot=relm_num0=0; i<nbest; i++) {
+X    /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)>0 ) {
+X      rel_tot++;        /* total related */
+X      if (bptr[i]->zscore <= unf_score0) relm_num0++;
+#ifdef DEBUG
+X      if (pst.debug_lib)
+X        fprintf(stderr,"%d\t%ld\t%.1f\n",i,(long)bptr[i]->lseek,bptr[i]->zscore);
+#endif
+X    }
+X  }
+X
+X  /* relm_num0, unf_num0, unf_score0 done */
+X
+X  /* now calculate number missed at various expectation value cutoffs */
+X  /* calculate z-score cutoff for E()=0.01, 0.02, 0.05 */
+X
+X  unf_score01 = E_to_zs(0.01,db.entries);
+X  unf_score02 = E_to_zs(0.02,db.entries);
+X  unf_score05 = E_to_zs(0.05,db.entries);
+X  unf_score100 = E_to_zs(1.00,db.entries);
+X
+X  /* relm_num01, unf_num01, unf_score01 done */
+X
+X  /* each loop below may stop with i == nbest when every entry is above
+X     the cutoff; k clamps the index so bptr[] is never read past its
+X     last entry */
+X
+X  for (i=unf_num01=0,relm_num01=rel_tot;
+X       i<nbest && bptr[i]->zscore >= unf_score01; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
+X      if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num01++;
+X    }
+X    else relm_num01--;
+X  }
+X  k = (i < nbest) ? i : nbest-1;
+X  unf_score01_b=zs_to_bit(bptr[k]->zscore,m_msg->n0,bptr[k]->n1);
+X
+X  for (i=unf_num02=0,relm_num02=rel_tot;
+X       i<nbest && bptr[i]->zscore >= unf_score02; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
+X      if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num02++;
+X    }
+X    else relm_num02--;
+X  }
+X  k = (i < nbest) ? i : nbest-1;
+X  unf_score02_b=zs_to_bit(bptr[k]->zscore,m_msg->n0,bptr[k]->n1);
+X
+X  for (i=unf_num05=0,relm_num05=rel_tot;
+X       i<nbest && bptr[i]->zscore >= unf_score05; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
+X      if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num05++;
+X    }
+X    else relm_num05--;
+X  }
+X  k = (i < nbest) ? i : nbest-1;
+X  unf_score05_b=zs_to_bit(bptr[k]->zscore,m_msg->n0,bptr[k]->n1);
+X
+X  for (i=unf_num100=0,relm_num100=rel_tot;
+X       i<nbest && bptr[i]->zscore >= unf_score100; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
+X      if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num100++;
+X    }
+X    else relm_num100--;
+X  }
+X  k = (i < nbest) ? i : nbest-1;
+X  unf_score100_b=zs_to_bit(bptr[k]->zscore,m_msg->n0,bptr[k]->n1);
+X
+X  /* the final criterion finds the score and the number of sequences
+X     where the number of unrelated sequences found == the number of
+X     related sequences missed. */
+X
+X  equ_num=0;
+X  i = 0; j=nbest-1;
+X
+/* j is counting up the list of scores (actually down the array) from
+X   the lowest scoring related sequence
+X
+X   i is counting down the list of scores (actually up the array)
+X   from the highest scoring unrelated sequence */
+X
+X  for (i=0, j=nbest-1; j>=0 && i<nbest; i++,j--) {
+X    /* i++ while sequences are related, stop at next unrelated */
+X    while (i<nbest && (sfn_cmp(BSFNUM(i),QSFNUM) || sfn_cmp(BSFNUM(i),NQSFNUM))) i++;
+X    /* j-- while sequences are unrelated, stop at next related */
+X    while (j>=0 && ( sfn_cmp(BSFNUM(j),QSFNUM)==0)) j--;
+X    /*
+X    fprintf(stderr,"i: %3d %3d %4d; j: %3d %3d %4d\n",i,bptr[i]->zscore,
+X            BSFNUM(i),j,bptr[j]->zscore,BSFNUM(j));
+X    */
+X    /* either scan may have run off its end of the array; stop before
+X       bptr[i] / bptr[j] would be read out of bounds */
+X    if (i>=nbest || j<0) break;
+X    /* if unrelated [i] score <= related [j] score, quit */
+X    if (bptr[i]->zscore <= bptr[j]->zscore) break;
+X    equ_num++;
+X  }
+X
+X  equ_score = 0.0;
+X  if (i>=nbest || j<0) {
+#ifndef PCOMPLIB
+X    if (pst.debug_lib)
+#endif
+X      fprintf(stderr," i (%3d), j (%3d) off end\n %s\n", i, j,m_msg->qtitle);
+X    equ_num = rel_tot+1; equ_score = 0.0;
+X    equ_score_b = 0.0;  /* printed below; must not be left uninitialised */
+X  }
+X  else {
+X    equ_score=bptr[i]->zscore;
+X    equ_score_b =zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
+X  }
+X
+X  /* get the lowest scoring related */
+X  for (i=0,rel_1_num=rel_tot-1; i<nbest && rel_1_num > 0; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_1_num--;
+X  }
+X  if (i >= nbest) i = nbest-1;  /* keep the index inside bptr[] */
+X  rel_1_num = i;
+X  rel_1_score = bptr[i]->zscore;
+X  rel_1_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
+X
+X  /* get the 3rd lowest scoring related */
+X  for (i=0,rel_3_num=rel_tot-3; i<nbest && rel_3_num > 0; i++) {
+/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
+X    if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_3_num--;
+X  }
+X  if (i >= nbest) i = nbest-1;  /* keep the index inside bptr[] */
+X  rel_3_num = i;
+X  rel_3_score = bptr[i]->zscore;
+X  rel_3_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
+X
+X  fprintf(fp,"%3d>%s - %d (%d/%d)\n",
+X          qlib,m_msg->qtitle, QSFNUM[0],rel_tot,nbest);
+X  fprintf(fp," 0.0 criterion- relm: %3d pos: %3d score: %5.1f exp: %6.4g\n",
+X          relm_num0, unf_num0+1, unf_score0_b,
+X          zs_to_E(unf_score0,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X  fprintf(fp," 0.01 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
+X          relm_num01, unf_num01, unf_score01_b,
+X          zs_to_E(unf_score01,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X  fprintf(fp," 0.02 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
+X          relm_num02, unf_num02, unf_score02_b,
+X          zs_to_E(unf_score02,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X  fprintf(fp," 0.05 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
+X          relm_num05, unf_num05, unf_score05_b,
+X          zs_to_E(unf_score05,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X  fprintf(fp," 1.00 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
+X          relm_num100, unf_num100, unf_score100_b,
+X          zs_to_E(unf_score100,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X
+X  fprintf(fp," equ num: %3d score: %5.1f exp: %6.4g\n",equ_num,equ_score_b,
+X          zs_to_E(equ_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X
+X  fprintf(fp," rel[-1]: %3d score: %5.1f exp: %6.4g\n",rel_1_num+1,rel_1_score_b,
+X          zs_to_E(rel_1_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X  fprintf(fp," rel[-3]: %3d score: %5.1f exp: %6.4g\n",rel_3_num+1,rel_3_score_b,
+X          zs_to_E(rel_3_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
+X
+X  /*
+X  fprintf(fp,"/ ** %s ** /\n",gstring2);
+X  fflush(fp);
+X  */
+X  m_msg->nshow = m_msg->ashow;
+}
+X
+#ifdef PCOMPLIB
+/* showalign(): empty stub supplied for PCOMPLIB builds. */
+void showalign()
+{}
+X
+/* NOTE(review): this #if is nested inside "#ifdef PCOMPLIB", so the
+X !defined(PCOMPLIB) term is always false here and sf_sort() below is
+X never compiled from this file.  showbest() calls sf_sort() in
+X PCOMPLIB builds, so a definition presumably comes from another
+X source file - confirm before changing the guard. */
+#if !defined(MPI_SRC) && !defined(PCOMPLIB)
+/* sf_sort(): sorts s[0..n-1] into ascending order in place.  Returns
+X at once if the array is already non-decreasing; otherwise runs a
+X Shell sort with gaps n/2, n/4, ..., 1. */
+void
+sf_sort(int *s, int n)
+{
+X int gap, i, j;
+X int itmp;
+X
+X /* already in order?  jump to the sort at the first inversion */
+X for (i=0; i<n-1; i++)
+X if (s[i]>s[i+1]) goto l2;
+X return;
+X
+l2:
+X for (gap=n/2; gap>0; gap/=2)
+X for (i=gap; i<n; i++)
+X for (j=i-gap; j>=0; j -= gap) {
+X /* stop sinking as soon as this pair is in order */
+X if (s[j] <= s[j+gap]) break;
+X itmp = s[j];
+X s[j]=s[j+gap];
+X s[j+gap]=itmp;
+X }
+}
+X
+#endif
+#endif
+X
+/* lnum_sort() re-orders the pointers s[0..n-1] so that their lseek
+X   members are ascending.  An array that is already non-decreasing is
+X   left untouched; otherwise a Shell sort with gaps n/2, n/4, ..., 1
+X   is applied. */
+void
+lnum_sort(struct beststr **s, int n)
+{
+X  int stride, pos, at;
+X  int in_order = 1;
+X  struct beststr *held;
+X
+X  /* look for the first inversion */
+X  for (pos = 0; pos < n-1; pos++) {
+X    if (s[pos]->lseek > s[pos+1]->lseek) {
+X      in_order = 0;
+X      break;
+X    }
+X  }
+X  if (in_order) return;
+X
+X  for (stride = n/2; stride > 0; stride /= 2) {
+X    for (pos = stride; pos < n; pos++) {
+X      /* sink the entry towards the front while it is out of order */
+X      at = pos - stride;
+X      while (at >= 0 && s[at]->lseek > s[at+stride]->lseek) {
+X        held = s[at];
+X        s[at] = s[at+stride];
+X        s[at+stride] = held;
+X        at -= stride;
+X      }
+X    }
+X  }
+}
+X
+#ifdef MPI_SRC
+/* aancpy() (MPI_SRC builds only) copies at most count characters from
+X   from[] to to[], stopping early at a NUL in from[], and always
+X   NUL-terminates to[].  A source value not above the alphabet size is
+X   translated through the alphabet table; any other value is copied
+X   unchanged.  The extended table (pst.sqx / pst.nsqx) is used when
+X   pst.ext_sq_set is non-zero, otherwise pst.sq / pst.nsq. */
+void
+aancpy(char *to, char *from, int count, struct pstruct pst)
+{
+X  char *out, *alphabet;
+X  int limit;
+X
+X  alphabet = pst.sq;
+X  limit = pst.nsq;
+X  if (pst.ext_sq_set) {
+X    alphabet = pst.sqx;
+X    limit = pst.nsqx;
+X  }
+X
+X  out = to;
+X  for ( ; count-- && *from; from++) {
+X    if (*from <= limit) {
+X      *out++ = alphabet[*from];
+X    }
+X    else {
+X      *out++ = *from;
+X    }
+X  }
+X  *out = '\0';
+}
+#endif
+SHAR_EOF
+chmod 0644 showsum.c ||
+echo 'restore of showsum.c failed'
+Wc_c="`wc -c < 'showsum.c'`"
+test 12412 -eq "$Wc_c" ||
+ echo 'showsum.c: original size 12412, current size' "$Wc_c"
+fi
+# ============= smith_waterman_altivec.c ==============
+if test -f 'smith_waterman_altivec.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping smith_waterman_altivec.c (File already exists)'
+else
+echo 'x - extracting smith_waterman_altivec.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_altivec.c' &&
+X
+/* Implementation of the Wozniak "anti-diagonal" vectorization
+X strategy for Smith-Waterman comparison, Wozniak (1997) Comp.
+X Appl. Biosci. 13:145-150
+X
+X November, 2004
+*/
+X
+/*
+X Written by Erik Lindahl, Stockholm Bioinformatics Center, 2004.
+X Please send bug reports and/or suggestions to lindahl@sbc.su.se.
+*/
+X
+#include <stdio.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "dropgsw.h"
+X
+#ifdef SW_ALTIVEC
+X
+int
+smith_waterman_altivec_word(unsigned char * query_sequence,
+X unsigned short * query_profile_word,
+X int query_length,
+X unsigned char * db_sequence,
+X int db_length,
+X unsigned short bias,
+X unsigned short gap_open,
+X unsigned short gap_extend,
+X struct f_struct * f_str)
+{
+X int i,j,k;
+X unsigned short * p;
+X unsigned short score;
+X unsigned char * p_dbseq;
+X int alphabet_size = f_str->alphabet_size;
+X unsigned short * workspace = (unsigned short *)f_str->workspace;
+X
+X vector unsigned short Fup,Hup1,Hup2,E,F,H,tmp;
+X vector unsigned char perm;
+X vector unsigned short v_maxscore;
+X vector unsigned short v_bias,v_gapopen,v_gapextend;
+X vector unsigned short v_score;
+X vector unsigned short v_score_q1;
+X vector unsigned short v_score_q2;
+X vector unsigned short v_score_q3;
+X vector unsigned short v_score_load;
+X vector unsigned char queue1_to_score = (vector unsigned char)(16,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
+X vector unsigned char queue2_to_queue1 = (vector unsigned char)(0,1,18,19,4,5,6,7,8,9,10,11,12,13,14,15);
+X vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,16,16,16,16,21,16,0,16,1,16,2,16,3,16,4);
+X vector unsigned char queue3_with_load = (vector unsigned char)(23,5,6,7,8,25,9,10,11,27,12,13,29,14,31,16);
+X
+X /* Load the bias to all elements of a constant */
+X v_bias = vec_lde(0,&bias);
+X perm = vec_lvsl(0,&bias);
+X v_bias = vec_perm(v_bias,v_bias,perm);
+X v_bias = vec_splat(v_bias,0);
+X
+X /* Load gap opening penalty to all elements of a constant */
+X v_gapopen = vec_lde(0,&gap_open);
+X perm = vec_lvsl(0,&gap_open);
+X v_gapopen = vec_perm(v_gapopen,v_gapopen,perm);
+X v_gapopen = vec_splat(v_gapopen,0);
+X
+X /* Load gap extension penalty to all elements of a constant */
+X v_gapextend = vec_lde(0,&gap_extend);
+X perm = vec_lvsl(0,&gap_extend);
+X v_gapextend = vec_perm(v_gapextend,v_gapextend,perm);
+X v_gapextend = vec_splat(v_gapextend,0);
+X
+X v_maxscore = vec_xor(v_maxscore,v_maxscore);
+X
+X // Zero out the storage vector
+X k = 2*(db_length+7);
+X
+X for(i=0,j=0;i<k;i++,j+=16)
+X {
+X // borrow the zero value in v_maxscore to have something to store
+X vec_st(v_maxscore,j,workspace);
+X }
+X
+X for(i=0;i<query_length;i+=8)
+X {
+X // fetch first data asap.
+X p_dbseq = db_sequence;
+X k = *p_dbseq++;
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // zero lots of stuff.
+X // We use both the VPERM and VSIU unit to knock off some cycles.
+X
+X E = vec_splat_u16(0);
+X F = vec_xor(F,F);
+X H = vec_splat_u16(0);
+X Hup2 = vec_xor(Hup2,Hup2);
+X v_score_q1 = vec_splat_u16(0);
+X v_score_q2 = vec_xor(v_score_q2,v_score_q2);
+X v_score_q3 = vec_splat_u16(0);
+X
+X // reset pointers to the start of the saved data from the last row
+X p = workspace;
+X
+X // PROLOGUE 1
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 2
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 3
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 4
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 5
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 6
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 7
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 8
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // reset pointers to the start of the saved data from the last row
+X p = workspace;
+X
+X for(j=8;j<db_length;j+=8)
+X {
+X // STEP 1
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup1 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 2
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup2 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup2 = vec_sld(Hup2,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 3
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup1 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 4
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup2 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup2 = vec_sld(Hup2,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 5
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup1 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 6
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup2 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup2 = vec_sld(Hup2,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 7
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup1 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup1 = vec_sld(Hup1,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // STEP 8
+X
+X // prefetch next residue
+X k = *p_dbseq++;
+X
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // prefetch score for next step
+X v_score_load = vec_ld(16*k,query_profile_word);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(256, p);
+X Hup2 = vec_ld(272, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,14);
+X Hup2 = vec_sld(Hup2,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X }
+X
+X v_score_load = vec_splat_u16(0);
+X
+X for(;j<db_length+7;j++)
+X {
+X // Create the actual diagonal score vector
+X // and update the queue of incomplete score vectors
+X //
+X // This could of course be done with only vec_perm or vec_sel,
+X // but since they use different execution units we have found
+X // it to be slightly faster to mix them.
+X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 16; // move ahead 32 bytes
+X
+X // v_score_load contains all zeros
+X Fup = vec_sld(v_score_load,F,14);
+X Hup1 = vec_sld(v_score_load,H,14);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X }
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X
+X query_profile_word += 8*alphabet_size;
+X }
+X
+X // find largest score in the v_maxscore vector
+X tmp = vec_sld(v_maxscore,v_maxscore,8);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X tmp = vec_sld(v_maxscore,v_maxscore,4);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X tmp = vec_sld(v_maxscore,v_maxscore,2);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X
+X // store in temporary variable
+X vec_ste(v_maxscore,0,&score);
+X
+X // return largest score
+X return score;
+}
+X
+int
+smith_waterman_altivec_byte(unsigned char * query_sequence,
+X unsigned char * query_profile_byte,
+X int query_length,
+X unsigned char * db_sequence,
+X int db_length,
+X unsigned char bias,
+X unsigned char gap_open,
+X unsigned char gap_extend,
+X struct f_struct * f_str)
+{
+X int i,j,k,k8;
+X int overflow;
+X unsigned char * p;
+X unsigned char score;
+X int alphabet_size = f_str->alphabet_size;
+X unsigned char * workspace = (unsigned char *)f_str->workspace;
+X
+X vector unsigned char Fup,Hup1,Hup2,E,F,H,tmp;
+X vector unsigned char perm;
+X vector unsigned char v_maxscore;
+X vector unsigned char v_bias,v_gapopen,v_gapextend;
+X vector unsigned char v_score;
+X vector unsigned char v_score_q1;
+X vector unsigned char v_score_q2;
+X vector unsigned char v_score_q3;
+X vector unsigned char v_score_q4;
+X vector unsigned char v_score_q5;
+X vector unsigned char v_score_load1;
+X vector unsigned char v_score_load2;
+X vector unsigned char v_zero;
+X
+X vector unsigned char queue1_to_score = (vector unsigned char)(16,1,2,3,4,5,6,7,24,9,10,11,12,13,14,15);
+X vector unsigned char queue2_to_queue1 = (vector unsigned char)(16,17,2,3,4,5,6,7,24,25,10,11,12,13,14,15);
+X vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,17,18,3,4,5,6,7,24,25,26,11,12,13,14,15);
+X vector unsigned char queue4_to_queue3 = (vector unsigned char)(16,17,18,19,4,5,6,7,24,25,26,27,12,13,14,15);
+X vector unsigned char queue5_to_queue4 = (vector unsigned char)(16,17,18,19,20,2,3,4,24,25,26,27,28,10,11,12);
+X vector unsigned char queue5_with_load = (vector unsigned char)(19,20,21,5,6,22,7,23,27,28,29,13,14,30,15,31);
+X vector unsigned char merge_score_load = (vector unsigned char)(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+X
+X v_zero = vec_splat_u8(0);
+X
+X /* Load the bias to all elements of a constant */
+X v_bias = vec_lde(0,&bias);
+X perm = vec_lvsl(0,&bias);
+X v_bias = vec_perm(v_bias,v_bias,perm);
+X v_bias = vec_splat(v_bias,0);
+X
+X /* Load gap opening penalty to all elements of a constant */
+X v_gapopen = vec_lde(0,&gap_open);
+X perm = vec_lvsl(0,&gap_open);
+X v_gapopen = vec_perm(v_gapopen,v_gapopen,perm);
+X v_gapopen = vec_splat(v_gapopen,0);
+X
+X /* Load gap extension penalty to all elements of a constant */
+X v_gapextend = vec_lde(0,&gap_extend);
+X perm = vec_lvsl(0,&gap_extend);
+X v_gapextend = vec_perm(v_gapextend,v_gapextend,perm);
+X v_gapextend = vec_splat(v_gapextend,0);
+X
+X v_maxscore = vec_xor(v_maxscore,v_maxscore);
+X
+X // Zero out the storage vector
+X k = (db_length+15);
+X for(i=0,j=0;i<k;i++,j+=32)
+X {
+X // borrow the zero value in v_maxscore to have something to store
+X vec_st(v_maxscore,j,workspace);
+X vec_st(v_maxscore,j+16,workspace);
+X }
+X
+X for(i=0;i<query_length;i+=16)
+X {
+X // zero lots of stuff.
+X // We use both the VPERM and VSIU unit to knock off some cycles.
+X
+X E = vec_splat_u8(0);
+X F = vec_xor(F,F);
+X H = vec_splat_u8(0);
+X Hup2 = vec_xor(Hup2,Hup2);
+X v_score_q1 = vec_splat_u8(0);
+X v_score_q2 = vec_xor(v_score_q2,v_score_q2);
+X v_score_q3 = vec_splat_u8(0);
+X v_score_q4 = vec_xor(v_score_q4,v_score_q4);
+X v_score_q5 = vec_splat_u8(0);
+X
+X // reset pointers to the start of the saved data from the last row
+X p = workspace;
+X
+X // start directly and prefetch score column
+X k = db_sequence[0];
+X k8 = k;
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = v_score_load1;
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // PROLOGUE 1
+X // prefetch next residue
+X k = db_sequence[1];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X // PROLOGUE 2
+X // prefetch next residue
+X k = db_sequence[2];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 3
+X // prefetch next residue
+X k = db_sequence[3];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 4
+X // prefetch next residue
+X k = db_sequence[4];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 5
+X // prefetch next residue
+X k = db_sequence[5];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X // PROLOGUE 6
+X // prefetch next residue
+X k = db_sequence[6];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 7
+X // prefetch next residue
+X k = db_sequence[7];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 8
+X // prefetch next residue
+X k = db_sequence[8];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X // PROLOGUE 9
+X // prefetch next residue
+X k = db_sequence[9];
+X k8 = db_sequence[1];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 10
+X // prefetch next residue
+X k = db_sequence[10];
+X k8 = db_sequence[2];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X // PROLOGUE 11
+X // prefetch next residue
+X k = db_sequence[11];
+X k8 = db_sequence[3];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 12
+X // prefetch next residue
+X k = db_sequence[12];
+X k8 = db_sequence[4];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X // PROLOGUE 13
+X // prefetch next residue
+X k = db_sequence[13];
+X k8 = db_sequence[5];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 14
+X // prefetch next residue
+X k = db_sequence[14];
+X k8 = db_sequence[6];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 15
+X // prefetch next residue
+X k = db_sequence[15];
+X k8 = db_sequence[7];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup1 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X // PROLOGUE 16
+X // prefetch next residue
+X k = db_sequence[16];
+X k8 = db_sequence[8];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch score for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(0, p);
+X Hup2 = vec_ld(16, p);
+X p += 32; // move ahead 32 bytes
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X p = workspace;
+X
+X for(j=16;j<db_length;j+=16)
+X {
+X // STEP 1
+X
+X // prefetch next residue
+X k = db_sequence[j+1];
+X k8 = db_sequence[j-7];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X // STEP 2
+X
+X // prefetch next residue
+X k = db_sequence[j+2];
+X k8 = db_sequence[j-6];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 3
+X
+X // prefetch next residue
+X k = db_sequence[j+3];
+X k8 = db_sequence[j-5];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 4
+X
+X // prefetch next residue
+X k = db_sequence[j+4];
+X k8 = db_sequence[j-4];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 5
+X
+X // prefetch next residue
+X k = db_sequence[j+5];
+X k8 = db_sequence[j-3];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 6
+X
+X // prefetch next residue
+X k = db_sequence[j+6];
+X k8 = db_sequence[j-2];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 7
+X
+X // prefetch next residue
+X k = db_sequence[j+7];
+X k8 = db_sequence[j-1];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 8
+X
+X // prefetch next residue
+X k = db_sequence[j+8];
+X k8 = db_sequence[j];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X
+X
+X
+X
+X
+X // STEP 9
+X
+X // prefetch next residue
+X k = db_sequence[j+9];
+X k8 = db_sequence[j+1];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 10
+X
+X // prefetch next residue
+X k = db_sequence[j+10];
+X k8 = db_sequence[j+2];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 11
+X
+X // prefetch next residue
+X k = db_sequence[j+11];
+X k8 = db_sequence[j+3];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 12
+X
+X // prefetch next residue
+X k = db_sequence[j+12];
+X k8 = db_sequence[j+4];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 13
+X
+X // prefetch next residue
+X k = db_sequence[j+13];
+X k8 = db_sequence[j+5];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 14
+X
+X // prefetch next residue
+X k = db_sequence[j+14];
+X k8 = db_sequence[j+6];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 15
+X
+X // prefetch next residue
+X k = db_sequence[j+15];
+X k8 = db_sequence[j+7];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup1 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup1 = vec_sld(Hup1,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X // STEP 16
+X
+X // prefetch next residue
+X k = db_sequence[j+16];
+X k8 = db_sequence[j+8];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load1 = vec_ld(16*k,query_profile_byte);
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X
+X // load values of F and H from previous row (one unit up)
+X Fup = vec_ld(512, p);
+X Hup2 = vec_ld(528, p);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32;
+X
+X // shift into place so we have complete F and H vectors
+X // that refer to the values one unit up from each cell
+X // that we are currently working on.
+X Fup = vec_sld(Fup,F,15);
+X Hup2 = vec_sld(Hup2,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup2,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
+X
+X // add score to H
+X H = vec_adds(Hup1,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X
+X }
+X
+X for(;j<db_length+15;j++)
+X {
+X k8 = db_sequence[j-7];
+X
+X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
+X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
+X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
+X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
+X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
+X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
+X
+X
+X // prefetch scores for next step
+X v_score_load2 = vec_ld(16*k8,query_profile_byte);
+X v_score_load1 = vec_perm(v_zero,v_score_load2,merge_score_load);
+X
+X // save old values of F and H to use on next row
+X vec_st(F, 0, p);
+X vec_st(H, 16, p);
+X p += 32; // move ahead 32 bytes
+X
+X Fup = vec_sld(v_zero,F,15);
+X Hup1 = vec_sld(v_zero,H,15);
+X
+X // do the dynamic programming
+X
+X // update E value
+X E = vec_subs(E,v_gapextend);
+X tmp = vec_subs(H,v_gapopen);
+X E = vec_max(E,tmp);
+X
+X // update F value
+X F = vec_subs(Fup,v_gapextend);
+X tmp = vec_subs(Hup1,v_gapopen);
+X F = vec_max(F,tmp);
+X
+X // add score to H
+X H = vec_adds(Hup2,v_score);
+X H = vec_subs(H,v_bias);
+X
+X // set H to max of H,E,F
+X H = vec_max(H,E);
+X H = vec_max(H,F);
+X
+X // Save value to use for next diagonal H
+X Hup2 = Hup1;
+X
+X // Update highest score encountered this far
+X v_maxscore = vec_max(v_maxscore,H);
+X }
+X vec_st(F, 512, p);
+X vec_st(H, 528, p);
+X
+X query_profile_byte += 16*alphabet_size;
+X
+X // End of this row (actually 16 rows due to SIMD).
+X // Before we continue, check for overflow.
+X tmp = vec_subs(vec_splat_u8(-1),v_bias);
+X overflow = vec_any_ge(v_maxscore,tmp);
+X
+X
+X }
+X
+X if(overflow)
+X {
+X return 255;
+X }
+X else
+X {
+X // find largest score in the v_maxscore vector
+X tmp = vec_sld(v_maxscore,v_maxscore,8);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X tmp = vec_sld(v_maxscore,v_maxscore,4);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X tmp = vec_sld(v_maxscore,v_maxscore,2);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X tmp = vec_sld(v_maxscore,v_maxscore,1);
+X v_maxscore = vec_max(v_maxscore,tmp);
+X
+X // store in temporary variable
+X vec_ste(v_maxscore,0,&score);
+X
+X // return largest score
+X return score;
+X }}
+X
+X
+#else
+X
+/* No Altivec support. Avoid compiler complaints about empty object */
+/* NOTE(review): smith_waterman_sse2.c defines an external object with
+X the same name (sw_dummy) in its own fallback branch.  If both
+X fallback objects are ever linked into one program, toolchains that
+X do not merge common symbols (e.g. -fno-common) may report a
+X duplicate definition -- confirm against the Makefiles. */
+X
+int sw_dummy;
+X
+#endif
+SHAR_EOF
+chmod 0644 smith_waterman_altivec.c ||
+echo 'restore of smith_waterman_altivec.c failed'
+Wc_c="`wc -c < 'smith_waterman_altivec.c'`"
+test 113815 -eq "$Wc_c" ||
+ echo 'smith_waterman_altivec.c: original size 113815, current size' "$Wc_c"
+fi
+# ============= smith_waterman_altivec.h ==============
+if test -f 'smith_waterman_altivec.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping smith_waterman_altivec.h (File already exists)'
+else
+echo 'x - extracting smith_waterman_altivec.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_altivec.h' &&
+X
+int
+smith_waterman_altivec_word(const unsigned char * query_sequence,
+X unsigned short * query_profile_word,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned short bias,
+X unsigned short gap_open,
+X unsigned short gap_extend,
+X struct f_struct * f_str);
+X
+X
+int
+smith_waterman_altivec_byte(const unsigned char * query_sequence,
+X unsigned char * query_profile_byte,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned char bias,
+X unsigned char gap_open,
+X unsigned char gap_extend,
+X struct f_struct * f_str);
+X
+SHAR_EOF
+chmod 0644 smith_waterman_altivec.h ||
+echo 'restore of smith_waterman_altivec.h failed'
+Wc_c="`wc -c < 'smith_waterman_altivec.h'`"
+test 1144 -eq "$Wc_c" ||
+ echo 'smith_waterman_altivec.h: original size 1144, current size' "$Wc_c"
+fi
+# ============= smith_waterman_sse2.c ==============
+if test -f 'smith_waterman_sse2.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping smith_waterman_sse2.c (File already exists)'
+else
+echo 'x - extracting smith_waterman_sse2.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_sse2.c' &&
+/******************************************************************
+X Copyright 2006 by Michael Farrar. All rights reserved.
+X This program may not be sold or incorporated into a commercial product,
+X in whole or in part, without written consent of Michael Farrar. For
+X further information regarding permission for use or reproduction, please
+X contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+X
+/*
+X Written by Michael Farrar, 2006.
+X Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+X
+#include <stdio.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "dropgsw.h"
+#include "smith_waterman_sse2.h"
+X
+#ifdef __SUNPRO_C
+#include <sunmedia_intrin.h>
+#else
+#include <emmintrin.h>
+#endif
+X
+#ifdef SW_SSE2
+X
+/*
+X smith_waterman_sse2_word - striped Smith-Waterman score using eight
+X signed 16-bit lanes per SSE2 register.
+X
+X Scores are held biased by -32768 so that saturating signed
+X arithmetic covers the full range of a short; 32768 is added back
+X before the result is returned.
+X
+X query_sequence - not referenced by this routine
+X query_profile_word - score profile; for database residue code c the
+X routine reads 'iter' consecutive 16-byte vectors
+X starting at vector index c * iter
+X query_length - query length; iter = (query_length + 7) / 8
+X db_sequence - residue codes used to index the profile
+X db_length - number of entries of db_sequence to process
+X gap_open - subtracted from H when a gap is opened
+X gap_extend - subtracted from E / F when a gap is extended
+X f_str - f_str->workspace must hold at least 3 * iter
+X vectors (E plus two H buffers); it is accessed
+X with aligned loads/stores
+X
+X Returns the largest value recorded in v_maxscore with the bias
+X removed, i.e. an int in the range 0..65535.
+*/
+int
+smith_waterman_sse2_word(const unsigned char * query_sequence,
+X unsigned short * query_profile_word,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned short gap_open,
+X unsigned short gap_extend,
+X struct f_struct * f_str)
+{
+X int i, j, k;
+X short score;
+X
+X int cmp;
+X int iter = (query_length + 7) / 8;
+X
+X __m128i *p;
+X __m128i *workspace = (__m128i *) f_str->workspace;
+X
+X __m128i E, F, H;
+X
+X __m128i v_maxscore;
+X __m128i v_gapopen;
+X __m128i v_gapextend;
+X
+X __m128i v_min;
+X __m128i v_minimums;
+X __m128i v_temp;
+X
+X __m128i *pHLoad, *pHStore;
+X __m128i *pE;
+X
+X __m128i *pScore;
+X
+X /* Load gap opening penalty to all elements of a constant. */
+X /* Start from a defined value: the register must not be read */
+X /* while it is still uninitialized. */
+X v_gapopen = _mm_setzero_si128 ();
+X v_gapopen = _mm_insert_epi16 (v_gapopen, gap_open, 0);
+X v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
+X v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
+X
+X /* Load gap extension penalty to all elements of a constant */
+X v_gapextend = _mm_setzero_si128 ();
+X v_gapextend = _mm_insert_epi16 (v_gapextend, gap_extend, 0);
+X v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
+X v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
+X
+X /* load v_maxscore with the zeros. since we are using signed */
+X /* math, we will bias the maxscore to -32768 so we have the */
+X /* full range of the short. (all ones shifted left by 15 */
+X /* gives 0x8000 in every lane.) */
+X v_maxscore = _mm_setzero_si128 ();
+X v_maxscore = _mm_cmpeq_epi16 (v_maxscore, v_maxscore);
+X v_maxscore = _mm_slli_epi16 (v_maxscore, 15);
+X
+X /* every lane holds -32768 (the biased zero) */
+X v_minimums = _mm_shuffle_epi32 (v_maxscore, 0);
+X
+X /* only the lowest lane holds -32768, all other lanes are 0; */
+X /* OR-ed into a vector after a one-lane shift to refill the */
+X /* vacated lane with the biased zero. */
+X v_min = _mm_shuffle_epi32 (v_maxscore, 0);
+X v_min = _mm_srli_si128 (v_min, 14);
+X
+X /* Set the E buffer and the first H buffer to the biased zero. */
+X /* The second H buffer is written before it is ever read. */
+X k = 2 * iter;
+X
+X p = workspace;
+X for (i = 0; i < k; i++)
+X {
+X _mm_store_si128 (p++, v_maxscore);
+X }
+X
+X pE = workspace;
+X pHStore = pE + iter;
+X pHLoad = pHStore + iter;
+X
+X for (i = 0; i < db_length; ++i)
+X {
+X /* fetch first data asap. */
+X pScore = (__m128i *) query_profile_word + db_sequence[i] * iter;
+X
+X /* bias all elements in F to -32768 */
+X F = v_minimums;
+X
+X /* load the next h value: last segment of the previous */
+X /* column, moved up one lane, low lane set to biased zero */
+X H = _mm_load_si128 (pHStore + iter - 1);
+X H = _mm_slli_si128 (H, 2);
+X H = _mm_or_si128 (H, v_min);
+X
+X /* swap the H buffers: previous column becomes the load side */
+X p = pHLoad;
+X pHLoad = pHStore;
+X pHStore = p;
+X
+X for (j = 0; j < iter; j++)
+X {
+X /* load E values */
+X E = _mm_load_si128 (pE + j);
+X
+X /* add score to H */
+X H = _mm_adds_epi16 (H, *pScore++);
+X
+X /* Update highest score encountered this far */
+X v_maxscore = _mm_max_epi16 (v_maxscore, H);
+X
+X /* get max from H, E and F */
+X H = _mm_max_epi16 (H, E);
+X H = _mm_max_epi16 (H, F);
+X
+X /* save H values */
+X _mm_store_si128 (pHStore + j, H);
+X
+X /* subtract the gap open penalty from H */
+X H = _mm_subs_epi16 (H, v_gapopen);
+X
+X /* update E value */
+X E = _mm_subs_epi16 (E, v_gapextend);
+X E = _mm_max_epi16 (E, H);
+X
+X /* update F value */
+X F = _mm_subs_epi16 (F, v_gapextend);
+X F = _mm_max_epi16 (F, H);
+X
+X /* save E values */
+X _mm_store_si128 (pE + j, E);
+X
+X /* load the next h value */
+X H = _mm_load_si128 (pHLoad + j);
+X }
+X
+X /* reset pointers to the start of the saved data */
+X j = 0;
+X H = _mm_load_si128 (pHStore + j);
+X
+X /* the computed F value is for the given column. since */
+X /* we are at the end, we need to shift the F value over */
+X /* to the next column. */
+X F = _mm_slli_si128 (F, 2);
+X F = _mm_or_si128 (F, v_min);
+X v_temp = _mm_subs_epi16 (H, v_gapopen);
+X v_temp = _mm_cmpgt_epi16 (F, v_temp);
+X cmp = _mm_movemask_epi8 (v_temp);
+X
+X /* keep correcting the column while F still exceeds */
+X /* H - gap_open in at least one lane */
+X while (cmp != 0x0000)
+X {
+X E = _mm_load_si128 (pE + j);
+X
+X H = _mm_max_epi16 (H, F);
+X
+X /* save H values */
+X _mm_store_si128 (pHStore + j, H);
+X
+X /* update E in case the new H value would change it */
+X H = _mm_subs_epi16 (H, v_gapopen);
+X E = _mm_max_epi16 (E, H);
+X _mm_store_si128 (pE + j, E);
+X
+X /* update F value */
+X F = _mm_subs_epi16 (F, v_gapextend);
+X
+X j++;
+X if (j >= iter)
+X {
+X /* wrapped past the last segment: shift F one lane */
+X j = 0;
+X F = _mm_slli_si128 (F, 2);
+X F = _mm_or_si128 (F, v_min);
+X }
+X H = _mm_load_si128 (pHStore + j);
+X
+X v_temp = _mm_subs_epi16 (H, v_gapopen);
+X v_temp = _mm_cmpgt_epi16 (F, v_temp);
+X cmp = _mm_movemask_epi8 (v_temp);
+X }
+X }
+X
+X /* find largest score in the v_maxscore vector; after the three */
+X /* folds the maximum sits in the lowest lane */
+X v_temp = _mm_srli_si128 (v_maxscore, 8);
+X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+X v_temp = _mm_srli_si128 (v_maxscore, 4);
+X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+X v_temp = _mm_srli_si128 (v_maxscore, 2);
+X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
+X
+X /* extract the largest score */
+X score = _mm_extract_epi16 (v_maxscore, 0);
+X
+X /* return largest score biased by 32768 */
+X return score + 32768;
+}
+X
+X
+X
+X
+/*
+X smith_waterman_sse2_byte - striped Smith-Waterman score using sixteen
+X unsigned 8-bit lanes per SSE2 register.
+X
+X All arithmetic is unsigned and saturating.  'bias' is subtracted
+X after each profile score is added (the profile entries are
+X presumably stored with the bias already added -- confirm against
+X the code that builds query_profile_byte).
+X
+X query_sequence - not referenced by this routine
+X query_profile_byte - score profile; for database residue code c the
+X routine reads 'iter' consecutive 16-byte vectors
+X starting at vector index c * iter
+X query_length - query length; iter = (query_length + 15) / 16
+X db_sequence - residue codes used to index the profile
+X db_length - number of entries of db_sequence to process
+X bias - value subtracted after adding a profile score
+X gap_open - subtracted from H when a gap is opened
+X gap_extend - subtracted from E / F when a gap is extended
+X f_str - f_str->workspace must hold at least 3 * iter
+X vectors (E plus two H buffers); it is accessed
+X with aligned loads/stores
+X
+X Returns the largest score recorded (0..255).  255 is returned
+X whenever score + bias >= 255, i.e. the byte arithmetic may have
+X saturated and the value cannot be trusted.
+*/
+int
+smith_waterman_sse2_byte(const unsigned char * query_sequence,
+X unsigned char * query_profile_byte,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned char bias,
+X unsigned char gap_open,
+X unsigned char gap_extend,
+X struct f_struct * f_str)
+{
+X int i, j, k;
+X int score;
+X
+X int dup;
+X int cmp;
+X int iter = (query_length + 15) / 16;
+X
+X __m128i *p;
+X __m128i *workspace = (__m128i *) f_str->workspace;
+X
+X __m128i E, F, H;
+X
+X __m128i v_maxscore;
+X __m128i v_bias;
+X __m128i v_gapopen;
+X __m128i v_gapextend;
+X
+X __m128i v_temp;
+X __m128i v_zero;
+X
+X __m128i *pHLoad, *pHStore;
+X __m128i *pE;
+X
+X __m128i *pScore;
+X
+X /* Load the bias to all elements of a constant. The byte is */
+X /* duplicated into a 16-bit value which is then broadcast. */
+X /* Start from a defined value: the register must not be read */
+X /* while it is still uninitialized. */
+X dup = ((short) bias << 8) | bias;
+X v_bias = _mm_setzero_si128 ();
+X v_bias = _mm_insert_epi16 (v_bias, dup, 0);
+X v_bias = _mm_shufflelo_epi16 (v_bias, 0);
+X v_bias = _mm_shuffle_epi32 (v_bias, 0);
+X
+X /* Load gap opening penalty to all elements of a constant */
+X dup = ((short) gap_open << 8) | gap_open;
+X v_gapopen = _mm_setzero_si128 ();
+X v_gapopen = _mm_insert_epi16 (v_gapopen, dup, 0);
+X v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
+X v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
+X
+X /* Load gap extension penalty to all elements of a constant */
+X dup = ((short) gap_extend << 8) | gap_extend;
+X v_gapextend = _mm_setzero_si128 ();
+X v_gapextend = _mm_insert_epi16 (v_gapextend, dup, 0);
+X v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
+X v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
+X
+X /* initialize the max score */
+X v_maxscore = _mm_setzero_si128 ();
+X
+X /* create a constant of all zeros for comparison */
+X v_zero = _mm_setzero_si128 ();
+X
+X /* Zero out the E buffer and the first H buffer. The second */
+X /* H buffer is written before it is ever read. */
+X k = iter * 2;
+X
+X p = workspace;
+X for (i = 0; i < k; i++)
+X {
+X _mm_store_si128 (p++, v_maxscore);
+X }
+X
+X pE = workspace;
+X pHStore = pE + iter;
+X pHLoad = pHStore + iter;
+X
+X for (i = 0; i < db_length; ++i)
+X {
+X /* fetch first data asap. */
+X pScore = (__m128i *) query_profile_byte + db_sequence[i] * iter;
+X
+X /* zero out F value. */
+X F = _mm_setzero_si128 ();
+X
+X /* load the next h value: last segment of the previous */
+X /* column, moved up one lane (zero enters the low lane) */
+X H = _mm_load_si128 (pHStore + iter - 1);
+X H = _mm_slli_si128 (H, 1);
+X
+X /* swap the H buffers: previous column becomes the load side */
+X p = pHLoad;
+X pHLoad = pHStore;
+X pHStore = p;
+X
+X for (j = 0; j < iter; j++)
+X {
+X /* load values E. */
+X E = _mm_load_si128 (pE + j);
+X
+X /* add score to H */
+X H = _mm_adds_epu8 (H, *pScore++);
+X H = _mm_subs_epu8 (H, v_bias);
+X
+X /* Update highest score encountered this far */
+X v_maxscore = _mm_max_epu8 (v_maxscore, H);
+X
+X /* get max from H, E and F */
+X H = _mm_max_epu8 (H, E);
+X H = _mm_max_epu8 (H, F);
+X
+X /* save H values */
+X _mm_store_si128 (pHStore + j, H);
+X
+X /* subtract the gap open penalty from H */
+X H = _mm_subs_epu8 (H, v_gapopen);
+X
+X /* update E value */
+X E = _mm_subs_epu8 (E, v_gapextend);
+X E = _mm_max_epu8 (E, H);
+X
+X /* update F value */
+X F = _mm_subs_epu8 (F, v_gapextend);
+X F = _mm_max_epu8 (F, H);
+X
+X /* save E values */
+X _mm_store_si128 (pE + j, E);
+X
+X /* load the next h value */
+X H = _mm_load_si128 (pHLoad + j);
+X }
+X
+X /* reset pointers to the start of the saved data */
+X j = 0;
+X H = _mm_load_si128 (pHStore + j);
+X
+X /* the computed F value is for the given column. since */
+X /* we are at the end, we need to shift the F value over */
+X /* to the next column. */
+X F = _mm_slli_si128 (F, 1);
+X /* F - (H - gap_open), saturating at 0: a lane is zero when */
+X /* F cannot raise H any further; cmp == 0xffff means all are */
+X v_temp = _mm_subs_epu8 (H, v_gapopen);
+X v_temp = _mm_subs_epu8 (F, v_temp);
+X v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
+X cmp = _mm_movemask_epi8 (v_temp);
+X
+X while (cmp != 0xffff)
+X {
+X E = _mm_load_si128 (pE + j);
+X
+X H = _mm_max_epu8 (H, F);
+X
+X /* save H values */
+X _mm_store_si128 (pHStore + j, H);
+X
+X /* update E in case the new H value would change it */
+X H = _mm_subs_epu8 (H, v_gapopen);
+X E = _mm_max_epu8 (E, H);
+X _mm_store_si128 (pE + j, E);
+X
+X /* update F value */
+X F = _mm_subs_epu8 (F, v_gapextend);
+X
+X j++;
+X if (j >= iter)
+X {
+X /* wrapped past the last segment: shift F one lane */
+X j = 0;
+X F = _mm_slli_si128 (F, 1);
+X }
+X H = _mm_load_si128 (pHStore + j);
+X
+X v_temp = _mm_subs_epu8 (H, v_gapopen);
+X v_temp = _mm_subs_epu8 (F, v_temp);
+X v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
+X cmp = _mm_movemask_epi8 (v_temp);
+X }
+X }
+X
+X /* find largest score in the v_maxscore vector; after the four */
+X /* folds the maximum sits in the lowest byte */
+X v_temp = _mm_srli_si128 (v_maxscore, 8);
+X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+X v_temp = _mm_srli_si128 (v_maxscore, 4);
+X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+X v_temp = _mm_srli_si128 (v_maxscore, 2);
+X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+X v_temp = _mm_srli_si128 (v_maxscore, 1);
+X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
+X
+X /* store in temporary variable; only the low byte is the score */
+X score = _mm_extract_epi16 (v_maxscore, 0);
+X score = score & 0x00ff;
+X
+X /* check if we might have overflowed */
+X if (score + bias >= 255)
+X {
+X score = 255;
+X }
+X
+X /* return largest score */
+X return score;
+}
+#else
+X
+/* No SSE2 support. Avoid compiler complaints about empty object */
+/* NOTE(review): smith_waterman_altivec.c defines an external object
+X with the same name (sw_dummy) in its own fallback branch.  If both
+X fallback objects are ever linked into one program, toolchains that
+X do not merge common symbols (e.g. -fno-common) may report a
+X duplicate definition -- confirm against the Makefiles. */
+X
+int sw_dummy;
+X
+#endif
+SHAR_EOF
+chmod 0644 smith_waterman_sse2.c ||
+echo 'restore of smith_waterman_sse2.c failed'
+Wc_c="`wc -c < 'smith_waterman_sse2.c'`"
+test 12106 -eq "$Wc_c" ||
+ echo 'smith_waterman_sse2.c: original size 12106, current size' "$Wc_c"
+fi
+# ============= smith_waterman_sse2.h ==============
+if test -f 'smith_waterman_sse2.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping smith_waterman_sse2.h (File already exists)'
+else
+echo 'x - extracting smith_waterman_sse2.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_sse2.h' &&
+/******************************************************************
+X Copyright 2006 by Michael Farrar. All rights reserved.
+X This program may not be sold or incorporated into a commercial product,
+X in whole or in part, without written consent of Michael Farrar. For
+X further information regarding permission for use or reproduction, please
+X contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+X
+/*
+X Written by Michael Farrar, 2006.
+X Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+X
+#ifndef SMITH_WATERMAN_SSE2_H
+#define SMITH_WATERMAN_SSE2_H
+X
+/*
+X 16-bit SSE2 Smith-Waterman kernel (eight signed lanes per vector).
+X The profile is read as (query_length + 7) / 8 vectors per database
+X residue code, and f_str->workspace is used as scratch space for
+X three buffers of that many vectors.  query_sequence is not
+X referenced by the implementation.  Returns the best score found,
+X in the range 0..65535.
+*/
+int
+smith_waterman_sse2_word(const unsigned char * query_sequence,
+X unsigned short * query_profile_word,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned short gap_open,
+X unsigned short gap_extend,
+X struct f_struct * f_str);
+X
+X
+/*
+X 8-bit SSE2 Smith-Waterman kernel (sixteen unsigned lanes per vector).
+X The profile is read as (query_length + 15) / 16 vectors per database
+X residue code; 'bias' is subtracted after each profile score is
+X added.  f_str->workspace is used as scratch space for three buffers
+X of that many vectors.  query_sequence is not referenced by the
+X implementation.  Returns the best score found, or 255 when
+X score + bias >= 255 (the byte arithmetic may have saturated).
+*/
+int
+smith_waterman_sse2_byte(const unsigned char * query_sequence,
+X unsigned char * query_profile_byte,
+X const int query_length,
+X const unsigned char * db_sequence,
+X const int db_length,
+X unsigned char bias,
+X unsigned char gap_open,
+X unsigned char gap_extend,
+X struct f_struct * f_str);
+X
+#endif /* SMITH_WATERMAN_SSE2_H */
+SHAR_EOF
+chmod 0755 smith_waterman_sse2.h ||
+echo 'restore of smith_waterman_sse2.h failed'
+Wc_c="`wc -c < 'smith_waterman_sse2.h'`"
+test 1723 -eq "$Wc_c" ||
+ echo 'smith_waterman_sse2.h: original size 1723, current size' "$Wc_c"
+fi
+# ============= structs.h ==============
+if test -f 'structs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping structs.h (File already exists)'
+else
+echo 'x - extracting structs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'structs.h' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: structs.h,v 1.36 2006/06/22 02:35:05 wrp Exp $ */
+X
+#include "aln_structs.h"
+X
+/* Histogram / statistics state.
+X NOTE(review): the field notes below are inferred from the names
+X only -- confirm against the code that fills this structure. */
+struct hist_str {
+X int histflg; /* presumably: histogram requested flag */
+X int *hist_a; /* presumably: array of histogram bin counts */
+X int histint, min_hist, max_hist, maxh; /* presumably: bin interval and range limits */
+X long entries; /* presumably: number of entries tallied */
+X int z_calls;
+X char stat_info[MAX_STR]; /* text buffer of MAX_STR characters */
+};
+X
+/* Database size counters.
+X NOTE(review): the field notes below are inferred from the names
+X only -- confirm against the code that updates this structure. */
+struct db_str {
+X long entries; /* presumably: number of library entries counted */
+X unsigned long length; /* presumably: accumulated sequence length */
+X int carry; /* presumably: overflow carry for 'length' -- TODO confirm */
+};
+X
+/* struct mngmsg - search parameters and state, described below as the
+X "message from host to manager".  It embeds a struct hist_str ("hist")
+X and a struct db_str ("db") from this header, plus a struct a_struct and
+X a struct a_res_str (NOTE(review): presumably declared in aln_structs.h,
+X which this header includes -- confirm).  Buffer sizes come from
+X MAX_FN, MAX_LF, MAXLN and MAX_STR, which are defined elsewhere. */
+struct mngmsg /* Message from host to manager */
+{
+X int n0; /* Integer returned by hgetseq */
+X int nm0; /* number of segments */
+X int nmoff; /* length of fastf segment */
+X unsigned char *aa0a; /* annotation array */
+X char ann_arr[MAX_FN]; /* annotation characters */
+X int ann_flg; /* have annotation array, characters */
+X char tname[MAX_FN]; /* Query sequence name */
+X int tnamesize; /* Query name size */
+X int qsfnum[10];
+X int nqsfnum;
+X int qsfnum_n[10];
+X int nqsfnum_n;
+X char lname[MAX_FN]; /* Library file name */
+X char *lbnames[MAX_LF]; /* list of library files */
+X struct lmf_str *lb_mfd[MAX_LF]; /* list of opened file pointers */
+X
+X int max_tot; /* function defined total sequence area */
+X int maxn; /* longest library sequence chunk */
+X int dupn; /* overlap to use when segmenting sequence (p_comp) */
+X int qoff; /* overlap when segmenting long query sequence */
+X int loff; /* overlap when segmenting long library sequences */
+X int maxt3; /* overlap for translated sequences */
+X int qdnaseq; /* query is protein (0)/dna (1) */
+X int ldnaseq; /* library is protein (0)/dna (1) */
+X int qframe; /* number of possible query frames */
+X int nframe; /* frame for TFASTA */
+X int nitt1; /* nframe-1 */
+X int thr_fact; /* fudge factor for threads */
+X int s_int; /* sampling interval for statistics */
+X int ql_start; /* starting query sequence */
+X int ql_stop; /* ending query sequence */
+X int nln; /* number of library names */
+X int pbuf_siz; /* buffer size for sequences sent in p2_complib */
+X char qtitle[MAX_FN]; /* query title */
+X char ltitle[MAX_FN]; /* library title */
+X char flstr[MAX_FN]; /* FASTLIBS string */
+X char outfile[MAX_FN];
+X char label [MAXLN]; /* Output label */
+X char f_id0[4]; /* function id for markx==10 */
+X char f_id1[4]; /* function id for markx==10 */
+X char sqnam[4]; /* "aa" or "nt" */
+X char sqtype[10]; /* "DNA" or "protein" */
+X int long_info; /* long description flag*/
+X long sq0off, sq1off; /* offset into aa0, aa1 */
+X int markx; /* alignment display type */
+X int seqnm; /* query sequence number */
+X int nbr_seq; /* number of library sequences */
+X int term_code; /* add termination codes to proteins if absent */
+X int n1_high; /* upper limit on sequence length */
+X int n1_low; /* lower limit on sequence length */
+X double e_cut; /* e_value for display */
+X double e_low; /* e_value for display */
+X int e_cut_set; /* e_value deliberately set */
+X int pamd1; /* 1st dimension of pam matrix */
+X int pamd2; /* 2nd dimension of pam matrix */
+X int revcomp; /* flag to do reverse complement */
+X int quiet; /* quiet option */
+X int nrelv; /* number of interesting scores */
+X int srelv; /* number of scores to show in showbest */
+X int arelv; /* number of scores to show at alignment */
+X int z_bits; /* z_bits==1: show bit score, ==0 show z-score */
+X char alab[3][24]; /* labels for alignment scores */
+X int nohist; /* no histogram option */
+X int nshow;
+X int mshow; /* number of scores to show */
+X int mshow_flg;
+X int ashow; /* number of alignments to show */
+X int nmlen; /* length of name label */
+X int show_code; /* show alignment code in -m 9; ==1 => identity only, ==2 alignment code*/
+X int self; /* self comparison */
+X int thold; /* threshold */
+X int last_calc_flg; /* needs a last calculation stage */
+X int qshuffle; /* shuffle the query and do additional comparisons */
+X int shuff_max; /* number of shuffles to perform */
+X int shuff_node; /* number of shuffles/worker node */
+X int shuff_wid;
+X int stages; /* number of stages */
+X double Lambda, K, H; /* Karlin-Altschul parameters */
+X int escore_flg; /* use escore calculated by do_work() */
+X struct hist_str hist;
+X struct db_str db;
+X void *pstat_void;
+X struct a_struct aln; /* has llen, llnctx, llnctx_flg, showall */
+X struct a_res_str a_res; /* has individual alignment coordinates */
+X char dfile [MAX_FN]; /* file for dumping scores to */
+};
+X
+X
+SHAR_EOF
+chmod 0644 structs.h ||
+echo 'restore of structs.h failed'
+Wc_c="`wc -c < 'structs.h'`"
+test 4279 -eq "$Wc_c" ||
+ echo 'structs.h: original size 4279, current size' "$Wc_c"
+fi
+# ============= tatstats.c ==============
+if test -f 'tatstats.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping tatstats.c (File already exists)'
+else
+echo 'x - extracting tatstats.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'tatstats.c' &&
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "param.h"
+#include "tatstats.h"
+X
+#ifndef PCOMPLIB
+#include "mw.h"
+#else
+#include "p_mw.h"
+#endif
+X
+/* calc_priors() - calculate frequencies of amino-acids, possibly with counts */
+/* generate_tatprobs() - build the table of score probabilities if the
+X sequences are not too long */
+X
+double
+det(double a11, double a12, double a13,
+X double a21, double a22, double a23,
+X double a31, double a32, double a33);
+X
+/*
+X * power() - raise r to the integer power p by repeated squaring.
+X * A zero base yields 1.0 when p is 0 and 0.0 for any other exponent;
+X * a negative exponent yields the reciprocal of r to the power -p.
+X */
+double power(double r, int p)
+{
+X  double acc = 1.0;
+X  int invert = 0;
+X
+X  if (r == 0.0) {
+X    return (p == 0) ? 1.0 : 0.0;
+X  }
+X
+X  if (p < 0) {
+X    invert = 1;
+X    p = -p;
+X  }
+X
+X  /* consume the exponent one bit at a time; the base is squared only
+X     while higher-order bits are still pending */
+X  for ( ; p > 0; p >>= 1) {
+X    if (p & 1) { acc *= r; }
+X    if (p > 1) { r *= r; }
+X  }
+X
+X  if (invert) {
+X    return 1.0 / acc;
+X  }
+X  return acc;
+}
+X
+/*
+X * factorial() - product of the integers a, a-1, ..., b+1 (i.e. a!/b! when
+X * 0 <= b <= a).  Returns 1.0 when a is 0 or when a <= b.
+X */
+double
+factorial (int a, int b) {
+X
+X  double product = 1.0;
+X  int term;
+X
+X  /* a == 0 is answered directly so a negative b cannot pull a zero
+X     factor into the product */
+X  if (a != 0) {
+X    for (term = a; term > b; term--) {
+X      product *= (double) term;
+X    }
+X  }
+X
+X  return product;
+}
+X
+/*
+X * calc_priors() - fill priors[1..ppst->nsq] with residue frequencies.
+X *
+X *  n1 == 0, f_str->priors[1] > 0.0 : copy the frequencies already held
+X *                                    in f_str->priors.
+X *  n1 == 0 otherwise               : use built-in counts (the Robinson &
+X *                                    Robinson table when ppst->dnaseq is
+X *                                    SEQT_PROT, else 250 for each of
+X *                                    residues 1-4 and 0 for the rest).
+X *  n1 != 0                         : count the residues of aa1[0..n1-1]
+X *                                    that lie in 1..nsq and blend them
+X *                                    with f_str->priors weighted by
+X *                                    pseudocts.
+X */
+void
+calc_priors(double *priors,
+X            struct pstruct *ppst,
+X            struct f_struct *f_str,
+X            const unsigned char *aa1, int n1,
+X            int pseudocts)
+{
+X  /* Robinson & Robinson residue counts from Stephen Altschul, slots
+X     1..24 in the order A R N D C Q E G H I L K M F P S T W Y V B Z X *;
+X     slot 0 is a placeholder and is never copied */
+X  static const long rr_counts[25] = {
+X    0,
+X    35155, 23105, 20212, 24161, 8669, 19208, 28354, 33229, 9906, 23161,
+X    40625, 25872, 10101, 17367, 23435, 32070, 26311, 5990, 14488, 29012,
+X    0, 0, 0, 0
+X  };
+X
+X  long counts[25], total;
+X  double denom;
+X  int idx, res;
+X  int nsq = ppst->nsq;
+X  int have_seq = (n1 != 0);
+X
+X  /* no sequence, but frequencies are already available: just copy */
+X  if (!have_seq && f_str->priors[1] > 0.0) {
+X    for (idx = 1; idx <= nsq; idx++) {
+X      priors[idx] = f_str->priors[idx];
+X    }
+X    return;
+X  }
+X
+X  if (have_seq) {
+X    /* tally the residues actually present, ignoring out-of-range codes */
+X    memset(&counts[0], 0, sizeof(counts));
+X    for (idx = 0; idx < n1; idx++) {
+X      res = aa1[idx];
+X      if (res >= 1 && res <= nsq) {
+X        counts[res]++;
+X      }
+X    }
+X  }
+X  else if (ppst->dnaseq == SEQT_PROT) {
+X    for (idx = 1; idx <= 24; idx++) {
+X      counts[idx] = rr_counts[idx];
+X    }
+X  }
+X  else { /* SEQT_DNA */
+X    counts[1] = counts[2] = counts[3] = counts[4] = 250;
+X    for (idx = 5; idx <= nsq; idx++) {
+X      counts[idx] = 0;
+X    }
+X  }
+X
+X  total = 0;
+X  for (idx = 1; idx <= nsq; idx++) {
+X    total += counts[idx];
+X  }
+X
+X  if (have_seq) {
+X    /* observed counts smoothed by pseudocts worth of the stored priors */
+X    denom = (double) total + (double) pseudocts;
+X    for (idx = 1; idx <= nsq; idx++) {
+X      priors[idx] = ( ((double) pseudocts * f_str->priors[idx]) + (double) counts[idx] ) / denom;
+X    }
+X  }
+X  else {
+X    denom = (double) total;
+X    for (idx = 1; idx <= nsq; idx++) {
+X      priors[idx] = (double) counts[idx] / denom;
+X    }
+X  }
+X
+X  return;
+}
+X
+/*
+X * max_score() - largest value in scores[1..nsq]; scores[0] is not
+X * examined.  Returns -BIGNUM when nsq < 1.
+X */
+int
+max_score(int *scores, int nsq) {
+X
+X  int best = -BIGNUM;
+X  int pos;
+X
+X  for (pos = nsq; pos >= 1; pos--) {
+X    best = (scores[pos] > best) ? scores[pos] : best;
+X  }
+X
+X  return best;
+}
+X
+/*
+X * min_score() - smallest value in scores[1..nsq]; scores[0] is not
+X * examined.  Returns BIGNUM when nsq < 1.
+X */
+int
+min_score(int *scores, int nsq) {
+X
+X  int worst = BIGNUM;
+X  int pos;
+X
+X  for (pos = nsq; pos >= 1; pos--) {
+X    worst = (scores[pos] < worst) ? scores[pos] : worst;
+X  }
+X
+X  return worst;
+}
+X
+/*
+X * calc_tatusov() - probability estimate for the summed score of a chain
+X * of segments.
+X *
+X * "this" is the segment being scored; "last" is the tail of the chain
+X * built so far (walked backwards through ->prev, may be NULL).  The
+X * routine
+X *   1. totals the lengths and scores of "this" and every chain member;
+X *   2. obtains a score-probability table: in FASTS/FASTM builds a private
+X *      copy of a precalculated f_str->tatprobs[] entry when every segment
+X *      starts and ends exactly on the aa0b[]/aa0e[] boundaries (and
+X *      f_str->tatprobs is set), otherwise a table built by
+X *      generate_tatprobs() on top of last->tat;
+X *   3. unless a precalculated f_str->intprobs[] value was used, integrates
+X *      the table to get the fraction of probability at scores >= the
+X *      summed score;
+X *   4. multiplies by factorial() (power()/2^seg for FASTM) and, when the
+X *      result exceeds 0.01, converts it with 1 - exp(-x).
+X *
+X * The table is stored in this->newtat; nothing in this function frees it.
+X * aa1, pseudocts, do_opt and zsflag are accepted but are not used by the
+X * active code.  Allocation failure prints to stderr and calls exit(1).
+X */
+double
+calc_tatusov ( struct slink *last,
+X               struct slink *this,
+X               const unsigned char *aa0, int n0,
+X               const unsigned char *aa1, int n1,
+X               int **pam2, int nsq,
+X               struct f_struct *f_str,
+X               int pseudocts,
+X               int do_opt,
+X               int zsflag
+X               )
+{
+X  int i, j;
+X
+X  double *priors, tatprob, left_tatprob, right_tatprob;
+X  unsigned char *query = NULL;
+X  int length, maxlength, sumlength, sumscore, tmp, seg;
+X  int start, stop;
+X  struct slink *sl;
+X  int N, top;
+X  double *tatprobsptr;
+X
+#if defined(FASTS) || defined(FASTM)
+X  int index = 0;
+X  int notokay = 0;
+#endif
+X
+X  struct tat_str *oldtat = NULL, *newtat = NULL;
+X
+#if defined(FASTS) || defined(FASTM)
+X  start = this->vp->start - this->vp->dp + f_str->noff;
+X  stop = this->vp->stop - this->vp->dp + f_str->noff;
+X  tmp = stop - start + 1;
+#else
+X  /*
+X    FASTF alignments can also hang off the end of library sequences,
+X    but no query residues are used up in the process, but we have to
+X    keep track of which are
+X  */
+X  tmp = 0;
+X  for (i = 0 ; i < n0 ; i++) {
+X    if (this->vp->used[i] == 1) { tmp++; }
+X  }
+#endif
+X
+X  sumlength = maxlength = length = tmp;
+X  seg = 1;
+X  sumscore = this->vp->score;
+X
+#if defined(FASTS) || defined(FASTM)
+X  /* one bit per query fragment: set in index when the segment spans the
+X     fragment exactly, in notokay when it does not */
+X  if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
+X    index |= (1 << f_str->aa0i[start]);
+X  } else {
+X    notokay |= (1 << f_str->aa0i[start]);
+X  }
+#endif
+X
+X  /* add in every segment already on the chain */
+X  for(sl = last; sl != NULL ; sl = sl->prev) {
+X
+#if defined(FASTS) || defined(FASTM)
+X    start = sl->vp->start - sl->vp->dp + f_str->noff;
+X    stop = sl->vp->stop - sl->vp->dp + f_str->noff;
+X    tmp = stop - start + 1;
+#else
+X    tmp = 0;
+X    for (i = 0 ; i < n0 ; i++) {
+X      if(sl->vp->used[i] == 1) {
+X        tmp++;
+X      }
+X    }
+#endif
+X    sumlength += tmp;
+X    maxlength = tmp > maxlength ? tmp : maxlength;
+X    seg++;
+X    sumscore += sl->vp->score;
+X
+#if defined(FASTS) || defined(FASTM)
+X    if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
+X      index |= (1 << f_str->aa0i[start]);
+X    } else {
+X      notokay |= (1 << f_str->aa0i[start]);
+X    }
+#endif
+X
+X  }
+X
+X  /* negative means "not yet set from precalculated intprobs" */
+X  tatprob = -1.0;
+X
+#if defined(FASTS) || defined(FASTM)
+X
+X  /* for T?FASTS, we try to use what we've precalculated: */
+X
+X  /* with z = 3, do_opt is true, but we can use precalculated - with
+X     all other z's we can use precalculated only if !do_opt */
+X  if(!notokay && f_str->tatprobs != NULL) {
+X    /* create our own newtat and copy f_str's tat into it */
+X    index--;
+X
+X    newtat = (struct tat_str *) malloc(sizeof(struct tat_str));
+X    if(newtat == NULL) {
+X      fprintf(stderr, "Couldn't calloc memory for newtat.\n");
+X      exit(1);
+X    }
+X
+X    memcpy(newtat, f_str->tatprobs[index], sizeof(struct tat_str));
+X
+X    newtat->probs = (double *) calloc(f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1, sizeof(double));
+X    if(newtat->probs == NULL) {
+X      fprintf(stderr, "Coudln't calloc memory for newtat->probs.\n");
+X      exit(1);
+X    }
+X
+X    memcpy(newtat->probs, f_str->tatprobs[index]->probs,
+X           (f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1) * sizeof(double));
+X
+X
+X    tatprob = f_str->intprobs[index][sumscore - f_str->tatprobs[index]->lowscore];
+X
+X  } else { /* we need to recalculate from scratch */
+#endif
+X
+X    /* for T?FASTF, we're always recalculating from scratch: */
+X
+X    query = (unsigned char *) calloc(length, sizeof(unsigned char));
+X    if(query == NULL) {
+X      fprintf(stderr, "Couldn't calloc memory for query.\n");
+X      exit(1);
+X    }
+X
+#if defined(FASTS) || defined(FASTM)
+X    start = this->vp->start - this->vp->dp + f_str->noff;
+X    for(i = 0, j = 0 ; i < length ; i++) {
+X      query[j++] = aa0[start + i];
+X    }
+#else
+X    for(i = 0, j = 0 ; i < n0 ; i++) {
+X      if (this->vp->used[i] == 1) {query[j++] = aa0[i];}
+X    }
+#endif
+X
+X    /* calc_priors - not currently implemented for aa1 dependent */
+X    /*
+X      if( (do_opt && zsflag == 2) || zsflag == 4 ) {
+X        priors = &my_priors[0];
+X        calc_priors(priors, f_str, aa1, n1, pseudocts);
+X      } else {
+X        priors = f_str->priors;
+X      }
+X    */
+X
+X    priors = f_str->priors;
+X    oldtat = (last != NULL ? last->tat : NULL);
+X
+X    generate_tatprobs(query, 0, length - 1, priors, pam2, nsq, &newtat, oldtat);
+X
+X    free(query);
+#if defined(FASTS) || defined(FASTM)
+X  } /* close the FASTS-specific if-else from above */
+#endif
+X
+X  this->newtat = newtat;
+X
+X  if(tatprob < 0.0) { /* hasn't been set by precalculated FASTS intprobs */
+X
+X    /* integrate probabilities >= sumscore */
+X    tatprobsptr = newtat->probs;
+X
+X    top = newtat->highscore - newtat->lowscore;   /* last valid index */
+X    N = sumscore - newtat->lowscore;
+X
+X    /* keep N inside [0, top+1] so neither tail sum can step outside
+X       probs[]: N == 0 puts the whole table in the right tail,
+X       N == top+1 puts the whole table in the left tail */
+X    if (N < 0) { N = 0; }
+X    if (N > top + 1) { N = top + 1; }
+X
+X    right_tatprob = 0;
+X    for (i = top ; i >= N; i--) {
+X      right_tatprob += tatprobsptr[i];
+X    }
+X
+X    /* the left tail starts empty: seeding it with probs[0] would count
+X       that entry in both tails when N == 0 */
+X    left_tatprob = 0;
+X    for (i = 0 ; i < N ; i++ ) {
+X      left_tatprob += tatprobsptr[i];
+X    }
+X
+X    /* use whichever tail is smaller, then normalise by the table total */
+X    if (right_tatprob < left_tatprob) {tatprob = right_tatprob;}
+X    else {tatprob = 1.0 - left_tatprob;}
+X
+X    tatprob /= (right_tatprob+left_tatprob);
+X  }
+X
+X  if (maxlength > 0) {
+X    n1 += 2 * (maxlength - 1);
+X  }
+X
+#ifndef FASTM
+X  tatprob *= factorial(n1 - sumlength + seg, n1 - sumlength);
+#else
+X  tatprob *= power(n1 - sumlength,seg)/(1<<seg);
+#endif
+X
+X  if(tatprob > 0.01)
+X    tatprob = 1.0 - exp(-tatprob);
+X
+X  return tatprob;
+}
+X
+/*
+X * generate_tatprobs() - build the probability distribution of the total
+X * score obtained when query[begin..end] is scored, position by position,
+X * against residues drawn with probabilities priors[1..nsq] using the rows
+X * of pam2.
+X *
+X * With oldtat == NULL the distribution starts at the first residue;
+X * otherwise the distribution in oldtat is copied in and extended by all
+X * n = end - begin + 1 residues.
+X *
+X * A newly allocated struct tat_str is returned through *tatarg.  Its
+X * probs[] array has highscore - lowscore + 1 entries, and
+X * probs[s - lowscore] is the probability of total score s.  Nothing in
+X * this function frees the result.  Allocation failure prints to stderr
+X * and calls exit(1).
+X */
+void
+generate_tatprobs(const unsigned char *query,
+X                  int begin,
+X                  int end,
+X                  double *priors,
+X                  int **pam2,
+X                  int nsq,
+X                  struct tat_str **tatarg,
+X                  struct tat_str *oldtat)
+{
+X
+X  int i, j, k, l, m, n, N, highscore, lowscore;
+X  int *lowrange = NULL, *highrange = NULL;
+X  double *probs = NULL, *newprobs = NULL, tmp;
+X  struct tat_str *tatprobs = NULL;
+X  int *pamptr, *pamptrsave;
+X
+X  if((tatprobs = (struct tat_str *) calloc(1, sizeof(struct tat_str)))==NULL) {
+X    fprintf(stderr, "Couldn't allocate individual tatprob struct.\n");
+X    exit(1);
+X  }
+X
+X  n = end - begin + 1;
+X
+X  if ( (lowrange = (int *) calloc(n, sizeof(int))) == NULL ) {
+X    fprintf(stderr, "Couldn't allocate memory for lowrange.\n");
+X    exit(1);
+X  }
+X
+X  if ( (highrange = (int *) calloc(n, sizeof(int))) == NULL ) {
+X    fprintf(stderr, "Couldn't allocate memory for highrange.\n");
+X    exit(1);
+X  }
+X
+X  /* calculate the absolute highest and lowest score possible for this */
+X  /* segment.  Also, set the range we need to iterate over at each position */
+X  /* in the query: */
+X  if(oldtat == NULL) {
+X    highscore = lowscore = 0;
+X  } else {
+X    highscore = oldtat->highscore;
+X    lowscore = oldtat->lowscore;
+X  }
+X
+X  for ( i = 0 ; i < n ; i++ ) {
+X
+X    /* a zero residue code ends the range calculation early */
+X    if (query[begin+i] == 0) break;
+X
+X    highscore =
+X      (highrange[i] = highscore + max_score(pam2[query[begin + i]], nsq));
+X
+X    lowscore =
+X      (lowrange[i] = lowscore + min_score(pam2[query[begin + i]], nsq));
+X
+X    /*
+X      fprintf(stderr, "i: %d, max: %d, min: %d, high[i]: %d, low[i]: %d, high: %d, low: %d, char: %d\n",
+X              i,
+X              max_score(pam2[query[begin + i]], nsq),
+X              min_score(pam2[query[begin + i]], nsq),
+X              highrange[i], lowrange[i],
+X              highscore, lowscore, query[begin + i]);
+X    */
+X  }
+X
+X  /* allocate an array of probabilities for all possible scores */
+X  /* i.e. if highest score possible is 50 and lowest score possible */
+X  /* is -20, then there are 50 - (-20) + 1 = 71 possible different */
+X  /* scores (including 0): */
+X  N = highscore - lowscore;
+X  if ( (probs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
+X    fprintf(stderr, "Couldn't allocate probability matrix : %d.\n", N + 1);
+X    exit(1);
+X  }
+X
+X  if(oldtat == NULL) {
+X    /* for the first position, iterate over the only possible scores, */
+X    /* summing the priors for the amino acids that can yield each score. */
+X    pamptr = pam2[query[begin]];
+X    for ( i = 1 ; i <= nsq ; i++ ) {
+X      if(priors[i] > 0.0) {
+X        probs[(pamptr[i] - lowscore)] += priors[i];
+X      }
+X    }
+X  } else {
+X    /* Need to copy the data out of oldtat->probs into probs */
+X    memcpy( &probs[oldtat->lowscore - lowscore],
+X            oldtat->probs,
+X            (oldtat->highscore - oldtat->lowscore + 1) * sizeof(double));
+X  }
+X
+X  if ( (newprobs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
+X    fprintf(stderr, "Couldn't allocate newprobs matrix.\n");
+X    exit(1);
+X  }
+X
+X  /* now for each remaining residue in the segment ... */
+X  for ( i = (oldtat == NULL ? 1 : 0) ; i < n ; i++ ) {
+X
+X    pamptrsave = pam2[query[begin + i]];
+X
+X    /* ... calculate new probability distribution .... */
+X
+X    /* ... for each possible score (limited to current range) ... */
+X    for ( j = lowrange[i] - lowscore,
+X            k = highrange[i] - lowscore ;
+X          j <= k ;
+X          j++ ) {
+X
+X      tmp = 0.0;
+X      /* ... for each of the possible alignment scores at this position ... */
+X      for ( l = 1 ;
+X            l <= nsq ;
+X            l++) {
+X
+X        /* make sure we don't go past highest possible score, or past
+X           the lowest possible score; not sure why this can happen */
+X        m = j - pamptrsave[l];
+X        if ( m <= N && m >= 0 ) {
+X          /* update the probability of getting score j.  The prior is
+X             indexed by l, not walked with a pointer inside this test,
+X             so a skipped residue cannot shift the priors used for the
+X             residues after it */
+X          tmp += probs[m] * priors[l];
+X        }
+X      }
+X      newprobs[j] += tmp;
+X    }
+X
+X    /* save the new set of probabilities, get rid of old; we don't
+X       necessarily have to copy/clear all N+1 slots, we could use
+X       high/low score boundaries -- not sure that's worth the
+X       effort. */
+X    memcpy(probs, newprobs, (N + 1) * sizeof(double));
+X    memset(newprobs, 0, (N + 1) * sizeof(double));
+X  }
+X
+X  free(newprobs);
+X  free(highrange);
+X  free(lowrange);
+X
+X  tatprobs->probs = probs;
+X  /* tatprobs->intprobs = intprobs; */
+X  tatprobs->lowscore = lowscore;
+X  tatprobs->highscore = highscore;
+X
+X  *tatarg = tatprobs;
+}
+X
+SHAR_EOF
+chmod 0644 tatstats.c ||
+echo 'restore of tatstats.c failed'
+Wc_c="`wc -c < 'tatstats.c'`"
+test 12998 -eq "$Wc_c" ||
+ echo 'tatstats.c: original size 12998, current size' "$Wc_c"
+fi
+# ============= tatstats.h ==============
+if test -f 'tatstats.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping tatstats.h (File already exists)'
+else
+echo 'x - extracting tatstats.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'tatstats.h' &&
+#ifndef MAXSQ
+#include "param.h"
+#endif
+X
+#ifndef MAXSAV
+#ifdef FASTS
+#define MAXSAV 25
+#else
+#define MAXSAV 10
+#endif
+#endif
+X
+#if defined(IBM_AIX) && defined(MAXSEG)
+#undef MAXSEG
+#endif
+#define MAXSEG 30
+X
+/* struct savestr - one saved match.  calc_tatusov() reaches it through
+X slink->vp and reads score, plus start/stop/dp in FASTS/FASTM builds or
+X used[] in the other builds.
+X NOTE(review): the comment on start0 reads like a score description,
+X not a start position -- confirm which is intended. */
+struct savestr {
+X int score; /* pam score with segment optimization */
+X int score0; /* pam score of best single segment */
+X int start0; /* score from global match */
+X int dp; /* diagonal of match */
+X int start; /* start of match in lib seq */
+X int stop; /* end of match in lib seq */
+X int exact; /* exact match */
+#if defined(FASTF)
+X int *used; /* array of positions in aa0 that were used */
+#endif
+};
+X
+/* struct dstruct - state of the run being tracked on one diagonal;
+X struct f_struct holds these through its "diag" pointer. */
+struct dstruct { /* diagonal structure for saving current run */
+X int score; /* hash score of current match */
+X int start; /* start of current match */
+X int stop; /* end of current match */
+X struct savestr *dmax; /* location in vmax[] where best score data saved */
+};
+X
+/* struct tat_str - score-probability table as built by
+X generate_tatprobs() and read by calc_tatusov(). */
+struct tat_str {
+X double *probs; /* highscore - lowscore + 1 entries; probs[s - lowscore] is for score s */
+X int lowscore; /* lowest score the table covers */
+X int highscore; /* highest score the table covers */
+};
+X
+/* struct f_struct - per-comparison working storage passed to the
+X statistics routines declared in this header.  Its layout depends on
+X the build: FASTS/FASTM builds carry per-segment arrays (aa0b, aa0e,
+X aa0i, ...) and the precalculated tatprobs/intprobs tables, while other
+X builds carry a single nmoff, aa0 and aa0ix instead.
+X calc_tatusov() reads noff, priors and, in FASTS/FASTM builds, aa0b,
+X aa0e, aa0i, tatprobs and intprobs; calc_priors() reads priors. */
+struct f_struct {
+X struct dstruct *diag;
+X struct savestr *vmax; /* best matches saved for one sequence */
+X struct savestr **vptr;
+X struct slink *sarr;
+X struct savestr *lowmax;
+X int maxsav; /* max number of peptide alignments saved in search */
+X int maxsav_w; /* max number of peptide alignments saved in alignment */
+X int shuff_cnt;
+X int nsave;
+X int ndo;
+X int noff;
+X int nm0; /* number of fragments */
+#if defined(FASTS) || defined(FASTM)
+X int *nmoff; /* offset number, start */
+X int *nm_u;
+X int *aa0b; /* beginning of each segment */
+X int *aa0e; /* end of each segment */
+X int *aa0i; /* index of each segment */
+X int *aa0s; /* max score of each segment */
+X int *aa0l; /* longest possible peptide match */
+#else
+X int nmoff; /* offset number, start */
+X unsigned char *aa0;
+X int aa0ix;
+#endif
+X unsigned char *aa0t; /* temp location for peptides */
+X int *aa0ti; /* temp index for peptides */
+X int hmask; /* hash constants */
+X int *pamh1; /* pam based array */
+X int *pamh2; /* pam based kfact array */
+#if defined(FASTS) || defined(FASTM)
+X int *link, *harr, *l_end; /* hash arrays */
+#else
+X struct hlstr *link, *harr; /* hash arrays */
+#endif
+X int kshft; /* shift width */
+X int nsav, lowscor; /* number of saved runs, worst saved run */
+X unsigned char *aa1x; /* contains translated codons 111222333 */
+X unsigned char *aa1y; /* contains translated codons 123123123 */
+X int n10;
+X int *waa;
+X int *res;
+X int max_res;
+X double *priors;
+#if defined(FASTS) || defined(FASTM)
+X struct tat_str **tatprobs; /* array of pointers to tat structs */
+X double **intprobs; /* array of integrated tatprobs */
+#endif
+X int dotat;
+X double spacefactor;
+};
+X
+/* struct slink - one link in a doubly linked chain of saved segments.
+X calc_tatusov() walks the chain backwards through "prev", sums
+X vp->score, reads "tat" of the chain tail as the starting table and
+X stores the table it produces in "newtat". */
+struct slink {
+X int score;
+X double tatprob;
+X struct tat_str *tat;
+X struct tat_str *newtat;
+X struct savestr *vp;
+X struct slink *next;
+X struct slink *prev;
+};
+X
+/* struct segstr - a probability paired with a length.
+X NOTE(review): no user of this struct is visible in this header;
+X confirm its role against the code that fills it in. */
+struct segstr {
+X double tatprob;
+X int length;
+};
+X
+void generate_tatprobs(const unsigned char *query,
+X int begin,
+X int end,
+X double *priors,
+X int **pam2,
+X int nsq,
+X struct tat_str **tatarg, struct tat_str *oldtat);
+X
+double
+calc_tatusov ( struct slink *last,
+X struct slink *this,
+X const unsigned char *aa0, int n0,
+X const unsigned char *aa1, int n1,
+X int **pam2, int nsq,
+X struct f_struct *f_str,
+X int pseudocts,
+X int do_opt,
+X int zsflag
+X );
+X
+double seg_tatprob(struct slink *start,
+X const unsigned char *aa0,
+X int n0,
+X const unsigned char *aa1,
+X int n1,
+X struct f_struct *f_str,
+X struct pstruct *ppst,
+X int do_opt);
+X
+void calc_priors(double *priors,
+X struct pstruct *ppst,
+X struct f_struct *f_str,
+X const unsigned char *aa1,
+X int n1, int pseudocts);
+X
+double factorial (int a, int b);
+X
+int max_score(int *scores, int nsq);
+X
+int min_score(int *scores, int nsq);
+X
+double calc_spacefactor(struct f_struct *f_str);
+X
+void linreg(double *lnx, double *x, double *lny,
+X int n,
+X double *a, double *b, double *c, int start);
+SHAR_EOF
+chmod 0644 tatstats.h ||
+echo 'restore of tatstats.h failed'
+Wc_c="`wc -c < 'tatstats.h'`"
+test 4126 -eq "$Wc_c" ||
+ echo 'tatstats.h: original size 4126, current size' "$Wc_c"
+fi
+# ============= test.bat ==============
+if test -f 'test.bat' -a X"$1" != X"-c"; then
+ echo 'x - skipping test.bat (File already exists)'
+else
+echo 'x - extracting test.bat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test.bat' &&
+rem ""
+rem "starting fasta34_t - protein on win32"
+rem ""
+fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
+fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
+rem "done"
+rem "starting fastxy34_t"
+fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
+fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
+fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
+fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
+rem "done"
+rem "starting fastxy34_t rev"
+fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
+fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
+fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
+rem "done"
+rem "starting ssearch34_t"
+ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
+ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
+rem "done"
+rem "starting ssearch34sse2_t"
+ssearch34sse2_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3sse2
+ssearch34sse2_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25sse2
+rem "done"
+rem "starting prss34"
+prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
+prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
+rem "done"
+rem "starting fasta34_t - DNA"
+fasta34_t -S -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
+fasta34_t -S -q mgstm1.rev %M 4 > test_m1.ok4r_t
+rem "done"
+rem "starting tfastxy34_t"
+tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa m > test_m1.tx2_t.html
+tfasty34_t -q -i -3 -N 5000 mgstm1.aa m > test_m1.ty2_t
+rem "done"
+rem "starting fastf34_t"
+fastf34_t -q m1r.aa q > test_mf.ff_t
+fastf34 -q m1r.aa q > test_mf.ff_s
+rem "done"
+rem "starting tfastf34_t"
+tfastf34_t -q m1r.aa %m > test_mf.tf_tr
+rem "done"
+rem "starting fasts34_t"
+fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
+fasts34_t -q ngt.aa q > test_m1.fs_t
+fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
+rem "done"
+rem "starting tfasts34_t"
+tfasts34_t -q n0.aa %m > test_m1.ts_r
+rem "done"
+rem "starting fasta34 - protein"
+fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
+fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
+rem "done"
+rem "starting fastx3"
+fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
+rem "done"
+rem "starting fasty3"
+fasty34 -q mgstm1.esq q > test_m1.ok2y
+rem "done"
+rem "starting fasta34 - DNA "
+fasta34 -m 9c -q mgstm1.seq M 4 > test_m1.ok4
+rem "done"
+rem "starting ssearch3"
+ssearch34 -S -q -z 2 mgstm1.aa a > test_m1.ss_z2
+ssearch34 -q -s P250 mgstm1.aa a > test_m1.ss_p25
+ssearch34 -S -q -s BL50 mgstm1.aa a > test_m1.ss_bl50
+ssearch34 -S -q -s blosum50.mat mgstm1.aa a > test_m1.ss_bl50f
+ssearch34sse2 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2_sse2
+ssearch34sse2 -q -s P250 mgstm1.aa q > test_m1.ss_p25_sse2
+rem "done"
+rem "starting tfastxy3"
+tfastx34 -q mgstm1.aa M > test_m1.tx2
+tfasty34 -m 9c -q mgstm1.aa M > test_m1.ty2
+rem "done"
+rem "starting fasts34"
+fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
+fasts34 -q ngt.aa q > test_m1.fs
+rem "done"
+SHAR_EOF
+chmod 0644 test.bat ||
+echo 'restore of test.bat failed'
+Wc_c="`wc -c < 'test.bat'`"
+test 2891 -eq "$Wc_c" ||
+ echo 'test.bat: original size 2891, current size' "$Wc_c"
+fi
+# ============= test.sh ==============
+if test -f 'test.sh' -a X"$1" != X"-c"; then
+ echo 'x - skipping test.sh (File already exists)'
+else
+echo 'x - extracting test.sh (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test.sh' &&
+#!/bin/csh -f
+echo ""
+echo "starting fasta34_t - protein" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
+fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy34_t" `date`
+fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
+fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
+fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
+fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
+echo "done"
+echo "starting fastxy34_t rev" `date`
+fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
+fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
+fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
+echo "done"
+echo "starting ssearch34_t" `date`
+ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
+ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
+echo "done"
+echo "starting prss34" `date`
+prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
+prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
+echo "done"
+echo "starting fasta34_t - DNA" `date`
+fasta34_t -S -q -z 2 mgstm1.seq %RMB 4 > test_m1.ok4_tz2
+fasta34_t -S -q mgstm1.rev %RMB 4 > test_m1.ok4r_t
+echo "done"
+#echo "starting tfasta34_t" `date`
+#tfasta34_t -q mgstm1.aa %RMB > test_m1.tk2_t
+#echo "done"
+echo "starting tfastxy34_t" `date`
+tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2_t.html
+tfasty34_t -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2_t
+echo "done"
+echo "starting fastf34_t" `date`
+fastf34_t -q m1r.aa q > test_mf.ff_t
+fastf34 -q m1r.aa q > test_mf.ff_s
+echo "done"
+echo "starting tfastf34_t" `date`
+tfastf34_t -q m1r.aa %r > test_mf.tf_tr
+echo "done"
+echo "starting fasts34_t" `date`
+fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
+fasts34_t -q ngt.aa q > test_m1.fs_t
+fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
+echo "done"
+echo "starting tfasts34_t" `date`
+tfasts34_t -q n0.aa %r > test_m1.ts_r
+echo "done"
+echo "starting fasta34 - protein" `date`
+fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
+fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
+echo "done"
+echo "starting fastx3" `date`
+fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
+echo "done"
+echo "starting fasty3" `date`
+fasty34 -q mgstm1.esq q > test_m1.ok2y
+echo "done"
+echo "starting fasta34 - DNA " `date`
+fasta34 -m 9c -q mgstm1.seq %RMB 4 > test_m1.ok4
+echo "done"
+echo "starting ssearch3" `date`
+ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
+ssearch34 -S -q -s BL50 mgstm1.aa q > test_m1.ss_bl50
+ssearch34 -S -q -s blosum50.mat mgstm1.aa q > test_m1.ss_bl50f
+ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
+echo "done"
+#echo "starting tfasta3" `date`
+#tfasta34 -q mgstm1.aa %RMB > test_m1.tk2
+#echo "done"
+echo "starting tfastxy3" `date`
+tfastx34 -q mgstm1.aa %RMB > test_m1.tx2
+tfasty34 -m 9c -q mgstm1.aa %RMB > test_m1.ty2
+echo "done"
+echo "starting fasts34" `date`
+fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
+fasts34 -q ngt.aa q > test_m1.fs
+echo "done" `date`
+SHAR_EOF
+chmod 0755 test.sh ||
+echo 'restore of test.sh failed'
+Wc_c="`wc -c < 'test.sh'`"
+test 2996 -eq "$Wc_c" ||
+ echo 'test.sh: original size 2996, current size' "$Wc_c"
+fi
+# ============= test2.bat ==============
+if test -f 'test2.bat' -a X"$1" != X"-c"; then
+ echo 'x - skipping test2.bat (File already exists)'
+else
+echo 'x - extracting test2.bat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test2.bat' &&
+rem ""
+rem "starting fasta34_t - protein on win32"
+rem ""
+fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
+fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
+rem "done"
+rem "starting fastxy34_t"
+fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
+fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
+fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
+fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
+rem "done"
+rem "starting fastxy34_t rev"
+fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
+fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
+fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
+rem "done"
+rem "starting ssearch34_t"
+ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
+ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
+rem "done"
+rem "starting ssearch34sse2_t"
+ssearch34sse2_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3sse2
+ssearch34sse2_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25sse2
+rem "done"
+rem "starting prss34"
+prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
+prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
+rem "done"
+rem "starting fasta34_t - DNA"
+fasta34_t -S -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
+fasta34_t -S -q mgstm1.rev %M 4 > test_m1.ok4r_t
+rem "done"
+rem "starting tfastxy34_t"
+tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2_t.html
+tfasty34_t -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2_t
+rem "done"
+rem "starting fastf34_t"
+fastf34_t -q m1r.aa q > test_mf.ff_t
+fastf34 -q m1r.aa q > test_mf.ff_s
+rem "done"
+rem "starting tfastf34_t"
+tfastf34_t -q m1r.aa %r > test_mf.tf_tr
+rem "done"
+rem "starting fasts34_t"
+fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
+fasts34_t -q ngt.aa q > test_m1.fs_t
+fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
+rem "done"
+rem "starting tfasts34_t"
+tfasts34_t -q n0.aa %r > test_m1.ts_r
+rem "done"
+rem "starting fasta34 - protein"
+fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
+fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
+rem "done"
+rem "starting fastx3"
+fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
+rem "done"
+rem "starting fasty3"
+fasty34 -q mgstm1.esq q > test_m1.ok2y
+rem "done"
+rem "starting fasta34 - DNA "
+fasta34 -m 9c -q mgstm1.seq M 4 > test_m1.ok4
+rem "done"
+rem "starting ssearch3"
+ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
+ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
+ssearch34sse2 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2_sse2
+ssearch34sse2 -q -s P250 mgstm1.aa q > test_m1.ss_p25_sse2
+rem "done"
+rem "starting tfastxy3"
+tfastx34 -q mgstm1.aa M > test_m1.tx2
+tfasty34 -m 9c -q mgstm1.aa M > test_m1.ty2
+rem "done"
+rem "starting fasts34"
+fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
+fasts34 -q ngt.aa q > test_m1.fs
+rem "done"
+SHAR_EOF
+chmod 0755 test2.bat ||
+echo 'restore of test2.bat failed'
+Wc_c="`wc -c < 'test2.bat'`"
+test 2775 -eq "$Wc_c" ||
+ echo 'test2.bat: original size 2775, current size' "$Wc_c"
+fi
+# ============= test_osx.sh ==============
+# Unpack test_osx.sh from this archive.  An existing file is left alone
+# unless the first argument to the archive script is -c.
+if test -f 'test_osx.sh' -a X"$1" != X"-c"; then
+ echo 'x - skipping test_osx.sh (File already exists)'
+else
+# The payload is a csh script (note its "#!/bin/csh -f" first line) that runs
+# the threaded *_t programs and then the unthreaded ones, redirecting each
+# result into a test_* output file.
+# The quoted 'SHAR_EOF' delimiter keeps the payload literal; sed removes one
+# leading X from any payload line that has one.  The command chain below is
+# "sed && chmod || echo", so the failure message is printed when either the
+# extraction or the chmod fails.
+echo 'x - extracting test_osx.sh (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test_osx.sh' &&
+#!/bin/csh -f
+echo ""
+echo "starting fasta34_t - protein" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
+fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy34_t" `date`
+fastx34_t -m 9 -S -q mgtt2_x.seq q > test_t2.xk2_t
+fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
+fastx34_t -m 9 -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
+fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
+echo "done"
+echo "starting fastxy34_t rev" `date`
+fastx34_t -m 9 -q -m 5 mgstm1.rev q > test_m1.xk2r_t
+fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
+fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
+echo "done"
+echo "starting ssearch34_t" `date`
+ssearch34_t -m 9 -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
+ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
+echo "done"
+echo "starting fasta34_t - DNA" `date`
+fasta34_t -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
+fasta34_t -q mgstm1.rev %M 4 > test_m1.ok4r_t
+echo "done"
+echo "starting tfasta34_t" `date`
+tfasta34_t -q mgstm1.aa %M > test_m1.tk2_t
+echo "done"
+echo "starting tfastxy34_t" `date`
+tfastx34_t -m 9 -q -i -3 -m 6 mgstm1.aa %m > test_m1.tx2_t.html
+tfasty34_t -q -3 -N 5000 mgstm1.aa %m > test_m1.ty2_t
+echo "done"
+echo "starting fastf34_t" `date`
+fastf34_t -q m1r.aa q > test_mf.ff_s
+echo "done"
+echo "starting tfastf34_t" `date`
+tfastf34_t -q m1r.aa %m > test_mf.tf_r
+echo "done"
+echo "starting fasts34_t" `date`
+fasts34_t -q n0.aa q > test_m1.fs_s
+echo "done"
+echo "starting tfasts34_t" `date`
+tfasts34_t -q n0.aa %m > test_m1.ts_r
+echo "done"
+echo "starting fasta34 - protein" `date`
+fasta34 -q -z 2 mgstm1.aa q > test_m1.ok2z2
+fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
+echo "done"
+echo "starting fastx3" `date`
+fastx34 -m 9 -q mgstm1.esq q > test_m1.ok2x
+echo "done"
+echo "starting fasty3" `date`
+fasty34 -q mgstm1.esq q > test_m1.ok2y
+echo "done"
+echo "starting fasta34 - DNA " `date`
+fasta34 -m 9 -q mgstm1.seq %m 4 > test_m1.ok4
+echo "done"
+echo "starting ssearch3" `date`
+ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
+ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
+echo "done"
+echo "starting tfasta3" `date`
+tfasta34 -q mgstm1.aa %m > test_m1.tk2
+echo "done"
+echo "starting tfastxy3" `date`
+tfastx34 -q mgstm1.aa %m > test_m1.tx2
+tfasty34 -m 9 -q mgstm1.aa %m > test_m1.ty2
+echo "done" `date`
+SHAR_EOF
+chmod 0755 test_osx.sh ||
+echo 'restore of test_osx.sh failed'
+# Integrity check: the extracted file must be exactly 2429 bytes, so the
+# payload above must not be edited without updating this number.
+Wc_c="`wc -c < 'test_osx.sh'`"
+test 2429 -eq "$Wc_c" ||
+ echo 'test_osx.sh: original size 2429, current size' "$Wc_c"
+fi
+# ============= test_s.sh ==============
+# Unpack test_s.sh from this archive.  An existing file is left alone
+# unless the first argument to the archive script is -c.
+if test -f 'test_s.sh' -a X"$1" != X"-c"; then
+ echo 'x - skipping test_s.sh (File already exists)'
+else
+# The payload is a csh script that runs only the program names without the
+# _t suffix (fasta34, fastx34, ssearch34, ...) and redirects each result
+# into a test_* output file.
+# The quoted 'SHAR_EOF' delimiter keeps the payload literal; sed removes one
+# leading X from any payload line that has one.  The command chain below is
+# "sed && chmod || echo", so the failure message is printed when either the
+# extraction or the chmod fails.
+echo 'x - extracting test_s.sh (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test_s.sh' &&
+#!/bin/csh -f
+echo ""
+echo "starting fasta34 - protein" `date` "on" `hostname`
+echo `uname -a`
+echo ""
+fasta34 -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2.html
+fasta34 -S -q -z 11 -O test_m1.ok2_p25 -s P250 mgstm1.aa:100-218 q
+echo "done"
+echo "starting fastxy34" `date`
+fastx34 -m 9 -S -q mgtt2_x.seq q > test_t2.xk2
+fasty34 -S -q mgtt2_x.seq q > test_t2.yk2
+fastx34 -m 9 -S -q -z 2 mgstm1.esq a > test_m1.xk2z2
+fasty34 -S -q -z 2 mgstm1.esq a > test_m1.yk2z2
+echo "done"
+echo "starting fastxy34 rev" `date`
+fastx34 -m 9 -q -m 5 mgstm1.rev q > test_m1.xk2r
+fasty34 -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2rz2
+fasty34 -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11
+echo "done"
+echo "starting ssearch34" `date`
+ssearch34 -m 9 -S -z 3 -q mgstm1.aa q > test_m1.ssz3
+ssearch34 -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_p25
+echo "done"
+echo "starting fasta34 - DNA" `date`
+fasta34 -q -z 2 mgstm1.seq %RMB 4 > test_m1.ok4z2
+fasta34 -q mgstm1.rev %RMB 4 > test_m1.ok4r
+echo "done"
+echo "starting tfasta34" `date`
+tfasta34 -q mgstm1.aa %RMB > test_m1.tk2
+echo "done"
+echo "starting tfastxy34" `date`
+tfastx34 -m 9 -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2.html
+tfasty34 -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2
+echo "done"
+echo "starting fastf34" `date`
+fastf34 -q m1r.aa q > test_mf.ff_s
+echo "done"
+echo "starting tfastf34" `date`
+tfastf34 -q -E 0.0001 m1r.aa %r > test_mf.tf_r
+echo "done"
+echo "starting fasts34" `date`
+fasts34 -q n0.aa q > test_m1.fs_s
+echo "done"
+echo "starting tfasts34" `date`
+tfasts34 -q n0.aa %r > test_m1.ts_r
+echo "done"
+echo "done" `date`
+SHAR_EOF
+chmod 0755 test_s.sh ||
+echo 'restore of test_s.sh failed'
+# Integrity check: the extracted file must be exactly 1597 bytes, so the
+# payload above must not be edited without updating this number.
+Wc_c="`wc -c < 'test_s.sh'`"
+test 1597 -eq "$Wc_c" ||
+ echo 'test_s.sh: original size 1597, current size' "$Wc_c"
+fi
+# ============= test_z.sh ==============
+# Unpack test_z.sh from this archive.  An existing file is left alone
+# unless the first argument to the archive script is -c.
+if test -f 'test_z.sh' -a X"$1" != X"-c"; then
+ echo 'x - skipping test_z.sh (File already exists)'
+else
+# The payload is a csh script with four foreach loops (fasta34_t,
+# ssearch34_t, fasta34, ssearch34).  Each loop passes -z values 1 2 3 6 11
+# and runs four query files, writing one output file per query and -z value.
+# The quoted 'SHAR_EOF' delimiter keeps the payload literal, so $z and ${z}
+# reach the extracted script unexpanded; sed removes one leading X from any
+# payload line that has one.  The command chain below is
+# "sed && chmod || echo", so the failure message is printed when either the
+# extraction or the chmod fails.
+echo 'x - extracting test_z.sh (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'test_z.sh' &&
+#!/bin/csh -f
+echo "starting fasta34_t - protein" `date`
+foreach z ( 1 2 3 6 11 )
+fasta34_t -q -z $z mgstm1.aa a > test_m1_a.ok2_t_${z}
+fasta34_t -q -z $z oohu.aa a > test_m1_b.ok2_t_${z}
+fasta34_t -q -S -z $z prio_atepa.aa a > test_m1_c.ok2S_t_${z}
+fasta34_t -q -S -z $z h10_human.aa a > test_m1_d.ok2S_t_${z}
+end
+echo "done"
+echo "starting ssearch34_t" `date`
+foreach z ( 1 2 3 6 11 )
+ssearch34_t -q -z $z mgstm1.aa a > test_m1_a.ssS_t_${z}
+ssearch34_t -q -z $z oohu.aa a > test_m1_b.ssS_t_${z}
+ssearch34_t -q -sBL62 -S -f -11 -z $z prio_atepa.aa a > test_m1_c.ssSbl62_t_${z}
+ssearch34_t -q -sBL62 -S -f -11 -z $z h10_human.aa a > test_m1_d.ssSbl62_t_${z}
+end
+echo "done"
+echo "starting fasta34 - protein" `date`
+foreach z ( 1 2 3 6 11 )
+fasta34 -q -z $z mgstm1.aa a > test_m1_a.ok2_${z}
+fasta34 -q -z $z oohu.aa a > test_m1_b.ok2_${z}
+fasta34 -q -S -sBL62 -f -11 -z $z prio_atepa.aa a > test_m1_c.ok2Sbl62_${z}
+fasta34 -q -S -sBL62 -f -11 -z $z h10_human.aa a > test_m1_d.ok2Sbl62_${z}
+end
+echo "done"
+echo "starting ssearch3" `date`
+foreach z ( 1 2 3 6 11 )
+ssearch34 -q -z $z mgstm1.aa a > test_m1_a.ssS_${z}
+ssearch34 -q -z $z oohu.aa a > test_m1_b.ssS_${z}
+ssearch34 -q -S -z $z prio_atepa.aa a > test_m1_c.ssS_${z}
+ssearch34 -q -S -z $z h10_human.aa a > test_m1_d.ssS_${z}
+end
+echo "done" `date`
+SHAR_EOF
+chmod 0755 test_z.sh ||
+echo 'restore of test_z.sh failed'
+# Integrity check: the extracted file must be exactly 1312 bytes, so the
+# payload above must not be edited without updating this number.
+Wc_c="`wc -c < 'test_z.sh'`"
+test 1312 -eq "$Wc_c" ||
+ echo 'test_z.sh: original size 1312, current size' "$Wc_c"
+fi
+# ============= tfasts3.rsp ==============
+# Unpack tfasts3.rsp from this archive.  An existing file is left alone
+# unless the first argument to the archive script is -c.
+if test -f 'tfasts3.rsp' -a X"$1" != X"-c"; then
+ echo 'x - skipping tfasts3.rsp (File already exists)'
+else
+# The payload is a single line listing .obj file names.
+# NOTE(review): presumably a linker response file for a Windows build --
+# confirm against the makefile that references it.
+# The command chain below is "sed && chmod || echo", so the failure message
+# is printed when either the extraction or the chmod fails.
+echo 'x - extracting tfasts3.rsp (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'tfasts3.rsp' &&
+compacc.obj doinit.obj showbest.obj htime.obj apam.obj scaleswt.obj karlin.obj last_tat.obj tatsttfs.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
+SHAR_EOF
+chmod 0644 tfasts3.rsp ||
+echo 'restore of tfasts3.rsp failed'
+# Integrity check: the extracted file must be exactly 203 bytes, so the
+# payload above must not be edited without updating this number.
+Wc_c="`wc -c < 'tfasts3.rsp'`"
+test 203 -eq "$Wc_c" ||
+ echo 'tfasts3.rsp: original size 203, current size' "$Wc_c"
+fi
+# ============= thr.h ==============
+# Unpack thr.h from this archive.  An existing file is left alone unless
+# the first argument to the archive script is -c.
+if test -f 'thr.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping thr.h (File already exists)'
+else
+# The payload is a C header.  Without XTERNAL defined it defines the
+# worker/reader buffer pointer arrays, their indices and counters, and
+# start_thread; with XTERNAL defined it only declares them extern.
+# Payload lines consisting of a lone X become empty lines once sed strips
+# the leading X.
+# NOTE(review): the payload's comments misspell "worker_cond_var" as
+# "woker_cond_var" and appear to misspell "start_cond_var" as
+# "start_cont_var".  They are left as-is here because the size check below
+# expects the recorded byte count; fix them at the source and regenerate.
+# The command chain below is "sed && chmod || echo", so the failure message
+# is printed when either the extraction or the chmod fails.
+echo 'x - extracting thr.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'thr.h' &&
+X
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+X
+/* $Name: fa_34_26_5 $ - $Id: thr.h,v 1.2 1999/12/30 01:26:59 wrp Exp $ */
+X
+#ifndef MAX_WORKERS
+#define MAX_WORKERS 2
+#endif
+#define NUM_WORK_BUF 2*MAX_WORKERS
+X
+#ifndef XTERNAL
+struct buf_head *worker_buf[NUM_WORK_BUF]; /* pointers to full buffers */
+struct buf_head *reader_buf[NUM_WORK_BUF]; /* pointers to empty buffers */
+X
+/* protected by worker_mutex/woker_cond_var */
+int worker_buf_workp, worker_buf_readp; /* indices into full-buffers ptrs */
+int num_worker_bufs;
+int reader_done;
+X
+/* protected by reader_mutex/reader_cond var */
+int reader_buf_workp, reader_buf_readp; /* indices into empty-buffers ptrs */
+int num_reader_bufs;
+X
+/* protected by start_mutex/start_cont_var */
+int start_thread=1; /* start-up predicate, 0 starts */
+#else
+extern struct buf_head *worker_buf[];
+extern struct buf_head *reader_buf[];
+extern int num_worker_bufs, reader_done, num_reader_bufs;
+extern int worker_buf_workp, worker_buf_readp;
+extern int reader_buf_workp, reader_buf_readp;
+X
+extern int start_thread;
+#endif
+X
+SHAR_EOF
+chmod 0644 thr.h ||
+echo 'restore of thr.h failed'
+# Integrity check: the extracted file must be exactly 1144 bytes, so the
+# payload above must not be edited without updating this number.
+Wc_c="`wc -c < 'thr.h'`"
+test 1144 -eq "$Wc_c" ||
+ echo 'thr.h: original size 1144, current size' "$Wc_c"
+fi
+# ============= titin_hum.aa ==============
+if test -f 'titin_hum.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping titin_hum.aa (File already exists)'
+else
+echo 'x - extracting titin_hum.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'titin_hum.aa' &&
+>gi|20143914|ref|NP_003310.2| titin isoform N2-B [Homo sapiens]
+MTTQAPTFTQPLQSVVVLEGSTATFEAHISGFPVPEVSWFRDGQVISTSTLPGVQISFSDGRAKLTIPAV
+TKANSGRYSLKATNGSGQATSTAELLVKAETAPPNFVQRLQSMTVRQGSQVRLQVRVTGIPTPVVKFYRD
+GAEIQSSLDFQISQEGDLYSLLIAEAYPEDSGTYSVNATNSVGRATSTAELLVQGEEEVPAKKTKTIVST
+AQISESRQTRIEKKIEAHFDARSIATVEMVIDGAAGQQLPHKTPPRIPPKPKSRSPTPPSIAAKAQLARQ
+QSPSPIRHSPSPVRHVRAPTPSPVRSVSPAARISTSPIRSVRSPLLMRKTQASTVATGPEVPPPWKQEGY
+VASSSEAEMRETTLTTSTQIRTEERWEGRYGVQEQVTISGAAGAAASVSASASYAAEAVATGAKEVKQDA
+DKSAAVATVVAAVDMARVREPVISAVEQTAQRTTTTAVHIQPAQEQVRKEAEKTAVTKVVVAADKAKEQE
+LKSRTKEVITTKQEQMHVTHEQIRKETEKTFVPKVVISAAKAKEQETRISEEITKKQKQVTQEAIMKETR
+KTVVPKVIVATPKVKEQDLVSRGREGITTKREQVQITQEKMRKEAEKTALSTIAVATAKAKEQETILRTR
+ETMATRQEQIQVTHGKVDVGKKAEAVATVVAAVDQARVREPREPGHLEESYAQQTTLEYGYKERISAAKV
+AEPPQRPASEPHVVPKAVKPRVIQAPSETHIKTTDQKGMHISSQIKKTTDLTTERLVHVDKRPRTASPHF
+TVSKISVPKTEHGYEASIAGSAIATLQKELSATSSAQKITKSVKAPTVKPSETRVRAEPTPLPQFPFADT
+PDTYKSEAGVEVKKEVGVSITGTTVREERFEVLHGREAKVTETARVPAPVEIPVTPPTLVSGLKNVTVIE
+GESVTLECHISGYPSPTVTWYREDYQIESSIDFQITFQSGIARLMIREAFAEDSGRFTCSAVNEAGTVST
+SCYLAVQVSEEFEKETTAVTEKFTTEEKRFVESRDVVMTDTSLTEEQAGPGEPAAPYFITKPVVQKLVEG
+GSVVFGCQVGGNPKPHVYWKKSGVPLTTGYRYKVSYNKQTGECKLVISMTFADDAGEYTIVVRNKHGETS
+ASASLLEEADYELLMKSQQEMLYQTQVTAFVQEPKVGETAPGFVYSEYEKEYEKEQALIRKKMAKDTVVV
+RTYVEDQEFHISSFEERLIKEIEYRIIKTTLEELLEEDGEEKMAVDISESEAVESGFDLRIKNYRILEGM
+GVTFHCKMSGYPLPKIAWYKDGKRIKHGERYQMDFLQDGRASLRIPVVLPEDEGIYTAFASNIKGNAICS
+GKLYVEPAAPLGAPTYIPTLEPVSRIRSLSPRSVSRSPIRMSPARMSPARMSPARMSPARMSPGRRLEET
+DESQLERLYKPVFVLKPVSFKCLEGQTARFDLKVVGRPMPETFWFHDGQQIVNDYTHKVVIKEDGTQSLI
+IVPATPSDSGEWTVVAQNRAGRSSISVILTVEAVEHQVKPMFVEKLKNVNIKEGSRLEMKVRATGNPNPD
+IVWLKNSDIIVPHKYPKIRIEGTKGEAALKIDSTVSQDSAWYTATAINKAGRDTTRCKVNVEVEFAEPEP
+ERKLIIPRGTYRAKEIAAPELEPLHLRYGQEQWEEGDLYDKEKQQKPFFKKKLTSLRLKRFGPAHFECRL
+TPIGDPTMVVEWLHDGKPLEAANRLRMINEFGYCSLDYGVAYSRDSGIITCRATNKYGTDHTSATLIVKD
+EKSLVEESQLPEGRKGLQRIEELERMAHEGALTGVTTDQKEKQKPDIVLYPEPVRVLEGETARFRCRVTG
+YPQPKVNWYLNGQLIRKSKRFRVRYDGIHYLDIVDCKSYDTGEVKVTAENPEGVIEHKVKLEIQQREDFR
+SVLRRAPEPRPEFHVHEPGKLQFEVQKVDRPVDTTETKEVVKLKRAERITHEKVPEESEELRSKFKRRTE
+EGYYEAITAVELKSRKKDESYEELLRKTKDELLHWTKELTEEEKKALAEEGKITIPTFKPDKIELSPSME
+APKIFERIQSQTVGQGSDAHFRVRVVGKPDPECEWYKNGVKIERSDRIYWYWPEDNVCELVIRDVTAEDS
+ASIMVKAINIAGETSSHAFLLVQAKQLITFTQELQDVVAKEKDTMATFECETSEPFVKVKWYKDGMEVHE
+GDKYRMHSDRKVHFLSILTIDTSDAEDYSCVLVEDENVKTTAKLIVEGAVVEFVKELQDIEVPESYSGEL
+ECIVSPENIEGKWYHNDVELKSNGKYTITSRRGRQNLTVKDVTKEDQGEYSFVIDGKKTTCKLKMKPRPI
+AILQGLSDQKVCEGDIVQLEVKVSLESVEGVWMKDGQEVQPSDRVHIVIDKQSHMLLIEDMTKEDAGNYS
+FTIPALGLSTSGRVSVYSVDVITPLKDVNVIEGTKAVLECKVSVPDVTSVKWYLNDEQIKPDDRVQAIVK
+GTKQRLVINRTHASDEGPYKLIVGRVETNCNLSVEKIKIIRGLRDLTCTETQNVVFEVELSHSGIDVLWN
+FKDKEIKPSSKYKIEAHGKIYKLTVLNMMKDDEGKYTFYAGENITSGKLTVAGGAISKPLTDQTVAESQE
+AVFECEVANPDSKGEWLRDGKHLPLTNNIRSESDGHKRRLIIAATKLDDIGEYTYKVATSKTSAKLKVEA
+VKIKKTLKNLTVTETQDAVFTVELTHPNVKGVQWIKNGVVLESNEKYAISVKGTIYSLRIKNCAIVDESV
+YGFRLGRLGASARLHVETVKIIKKPKDVTALENATVAFEVSVSHDTVPVKWFHKSVEIKPSDKHRLVSER
+KVHKLMLQNISPSDAGEYTAVVGQLECKAKLFVETLHITKTMKNIEVPETKTASFECEVSHFNVPSMWLK
+NGVEIEMSEKFKIVVQGKLHQLIIMNTSTEDSAEYTFVCGNDQVSATLTVTPIMITSMLKDINAEEKDTI
+TFEVTVNYEGISYKWLKNGVEIKSTDKCQMRTKKLTHSLNIRNVHFGDAADYTFVAGKATSTATLYVEAR
+HIEFRKHIKDIKVLEKKRAMFECEVSEPDITVQWMKDDQELQITDRIKIQKEKYVHRLLIPSTRMSDAGK
+YTVVAGGNVSTAKLFVEGRDVRIRSIKKEVQVIEKQRAVVEFEVNEDDVDAHWYKDGIEINFQVQERHKY
+VVERRIHRMFISETRQSDAGEYTFVAGRNRSSVTLYVNAPEPPQVLQELQPVTVQSGKPARFCAVISGRP
+QPKISWYKEEQLLSTGFKCKFLHDGQEYTLLLIEAFPEDAAVYTCEAKNDYGVATTSASLSVEVPEVVSP
+DQEMPVYPPAIITPLQDTVTSEGQPARFQCRVSGTDLKVSWYSKDKKIKPSRFFRMTQFEDTYQLEIAEA
+YPEDEGTYTFVASNAVGQVSSTANLSLEAPESILHERIEQEIEMEMKEFSSSFLSAEEEGLHSAELQLSK
+INETLELLSESPVYSTKFDSEKEGTGPIFIKEVSNADISMGDVATLSVTVIGIPKPKIQWFFNGVLLTPS
+ADYKFVFDGDDHSLIILFTKLEDEGEYTCMASNDYGKTICSAYLKINSKGEGHKDTETESAVAKSLEKLG
+GPCPPHFLKELKPIRCAQGLPAIFEYTVVGEPAPTVTWFKENKQLCTSVYYTIIHNPNGSGTFIVNDPQR
+EDSGLYICKAENMLGESTCAAELLVLLEDTDMTDTPCKAKSTPEAPEDFPQTPLKGPAVEALDSEQEIAT
+FVKDTILKAALITEENQQLSYEHIAKANELSSQLPLGAQELQSILEQDKLTPESTREFLCINGSIHFQPL
+KEPSPNLQLQIVQSQKTFSKEGILMPEEPETQAVLSDTEKIFPSAMSIEQINSLTVEPLKTLLAEPEGNY
+PQSSIEPPMHSYLTSVAEEVLSPKEKTVSDTNREQRVTLQKQEAQSALILSQSLAEGHVESLQSPDVMIS
+QVNYEPLVPSEHSCTEGGKILIESANPLENAGQDSAVRIEEGKSLRFPLALEEKQVLLKEEHSDNVVMPP
+DQIIESKREPVAIKKVQEVQGRDLLSKESLLSGIPEEQRLNLKIQICRALQAAVASEQPGLFSEWLRNIE
+KVEVEAVNITQEPRHIMCMYLVTSAKSVTEEVTIIIEDVDPQMANLKMELRDALCAIIYEEIDILTAEGP
+RIQQGAKTSLQEEMDSFSGSQKVEPITEPEVESKYLISTEEVSYFNVQSRVKYLDATPVTKGVASAVVSD
+EKQDESLKPSEEKEESSSESGTEEVATVKIQEAEGGLIKEDGPMIHTPLVDTVSEEGDIVHLTTSITNAK
+EVNWYFENKLVPSDEKFKCLQDQNTYTLVIDKVNTEDHQGEYVCEALNDSGKTATSAKLTVVKRAAPVIK
+RKIEPLEVALGHLAKFTCEIQSAPNVRFQWFKAGREIYESDKCSIRSSKYISSLEILRTQVVDCGEYTCK
+ASNEYGSVSCTATLTVTVPGGEKKVRKLLPERKPEPKEEVVLKSVLRKRPEEEEPKVEPKKLEKVKKPAV
+PEPPPPKPVEEVEVPTVTKRERKIPEPTKVPEIKPAIPLPAPEPKPKPEAEVKTIKPPPVEPEPTPIAAP
+VTVPVVGKKAEAKAPKEEAAKPKGPIKGVPKKTPSPIEAERRKLRPGSGGEKPPDEAPFTYQLKAVPLKF
+VKEIKDIILTESEFVGSSAIFECLVSPSTAITTWMKDGSNIRESPKHRFIADGKDRKLHIIDVQLSDAGE
+YTCVLRLGNKEKTSTAKLVVEELPVRFVKTLEEEVTVVKGQPLYLSCELNKERDVVWRKDGKIVVEKPGR
+IVPGVIGLMRALTINDADDTDAGTYTVTVENANNLECSSCVKVVEVIRDWLVKPIRDQHVKPKGTAIFAC
+DIAKDTPNIKWFKGYDEIPAEPNDKTEILRDGNHLYLKIKNAMPEDIAEYAVEIEGKRYPAKLTLGEREV
+ELLKPIEDVTIYEKESASFDAEISEADIPGQWKLKGELLRPSPTCEIKAEGGKRFLTLHKVKLDQAGEVL
+YQALNAITTAILTVKEIELDFAVPLKDVTVPERRQARFECVLTREANVIWSKGPDIIKSSDKFDIIADGK
+KHILVINDSQFDDEGVYTAEVEGKKTSARLFVTGIRLKFMSPLEDQTVKEGETATFVCELSHEKMHVVWF
+KNDAKLHTSRTVLISSEGKTHKLEMKEVTLDDISQIKAQVKELSSTAQLKVLEADPYFTVKLHDKTAVEK
+DEITLKCEVSKDVPVKWFKDGEEIVPSPKYSIKADGLRRILKIKKADLKDKGEYVCDCGTDKTKANVTVE
+ARLIKVEKPLYGVEVFVGETAHFEIELSEPDVHGQWKLKGQPLTASPDCEIIEDGKKHILILHNCQLGMT
+GEVSFQAANAKSAANLKVKELPLIFITPLSDVKVFEKDEAKFECEVSREPKTFRWLKGTQEITGDDRFEL
+IKDGTKHSMVIKSAAFEDEAKYMFEAEDKHTSGKLIIEGIRLKFLTPLKDVTAKEKESAVFTVELSHDNI
+RVKWFKNDQRLHTTRSVSMQDEGKTHSITFKDLSIDDTSQIRVEAMGMSSEAKLTVLEGDPYFTGKLQDY
+TGVEKDEVILQCEISKADAPVKWFKDGKEIKPSKNAVIKADGKKRMLILKKALKSDIGQYTCDCGTDKTS
+GKLDIEDREIKLVRPLHSVEVMETETARFETEISEDDIHANWKLKGEALLQTPDCEIKEEGKIHSLVLHN
+CRLDQTGGVDFQAANVKSSAHLRVKPRVIGLLRPLKDVTVTAGETATFDCELSYEDIPVEWYLKGKKLEP
+SDKVVPRSEGKVHTLTLRDVKLEDAGEVQLTAKDFKTHANLFVKEPPVEFTKPLEDQTVEEGATAVLECE
+VSRENAKVKWFKNGTEILKSKKYEIVADGRVRKLVIHDCTPEDIKTYTCDAKDFKTSCNLNVVPPHVEFL
+RPLTDLQVREKEMARFECELSRENAKVKWFKDGAEIKKGKKYDIISKGAVRILVINKCLLDDEAEYSCEV
+RTARTSGMLTVLEEEAVFTKNLANIEVSETDTIKLVCEVSKPGAEVIWYKGDEEIIETGRYEILTEGRKR
+ILVIQNAHLEDAGNYNCRLPSSRTDGKVKVHELAAEFISKPQNLEILEGEKAEFVCSISKESFPVQWKRD
+DKTLESGDKYDVIADGKKRVLVVKDATLQDMGTYVVMVGAARAAAHLTVIEKLRIVVPLKDTRVKEQQEV
+VFNCEVNTEGAKAKWFRNEEAIFDSSKYIILQKDLVYTLRIRDAHLDDQANYNVSLTNHRGENVKSAANL
+IVEEEDLRIVEPLKDIETMEKKSVTFWCKVNRLNVTLKWTKNGEEVPFDNRVSYRVDKYKHMLTIKDCGF
+PDEGEYIVTAGQDKSVAELLIIEAPTEFVEHLEDQTVTEFDDAVFSCQLSREKANVKWYRNGREIKEGKK
+YKFEKDGSIHRLIIKDCRLDDECEYACGVEDRKSRARLFVEEIPVEIIRPPQDILEAPGADVVFLAELNK
+DKVEVQWLRNNMVVVQGDKHQMMSEGKIHRLQICDIKPRDQGEYRFIAKDKEARAKLELAAAPKIKTADQ
+DLVVDVGKPLTMVVPYDAYPKAEAEWFKENEPLSTKTIDTTAEQTSFRILEAKKGDKGRYKIVLQNKHGK
+AEGFINLKVIDVPGPVRNLEVTETFDGEVSLAWEEPLTDGGSKIIGYVVERRDIKRKTWVLATDRAESCE
+FTVTGLQKGGVEYLFRVSARNRVGTGEPVETDNPVEARSKYDVPGPPLNVTITDVNRFGVSLTWEPPEYD
+GGAEITNYVIELRDKTSIRWDTAMTVRAEDLSATVTDVVEGQEYSFRVRAQNRIGVGKPSAATPFVKVAD
+PIERPSPPVNLTSSDQTQSSVQLKWEPPLKDGGSPILGYIIERCEEGKDNWIRCNMKLVPELTYKVTGLE
+KGNKYLYRVSAENKAGVSDPSEILGPLTADDAFVEPTMDLSAFKDGLEVIVPNPITILVPSTGYPRPTAT
+WCFGDKVLETGDRVKMKTLSAYAELVISPSERSDKGIYTLKLENRVKTISGEIDVNVIARPSAPKELKFG
+DITKDSVHLTWEPPDDDGGSPLTGYVVEKREVSRKTWTKVMDFVTDLEFTVPDLVQGKEYLFKVCARNKC
+GPGEPAYVDEPVNMSTPATVPDPPENVKWRDRTANSIFLTWDPPKNDGGSRIKGYIVERCPRGSDKWVAC
+GEPVAETKMEVTGLEEGKWYAYRVKALNRQGASKPSRPTEEIQAVDTQEAPEIFLDVKLLAGLTVKAGTK
+IELPATVTGKPEPKITWTKADMILKQDKRITIENVPKKSTVTIVDSKRSDTGTYIIEAVNVCGRATAVVE
+VNVLDKPGPPAAFDITDVTNESCLLTWNPPRDDGGSKITNYVVERRATDSEVWHKLSSTVKDTNFKATKL
+IPNKEYIFRVAAENMYGVGEPVQASPITAKYQFDPPGPPTRLEPSDITKDAVTLTWCEPDDDGGSPITGY
+WVERLDPDTDKWVRCNKMPVKDTTYRVKGLTNKKKYRFRVLAENLAGPGKPSKSTEPILIKDPIDPPWPP
+GKPTVKDVGKTSVRLNWTKPEHDGGAKIESYVIEMLKTGTDEWVRVAEGVPTTQHLLPGLMEGQEYSFRV
+RAVNKAGESEPSEPSDPVLCREKLYPPSPPRWLEVINITKNTADLKWTVPEKDGGSPITNYIVEKRDVRR
+KGWQTVDTTVKDTKCTVTPLTEGSLYVFRVAAENAIGQSDYTEIEDSVLAKDTFTTPGPPYALAVVDVTK
+RHVDLKWEPPKNDGGRPIQRYVIEKKERLGTRWVKAGKTAGPDCNFRVTDVIEGTEVQFQVRAENEAGVG
+HPSEPTEILSIEDPTSPPSPPLDLHVTDAGRKHIAIAWKPPEKNGGSPIIGYHVEMCPVGTEKWMRVNSR
+PIKDLKFKVEEGVVPDKEYVLRVRAVNAIGVSEPSEISENVVAKDPDCKPTIDLETHDIIVIEGEKLSIP
+VPFRAVPVPTVSWHKDGKEVKASDRLTMKNDHISAHLEVPKSVRADAGIYTITLENKLGSATASINVKVI
+GLPGPCKDIKASDITKSSCKLTWEPPEFDGGTPILHYVLERREAGRRTYIPVMSGENKLSWTVKDLIPNG
+EYFFRVKAVNKVGGGEYIELKNPVIAQDPKQPPDPPVDVEVHNPTAEAMTITWKPPLYDGGSKIMGYIIE
+KIAKGEERWKRCNEHLVPILTYTAKGLEEGKEYQFRVRAENAAGISEPSRATPPTKAVDPIDAPKVILRT
+SLEVKRGDEIALDASISGSPYPTITWIKDENVIVPEEIKKRAAPLVRRRKGEVQEEEPFVLPLTQRLSID
+NSKKGESQLRVRDSLRPDHGLYMIKVENDHGIAKAPCTVSVLDTPGPPINFVFEDIRKTSVLCKWEPPLD
+DGGSEIINYTLEKKDKTKPDSEWIVVTSTLRHCKYSVTKLIEGKEYLFRVRAENRFGPGPPCVSKPLVAK
+DPFGPPDAPDKPIVEDVTSNSMLVKWNEPKDNGSPILGYWLEKREVNSTHWSRVNKSLLNALKANVDGLL
+EGLTYVFRVCAENAAGPGKFSPPSDPKTAHDPISPPGPPIPRVTDTSSTTIELEWEPPAFNGGGEIVGYF
+VDKQLVGTNEWSRCTEKMIKVRQYTVKEIREGADYKLRVSAVNAAGEGPPGETQPVTVAEPQEPPAVELD
+VSVKGGIQIMAGKTLRIPAVVTGRPVPTKVWTKEEGELDKDRVVIDNVGTKSELIIKDALRKDHGRYVIT
+ATNSCGSKFAAARVEVFDVPGPVLDLKPVVTNRKMCLLNWSDPEDDGGSEITGFIIERKDAKMHTWRQPI
+ETERSKCDITGLLEGQEYKFRVIAKNKFGCGPPVEIGPILAVDPLGPPTSPERLTYTERTKSTITLDWKE
+PRSNGGSPIQGYIIEKRRHDKPDFERVNKRLCPTTSFLVENLDEHQMYEFRVKAVNEIGESEPSLPLNVV
+IQDDEVPPTIKLRLSVRGDTIKVKAGEPVHIPADVTGLPMPKIEWSKNETVIEKPTDALQITKEEVSRSE
+AKTELSIPKAVREDKGTYTVTASNRLGSVFRNVHVEVYDRPSPPRNLAVTDIKAESCYLTWDAPLDNGGS
+EITHYVIDKRDASRKKAEWEEVTNTAVEKRYGIWKLIPNGQYEFRVRAVNKYGISDECKSDKVVIQDPYR
+LPGPPGKPKVLARTKGSMLVSWTPPLDNGGSPITGYWLEKREEGSPYWSRVSRAPITKVGLKGVEFNVPR
+LLEGVKYQFRAMAINAAGIGPPSEPSDPEVAGDPIFPPGPPSCPEVKDKTKSSISLGWKPPAKDGGSPIK
+GYIVEMQEEGTTDWKRVNEPDKLITTCECVVPNLKELRKYRFRVKAVNEAGESEPSDTTGEIPATDIQEE
+PEVFIDIGAQDCLVCKAGSQIRIPAVIKGRPTPKSSWEFDGKAKKAMKDGVHDIPEDAQLETAENSSVII
+IPECKRSHTGKYSITAKNKAGQKTANCRVKVMDVPGPPKDLKVSDITRGSCRLSWKMPDDDGGDRIKGYV
+IEKRTIDGKAWTKVNPDCGSTTFVVPDLLSEQQYFFRVRAENRFGIGPPVETIQRTTARDPIYPPDPPIK
+LKIGLITKNTVHLSWKPPKNDGGSPVTHYIVECLAWDPTGTKKEAWRQCNKRDVEELQFTVEDLVEGGEY
+EFRVKAVNAAGVSKPSATVGPCDCQRPDMPPSIDLKEFMEVEEGTNVNIVAKIKGVPFPTLTWFKAPPKK
+PDNKEPVLYDTHVNKLVVDDTCTLVIPQSRRSDTGLYTITAVNNLGTASKEMRLNVLGRPGPPVGPIKFE
+SVSADQMTLSWFPPKDDGGSKITNYVIEKREANRKTWVHVSSEPKECTYTIPKLLEGHEYVFRIMAQNKY
+GIGEPLDSEPETARNLFSVPGAPDKPTVSSVTRNSMTVNWEEPEYDGGSPVTGYWLEMKDTTSKRWKRVN
+RDPIKAMTLGVSYKVTGLIEGSDYQFRVYAINAAGVGPASLPSDPATARDPIAPPGPPFPKVTDWTKSSA
+DLEWSPPLKDGGSKVTGYIVEYKEEGKEEWEKGKDKEVRGTKLVVTGLKEGAFYKFRVSAVNIAGIGEPG
+EVTDVIEMKDRLVSPDLQLDASVRDRIVVHAGGVIRIIAYVSGKPPPTVTWNMNERTLPQEATIETTAIS
+SSMVIKNCQRSHQGVYSLLAKNEAGERKKTIIVDVLDVPGPVGTPFLAHNLTNESCKLTWFSPEDDGGSP
+ITNYVIEKRESDRRAWTPVTYTVTRQNATVQGLIQGKAYFFRIAAENSIGMGPFVETSEALVIREPITVP
+ERPEDLEVKEVTKNTVTLTWNPPKYDGGSEIINYVLESRLIGTEKFHKVTNDNLLSRKYTVKGLKEGDTY
+EYRVSAVNIVGQGKPSFCTKPITCKDELAPPTLHLDFRDKLTIRVGEAFALTGRYSGKPKPKVSWFKDEA
+DVLEDDRTHIKTTPATLALEKIKAKRSDSGKYCVVVENSTGSRKGFCQVNVVDRPGPPVGPVSFDEVTKD
+YMVISWKPPLDDGGSKITNYIIEKKEVGKDVWMPVTSASAKTTCKVSKLLEGKDYIFRIHAENLYGISDP
+LVSDSMKAKDRFRVPDAPDQPIVTEVTKDSALVTWNKPHDGGKPITNYILEKRETMSKRWARVTKDPIHP
+YTKFRVPDLLEGCQYEFRVSAENEIGIGDPSPPSKPVFAKDPIAKPSPPVNPEAIDTTCNSVDLTWQPPR
+HDGGSKILGYIVEYQKVGDEEWRRANHTPESCPETKYKVTGLRDGQTYKFRVLAVNAAGESDPAHVPEPV
+LVKDRLEPPELILDANMAREQHIKVGDTLRLSAIIKGVPFPKVTWKKEDRDAPTKARIDVTPVGSKLEIR
+NAAHEDGGIYSLTVENPAGSKTVSVKVLVLDKPGPPRDLEVSEIRKDSCYLTWKEPLDDGGSVITNYVVE
+RRDVASAQWSPLSATSKKKSHFAKHLNEGNQYLFRVAAENQYGRGPFVETPKPIKALDPLHPPGPPKDLH
+HVDVDKTEVSLVWNKPDRDGGSPITGYLVEYQEEGTQDWIKFKTVTNLECVVTGLQQGKTYRFRVKAENI
+VGLGLPDTTIPIECQEKLVPPSVELDVKLIEGLVVKAGTTVRFPAIIRGVPVPTAKWTTDGSEIKTDEHY
+TVETDNFSSVLTIKNCLRRDTGEYQITVSNAAGSKTVAVHLTVLDVPGPPTGPINILDVTPEHMTISWQP
+PKDDGGSPVINYIVEKQDTRKDTWGVVSSGSSKTKLKIPHLQKGCEYVFRVRAENKIGVGPPLDSTPTVA
+KHKFSPPSPPGKPVVTDITENAATVSWTLPKSDGGSPITGYYMERREVTGKWVRVNKTPIADLKFRVTGL
+YEGNTYEFRVFAENLAGLSKPSPSSDPIKACRPIKPPGPPINPKLKDKSRETADLVWTKPLSDGGSPILG
+YVVECQKPGTAQWNRINKDELIRQCAFRVPGLIEGNEYRFRIKAANIVGEGEPRELAESVIAKDILHPPE
+VELDVTCRDVITVRVGQTIRILARVKGRPEPDITWTKEGKVLVREKRVDLIQDLPRVELQIKEAVRADHG
+KYIISAKNSSGHAQGSAIVNVLDRPGPCQNLKVTNVTKENCTISWENPLDNGGSEITNFIVEYRKPNQKG
+WSIVASDVTKRLIKANLLANNEYYFRVCAENKVGVGPTIETKTPILAINPIDRPGEPENLHIADKGKTFV
+YLKWRRPDYDGGSPNLSYHVERRLKGSDDWERVHKGSIKETHYMVDRCVENQIYEFRVQTKNEGGESDWV
+KTEEVVVKEDLQKPVLDLKLSGVLTVKAGDTIRLEAGVRGKPFPEVAWTKDKDATDLTRSPRVKIDTRAD
+SSKFSLTKAKRSDGGKYVVTATNTAGSFVAYATVNVLDKPGPVRNLKIVDVSSDRCTVCWDPPEDDGGCE
+IQNYILEKCETKRMVWSTYSATVLTPGTTVTRLIEGNEYIFRVRAENKIGTGPPTESKPVIAKTKYDKPG
+RPDPPEVTKVSKEEMTVVWNPPEYDGGKSITGYFLEKKEKHSTRWVPVNKSAIPERRMKVQNLLPDHEYQ
+FRVKAENEIGIGEPSLPSRPVVAKDPIEPPGPPTNFRVVDTTKHSITLGWGKPVYDGGAPIIGYVVEMRP
+KIADASPDEGWKRCNAAAQLVRKEFTVTSLDENQEYEFRVCAQNQVGIGRPAELKEAIKPKEILEPPEID
+LDASMRKLVIVRAGCPIRLFAIVRGRPAPKVTWRKVGIDNVVRKGQVDLVDTMAFLVIPNSTRDDSGKYS
+LTLVNPAGEKAVFVNVRVLDTPGPVSDLKVSDVTKTSCHVSWAPPENDGGSQVTHYIVEKREADRKTWST
+VTPEVKKTSFHVTNLVPGNEYYFRVTAVNEYGPGVPTDVPKPVLASDPLSEPDPPRKLEVTEMTKNSATL
+AWLPPLRDGGAKIDGYITSYREEEQPADRWTEYSVVKDLSLVVTGLKEGKKYKFRVAARNAVGVSLPREA
+EGVYEAKEQLLPPKILMPEQITIKAGKKLRIEAHVYGKPHPTCKWKKGEDEVVTSSHLAVHKADSSSILI
+IKDVTRKDSGYYSLTAENSSGTDTQKIKVVVMDAPGPPQPPFDISDIDADACSLSWHIPLEDGGSNITNY
+IVEKCDVSRGDWVTALASVTKTSCRVGKLIPGQEYIFRVRAENRFGISEPLTSPKMVAQFPFGVPSEPKN
+ARVTKVNKDCIFVAWDRPDSDGGSPIIGYLIERKERNSLLWVKANDTLVRSTEYPCAGLVEGLEYSFRIY
+ALNKAGSSPPSKPTEYVTARMPVDPPGKPEVIDVTKSTVSLIWARPKHDGGSKIIGYFVEACKLPGDKWV
+RCNTAPHQIPQEEYTATGLEEKAQYQFRAIARTAVNISPPSEPSDPVTILAENVPPRIDLSVAMKSLLTV
+KAGTNVCLDATVFGKPMPTVSWKKDGTLLKPAEGIKMAMQRNLCTLELFSVNRKDSGDYTITAENSSGSK
+SATIKLKVLDKPGPPASVKINKMYSDRAMLSWEPPLEDGGSEITNYIVDKRETSRPNWAQVSATVPITSC
+SVEKLIEGHEYQFRICAENKYGVGDPVFTEPAIAKNPYDPPGRCDPPVISNITKDHMTVSWKPPADDGGS
+PITGYLLEKRETQAVNWTKVNRKPIIERTLKATGLQEGTEYEFRVTAINKAGPGKPSDASKAAYARDPQY
+PPAPPAFPKVYDTTRSSVSLSWGKPAYDGGSPIIGYLVEVKRADSDNWVRCNLPQNLQKTRFEVTGLMED
+TQYQFRVYAVNKIGYSDPSDVPDKHYPKDILIPPEGELDADLRKTLILRAGVTMRLYVPVKGRPPPKITW
+SKPNVNLRDRIGLDIKSTDFDTFLRCENVNKYDAGKYILTLENSCGKKEYTIVVKVLDTPGPPVNVTVKE
+ISKDSAYVTWEPPIIDGGSPIINYVVQKRDAERKSWSTVTTECSKTSFRVANLEEGKSYFFRVFAENEYG
+IGDPGETRDAVKASQTPGPVVDLKVRSVSKSSCSIGWKKPHSDGGSRIIGYVVDFLTEENKWQRVMKSLS
+LQYSAKDLTEGKEYTFRVSAENENGEGTPSEITVVARDDVVAPDLDLKGLPDLCYLAKENSNFRLKIPIK
+GKPAPSVSWKKGEDPLATDTRVSVESSAVNTTLIVYDCQKSDAGKYTITLKNVAGTKEGTISIKVVGKPG
+IPTGPIKFDEVTAEAMTLKWAPPKDDGGSEITNYILEKRDSVNNKWVTCASAVQKTTFRVTRLHEGMEYT
+FRVSAENKYGVGEGLKSEPIVARHPFDVPDAPPPPNIVDVRHDSVSLTWTDPKKTGGSPITGYHLEFKER
+NSLLWKRANKTPIRMRDFKVTGLTEGLEYEFRVMAINLAGVGKPSLPSEPVVALDPIDPPGKPEVINITR
+NSVTLIWTEPKYDGGHKLTGYIVEKRDLPSKSWMKANHVNVPECAFTVTDLVEGGKYEFRIRAKNTAGAI
+SAPSESTETIICKDEYEAPTIVLDPTIKDGLTIKAGDTIVLNAISILGKPLPKSSWSKAGKDIRPSDITQ
+ITSTPTSSMLTIKYATRKDAGEYTITATNPFGTKVEHVKVTVLDVPGPPGPVEISNVSAEKATLTWTPPL
+EDGGSPIKSYILEKRETSRLLWTVVSEDIQSCRHVATKLIQGNEYIFRVSAVNHYGKGEPVQSEPVKMVD
+RFGPPGPPEKPEVSNVTKNTATVSWKRPVDDGGSEITGYHVERREKKSLRWVRAIKTPVSDLRCKVTGLQ
+EGSTYEFRVSAENRAGIGPPSEASDSVLMKDAAYPPGPPSNPHVTDTTKKSASLAWGKPHYDGGLEITGY
+VVEHQKVGDEAWIKDTTGTALRITQFVVPDLQTKEKYNFRISAINDAGVGEPAVIPDVEIVEREMAPDFE
+LDAELRRTLVVRAGLSIRIFVPIKGRPAPEVTWTKDNINLKNRANIENTESFTLLIIPECNRYDTGKFVM
+TIENPAGKKSGFVNVRVLDTPGPVLNLRPTDITKDSVTLHWDLPLIDGGSRITNYIVEKREATRKSYSTA
+TTKCHKCTYKVTGLSEGCEYFFRVMAENEYGIGEPTETTEPVKASEAPSPPDSLNIMDITKSTVSLAWPK
+PKHDGGSKITGYVIEAQRKGSDQWTHITTVKGLECVVRNLTEGEEYTFQVMAVNSAGRSAPRESRPVIVK
+EQTMLPELDLRGIYQKLVIAKAGDNIKVEIPVLGRPKPTVTWKKGDQILKQTQRVNFETTATSTILNINE
+CVRSDSGPYPLTARNIVGEVGDVITIQVHDIPGPPTGPIKFDEVSSDFVTFSWDPPENDGGVPISNYVVE
+MRQTDSTTWVELATTVIRTTYKATRLTTGLEYQFRVKAQNRYGVGPGITSACIVANYPFKVPGPPGTPQV
+TAVTKDSMTISWHEPLSDGGSPILGYHVERKERNGILWQTVSKALVPGNIFKSSGLTDGIAYEFRVIAEN
+MAGKSKPSKPSEPMLALDPIDPPGKPVPLNITRHTVTLKWAKPEYTGGFKITSYIVEKRDLPNGRWLKAN
+FSNILENEFTVSGLTEDAAYEFRVIAKNAAGAISPPSEPSDAITCRDDVEAPKIKVDVKFKDTVILKAGE
+AFRLEADVSGRPPPTMEWSKDGKELEGTAKLEIKIADFSTNLVNKDSTRRDSGAYTLTATNPGGFAKHIF
+NVKVLDRPGPPEGPLAVTEVTSEKCVLSWFPPLDDGGAKIDHYIVQKRETSRLAWTNVASEVQVTKLKVT
+KLLKGNEYIFRVMAVNKYGVGEPLESEPVLAVNPYGPPDPPKNPEVTTITKDSMVVCWGHPDSDGGSEII
+NYIVERRDKAGQRWIKCNKKTLTDLRYKVSGLTEGHEYEFRIMAENAAGISAPSPTSPFYKACDTVFKPG
+PPGNPRVLDTSRSSISIAWNKPIYDGGSEITGYMVEIALPEEDEWQIVTPPAGLKATSYTITGLTENQEY
+KIRIYAMNSEGLGEPALVPGTPKAEDRMLPPEIELDADLRKVVTIRACCTLRLFVPIKGRPAPEVKWARD
+HGESLDKASIESTSSYTLLIVGNVNRFDSGKYILTVENSSGSKSAFVNVRVLDTPGPPQDLKVKEVTKTS
+VTLTWDPPLLDGGSKIKNYIVEKRESTRKAYSTVATNCHKTSWKVDQLQEGCSYYFRVLAENEYGIGLPA
+ETAESVKASERPLPPGKITLMDVTRNSVSLSWEKPEHDGGSRILGYIVEMQTKGSDKWATCATVKVTEAT
+ITGLIQGEEYSFRVSAQNEKGISDPRQLSVPVIAKDLVIPPAFKLLFNTFTVLAGEDLKVDVPFIGRPTP
+AVTWHKDNVPLKQTTRVNAESTENNSLLTIKDACREDVGHYVVKLTNSAGEAIETLNVIVLDKPGPPTGP
+VKMDEVTADSITLSWGPPKYDGGSSINNYIVEKRDTSTTTWQIVSATVARTTIKACRLKTGCEYQFRIAA
+ENRYGKSTYLNSEPTVAQYPFKVPGPPGTPVVTLSSRDSMEVQWNEPISDGGSRVIGYHLERKERNSILW
+VKLNKTPIPQTKFKTTGLEEGVEYEFRVSAENIVGIGKPSKVSECYVARDPCDPPGRPEAIIVTRNSVTL
+QWKKPTYDGGSKITGYIVEKKELPEGRWMKASFTNIIDTHFEVTGLVEDHRYEFRVIARNAAGVFSEPSE
+STGAITARDEVDPPRISMDPKYKDTIVVHAGESFKVDADIYGKPIPTIQWIKGDQELSNTARLEIKSTDF
+ATSLSVKDAVRVDSGNYILKAKNVAGERSVTVNVKVLDRPGPPEGPVVISGVTAEKCTLAWKPPLQDGGS
+DIINYIVERRETSRLVWTVVDANVQTLSCKVTKLLEGNEYTFRIMAVNKYGVGEPLESEPVVAKNPFVVP
+DAPKAPEVTTVTKDSMIVVWERPASDGGSEILGYVLEKRDKEGIRWTRCHKRLIGELRLRVTGLIENHDY
+EFRVSAENAAGLSEPSPPSAYQKACDPIYKPGPPNNPKVIDITRSSVFLSWSKPIYDGGCEIQGYIVEKC
+DVSVGEWTMCTPPTGINKTNIEVEKLLEKHEYNFRICAINKAGVGEHADVPGPIIVEEKLEAPDIDLDLE
+LRKIINIRAGGSLRLFVPIKGRPTPEVKWGKVDGEIRDAAIIDVTSSFTSLVLDNVNRYDSGKYTLTLEN
+SSGTKSAFVTVRVLDTPSPPVNLKVTEITKDSVSITWEPPLLDGGSKIKNYIVEKREATRKSYAAVVTNC
+HKNSWKIDQLQEGCSYYFRVTAENEYGIGLPAQTADPIKVAEVPQPPGKITVDDVTRNSVSLSWTKPEHD
+GGSKIIQYIVEMQAKHSEKWSECARVKSLQAVITNLTQGEEYLFRVVAVNEKGRSDPRSLAVPIVAKDLV
+IEPDVKPAFSSYSVQVGQDLKIEVPISGRPKPTITWTKDGLPLKQTTRINVTDSLDLTTLSIKETHKDDG
+GQYGITVANVVGQKTASIEIVTLDKPDPPKGPVKFDDVSAESITLSWNPPLYTGGCQITNYIVQKRDTTT
+TVWDVVSATVARTTLKVTKLKTGTEYQFRIFAENRYGQSFALESDPIVAQYPYKEPGPPGTPFATAISKD
+SMVIQWHEPVNNGGSPVIGYHLERKERNSILWTKVNKTIIHDTQFKAQNLEEGIEYEFRVYAENIVGVGK
+ASKNSECYVARDPCDPPGTPEPIMVKRNEITLQWTKPVYDGGSMITGYIVEKRDLPDGRWMKASFTNVIE
+TQFTVSGLTEDQRYEFRVIAKNAAGAISKPSDSTGPITAKDEVELPRISMDPKFRDTIVVNAGETFRLEA
+DVHGKPLPTIEWLRGDKEIEESARCEIKNTDFKALLIVKDAIRIDGGQYILRASNVAGSKSFPVNVKVLD
+RPGPPEGPVQVTGVTSEKCSLTWSPPLQDGGSDISHYVVEKRETSRLAWTVVASEVVTNSLKVTKLLEGN
+EYVFRIMAVNKYGVGEPLESAPVLMKNPFVLPGPPKSLEVTNIAKDSMTVCWNRPDSDGGSEIIGYIVEK
+RDRSGIRWIKCNKRRITDLRLRVTGLTEDHEYEFRVSAENAAGVGEPSPATVYYKACDPVFKPGPPTNAH
+IVDTTKNSITLAWGKPIYDGGSEILGYVVEICKADEEEWQIVTPQTGLRVTRFEISKLTEHQEYKIRVCA
+LNKVGLGEATSVPGTVKPEDKLEAPELDLDSELRKGIVVRAGGSARIHIPFKGRPTPEITWSREEGEFTD
+KVQIEKGVNYTQLSIDNCDRNDAGKYILKLENSSGSKSAFVTVKVLDTPGPPQNLAVKEVRKDSAFLVWE
+PPIIDGGAKVKNYVIDKRESTRKAYANVSSKCSKTSFKVENLTEGAIYYFRVMAENEFGVGVPVETVDAV
+KAAEPPSPPGKVTLTDVSQTSASLMWEKPEHDGGSRVLGYVVEMQPKGTEKWSIVAESKVCNAVVTGLSS
+GQEYQFRVKAYNEKGKSDPRVLGVPVIAKDLTIQPSLKLPFNTYSIQAGEDLKIEIPVIGRPRPNISWVK
+DGEPLKQTTRVNVEETATSTVLHIKEGNKDDFGKYTVTATNSAGTATENLSVIVLEKPGPPVGPVRFDEV
+SADFVVISWEPPAYTGGCQISNYIVEKRDTTTTTWHMVSATVARTTIKITKLKTGTEYQFRIFAENRYGK
+SAPLDSKAVIVQYPFKEPGPPGTPFVTSISKDQMLVQWHEPVNDGGTKIIGYHLEQKEKNSILWVKLNKT
+PIQDTKFKTTGLDEGLEYEFKVSAENIVGIGKPSKVSECFVARDPCDPPGRPEAIVITRNNVTLKWKKPA
+YDGGSKITGYIVEKKDLPDGRWMKASFTNVLETEFTVSGLVEDQRYEFRVIARNAAGNFSEPSDSSGAIT
+ARDEIDAPNASLDPKYKDVIVVHAGETFVLEADIRGKPIPDVVWSKDGKELEETAARMEIKSTIQKTTLV
+VKDCIRTDGGQYILKLSNVGGTKSIPITVKVLDRPGPPEGPLKVTGVTAEKCYLAWNPPLQDGGANISHY
+IIEKRETSRLSWTQVSTEVQALNYKVTKLLPGNEYIFRVMAVNKYGIGEPLESGPVTACNPYKPPGPPST
+PEVSAITKDSMVVTWARPVDDGGTEIEGYILEKRDKEGVRWTKCNKKTLTDLRLRVTGLTEGHSYEFRVA
+AENAAGVGEPSEPSVFYRACDALYPPGPPSNPKVTDTSRSSVSLAWSKPIYDGGAPVKGYVVEVKEAAAD
+EWTTCTPPTGLQGKQFTVTKLKENTEYNFRICAINSEGVGEPATLPGSVVAQERIEPPEIELDADLRKVV
+VLRASATLRLFVTIKGRPEPEVKWEKAEGILTDRAQIEVTSSFTMLVIDNVTRFDSGRYNLTLENNSGSK
+TAFVNVRVLDSPSAPVNLTIREVKKDSVTLSWEPPLIDGGAKITNYIVEKRETTRKAYATITNNCTKTTF
+RIENLQEGCSYYFRVLASNEYGIGLPAETTEPVKVSEPPLPPGRVTLVDVTRNTATIKWEKPESDGGSKI
+TGYVVEMQTKGSEKWSTCTQVKTLEATISGLTAGEEYVFRVAAVNEKGRSDPRQLGVPVIARDIEIKPSV
+ELPFHTFNVKAREQLKIDVPFKGRPQATVNWRKDGQTLKETTRVNVSSSKTVTSLSIKEASKEDVGTYEL
+CVSNSAGSITVPITIIVLDRPGPPGPIRIDEVSCDSITISWNPPEYDGGCQISNYIVEKKETTSTTWHIV
+SQAVARTSIKIVRLTTGSEYQFRVCAENRYGKSSYSESSAVVAEYPFSPPGPPGTPKVVHATKSTMLVTW
+QVPVNDGGSRVIGYHLEYKERSSILWSKANKILIADTQMKVSGLDEGLMYEYRVYAENIAGIGKCSKSCE
+PVPARDPCDPPGQPEVTNITRKSVSLKWSKPHYDGGAKITGYIVERRELPDGRWLKCNYTNIQETYFEVT
+ELTEDQRYEFRVFARNAADSVSEPSESTGPIIVKDDVEPPRVMMDVKFRDVIVVKAGEVLKINADIAGRP
+LPVISWAKDGIEIEERARTEIISTDNHTLLTVKDCIRRDTGQYVLTLKNVAGTRSVAVNCKVLDKPGPPA
+GPLEINGLTAEKCSLSWGRPQEDGGADIDYYIVEKRETSHLAWTICEGELQMTSCKVTKLLKGNEYIFRV
+TGVNKYGVGEPLESVAIKALDPFTVPSPPTSLEITSVTKESMTLCWSRPESDGGSEISGYIIERREKNSL
+RWVRVNKKPVYDLRVKSTGLREGCEYEYRVYAENAAGLSLPSETSPLIRAEDPVFLPSPPSKPKIVDSGK
+TTITIAWVKPLFDGGAPITGYTVEYKKSDDTDWKTSIQSLRGTEYTISGLTTGAEYVFRVKSVNKVGASD
+PSDSSDPQIAKEREEEPLFDIDSEMRKTLIVKAGASFTMTVPFRGRPVPNVLWSKPDTDLRTRAYVDTTD
+SRTSLTIENANRNDSGKYTLTIQNVLSAASLTLVVKVLDTPGPPTNITVQDVTKESAVLSWDVPENDGGA
+PVKNYHIEKREASKKAWVSVTNNCNRLSYKVTNLQEGAIYYFRVSGENEFGVGIPAETKEGVKITEKPSP
+PEKLGVTSISKDSVSLTWLKPEHDGGSRIVHYVVEALEKGQKNWVKCAVAKSTHHVVSGLRENSEYFFRV
+FAENQAGLSDPRELLLPVLIKEQLEPPEIDMKNFPSHTVYVRAGSNLKVDIPISGKPLPKVTLSRDGVPL
+KATMRFNTEITAENLTINLKESVTADAGRYEITAANSSGTTKAFINIVVLDRPGPPTGPVVISDITEESV
+TLKWEPPKYDGGSQVTNYILLKRETSTAVWTEVSATVARTMMKVMKLTTGEEYQFRIKAENRFGISDHID
+SACVTVKLPYTTPGPPSTPWVTNVTRESITVGWHEPVSNGGSAVVGYHLEMKDRNSILWQKANKLVIRTT
+HFKVTTISAGLIYEFRVYAENAAGVGKPSHPSEPVLAIDACEPPRNVRITDISKNSVSLSWQQPAFDGGS
+KITGYIVERRDLPDGRWTKASFTNVTETQFIISGLTQNSQYEFRVFARNAVGSISNPSEVVGPITCIDSY
+GGPVIDLPLEYTEVVKYRAGTSVKLRAGISGKPAPTIEWYKDDKELQTNALVCVENTTDLASILIKDADR
+LNSGCYELKLRNAMGSASATIRVQILDKPGPPGGPIEFKTVTAEKITLLWRPPADDGGAKITHYIVEKRE
+TSRVVWSMVSEHLEECIITTTKIIKGNEYIFRVRAVNKYGIGEPLESDSVVAKNAFVTPGPPGIPEVTKI
+TKNSMTVVWSRPIADGGSDISGYFLEKRDKKSLGWFKVLKETIRDTRQKVTGLTENSDYQYRVCAVNAAG
+QGPFSEPSEFYKAADPIDPPGPPAKIRIADSTKSSITLGWSKPVYDGGSAVTGYVVEIRQGEEEEWTTVS
+TKGEVRTTEYVVSNLKPGVNYYFRVSAVNCAGQGEPIEMNEPVQAKDILEAPEIDLDVALRTSVIAKAGE
+DVQVLIPFKGRPPPTVTWRKDEKNLGSDARYSIENTDSSSLLTIPQVTRNDTGKYILTIENGVGEPKSST
+VSVKVLDTPAACQKLQVKHVSRGTVTLLWDPPLIDGGSPIINYVIEKRDATKRTWSVVSHKCSSTSFKLI
+DLSEKTPFFFRVLAENEIGIGEPCETTEPVKAAEVPAPIRDLSMKDSTKTSVILSWTKPDFDGGSVITEY
+VVERKGKGEQTWSHAGISKTCEIEVSQLKEQSVLEFRVFAKNEKGLSDPVTIGPITVKELIITPEVDLSD
+IPGAQVTVRIGHNVHLELPYKGKPKPSISWLKDGLPLKESEFVRFSKTENKITLSIKNAKKEHGGKYTVI
+LDNAVCRIAVPITVITLGPPSKPKGPIRFDEIKADSVILSWDVPEDNGGGEITCYSIEKRETSQTNWRMV
+CSSVARTTFKVPNLVKDAEYQFRVRAENRYGVSQPLVSSIIVAKHQFRIPGPPGKPVIYNVTSDGMSLTW
+DAPVYDGGSEVTGFHVEKKERNSILWQKVNTSPISGREYRATGLVEGLDYQFRVYAENSAGLSSPSDPSK
+FTLAVSPVDPPGTPDYIDVTRETITLKWNPPLRDGGSKIVGYSIEKRQGNERWVRCNFTDVSECQYTVTG
+LSPGDRYEFRIIARNAVGTISPPSQSSGIIMTRDENVPPIVEFGPEYFDGLIIKSGESLRIKALVQGRPV
+PRVTWFKDGVEIEKRMNMEITDVLGSTSLFVRDATRDHRGVYTVEAKNASGSAKAEIKVKVQDTPGKVVG
+PIRFTNITGEKMTLWWDAPLNDGCAPITHYIIEKRETSRLAWALIEDKCEAQSYTAIKLINGNEYQFRVS
+AVNKFGVGRPLDSDPVVAQIQYTVPDAPGIPEPSNITGNSITLTWARPESDGGSEIQQYILERREKKSTR
+WVKVISKRPISETRFKVTGLTEGNEYEFHVMAENAAGVGPASGISRLIKCREPVNPPGPPTVVKVTDTSK
+TTVSLEWSKPVFDGGMEIIGYIIEMCKADLGDWHKVNAEACVKTRYTVTDLQAGEEYKFRVSAINGAGKG
+DSCEVTGTIKAVDRLTAPELDIDANFKQTHVVRAGASIRLFIAYQGRPTPTAVWSKPDSNLSLRADIHTT
+DSFSTLTVENCNRNDAGKYTLTVENNSGSKSITFTVKVLDTPGPPGPITFKDVTRGSATLMWDAPLLDGG
+ARIHHYVVEKREASRRSWQVISEKCTRQIFKVNDLAEGVPYYFRVSAVNEYGVGEPYEMPEPIVATEQPA
+PPRRLDVVDTSKSSAVLAWLKPDHDGGSRITGYLLEMRQKGSDFWVEAGHTKQLTFTVERLVEKTEYEFR
+VKAKNDAGYSEPREAFSSVIIKEPQIEPTADLTGITNQLITCKAGSPFTIDVPISGRPAPKVTWKLEEMR
+LKETDRVSITTTKDRTTLTVKDSMRGDSGRYFLTLENTAGVKTFSVTVVVIGRPGPVTGPIEVSSVSAES
+CVLSWGEPKDGGGTEITNYIVEKRESGTTAWQLVNSSVKRTQIKVTHLTKYMEYSFRVSSENRFGVSKPL
+ESAPIIAEHPFVPPSAPTRPEVYHVSANAMSIRWEEPYHDGGSKIIGYWVEKKERNTILWVKENKVPCLE
+CNYKVTGLVEGLEYQFRTYALNAAGVSKASEASRPIMAQNPVDAPGRPEVTDVTRSTVSLIWSAPAYDGG
+SKVVGYIIERKPVSEVGDGRWLKCNYTIVSDNFFTVTALSEGDTYEFRVLAKNAAGVISKGSESTGPVTC
+RDEYAPPKAELDARLHGDLVTIRAGSDLVLDAAVGGKPEPKIIWTKGDKELDLCEKVSLQYTGKRATAVI
+KFCDRSDSGKYTLTVKNASGTKAVSVMVKVLDSPGPCGKLTVSRVTQEKCTLAWSLPQEDGGAEITHYIV
+ERRETSRLNWVIVEGECPTLSYVVTRLIKNNEYIFRVRAVNKYGPGVPVESEPIVARNSFTIPSPPGIPE
+EVGTGKEHIIIQWTKPESDGGNEISNYLVDKREKKSLRWTRVNKDYVVYDTRLKVTSLMEGCDYQFRVTA
+VNAAGNSEPSEASNFISCREPSYTPGPPSAPRVVDTTKHSISLAWTKPMYDGGTDIVGYVLEMQEKDTDQ
+WYRVHTNATIRNTEFTVPDLKMGQKYSFRVAAVNVKGMSEYSESIAEIEPVERIEIPDLELADDLKKTVT
+IRAGASLRLMVSVSGRPPPVITWSKQGIDLASRAIIDTTESYSLLIVDKVNRYDAGKYTIEAENQSGKKS
+ATVLVKVYDTPGPCPSVKVKEVSRDSVTITWEIPTIDGGAPVNNYIVEKREAAMRAFKTVTTKCSKTLYR
+ISGLVEGTMYYFRVLPENIYGIGEPCETSDAVLVSEVPLVPAKLEVVDVTKSTVTLAWEKPLYDGGSRLT
+GYVLEACKAGTERWMKVVTLKPTVLEHTVTSLNEGEQYLFRIRAQNEKGVSEPRETVTAVTVQDLRVLPT
+IDLSTMPQKTIHVPAGRPVELVIPIAGRPPPAASWFFAGSKLRESERVTVETHTKVAKLTIRETTIRDTG
+EYTLELKNVTGTTSETIKVIILDKPGPPTGPIKIDEIDATSITISWEPPELDGGAPLSGYVVEQRDAHRP
+GWLPVSESVTRSTFKFTRLTEGNEYVFRVAATNRFGIGSYLQSEVIECRSSIRIPGPPETLQIFDVSRDG
+MTLTWYPPEDDGGSQVTGYIVERKEVRADRWVRVNKVPVTMTRYRSTGLTEGLEYEHRVTAINARGSGKP
+SRPSKPIVAMDPIAPPGKPQNPRVTDTTRTSVSLAWSVPEDEGGSKVTGYLIEMQKVDQHEWTKCNTTPT
+KIREYTLTHLPQGAEYRFRVLACNAGGPGEPAEVPGTVKVTEMLEYPDYELDERYQEGIFVRQGGVIRLT
+IPIKGKPFPICKWTKEGQDISKRAMIATSETHTELVIKEADRGDSGTYDLVLENKCGKKAVYIKVRVIGS
+PNSPEGPLEYDDIQVRSVRVSWRPPADDGGADILGYILERREVPKAAWYTIDSRVRGTSLVVKGLKENVE
+YHFRVSAENQFGISKPLKSEEPVTPKTPLNPPEPPSNPPEVLDVTKSSVSLSWSRPKDDGGSRVTGYYIE
+RKETSTDKWVRHNKTQITTTMYTVTGLVPDAEYQFRIIAQNDVGLSETSPASEPVVCKDPFDKPSQPGEL
+EILSISKDSVTLQWEKPECDGGKEILGYWVEYRQSGDSAWKKSNKERIKDKQFTIGGLLEATEYEFRVFA
+ENETGLSRPRRTAMSIKTKLTSGEAPGIRKEMKDVTTKLGEAAQLSCQIVGRPLPDIKWYRFGKELIQSR
+KYKMSSDGRTHTLTVMTEEQEDEGVYTCIATNEVGEVETSSKLLLQATPQFHPGYPLKEKYYGAVGSTLR
+LHVMYIGRPVPAMTWFHGQKLLQNSENITIENTEHYTHLVMKNVQRKTHAGKYKVQLSNVFGTVDAILDV
+EIQDKPDKPTGPIVIEALLKNSAVISWKPPADDGGSWITNYVVEKCEAKEGAEWQLVSSAISVTTCRIVN
+LTENAGYYFRVSAQNTFGISDPLEVSSVVIIKSPFEKPGAPGKPTITAVTKDSCVVAWKPPASDGGAKIR
+NYYLEKREKKQNKWISVTTEEIRETVFSVKNLIEGLEYEFRVKCENLGGESEWSEISEPITPKSDVPIQA
+PHFKEELRNLNVRYQSNATLVCKVTGHPKPIVKWYRQGKEIIADGLKYRIQEFKGGYHQLIIASVTDDDA
+TVYQVRATNQGGSVSGTASLEVEVPAKIHLPKTLEGMGAVHALRGEVVSIKIPFSGKPDPVITWQKGQDL
+IDNNGHYQVIVTRSFTSLVFPNGVERKDAGFYVVCAKNRFGIDQKTVELDVADVPDPPRGVKVSDVSRDS
+VNLTWTEPASDGGSKITNYIVEKCATTAERWLRVGQARETRYTVINLFGKTSYQFRVIAENKFGLSKPSE
+PSEPTITKEDKTRAMNYDEEVDETREVSMTKASHSSTKELYEKYMIAEDLGRGEFGIVHRCVETSSKKTY
+MAKFVKVKGTDQVLVKKEISILNIARHRNILHLHESFESMEELVMIFEFISGLDIFERINTSAFELNERE
+IVSYVHQVCEALQFLHSHNIGHFDIRPENIIYQTRRSSTIKIIEFGQARQLKPGDNFRLLFTAPEYYAPE
+VHQHDVVSTATDMWSLGTLVYVLLSGINPFLAETNQQIIENIMNAEYTFDEEAFKEISIEAMDFVDRLLV
+KERKSRMTASEALQHPWLKQKIERVSTKVIRTLKHRRYYHTLIKKDLNMVVSAARISCGGAIRSQKGVSV
+AKVKVASIEIGPVSGQIMHAVGEEGGHVKYVCKIENYDQSTQVTWYFGVRQLENSEKYEITYEDGVAILY
+VKDITKLDDGTYRCKVVNDYGEDSSYAELFVKGVREVYDYYCRRTMKKIKRRTDTMRLLERPPEFTLPLY
+NKTAYVGENVRFGVTITVHPEPHVTWYKSGQKIKPGDNDKKYTFESDKGLYQLTINSVTTDDDAEYTVVA
+RNKYGEDSCKAKLTVTLHPPPTDSTLRPMFKRLLANAECQEGQSVCFEIRVSGIPPPTLKWEKDGQPLSL
+GPNIEIIHEGLDYYALHIRDTLPEDTGYYRVTATNTAGSTSCQAHLQVERLRYKKQEFKSKEEHERHVQK
+QIDKTLRMAEILSGTESVPLTQVAKEALREAAVLYKPAVSTKTVKGEFRLEIEEKKEERKLRMPYDVPEP
+RKYKQTTIEEDQRIKQFVPMSDMKWYKKIRDQYEMPGKLDRVVQKRPKRIRLSRWEQFYVMPLPRITDQY
+RPKWRIPKLSQDDLEIVRPARRRTPSPDYDFYYRPRRRSLGDISDEELLLPIDDYLAMKRTEEERLRLEE
+ELELGFSASPPSRSPPHFELSSLRYSSPQAHVKVEETRKDFRYSTYHIPTKAEASTSYAELRERHAQAAY
+RQPKQRQRIMAEREDEELLRPVTTTQHLSEYKSELDFMSKEEKSRKKSRRQREVTEITEIEEEYEISKHA
+QRESSSSASRLLRRRRSLSPTYIELMRPVSELIRSRPQPAEEYEDDTERRSPTPERTRPRSPSPVSSERS
+LSRFERSARFDIFSRYESMKAALKTQKTSERKYEVLSQQPFTLDHAPRITLRMRSHRVPCGQNTRFILNV
+QSKPTAEVKWYHNGVELQESSKIHYTNTSGVLTLEILDCHTDDSGTYRAVCTNYKGEASDYATLDVTGGD
+YTTYASQRRDEEVPRSVFPELTRTEAYAVSSFKKTSEMEASSSVREVKSQMTETRESLSSYEHSASAEMK
+SAALEEKSLEEKSTTRKIKTTLAARILTKPRSMTVYEGESARFSCDTDGEPVPTVTWLRKGQVLSTSARH
+QVTTTKYKSTFEISSVQASDEGNYSVVVENSEGKQEAEFTLTIQKARVTEKAVTSPPRVKSPEPRVKSPE
+AVKSPKRVKSPEPSHPKAVSPTETKPTPTEKVQHLPVSAPPKITQFLKAEASKEIAKLTCVVESSVLRAK
+EVTWYKDGKKLKENGHFQFHYSADGTYELKINNLTESDQGEYVCEISGEGGTSKTNLQFMGQAFKSIHEK
+VSKISETKKSDQKTTESTVTRKTEPKAPEPISSKPVIVTGLQDTTVSSDSVAKFAVKATGEPRPTAIWTK
+DGKAITQGGKYKLSEDKGGFFLEIHKTDTSDSGLYTCTVKNSAGSVSSSCKLTIKAIKDTEAQKVSTQKT
+SEITPQKKAVVQEEISQKALRSEEIKMSEAKSQEKLALKEEASKVLISEEVKKSAATSLEKSIVHEEITK
+TSQASEEVRTHAEIKAFSTQMSINEGQRLVLKANIAGATDVKWVLNGVELTNSEEYRYGVSGSDQTLTIK
+QASHRDEGILTCISKTKEGIVKCQYDLTLSKELSDAPAFISQPRSQNINEGQNVLFTCEISGEPSPEIEW
+FKNNLPISISSNVSISRSRNVYSLEIRNASVSDSGKYTIKAKNFRGQCSATASLMVLPLVEEPSREVVLR
+TSGDTSLQGSFSSQSVQMSASKQEASFSSFSSSSASSMTEMKFASMSAQSMSSMQESFVEMSSSSFMGIS
+NMTQLESSTSKMLKAGIRGIPPKIEALPSDISIDEGKVLTVACAFTGEPTPEVTWSCGGRKIHSQEQGRF
+HIENTDDLTTLIIMDVQKQDGGLYTLSLGNEFGSDSATVNIHIRSI
+X
+SHAR_EOF
+chmod 0644 titin_hum.aa ||
+echo 'restore of titin_hum.aa failed'
+Wc_c="`wc -c < 'titin_hum.aa'`"
+test 27376 -eq "$Wc_c" ||
+ echo 'titin_hum.aa: original size 27376, current size' "$Wc_c"
+fi
+# ============= titin_hum.seq ==============
+if test -f 'titin_hum.seq' -a X"$1" != X"-c"; then
+ echo 'x - skipping titin_hum.seq (File already exists)'
+else
+echo 'x - extracting titin_hum.seq (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'titin_hum.seq' &&
+>gi|20143913|ref|NM_003319.2| Homo sapiens titin (TTN), transcript variant N2-B, mRNA
+AGCAGTCGTGCATTCCCAGCCTCGCCTCGGGTGTAGGGATTGCATAGAAAAGCAAAACTACACAGTCTTG
+ACTGTGTAGTTTTGTTTTTAGGATTAGAGGCTCACCGATTCATGTCGGAGATGGTCAGAAAAACCAACTC
+TCCATAGGACGTCGTTTCAGAAGCAACCTTGGGCTTAGTCCCACCCTTTTTAGGCACTCTTGAGAAATCA
+AGTGCCTAGAAAGATGACAACTCAAGCACCGACGTTTACGCAGCCGTTACAAAGCGTTGTGGTACTGGAG
+GGTAGTACCGCAACCTTTGAGGCTCACATTAGTGGTTTTCCAGTTCCTGAGGTGAGCTGGTTTAGGGATG
+GCCAGGTGATTTCCACTTCCACTCTGCCCGGCGTGCAGATCTCCTTTAGCGATGGCCGCGCTAAACTGAC
+GATCCCCGCCGTGACTAAAGCCAACAGTGGACGATATTCCCTGAAAGCCACCAATGGATCTGGACAAGCG
+ACTAGTACTGCTGAGCTTCTCGTGAAAGCTGAGACAGCACCACCCAACTTCGTTCAACGACTGCAGAGCA
+TGACCGTGAGACAAGGAAGCCAAGTGAGACTCCAAGTGAGAGTGACTGGAATCCCTACACCTGTGGTGAA
+GTTCTACCGGGATGGAGCCGAAATCCAGAGCTCCCTTGATTTCCAAATTTCACAAGAAGGCGACCTCTAC
+AGCTTACTGATTGCAGAAGCATACCCTGAGGACTCAGGGACCTATTCAGTAAATGCCACCAATAGCGTTG
+GAAGAGCTACTTCGACTGCTGAATTACTGGTTCAAGGTGAAGAAGAAGTACCTGCTAAAAAGACAAAGAC
+AATTGTTTCGACTGCTCAGATCTCAGAATCAAGACAAACCCGAATTGAAAAGAAGATTGAAGCCCACTTT
+GATGCCAGATCAATTGCAACAGTTGAGATGGTCATAGATGGTGCCGCTGGGCAACAGCTGCCACATAAAA
+CACCTCCCAGGATTCCTCCGAAGCCAAAGTCAAGATCCCCAACACCACCGTCTATTGCTGCCAAAGCACA
+GCTGGCTCGGCAGCAGTCCCCATCGCCCATAAGACACTCCCCTTCCCCGGTCAGACACGTGCGGGCACCG
+ACCCCATCTCCGGTCAGGTCCGTGTCTCCAGCAGCAAGAATCTCCACATCCCCCATCAGGTCTGTTAGGT
+CTCCATTGCTCATGCGTAAGACTCAGGCATCCACCGTGGCCACAGGTCCTGAAGTGCCTCCCCCTTGGAA
+GCAAGAGGGCTACGTGGCCTCCTCATCTGAGGCTGAGATGAGAGAGACAACGCTGACAACCTCTACTCAG
+ATCAGGACAGAAGAGAGATGGGAAGGGAGATACGGTGTCCAGGAGCAAGTGACCATCAGTGGTGCTGCGG
+GTGCTGCCGCCAGTGTGTCGGCCAGTGCTAGCTACGCAGCAGAGGCTGTTGCCACTGGTGCTAAAGAGGT
+GAAACAAGATGCTGACAAAAGTGCAGCTGTTGCGACTGTTGTTGCTGCCGTTGATATGGCCAGAGTGAGA
+GAACCAGTGATCAGCGCTGTAGAGCAGACTGCTCAGAGGACAACCACGACTGCTGTGCACATCCAACCTG
+CTCAAGAACAGGTAAGAAAGGAAGCGGAGAAGACTGCTGTAACTAAGGTAGTAGTGGCCGCCGATAAAGC
+CAAGGAACAAGAATTAAAATCAAGAACCAAAGAAGTAATTACCACAAAGCAAGAGCAGATGCACGTAACT
+CATGAGCAGATAAGAAAAGAAACTGAAAAAACATTTGTACCAAAGGTAGTAATTTCCGCAGCTAAAGCCA
+AAGAACAAGAAACTAGAATTTCTGAAGAAATTACTAAGAAACAGAAACAAGTAACTCAAGAAGCAATAAT
+GAAGGAAACTAGGAAAACAGTTGTACCTAAAGTCATAGTTGCCACACCCAAAGTCAAAGAACAAGATTTA
+GTATCAAGAGGTAGAGAAGGCATTACTACCAAAAGAGAACAAGTGCAAATAACTCAGGAGAAGATGAGAA
+AGGAAGCCGAGAAAACTGCCTTGTCTACAATAGCAGTTGCTACTGCTAAAGCCAAAGAACAAGAAACAAT
+ACTGAGAACTAGAGAAACTATGGCTACTAGACAAGAACAAATCCAAGTTACCCATGGAAAGGTGGACGTT
+GGAAAAAAGGCTGAAGCTGTAGCAACAGTTGTTGCTGCAGTAGACCAGGCCCGAGTCAGAGAGCCCAGAG
+AGCCTGGGCATCTTGAAGAATCCTATGCTCAGCAGACCACTTTGGAGTACGGATATAAGGAACGCATTTC
+CGCCGCAAAGGTAGCTGAGCCTCCCCAACGTCCAGCCTCAGAACCCCACGTTGTCCCTAAAGCAGTCAAG
+CCTAGAGTAATCCAGGCTCCTTCTGAGACTCATATCAAAACTACTGATCAAAAGGGAATGCACATATCAT
+CACAGATCAAGAAAACTACAGATCTAACAACGGAAAGATTAGTCCATGTGGATAAACGCCCCCGCACAGC
+TAGCCCTCACTTTACTGTTTCAAAAATTTCTGTTCCTAAGACAGAACATGGATATGAGGCATCAATAGCC
+GGTAGTGCTATTGCCACATTACAAAAAGAGTTGTCAGCCACATCTTCTGCTCAGAAGATCACCAAATCGG
+TGAAGGCTCCTACTGTGAAGCCCAGTGAGACTAGAGTAAGGGCAGAGCCCACACCCTTGCCACAGTTCCC
+CTTCGCTGACACACCAGATACTTACAAGAGTGAAGCTGGCGTTGAGGTGAAAAAGGAAGTAGGGGTGAGC
+ATCACTGGCACCACCGTCCGTGAAGAGCGCTTTGAAGTACTGCACGGACGCGAAGCCAAGGTAACAGAAA
+CAGCAAGAGTACCAGCACCTGTTGAAATTCCTGTTACTCCACCAACTTTGGTCTCGGGCTTAAAAAATGT
+GACTGTCATAGAAGGTGAATCTGTCACCTTGGAGTGCCACATCTCTGGATACCCATCCCCGACAGTGACA
+TGGTACAGGGAAGACTACCAAATCGAAAGTTCCATTGACTTCCAGATAACCTTCCAGAGTGGAATTGCTC
+GTCTTATGATTCGCGAAGCATTTGCGGAAGACAGCGGGCGATTTACTTGCAGTGCTGTAAATGAGGCTGG
+AACCGTCAGCACATCCTGCTATCTGGCTGTGCAGGTGTCAGAAGAATTTGAAAAGGAAACCACAGCCGTG
+ACTGAGAAATTTACTACAGAAGAGAAACGCTTTGTTGAGTCAAGAGATGTGGTTATGACTGATACTAGCC
+TCACAGAGGAACAAGCAGGGCCTGGAGAACCTGCCGCGCCTTACTTTATTACAAAACCAGTGGTCCAGAA
+ACTGGTGGAAGGTGGGAGCGTGGTGTTTGGATGCCAAGTTGGCGGCAACCCAAAGCCCCATGTATACTGG
+AAAAAATCTGGTGTTCCTCTAACCACTGGATACAGATACAAAGTGAGTTACAACAAACAAACCGGTGAAT
+GCAAGCTGGTGATTTCTATGACTTTTGCTGATGATGCTGGAGAATACACTATTGTTGTTCGCAATAAGCA
+TGGAGAAACTTCTGCATCTGCTTCCTTGCTTGAAGAAGCTGATTATGAGTTACTGATGAAGTCCCAGCAA
+GAAATGCTTTATCAGACACAAGTGACTGCATTTGTTCAAGAACCTAAAGTTGGAGAAACAGCACCTGGAT
+TTGTATACTCTGAGTATGAAAAAGAGTATGAAAAAGAACAAGCCTTAATTAGGAAGAAAATGGCCAAAGA
+TACTGTAGTGGTCAGAACTTATGTAGAAGATCAGGAATTCCATATTTCTTCCTTTGAAGAGAGACTTATT
+AAAGAAATTGAATATAGAATAATAAAGACTACATTAGAAGAACTTCTTGAAGAAGATGGAGAAGAAAAGA
+TGGCAGTTGACATTTCTGAATCTGAAGCTGTTGAATCAGGATTTGATTTAAGAATCAAGAATTATAGAAT
+TCTTGAGGGGATGGGTGTCACTTTTCATTGCAAGATGTCTGGATATCCATTACCAAAGATTGCTTGGTAC
+AAAGATGGCAAGCGCATCAAACATGGAGAAAGATACCAAATGGACTTTCTACAAGATGGCAGAGCTAGTC
+TGCGTATACCTGTTGTTCTTCCAGAAGATGAAGGAATCTACACTGCATTTGCCAGCAATATTAAAGGAAA
+TGCAATTTGCTCAGGGAAATTGTATGTGGAGCCTGCTGCACCACTTGGAGCTCCGACTTACATTCCCACA
+CTAGAGCCAGTGAGCAGAATCAGATCTCTCTCTCCACGTTCAGTGAGCAGGTCTCCTATACGCATGTCTC
+CTGCACGGATGTCACCTGCAAGGATGTCTCCTGCACGGATGTCCCCTGCAAGAATGTCCCCTGGACGTAG
+GCTGGAGGAGACAGATGAGTCACAACTTGAGAGACTATATAAACCAGTCTTTGTGTTAAAACCTGTTTCT
+TTCAAATGTTTAGAAGGGCAAACTGCCAGATTTGACTTAAAGGTTGTTGGTAGACCTATGCCAGAGACGT
+TCTGGTTTCATGATGGCCAGCAAATTGTCAATGACTATACCCATAAAGTAGTCATTAAAGAAGATGGTAC
+TCAATCACTAATTATTGTCCCTGCCACACCCAGTGATTCTGGGGAATGGACTGTGGTTGCCCAAAACAGG
+GCAGGCAGATCTTCAATTTCAGTGATTTTAACTGTGGAAGCTGTGGAACATCAGGTAAAACCGATGTTTG
+TAGAAAAACTGAAAAATGTCAATATAAAGGAAGGTTCCCGACTTGAAATGAAAGTCAGAGCTACGGGTAA
+CCCCAACCCTGACATTGTATGGTTGAAAAACAGTGACATCATTGTGCCTCATAAATATCCCAAAATCAGA
+ATTGAAGGAACCAAGGGAGAAGCTGCCCTTAAAATCGATTCCACTGTCAGCCAAGATTCTGCCTGGTATA
+CTGCGACTGCTATTAATAAAGCTGGCAGAGACACTACAAGATGCAAAGTAAATGTTGAAGTTGAGTTTGC
+AGAGCCTGAGCCAGAGAGAAAGTTAATCATCCCACGGGGGACATATAGAGCAAAGGAGATTGCAGCCCCA
+GAACTGGAGCCCCTCCATTTGCGATATGGCCAAGAGCAATGGGAAGAAGGTGATCTCTATGACAAAGAGA
+AACAACAGAAACCATTTTTCAAGAAAAAACTCACTTCCTTAAGACTTAAGCGCTTTGGGCCTGCCCACTT
+TGAATGCAGGCTAACACCCATTGGTGACCCAACGATGGTGGTGGAGTGGCTCCATGATGGAAAGCCACTT
+GAAGCAGCCAACAGGCTCCGTATGATCAATGAATTTGGGTACTGCAGCCTTGATTATGGCGTTGCATATT
+CTAGAGACAGTGGTATCATTACTTGCAGAGCCACTAACAAATATGGAACAGATCACACATCTGCTACCCT
+TATTGTTAAAGATGAGAAAAGTCTTGTGGAAGAATCCCAATTGCCTGAGGGGAGGAAAGGCTTACAGAGA
+ATTGAAGAATTAGAGAGAATGGCTCATGAAGGTGCACTTACAGGTGTAACAACAGATCAGAAAGAAAAGC
+AAAAGCCAGACATTGTCTTGTACCCAGAGCCAGTTAGAGTACTTGAAGGGGAGACTGCAAGGTTCCGCTG
+CAGGGTAACAGGCTACCCTCAGCCCAAAGTCAACTGGTACCTCAATGGACAGCTCATCCGCAAAAGCAAA
+AGGTTCAGAGTTCGCTATGATGGTATCCATTACCTGGACATCGTGGACTGCAAATCATATGACACAGGTG
+AAGTGAAGGTCACCGCGGAAAATCCTGAAGGTGTGATAGAGCATAAAGTGAAGCTTGAGATTCAACAGAG
+GGAAGATTTTAGGTCTGTCCTTAGGAGAGCTCCTGAACCAAGGCCTGAGTTTCACGTACATGAACCAGGA
+AAGCTTCAGTTTGAAGTACAAAAAGTGGATAGACCTGTTGACACCACTGAAACCAAAGAAGTTGTGAAGT
+TGAAAAGGGCTGAAAGAATTACCCATGAAAAAGTGCCTGAAGAGTCGGAAGAGCTGCGCAGTAAATTCAA
+GCGCAGAACAGAAGAGGGCTATTATGAAGCCATTACCGCTGTGGAGCTCAAGTCTCGAAAGAAGGATGAA
+TCCTATGAGGAACTCCTCAGGAAGACAAAAGATGAACTTCTCCACTGGACCAAAGAGTTAACTGAAGAGG
+AAAAGAAAGCTCTTGCCGAAGAAGGCAAAATCACGATTCCAACTTTTAAACCTGACAAGATTGAACTAAG
+TCCTAGTATGGAGGCTCCAAAAATCTTCGAAAGAATCCAGAGCCAAACAGTGGGCCAAGGATCTGATGCA
+CACTTCCGGGTCAGAGTCGTGGGGAAACCAGACCCCGAATGTGAATGGTACAAAAATGGTGTCAAAATTG
+AACGGTCTGACCGGATCTACTGGTACTGGCCCGAAGACAATGTTTGTGAATTGGTCATAAGAGATGTGAC
+TGCTGAGGACTCTGCCAGCATCATGGTAAAAGCCATCAACATAGCTGGAGAAACCTCCAGTCACGCATTC
+TTACTTGTCCAAGCCAAGCAATTGATCACTTTCACACAGGAATTACAAGATGTTGTTGCTAAGGAAAAAG
+ACACTATGGCAACCTTTGAATGTGAAACTTCAGAACCATTTGTCAAAGTGAAATGGTATAAAGATGGTAT
+GGAGGTTCATGAGGGAGATAAATACAGGATGCACTCTGACAGAAAGGTTCACTTCCTCTCCATACTGACC
+ATTGATACGTCTGATGCTGAAGATTACAGCTGTGTACTTGTGGAAGATGAAAATGTCAAAACGACTGCTA
+AACTTATTGTTGAAGGTGCAGTTGTTGAGTTTGTGAAAGAACTTCAGGACATAGAAGTTCCAGAATCATA
+TTCAGGAGAATTAGAGTGCATTGTATCCCCAGAAAATATAGAAGGAAAATGGTATCATAATGATGTGGAG
+CTTAAATCCAATGGCAAATATACAATTACATCTCGTCGTGGACGTCAGAACCTCACGGTCAAGGATGTAA
+CCAAGGAGGACCAGGGAGAATACAGCTTTGTCATCGACGGGAAAAAGACAACCTGTAAATTAAAGATGAA
+ACCCCGCCCCATTGCTATCCTACAAGGACTTAGTGACCAAAAAGTCTGTGAGGGTGACATTGTTCAGCTT
+GAAGTTAAAGTCTCCTTGGAAAGTGTGGAAGGCGTCTGGATGAAAGACGGCCAAGAAGTGCAGCCCAGTG
+ACAGGGTTCACATTGTGATAGACAAACAATCTCATATGCTGCTCATTGAAGACATGACTAAGGAAGATGC
+TGGAAATTACTCTTTCACCATTCCAGCCCTTGGCCTCTCCACCAGTGGGCGTGTCTCTGTCTATAGTGTG
+GACGTGATAACACCTCTAAAAGATGTTAATGTGATTGAAGGCACCAAGGCTGTGCTTGAATGTAAGGTGT
+CAGTCCCTGATGTGACTTCTGTTAAGTGGTACTTAAATGATGAACAAATCAAGCCTGATGACCGTGTACA
+GGCCATTGTGAAAGGTACTAAACAGCGACTAGTCATTAACCGAACTCATGCTTCAGACGAAGGACCTTAC
+AAGCTGATAGTTGGCAGAGTTGAAACCAACTGTAATCTCTCTGTAGAAAAAATTAAAATTATCAGAGGTC
+TTCGTGACCTTACCTGTACAGAAACTCAAAATGTGGTGTTTGAGGTTGAGCTGTCCCACTCTGGAATTGA
+TGTCCTGTGGAATTTTAAGGACAAGGAAATCAAGCCCAGTTCTAAATATAAAATTGAAGCACATGGAAAA
+ATATATAAATTGACAGTTCTAAATATGATGAAAGATGATGAAGGAAAATACACATTTTACGCGGGAGAAA
+ATATCACATCTGGAAAACTTACTGTGGCAGGTGGGGCCATCTCCAAGCCACTCACAGATCAGACCGTAGC
+TGAATCCCAGGAAGCTGTGTTTGAATGTGAAGTTGCCAACCCAGATTCCAAAGGCGAATGGTTGAGGGAT
+GGCAAACACCTACCACTGACTAACAACATCAGAAGTGAGTCTGATGGCCACAAAAGGAGACTTATCATTG
+CTGCCACCAAATTAGATGACATTGGAGAATATACCTACAAGGTGGCCACCTCCAAAACATCTGCCAAACT
+CAAAGTTGAAGCTGTCAAAATTAAGAAGACTCTGAAGAACCTCACAGTGACAGAAACACAGGATGCTGTT
+TTCACTGTCGAGCTTACACACCCTAATGTCAAAGGTGTCCAGTGGATCAAAAATGGAGTTGTGCTGGAAT
+CCAATGAAAAGTATGCTATCTCTGTCAAAGGAACAATTTACTCTCTGAGGATTAAAAACTGTGCCATCGT
+GGATGAGTCTGTTTATGGCTTCAGGCTTGGAAGGCTTGGAGCCAGTGCCAGACTGCACGTGGAGACTGTC
+AAGATCATTAAAAAGCCAAAGGATGTGACAGCCTTGGAAAATGCCACTGTTGCCTTTGAAGTTAGTGTTT
+CCCATGACACTGTTCCAGTAAAATGGTTCCATAAGAGTGTGGAAATTAAGCCAAGTGACAAACACAGACT
+GGTCTCAGAAAGGAAAGTCCACAAGCTGATGCTGCAGAACATCTCCCCCTCAGATGCTGGGGAATACACA
+GCTGTGGTCGGGCAATTGGAATGCAAAGCAAAACTGTTTGTGGAGACATTACATATTACAAAAACCATGA
+AAAATATCGAGGTGCCTGAGACCAAAACTGCCTCTTTTGAGTGTGAGGTGTCCCACTTCAATGTCCCTTC
+CATGTGGCTGAAGAATGGTGTGGAAATTGAGATGAGTGAAAAGTTCAAGATAGTTGTGCAGGGAAAACTC
+CATCAGCTGATCATCATGAACACCAGCACAGAGGACTCGGCAGAATACACATTTGTCTGTGGCAATGACC
+AAGTCAGTGCCACCCTGACAGTCACCCCAATCATGATTACTTCCATGCTGAAAGACATCAACGCTGAAGA
+AAAAGACACTATTACTTTTGAGGTGACAGTGAACTATGAAGGCATCTCTTACAAATGGTTAAAGAATGGT
+GTGGAAATCAAATCAACTGACAAGTGCCAGATGAGAACCAAAAAGCTCACACACTCACTGAACATCAGGA
+ATGTTCACTTTGGGGATGCTGCTGACTACACCTTTGTGGCTGGAAAAGCAACATCAACAGCCACACTTTA
+TGTGGAAGCTCGTCATATAGAATTTAGGAAACACATTAAGGACATTAAGGTACTGGAGAAGAAGCGAGCC
+ATGTTTGAATGTGAAGTTTCTGAACCTGACATCACTGTACAGTGGATGAAAGATGACCAGGAACTGCAGA
+TCACAGACAGAATAAAGATTCAGAAGGAGAAATATGTCCACCGCCTTCTGATCCCATCCACCCGGATGTC
+TGATGCTGGGAAGTACACAGTGGTGGCAGGAGGCAACGTGTCAACTGCAAAACTCTTTGTAGAAGGCAGA
+GATGTTCGCATCCGAAGTATTAAAAAGGAGGTTCAGGTCATTGAGAAACAGCGTGCTGTTGTTGAATTTG
+AGGTCAATGAAGACGATGTTGATGCCCACTGGTATAAAGATGGCATTGAAATCAATTTCCAAGTTCAAGA
+ACGACACAAATATGTAGTGGAAAGAAGAATCCACCGAATGTTTATCTCTGAGACCAGACAGAGCGATGCA
+GGAGAATACACCTTTGTGGCAGGAAGGAACAGGAGTTCTGTCACTCTCTATGTCAATGCTCCTGAACCGC
+CCCAAGTTCTGCAGGAGCTCCAGCCTGTCACTGTGCAGTCTGGCAAGCCTGCCCGCTTCTGTGCCGTGAT
+ATCCGGAAGACCACAGCCCAAAATTTCCTGGTACAAGGAAGAGCAGCTGCTTTCCACTGGCTTCAAGTGC
+AAATTTCTTCATGATGGGCAAGAGTACACGCTTTTGCTAATTGAAGCCTTCCCAGAGGATGCGGCAGTCT
+ATACCTGTGAAGCCAAGAATGACTATGGTGTTGCCACAACATCAGCTTCACTCTCAGTGGAAGTTCCAGA
+AGTTGTGTCTCCTGATCAGGAAATGCCTGTTTATCCACCTGCCATCATCACCCCGCTTCAGGACACTGTC
+ACTTCTGAAGGGCAGCCAGCCCGTTTTCAATGCCGGGTTTCTGGAACAGATCTAAAAGTGTCGTGGTACA
+GCAAAGACAAGAAAATCAAGCCATCTCGGTTCTTTAGAATGACTCAATTTGAAGACACTTATCAACTGGA
+AATTGCCGAAGCTTATCCAGAAGATGAAGGAACTTACACGTTTGTTGCTAGTAATGCTGTAGGCCAAGTA
+TCAAGCACAGCCAACCTGAGTCTGGAAGCTCCTGAATCAATTTTGCATGAGAGGATTGAACAAGAGATTG
+AGATGGAAATGAAAGAGTTTTCTAGTTCTTTTCTGTCTGCCGAGGAAGAAGGACTTCATAGCGCCGAACT
+TCAATTATCTAAAATAAATGAAACACTTGAACTTTTGTCTGAATCTCCAGTTTACTCAACTAAATTTGAT
+TCCGAAAAGGAAGGCACTGGCCCAATTTTCATCAAAGAAGTGTCAAATGCTGATATAAGCATGGGGGATG
+TGGCTACACTGTCTGTAACTGTCATTGGCATCCCCAAACCTAAAATTCAGTGGTTCTTTAATGGAGTGCT
+ATTAACCCCTTCTGCTGACTACAAATTTGTTTTTGACGGTGATGATCATAGCCTGATCATTCTGTTCACC
+AAATTGGAGGATGAGGGAGAGTATACATGTATGGCCAGTAATGACTATGGAAAGACAATATGTAGTGCCT
+ATCTAAAAATTAATTCCAAAGGAGAGGGTCACAAAGACACTGAAACAGAATCAGCAGTGGCAAAATCTCT
+GGAAAAGCTGGGAGGTCCTTGTCCTCCTCACTTCCTTAAGGAGTTAAAACCAATTCGCTGTGCTCAAGGG
+CTTCCTGCCATCTTTGAGTACACAGTGGTTGGAGAGCCTGCCCCTACTGTTACATGGTTCAAAGAAAACA
+AGCAGCTTTGCACCAGTGTTTATTACACTATCATTCATAACCCTAATGGCTCTGGAACTTTCATTGTCAA
+TGACCCTCAGAGGGAAGACAGTGGCCTCTATATCTGTAAAGCAGAGAATATGTTGGGTGAGTCCACCTGT
+GCAGCAGAGCTGCTTGTGCTTCTGGAAGACACAGACATGACTGATACCCCCTGCAAAGCAAAGTCCACAC
+CAGAGGCTCCTGAGGATTTTCCACAGACACCCTTAAAGGGTCCCGCAGTTGAAGCACTTGACTCAGAGCA
+GGAAATTGCAACGTTTGTAAAAGACACCATTTTGAAAGCTGCTTTAATTACAGAAGAAAACCAGCAACTA
+TCTTATGAGCATATTGCTAAAGCCAATGAATTGAGCAGTCAGCTTCCTTTGGGAGCTCAGGAATTGCAAT
+CCATTTTGGAGCAAGACAAGCTCACTCCTGAAAGCACCAGGGAATTTCTTTGCATCAATGGCAGTATTCA
+CTTTCAGCCTCTCAAGGAACCATCTCCCAACCTACAGCTGCAGATTGTACAGTCCCAGAAAACCTTCTCC
+AAAGAAGGTATTCTAATGCCTGAAGAGCCTGAGACACAGGCAGTTCTATCAGATACCGAGAAAATCTTCC
+CAAGTGCCATGTCCATAGAACAAATTAATTCATTAACAGTTGAGCCTCTGAAAACTTTATTAGCTGAACC
+TGAAGGGAATTATCCACAGTCTTCAATAGAACCTCCAATGCATTCTTATCTAACCTCTGTGGCTGAGGAA
+GTACTTTCACCAAAAGAAAAGACAGTATCTGACACCAACAGAGAGCAAAGAGTGACTCTTCAAAAGCAAG
+AGGCACAAAGTGCGCTCATCTTGAGTCAGAGCTTAGCTGAGGGACACGTGGAGAGTCTCCAGAGTCCTGA
+TGTCATGATCTCTCAGGTAAACTATGAGCCCCTAGTCCCTTCAGAACACTCATGCACAGAAGGAGGTAAA
+ATTTTGATAGAAAGTGCAAATCCACTGGAAAATGCAGGGCAAGATTCTGCGGTCAGAATTGAGGAAGGCA
+AGTCCTTAAGATTTCCACTAGCACTTGAAGAAAAGCAGGTACTGCTCAAAGAAGAGCATTCTGACAACGT
+GGTGATGCCCCCAGACCAAATCATTGAGTCTAAAAGAGAGCCCGTGGCAATAAAGAAAGTGCAGGAGGTA
+CAGGGAAGGGACCTTCTTTCTAAGGAAAGCTTGCTTTCTGGTATTCCAGAAGAGCAGAGATTAAACCTGA
+AAATTCAAATCTGCCGGGCTTTGCAAGCAGCCGTGGCCAGCGAGCAGCCAGGTCTTTTCTCTGAGTGGCT
+AAGAAATATTGAAAAGGTGGAGGTCGAGGCTGTAAACATCACCCAAGAGCCCAGACACATCATGTGCATG
+TACCTTGTTACTTCGGCAAAGTCTGTAACAGAAGAAGTAACCATCATTATTGAAGATGTTGATCCTCAAA
+TGGCTAACCTGAAAATGGAACTTAGGGATGCTTTGTGTGCTATTATATATGAGGAAATAGACATCCTAAC
+AGCTGAGGGTCCTAGAATTCAGCAAGGAGCCAAAACAAGTTTGCAAGAAGAAATGGATTCTTTTTCAGGT
+TCACAGAAGGTTGAACCCATTACTGAACCAGAAGTTGAATCTAAATATCTGATCTCAACTGAAGAGGTCA
+GTTATTTTAACGTGCAAAGTAGGGTTAAATATTTGGATGCCACACCTGTCACTAAAGGGGTTGCTTCAGC
+TGTTGTCTCTGACGAAAAACAAGATGAGAGTCTGAAACCATCAGAGGAAAAAGAGGAGTCTTCCTCTGAA
+AGTGGTACTGAGGAGGTTGCTACAGTAAAGATACAGGAAGCTGAGGGTGGCTTAATCAAAGAGGATGGCC
+CCATGATACATACACCTTTAGTGGACACTGTTTCTGAGGAAGGTGATATTGTACACCTCACAACATCCAT
+AACAAATGCTAAAGAGGTGAATTGGTATTTTGAGAATAAACTGGTGCCTTCAGATGAAAAGTTCAAGTGT
+TTACAAGATCAAAATACATATACGCTAGTCATCGACAAAGTAAATACCGAAGACCATCAAGGAGAGTATG
+TCTGTGAGGCCTTGAATGACAGCGGAAAAACAGCAACTTCAGCCAAACTCACTGTAGTAAAAAGAGCTGC
+CCCAGTGATCAAGAGGAAAATCGAACCCCTGGAAGTAGCACTGGGCCACCTAGCCAAATTCACCTGTGAG
+ATCCAAAGTGCTCCCAATGTCCGGTTCCAGTGGTTTAAAGCTGGCCGAGAAATTTATGAGAGTGACAAGT
+GTTCTATTCGATCTTCAAAGTATATCTCCAGCCTTGAAATCCTGAGAACCCAGGTGGTTGACTGCGGCGA
+GTATACATGCAAAGCTTCCAATGAGTATGGCAGTGTCAGCTGTACAGCCACACTAACTGTGACAGTGCCT
+GGAGGTGAAAAGAAAGTTCGCAAATTACTTCCGGAACGTAAACCTGAACCAAAGGAAGAAGTTGTTCTGA
+AAAGCGTTCTAAGAAAAAGACCTGAAGAAGAAGAACCTAAAGTAGAACCTAAAAAACTAGAAAAAGTTAA
+AAAACCTGCAGTACCAGAACCACCACCTCCAAAACCTGTTGAAGAGGTTGAAGTACCTACTGTTACAAAA
+AGGGAAAGGAAGATTCCTGAACCAACAAAAGTGCCTGAAATCAAGCCAGCAATACCTCTCCCTGCACCTG
+AACCGAAACCAAAGCCCGAAGCAGAAGTGAAAACAATCAAACCACCTCCTGTGGAACCTGAACCAACCCC
+CATCGCTGCCCCAGTAACAGTGCCAGTGGTTGGAAAGAAAGCAGAAGCCAAAGCACCTAAGGAAGAGGCT
+GCCAAGCCAAAAGGTCCTATCAAAGGTGTACCCAAAAAGACTCCTTCACCAATAGAAGCCGAAAGGAGAA
+AGTTAAGGCCAGGAAGTGGTGGAGAGAAACCTCCTGATGAAGCCCCGTTCACCTACCAGCTAAAGGCTGT
+GCCACTGAAGTTTGTGAAAGAAATCAAAGACATCATCTTGACAGAATCAGAGTTCGTTGGCTCTTCAGCA
+ATCTTTGAATGTTTGGTCTCCCCTTCCACTGCAATTACAACCTGGATGAAAGACGGTAGCAATATCCGTG
+AGAGTCCCAAGCACAGGTTTATTGCAGATGGTAAAGACAGAAAGCTGCACATCATTGATGTTCAACTTTC
+CGATGCTGGTGAATACACCTGTGTTTTACGTTTGGGAAACAAAGAAAAGACCTCCACGGCTAAACTTGTT
+GTAGAAGAACTTCCTGTGCGTTTTGTAAAAACACTGGAAGAGGAAGTCACAGTGGTCAAAGGACAGCCAT
+TGTACTTGAGCTGCGAGTTAAACAAAGAGCGTGACGTGGTCTGGAGGAAGGATGGCAAGATTGTGGTGGA
+GAAACCTGGCCGAATTGTGCCAGGCGTCATTGGCTTGATGCGGGCTCTGACCATCAACGATGCAGATGAC
+ACAGATGCTGGAACATACACAGTTACTGTGGAAAACGCCAACAACCTGGAGTGTTCATCTTGCGTAAAAG
+TAGTAGAAGTCATTAGAGATTGGCTGGTGAAACCTATACGAGACCAGCATGTGAAACCCAAGGGGACAGC
+TATTTTTGCCTGTGATATAGCAAAAGATACTCCAAACATTAAGTGGTTCAAAGGATATGATGAAATCCCT
+GCGGAACCAAATGATAAGACTGAAATACTGAGAGATGGAAATCATCTGTACCTCAAAATTAAGAATGCTA
+TGCCAGAAGATATTGCTGAGTATGCAGTGGAAATTGAAGGAAAAAGATACCCTGCAAAGCTGACACTTGG
+AGAGCGTGAAGTTGAACTGCTTAAACCAATAGAGGACGTTACCATTTATGAGAAAGAAAGTGCAAGCTTT
+GATGCAGAAATCTCAGAGGCAGACATTCCTGGACAATGGAAACTGAAAGGAGAACTTCTAAGGCCCTCAC
+CTACTTGTGAAATCAAAGCAGAAGGTGGAAAACGCTTCTTAACTTTGCACAAAGTCAAACTGGACCAAGC
+TGGTGAAGTCCTCTACCAGGCCCTTAATGCAATTACAACTGCCATTTTGACAGTAAAAGAAATCGAACTT
+GACTTTGCTGTGCCCCTGAAGGATGTCACTGTTCCAGAAAGGCGACAGGCTCGATTCGAATGTGTCCTCA
+CCCGAGAGGCAAATGTTATATGGTCCAAAGGACCTGATATAATTAAGTCATCTGACAAATTTGATATCAT
+CGCTGATGGAAAGAAACATATTCTTGTTATTAATGATTCTCAATTTGATGATGAAGGGGTCTATACTGCT
+GAGGTGGAGGGCAAGAAGACCTCAGCTCGGTTGTTTGTCACAGGTATAAGACTGAAATTCATGTCACCTC
+TTGAAGATCAAACAGTAAAAGAAGGTGAAACAGCAACTTTTGTTTGTGAACTTTCTCATGAAAAAATGCA
+TGTAGTCTGGTTCAAAAATGATGCCAAACTCCATACAAGCAGAACAGTACTCATCTCTTCTGAGGGCAAG
+ACTCACAAATTGGAAATGAAAGAAGTGACATTGGATGATATATCTCAGATAAAAGCTCAAGTCAAGGAGC
+TGAGCTCCACAGCACAGCTGAAGGTCTTAGAGGCCGATCCCTACTTCACTGTGAAATTACATGACAAAAC
+TGCAGTGGAGAAGGATGAGATTACTTTGAAGTGTGAAGTGAGCAAAGATGTACCAGTGAAATGGTTCAAA
+GATGGTGAAGAGATTGTCCCTTCACCCAAATATTCTATCAAGGCAGATGGCCTGCGCCGCATCTTAAAAA
+TCAAAAAGGCGGACCTTAAAGATAAAGGCGAATATGTGTGTGACTGTGGCACAGACAAGACCAAGGCAAA
+TGTTACTGTTGAGGCTCGACTAATAAAAGTGGAAAAGCCTCTGTACGGAGTAGAGGTGTTTGTTGGTGAA
+ACAGCCCACTTTGAAATTGAACTTTCTGAACCTGATGTTCACGGCCAGTGGAAGCTGAAAGGACAGCCTT
+TGACAGCTTCCCCTGACTGTGAAATCATTGAGGATGGAAAGAAGCATATTCTGATCCTTCATAACTGTCA
+GCTGGGTATGACAGGAGAGGTTTCCTTCCAGGCTGCTAATGCCAAATCTGCAGCCAATCTGAAAGTGAAA
+GAATTGCCTCTTATCTTCATCACACCTCTCAGTGATGTTAAAGTCTTCGAGAAAGATGAGGCTAAGTTTG
+AGTGTGAAGTATCCAGGGAGCCCAAAACATTCCGTTGGCTAAAAGGAACCCAGGAAATCACAGGTGATGA
+CAGATTTGAGCTTATAAAGGATGGCACTAAGCATTCAATGGTGATCAAGTCAGCTGCTTTTGAAGATGAA
+GCAAAATACATGTTTGAAGCTGAAGATAAGCACACAAGTGGCAAACTGATCATTGAAGGAATCCGGCTCA
+AATTCCTCACCCCTCTCAAAGATGTAACTGCCAAAGAGAAGGAAAGTGCTGTATTTACTGTGGAGTTATC
+TCATGATAACATCCGAGTTAAATGGTTCAAGAATGACCAGCGCCTACACACCACCAGGTCGGTCTCAATG
+CAAGACGAAGGGAAAACTCATTCGATCACATTCAAAGACCTGTCTATTGATGACACCTCCCAAATTAGAG
+TAGAAGCTATGGGGATGAGTTCAGAAGCTAAACTCACTGTGCTTGAGGGAGACCCATATTTTACAGGAAA
+ACTTCAAGATTATACTGGTGTAGAGAAAGATGAAGTTATTCTACAGTGTGAAATTAGCAAAGCAGATGCA
+CCAGTGAAATGGTTTAAGGATGGGAAGGAAATAAAGCCATCCAAAAATGCTGTTATTAAGGCAGATGGCA
+AGAAACGCATGCTAATCCTAAAGAAAGCCTTGAAATCAGATATTGGACAGTACACCTGTGACTGTGGGAC
+AGATAAGACCTCAGGAAAACTTGACATTGAGGATCGGGAAATTAAACTGGTGCGACCCCTGCACAGTGTG
+GAGGTGATGGAGACTGAGACAGCACGCTTTGAAACCGAAATCTCTGAAGATGATATCCACGCCAACTGGA
+AACTCAAGGGAGAGGCCCTACTCCAAACACCTGATTGTGAAATTAAGGAAGAAGGCAAAATACACTCCCT
+TGTTTTGCACAACTGTCGCCTGGACCAGACGGGTGGGGTGGATTTCCAAGCTGCCAATGTTAAATCTAGT
+GCCCACCTCCGAGTTAAGCCACGAGTAATTGGTCTTCTGAGGCCTTTAAAGGATGTCACCGTGACTGCAG
+GGGAAACAGCCACCTTCGACTGCGAGCTCTCCTACGAAGATATCCCAGTGGAATGGTATCTCAAAGGGAA
+GAAACTAGAGCCCAGCGATAAGGTGGTCCCACGTTCAGAAGGAAAAGTTCATACACTTACTCTGAGGGAT
+GTAAAGTTAGAAGATGCTGGGGAAGTCCAACTAACAGCAAAAGATTTCAAAACTCACGCCAACCTCTTTG
+TGAAAGAACCCCCAGTTGAATTCACTAAGCCTCTTGAGGACCAGACGGTCGAAGAGGGAGCCACTGCAGT
+GCTGGAGTGTGAAGTCTCCAGAGAAAATGCTAAGGTGAAATGGTTCAAAAATGGGACAGAAATCCTCAAA
+AGCAAGAAGTATGAAATTGTTGCTGATGGCAGGGTCAGAAAACTTGTTATACATGACTGTACCCCAGAGG
+ATATTAAAACATACACTTGTGATGCTAAGGATTTTAAGACTTCCTGTAACCTGAATGTCGTGCCTCCTCA
+TGTGGAATTCTTAAGACCACTCACCGACCTTCAAGTTAGAGAAAAAGAAATGGCTCGATTTGAGTGTGAA
+CTTTCCCGAGAAAATGCTAAGGTTAAGTGGTTTAAAGATGGTGCTGAAATTAAAAAGGGCAAAAAATATG
+ACATCATATCCAAGGGAGCAGTGCGCATTCTTGTCATCAACAAATGTCTACTGGATGATGAAGCTGAATA
+TTCCTGTGAAGTAAGGACAGCGAGAACTTCTGGCATGCTGACAGTTCTGGAAGAAGAAGCTGTCTTTACC
+AAAAATCTTGCCAACATTGAAGTTAGTGAAACAGACACTATAAAACTGGTTTGTGAAGTCTCCAAACCTG
+GCGCAGAAGTGATTTGGTATAAAGGGGATGAGGAGATCATTGAAACAGGAAGATATGAAATACTGACTGA
+AGGACGGAAGAGAATCCTGGTCATTCAGAACGCTCACCTTGAGGATGCTGGCAACTACAACTGTCGACTC
+CCAAGCTCTCGAACCGATGGCAAAGTCAAAGTACATGAACTGGCTGCTGAATTTATCTCAAAGCCTCAAA
+ACCTTGAAATACTTGAAGGAGAAAAGGCTGAATTTGTCTGCTCTATATCAAAAGAAAGCTTTCCAGTCCA
+GTGGAAGAGGGATGATAAGACACTTGAATCTGGAGATAAATATGACGTTATTGCTGATGGTAAAAAGAGG
+GTCCTAGTTGTGAAAGATGCCACATTACAAGATATGGGCACTTACGTTGTCATGGTAGGGGCCGCCAGAG
+CAGCAGCTCACTTGACAGTCATTGAAAAACTCAGGATCGTAGTTCCTCTTAAGGACACCCGGGTGAAGGA
+ACAACAGGAAGTTGTCTTCAACTGTGAAGTCAATACTGAAGGTGCCAAAGCCAAATGGTTCAGAAATGAA
+GAAGCTATATTTGATAGTTCAAAATACATCATTCTCCAAAAAGACCTAGTCTACACCCTCAGAATTAGAG
+ATGCACACTTAGATGACCAAGCCAACTATAATGTGTCTTTGACCAATCACAGAGGTGAAAATGTTAAAAG
+TGCAGCCAATCTAATAGTAGAAGAGGAAGACCTTAGGATTGTTGAGCCTCTTAAAGATATTGAAACAATG
+GAGAAGAAATCTGTCACATTCTGGTGCAAGGTGAATCGTCTCAATGTAACACTGAAGTGGACCAAAAATG
+GTGAAGAAGTGCCTTTTGACAACCGTGTCTCATACAGAGTTGATAAGTACAAGCACATGTTAACCATTAA
+AGACTGTGGCTTCCCAGATGAAGGTGAATACATTGTCACTGCTGGACAAGATAAATCTGTTGCTGAGCTT
+CTCATCATAGAAGCCCCGACAGAATTTGTGGAACACTTGGAAGATCAGACAGTCACTGAGTTCGATGACG
+CTGTCTTCTCCTGCCAGCTCTCCAGAGAGAAAGCCAATGTAAAATGGTACAGAAATGGGAGAGAAATCAA
+AGAAGGCAAAAAATACAAATTTGAAAAAGATGGAAGTATACACAGACTCATTATAAAAGATTGCAGGCTG
+GATGATGAGTGTGAATATGCTTGCGGGGTAGAAGACAGGAAGTCTCGTGCTAGACTTTTTGTGGAAGAAA
+TTCCTGTTGAGATCATCAGGCCTCCACAAGATATTCTTGAAGCCCCTGGTGCTGATGTTGTCTTTTTAGC
+AGAACTCAATAAAGATAAGGTGGAAGTCCAATGGCTAAGAAATAACATGGTTGTTGTCCAGGGTGATAAA
+CACCAGATGATGAGTGAAGGAAAGATACATCGACTACAGATTTGTGATATTAAGCCCCGTGACCAGGGTG
+AATACAGATTTATTGCCAAAGACAAAGAAGCCAGAGCTAAGCTTGAACTGGCAGCTGCACCAAAAATCAA
+GACAGCTGACCAAGACCTTGTGGTTGATGTTGGCAAGCCTCTGACAATGGTGGTGCCATATGATGCCTAC
+CCCAAAGCAGAAGCTGAATGGTTTAAAGAAAATGAACCTTTATCTACAAAAACCATTGATACTACGGCTG
+AACAAACTTCTTTCAGAATTTTAGAAGCCAAGAAAGGAGACAAAGGGAGGTATAAAATTGTGCTTCAGAA
+CAAACATGGAAAAGCAGAAGGATTCATCAATTTAAAAGTTATTGATGTTCCTGGGCCAGTACGTAACTTA
+GAAGTGACAGAAACATTTGATGGTGAAGTGAGCCTTGCTTGGGAAGAACCTTTAACTGATGGTGGAAGCA
+AAATCATAGGTTACGTTGTTGAAAGACGTGACATTAAGAGAAAGACCTGGGTTCTGGCCACAGACCGTGC
+AGAGAGTTGTGAGTTTACTGTCACTGGTCTACAGAAAGGAGGAGTTGAGTACCTATTCCGTGTGAGTGCA
+AGAAACAGAGTTGGCACTGGTGAGCCAGTAGAAACTGACAATCCTGTAGAAGCAAGGAGTAAATATGATG
+TTCCAGGCCCTCCTTTGAATGTAACCATCACTGATGTGAATCGATTTGGTGTCTCACTGACATGGGAACC
+ACCAGAGTATGATGGAGGTGCTGAGATCACAAACTACGTCATTGAATTAAGAGACAAGACTTCTATCAGG
+TGGGATACTGCCATGACTGTGAGAGCTGAAGACCTGTCTGCAACTGTTACTGATGTGGTAGAAGGACAGG
+AGTACAGTTTCCGAGTGAGAGCCCAAAATCGAATTGGAGTTGGAAAACCAAGTGCAGCCACACCCTTCGT
+CAAAGTTGCTGATCCAATTGAGAGACCAAGTCCTCCTGTAAACCTAACTTCCTCAGATCAGACTCAGTCA
+TCAGTTCAGCTCAAATGGGAACCTCCTCTGAAAGATGGAGGAAGCCCAATATTAGGCTATATAATTGAGC
+GATGCGAAGAAGGAAAAGATAATTGGATTCGTTGCAATATGAAACTTGTCCCTGAACTGACTTACAAGGT
+TACCGGATTGGAAAAAGGAAATAAATATTTATATAGAGTATCTGCAGAAAATAAAGCTGGTGTTTCAGAT
+CCATCTGAAATTCTTGGTCCTCTCACCGCTGACGATGCATTTGTTGAACCAACAATGGATTTAAGTGCAT
+TTAAAGATGGTCTGGAAGTTATTGTCCCAAATCCTATCACGATCCTGGTTCCAAGTACAGGCTATCCAAG
+GCCAACTGCAACCTGGTGTTTTGGAGATAAAGTACTAGAAACAGGGGACCGGGTGAAAATGAAGACCTTG
+TCTGCCTATGCCGAACTTGTCATTTCTCCAAGTGAACGTTCAGACAAGGGCATTTATACACTGAAATTAG
+AAAACCGTGTGAAAACAATTTCTGGGGAAATTGATGTCAATGTAATTGCTCGCCCAAGTGCACCCAAAGA
+ATTGAAATTTGGTGATATAACCAAGGACTCAGTACATTTGACTTGGGAACCACCTGATGATGATGGAGGA
+AGTCCGTTAACTGGATACGTTGTTGAAAAACGAGAAGTCAGCCGGAAAACATGGACTAAAGTTATGGACT
+TTGTGACTGATCTAGAATTCACAGTTCCTGATCTTGTTCAAGGAAAAGAGTACTTATTTAAAGTTTGTGC
+TCGTAACAAATGTGGCCCTGGAGAACCTGCATATGTTGATGAACCTGTAAATATGTCAACTCCTGCAACG
+GTACCTGACCCACCAGAGAATGTTAAATGGAGAGATCGAACAGCCAATAGCATCTTCTTAACATGGGATC
+CACCTAAAAATGATGGTGGTTCACGCATCAAAGGATATATAGTTGAAAGATGTCCACGTGGTTCTGATAA
+ATGGGTTGCCTGTGGAGAACCTGTTGCAGAAACAAAAATGGAAGTGACAGGTCTTGAGGAAGGCAAATGG
+TATGCCTACCGCGTGAAGGCCTTAAACAGGCAGGGTGCTAGCAAACCAAGCAGACCCACAGAGGAAATCC
+AGGCTGTGGACACACAAGAGGCCCCAGAAATCTTCCTCGATGTGAAGCTCCTTGCTGGTCTCACTGTAAA
+AGCTGGGACCAAGATTGAACTTCCTGCCACCGTAACCGGAAAACCTGAACCTAAAATAACTTGGACAAAG
+GCTGATATGATTCTGAAGCAGGACAAAAGAATTACCATTGAAAATGTCCCTAAGAAATCCACAGTGACTA
+TTGTTGATAGTAAGAGAAGTGACACTGGCACATATATCATTGAGGCTGTGAATGTGTGTGGCCGGGCCAC
+TGCTGTGGTGGAAGTGAACGTCTTAGATAAACCCGGACCACCAGCTGCCTTTGACATCACAGATGTAACC
+AATGAGTCATGTCTTCTAACATGGAACCCACCACGCGATGATGGTGGATCTAAGATCACAAACTATGTTG
+TGGAGAGACGAGCAACTGATAGTGAAGTGTGGCACAAGCTCTCATCCACCGTCAAGGATACAAACTTCAA
+GGCCACCAAATTAATCCCCAATAAAGAGTACATCTTCAGAGTTGCTGCAGAAAACATGTATGGTGTTGGT
+GAACCAGTTCAGGCCTCTCCAATAACAGCCAAATATCAGTTTGATCCACCTGGTCCTCCAACTCGCCTAG
+AACCTTCTGATATCACTAAAGACGCAGTGACTCTCACATGGTGTGAGCCAGATGATGATGGTGGCAGCCC
+AATCACAGGATACTGGGTTGAAAGACTGGATCCTGATACAGATAAATGGGTTAGATGCAATAAGATGCCA
+GTAAAGGACACAACATACAGAGTGAAAGGTCTCACTAATAAGAAAAAATACAGATTCCGTGTGTTGGCTG
+AAAATCTTGCTGGACCTGGAAAACCAAGCAAATCAACTGAACCAATCTTAATAAAGGATCCCATAGATCC
+TCCATGGCCCCCTGGAAAACCAACTGTAAAAGATGTAGGCAAAACATCAGTAAGGTTGAATTGGACAAAA
+CCAGAACATGATGGAGGTGCAAAGATTGAGTCTTATGTCATTGAAATGCTGAAGACTGGAACAGATGAGT
+GGGTCAGAGTGGCGGAAGGGGTTCCCACCACTCAGCACTTGCTCCCAGGGCTCATGGAAGGACAGGAATA
+CTCATTCCGAGTTAGAGCTGTGAATAAGGCTGGGGAAAGTGAACCCAGTGAACCCAGTGACCCTGTGCTT
+TGCCGGGAGAAGCTATATCCTCCATCACCACCACGCTGGCTTGAAGTTATTAATATCACAAAAAATACAG
+CAGACCTAAAATGGACAGTTCCTGAGAAAGATGGAGGGTCCCCCATCACCAACTACATTGTGGAAAAGAG
+AGACGTCAGGCGAAAAGGCTGGCAAACAGTGGATACCACTGTCAAGGACACCAAGTGCACAGTCACCCCA
+CTGACTGAGGGCTCTTTATATGTGTTCCGAGTTGCTGCAGAAAATGCTATAGGACAAAGCGACTACACCG
+AAATTGAGGACTCTGTGCTGGCCAAAGACACCTTTACCACTCCTGGACCACCCTACGCCCTGGCAGTGGT
+TGATGTGACAAAACGACATGTTGACCTAAAGTGGGAGCCACCTAAAAATGATGGTGGAAGACCAATACAG
+AGATATGTCATTGAGAAGAAAGAAAGGTTAGGTACCCGTTGGGTGAAAGCTGGAAAGACTGCAGGACCTG
+ACTGTAACTTCAGAGTAACTGATGTCATCGAAGGAACAGAGGTCCAGTTTCAGGTTCGGGCTGAAAATGA
+AGCTGGAGTTGGCCACCCAAGTGAACCCACAGAAATCCTATCCATTGAAGATCCAACAAGTCCTCCCTCA
+CCACCCCTTGACCTACATGTGACTGATGCTGGGAGAAAACACATTGCCATTGCTTGGAAGCCTCCAGAGA
+AAAATGGTGGAAGTCCTATCATAGGATACCATGTTGAAATGTGTCCAGTAGGCACTGAGAAATGGATGAG
+AGTTAATTCTCGCCCAATAAAGGACTTGAAATTCAAGGTTGAAGAAGGTGTTGTTCCTGACAAAGAATAT
+GTCCTGAGAGTGAGAGCAGTCAATGCTATTGGTGTCAGCGAGCCATCTGAAATCTCTGAAAATGTGGTTG
+CCAAAGACCCAGACTGCAAGCCAACAATTGACCTGGAGACTCATGACATTATTGTTATTGAAGGTGAAAA
+GTTAAGCATTCCTGTTCCCTTCAGAGCTGTCCCAGTTCCAACTGTTAGTTGGCATAAAGATGGCAAAGAA
+GTTAAAGCAAGTGATAGATTAACAATGAAGAATGATCACATCTCTGCACACCTTGAAGTTCCCAAGAGTG
+TCCGTGCAGATGCCGGAATTTATACCATTACACTGGAGAATAAGCTCGGCTCAGCAACAGCCTCAATCAA
+TGTCAAAGTCATAGGCCTACCTGGACCATGCAAAGATATTAAAGCAAGTGACATTACCAAGAGTTCTTGT
+AAGTTAACTTGGGAACCTCCAGAATTTGATGGTGGAACCCCAATTCTTCATTATGTCCTGGAGCGCAGAG
+AAGCTGGGAGGAGAACATATATACCAGTCATGTCTGGTGAGAACAAACTGTCATGGACTGTGAAGGATCT
+CATACCAAATGGTGAATACTTCTTCCGTGTTAAAGCAGTCAACAAGGTTGGTGGAGGAGAATATATTGAA
+CTGAAAAATCCAGTCATTGCTCAAGATCCAAAGCAACCCCCTGATCCACCTGTAGATGTAGAGGTTCATA
+ATCCTACAGCGGAGGCAATGACTATTACATGGAAGCCACCTTTGTATGATGGAGGGAGCAAGATAATGGG
+CTACATCATAGAGAAGATTGCTAAGGGTGAAGAAAGGTGGAAGAGATGCAATGAACACCTGGTACCAATC
+CTGACCTATACAGCAAAAGGACTTGAAGAGGGGAAAGAGTACCAATTCCGTGTGCGAGCAGAGAACGCCG
+CGGGTATTAGTGAACCTTCTCGGGCTACTCCTCCAACCAAAGCTGTAGATCCCATTGATGCCCCCAAAGT
+CATTCTGAGAACAAGCCTAGAAGTGAAACGAGGTGATGAAATAGCACTTGATGCAAGTATTTCTGGATCA
+CCTTACCCAACTATTACATGGATAAAGGATGAAAATGTTATTGTACCAGAGGAAATTAAGAAGCGTGCAG
+CACCCTTGGTTAGGAGAAGGAAGGGTGAAGTTCAAGAAGAAGAACCATTTGTCCTGCCTCTGACACAGCG
+TTTGAGTATTGACAACAGCAAAAAGGGAGAATCTCAGCTACGCGTCCGAGATTCTCTCCGACCTGACCAT
+GGTCTGTATATGATCAAAGTTGAAAATGACCACGGTATTGCAAAAGCTCCTTGTACTGTCAGTGTGTTAG
+ATACACCGGGACCACCAATCAACTTTGTATTTGAAGATATCAGAAAGACCTCAGTCCTTTGTAAATGGGA
+ACCACCCCTTGATGATGGTGGCAGTGAAATCATAAACTACACTTTGGAAAAGAAAGACAAGACAAAACCC
+GACTCAGAATGGATTGTTGTCACTTCAACACTTAGACATTGCAAATATTCAGTAACAAAACTGATTGAAG
+GAAAAGAGTACCTCTTCCGTGTAAGAGCTGAAAACAGATTTGGGCCAGGTCCACCATGTGTTTCAAAGCC
+ACTTGTGGCTAAAGATCCATTTGGACCACCTGATGCACCAGATAAGCCCATTGTGGAAGATGTTACCAGC
+AACAGTATGCTAGTGAAATGGAATGAACCAAAAGATAATGGAAGCCCCATTTTGGGTTACTGGCTTGAAA
+AACGTGAAGTTAACAGTACACATTGGTCTCGTGTCAACAAAAGCCTTCTGAATGCCTTGAAAGCCAATGT
+AGATGGCTTATTAGAAGGACTCACCTATGTCTTCAGAGTATGTGCTGAAAATGCAGCTGGACCTGGAAAG
+TTCAGTCCACCTTCAGATCCCAAAACAGCACATGATCCAATCTCTCCTCCTGGGCCACCTATCCCAAGAG
+TCACTGACACAAGCTCTACAACTATTGAACTAGAATGGGAACCCCCAGCTTTCAATGGTGGTGGGGAAAT
+TGTTGGCTATTTTGTTGATAAGCAGTTGGTTGGCACAAATGAATGGTCACGCTGCACAGAGAAGATGATC
+AAGGTCCGTCAGTACACCGTCAAAGAAATCCGAGAGGGTGCTGATTACAAACTTCGGGTGAGTGCTGTCA
+ATGCCGCAGGGGAAGGACCGCCTGGAGAAACACAACCTGTTACTGTGGCTGAACCACAAGAGCCTCCAGC
+TGTGGAACTGGATGTTTCTGTCAAGGGTGGAATACAAATAATGGCTGGGAAGACTCTTAGAATTCCAGCT
+GTGGTGACTGGTCGCCCTGTACCTACAAAAGTATGGACCAAAGAAGAAGGGGAGCTGGATAAAGACCGTG
+TTGTAATAGACAACGTTGGAACCAAATCTGAACTAATTATCAAGGATGCACTGCGAAAAGACCATGGCAG
+ATATGTGATTACAGCTACAAATAGCTGTGGTTCCAAATTTGCAGCAGCCAGGGTAGAAGTTTTTGATGTC
+CCTGGTCCAGTTCTTGACTTAAAACCTGTTGTAACAAACAGAAAAATGTGTCTACTTAACTGGTCTGATC
+CAGAAGATGATGGAGGAAGTGAAATAACAGGCTTTATCATTGAAAGAAAAGATGCCAAGATGCATACTTG
+GAGACAACCAATAGAGACTGAGAGATCTAAATGTGACATCACAGGTCTGCTTGAGGGACAAGAATATAAG
+TTCCGTGTTATTGCCAAGAACAAGTTTGGCTGTGGCCCTCCTGTTGAAATAGGACCAATTCTTGCAGTTG
+ATCCACTAGGTCCTCCAACATCTCCAGAGAGGCTCACATACACTGAAAGGACAAAGTCCACTATCACACT
+TGACTGGAAAGAGCCCCGCAGTAATGGTGGCAGTCCCATCCAAGGATATATCATTGAAAAACGGCGTCAT
+GACAAACCTGACTTTGAAAGAGTTAACAAGCGACTCTGCCCAACCACATCTTTTCTGGTTGAAAATCTTG
+ATGAACACCAAATGTATGAGTTCCGTGTCAAAGCTGTCAATGAAATTGGTGAAAGTGAACCATCCCTACC
+TCTTAATGTAGTCATACAAGATGATGAAGTGCCTCCAACTATTAAGTTGCGTCTGAGTGTTCGAGGAGAC
+ACTATCAAAGTTAAGGCAGGAGAGCCTGTCCACATCCCTGCAGATGTGACAGGCCTTCCAATGCCTAAGA
+TTGAATGGTCCAAAAATGAAACTGTAATTGAAAAACCCACTGATGCACTTCAGATAACCAAGGAAGAGGT
+ATCCCGAAGTGAGGCAAAAACTGAGCTTAGCATTCCCAAAGCGGTCCGGGAGGACAAAGGCACTTACACA
+GTTACTGCTTCCAATCGCCTTGGCTCAGTGTTCCGAAATGTTCACGTTGAAGTATATGACCGCCCATCCC
+CACCAAGAAATCTTGCTGTTACTGACATTAAAGCTGAATCTTGCTACTTGACATGGGATGCCCCTCTTGA
+TAATGGTGGCAGTGAAATCACCCATTATGTTATTGACAAACGTGATGCAAGTAGGAAGAAAGCAGAATGG
+GAGGAAGTCACCAACACTGCTGTAGAGAAAAGATATGGGATCTGGAAACTTATCCCCAATGGTCAGTATG
+AGTTCCGAGTCAGGGCAGTGAATAAATATGGAATCAGTGATGAGTGCAAATCAGATAAAGTAGTCATTCA
+AGATCCTTATCGCCTTCCTGGACCTCCAGGAAAACCAAAAGTTTTGGCACGCACCAAAGGATCAATGCTA
+GTGAGCTGGACTCCTCCTTTGGACAATGGTGGCTCTCCAATTACTGGCTACTGGCTGGAGAAAAGAGAAG
+AGGGAAGTCCTTATTGGTCACGTGTTAGCCGAGCACCAATAACCAAAGTGGGATTGAAAGGCGTGGAATT
+TAATGTTCCTCGTTTGCTTGAAGGCGTTAAATACCAGTTCAGAGCCATGGCAATAAATGCTGCAGGAATT
+GGTCCTCCCAGTGAACCATCAGATCCAGAGGTTGCAGGAGATCCCATATTTCCACCGGGGCCACCTTCTT
+GCCCAGAAGTTAAAGATAAAACGAAGTCAAGCATCTCACTAGGATGGAAACCTCCAGCCAAAGATGGTGG
+CAGCCCAATCAAAGGATACATTGTAGAAATGCAAGAAGAAGGTACTACTGACTGGAAAAGAGTAAATGAA
+CCAGACAAACTTATAACTACCTGTGAATGTGTGGTGCCTAATCTGAAAGAGCTCAGGAAGTACAGATTCA
+GAGTGAAAGCTGTCAATGAAGCTGGTGAATCTGAACCAAGTGATACAACTGGGGAGATCCCTGCCACTGA
+TATTCAAGAGGAACCAGAAGTTTTCATTGACATTGGAGCACAGGACTGTCTGGTTTGTAAAGCTGGCTCA
+CAGATTAGGATTCCTGCTGTCATCAAGGGACGCCCAACACCAAAATCATCTTGGGAATTTGATGGAAAGG
+CAAAGAAAGCAATGAAGGATGGAGTTCATGACATACCCGAAGATGCACAGCTGGAGACTGCTGAAAACTC
+CTCAGTAATTATTATTCCGGAGTGTAAACGATCTCATACAGGCAAATACAGCATCACAGCCAAGAATAAA
+GCAGGACAAAAGACTGCAAATTGCAGAGTTAAAGTCATGGATGTACCAGGCCCACCCAAAGATCTGAAAG
+TCAGTGATATCACAAGGGGTAGTTGCAGACTTTCATGGAAGATGCCAGACGACGATGGAGGAGACAGGAT
+CAAAGGCTATGTTATTGAGAAGAGGACTATTGATGGAAAAGCCTGGACCAAAGTCAATCCAGACTGTGGA
+AGCACCACATTTGTAGTGCCTGATCTCCTCTCTGAACAGCAATATTTCTTCCGTGTGCGAGCAGAAAACC
+GTTTTGGTATTGGCCCACCTGTGGAAACCATTCAGAGGACCACTGCCAGAGATCCGATATATCCTCCTGA
+TCCTCCTATTAAACTCAAGATTGGCCTCATCACAAAGAACACAGTGCATCTGTCATGGAAACCCCCGAAG
+AATGATGGGGGCTCCCCTGTTACCCACTATATTGTTGAGTGCCTTGCATGGGACCCTACTGGGACAAAGA
+AAGAAGCCTGGAGGCAGTGCAATAAGCGTGATGTGGAAGAACTGCAATTTACTGTTGAAGACCTAGTAGA
+AGGTGGGGAATATGAATTCCGAGTCAAAGCTGTCAATGCTGCAGGAGTCAGCAAGCCTTCAGCCACTGTT
+GGGCCCTGTGACTGTCAAAGACCAGACATGCCACCATCAATTGATCTAAAAGAATTCATGGAGGTTGAAG
+AAGGAACCAATGTTAACATTGTGGCCAAAATTAAAGGTGTGCCATTCCCGACACTAACCTGGTTTAAAGC
+TCCTCCAAAGAAGCCTGATAACAAAGAACCTGTTCTCTATGACACCCATGTCAACAAACTGGTGGTAGAT
+GATACTTGCACTTTAGTTATTCCGCAGTCTCGCAGGAGTGACACTGGCTTATATACCATCACAGCTGTAA
+ATAATCTGGGAACAGCATCAAAGGAGATGAGACTGAATGTCCTGGGTCGTCCTGGCCCTCCAGTGGGACC
+CATAAAATTTGAATCTGTTTCAGCAGATCAAATGACACTATCTTGGTTTCCACCTAAAGATGATGGTGGG
+TCTAAGATTACAAACTATGTAATTGAGAAAAGAGAAGCTAACAGGAAGACATGGGTCCATGTCTCCAGTG
+AACCTAAGGAGTGCACGTACACGATTCCCAAATTGCTAGAAGGCCATGAATATGTATTCCGAATCATGGC
+CCAGAATAAATATGGCATTGGAGAACCTCTTGACAGTGAACCTGAAACAGCAAGAAACCTCTTCTCTGTC
+CCTGGAGCACCAGATAAACCAACAGTTAGCAGCGTGACTCGTAACTCCATGACTGTCAACTGGGAAGAGC
+CAGAATATGATGGAGGCTCTCCTGTGACAGGGTACTGGCTGGAAATGAAAGACACCACTTCAAAGAGATG
+GAAGAGAGTTAACCGAGATCCTATCAAAGCCATGACTTTGGGTGTTTCTTATAAAGTGACTGGTCTTATT
+GAAGGTTCCGACTATCAATTCCGGGTATATGCAATCAATGCTGCTGGCGTGGGTCCAGCAAGTCTGCCAT
+CAGACCCAGCGACTGCTAGAGATCCAATTGCCCCTCCTGGTCCTCCATTTCCCAAAGTGACAGATTGGAC
+TAAATCATCTGCAGATCTGGAGTGGTCTCCCCCACTAAAAGATGGTGGATCCAAAGTAACTGGATACATC
+GTTGAATATAAAGAAGAAGGAAAAGAAGAATGGGAAAAGGGTAAAGATAAAGAAGTGAGAGGAACAAAGC
+TCGTTGTGACAGGATTAAAGGAAGGAGCATTCTACAAATTTAGAGTTAGTGCAGTCAACATTGCTGGCAT
+TGGAGAACCTGGAGAGGTCACAGATGTCATTGAAATGAAGGACAGACTTGTTTCACCTGACCTTCAGCTA
+GATGCCAGTGTCAGAGATAGAATTGTTGTCCATGCTGGAGGGGTGATCCGAATCATTGCCTATGTGTCTG
+GAAAGCCTCCTCCAACCGTCACCTGGAACATGAATGAAAGAACCTTACCTCAAGAAGCCACCATTGAGAC
+CACAGCCATTAGCTCATCCATGGTCATCAAGAACTGCCAGAGGAGCCATCAAGGCGTCTATTCTCTTCTT
+GCCAAAAATGAAGCCGGAGAAAGAAAGAAGACAATTATTGTTGATGTATTAGATGTTCCAGGTCCCGTTG
+GAACACCATTCCTAGCTCACAACCTAACCAATGAGTCCTGCAAACTGACATGGTTTTCTCCAGAAGATGA
+TGGAGGCTCTCCAATCACCAATTATGTCATTGAAAAGCGTGAATCTGACCGCAGAGCATGGACCCCAGTG
+ACATATACAGTTACCCGACAAAATGCTACTGTCCAGGGTCTCATTCAAGGAAAAGCCTACTTTTTCCGAA
+TTGCGGCTGAAAATAGTATTGGCATGGGTCCATTTGTTGAGACATCAGAGGCACTTGTTATCAGAGAGCC
+AATAACTGTACCAGAGCGTCCTGAAGACCTGGAAGTCAAAGAAGTTACTAAAAATACTGTAACTTTGACT
+TGGAATCCTCCTAAGTATGATGGTGGGTCAGAAATTATTAACTATGTCCTAGAAAGTCGGCTCATTGGGA
+CTGAGAAGTTCCACAAAGTTACAAATGACAACTTGCTTAGCAGAAAATACACTGTTAAAGGCTTAAAAGA
+AGGTGATACCTATGAGTACCGTGTCAGTGCTGTCAACATTGTTGGACAAGGCAAACCATCATTTTGCACC
+AAACCAATTACTTGCAAGGATGAGCTGGCACCCCCAACGCTTCACCTCGACTTCAGAGATAAGCTCACGA
+TTCGAGTTGGTGAAGCTTTTGCCCTCACTGGCCGTTACTCAGGCAAACCAAAGCCTAAGGTTTCCTGGTT
+CAAAGATGAAGCTGATGTGCTGGAAGATGATCGCACTCATATAAAGACTACACCAGCAACACTTGCTTTA
+GAGAAGATCAAGGCCAAACGTTCAGATTCCGGCAAATACTGTGTGGTTGTGGAGAACAGTACAGGCTCTA
+GGAAAGGTTTCTGTCAAGTTAATGTTGTTGACCGTCCTGGACCACCAGTAGGACCAGTTAGTTTTGATGA
+GGTGACCAAAGATTACATGGTTATCTCTTGGAAGCCTCCTTTAGATGATGGAGGCAGTAAAATCACCAAT
+TATATTATTGAGAAGAAGGAAGTGGGTAAAGACGTCTGGATGCCAGTGACATCTGCAAGTGCTAAAACAA
+CATGCAAAGTTTCTAAACTACTTGAAGGAAAAGATTATATTTTCCGGATACATGCTGAAAATCTGTATGG
+AATAAGTGATCCTCTGGTGTCTGATTCAATGAAAGCCAAAGATCGTTTCAGGGTTCCTGATGCACCTGAT
+CAGCCAATTGTTACAGAAGTTACCAAAGACTCTGCATTAGTAACCTGGAATAAGCCACATGATGGAGGAA
+AACCCATCACAAACTACATCCTGGAAAAGAGAGAAACTATGTCTAAACGATGGGCTAGAGTTACCAAAGA
+TCCTATTCATCCATACACTAAATTTAGGGTTCCTGATCTTCTAGAAGGATGTCAGTATGAATTCCGGGTT
+TCTGCAGAAAATGAAATTGGTATTGGAGATCCAAGCCCACCATCCAAACCAGTCTTTGCTAAAGATCCAA
+TTGCTAAACCAAGTCCACCTGTTAATCCTGAAGCAATAGATACAACATGCAATTCAGTCGATCTAACTTG
+GCAGCCACCACGTCATGATGGTGGGAGCAAGATTCTGGGTTATATTGTTGAGTACCAGAAAGTTGGAGAT
+GAAGAGTGGAGAAGAGCCAATCACACCCCTGAGTCATGTCCTGAAACTAAATATAAAGTCACCGGTCTTC
+GGGACGGTCAAACCTATAAGTTTAGAGTGTTAGCAGTCAATGCAGCTGGTGAATCAGATCCAGCTCATGT
+TCCGGAGCCAGTCCTAGTAAAAGACAGGCTTGAACCCCCTGAGTTGATTCTTGATGCCAACATGGCAAGA
+GAACAACACATTAAAGTTGGTGATACTCTAAGACTTAGTGCCATCATCAAAGGAGTGCCATTCCCAAAAG
+TAACTTGGAAAAAAGAAGACAGAGATGCTCCAACTAAAGCAAGAATTGATGTGACTCCAGTTGGTAGCAA
+GCTTGAAATTCGTAATGCTGCCCATGAAGATGGTGGAATTTATTCTTTAACAGTGGAGAATCCAGCTGGT
+TCAAAAACTGTCTCAGTAAAAGTACTTGTATTAGATAAACCTGGGCCACCTAGAGATCTGGAAGTCAGTG
+AAATTAGGAAAGATTCATGTTACCTTACTTGGAAAGAACCACTGGATGATGGTGGTTCTGTTATTACCAA
+TTATGTGGTTGAGAGGAGAGATGTTGCCAGCGCCCAGTGGTCACCTCTCTCAGCTACATCAAAGAAAAAG
+AGTCACTTCGCTAAGCATCTGAATGAAGGCAACCAGTACCTCTTCCGAGTAGCTGCGGAGAACCAGTATG
+GACGTGGTCCTTTTGTTGAAACACCAAAACCAATCAAGGCTTTGGATCCTCTCCATCCCCCAGGGCCACC
+CAAGGACCTGCACCATGTAGATGTTGACAAGACTGAAGTCTCCCTAGTCTGGAATAAGCCGGATCGTGAT
+GGTGGTTCTCCAATCACTGGATATTTGGTAGAATATCAAGAAGAAGGCACCCAGGACTGGATTAAATTTA
+AGACTGTGACAAACTTAGAGTGTGTGGTTACTGGACTACAACAAGGAAAGACCTATAGATTCCGTGTAAA
+AGCTGAAAACATTGTGGGTCTTGGTCTCCCTGACACAACTATCCCGATAGAATGTCAAGAAAAACTAGTG
+CCTCCATCCGTGGAGCTAGATGTGAAATTAATTGAAGGTCTTGTGGTAAAGGCTGGAACCACAGTCAGAT
+TCCCTGCTATTATAAGAGGTGTGCCTGTTCCTACTGCAAAGTGGACAACCGATGGGAGTGAGATTAAAAC
+CGATGAGCACTACACAGTTGAAACAGACAACTTCTCATCAGTACTTACCATTAAGAACTGCTTAAGGAGA
+GACACTGGGGAATATCAAATCACAGTTTCCAATGCAGCCGGTAGCAAAACAGTAGCCGTACATCTTACTG
+TTCTTGATGTTCCTGGGCCACCAACAGGTCCTATTAATATTCTGGATGTTACTCCTGAACACATGACTAT
+CTCATGGCAGCCACCTAAGGATGATGGAGGAAGCCCTGTGATAAATTATATTGTTGAGAAACAAGATACA
+AGGAAAGACACGTGGGGTGTTGTCTCTTCCGGAAGCAGTAAGACAAAGCTGAAAATCCCACATCTGCAGA
+AGGGCTGTGAATATGTTTTCCGAGTTAGAGCAGAGAATAAGATAGGTGTTGGTCCTCCCCTTGACTCCAC
+ACCTACTGTTGCTAAGCATAAATTTAGTCCTCCGTCTCCTCCTGGTAAACCAGTGGTTACTGACATTACT
+GAAAATGCAGCAACAGTGTCTTGGACCCTGCCAAAATCTGATGGTGGCAGTCCAATAACTGGCTACTATA
+TGGAACGTCGAGAAGTAACTGGCAAATGGGTGAGGGTCAACAAAACACCTATCGCTGACCTGAAGTTCAG
+AGTGACTGGACTCTATGAAGGAAATACATATGAGTTTAGAGTTTTTGCTGAAAATCTTGCAGGACTAAGC
+AAACCATCCCCAAGTTCTGATCCAATAAAAGCTTGCCGGCCCATCAAACCACCTGGACCACCTATTAATC
+CTAAACTGAAAGACAAGAGCAGAGAAACAGCTGATTTGGTGTGGACAAAGCCTCTCAGTGATGGTGGTAG
+CCCCATTCTAGGATATGTAGTGGAATGTCAGAAACCTGGCACGGCACAATGGAACAGGATTAATAAAGAT
+GAACTCATTAGGCAATGTGCCTTTAGGGTACCTGGACTAATTGAAGGAAATGAGTACAGATTCCGTATAA
+AGGCAGCTAATATTGTAGGAGAGGGTGAGCCAAGAGAACTAGCAGAATCTGTGATTGCAAAAGATATCCT
+TCATCCTCCAGAAGTAGAACTTGATGTTACTTGTCGTGATGTTATTACCGTGAGAGTAGGCCAAACTATC
+CGCATTCTAGCTCGAGTCAAAGGCAGACCTGAACCAGACATAACTTGGACTAAGGAAGGCAAAGTATTGG
+TCCGAGAAAAGAGGGTGGACCTTATTCAGGATCTACCTCGTGTTGAGTTACAAATTAAAGAAGCTGTTAG
+AGCTGATCATGGCAAGTATATCATCTCAGCTAAGAACAGCAGTGGACATGCCCAAGGTTCAGCCATCGTT
+AACGTCCTTGACAGACCTGGGCCTTGCCAGAATTTGAAGGTTACCAATGTAACCAAAGAGAACTGTACAA
+TTTCTTGGGAAAACCCACTAGATAATGGTGGCTCAGAAATAACAAACTTCATAGTAGAATATCGCAAACC
+AAACCAGAAAGGCTGGTCAATTGTTGCATCAGATGTCACTAAACGATTAATCAAGGCCAACCTTTTAGCC
+AACAATGAATACTATTTCCGAGTTTGTGCAGAGAATAAAGTAGGTGTTGGGCCAACCATCGAAACAAAAA
+CTCCCATTCTGGCTATTAACCCTATTGACAGACCAGGTGAGCCTGAAAACCTTCACATTGCAGATAAAGG
+AAAGACATTTGTCTATCTAAAGTGGCGGAGGCCTGACTATGATGGTGGCAGTCCAAATCTGTCATATCAT
+GTTGAGAGAAGGCTTAAGGGCTCCGATGACTGGGAAAGAGTGCATAAAGGAAGCATTAAAGAAACTCACT
+ACATGGTTGACAGATGTGTTGAAAACCAGATTTATGAGTTCAGAGTGCAAACAAAGAATGAAGGTGGGGA
+AAGTGACTGGGTGAAGACAGAGGAAGTTGTTGTGAAAGAAGACTTACAAAAACCAGTACTTGATCTGAAA
+TTAAGTGGGGTCCTAACTGTCAAAGCAGGGGACACCATTAGGCTTGAGGCAGGGGTTAGAGGCAAACCAT
+TCCCAGAAGTTGCATGGACCAAGGACAAAGACGCTACAGACTTAACAAGATCACCAAGGGTCAAGATTGA
+TACCCGTGCTGATTCATCTAAATTTTCTCTTACTAAAGCAAAGCGAAGTGATGGGGGTAAATATGTAGTT
+ACGGCAACTAACACGGCTGGCAGTTTTGTGGCCTATGCCACTGTCAATGTTTTAGATAAGCCTGGTCCTG
+TGAGAAATCTGAAAATTGTTGATGTGTCCAGTGATAGGTGTACTGTTTGCTGGGATCCACCAGAAGATGA
+TGGTGGCTGTGAAATCCAAAATTATATTCTAGAAAAATGTGAGACAAAGCGAATGGTTTGGTCTACCTAT
+TCTGCTACTGTCTTGACACCTGGTACTACAGTAACACGTCTCATAGAAGGAAATGAATATATTTTCAGAG
+TCCGTGCAGAAAATAAAATAGGCACAGGGCCTCCAACAGAAAGTAAACCAGTCATAGCCAAAACCAAGTA
+TGATAAACCTGGTCGCCCTGATCCCCCAGAAGTCACTAAAGTAAGCAAAGAAGAGATGACTGTGGTTTGG
+AATCCACCTGAATATGATGGTGGAAAGTCTATAACTGGATACTTTTTGGAGAAAAAGGAAAAGCATTCAA
+CACGATGGGTCCCTGTCAACAAGAGTGCAATCCCTGAGAGACGTATGAAAGTACAGAATCTCCTCCCAGA
+CCATGAATATCAGTTCCGTGTCAAGGCAGAAAATGAAATTGGAATTGGAGAACCAAGCTTGCCTTCAAGA
+CCGGTGGTGGCAAAAGACCCCATAGAGCCACCTGGTCCACCAACCAATTTCAGAGTGGTTGATACAACCA
+AACATTCCATAACTCTTGGGTGGGGAAAACCAGTCTATGATGGTGGTGCACCGATCATTGGATATGTTGT
+GGAAATGAGACCAAAAATAGCAGATGCGTCTCCTGATGAAGGCTGGAAACGGTGTAATGCTGCAGCACAG
+CTTGTACGCAAGGAATTCACTGTTACCAGCTTGGATGAAAACCAGGAATATGAGTTCAGGGTGTGTGCCC
+AAAACCAAGTTGGTATTGGGCGCCCTGCAGAGCTAAAGGAAGCTATCAAACCTAAAGAAATACTAGAACC
+TCCGGAGATTGATTTGGATGCCAGCATGAGGAAACTGGTCATAGTGAGAGCAGGATGCCCTATTCGTCTC
+TTTGCTATAGTGAGAGGACGACCAGCCCCTAAAGTCACTTGGCGAAAAGTTGGCATTGATAATGTGGTCA
+GAAAAGGACAAGTTGATCTGGTTGACACTATGGCCTTCCTTGTCATCCCCAATTCTACCCGTGATGACTC
+AGGAAAATATTCCTTAACACTTGTGAACCCAGCAGGAGAAAAGGCTGTATTCGTAAATGTCAGAGTATTA
+GACACTCCTGGGCCTGTGTCTGATTTAAAAGTTTCAGATGTCACTAAAACATCATGCCATGTGTCCTGGG
+CCCCTCCTGAAAACGACGGTGGGAGCCAAGTGACACATTATATCGTGGAGAAACGTGAGGCAGACAGAAA
+GACATGGTCGACCGTTACCCCAGAAGTTAAGAAAACAAGCTTCCATGTAACCAATCTTGTCCCTGGGAAT
+GAGTATTACTTCAGAGTAACTGCTGTCAACGAATATGGCCCTGGCGTCCCAACAGATGTCCCAAAACCAG
+TGCTTGCATCAGATCCTCTAAGTGAGCCGGATCCCCCAAGGAAATTAGAAGTGACTGAAATGACCAAGAA
+CAGTGCCACCTTAGCCTGGTTACCTCCCCTACGTGATGGAGGTGCTAAAATCGATGGCTACATCACTAGT
+TACAGAGAAGAAGAGCAGCCTGCAGATCGCTGGACAGAGTACTCAGTGGTAAAAGATCTGAGCCTTGTTG
+TCACTGGCCTAAAGGAAGGAAAGAAATACAAATTTAGAGTAGCGGCCAGAAATGCTGTTGGAGTCAGTTT
+GCCAAGAGAAGCTGAAGGAGTGTATGAAGCCAAAGAACAACTGTTGCCACCAAAGATCCTTATGCCAGAG
+CAAATAACTATCAAAGCTGGGAAAAAACTCCGAATTGAAGCCCATGTGTATGGAAAGCCTCATCCCACCT
+GTAAATGGAAAAAAGGAGAAGATGAAGTTGTCACATCCAGCCACCTGGCAGTGCATAAAGCAGACAGCTC
+TTCAATTCTGATCATAAAAGATGTGACTAGGAAAGACAGTGGTTACTACAGCCTCACAGCAGAGAACAGT
+TCTGGGACAGACACTCAGAAAATCAAAGTTGTAGTCATGGATGCCCCCGGCCCCCCTCAGCCTCCATTTG
+ACATTTCTGATATAGACGCTGATGCTTGCTCCCTGTCATGGCACATCCCTCTGGAGGACGGAGGCAGTAA
+CATCACCAATTATATAGTGGAGAAGTGTGATGTAAGCCGAGGTGACTGGGTCACGGCTCTAGCTTCAGTC
+ACAAAAACTTCCTGCAGGGTTGGAAAGCTGATCCCAGGCCAGGAGTACATCTTCCGGGTCCGTGCTGAAA
+ACCGATTTGGCATTTCAGAGCCTCTCACATCTCCAAAGATGGTTGCGCAGTTCCCATTTGGTGTTCCTAG
+TGAACCAAAGAATGCACGAGTCACCAAAGTCAACAAGGACTGTATTTTTGTTGCTTGGGACAGACCAGAT
+AGTGATGGAGGGAGCCCCATTATTGGTTATCTGATTGAACGCAAGGAAAGAAACAGTTTGCTGTGGGTGA
+AAGCCAATGATACTCTTGTCCGGTCAACTGAATATCCTTGTGCTGGCCTTGTAGAAGGTCTTGAGTATTC
+ATTCAGAATCTATGCCCTAAACAAAGCTGGATCCAGCCCACCCAGCAAACCCACAGAATATGTAACTGCA
+AGAATGCCAGTTGATCCTCCTGGGAAACCTGAGGTTATTGATGTCACCAAGAGTACTGTATCTCTGATCT
+GGGCTCGTCCAAAGCATGATGGAGGCAGTAAAATTATTGGCTATTTCGTAGAAGCTTGCAAACTTCCTGG
+TGATAAATGGGTACGGTGCAATACTGCACCTCACCAGATTCCCCAGGAAGAGTACACAGCTACTGGCCTA
+GAAGAGAAAGCTCAGTATCAATTTAGAGCTATTGCCAGGACCGCGGTAAACATTAGCCCACCTTCTGAAC
+CTTCTGATCCAGTGACTATCCTCGCAGAAAATGTCCCTCCCAGGATAGACCTGAGTGTGGCTATGAAATC
+TTTGCTTACTGTGAAAGCTGGAACTAATGTCTGCTTGGATGCTACTGTTTTTGGTAAACCGATGCCAACA
+GTTTCTTGGAAAAAAGATGGCACACTGCTAAAACCAGCAGAAGGCATAAAGATGGCCATGCAGCGGAATC
+TGTGCACCTTGGAGCTATTCAGCGTGAACCGGAAGGACTCAGGAGACTATACCATTACTGCTGAAAATTC
+AAGTGGTTCTAAATCAGCCACCATTAAGCTTAAAGTGTTAGATAAACCGGGTCCTCCAGCATCTGTTAAA
+ATCAACAAAATGTATTCAGATCGTGCTATGCTTTCTTGGGAACCGCCTCTTGAAGATGGAGGCTCAGAAA
+TCACCAACTATATTGTTGACAAACGTGAAACAAGCAGGCCCAACTGGGCTCAAGTCTCTGCAACTGTGCC
+TATCACCAGCTGCAGCGTGGAGAAACTTATAGAGGGCCATGAGTATCAGTTCCGTATTTGTGCTGAAAAT
+AAATATGGAGTAGGCGATCCAGTCTTCACTGAACCAGCAATTGCCAAAAACCCATATGACCCACCAGGAC
+GCTGTGATCCTCCTGTTATTAGCAACATAACCAAAGATCACATGACAGTCAGCTGGAAGCCACCAGCAGA
+TGATGGGGGCTCACCCATCACTGGCTATTTGCTTGAAAAGCGGGAAACCCAGGCTGTTAACTGGACTAAG
+GTCAACAGAAAACCTATTATAGAAAGAACATTAAAAGCAACAGGTCTTCAAGAAGGTACCGAATATGAGT
+TCCGTGTTACAGCTATAAATAAAGCTGGACCAGGCAAACCCAGTGACGCATCCAAGGCCGCTTATGCTCG
+GGACCCTCAGTATCCTCCTGCGCCACCGGCTTTCCCTAAAGTATATGATACAACTCGCAGCTCTGTGAGT
+CTATCTTGGGGCAAGCCAGCCTATGACGGCGGCAGCCCTATCATTGGTTATCTCGTTGAAGTAAAACGGG
+CTGACTCCGATAACTGGGTGAGGTGCAACTTACCACAGAATCTACAGAAAACCCGCTTTGAGGTTACTGG
+CCTGATGGAAGACACACAATATCAATTCCGTGTGTATGCCGTTAATAAGATTGGATACAGTGACCCCAGT
+GATGTGCCAGATAAACACTATCCCAAGGACATCTTAATTCCACCTGAGGGAGAACTTGATGCGGACTTAA
+GGAAGACACTCATATTACGTGCTGGAGTTACTATGAGACTATATGTACCAGTAAAAGGACGCCCACCTCC
+AAAGATTACTTGGTCTAAACCAAATGTCAATCTAAGAGACAGGATTGGACTGGACATAAAGTCAACTGAC
+TTTGACACTTTCTTGCGCTGTGAAAATGTGAACAAATATGATGCAGGAAAATATATCTTAACCCTGGAGA
+ACAGCTGTGGTAAAAAGGAATATACCATTGTTGTGAAAGTGCTTGATACTCCTGGGCCACCTGTCAATGT
+GACTGTTAAGGAAATATCCAAAGACTCTGCTTATGTTACCTGGGAGCCTCCCATTATTGATGGCGGAAGC
+CCCATCATAAACTATGTGGTACAAAAACGTGATGCAGAGAGGAAATCCTGGTCTACAGTGACAACTGAGT
+GCTCCAAAACAAGCTTCAGAGTAGCTAATTTGGAGGAGGGAAAATCCTACTTCTTCCGAGTGTTTGCTGA
+AAATGAGTATGGCATTGGTGATCCCGGTGAAACTCGTGATGCTGTCAAAGCTTCCCAAACTCCTGGACCA
+GTTGTGGACCTGAAAGTGAGGTCTGTATCTAAGTCATCCTGTAGCATTGGCTGGAAAAAGCCTCACAGTG
+ATGGTGGAAGTCGGATTATTGGATATGTAGTTGATTTCCTGACTGAAGAAAATAAGTGGCAACGAGTTAT
+GAAATCCTTAAGCCTACAGTACTCTGCAAAAGATTTGACTGAAGGGAAGGAATATACCTTCAGAGTGAGT
+GCTGAGAATGAAAATGGAGAAGGAACCCCAAGCGAAATCACTGTTGTGGCAAGGGATGATGTTGTGGCTC
+CTGATCTTGACTTAAAGGGTCTACCTGATTTGTGCTACTTGGCTAAAGAAAACAGCAACTTCCGGCTTAA
+GATCCCCATAAAAGGCAAGCCAGCTCCATCAGTCTCCTGGAAGAAAGGGGAAGATCCTCTAGCAACTGAC
+ACTAGAGTCAGTGTTGAGTCATCTGCGGTTAACACAACTCTTATAGTGTACGATTGCCAAAAATCTGATG
+CTGGAAAATACACAATCACACTTAAGAATGTTGCTGGCACCAAGGAAGGAACTATCTCCATAAAGGTTGT
+TGGCAAGCCTGGCATCCCCACTGGACCAATCAAATTTGATGAAGTCACAGCAGAAGCCATGACCTTAAAG
+TGGGCTCCTCCAAAGGATGATGGAGGTTCTGAAATCACCAACTATATCCTAGAGAAGAGGGATTCTGTGA
+ACAACAAGTGGGTGACGTGCGCCTCAGCTGTCCAGAAAACCACCTTTAGAGTAACCAGACTTCATGAGGG
+CATGGAATATACCTTCAGGGTCAGTGCCGAAAATAAATATGGTGTAGGGGAAGGCCTGAAATCGGAGCCA
+ATTGTTGCGAGACATCCATTTGATGTGCCTGATGCTCCCCCACCTCCCAATATTGTGGATGTCAGACACG
+ATTCAGTATCTCTAACTTGGACTGACCCCAAGAAAACTGGTGGTTCTCCAATTACAGGGTATCATCTCGA
+GTTCAAGGAAAGAAACAGCCTTTTGTGGAAGAGAGCTAACAAGACTCCGATAAGGATGAGAGACTTTAAA
+GTGACAGGATTAACTGAAGGTCTTGAATATGAATTCCGAGTTATGGCAATCAATTTAGCAGGTGTGGGCA
+AGCCAAGCCTACCATCAGAGCCTGTTGTGGCACTGGACCCAATTGATCCTCCTGGAAAACCTGAGGTTAT
+TAACATAACAAGGAATTCAGTGACTCTCATTTGGACTGAACCTAAATATGACGGTGGTCATAAGTTAACT
+GGATATATAGTGGAGAAGCGAGATCTACCTTCGAAGTCTTGGATGAAAGCCAACCATGTTAATGTCCCAG
+AATGTGCCTTTACTGTAACTGACCTTGTTGAGGGTGGAAAATATGAATTCAGAATTAGAGCAAAGAATAC
+AGCAGGTGCTATCAGTGCTCCATCAGAAAGTACAGAAACCATTATTTGCAAGGATGAATACGAGGCACCA
+ACAATTGTCCTTGATCCCACAATAAAAGATGGGCTAACAATTAAAGCAGGGGATACCATTGTTTTGAATG
+CCATTAGCATTCTTGGCAAACCCCTTCCAAAATCAAGTTGGTCCAAGGCAGGAAAAGACATTAGACCATC
+AGATATCACTCAGATAACTTCAACCCCAACATCTTCCATGCTTACTATCAAGTATGCCACTAGAAAAGAT
+GCGGGTGAATATACCATCACTGCTACCAATCCTTTTGGCACGAAGGTGGAACATGTGAAGGTAACAGTCC
+TTGATGTACCTGGTCCCCCAGGTCCTGTTGAAATCAGTAATGTTTCTGCTGAAAAAGCAACACTTACATG
+GACACCTCCCTTGGAAGATGGCGGCTCACCAATTAAGTCCTATATACTTGAAAAGAGAGAAACCAGCCGA
+CTTTTGTGGACAGTGGTTTCTGAAGATATTCAGTCTTGCAGGCATGTGGCAACCAAACTTATCCAAGGAA
+ATGAGTACATCTTCCGGGTCTCAGCTGTAAACCACTATGGCAAAGGAGAACCTGTACAGTCTGAACCTGT
+CAAAATGGTAGACAGATTTGGTCCCCCTGGCCCTCCTGAAAAACCAGAGGTATCAAATGTCACTAAGAAC
+ACTGCCACTGTCAGCTGGAAAAGGCCAGTGGATGATGGTGGCAGCGAAATTACAGGATATCATGTAGAAA
+GGAGAGAAAAGAAAAGCCTGCGATGGGTGAGAGCAATAAAAACACCAGTTTCCGATCTCAGGTGCAAAGT
+AACAGGACTGCAAGAAGGAAGCACCTACGAATTCCGTGTCAGTGCAGAAAACAGAGCAGGAATTGGTCCA
+CCCAGTGAGGCTTCAGATTCTGTTCTGATGAAAGATGCAGCATATCCTCCAGGACCACCTTCAAATCCGC
+ATGTCACTGATACTACCAAGAAATCTGCTTCTTTGGCATGGGGCAAGCCTCATTATGATGGTGGACTTGA
+AATCACTGGCTATGTCGTGGAGCATCAAAAAGTAGGAGACGAGGCCTGGATAAAAGATACCACAGGAACC
+GCCCTCAGAATCACTCAGTTCGTTGTTCCTGATCTTCAGACTAAAGAAAAATACAACTTCAGAATCAGTG
+CCATCAACGATGCAGGTGTTGGGGAGCCAGCGGTGATTCCAGATGTTGAAATCGTAGAACGGGAGATGGC
+TCCTGATTTTGAACTAGATGCCGAGCTTCGAAGAACACTTGTTGTTAGAGCAGGACTCAGTATTAGGATA
+TTTGTGCCAATTAAAGGTCGTCCTGCTCCTGAAGTGACATGGACCAAAGATAACATCAACCTGAAAAACC
+GAGCCAACATTGAAAATACGGAATCATTTACTCTTCTGATTATCCCAGAATGTAACAGATATGATACCGG
+TAAATTTGTCATGACCATTGAAAACCCGGCTGGGAAGAAAAGTGGCTTTGTGAACGTCAGAGTCTTGGAC
+ACGCCAGGCCCAGTCCTCAACCTGCGGCCTACAGACATCACAAAGGACAGTGTCACCCTGCACTGGGACC
+TCCCTCTGATAGATGGAGGCTCACGTATAACAAACTACATTGTAGAGAAACGTGAAGCAACACGGAAATC
+TTATTCCACAGCCACCACTAAGTGCCATAAATGCACATATAAAGTTACCGGCTTGTCTGAAGGGTGTGAA
+TATTTCTTCAGAGTGATGGCAGAGAATGAATATGGAATTGGTGAGCCAACAGAAACTACAGAGCCCGTAA
+AAGCCTCTGAAGCACCATCTCCACCAGACAGCCTTAACATCATGGACATAACTAAGAGCACCGTCAGCCT
+GGCATGGCCTAAGCCCAAACACGATGGTGGCAGCAAGATCACTGGCTATGTGATTGAAGCCCAAAGAAAA
+GGCTCTGACCAGTGGACCCACATCACAACCGTGAAAGGGTTAGAATGTGTTGTGAGGAATCTAACTGAAG
+GAGAGGAATATACCTTCCAAGTGATGGCAGTGAACAGCGCGGGGAGAAGTGCCCCTAGAGAAAGCAGACC
+CGTCATTGTCAAGGAGCAGACAATGCTTCCAGAGCTGGATCTCCGTGGCATCTATCAGAAACTGGTCATT
+GCCAAAGCTGGTGACAACATCAAAGTTGAAATTCCAGTGCTCGGTCGACCGAAGCCCACAGTGACATGGA
+AAAAAGGAGACCAAATTCTTAAACAGACACAGAGAGTTAATTTTGAAACCACAGCGACTTCAACCATTTT
+AAATATCAATGAGTGTGTCAGAAGTGATAGTGGGCCCTATCCATTAACAGCAAGGAACATTGTAGGAGAG
+GTTGGTGATGTCATCACCATTCAAGTCCATGATATCCCAGGGCCACCTACTGGACCAATCAAATTTGATG
+AAGTTTCATCTGATTTTGTAACCTTCTCTTGGGACCCACCTGAGAACGATGGTGGTGTACCAATAAGCAA
+CTATGTAGTGGAAATGCGGCAGACTGACAGTACTACCTGGGTTGAGTTAGCAACCACCGTTATACGTACT
+ACCTATAAAGCCACCCGCCTTACTACTGGATTAGAGTATCAGTTCCGTGTAAAAGCTCAGAATAGATATG
+GAGTTGGACCAGGCATCACATCAGCATGCATAGTTGCCAACTATCCATTTAAGGTTCCTGGACCTCCTGG
+TACCCCTCAGGTAACTGCAGTTACCAAGGATTCAATGACAATTAGCTGGCATGAGCCACTTTCTGATGGT
+GGAAGCCCCATTTTAGGATATCATGTTGAAAGAAAAGAACGAAATGGTATTCTCTGGCAGACTGTGAGCA
+AAGCTTTAGTACCAGGCAACATTTTCAAATCAAGTGGACTTACAGATGGTATTGCTTATGAGTTCCGGGT
+GATTGCAGAAAACATGGCAGGCAAAAGTAAGCCAAGCAAGCCATCAGAACCTATGTTGGCTCTGGATCCC
+ATTGACCCACCTGGAAAACCAGTACCTCTAAATATTACAAGACACACAGTAACACTTAAATGGGCTAAGC
+CTGAATATACTGGGGGCTTTAAAATTACCAGTTATATCGTTGAAAAGAGAGACCTTCCTAATGGACGGTG
+GCTGAAGGCCAACTTCAGCAACATTTTGGAGAATGAATTTACAGTCAGTGGCCTAACAGAAGATGCTGCA
+TATGAATTCCGTGTGATCGCCAAAAATGCTGCAGGTGCCATCAGTCCACCATCTGAGCCATCTGATGCTA
+TCACTTGCAGGGATGATGTTGAGGCACCAAAGATAAAGGTGGATGTTAAATTTAAGGACACGGTTATATT
+AAAAGCAGGTGAAGCATTCAGACTGGAAGCTGATGTTTCAGGCCGCCCACCTCCAACAATGGAATGGAGC
+AAAGATGGAAAAGAGCTGGAAGGCACAGCAAAGTTAGAAATAAAAATTGCAGATTTCTCTACTAATCTGG
+TAAACAAAGATTCAACAAGAAGGGATAGTGGTGCCTATACCCTTACAGCGACTAATCCTGGTGGCTTTGC
+TAAACACATTTTCAATGTCAAAGTTCTTGACAGACCAGGCCCACCTGAAGGACCTTTGGCTGTAACTGAA
+GTGACATCAGAAAAGTGTGTACTATCATGGTTCCCTCCACTGGATGATGGAGGTGCCAAAATTGATCATT
+ACATAGTACAGAAACGTGAAACCAGCAGATTGGCATGGACAAATGTAGCCTCAGAAGTCCAAGTAACAAA
+GCTAAAGGTCACTAAACTCTTGAAAGGCAATGAATACATATTCCGTGTCATGGCTGTAAATAAATATGGA
+GTGGGAGAGCCACTGGAATCAGAGCCTGTGCTTGCAGTGAATCCTTATGGACCCCCTGATCCGCCCAAAA
+ACCCTGAAGTGACAACTATTACTAAAGATTCGATGGTTGTCTGCTGGGGACATCCTGATTCTGATGGTGG
+AAGTGAAATCATCAATTATATTGTGGAACGGCGTGATAAAGCTGGCCAACGCTGGATTAAATGCAACAAA
+AAAACTCTTACTGATTTAAGATATAAAGTGTCTGGACTGACAGAAGGACATGAATATGAGTTCAGGATTA
+TGGCTGAAAATGCTGCTGGAATTAGTGCACCAAGTCCTACCAGTCCATTTTACAAGGCTTGTGACACTGT
+GTTTAAACCTGGACCACCAGGTAACCCACGTGTTCTGGATACAAGCAGATCATCCATTTCAATCGCTTGG
+AATAAACCTATCTATGATGGTGGTTCAGAAATCACTGGGTATATGGTTGAGATTGCCCTGCCAGAGGAAG
+ATGAATGGCAGATTGTCACTCCACCAGCAGGACTCAAGGCAACTTCGTATACTATCACTGGCCTCACAGA
+GAATCAGGAATATAAGATCCGCATCTATGCCATGAATTCCGAAGGACTTGGGGAACCTGCCCTTGTTCCT
+GGAACTCCAAAGGCTGAAGACAGAATGCTGCCTCCAGAAATTGAACTGGATGCTGACCTGCGCAAAGTTG
+TTACTATAAGGGCCTGCTGCACCCTGAGACTTTTTGTTCCCATCAAAGGAAGGCCTGCACCTGAGGTGAA
+GTGGGCCCGGGACCATGGAGAATCTTTAGATAAAGCTAGCATCGAATCCACAAGCTCTTACACCCTGCTT
+ATTGTTGGAAATGTAAACAGATTTGACAGTGGCAAATATATACTAACTGTAGAAAATAGTTCAGGCAGCA
+AGTCTGCATTTGTCAATGTTAGAGTTCTCGATACACCAGGCCCCCCACAGGATCTGAAGGTAAAAGAGGT
+CACTAAGACATCTGTCACACTCACATGGGACCCACCTCTCCTTGATGGAGGTTCAAAAATCAAGAACTAT
+ATTGTTGAAAAGCGGGAATCAACAAGAAAAGCATATTCAACTGTTGCAACAAACTGCCACAAGACTTCCT
+GGAAGGTAGACCAGCTTCAAGAAGGCTGTAGCTACTATTTCAGGGTTCTCGCAGAAAATGAATATGGCAT
+TGGGCTGCCTGCTGAAACCGCAGAATCTGTGAAAGCATCAGAACGACCTCTTCCTCCAGGAAAAATAACT
+TTGATGGATGTCACAAGAAATAGTGTGTCACTCTCTTGGGAGAAACCAGAGCATGATGGAGGCAGCCGAA
+TTCTAGGCTACATTGTGGAGATGCAGACCAAAGGCAGTGACAAATGGGCCACGTGTGCCACAGTCAAGGT
+CACTGAAGCCACTATCACTGGATTAATTCAGGGTGAAGAATACTCTTTCCGTGTTTCAGCTCAGAATGAA
+AAGGGCATCAGTGATCCTAGACAACTGAGTGTGCCAGTGATCGCCAAAGATCTTGTCATTCCACCAGCCT
+TCAAACTCCTGTTCAATACTTTCACTGTACTGGCAGGTGAAGACCTAAAAGTTGATGTTCCATTCATTGG
+CCGCCCTACCCCAGCTGTAACCTGGCATAAAGATAATGTACCACTGAAGCAGACAACTAGAGTAAATGCA
+GAGAGCACAGAAAATAATTCACTACTGACAATAAAGGACGCCTGCCGAGAAGATGTTGGCCATTATGTGG
+TTAAACTGACTAACTCAGCTGGTGAAGCTATTGAAACCCTTAATGTTATCGTTCTTGACAAACCAGGGCC
+TCCAACTGGACCAGTTAAAATGGATGAAGTGACAGCTGATAGTATTACTCTTTCCTGGGGCCCACCCAAG
+TATGATGGTGGAAGTTCTATCAATAATTACATTGTTGAGAAACGGGACACTTCCACAACCACCTGGCAAA
+TTGTATCAGCTACAGTTGCAAGGACAACAATAAAGGCTTGCAGACTGAAGACTGGATGTGAATATCAGTT
+TAGAATTGCAGCTGAAAACAGATATGGGAAGAGTACCTACCTCAATTCAGAGCCTACTGTAGCCCAATAT
+CCATTCAAAGTTCCTGGTCCTCCTGGCACTCCAGTTGTCACACTGTCCTCCAGGGACAGCATGGAAGTAC
+AATGGAATGAGCCAATCAGTGATGGAGGAAGTAGAGTCATTGGCTATCATCTAGAACGCAAGGAAAGAAA
+TAGCATCCTCTGGGTTAAGTTGAATAAAACACCTATTCCTCAAACCAAGTTTAAGACAACTGGCCTTGAA
+GAAGGTGTTGAATATGAATTTAGAGTCTCTGCAGAGAACATCGTGGGCATTGGCAAGCCGAGTAAAGTAT
+CAGAATGTTATGTGGCTCGTGACCCATGTGATCCACCAGGACGGCCAGAGGCAATCATTGTCACAAGGAA
+TTCTGTGACTCTTCAGTGGAAGAAACCCACCTATGACGGTGGAAGCAAGATCACTGGTTATATTGTTGAG
+AAGAAAGAATTACCTGAGGGCCGTTGGATGAAAGCCAGTTTTACAAATATTATTGACACTCATTTTGAAG
+TAACTGGCCTAGTTGAAGATCACAGATATGAGTTCCGGGTTATAGCCCGAAATGCCGCAGGAGTGTTTAG
+TGAGCCTTCAGAAAGCACAGGAGCAATAACAGCTAGAGATGAGGTAGATCCACCACGAATAAGTATGGAT
+CCAAAATACAAAGACACAATCGTGGTTCATGCTGGTGAATCATTCAAGGTTGATGCAGATATTTATGGCA
+AACCAATACCAACCATTCAGTGGATAAAAGGTGATCAGGAGCTTTCAAACACAGCTCGATTAGAAATAAA
+GAGCACCGACTTTGCCACCAGTCTCAGTGTAAAAGATGCAGTACGTGTCGACAGTGGAAATTACATACTG
+AAGGCCAAAAATGTTGCAGGAGAAAGATCAGTTACTGTGAATGTCAAGGTTCTTGACAGACCAGGGCCAC
+CTGAAGGACCTGTTGTTATCTCAGGAGTTACAGCAGAAAAATGCACACTAGCTTGGAAACCCCCACTTCA
+GGATGGTGGGAGTGACATCATAAATTATATTGTGGAAAGGAGAGAAACCAGCCGCTTAGTTTGGACTGTG
+GTTGATGCCAATGTGCAGACTCTCAGCTGCAAGGTTACTAAGCTTCTTGAAGGCAATGAATATACTTTCC
+GTATAATGGCAGTAAACAAATATGGTGTTGGTGAACCTCTTGAATCTGAGCCAGTAGTTGCCAAGAATCC
+ATTTGTAGTACCAGATGCACCAAAAGCTCCAGAAGTCACAACAGTGACCAAGGACTCAATGATTGTTGTA
+TGGGAAAGACCAGCATCTGATGGTGGTAGTGAAATTCTTGGATATGTTCTTGAGAAACGGGATAAAGAAG
+GCATTAGATGGACAAGATGCCATAAGCGTCTGATTGGAGAGTTGCGCCTGAGAGTAACTGGACTCATAGA
+AAATCACGATTATGAGTTCAGAGTTTCTGCTGAGAATGCTGCTGGACTTAGTGAACCAAGCCCTCCTTCT
+GCTTACCAAAAGGCTTGTGATCCTATTTATAAACCAGGACCCCCAAACAACCCCAAAGTCATAGACATAA
+CCAGATCTTCAGTATTCCTTTCTTGGAGCAAACCAATATATGATGGTGGCTGTGAAATTCAAGGATACAT
+TGTTGAAAAATGTGATGTGAGTGTTGGTGAATGGACAATGTGCACTCCACCAACAGGAATTAATAAAACA
+AACATAGAAGTAGAGAAGCTGTTGGAAAAGCATGAATACAACTTCCGTATCTGTGCTATTAATAAAGCTG
+GAGTTGGAGAACATGCTGACGTCCCTGGACCTATTATAGTTGAAGAAAAATTAGAAGCACCAGACATTGA
+TCTTGACCTAGAACTAAGGAAAATCATAAATATAAGGGCAGGTGGCTCCTTAAGGTTATTTGTTCCTATA
+AAAGGTCGTCCTACACCAGAAGTTAAATGGGGAAAGGTGGATGGTGAAATCCGAGATGCAGCTATAATTG
+ATGTCACTAGCAGTTTCACCTCTCTTGTTCTTGACAATGTCAACCGATATGATAGTGGAAAATATACGCT
+TACATTAGAAAACAGCAGTGGAACAAAGTCTGCCTTTGTTACTGTGAGAGTTCTGGACACGCCAAGTCCA
+CCTGTTAACCTGAAAGTCACAGAAATCACCAAAGACTCAGTATCAATTACATGGGAACCTCCTTTGTTGG
+ATGGGGGATCCAAAATAAAAAATTACATTGTTGAGAAACGTGAAGCCACAAGAAAATCATATGCTGCTGT
+TGTAACTAACTGCCATAAGAATTCTTGGAAAATCGATCAGCTCCAAGAAGGTTGCAGTTATTACTTTAGA
+GTCACAGCTGAGAATGAGTATGGTATTGGCCTTCCTGCCCAGACTGCTGATCCAATTAAGGTTGCAGAAG
+TGCCACAACCTCCTGGAAAAATAACTGTGGATGATGTCACCAGAAACAGTGTCTCTCTGAGTTGGACAAA
+ACCTGAACATGATGGTGGCAGTAAAATCATTCAGTATATTGTGGAAATGCAAGCTAAACACAGTGAGAAA
+TGGTCAGAGTGTGCTCGAGTAAAGTCTCTTCAGGCAGTAATTACCAACCTAACTCAAGGGGAAGAATATC
+TTTTTAGAGTTGTTGCTGTAAATGAAAAGGGGAGAAGTGATCCTCGGTCCCTTGCAGTTCCAATAGTTGC
+CAAAGATCTGGTAATTGAGCCAGATGTAAAACCTGCATTCAGTAGTTACAGTGTACAGGTTGGCCAAGAT
+TTGAAAATAGAAGTGCCAATTTCTGGACGTCCTAAGCCAACCATTACCTGGACTAAAGATGGTCTCCCAC
+TGAAGCAGACCACAAGAATCAATGTTACCGATTCACTGGATCTCACCACACTCAGTATTAAAGAAACTCA
+TAAGGATGATGGTGGACAATATGGAATCACAGTTGCCAATGTTGTTGGTCAGAAGACAGCATCCATCGAA
+ATTGTAACTCTAGATAAACCTGATCCTCCAAAAGGACCTGTTAAATTTGATGACGTCAGTGCTGAAAGTA
+TTACATTATCTTGGAACCCTCCATTATATACAGGGGGCTGCCAAATCACCAACTACATTGTTCAGAAAAG
+AGATACAACCACCACAGTATGGGATGTTGTTTCTGCTACTGTTGCTAGAACTACACTCAAAGTGACCAAA
+CTGAAAACTGGTACAGAATACCAATTTAGAATATTTGCCGAAAACAGATATGGACAAAGCTTTGCCTTAG
+AGTCTGATCCAATTGTAGCTCAATATCCCTACAAAGAACCAGGCCCTCCAGGTACACCATTTGCCACAGC
+CATTTCCAAAGACTCCATGGTCATACAGTGGCATGAACCAGTCAACAATGGTGGAAGCCCCGTCATAGGT
+TACCACCTGGAGAGAAAAGAAAGAAACAGTATTTTGTGGACAAAGGTCAACAAAACTATTATTCATGACA
+CCCAATTCAAAGCACAGAATCTTGAAGAAGGCATTGAATATGAATTCAGAGTGTATGCTGAAAATATTGT
+TGGTGTAGGCAAAGCAAGCAAGAATTCTGAATGCTATGTAGCCAGAGATCCCTGTGACCCACCAGGAACC
+CCAGAACCAATAATGGTTAAAAGAAATGAAATCACTTTACAGTGGACCAAACCTGTGTATGATGGTGGAA
+GTATGATTACAGGCTACATTGTAGAGAAACGTGATTTGCCTGATGGTCGTTGGATGAAAGCTAGCTTTAC
+AAATGTCATTGAAACTCAATTTACTGTGTCAGGTCTTACTGAAGATCAAAGATATGAATTCAGAGTCATT
+GCAAAGAATGCAGCTGGTGCAATAAGTAAACCCTCTGACAGTACTGGACCAATAACTGCCAAGGATGAGG
+TTGAACTCCCAAGAATTTCAATGGATCCAAAATTCAGAGACACAATTGTGGTAAATGCTGGAGAAACATT
+CAGACTTGAGGCTGATGTCCATGGAAAGCCCCTACCTACCATTGAGTGGTTAAGAGGAGATAAGGAAATT
+GAAGAATCTGCTAGATGTGAAATAAAGAACACAGATTTCAAGGCTTTACTTATTGTAAAAGATGCAATTA
+GAATTGATGGTGGGCAGTATATTTTAAGAGCTTCCAATGTTGCAGGTTCTAAGTCATTCCCAGTAAATGT
+AAAAGTATTAGATAGACCAGGACCTCCAGAAGGGCCAGTCCAGGTTACTGGAGTCACTTCTGAAAAATGC
+TCTTTAACATGGTCTCCACCACTTCAAGATGGTGGCAGTGACATTTCTCACTATGTTGTTGAAAAGCGAG
+AAACCAGTCGACTTGCCTGGACTGTTGTTGCTTCAGAAGTTGTGACCAATTCTCTGAAAGTTACCAAACT
+CTTAGAAGGTAATGAATATGTTTTCCGTATAATGGCTGTCAACAAATATGGTGTTGGAGAGCCTTTGGAA
+TCTGCACCAGTACTAATGAAAAATCCATTTGTGCTTCCTGGACCACCAAAAAGCTTGGAAGTCACAAATA
+TTGCCAAAGACTCCATGACCGTCTGTTGGAACCGTCCAGATAGTGATGGTGGAAGTGAGATTATTGGTTA
+CATTGTAGAGAAAAGAGACAGAAGTGGCATTCGATGGATAAAATGTAATAAACGCCGCATTACAGATTTG
+CGTCTAAGAGTGACAGGATTAACAGAAGATCATGAGTATGAATTCAGGGTCTCTGCAGAAAATGCTGCTG
+GAGTTGGGGAACCAAGTCCAGCTACAGTTTATTATAAAGCCTGTGATCCTGTGTTCAAACCTGGCCCACC
+TACCAATGCACACATTGTAGACACCACTAAAAATTCAATCACACTTGCCTGGGGTAAACCCATCTATGAT
+GGCGGCAGTGAGATCTTGGGATATGTAGTAGAAATCTGTAAAGCAGATGAAGAAGAATGGCAAATAGTTA
+CTCCACAGACTGGCCTGAGAGTCACTCGATTTGAAATTTCAAAACTCACTGAACACCAAGAGTATAAAAT
+ACGAGTCTGTGCCCTCAACAAAGTTGGTTTAGGTGAGGCTACATCAGTTCCTGGTACTGTGAAACCAGAA
+GATAAACTTGAAGCACCTGAACTTGACCTTGACTCCGAATTAAGAAAAGGAATTGTTGTAAGAGCTGGTG
+GATCTGCCAGAATTCACATTCCATTCAAAGGTCGTCCAACGCCTGAGATCACTTGGTCTCGAGAGGAAGG
+TGAATTCACAGATAAGGTCCAAATTGAAAAGGGAGTAAACTATACCCAACTATCAATAGATAACTGTGAT
+AGAAATGATGCTGGAAAATACATTCTTAAGTTGGAAAACAGCAGTGGATCAAAGTCTGCTTTTGTAACTG
+TGAAAGTTCTTGACACTCCAGGACCACCACAGAATTTGGCAGTCAAAGAAGTGAGAAAAGATTCTGCCTT
+CCTGGTATGGGAGCCACCCATCATTGATGGAGGGGCAAAGGTCAAGAACTATGTGATTGACAAACGTGAG
+TCAACCAGAAAAGCGTATGCTAATGTGAGTAGTAAATGCAGCAAAACAAGTTTTAAAGTGGAAAACCTTA
+CAGAAGGAGCCATTTATTACTTCAGAGTCATGGCTGAAAATGAATTTGGAGTTGGTGTTCCAGTGGAAAC
+TGTTGATGCCGTGAAAGCTGCTGAACCTCCTTCCCCACCAGGAAAGGTTACACTCACTGATGTGTCCCAG
+ACCAGTGCATCACTTATGTGGGAGAAACCTGAACATGATGGCGGTAGCAGAGTCCTGGGGTACGTTGTTG
+AAATGCAGCCCAAAGGAACTGAAAAATGGAGCATTGTGGCTGAATCCAAAGTCTGTAATGCAGTTGTTAC
+TGGTTTGAGTTCTGGACAAGAATATCAGTTCCGTGTCAAGGCTTATAATGAGAAAGGAAAAAGCGATCCA
+AGAGTGTTGGGTGTTCCTGTCATAGCCAAGGACTTGACTATACAGCCTAGTTTAAAGTTACCATTTAACA
+CATATAGTATCCAAGCTGGAGAAGATCTTAAAATAGAAATTCCAGTTATAGGCCGACCAAGACCTAACAT
+TTCTTGGGTCAAAGATGGTGAGCCTCTTAAACAGACAACAAGAGTAAACGTTGAAGAAACAGCTACCTCA
+ACTGTTTTGCACATTAAAGAAGGTAACAAAGATGACTTTGGAAAATACACCGTAACGGCAACAAATAGTG
+CAGGCACAGCAACAGAAAATCTCAGTGTTATCGTTTTAGAAAAGCCTGGACCTCCAGTTGGCCCAGTTCG
+GTTTGATGAAGTTAGTGCAGACTTTGTAGTCATATCTTGGGAACCTCCAGCCTATACTGGTGGCTGCCAA
+ATAAGCAACTACATTGTAGAGAAGCGAGATACAACCACCACCACTTGGCACATGGTATCAGCAACAGTTG
+CAAGAACAACAATTAAAATAACCAAACTGAAAACAGGCACGGAGTACCAGTTTAGAATTTTTGCTGAAAA
+CAGGTATGGAAAAAGTGCCCCACTGGATTCTAAGGCAGTTATTGTACAATATCCATTTAAAGAACCTGGA
+CCACCTGGAACTCCTTTTGTGACATCAATCTCAAAAGATCAGATGCTTGTGCAATGGCATGAGCCAGTGA
+ATGATGGAGGCACCAAAATTATTGGCTACCATCTTGAACAGAAAGAAAAGAACAGTATTTTATGGGTCAA
+GTTAAATAAGACCCCCATTCAGGACACCAAATTCAAAACAACTGGGCTTGATGAGGGCCTTGAGTATGAG
+TTCAAAGTTTCTGCTGAAAATATTGTTGGCATTGGCAAGCCTAGCAAAGTGTCAGAATGCTTTGTTGCTC
+GTGATCCATGTGACCCACCTGGTCGCCCTGAAGCCATTGTTATTACAAGAAACAATGTCACACTGAAATG
+GAAGAAACCTGCCTATGATGGTGGTAGCAAAATAACAGGTTATATTGTAGAAAAGAAAGATCTACCTGAT
+GGCCGCTGGATGAAAGCCAGCTTTACCAACGTATTAGAAACTGAATTTACAGTGAGTGGACTTGTAGAAG
+ACCAAAGATATGAATTTAGAGTAATTGCAAGAAATGCAGCTGGAAACTTTAGTGAACCATCTGATAGTAG
+TGGTGCCATTACTGCAAGAGATGAAATTGATGCACCAAATGCCTCTCTGGATCCAAAATATAAAGATGTC
+ATCGTTGTTCATGCAGGAGAGACTTTTGTTCTTGAAGCCGACATCCGTGGCAAACCTATACCTGATGTTG
+TTTGGTCAAAAGATGGAAAAGAACTTGAAGAAACAGCTGCTAGAATGGAAATTAAATCTACTATTCAGAA
+AACAACTCTTGTTGTCAAAGACTGTATACGGACTGATGGAGGACAATATATTCTGAAACTCAGCAATGTT
+GGTGGTACAAAGTCTATACCCATCACTGTAAAGGTACTTGACAGGCCAGGGCCTCCTGAAGGGCCTCTGA
+AAGTTACTGGAGTTACTGCGGAAAAATGTTACCTGGCATGGAACCCACCTTTGCAAGATGGTGGTGCTAA
+TATTTCACATTACATCATTGAAAAGAGGGAGACAAGCCGACTCTCTTGGACCCAGGTTTCAACTGAGGTA
+CAGGCCCTTAACTACAAAGTTACTAAACTTCTTCCTGGTAATGAGTACATTTTCCGTGTCATGGCTGTGA
+ATAAATATGGAATTGGAGAGCCCTTGGAATCTGGGCCTGTTACGGCCTGTAATCCTTATAAGCCACCAGG
+TCCTCCCTCAACACCTGAAGTCTCAGCAATCACCAAAGATTCTATGGTAGTAACATGGGCACGCCCAGTA
+GACGACGGAGGTACCGAAATTGAGGGCTACATTCTTGAAAAACGAGATAAGGAAGGCGTTAGATGGACCA
+AGTGCAACAAGAAAACATTAACGGATCTGCGGCTCAGGGTAACTGGTCTTACCGAAGGCCATTCCTATGA
+ATTCAGAGTTGCTGCTGAAAATGCAGCTGGTGTGGGAGAACCTAGTGAGCCATCTGTTTTCTACCGTGCG
+TGTGATGCCTTGTATCCACCAGGTCCCCCAAGCAATCCAAAAGTGACGGACACTTCCAGATCTTCTGTCT
+CCCTGGCATGGAGTAAGCCAATTTATGATGGTGGCGCACCTGTTAAAGGCTATGTTGTAGAGGTCAAAGA
+AGCTGCTGCGGATGAATGGACAACCTGCACTCCACCAACAGGATTACAAGGAAAGCAGTTCACAGTGACC
+AAGCTTAAAGAAAACACTGAATATAACTTCCGTATTTGTGCCATCAATTCTGAAGGTGTAGGTGAACCTG
+CAACTCTACCTGGCTCAGTGGTTGCTCAGGAGAGGATAGAGCCACCAGAAATAGAACTCGATGCTGATCT
+CAGAAAGGTGGTCGTTCTGCGTGCAAGTGCTACTTTACGCTTATTTGTCACTATCAAAGGTCGACCAGAA
+CCCGAAGTTAAATGGGAAAAGGCAGAAGGCATTCTCACTGACAGGGCTCAGATAGAGGTGACCAGCTCAT
+TTACAATGTTGGTGATTGATAATGTTACCAGATTTGACAGTGGTCGGTATAATCTGACATTAGAAAATAA
+TAGTGGCTCCAAAACAGCTTTTGTTAACGTCAGAGTTCTTGACTCACCAAGTGCCCCTGTGAATTTGACC
+ATAAGAGAAGTGAAGAAAGACTCAGTGACGTTGTCCTGGGAACCACCACTTATTGATGGTGGAGCTAAGA
+TTACAAACTACATTGTCGAAAAACGAGAAACTACAAGAAAAGCCTATGCTACCATTACAAATAATTGCAC
+TAAAACTACTTTCAGAATTGAAAATCTACAAGAAGGATGTTCTTACTACTTCCGAGTCTTGGCTTCCAAT
+GAATATGGGATTGGTTTGCCAGCTGAAACAACAGAACCCGTTAAAGTGTCTGAACCACCCCTCCCACCTG
+GAAGAGTAACTCTTGTTGATGTGACCCGTAATACAGCTACAATTAAGTGGGAGAAACCAGAAAGTGATGG
+TGGCAGCAAAATTACTGGTTATGTGGTTGAAATGCAGACTAAAGGGAGTGAAAAGTGGAGCACCTGCACA
+CAAGTTAAGACTCTAGAAGCAACTATATCTGGCTTAACTGCAGGAGAAGAGTATGTCTTCAGGGTAGCTG
+CAGTTAACGAAAAGGGAAGAAGTGATCCAAGACAACTTGGAGTGCCAGTAATTGCAAGGGATATTGAAAT
+AAAGCCTTCAGTTGAGCTTCCTTTCCATACTTTCAATGTAAAGGCTAGAGAACAACTTAAGATTGATGTG
+CCATTCAAAGGAAGACCTCAAGCTACTGTGAACTGGAGAAAAGATGGTCAGACTCTTAAAGAGACAACTA
+GAGTCAATGTTTCTTCTTCAAAGACTGTAACATCACTATCTATTAAGGAAGCTTCAAAGGAAGATGTTGG
+AACTTATGAATTATGTGTTTCAAACAGTGCTGGATCCATAACAGTTCCTATTACTATAATTGTCCTTGAC
+AGACCAGGACCTCCAGGTCCTATACGTATTGATGAGGTTAGTTGTGACAGCATAACCATTTCTTGGAATC
+CTCCAGAATATGATGGTGGCTGCCAAATTAGCAATTACATTGTTGAAAAGAAAGAAACCACCTCTACAAC
+ATGGCACATAGTTTCACAAGCAGTTGCAAGAACATCCATTAAAATAGTTCGCCTGACAACAGGAAGTGAG
+TATCAGTTCCGTGTTTGTGCAGAAAACCGCTATGGAAAGAGCTCCTACAGTGAATCTTCAGCTGTTGTTG
+CAGAGTATCCATTCAGTCCCCCAGGTCCTCCTGGTACTCCTAAAGTTGTGCATGCCACAAAATCTACCAT
+GCTTGTAACCTGGCAAGTGCCAGTTAATGATGGAGGAAGTCGAGTAATTGGCTATCATCTTGAGTATAAA
+GAAAGAAGCAGCATTCTTTGGTCAAAAGCAAATAAAATCCTCATTGCTGATACTCAAATGAAAGTCTCCG
+GCCTTGATGAAGGACTGATGTATGAGTATCGTGTATATGCTGAAAATATTGCTGGAATTGGTAAATGCAG
+TAAATCTTGTGAACCAGTCCCTGCAAGAGATCCTTGTGACCCTCCTGGACAACCTGAAGTCACAAATATC
+ACAAGAAAATCAGTGTCACTTAAATGGTCTAAACCACATTATGATGGTGGAGCTAAGATCACAGGATACA
+TTGTTGAACGCAGAGAACTACCAGATGGCCGGTGGCTGAAGTGCAATTATACTAATATACAAGAAACATA
+CTTTGAAGTAACTGAACTTACTGAAGATCAGCGTTATGAATTCCGGGTTTTTGCAAGGAATGCTGCTGAC
+TCAGTTAGTGAGCCATCTGAATCCACTGGGCCTATTATAGTTAAAGATGATGTTGAGCCTCCAAGAGTTA
+TGATGGATGTCAAGTTCCGAGACGTTATTGTTGTCAAAGCTGGAGAGGTCCTTAAGATAAATGCAGACAT
+TGCAGGGCGACCTCTGCCAGTAATTTCCTGGGCCAAGGATGGTATAGAAATTGAAGAAAGAGCAAGAACA
+GAAATCATCTCAACAGACAATCATACTTTGTTAACAGTTAAAGACTGTATAAGACGAGACACTGGGCAAT
+ATGTACTAACACTGAAGAATGTTGCCGGCACTCGGTCTGTGGCCGTTAATTGCAAAGTACTTGATAAGCC
+TGGTCCACCAGCAGGACCACTTGAAATAAATGGCCTCACTGCTGAGAAATGCTCTCTTTCCTGGGGACGT
+CCCCAAGAAGATGGTGGTGCAGATATCGACTATTACATCGTAGAAAAACGTGAAACAAGCCACCTTGCAT
+GGACAATATGTGAAGGAGAGTTACAGATGACATCCTGTAAAGTAACCAAGTTACTCAAAGGCAATGAATA
+TATATTTAGAGTAACTGGTGTTAATAAATATGGTGTTGGTGAGCCCCTAGAGAGTGTAGCTATAAAGGCA
+CTAGATCCATTTACAGTTCCAAGTCCACCCACGTCTTTGGAAATTACTTCTGTGACCAAAGAATCTATGA
+CACTTTGCTGGTCAAGACCCGAGAGTGATGGAGGTAGTGAAATATCTGGATATATAATTGAAAGGCGAGA
+GAAAAATAGCCTAAGATGGGTGCGTGTAAACAAAAAACCAGTTTATGATCTAAGAGTGAAATCAACAGGA
+CTTCGGGAAGGATGTGAATATGAATATCGTGTTTATGCAGAAAATGCTGCTGGCCTAAGTCTTCCAAGTG
+AAACCTCTCCCTTAATTAGGGCAGAAGATCCAGTGTTCCTACCATCTCCTCCATCCAAACCCAAAATTGT
+GGACTCAGGCAAGACAACTATAACTATTGCCTGGGTTAAGCCGCTGTTTGATGGTGGGGCCCCGATAACT
+GGATATACTGTAGAATACAAAAAATCTGATGACACTGACTGGAAAACTTCCATTCAGAGCTTACGAGGGA
+CAGAATATACAATAAGCGGACTAACAACAGGAGCTGAATATGTTTTCAGAGTAAAATCTGTCAATAAGGT
+TGGTGCTAGTGACCCCAGTGATAGCTCTGACCCTCAGATAGCAAAGGAAAGAGAAGAAGAACCTTTATTT
+GATATTGACAGTGAAATGAGGAAGACCTTGATTGTCAAGGCTGGTGCCTCATTTACCATGACTGTGCCTT
+TCCGAGGAAGACCAGTACCCAATGTCTTGTGGAGTAAGCCAGACACTGACCTCCGTACTAGAGCTTATGT
+TGATACCACAGACTCCCGTACATCACTGACCATTGAAAATGCCAACAGAAATGACTCTGGAAAGTACACA
+TTAACAATTCAGAATGTTTTGAGTGCTGCTTCACTGACCTTAGTTGTCAAAGTTTTAGATACCCCAGGTC
+CTCCAACCAACATTACTGTGCAAGATGTAACCAAAGAGTCTGCAGTGTTATCCTGGGATGTTCCTGAAAA
+CGATGGTGGAGCACCAGTGAAGAATTACCACATAGAAAAACGTGAGGCCAGCAAGAAAGCATGGGTCTCT
+GTGACCAACAACTGTAACCGCCTCTCCTACAAAGTTACCAATTTACAAGAAGGAGCTATCTATTACTTCA
+GAGTCTCTGGAGAAAATGAGTTTGGTGTTGGTATACCAGCTGAAACAAAGGAAGGAGTAAAAATAACAGA
+AAAACCAAGCCCACCTGAAAAACTTGGAGTAACAAGTATATCCAAAGACAGTGTTTCCCTGACCTGGCTG
+AAGCCTGAACATGATGGCGGAAGCAGAATTGTACACTATGTCGTTGAAGCACTAGAAAAAGGACAGAAAA
+ACTGGGTTAAATGTGCAGTGGCAAAGTCAACCCATCACGTTGTTTCCGGTCTGAGAGAGAATTCTGAATA
+CTTTTTCCGAGTGTTTGCTGAAAATCAAGCTGGCCTGAGTGACCCGAGAGAGCTTCTGCTTCCTGTTCTT
+ATTAAGGAGCAACTAGAACCACCTGAAATTGATATGAAGAATTTCCCAAGTCACACTGTATATGTTAGAG
+CTGGTTCAAACCTTAAAGTTGACATTCCAATCTCTGGAAAACCACTTCCCAAAGTGACCTTATCAAGAGA
+TGGTGTCCCCCTTAAGGCAACCATGAGATTTAATACCGAAATTACTGCTGAGAACCTGACCATCAATCTC
+AAAGAAAGTGTTACAGCTGACGCTGGGAGATATGAAATCACTGCTGCCAACTCCAGTGGTACAACCAAAG
+CTTTCATTAACATTGTTGTGCTAGACAGGCCTGGTCCTCCAACTGGCCCTGTTGTTATTAGTGATATAAC
+TGAAGAAAGTGTGACTCTCAAATGGGAGCCACCTAAGTATGACGGTGGAAGTCAAGTTACCAACTACATT
+CTACTCAAAAGAGAAACAAGTACTGCAGTGTGGACTGAAGTGTCTGCAACAGTTGCAAGAACCATGATGA
+AAGTCATGAAACTGACCACAGGAGAAGAATACCAATTCCGCATCAAGGCAGAAAACCGCTTTGGCATCAG
+TGATCATATAGATTCAGCTTGTGTGACTGTCAAACTACCATACACAACACCTGGACCACCATCTACACCA
+TGGGTCACTAATGTTACTCGAGAAAGCATCACTGTGGGCTGGCATGAACCAGTGTCAAATGGAGGCAGTG
+CAGTCGTAGGCTATCACCTGGAAATGAAAGACAGAAACAGTATTTTATGGCAAAAAGCCAACAAACTGGT
+CATCCGCACAACTCACTTCAAAGTCACAACAATCAGTGCTGGACTTATTTATGAATTCAGGGTGTATGCA
+GAAAATGCTGCTGGAGTTGGAAAACCTAGCCATCCTTCTGAACCAGTCTTGGCAATTGATGCTTGTGAAC
+CCCCAAGAAATGTTCGTATCACTGATATTTCAAAGAACTCTGTCAGCCTTTCATGGCAACAACCAGCTTT
+CGATGGAGGTAGCAAGATTACAGGCTACATTGTTGAGAGACGTGACCTTCCAGATGGCAGATGGACCAAG
+GCCAGCTTCACCAATGTTACTGAAACTCAATTCATCATCTCTGGCTTGACTCAGAATTCCCAGTATGAAT
+TCCGTGTCTTTGCTAGGAATGCTGTTGGTTCCATTAGCAATCCATCTGAGGTTGTAGGGCCCATTACTTG
+CATCGATTCTTATGGTGGTCCTGTAATTGATTTGCCTCTAGAATATACAGAAGTTGTCAAATACAGAGCA
+GGTACATCTGTGAAGCTCAGAGCTGGCATTTCTGGCAAACCTGCGCCTACTATTGAGTGGTATAAAGATG
+ATAAAGAATTACAAACCAATGCACTGGTGTGTGTTGAAAATACCACGGACCTCGCATCTATACTCATCAA
+AGATGCCGATCGCCTTAATAGTGGATGCTATGAATTAAAACTAAGGAATGCCATGGGCTCAGCCTCAGCC
+ACCATCAGAGTACAGATCCTTGACAAACCAGGCCCACCTGGTGGACCAATTGAATTTAAGACTGTAACTG
+CTGAGAAGATCACCCTTCTCTGGCGGCCTCCAGCTGATGATGGTGGTGCAAAAATCACTCACTACATTGT
+GGAAAAGCGTGAGACAAGCCGCGTTGTGTGGTCTATGGTGTCTGAACATTTGGAAGAGTGCATCATTACA
+ACCACCAAAATTATCAAAGGAAATGAATACATCTTCCGGGTCCGAGCCGTGAACAAATATGGAATTGGCG
+AGCCACTGGAATCTGATTCCGTTGTAGCCAAGAACGCATTTGTTACACCTGGGCCACCAGGCATACCAGA
+AGTGACAAAGATTACCAAGAATTCGATGACTGTTGTATGGAGCAGGCCAATTGCAGATGGCGGTAGTGAT
+ATAAGTGGCTATTTCCTTGAAAAACGAGACAAGAAGAGCCTAGGATGGTTTAAAGTACTAAAAGAGACTA
+TCCGTGACACCAGACAAAAAGTAACAGGACTCACAGAAAACAGTGACTATCAATACAGAGTTTGTGCTGT
+AAACGCTGCTGGACAGGGTCCATTTTCTGAACCATCTGAATTCTACAAAGCTGCTGATCCTATTGATCCT
+CCAGGTCCACCTGCTAAGATAAGAATCGCAGATTCAACCAAGTCATCCATCACCCTTGGCTGGAGTAAGC
+CTGTCTATGATGGGGGCAGTGCTGTTACTGGGTATGTTGTCGAGATAAGACAAGGAGAGGAAGAGGAATG
+GACTACTGTCTCTACCAAAGGAGAGGTCAGAACTACAGAATATGTGGTATCCAACCTGAAACCTGGAGTC
+AATTACTACTTCCGGGTATCTGCTGTAAACTGTGCTGGACAAGGAGAACCTATAGAAATGAATGAACCTG
+TACAAGCTAAAGATATACTTGAGGCACCAGAGATTGACCTGGATGTGGCTCTCAGAACTTCTGTTATTGC
+CAAAGCTGGTGAAGATGTACAAGTGTTGATTCCCTTTAAAGGCAGACCTCCACCTACTGTCACATGGAGA
+AAAGATGAGAAGAATCTTGGCAGTGATGCCAGATACAGCATTGAAAACACTGATTCATCCTCATTACTCA
+CCATTCCTCAAGTTACTCGCAATGATACAGGAAAATATATTCTCACAATAGAAAATGGAGTTGGTGAACC
+TAAGTCTTCAACTGTGAGTGTTAAAGTGCTTGACACACCAGCTGCCTGCCAGAAACTACAGGTTAAACAT
+GTTTCTCGAGGCACAGTCACTTTGCTCTGGGATCCTCCTCTCATTGATGGAGGATCTCCAATAATTAATT
+ATGTCATTGAAAAGAGAGATGCCACCAAGAGAACATGGTCTGTCGTGTCACACAAATGTTCTAGCACATC
+CTTCAAGCTAATAGATTTGTCGGAGAAGACTCCATTCTTCTTCAGAGTTCTTGCAGAAAATGAAATTGGA
+ATTGGGGAACCCTGTGAAACTACAGAGCCAGTGAAGGCTGCTGAAGTACCAGCTCCTATACGTGATCTCT
+CAATGAAAGACTCAACAAAGACATCTGTCATCCTCAGCTGGACCAAACCTGACTTTGATGGTGGTAGCGT
+CATCACAGAATATGTTGTAGAAAGGAAAGGTAAAGGTGAACAGACGTGGTCCCACGCTGGCATAAGTAAG
+ACATGTGAAATTGAGGTTAGCCAACTTAAGGAGCAGTCAGTCCTGGAGTTCAGAGTGTTTGCCAAAAATG
+AGAAAGGACTGAGTGATCCTGTCACTATTGGGCCAATTACAGTGAAAGAACTTATTATTACACCTGAAGT
+TGACCTGTCAGATATCCCTGGGGCACAAGTCACTGTGAGAATTGGGCACAATGTGCACCTTGAATTACCT
+TATAAGGGAAAACCCAAACCATCCATCAGTTGGCTGAAAGATGGCTTGCCACTGAAAGAAAGTGAATTTG
+TTCGCTTCAGTAAAACTGAAAACAAAATTACTTTGAGTATTAAGAATGCCAAGAAGGAGCATGGAGGAAA
+ATACACTGTTATTCTTGATAATGCAGTGTGTAGAATTGCAGTCCCCATTACAGTCATCACCCTTGGCCCA
+CCATCAAAGCCCAAAGGACCCATTCGATTTGATGAAATCAAGGCTGATAGTGTCATCCTGTCATGGGATG
+TACCTGAAGATAATGGAGGAGGAGAAATTACTTGTTACAGCATCGAGAAGCGGGAAACTTCACAAACTAA
+CTGGAGGATGGTGTGTTCAAGTGTTGCCAGAACGACTTTCAAAGTTCCTAATCTAGTCAAAGATGCTGAG
+TACCAGTTTAGAGTGAGAGCAGAAAACAGATACGGAGTCAGCCAACCACTTGTCTCAAGCATTATTGTGG
+CAAAACACCAGTTCAGGATTCCTGGTCCCCCAGGAAAGCCAGTTATATACAATGTGACTTCTGATGGCAT
+GTCACTAACTTGGGATGCTCCAGTTTATGATGGTGGTTCAGAAGTTACTGGATTCCATGTTGAAAAGAAA
+GAAAGAAATAGCATCCTCTGGCAAAAAGTTAATACATCACCAATCTCTGGAAGAGAATATAGAGCCACTG
+GACTGGTAGAAGGTCTGGATTACCAATTCCGTGTATATGCTGAAAATTCTGCTGGCCTAAGCTCACCTAG
+TGACCCAAGCAAATTTACCTTAGCTGTTTCTCCAGTAGACCCACCTGGCACTCCTGACTACATTGATGTC
+ACCCGGGAAACCATCACACTTAAATGGAACCCACCATTGCGTGATGGAGGCAGTAAGATTGTGGGCTATA
+GCATTGAGAAACGGCAAGGAAATGAACGCTGGGTGAGATGCAACTTTACTGACGTCAGTGAATGTCAGTA
+CACAGTTACAGGACTCAGTCCTGGGGATCGCTATGAGTTCAGAATAATTGCAAGAAATGCTGTTGGAACT
+ATAAGCCCGCCCTCACAGTCTTCTGGCATTATTATGACAAGAGATGAAAATGTTCCACCAATAGTAGAGT
+TTGGCCCTGAATACTTTGATGGTCTCATTATTAAGTCCGGAGAGAGCCTTAGAATTAAAGCTTTGGTACA
+AGGAAGACCAGTGCCTCGAGTAACTTGGTTCAAAGATGGAGTGGAAATCGAAAAGAGGATGAATATGGAA
+ATAACCGACGTACTTGGATCCACCAGCCTATTTGTTAGAGATGCTACTCGGGACCATCGTGGTGTATACA
+CAGTGGAAGCCAAAAATGCATCTGGTTCTGCAAAAGCAGAAATTAAAGTGAAAGTACAAGATACACCAGG
+AAAAGTAGTTGGGCCAATAAGATTCACCAATATTACTGGGGAGAAGATGACTCTGTGGTGGGATGCCCCA
+CTCAATGACGGTTGTGCTCCCATAACCCACTACATCATTGAAAAACGGGAAACCAGCAGACTTGCCTGGG
+CACTAATTGAGGATAAATGTGAAGCCCAAAGTTACACTGCCATTAAACTAATAAACGGCAATGAATACCA
+ATTCCGTGTTTCTGCAGTTAACAAGTTTGGTGTTGGCAGGCCACTTGATTCTGATCCAGTGGTTGCTCAA
+ATACAATATACTGTTCCTGATGCCCCTGGCATTCCAGAACCTAGCAACATAACAGGCAACAGCATTACCC
+TGACATGGGCAAGGCCAGAATCAGATGGTGGCAGTGAAATTCAACAGTATATCCTTGAAAGAAGAGAAAA
+GAAAAGCACAAGATGGGTAAAAGTGATCAGCAAACGACCAATCTCTGAAACAAGATTCAAAGTCACTGGT
+CTGACAGAAGGCAATGAGTATGAATTCCATGTCATGGCTGAAAATGCTGCAGGAGTTGGACCTGCAAGTG
+GCATCTCAAGACTCATTAAATGTAGAGAGCCCGTCAACCCACCAGGTCCTCCCACAGTGGTCAAAGTAAC
+AGACACATCAAAGACAACTGTGAGCTTAGAATGGTCCAAACCAGTGTTTGATGGTGGCATGGAAATAATT
+GGGTATATTATTGAAATGTGTAAGGCCGACTTAGGAGACTGGCACAAGGTGAATGCAGAGGCATGTGTGA
+AAACAAGATATACAGTCACTGATCTACAAGCAGGTGAAGAATACAAATTCCGAGTTAGTGCTATCAATGG
+TGCTGGAAAAGGCGACAGCTGTGAAGTGACTGGCACAATTAAAGCAGTTGACCGGTTAACAGCTCCTGAG
+TTAGACATAGATGCAAACTTCAAACAGACTCATGTTGTTAGAGCTGGGGCCAGTATTCGCCTCTTCATTG
+CCTACCAAGGTAGACCTACTCCTACAGCTGTGTGGAGCAAACCAGACTCTAACCTTAGCCTTCGGGCTGA
+TATCCATACAACAGATTCCTTCAGCACCCTCACTGTGGAAAACTGCAACAGAAATGATGCAGGGAAATAT
+ACCCTTACTGTGGAAAACAACAGTGGTAGTAAGTCAATCACATTCACCGTGAAAGTGCTAGACACTCCAG
+GCCCACCTGGCCCAATTACCTTCAAAGATGTGACCCGGGGATCTGCTACATTGATGTGGGATGCCCCTCT
+TCTTGACGGTGGTGCCCGAATCCATCATTATGTGGTAGAGAAACGAGAGGCAAGTCGCCGTAGTTGGCAG
+GTTATCAGTGAAAAATGCACTCGTCAGATCTTCAAGGTCAATGACCTGGCCGAAGGTGTTCCGTACTATT
+TCCGTGTTTCTGCAGTAAATGAGTATGGTGTTGGTGAGCCCTATGAAATGCCAGAACCAATTGTAGCCAC
+AGAACAGCCTGCTCCACCTAGGAGACTTGATGTTGTTGATACTAGCAAATCCTCCGCAGTCTTAGCTTGG
+CTTAAACCTGACCACGATGGAGGCAGCCGGATCACTGGCTACCTGCTTGAAATGAGACAAAAGGGATCTG
+ACTTCTGGGTTGAAGCTGGTCACACCAAACAGCTAACTTTCACAGTAGAGCGTCTTGTTGAGAAAACTGA
+ATATGAATTCCGTGTGAAGGCCAAGAATGATGCTGGCTATAGTGAACCCAGAGAAGCCTTCTCTTCTGTC
+ATCATTAAGGAGCCTCAAATCGAGCCCACTGCTGACCTCACTGGAATTACCAATCAGCTTATAACTTGCA
+AAGCAGGAAGCCCATTTACCATTGACGTACCAATCAGTGGTCGTCCTGCCCCCAAAGTAACATGGAAACT
+GGAAGAAATGAGACTTAAAGAGACAGATCGAGTGAGCATTACAACAACAAAAGACAGAACCACACTGACT
+GTAAAGGACAGCATGAGAGGTGACTCTGGAAGATACTTCTTGACCCTGGAAAATACAGCTGGTGTTAAAA
+CATTTAGCGTCACAGTTGTGGTCATTGGAAGGCCAGGTCCAGTAACCGGCCCCATTGAGGTCTCATCTGT
+CTCAGCTGAATCGTGTGTCCTGTCATGGGGAGAACCTAAAGATGGAGGAGGCACTGAAATTACTAATTAC
+ATAGTTGAAAAGCGTGAATCGGGTACAACAGCTTGGCAGCTTGTCAATTCCAGTGTCAAGCGCACTCAAA
+TTAAAGTCACTCATCTCACAAAATACATGGAATATTCTTTCCGTGTCAGTTCAGAGAACAGATTTGGTGT
+CAGCAAACCTCTAGAATCAGCACCAATAATTGCTGAACATCCATTTGTCCCACCAAGCGCTCCTACCAGA
+CCTGAGGTCTACCATGTGTCTGCCAATGCCATGTCTATTCGTTGGGAAGAACCCTACCACGATGGTGGCA
+GTAAAATCATTGGCTACTGGGTTGAGAAGAAAGAACGTAATACAATTCTTTGGGTGAAAGAAAACAAAGT
+GCCATGCTTAGAGTGCAACTACAAAGTAACTGGTTTAGTAGAAGGACTGGAATATCAGTTCAGAACTTAT
+GCACTCAATGCTGCAGGTGTTAGCAAGGCCAGCGAAGCTTCAAGACCTATAATGGCTCAAAATCCAGTTG
+ATGCACCAGGCAGACCAGAGGTGACAGATGTCACAAGATCAACAGTATCACTGATTTGGTCTGCCCCAGC
+GTATGATGGAGGCAGCAAGGTTGTGGGCTACATCATAGAGCGTAAGCCAGTCAGTGAGGTAGGAGATGGT
+CGCTGGCTGAAGTGCAACTACACCATTGTATCTGACAATTTCTTCACCGTGACTGCTCTCAGTGAAGGAG
+ACACTTATGAGTTCCGTGTGTTAGCCAAGAATGCAGCAGGCGTAATTAGCAAAGGGTCTGAATCTACAGG
+CCCTGTCACTTGCCGAGATGAATACGCTCCACCCAAAGCCGAACTGGATGCCCGATTACACGGTGATCTG
+GTTACCATCAGAGCAGGTTCTGATCTTGTTCTGGATGCTGCAGTTGGTGGCAAACCTGAACCCAAAATTA
+TCTGGACCAAAGGAGACAAGGAGCTAGATCTCTGTGAAAAAGTCTCTTTGCAGTATACTGGCAAACGAGC
+AACTGCTGTGATCAAGTTCTGTGACAGAAGTGACAGTGGAAAATACACTTTAACAGTGAAAAATGCCAGC
+GGGACCAAGGCCGTGTCTGTCATGGTCAAAGTGCTTGATTCCCCTGGCCCATGTGGAAAGCTCACCGTCA
+GCAGAGTAACACAGGAGAAGTGCACTTTAGCCTGGAGCCTTCCGCAGGAAGACGGAGGAGCAGAAATCAC
+TCACTACATCGTGGAAAGACGCGAGACTAGCAGGCTCAACTGGGTGATTGTTGAAGGCGAATGCCCAACC
+CTATCCTATGTCGTTACCAGGCTCATCAAGAACAATGAGTACATATTCCGAGTGAGGGCAGTAAACAAAT
+ATGGCCCTGGTGTGCCTGTTGAATCAGAGCCAATTGTAGCCAGAAACTCATTCACTATTCCATCACCACC
+CGGCATACCTGAAGAAGTTGGGACTGGCAAAGAGCATATCATCATTCAGTGGACAAAACCTGAATCTGAT
+GGTGGCAATGAAATCAGCAACTACCTAGTAGACAAACGTGAGAAGAAGAGCCTGCGCTGGACACGTGTCA
+ACAAAGACTATGTGGTGTATGATACCAGGCTGAAGGTGACCAGCCTGATGGAGGGTTGTGATTACCAGTT
+CCGGGTGACCGCAGTGAATGCAGCTGGTAACAGTGAGCCCAGCGAAGCTTCCAACTTCATCTCATGCAGA
+GAACCATCATATACCCCTGGACCACCTTCTGCTCCAAGAGTTGTGGATACCACCAAACACAGCATTAGTT
+TGGCATGGACCAAACCCATGTACGATGGTGGTACTGACATTGTAGGATATGTTCTGGAAATGCAAGAGAA
+GGACACTGATCAGTGGTACCGAGTGCATACCAATGCCACAATAAGAAATACTGAATTCACTGTGCCAGAC
+CTTAAAATGGGCCAGAAATATTCCTTCAGAGTTGCTGCCGTGAACGTGAAGGGTATGAGCGAATACAGCG
+AATCAATTGCTGAAATTGAGCCCGTGGAAAGAATAGAAATACCAGATCTTGAGCTTGCAGATGATCTAAA
+GAAGACTGTGACCATCAGGGCTGGGGCCTCCTTGCGCTTGATGGTGTCTGTATCTGGAAGACCACCTCCT
+GTCATAACGTGGAGCAAGCAGGGCATTGACCTTGCAAGCCGGGCAATTATTGACACCACTGAGAGCTACT
+CATTGCTAATAGTGGACAAAGTTAATCGGTACGATGCTGGAAAATACACAATTGAAGCTGAAAACCAATC
+TGGCAAGAAATCAGCAACAGTCCTTGTTAAAGTCTATGATACTCCTGGTCCCTGTCCTTCAGTGAAAGTT
+AAGGAAGTATCAAGAGATTCTGTGACTATAACTTGGGAAATTCCCACGATTGATGGTGGAGCTCCAGTCA
+ACAATTACATCGTTGAGAAGCGTGAAGCTGCTATGAGAGCATTCAAAACAGTAACTACCAAATGCAGCAA
+GACACTTTACAGAATTTCTGGACTTGTAGAAGGAACCATGTACTATTTCAGAGTGCTGCCAGAAAATATT
+TATGGCATTGGAGAACCTTGTGAAACATCTGATGCAGTACTGGTCTCAGAAGTGCCTTTGGTGCCTGCAA
+AGCTAGAAGTGGTCGATGTCACCAAATCCACTGTTACCCTTGCCTGGGAAAAACCACTCTACGATGGTGG
+TAGCCGACTCACTGGATATGTTCTCGAGGCCTGCAAAGCTGGCACAGAGAGATGGATGAAGGTTGTCACC
+TTAAAACCCACAGTCCTAGAGCACACTGTTACTTCCTTAAATGAAGGTGAACAATACTTATTTAGAATAA
+GGGCACAAAATGAGAAAGGTGTGTCAGAACCAAGAGAGACTGTCACAGCCGTGACTGTACAAGACCTCAG
+AGTGTTGCCAACAATCGATCTTTCTACAATGCCTCAGAAGACCATCCATGTCCCAGCTGGCAGACCAGTA
+GAGCTGGTGATACCTATTGCTGGCCGTCCACCTCCTGCTGCTTCCTGGTTCTTTGCTGGTTCTAAACTGA
+GAGAATCAGAGCGTGTCACAGTTGAAACTCACACTAAAGTAGCTAAATTAACCATCCGTGAAACCACTAT
+CAGAGATACTGGAGAATACACACTTGAATTGAAGAATGTTACCGGAACTACTTCAGAAACCATTAAAGTT
+ATCATTCTTGACAAGCCTGGTCCACCAACAGGACCTATTAAGATTGATGAAATTGATGCTACATCAATTA
+CCATTTCCTGGGAACCACCTGAATTGGACGGTGGTGCTCCACTGAGTGGTTATGTGGTAGAACAACGTGA
+CGCTCATCGTCCAGGATGGCTGCCCGTTTCTGAATCAGTGACTAGGTCCACGTTTAAGTTTACCAGACTC
+ACCGAAGGAAATGAGTATGTGTTCCGTGTGGCTGCAACAAACCGCTTCGGGATTGGCTCTTACTTGCAGT
+CTGAGGTCATAGAGTGTCGCAGCAGCATCCGTATTCCTGGACCCCCAGAAACATTACAGATATTTGATGT
+TTCCCGTGATGGCATGACACTTACTTGGTACCCACCAGAGGATGACGGTGGCTCCCAAGTGACTGGATAT
+ATTGTGGAGCGCAAAGAAGTGAGAGCAGATCGATGGGTCCGTGTAAATAAAGTACCTGTGACAATGACAC
+GGTACCGCTCCACTGGCCTTACTGAAGGCTTAGAATATGAACACCGTGTCACAGCCATTAATGCAAGAGG
+GTCTGGGAAACCAAGTCGTCCTTCCAAACCCATCGTTGCCATGGATCCAATTGCTCCTCCAGGAAAGCCA
+CAAAACCCAAGAGTTACTGATACAACAAGGACATCAGTCTCCCTGGCCTGGAGTGTTCCAGAAGATGAAG
+GAGGATCTAAAGTCACAGGCTACTTGATTGAAATGCAAAAAGTAGATCAACATGAATGGACCAAGTGTAA
+CACCACTCCAACCAAGATTCGAGAGTATACTCTAACACACCTACCTCAGGGTGCAGAATACAGGTTCCGC
+GTCCTAGCTTGTAATGCTGGTGGACCTGGTGAGCCTGCTGAGGTACCAGGAACAGTCAAAGTCACTGAAA
+TGCTTGAATATCCTGATTATGAACTTGATGAAAGATACCAAGAAGGTATCTTTGTAAGGCAAGGTGGCGT
+CATCAGACTTACCATACCAATCAAAGGAAAACCATTCCCAATATGTAAATGGACCAAGGAAGGCCAGGAT
+ATTAGTAAGCGTGCCATGATTGCAACATCTGAAACACACACTGAGCTTGTGATCAAAGAAGCAGACAGGG
+GTGATTCTGGCACTTATGACCTGGTTCTGGAAAATAAATGTGGCAAGAAGGCTGTCTACATCAAGGTCAG
+GGTGATAGGAAGTCCCAACAGTCCAGAAGGGCCACTGGAATATGATGACATCCAAGTCCGCTCTGTGAGG
+GTCAGCTGGAGACCTCCTGCTGATGATGGTGGTGCTGACATCTTAGGCTACATCCTCGAGAGACGAGAAG
+TGCCTAAAGCCGCCTGGTATACCATTGATTCCAGAGTCCGAGGTACATCTCTGGTGGTAAAAGGCCTCAA
+AGAGAATGTAGAATACCATTTCCGTGTTTCAGCAGAAAACCAGTTTGGCATAAGCAAACCCTTGAAATCT
+GAGGAACCAGTCACACCAAAAACACCATTGAATCCTCCAGAACCTCCAAGCAATCCTCCAGAAGTACTCG
+ATGTAACCAAGAGTTCTGTTAGCTTGTCCTGGTCCCGGCCCAAAGATGATGGTGGTTCTAGAGTCACAGG
+CTACTACATCGAACGCAAAGAGACATCCACTGACAAGTGGGTCAGACACAACAAGACTCAGATCACCACC
+ACAATGTACACTGTCACAGGGCTTGTTCCCGATGCTGAGTATCAGTTCCGCATCATCGCACAGAATGATG
+TTGGCCTGAGTGAGACCAGCCCTGCTTCTGAACCAGTTGTTTGCAAAGATCCATTTGATAAACCAAGCCA
+ACCAGGAGAACTTGAGATTCTTTCAATATCCAAAGATAGTGTCACTCTACAGTGGGAGAAACCTGAATGT
+GATGGTGGTAAAGAAATTCTTGGATACTGGGTTGAATATAGACAGTCTGGAGACAGTGCCTGGAAGAAGA
+GCAATAAGGAACGTATTAAGGACAAGCAATTCACAATAGGAGGTTTGCTGGAAGCTACTGAGTATGAATT
+CAGGGTTTTTGCTGAGAATGAGACTGGGCTGAGCAGACCTCGCAGAACTGCTATGTCTATAAAGACTAAA
+CTCACATCTGGAGAGGCCCCAGGAATACGCAAAGAAATGAAGGATGTTACCACAAAATTGGGTGAAGCTG
+CTCAACTCTCATGCCAGATTGTTGGAAGGCCTCTTCCTGACATTAAATGGTACAGATTTGGTAAAGAGCT
+CATACAAAGCCGGAAATACAAAATGTCTTCAGATGGACGCACACACACTCTTACAGTAATGACAGAGGAA
+CAGGAAGATGAAGGTGTTTATACCTGCATAGCCACCAATGAGGTTGGAGAAGTAGAAACCAGTAGTAAGC
+TTCTCCTGCAAGCAACACCGCAGTTCCATCCTGGTTACCCACTGAAAGAGAAATATTATGGAGCTGTGGG
+TTCCACACTTCGGCTTCATGTTATGTACATTGGTCGTCCAGTACCTGCCATGACTTGGTTCCATGGTCAG
+AAACTTTTGCAAAACTCAGAAAACATTACTATTGAAAACACTGAGCACTATACTCATCTTGTCATGAAGA
+ATGTCCAACGTAAGACTCATGCTGGGAAATACAAAGTCCAGCTCAGCAATGTTTTTGGAACAGTTGATGC
+CATCCTTGATGTGGAAATACAAGATAAACCAGACAAACCTACAGGACCAATTGTGATCGAAGCTCTATTG
+AAGAACTCCGCAGTGATCAGCTGGAAACCACCCGCAGATGACGGAGGCTCCTGGATCACCAACTATGTGG
+TGGAAAAATGTGAGGCCAAGGAGGGGGCTGAATGGCAATTGGTGTCTTCAGCCATCTCAGTGACAACCTG
+TAGAATTGTGAACCTCACAGAAAATGCTGGCTATTACTTCCGGGTTTCAGCTCAGAACACTTTCGGCATC
+AGTGACCCTCTAGAAGTGTCCTCAGTTGTGATCATTAAGAGTCCATTTGAAAAGCCAGGTGCTCCTGGCA
+AACCAACTATTACTGCTGTCACAAAAGATTCTTGTGTTGTGGCCTGGAAGCCACCTGCCAGTGATGGAGG
+TGCAAAGATTAGAAATTACTACCTTGAGAAGCGTGAGAAGAAGCAGAATAAATGGATTTCTGTGACAACA
+GAAGAAATTCGAGAAACTGTCTTTTCAGTGAAAAACCTTATTGAAGGTCTTGAATACGAGTTTCGTGTGA
+AATGTGAAAATCTAGGTGGGGAAAGTGAATGGAGTGAAATATCAGAACCCATCACTCCCAAATCTGATGT
+CCCAATTCAGGCACCACACTTTAAAGAGGAACTGAGAAATCTAAATGTCAGATATCAGAGCAATGCTACC
+TTGGTCTGCAAAGTGACTGGTCATCCAAAACCTATCGTCAAATGGTACAGACAAGGCAAAGAAATCATTG
+CAGATGGATTAAAATATAGGATTCAAGAATTTAAGGGTGGCTACCACCAGCTCATCATTGCAAGTGTCAC
+AGATGATGATGCCACAGTTTACCAAGTCAGAGCTACCAACCAAGGGGGATCTGTGTCTGGCACTGCCTCC
+TTGGAAGTGGAAGTTCCAGCTAAGATACACTTACCTAAAACTCTTGAAGGCATGGGAGCAGTTCATGCTC
+TCCGAGGTGAAGTGGTCAGCATCAAGATTCCTTTCAGTGGCAAACCAGATCCTGTGATCACCTGGCAGAA
+AGGACAAGATCTCATTGACAATAATGGCCACTACCAAGTTATTGTCACAAGATCCTTCACATCACTTGTT
+TTCCCCAATGGGGTAGAGAGAAAAGATGCTGGTTTCTATGTGGTCTGTGCTAAAAACAGATTTGGAATTG
+ATCAGAAGACAGTTGAACTGGATGTGGCTGATGTTCCTGACCCACCCAGAGGAGTCAAAGTTAGTGATGT
+CTCACGAGATTCTGTCAACTTAACATGGACTGAGCCAGCCTCTGATGGTGGCAGCAAAATCACCAACTAC
+ATTGTTGAAAAATGTGCAACTACTGCAGAAAGATGGCTCCGTGTAGGACAGGCCCGAGAAACACGTTATA
+CCGTGATCAACTTATTTGGAAAAACAAGTTACCAGTTCCGGGTAATAGCTGAAAATAAATTTGGTCTGAG
+CAAGCCTTCAGAGCCTTCAGAACCAACCATAACCAAAGAAGATAAGACCAGAGCTATGAACTATGATGAA
+GAGGTAGATGAAACCAGGGAAGTCTCCATGACTAAAGCATCTCACTCTTCAACCAAGGAACTCTATGAGA
+AATATATGATTGCTGAAGATCTTGGGCGTGGTGAGTTTGGAATTGTCCATCGTTGTGTTGAAACATCCTC
+AAAGAAGACATACATGGCCAAATTTGTTAAAGTCAAAGGGACTGATCAGGTTTTGGTAAAGAAGGAAATT
+TCCATTCTGAATATTGCTAGGCATAGAAACATCTTACACCTCCATGAATCATTTGAAAGCATGGAAGAAT
+TAGTTATGATCTTTGAGTTTATATCAGGACTTGACATATTTGAGCGCATTAACACAAGTGCTTTTGAACT
+TAATGAAAGAGAAATTGTAAGTTATGTTCACCAGGTCTGTGAAGCACTTCAGTTTTTACACAGTCATAAT
+ATTGGACACTTTGACATTAGACCAGAAAATATCATTTACCAAACCAGAAGAAGCTCTACCATTAAAATCA
+TAGAATTTGGTCAAGCCCGTCAGCTGAAACCAGGGGACAACTTCAGGCTTCTATTCACTGCCCCAGAATA
+CTATGCACCTGAAGTCCACCAGCATGATGTTGTCAGCACAGCCACAGACATGTGGTCACTTGGAACACTG
+GTATATGTGCTATTGAGTGGTATCAACCCATTCCTGGCTGAAACTAACCAACAGATCATTGAGAATATCA
+TGAATGCTGAATATACTTTCGATGAGGAAGCATTCAAAGAGATTAGCATTGAAGCCATGGATTTTGTTGA
+CCGGTTGTTAGTGAAAGAGAGGAAATCTCGCATGACAGCATCGGAGGCTCTCCAGCACCCATGGTTGAAG
+CAGAAGATAGAAAGAGTCAGTACTAAAGTTATCAGAACATTAAAACACCGGCGTTATTACCACACCCTGA
+TCAAGAAAGACCTCAACATGGTTGTGTCAGCAGCCCGGATCTCCTGTGGTGGTGCAATTCGATCTCAGAA
+GGGAGTGAGTGTTGCTAAAGTTAAAGTGGCATCCATTGAAATTGGCCCAGTTTCTGGGCAGATAATGCAT
+GCAGTTGGTGAAGAAGGAGGACATGTCAAATATGTATGCAAAATTGAAAATTATGATCAGTCTACCCAAG
+TGACTTGGTACTTTGGCGTCCGACAGCTGGAGAACAGTGAGAAATACGAAATCACCTACGAAGATGGAGT
+GGCCATCCTCTATGTCAAAGACATTACCAAATTAGATGATGGTACCTACAGATGCAAAGTAGTCAATGAC
+TATGGTGAAGACAGTTCTTATGCAGAGCTATTTGTTAAAGGTGTGAGAGAAGTCTATGACTATTACTGCC
+GTAGAACCATGAAGAAAATTAAGCGCAGAACAGACACAATGAGACTCCTGGAAAGGCCACCAGAATTTAC
+CCTGCCTCTCTATAATAAGACAGCTTATGTAGGTGAAAATGTCCGGTTTGGAGTAACTATAACTGTCCAC
+CCAGAGCCTCATGTAACATGGTATAAATCAGGTCAGAAAATCAAACCAGGTGACAATGACAAGAAGTACA
+CATTTGAGTCAGACAAGGGTCTTTACCAATTAACAATCAACAGTGTCACTACAGATGATGACGCTGAATA
+TACTGTTGTGGCAAGGAACAAATATGGTGAAGACAGCTGTAAAGCAAAGCTGACAGTAACCCTACACCCA
+CCTCCAACAGATAGTACCTTAAGACCCATGTTCAAAAGGTTACTGGCAAATGCAGAATGCCAAGAAGGCC
+AAAGTGTCTGCTTTGAGATCAGAGTGTCTGGCATCCCCCCACCAACATTAAAATGGGAGAAAGATGGTCA
+GCCACTGTCCCTCGGGCCTAACATTGAAATTATCCATGAAGGCTTGGATTATTATGCTCTGCACATCAGG
+GACACTTTGCCTGAAGACACGGGTTATTATAGAGTCACAGCCACTAACACAGCTGGGTCCACCAGCTGCC
+AGGCTCACCTACAAGTGGAACGCCTGAGGTACAAGAAACAGGAATTCAAGAGTAAGGAGGAGCATGAGCG
+ACACGTACAAAAACAAATTGACAAAACCCTCAGAATGGCTGAAATTCTTTCTGGAACTGAAAGTGTACCA
+CTGACACAGGTAGCTAAAGAGGCTCTGAGAGAAGCTGCTGTCCTTTATAAACCGGCTGTAAGCACCAAGA
+CTGTAAAAGGGGAATTCAGACTTGAGATAGAAGAAAAGAAGGAGGAGAGAAAACTCCGGATGCCTTATGA
+TGTACCAGAGCCACGCAAGTATAAGCAGACTACCATAGAAGAAGACCAACGCATCAAGCAGTTCGTGCCC
+ATGTCTGACATGAAGTGGTATAAAAAGATACGTGATCAGTATGAAATGCCTGGGAAACTTGACAGAGTTG
+TACAGAAACGACCCAAGCGCATCCGCCTTTCAAGATGGGAACAGTTCTATGTGATGCCTCTTCCACGCAT
+TACAGATCAATACAGACCTAAATGGCGTATTCCTAAACTGTCCCAAGATGATCTTGAGATAGTGAGACCA
+GCCCGCCGGCGTACACCTTCTCCTGATTATGACTTTTACTACCGACCTAGAAGACGTTCTCTTGGGGACA
+TCTCTGATGAAGAATTACTCCTCCCCATTGATGACTACTTAGCAATGAAAAGAACAGAGGAAGAGAGGCT
+GCGTCTTGAAGAAGAGCTTGAGTTAGGTTTTTCAGCTTCACCCCCAAGTCGAAGCCCTCCACACTTTGAG
+CTTTCTAGCCTACGTTACTCTTCACCACAAGCTCATGTCAAGGTGGAGGAAACAAGAAAAGACTTCAGGT
+ATTCAACCTATCACATCCCAACGAAGGCTGAAGCTAGTACAAGTTATGCAGAACTGAGGGAACGGCATGC
+CCAGGCTGCGTACAGACAGCCAAAGCAACGGCAAAGAATCATGGCTGAGAGGGAGGATGAAGAGTTGCTT
+CGCCCAGTTACGACCACCCAGCATCTCTCAGAATACAAAAGCGAACTTGACTTCATGTCAAAGGAGGAAA
+AGTCTAGAAAGAAATCAAGGCGACAAAGAGAAGTGACAGAAATAACAGAAATTGAGGAAGAATACGAAAT
+CTCAAAACATGCTCAAAGAGAATCATCCTCATCTGCGTCTAGACTACTGAGACGACGGCGCTCCCTGTCT
+CCAACTTATATTGAGTTAATGAGGCCAGTGTCTGAGCTGATCCGGTCACGTCCACAACCGGCTGAGGAAT
+ACGAAGATGACACAGAAAGAAGGTCACCTACTCCAGAGAGAACTCGCCCACGATCCCCCAGCCCTGTGTC
+TAGTGAGAGATCACTCTCGAGATTTGAGAGGTCTGCAAGATTTGATATCTTTTCCAGGTATGAGTCCATG
+AAAGCTGCTTTAAAAACTCAGAAGACATCAGAAAGGAAGTATGAAGTTTTGAGTCAGCAGCCTTTCACAC
+TGGACCATGCCCCTCGAATCACACTGAGAATGCGCTCGCACAGGGTACCATGTGGCCAAAATACACGTTT
+TATTTTAAATGTTCAGTCTAAGCCAACTGCCGAGGTTAAATGGTACCACAATGGTGTGGAACTCCAAGAA
+AGCAGTAAGATTCATTACACCAACACGAGTGGAGTCCTCACCCTGGAAATTCTGGACTGTCATACTGATG
+ACAGTGGAACCTACCGTGCTGTGTGCACCAACTACAAGGGCGAAGCTTCTGACTATGCAACGTTGGACGT
+GACAGGAGGGGATTATACCACCTATGCTTCCCAACGCAGAGATGAAGAGGTCCCCAGATCTGTTTTCCCT
+GAGCTGACAAGAACAGAGGCGTATGCTGTTTCATCATTTAAGAAAACATCTGAGATGGAAGCTTCGTCTT
+CTGTCAGGGAAGTGAAATCACAGATGACGGAGACAAGGGAAAGTCTCTCCTCATATGAACACTCTGCATC
+TGCAGAAATGAAAAGTGCTGCATTAGAAGAAAAGTCACTGGAAGAAAAATCCACAACCAGAAAGATCAAG
+ACGACTTTGGCAGCAAGAATTCTAACAAAGCCACGGTCCATGACCGTCTACGAGGGCGAGTCTGCAAGGT
+TTTCTTGTGACACCGATGGTGAGCCGGTACCAACTGTGACCTGGCTGCGTAAAGGACAAGTGCTAAGTAC
+TTCTGCCCGCCACCAAGTGACCACCACAAAGTACAAATCAACCTTTGAGATCTCTTCAGTCCAGGCTTCC
+GATGAGGGCAATTACAGCGTGGTGGTAGAAAACAGTGAAGGGAAACAAGAAGCAGAGTTCACTCTGACTA
+TTCAAAAGGCCAGGGTAACTGAAAAGGCTGTGACATCACCACCAAGAGTCAAATCCCCAGAGCCTCGGGT
+GAAATCCCCAGAAGCAGTTAAGTCTCCAAAACGAGTGAAATCTCCAGAACCTTCTCACCCGAAAGCCGTA
+TCACCCACAGAGACAAAACCAACACCAACAGAGAAAGTTCAGCACCTCCCAGTCTCTGCCCCACCAAAGA
+TTACTCAGTTCCTGAAAGCAGAAGCTTCTAAAGAGATTGCAAAACTGACCTGTGTGGTTGAAAGCAGTGT
+ATTAAGGGCAAAAGAGGTCACCTGGTATAAAGATGGCAAGAAACTGAAGGAAAATGGGCATTTCCAGTTT
+CATTATTCAGCAGATGGTACCTATGAGCTCAAAATCAATAACCTCACTGAATCTGATCAAGGAGAATATG
+TTTGTGAGATTTCTGGTGAAGGTGGAACGTCTAAAACCAACTTACAATTTATGGGGCAAGCCTTTAAGAG
+TATCCATGAGAAGGTATCAAAAATATCAGAAACTAAGAAATCAGATCAGAAAACCACTGAGTCAACAGTA
+ACCAGAAAAACTGAACCAAAAGCTCCTGAACCAATTTCCTCAAAACCAGTAATTGTTACTGGGTTGCAGG
+ATACAACTGTTTCTTCAGACAGTGTTGCTAAATTTGCAGTTAAGGCTACTGGAGAACCCCGGCCAACTGC
+CATCTGGACAAAAGATGGAAAGGCCATTACACAAGGAGGTAAATATAAACTCTCTGAAGACAAGGGAGGG
+TTCTTCTTAGAAATTCATAAGACTGATACTTCTGACAGTGGACTTTATACTTGTACAGTAAAAAATTCAG
+CTGGATCTGTGTCCTCTAGCTGCAAATTAACAATAAAAGCTATAAAAGATACTGAGGCACAGAAAGTCTC
+TACACAAAAGACTTCTGAAATTACACCTCAGAAGAAAGCTGTTGTCCAAGAGGAAATTTCCCAAAAAGCC
+CTAAGGTCTGAAGAAATTAAGATGTCAGAGGCAAAATCTCAAGAAAAGTTAGCCCTCAAAGAGGAAGCTT
+CAAAGGTTCTGATTTCTGAAGAAGTCAAGAAATCAGCAGCAACCTCCCTGGAAAAATCCATTGTCCATGA
+GGAAATCACTAAAACATCACAGGCATCAGAAGAAGTCAGAACTCATGCTGAGATTAAAGCATTTTCTACT
+CAGATGAGCATAAACGAAGGTCAAAGACTGGTTTTAAAAGCCAACATTGCTGGTGCCACTGATGTGAAAT
+GGGTACTGAATGGCGTAGAGCTTACCAACTCTGAGGAGTACCGATATGGTGTCTCAGGCAGCGATCAGAC
+CCTAACCATCAAGCAAGCCAGTCACAGAGATGAAGGAATCCTCACCTGCATAAGCAAAACCAAGGAAGGA
+ATCGTCAAGTGTCAGTATGATTTGACACTGAGCAAAGAACTCTCAGATGCTCCAGCCTTCATCTCACAGC
+CTAGATCTCAAAATATTAATGAAGGACAAAATGTTCTCTTTACTTGTGAAATCAGTGGCGAGCCATCCCC
+TGAAATCGAATGGTTTAAAAACAACCTGCCAATTTCTATTTCTTCAAATGTCAGCATAAGCCGCTCCAGA
+AATGTATACTCCCTTGAAATCCGAAATGCATCAGTCAGCGACAGTGGAAAGTACACAATTAAGGCCAAAA
+ATTTCCGTGGCCAGTGTTCAGCTACAGCTTCCTTAATGGTCCTTCCTCTAGTTGAAGAACCTTCCAGAGA
+GGTAGTATTGAGAACAAGTGGTGACACAAGCTTGCAAGGAAGCTTCTCGTCTCAGTCAGTCCAAATGTCT
+GCCTCCAAGCAGGAGGCCTCCTTCAGCAGTTTCAGCAGCAGCAGTGCTAGCAGCATGACTGAGATGAAAT
+TTGCAAGCATGTCTGCCCAAAGCATGTCCTCCATGCAAGAGTCCTTTGTAGAAATGAGTTCCAGCAGCTT
+TATGGGAATATCTAATATGACACAACTGGAAAGCTCAACTAGTAAAATGCTTAAAGCAGGCATAAGAGGA
+ATTCCGCCTAAAATTGAAGCTCTTCCATCTGATATCAGCATTGATGAAGGCAAAGTTCTAACAGTAGCCT
+GTGCTTTCACGGGTGAGCCTACCCCAGAAGTAACATGGTCCTGTGGTGGAAGAAAAATCCACAGTCAAGA
+ACAGGGGAGGTTCCACATTGAAAACACAGATGACCTGACAACCCTGATCATCATGGACGTACAGAAACAA
+GATGGTGGACTTTATACCCTGAGTTTAGGGAATGAATTTGGATCTGACTCTGCCACTGTGAATATACATA
+TTCGATCCATTTAAGAGGGCCTGTGCCCTTATACTCTACACTCATTCTTAACTTTTCGCAAACGTTTCAC
+ACGGACTAATCTTTCTGAACTGTAAATATTTAAAGAAAAAAAAGTAGTTTTGTATCAACCTAAATGAGTC
+AAAGTTCAAAAATATTCATTTCAATCTTTTCATAATTGTTGACCTAAGAATATAATACATTTGCTAGTGA
+CATGTACATACTGTATATAGCCGGATTAACGGTTATAAAGTTTTGTACCATTTATTTTATGACATTTTAC
+AATGTAAGTTTTGAAACTAACTGTTGGTAGGAGAAAGTTTCTTATGGAACGAATACCCTGCTCAACATTT
+AATCAATCTTTGTGCCTCAACATACTGTTGATGTCTAAGTATGCCTCAGTGGGTTGAGAAAATCCCCATT
+GAAGATGTCCTGTCCACCTAAAAGAGAATGATGCTGTGCATATCACTTGATATGTGCACCAATACCTACT
+GAATCAGAAATGTAAGGCATTGGTGATGTTTGCATTTACCCTCCTGTAAGCAACACTTTAACGTCTTACA
+TTTTCTCTGATGATGTCACACAAAATTATCATGACAAATATTACCAGAGCAAAGTGTAACGGCCAACACT
+TTGTTCGCTCATTTTACGCTGTCTCTGACATAAGGAGTGCCTGAATAGCTTGGAAAAGTAACATCTCCTG
+GCCATCCCTTCATTTAACCAAGCTATTCAAGTATTCCTATGCCAGAGCAGTGCCAACTCTTGGAGGTCCC
+AGAGTGCAGCCAATGCCTTTGTGTGGTAGTTCTAAATTTTAATTGCACCTGAAAAACCTGGGCACCTAAG
+CAATGAGCCACAGCAAAAAGTAAAGAACAACAACAAAATAAAGCTGTTGTTAAATTTTAAACAATATTAC
+TAATTGCCCAAAATGTCAATTTGATGTAGTTCTTTTCATGCAAGTATAAATTCAATTGTTAGTTATAATT
+GTTGGACCTCCTTGAGATAGTAACAACAAAATAAAGCAAGCTATCTGCACCTCAAAA
+X
+SHAR_EOF
+chmod 0644 titin_hum.seq ||
+echo 'restore of titin_hum.seq failed'
+Wc_c="`wc -c < 'titin_hum.seq'`"
+test 83286 -eq "$Wc_c" ||
+ echo 'titin_hum.seq: original size 83286, current size' "$Wc_c"
+fi
+# ============= uascii.h ==============
+if test -f 'uascii.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping uascii.h (File already exists)'
+else
+echo 'x - extracting uascii.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'uascii.h' &&
+/* Concurrent read version */
+/* ascii.gbl ascii translation to amino acids */
+/* modified 10-Mar-1987 for B, Z */
+X
+/* $Name: fa_34_26_5 $ - $Id: uascii.h,v 1.6 2004/12/30 16:45:01 wrp Exp $ */
+X
+#define NA 123
+#define NANN 50
+#define ESS 49 /* code for ',' in FASTS,FASTF, FASTM */
+#define EL 125
+#define ES 126
+#define AAMASK 127
+X
+#ifndef XTERNAL
+/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+/* 32 ! " # $ % & ' ( ) * + , - . / */
+/* 48 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+/* 64 @ A B C D E F G H I J K L M N O */
+/* 80 P Q R S T U V W X Y Z [ \ ] ^ _ */
+/* 96 ` a b c d e f g h i j k l m n o */
+/*112 p q r s t u v w x y z { | } ~ ^? */
+X
+int aascii[128]={
+X EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA, /* 15 */
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, /* 31 */
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,24,NA,NA,NA,NA,NA, /* 47 */
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, /* 63 */
+X NA, 1,21, 5, 4, 7,14, 8, 9,10,NA,12,11,13, 3,NA, /* 79 */
+X 15, 6, 2,16,17,23,20,18,23,19,22,NA,NA,NA,NA,NA, /* 95 */
+X NA, 1,21, 5, 4, 7,14, 8, 9,10,NA,12,11,13, 3,NA, /*111 */
+X 15, 6, 2,16,17,23,20,18,23,19,22,NA,NA,NA,NA,NA}; /*127 */
+X
+int nascii[128]={
+/* 0 1 2 3 5 6 7 8 9 10 11 12 13 14 15 15
+X @ A B C D E F G H I J K L M N O
+X P Q R S T U V W X Y Z */
+X EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,16,NA,NA,
+X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,ES,NA,
+X NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
+X 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA,
+X NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
+X 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA};
+X
+int *pascii;
+int qascii[128];
+int lascii[128];
+#else
+#define AAMASK 127
+extern int aascii[128];
+extern int nascii[128];
+X
+extern int *pascii;
+extern int qascii[128];
+extern int lascii[128];
+#endif
+SHAR_EOF
+chmod 0644 uascii.h ||
+echo 'restore of uascii.h failed'
+Wc_c="`wc -c < 'uascii.h'`"
+test 2006 -eq "$Wc_c" ||
+ echo 'uascii.h: original size 2006, current size' "$Wc_c"
+fi
+# ============= upam.h ==============
+if test -f 'upam.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping upam.h (File already exists)'
+else
+echo 'x - extracting upam.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'upam.h' &&
+/* Concurrent read version */
+/* 20-June-1986 universal pam file */
+X
+/* $Name: fa_34_26_5 $ - $Id: upam.h,v 1.19 2006/02/07 17:58:19 wrp Exp $ */
+X
+/* modified to accomodate both lower and upper case amino acid numbers
+X as a result MAXSQ = 50
+*/
+X
+#ifndef UPAM_GBL_DEF
+#define UPAM_GBL_DEF
+X
+#define EOSEQ 0
+#define MAXSQ 50
+#define MAXUC 24
+#define MAXLC 48
+X
+#define MAXHASH 32
+#define NMAP MAXHASH+1
+X
+#ifndef XTERNAL
+X
+int pamoff=0;
+X
+/*extern int gdelval, ggapval;*/
+X
+/* char sqnam[]="aa"; */
+/* char sqtype[]="protein"; */
+X
+char aa[MAXSQ+1] = {"\0ARNDCQEGHILKMFPSTWYVBZX*ARNDCQEGHILKMFPSTWYVBZX*\0"};
+char aax[MAXSQ+1] = {"\0ARNDCQEGHILKMFPSTWYVBZX*arndcqeghilkmfpstwyvbzx*\0"};
+X
+int naa = 24; /* this should be calculated from aa[] */
+int naax = 48;
+X
+/* haa[] used to map all valid amino acid codes into a hash value;
+X now, there is an additional hash value - not-mapped - NM */
+X
+/* this has been expanded to accomodate '*' */
+int haa[MAXSQ+1] = {
+X NMAP,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP,
+X 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP};
+X
+int haax[MAXSQ+1] = {
+X NMAP,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,
+X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+X NMAP};
+X
+/*
+X PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
+X Expected score = -0.844, Entropy = 0.354 bits
+X Lowest score = -8, Highest score = 17
+*/
+int apam250[450] = {
+X 2,
+-2, 6,
+X 0, 0, 2,
+X 0,-1, 2, 4,
+-2,-4,-4,-5,12,
+X 0, 1, 1, 2,-5, 4,
+X 0,-1, 1, 3,-5, 2, 4,
+X 1,-3, 0, 1,-3,-1, 0, 5,
+-1, 2, 2, 1,-3, 3, 1,-2, 6,
+-1,-2,-2,-2,-2,-2,-2,-3,-2, 5,
+-2,-3,-3,-4,-6,-2,-3,-4,-2, 2, 6,
+-1, 3, 1, 0,-5, 1, 0,-2, 0,-2,-3, 5,
+-1, 0,-2,-3,-5,-1,-2,-3,-2, 2, 4, 0, 6,
+-4,-4,-4,-6,-4,-5,-5,-5,-2, 1, 2,-5, 0, 9,
+X 1, 0,-1,-1,-3, 0,-1,-1, 0,-2,-3,-1,-2,-5, 6,
+X 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-3, 0,-2,-3, 1, 2,
+X 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-2, 0,-1,-3, 0, 1, 3,
+-6, 2,-4,-7,-8,-5,-7,-7,-3,-5,-2,-3,-4, 0,-6,-2,-5,17,
+-3,-4,-2,-4, 0,-4,-4,-5, 0,-1,-1,-4,-2, 7,-5,-3,-3, 0,10,
+X 0,-2,-2,-2,-2,-2,-2,-1,-2, 4, 2,-2, 2,-1,-1,-1, 0,-6,-2, 4,
+X 0,-1, 2, 3,-4, 1, 2, 0, 1,-2,-3, 1,-2,-5,-1, 0, 0,-5,-3,-2, 2,
+X 0, 0, 1, 3,-5, 3, 3,-1, 2,-2,-3, 0,-2,-5, 0, 0,-1,-6,-4,-2, 2, 3,
+X 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1,
+X 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1, 8};
+X
+/*
+X This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+X PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
+X Expected score = -1.64, Entropy = 0.979 bits
+X Lowest score = -8, Highest score = 12
+*/
+int apam120[450] = {
+X 3,
+X -3, 6,
+X 0,-1, 4,
+X 0,-3, 2, 5,
+X -3,-4,-5,-7, 9,
+X -1, 1, 0, 1,-7, 6,
+X 0,-3, 1, 3,-7, 2, 5,
+X 1,-4, 0, 0,-5,-3,-1, 5,
+X -3, 1, 2, 0,-4, 3,-1,-4, 7,
+X -1,-2,-2,-3,-3,-3,-3,-4,-4, 6,
+X -3,-4,-4,-5,-7,-2,-4,-5,-3, 1, 5,
+X -2, 2, 1,-1,-7, 0,-1,-3,-2,-2,-4, 5,
+X -2,-1,-3,-4,-6,-1,-4,-4,-4, 1, 3, 0, 8,
+X -4,-4,-4,-7,-6,-6,-6,-5,-2, 0, 0,-6,-1, 8,
+X 1,-1,-2,-2,-3, 0,-1,-2,-1,-3,-3,-2,-3,-5, 6,
+X 1,-1, 1, 0,-1,-2,-1, 1,-2,-2,-4,-1,-2,-3, 1, 3,
+X 1,-2, 0,-1,-3,-2,-2,-1,-3, 0,-3,-1,-1,-4,-1, 2, 4,
+X -7, 1,-5,-8,-8,-6,-8,-8,-5,-7,-5,-5,-7,-1,-7,-2,-6, 12,
+X -4,-6,-2,-5,-1,-5,-4,-6,-1,-2,-3,-6,-4, 4,-6,-3,-3,-1, 8,
+X 0,-3,-3,-3,-2,-3,-3,-2,-3, 3, 1,-4, 1,-3,-2,-2, 0,-8,-3, 5,
+X 0,-2, 3, 4,-6, 0, 3, 0, 1,-3,-4, 0,-4,-5,-2, 0, 0,-6,-3,-3, 4,
+X -1,-1, 0, 3,-7, 4, 4,-2, 1,-3,-3,-1,-2,-6,-1,-1,-2,-7,-5,-3, 2, 4,
+X -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2,
+X -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2, 6};
+X
+/*
+# VTML160
+#
+# This matrix was produced with scripts written by
+# Tobias Mueller and Sven Rahmann [June-2001].
+#
+# VTML160 substitution matrix, Units = Third-Bits
+# Expected Score = -1.297840 Third-Bits
+# Lowest Score = -7, Highest Score = 16
+#
+# Entropy H = 0.562489 Bits
+#
+# 30-Jun-2001
+*/
+int avt160[450] = {
+X 5,
+X -2, 7,
+X -1, 0, 7,
+X -1, -3, 3, 7,
+X 1, -3, -3, -5, 13,
+X -1, 2, 0, 1, -4, 6,
+X -1, -1, 0, 3, -5, 2, 6,
+X 0, -3, 0, -1, -2, -3, -2, 8,
+X -2, 1, 1, 0, -2, 2, -1, -3, 9,
+X -1, -4, -4, -6, -1, -4, -5, -7, -4, 6,
+X -2, -3, -4, -6, -4, -2, -4, -6, -3, 3, 6,
+X -1, 4, 0, 0, -4, 2, 1, -2, 0, -4, -3, 5,
+X -1, -2, -3, -5, -1, -1, -3, -5, -3, 2, 4, -2, 8,
+X -3, -5, -5, -7, -4, -4, -6, -6, 0, 0, 2, -5, 1, 9,
+X 0, -2, -2, -1, -3, -1, -1, -3, -2, -4, -3, -1, -4, -5, 9,
+X 1, -1, 1, 0, 1, 0, 0, 0, -1, -3, -3, -1, -3, -3, 0, 4,
+X 1, -1, 0, -1, 0, -1, -1, -2, -1, -1, -2, -1, -1, -3, -1, 2, 5,
+X -5, -4, -5, -7, -7, -6, -7, -5, -1, -2, -1, -5, -4, 3, -5, -4, -6, 16,
+X -3, -3, -2, -5, -1, -4, -3, -5, 3, -2, -1, -3, -2, 6, -6, -2, -3, 4, 10,
+X 0, -4, -4, -4, 1, -3, -3, -5, -3, 4, 2, -3, 1, -1, -3, -2, 0, -5, -3, 5,
+X -1, -2, 5, 6, -4, 0, 2, -1, 0, -5, -5, 0, -4, -6, -2, 1, 0, -6, -3, -4, 5,
+X -1, 0, 0, 3, -5, 4, 5, -2, 0, -4, -3, 2, -3, -5, -1, 0, -1, -7, -4, -3, 2, 5,
+X 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+X -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, 6};
+X
+/*
+X Matrix made by matblas from blosum50.iij
+X BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+X Blocks Database = /data/blocks_5.0/blocks.dat
+X Cluster Percentage: >= 50
+X Entropy = 0.4808, Expected = -0.3573
+*/
+int abl50[450] = {
+X 5,
+X -2, 7,
+X -1,-1, 7,
+X -2,-2, 2, 8,
+X -1,-4,-2,-4,13,
+X -1, 1, 0, 0,-3, 7,
+X -1, 0, 0, 2,-3, 2, 6,
+X 0,-3, 0,-1,-3,-2,-3, 8,
+X -2, 0, 1,-1,-3, 1, 0,-2,10,
+X -1,-4,-3,-4,-2,-3,-4,-4,-4, 5,
+X -2,-3,-4,-4,-2,-2,-3,-4,-3, 2, 5,
+X -1, 3, 0,-1,-3, 2, 1,-2, 0,-3,-3, 6,
+X -1,-2,-2,-4,-2, 0,-2,-3,-1, 2, 3,-2, 7,
+X -3,-3,-4,-5,-2,-4,-3,-4,-1, 0, 1,-4, 0, 8,
+X -1,-3,-2,-1,-4,-1,-1,-2,-2,-3,-4,-1,-3,-4,10,
+X 1,-1, 1, 0,-1, 0,-1, 0,-1,-3,-3, 0,-2,-3,-1, 5,
+X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 2, 5,
+X -3,-3,-4,-5,-5,-1,-3,-3,-3,-3,-2,-3,-1, 1,-4,-4,-3,15,
+X -2,-1,-2,-3,-3,-1,-2,-3, 2,-1,-1,-2, 0, 4,-3,-2,-2, 2, 8,
+X 0,-3,-3,-4,-1,-3,-3,-4,-4, 4, 1,-3, 1,-1,-3,-2, 0,-3,-1, 5,
+X -2,-1, 4, 5,-3, 0, 1,-1, 0,-4,-4, 0,-3,-4,-2, 0, 0,-5,-3,-4, 5,
+X -1, 0, 0, 1,-3, 4, 5,-2, 0,-3,-3, 1,-1,-4,-1, 0,-1,-2,-2,-3, 2, 5,
+X -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1,
+X -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1, 7};
+X
+/*
+X A R N D C Q E G H I L K M F P S T W Y V B Z X * */
+int a_md10[450]= {
+X 11, /* A */
+-12, 12, /* R */
+-12,-13, 13, /* N */
+-11,-18, -3, 12, /* D */
+-13,-10,-14,-20, 17, /* C */
+-13, -5,-11,-13,-19, 13, /* Q */
+-10,-15,-12, -2,-22, -5, 12, /* E */
+X -8, -9,-11, -9,-12,-16, -9, 11, /* G */
+-16, -5, -5,-10,-12, -3,-15,-16, 16, /* H */
+-13,-17,-14,-19,-17,-20,-19,-21,-18, 12, /* I */
+-15,-14,-19,-21,-16,-12,-20,-21,-13, -7, 10, /* L */
+-14, -2, -6,-15,-21, -6, -8,-15,-13,-17,-18, 12, /* K */
+-13,-14,-15,-18,-15,-14,-18,-19,-15, -4, -4,-12, 16, /* M */
+-18,-22,-19,-22,-11,-22,-23,-22,-14,-11, -6,-23,-14, 14, /* F */
+X -7,-12,-17,-18,-18, -8,-17,-16,-10,-19,-10,-16,-17,-17, 13, /* P */
+X -5,-10, -4,-12, -7,-13,-15, -7,-11,-14,-13,-13,-15,-11, -6, 11, /* S */
+X -4,-12, -7,-14,-14,-13,-15,-14,-13, -7,-16,-10, -7,-19, -9, -4, 12, /* T */
+-21, -9,-21,-21,-10,-17,-21,-13,-21,-21,-13,-21,-17,-13,-21,-15,-18, 18, /* W */
+-20,-17,-12,-13, -7,-16,-21,-20, -3,-15,-16,-20,-17, -3,-20,-12,-17,-12, 15, /* Y */
+X -6,-17,-17,-15,-12,-17,-14,-13,-19, -1, -8,-18, -5,-12,-16,-14,-10,-16,-18, 11, /* V */
+-12,-15, 5, 5,-17,-12, -7,-10, -7,-16,-20,-11,-17,-21,-17, -8,-10,-22,-13,-16, 13, /* B */
+-16,-18,-17, -8,-32, 1, 9,-17,-17,-29,-26,-11,-24,-34,-21,-21,-21,-29,-29,-22, -9, 13, /* Z */
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};
+X
+int a_md20[450] = {
+X 10,
+-10, 12,
+X -9,-10, 13,
+X -8,-14, -1, 12,
+-10, -7,-11,-16, 17,
+-10, -3, -8, -9,-16, 13,
+X -7,-11, -9, 1,-19, -3, 11,
+X -5, -6, -8, -6, -9,-12, -7, 11,
+-12, -3, -2, -7, -9, 0,-12,-13, 15,
+-10,-14,-11,-16,-14,-16,-16,-17,-14, 12,
+-12,-11,-15,-18,-13, -9,-17,-18,-10, -4, 10,
+-11, 0, -4,-12,-17, -3, -5,-12, -9,-14,-15, 12,
+X -9,-11,-12,-15,-12,-11,-15,-16,-12, -1, -2, -9, 15,
+-15,-19,-16,-19, -8,-18,-20,-19,-11, -8, -4,-19,-10, 13,
+X -5, -9,-13,-15,-14, -5,-14,-12, -7,-15, -7,-13,-14,-14, 12,
+X -2, -8, -1, -9, -4,-10,-12, -5, -8,-11,-10,-10,-12, -8, -3, 10,
+X -1, -9, -4,-11,-10,-10,-12,-11,-10, -4,-12, -7, -4,-15, -7, -1, 11,
+-17, -6,-18,-18, -7,-14,-18,-10,-17,-17,-10,-17,-14,-10,-18,-12,-15, 18,
+-16,-14, -9,-11, -4,-12,-18,-17, 0,-12,-12,-17,-14, 0,-16, -9,-13, -9, 14,
+X -3,-14,-14,-12, -9,-14,-11,-11,-15, 2, -5,-15, -2, -9,-13,-11, -7,-13,-14, 11,
+X -9,-12, 6, 6,-14, -9, -4, -7, -4,-13,-17, -8,-13,-18,-14, -5, -7,-19,-10,-13, 12,
+-12,-13,-13, -4,-27, 4, 10,-13,-12,-24,-21, -6,-20,-29,-17,-17,-17,-24,-24,-18, -6, 12,
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9 };
+X
+int a_md40[450] = {
+X 9,
+X -7, 11,
+X -6, -6, 12,
+X -6,-10, 1, 11,
+X -7, -5, -8,-13, 16,
+X -7, 0, -5, -6,-12, 12,
+X -5, -8, -5, 3,-15, 0, 11,
+X -3, -4, -5, -4, -7, -9, -4, 10,
+X -9, 0, 0, -4, -6, 2, -8,-10, 14,
+X -6,-10, -8,-12,-11,-12,-12,-13,-11, 11,
+X -9, -9,-12,-14,-10, -6,-13,-14, -7, -1, 9,
+X -8, 3, -1, -8,-12, -1, -3, -9, -6,-11,-12, 11,
+X -6, -8, -9,-12, -9, -8,-11,-12, -9, 1, 1, -7, 14,
+-11,-15,-12,-15, -5,-14,-16,-15, -7, -5, -1,-16, -7, 13,
+X -2, -6, -9,-11,-11, -3,-11, -9, -4,-11, -5,-10,-10,-11, 12,
+X 0, -5, 1, -6, -2, -7, -8, -2, -6, -8, -7, -7, -8, -6, -1, 9,
+X 1, -6, -2, -8, -7, -7, -8, -7, -7, -2, -9, -5, -2,-11, -4, 1, 10,
+-14, -4,-14,-15, -4,-11,-15, -7,-13,-13, -8,-13,-11, -7,-14, -9,-12, 18,
+-13,-10, -6, -8, -2, -9,-14,-13, 2, -9, -9,-13,-11, 2,-13, -7,-10, -6, 14,
+X -1,-11,-10, -9, -7,-11, -8, -8,-12, 4, -2,-12, 0, -6, -9, -7, -4,-10,-11, 10,
+X -6, -8, 6, 6,-10, -6, -1, -4, -2,-10,-13, -5,-10,-14,-10, -3, -5,-15, -7,-10, 11,
+X -8, -8, -8, 0,-21, 6, 10, -8, -7,-18,-16, -3,-15,-23,-12,-12,-12,-19,-18,-14, -3, 11,
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};
+X
+/*
+X Matrix made by matblas from blosum62.iij
+X * column uses minimum score
+X BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+X Blocks Database = /data/blocks_5.0/blocks.dat
+X Cluster Percentage: >= 62
+X Entropy = 0.6979, Expected = -0.5209
+*/
+X
+int abl62[450] = {
+X 4,
+X -1, 5,
+X -2, 0, 6,
+X -2,-2, 1, 6,
+X 0,-3,-3,-3, 9,
+X -1, 1, 0, 0,-3, 5,
+X -1, 0, 0, 2,-4, 2, 5,
+X 0,-2, 0,-1,-3,-2,-2, 6,
+X -2, 0, 1,-1,-3, 0, 0,-2, 8,
+X -1,-3,-3,-3,-1,-3,-3,-4,-3, 4,
+X -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,
+X -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,
+X -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5,
+X -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,
+X -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,
+X 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4,
+X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,
+X -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11,
+X -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,
+X 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,
+X -2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4,
+X -1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,
+X 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1,
+X 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1, 6};
+X
+/* blosum80 in 1/2 bit units (previous versions had 1/3 bit units) */
+/*
+X Matrix made by matblas from blosum80.iij
+X * column uses minimum score
+X BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+X Blocks Database = /data/blocks_5.0/blocks.dat
+X Cluster Percentage: >= 80
+X Entropy = 0.9868, Expected = -0.7442
+*/
+X
+int abl80[450] = {
+X 5,
+X -2, 6,
+X -2,-1, 6,
+X -2,-2, 1, 6,
+X -1,-4,-3,-4, 9,
+X -1, 1, 0,-1,-4, 6,
+X -1,-1,-1, 1,-5, 2, 6,
+X 0,-3,-1,-2,-4,-2,-3, 6,
+X -2, 0, 0,-2,-4, 1, 0,-3, 8,
+X -2,-3,-4,-4,-2,-3,-4,-5,-4, 5,
+X -2,-3,-4,-5,-2,-3,-4,-4,-3, 1, 4,
+X -1, 2, 0,-1,-4, 1, 1,-2,-1,-3,-3, 5,
+X -1,-2,-3,-4,-2, 0,-2,-4,-2, 1, 2,-2, 6,
+X -3,-4,-4,-4,-3,-4,-4,-4,-2,-1, 0,-4, 0, 6,
+X -1,-2,-3,-2,-4,-2,-2,-3,-3,-4,-3,-1,-3,-4, 8,
+X 1,-1, 0,-1,-2, 0, 0,-1,-1,-3,-3,-1,-2,-3,-1, 5,
+X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-2,-1,-1,-2,-2, 1, 5,
+X -3,-4,-4,-6,-3,-3,-4,-4,-3,-3,-2,-4,-2, 0,-5,-4,-4,11,
+X -2,-3,-3,-4,-3,-2,-3,-4, 2,-2,-2,-3,-2, 3,-4,-2,-2, 2, 7,
+X 0,-3,-4,-4,-1,-3,-3,-4,-4, 3, 1,-3, 1,-1,-3,-2, 0,-3,-2, 4,
+X -2,-2, 4, 4,-4, 0, 1,-1,-1,-4,-4,-1,-3,-4,-2, 0,-1,-5,-3,-4, 4,
+X -1, 0, 0, 1,-4, 3, 4,-3, 0,-4,-3, 1,-2,-4,-2, 0,-1,-4,-3,-3, 0, 4,
+X -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1,
+X -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1, 6};
+X
+/* DNA alphabet
+X
+X A, C, G, T, U 1-4, 5
+X R, Y 6, 7
+X M (A or C) 8
+X W (A or T) 9
+X S (C or G) 10
+X K (G or T) 11
+X D (not C) 12
+X H (not G) 13
+X V (not T) 14
+X B (not A) 15
+X N 16
+X X 17
+*/
+X
+char nt[MAXSQ+1] ={"\0ACGTURYMWSKDHVBNXACGTURYMWSKDHVBNX\0"};
+char ntx[MAXSQ+1]={"\0ACGTURYMWSKDHVBNXacgturymwskdhvbnx\0"};
+char ntc[MAXSQ+1]={"\0TGCAAYRKWSMHDBVNXtgcaayrkwsmhdbvnx\0"};
+X
+/* nt complement to encoding */
+X /* A:T C:G G:C T:A U:A */
+int gc_nt[MAXSQ+1]={ 0, 4, 3, 2, 1, 1,
+X /* R:Y Y:R M:K W:W */
+X 7, 6, 11, 9,
+X /* S:S K:M D:H H:D */
+X 10, 8, 13, 12,
+X /* B:V V:B N:N X:X */
+X 15, 14, 16, 16};
+X
+int nnt = 17;
+int nntx = 34;
+X
+int hnt[MAXSQ+1] = {
+X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
+X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,NMAP};
+int hntx[MAXSQ+1] = {
+X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
+X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
+X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP};
+X
+int npam[450] = {
+/* A C G T U R Y M W S K D H V B N X */
+X 5, /* A */
+X -4, 5, /* C */
+X -4,-4, 5, /* G */
+X -4,-4,-4, 5, /* T */
+X -4,-4,-4, 5, 5, /* U */
+X 2,-1, 2,-1,-1, 2, /* R (A G)*/
+X -1, 2,-1, 2, 2,-2, 2, /* Y (C T)*/
+X 2, 2,-1,-1,-1,-1,-1, 2, /* M (A C)*/
+X 2,-1,-1, 2, 2, 1, 1, 1, 2, /* W (A T)*/
+X -1, 2, 2,-1,-1, 1, 1, 1,-1, 2, /* S (C G)*/
+X -1,-1, 2, 2, 2, 1, 1,-1, 1, 1, 2, /* K (G T)*/
+X 1,-2, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* D (!C) */
+X 1, 1,-2, 1, 1,-1, 1, 1, 1,-1,-1,-1, 1, /* H (!G) */
+X 1, 1, 1,-2,-2, 1,-1, 1,-1, 1,-1,-1,-1, 1, /* V (!T) */
+X -2, 1, 1, 1, 1,-1, 1,-1,-1, 1, 1,-1,-1,-1, 1, /* B (!A) */
+X -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* N */
+X -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; /* X */
+/* A C G T U R Y M W S K D H V B N */
+X
+int *pam; /* Pam matrix- 1D */
+int *pam12;
+int *pam12x;
+int pamh1[MAXSQ+1]; /* used for kfact replacement */
+X
+/* Robinson & Robinson counts */
+long rrcounts[25] = {
+X 0,
+X 35155,
+X 23105,
+X 20212,
+X 24161,
+X 8669,
+X 19208,
+X 28354,
+X 33229,
+X 9906,
+X 23161,
+X 40625,
+X 25872,
+X 10101,
+X 17367,
+X 23435,
+X 32070,
+X 26311,
+X 5990,
+X 14488,
+X 29012,
+X 0, 0, 0, 0 };
+X
+long rrtotal = 450431;
+#else
+X
+/* extern char sqnam[]; */
+/* extern char sqtype[]; */
+/* extern int gdelval, ggapval; */
+extern int pamoff;
+extern char aa[MAXSQ+1];
+extern char aax[MAXSQ+1];
+extern char nt[MAXSQ+1];
+extern char ntx[MAXSQ+1];
+extern char ntc[MAXSQ+1];
+extern int gc_nt[MAXSQ+1];
+X
+extern int naa;
+extern int naax;
+extern int nnt;
+extern int nntx;
+X
+extern int haa[MAXSQ+1];
+extern int haax[MAXSQ+1];
+extern int hnt[MAXSQ+1];
+extern int hntx[MAXSQ+1];
+/* extern int had[MAXSQ+1]; */
+X
+extern int apam250[450];
+extern int apam120[450];
+extern int a_md10[450];
+extern int a_md20[450];
+extern int a_md40[450];
+extern int abl50[450];
+extern int abl62[450];
+extern int abl80[450];
+extern int npam[450];
+extern int *pam;
+extern int *pam12;
+extern int *pam12x;
+extern int pamh1[MAXSQ+1];
+extern long rrcounts[25];
+extern long rrtotal;
+#endif
+#endif
+SHAR_EOF
+chmod 0644 upam.h ||
+echo 'restore of upam.h failed'
+Wc_c="`wc -c < 'upam.h'`"
+test 16008 -eq "$Wc_c" ||
+ echo 'upam.h: original size 16008, current size' "$Wc_c"
+fi
+# ============= url_subs.c ==============
+if test -f 'url_subs.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping url_subs.c (File already exists)'
+else
+echo 'x - extracting url_subs.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'url_subs.c' &&
+X
+/* copyright (c) 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: url_subs.c,v 1.9 2006/08/20 18:18:33 wrp Exp $ */
+X
+/* 30 Dec 2004 - modify REF_URL to accommodate current Entrez */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "defs.h"
+#include "structs.h"
+#include "param.h"
+X
+#ifndef DEF_PROT_LIB
+#define DEF_PROT_LIB "q"
+#endif
+X
+#ifndef FASTA_HOST
+#define FASTA_HOST "your.fasta.host.here/fasta/cgi"
+#endif
+X
+/* do_url1() - write the HTML links for one library hit to fp.
+X
+X   Three links are written: an Entrez lookup, "Re-search database" and
+X   "General re-search".  Each uses a built-in format unless the
+X   corresponding environment variable (REF_URL, SRCH_URL, SRCH_URL1)
+X   is set, in which case that value is used as the fprintf() format.
+X
+X   m_msg    supplies ldnaseq (selects db), f_id0 (program id) and lname
+X   pst      is not used here
+X   l_name   library sequence name; a private, truncated copy is edited
+X   n1       passed through to the n1= field of the search links
+X   aln      amin1/amax1 give the start/stop of the aligned region
+X   loffset  added to amin1/amax1 before they are written
+*/
+void do_url1(FILE *fp, struct mngmsg m_msg, struct pstruct pst,
+X             char *l_name, int n1, struct a_struct aln, long loffset)
+{
+X  char my_l_name[200];
+X  char *db;
+X  char pgm[10], lib[MAX_FN];
+X  char *ref_url, *lbp=NULL;
+X  char *srch_url, *srch_url1;
+X  size_t name_len;
+X
+X  if (m_msg.ldnaseq==SEQT_DNA) db="nucleotide";
+X  else db="Protein";
+X
+X  if (strncmp(m_msg.f_id0,"rss",3)==0) {
+X    strncpy(pgm,"fa",sizeof(pgm));
+X  }
+X  else if (strncmp(m_msg.f_id0,"rfx",3)==0) {
+X    strncpy(pgm,"fx",sizeof(pgm));
+X  }
+X  else { strncpy(pgm,m_msg.f_id0,sizeof(pgm)); }
+X  /* strncpy() does not terminate when the source fills the buffer */
+X  pgm[sizeof(pgm)-1]='\0';
+X
+X  if (m_msg.lname[0]!='%') {
+X    strncpy(lib,m_msg.lname,sizeof(lib));
+X    lib[sizeof(lib)-1]='\0';
+X  }
+X  else {
+X    /* a leading '%' is written URL-escaped as "%25" */
+X    strncpy(lib,"%25",sizeof(lib));
+X    lib[sizeof(lib)-1]='\0';
+X    /* the strncat() count is the room left in lib, not its total size */
+X    strncat(lib,&m_msg.lname[1],sizeof(lib)-strlen(lib)-1);
+X  }
+X
+X  strncpy(my_l_name,l_name,sizeof(my_l_name));
+X  my_l_name[sizeof(my_l_name)-1]='\0';
+X
+X  /* for t*, fx and fy searches, cut the name at ':' and drop a trailing
+X     "_<c>" suffix; lbp is set only when the suffix was removed */
+X  if (pgm[0]=='t' || strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0 ) {
+X    if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
+X    lbp = NULL;
+X    name_len = strlen(my_l_name);
+X    if (name_len >= 2 && my_l_name[name_len-2] == '_') {
+X      lbp = &my_l_name[name_len-2];
+X      *lbp = '\0';
+X    }
+X  }
+X
+X  /* change the program name for fastx, tfastx, tfasta */
+X  /* fastx returns proteins */
+X  if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) strncpy(pgm,"fa",sizeof(pgm));
+X  else if (strcmp(pgm,"ff")==0) strncpy(pgm,"fa",sizeof(pgm));
+X  else if (pgm[0]=='t') {
+X    strncpy(pgm,"fx",sizeof(pgm));
+X    strncpy(lib,DEF_PROT_LIB,sizeof(lib));
+X    lib[sizeof(lib)-1]='\0';
+X  }
+X
+X  /* NOTE(review): REF_URL, SRCH_URL and SRCH_URL1 are used directly as
+X     printf formats; they must contain exactly the conversions of the
+X     built-in formats and must only be set by a trusted administrator. */
+X  fflush(fp);
+X  if ((ref_url = getenv("REF_URL"))==NULL)
+X    fprintf(fp,"<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A> ",
+X            db,my_l_name);
+X  else
+X    fprintf(fp,ref_url,db,my_l_name);
+X
+X  if ((srch_url = getenv("SRCH_URL"))==NULL)
+X    fprintf(fp,"<A HREF=\"http://%s/searchfa.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d\">Re-search database</A> ",
+X            FASTA_HOST,my_l_name,db,lib,pgm,
+X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X  else
+X    fprintf(fp,srch_url,my_l_name,db,lib,pgm,
+X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X
+X  if ((srch_url1 = getenv("SRCH_URL1"))==NULL)
+X    fprintf(fp,"<A HREF=\"http://%s/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d\">General re-search</A>\n<p>\n",
+X            FASTA_HOST,my_l_name,db,lib,pgm,
+X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X  else
+X    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
+X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X
+X  /* put back "_r" (only if it was actually removed above) */
+X  if (lbp!=NULL) *lbp = '_';
+X
+X  /*
+X  if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
+X fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X  else
+X    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
+X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
+X  */
+X  fflush(fp);
+X
+}
+SHAR_EOF
+chmod 0644 url_subs.c ||
+echo 'restore of url_subs.c failed'
+Wc_c="`wc -c < 'url_subs.c'`"
+test 3335 -eq "$Wc_c" ||
+ echo 'url_subs.c: original size 3335, current size' "$Wc_c"
+fi
+# ============= uthr_subs.h ==============
+if test -f 'uthr_subs.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping uthr_subs.h (File already exists)'
+else
+echo 'x - extracting uthr_subs.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'uthr_subs.h' &&
+X
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+X
+X
+/* $Name: fa_34_26_5 $ - $Id: uthr_subs.h,v 1.1.1.1 1999/10/22 20:56:02 wrp Exp $ */
+X
+X
+/* MAX_WORKERS may be overridden at compile time; NUM_WORK_BUF is twice
+X  that.  The expansion is parenthesized so it stays one operand when
+X  used inside a larger expression (e.g. x % NUM_WORK_BUF). */
+#ifndef MAX_WORKERS
+#define MAX_WORKERS 2
+#endif
+#define NUM_WORK_BUF (2*MAX_WORKERS)
+X
+#include <synch.h>
+#include <thread.h>
+X
+#define check(status,string) \
+X if (status == -1) perror(string) /* error macro for thread calls */
+X
+#ifndef XTERNAL
+X
+thread_t threads[MAX_WORKERS];
+X
+/* mutex stuff */
+X
+mutex_t reader_mutex; /* empty buffer pointer structure lock */
+mutex_t worker_mutex; /* full buffer pointer structure lock */
+X
+/* condition variable stuff */
+X
+cond_t reader_cond_var; /* condition variable for reader */
+cond_t worker_cond_var; /* condition variable for workers */
+X
+mutex_t start_mutex; /* start-up synchronisation lock */
+cond_t start_cond_var; /* start-up synchronisation condition variable */
+X
+#else
+X
+extern thread_t threads[];
+X
+/* mutex stuff */
+X
+extern mutex_t reader_mutex;
+extern mutex_t worker_mutex;
+X
+/* condition variable stuff */
+X
+extern cond_t reader_cond_var;
+extern cond_t worker_cond_var;
+X
+extern mutex_t start_mutex;
+extern cond_t start_cond_var;
+X
+#endif
+SHAR_EOF
+chmod 0644 uthr_subs.h ||
+echo 'restore of uthr_subs.h failed'
+Wc_c="`wc -c < 'uthr_subs.h'`"
+test 1229 -eq "$Wc_c" ||
+ echo 'uthr_subs.h: original size 1229, current size' "$Wc_c"
+fi
+# ============= vtml160.mat ==============
+if test -f 'vtml160.mat' -a X"$1" != X"-c"; then
+ echo 'x - skipping vtml160.mat (File already exists)'
+else
+echo 'x - extracting vtml160.mat (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'vtml160.mat' &&
+#
+# VTML160
+#
+# This matrix was produced with scripts written by
+# Tobias Mueller and Sven Rahmann [June-2001].
+#
+# VTML160 substitution matrix, Units = Third-Bits
+# Expected Score = -1.297840 Third-Bits
+# Lowest Score = -7, Highest Score = 16
+#
+# Entropy H = 0.562489 Bits
+#
+# 30-Jun-2001
+X A R N D C Q E G H I L K M F P S T W Y V B Z X *
+A 5 -2 -1 -1 1 -1 -1 0 -2 -1 -2 -1 -1 -3 0 1 1 -5 -3 0 -1 -1 0 -7
+R -2 7 0 -3 -3 2 -1 -3 1 -4 -3 4 -2 -5 -2 -1 -1 -4 -3 -4 -2 0 0 -7
+N -1 0 7 3 -3 0 0 0 1 -4 -4 0 -3 -5 -2 1 0 -5 -2 -4 5 0 0 -7
+D -1 -3 3 7 -5 1 3 -1 0 -6 -6 0 -5 -7 -1 0 -1 -7 -5 -4 6 3 0 -7
+C 1 -3 -3 -5 13 -4 -5 -2 -2 -1 -4 -4 -1 -4 -3 1 0 -7 -1 1 -4 -5 0 -7
+Q -1 2 0 1 -4 6 2 -3 2 -4 -2 2 -1 -4 -1 0 -1 -6 -4 -3 0 4 0 -7
+E -1 -1 0 3 -5 2 6 -2 -1 -5 -4 1 -3 -6 -1 0 -1 -7 -3 -3 2 5 0 -7
+G 0 -3 0 -1 -2 -3 -2 8 -3 -7 -6 -2 -5 -6 -3 0 -2 -5 -5 -5 -1 -2 0 -7
+H -2 1 1 0 -2 2 -1 -3 9 -4 -3 0 -3 0 -2 -1 -1 -1 3 -3 0 0 0 -7
+I -1 -4 -4 -6 -1 -4 -5 -7 -4 6 3 -4 2 0 -4 -3 -1 -2 -2 4 -5 -4 0 -7
+L -2 -3 -4 -6 -4 -2 -4 -6 -3 3 6 -3 4 2 -3 -3 -2 -1 -1 2 -5 -3 0 -7
+K -1 4 0 0 -4 2 1 -2 0 -4 -3 5 -2 -5 -1 -1 -1 -5 -3 -3 0 2 0 -7
+M -1 -2 -3 -5 -1 -1 -3 -5 -3 2 4 -2 8 1 -4 -3 -1 -4 -2 1 -4 -3 0 -7
+F -3 -5 -5 -7 -4 -4 -6 -6 0 0 2 -5 1 9 -5 -3 -3 3 6 -1 -6 -5 0 -7
+P 0 -2 -2 -1 -3 -1 -1 -3 -2 -4 -3 -1 -4 -5 9 0 -1 -5 -6 -3 -2 -1 0 -7
+S 1 -1 1 0 1 0 0 0 -1 -3 -3 -1 -3 -3 0 4 2 -4 -2 -2 1 0 0 -7
+T 1 -1 0 -1 0 -1 -1 -2 -1 -1 -2 -1 -1 -3 -1 2 5 -6 -3 0 0 -1 0 -7
+W -5 -4 -5 -7 -7 -6 -7 -5 -1 -2 -1 -5 -4 3 -5 -4 -6 16 4 -5 -6 -7 0 -7
+Y -3 -3 -2 -5 -1 -4 -3 -5 3 -2 -1 -3 -2 6 -6 -2 -3 4 10 -3 -3 -4 0 -7
+V 0 -4 -4 -4 1 -3 -3 -5 -3 4 2 -3 1 -1 -3 -2 0 -5 -3 5 -4 -3 0 -7
+B -1 -2 5 6 -4 0 2 -1 0 -5 -5 0 -4 -6 -2 1 0 -6 -3 -4 5 2 0 -7
+Z -1 0 0 3 -5 4 5 -2 0 -4 -3 2 -3 -5 -1 0 -1 -7 -4 -3 2 5 0 -7
+XX 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -7
+* -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 1
+SHAR_EOF
+chmod 0644 vtml160.mat ||
+echo 'restore of vtml160.mat failed'
+Wc_c="`wc -c < 'vtml160.mat'`"
+test 2771 -eq "$Wc_c" ||
+ echo 'vtml160.mat: original size 2771, current size' "$Wc_c"
+fi
+# ============= w_mw.h ==============
+if test -f 'w_mw.h' -a X"$1" != X"-c"; then
+ echo 'x - skipping w_mw.h (File already exists)'
+else
+echo 'x - extracting w_mw.h (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'w_mw.h' &&
+X
+/* $Name: fa_34_26_5 $ - $Id: w_mw.h,v 1.17 2006/04/12 18:00:02 wrp Exp $ */
+X
+/* 21-July-2000 - changes for p2_complib/p2_workcomp:
+X there are now two sequence numbers; the old (worker) seqnm,
+X and a new manager (master) sequence number, m_seqnm
+*/
+X
+#ifndef BFR
+#define BFR 300
+#endif
+#ifndef BFR2
+#define BFR2 100
+#endif
+X
+#define MAXSQL 125000
+#define MMAXSQL 2000000
+#ifndef MAXWRKR
+#define MAXWRKR 64
+#endif
+#define MAXLSEQ 50000
+#define DESLIN 60
+#define NDES 100
+X
+/* qmng_str - parameters describing one query.
+X NOTE(review): presumably the per-query message passed from the manager
+X to the workers (p2_complib/p2_workcomp) - confirm against the callers. */
+struct qmng_str
+{
+X int n0; /* query sequence length */
+X int nm0; /* number of segments */
+X int escore_flg; /* use escores */
+X int qshuffle; /* query shuffle */
+X int pam_pssm; /* flag for pssm/profile search */
+X int s_func; /* for p_workcomp: func==0>simple comparison, ==1>alignments */
+X int slist; /* number of alignments to do */
+X int seqnm; /* query sequence number - used for identity searches */
+X char libstr[MAX_FN]; /* NOTE(review): purpose not shown in this header - confirm */
+};
+X
+/* comstr - score record for one compared library sequence.
+X It carries both sequence numbers introduced on 21-July-2000 (see the
+X note at the top of this header): the manager's and the worker's. */
+struct comstr
+{
+X int m_seqnm; /* manager (master) sequence number */
+X int seqnm; /* worker sequence number */
+X int score[3]; /* score */
+X double escore;
+X float comp;
+X float H;
+X int segnum;
+X int seglen;
+X int frame;
+X /* NOTE(review): r_* / qr_* presumably hold the shuffled-library and
+X shuffled-query results, like the same-named buffer fields filled in
+X work_thr.c - confirm */
+X int r_score, qr_score;
+X double r_escore, qr_escore;
+};
+X
+/* comstr2 - result record that adds alignment information (aln_d,
+X percent, gpercent, aln_code_n) to the sequence numbers and scores.
+X NOTE(review): presumably returned for the alignment stage (s_func==1)
+X - confirm against p2_workcomp. */
+struct comstr2
+{
+X int m_seqnm; /* manager (master) sequence number */
+X int seqnm; /* worker sequence number */
+X int score[3]; /* score */
+X double escore;
+X int segnum;
+X int seglen;
+X int sw_score;
+X
+X /* int a_len; */ /* consensus alignment length */
+X /* int min0, max0, min1, max1;
+X int nident, ngap_q, ngap_l; */ /* number of identities, gaps in q, l */
+X
+X struct a_struct aln_d;
+X float percent, gpercent;
+X int aln_code_n;
+};
+X
+/* The message structure */
+X
+/* wrkmsg - the work message: names a library and the part of it to be
+X searched.  lname and libenv are fixed 80-byte buffers, so longer names
+X must be truncated by whoever fills them in. */
+struct wrkmsg
+{
+X char lname [80]; /* name of the library */
+X char libenv[80]; /* directory in which library resides */
+X int lb_off; /* offset in the library */
+X int lb_stop; /* stop position in library */
+X int lb_code; /* continue code */
+X int lb_size; /* library size */
+X int p_size; /* parcel size */
+X int libfn; /* current library being searched */
+X int stage; /* current stage number */
+};
+X
+/* sql - bookkeeping entry for one library sequence: its length, where it
+X sits in the library file, which worker holds it and its description.
+X The type of lseek depends on USE_FSEEKO (off_t when defined, long
+X otherwise). */
+struct sql
+{
+X int n1; /* NOTE(review): presumably the sequence length, as in sqs.n1 - confirm */
+X int *n1tot_p;
+X int sfnum[10]; /* superfamily number */
+X int nsfnum; /* NOTE(review): presumably the number of sfnum[] entries in use - confirm */
+#ifndef USE_FSEEKO
+X long lseek; /* location of sequence in file */
+#else
+X off_t lseek;
+#endif
+X long loffset; /* offset from the beginning of the sequence */
+X int wrkr; /* worker that has sequence */
+X int cont;
+X char *bline; /* descriptive line */
+};
+X
+/* sqs - one library sequence: its length and a pointer to its residues */
+struct sqs
+{
+X int n1; /* size of library sequence */
+X unsigned char *aa1; /* sequence data */
+};
+X
+#include "aln_structs.h"
+X
+/* sqs2 - library sequence record that also carries its position in the
+X master list and two-element (per-frame) alignment state.  It needs
+X struct a_res_str, hence the aln_structs.h include just above. */
+struct sqs2
+{
+X int n1; /* size of library sequence */
+X int m_seqnm; /* location in master list */
+X unsigned char *aa1; /* sequence data */
+X int walign_dflg[2]; /* NOTE(review): presumably "alignment done" flags, one per frame - confirm */
+X int sw_score[2];
+X struct a_res_str a_res[2]; /* need a_res for each frame */
+};
+X
+/* stage2_str - identifies one (sequence, frame) pair by both its manager
+X and its worker sequence number.
+X NOTE(review): presumably the request sent for second-stage work - confirm. */
+struct stage2_str {
+X int m_seqnm; /* manager sequence number */
+X int seqnm; /* worker sequence number */
+X int frame; /* query frame */
+};
+SHAR_EOF
+chmod 0644 w_mw.h ||
+echo 'restore of w_mw.h failed'
+Wc_c="`wc -c < 'w_mw.h'`"
+test 2899 -eq "$Wc_c" ||
+ echo 'w_mw.h: original size 2899, current size' "$Wc_c"
+fi
+# ============= work_thr.c ==============
+if test -f 'work_thr.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping work_thr.c (File already exists)'
+else
+echo 'x - extracting work_thr.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'work_thr.c' &&
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: work_thr.c,v 1.23 2007/04/26 18:33:20 wrp Exp $ */
+X
+/* work_thr.c - threaded worker */
+X
+/* modified 21-Oct-1998 to work with reverse complement for DNA */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+X
+#include "defs.h" /* various constants */
+#include "mw.h" /* defines beststr */
+#include "structs.h"
+#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
+X
+/***************************************/
+/* thread global variable declarations */
+/***************************************/
+X
+#define XTERNAL
+#include "thr.h"
+#undef XTERNAL
+X
+void alloc_pam (int, int, struct pstruct *);
+int **alloc_pam2p(int, int);
+void revcomp(unsigned char *seq, int n, int *c_nt);
+#ifdef WIN32
+void pthread_exit(void *);
+#else
+void THR_EXIT(void *);
+#endif
+X
+/* functions getting/sending buffers to threads (thr_sub.c) */
+extern void wait_thr(void);
+extern int get_wbuf(struct buf_head **cur_buf, int max_work_buf);
+extern void put_wbuf(struct buf_head *cur_buf, int max_work_buf);
+X
+/* dropxx.c functions */
+extern void init_work (unsigned char *aa0, int n0,
+X struct pstruct *ppst, void **f_arg);
+X
+extern void do_work (unsigned char *aa0, int n0, unsigned char *aa1, int n1,
+X int frame,
+X struct pstruct *ppst, void *f_str, int qr_flg,
+X struct rstruct *rst);
+X
+extern void close_work (unsigned char *, int, struct pstruct *, void **);
+X
+extern void irand(int);
+extern int shuffle(unsigned char *, unsigned char *, int);
+extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
+extern void qshuffle(unsigned char *aa0, int n0, int nm0);
+extern void free_pam2p(int **);
+X
+/* work_thread() - main routine of one search worker thread.
+X
+X   work_info supplies the worker number, the query (aa0, n0, nm0), the
+X   shared search parameters (ppst) and the qframe / qshuffle / max_tot /
+X   max_work_buf settings read below.  The thread:
+X     1. waits in wait_thr() for the start signal;
+X     2. makes a private copy of the query and, for every worker except 0,
+X        of the pstruct with its pam2 (and, when present, pam2p) matrices;
+X     3. calls init_work() for each query frame and for the shuffled query;
+X     4. loops on get_wbuf()/put_wbuf(), scoring every sequence of each
+X        buffer with do_work() and storing the results in the buffer;
+X     5. releases everything it allocated and leaves through THR_EXIT()
+X        (pthread_exit() under WIN32) with &work_info->status.
+X
+X   Allocation failures for the query copies or the profile matrices are
+X   fatal (exit(1)); failure to allocate the shuffle buffer only disables
+X   the shuffled-library scores.
+*/
+void
+work_thread (struct thr_str *work_info)
+{
+X  struct buf_head *cur_buf;
+X  struct buf_str *cur_buf_p;
+X  struct buf_str *p_rbuf;
+X  unsigned char *aa1s = NULL;
+X  int cur_cnt;
+X  int my_worker;
+X  int i, j, npam, n0, nm0;
+X  int ix_score, debug_lib, zsflag, zs_win, ieven=0;
+X  int do_shuffle = 0;	/* stays 0 unless the shuffle buffer is allocated */
+X  int own_pam2p = 0;	/* set when this thread allocated its own pam2p[] */
+X  int frame;
+X
+X  struct rstruct rrst;
+X  struct pstruct my_pst, *my_ppst;
+X  unsigned char *aa0[6], *aa0s = NULL;
+X  void *f_str[6], *qf_str = NULL;
+X
+X  my_worker = work_info->worker;
+X
+X  wait_thr();	/* wait for start_thread predicate to drop to 0 */
+X
+X  /* do init_work */
+X
+X  /* let each thread have its own copy of the query */
+X  n0 = work_info->n0;
+X  nm0 = work_info->nm0;
+X
+X  if ((aa0[0]=(unsigned char *)calloc((size_t)n0+2,sizeof(unsigned char)))
+X      ==NULL) {
+X    fprintf(stderr," cannot allocate aa00[%d] for worker %d\n",
+X            n0, my_worker);
+X    exit(1);
+X  }
+X  /* element 0 is a '\0' sentinel; the sequence starts at element 1 */
+X  *aa0[0]='\0';
+X  aa0[0]++;
+X  memcpy(aa0[0],work_info->aa0,n0+1);
+X
+X  /* make certain that all but 0 have their own copy of pst */
+X  if (my_worker) {
+X    my_ppst = &my_pst;
+X    memcpy(my_ppst,work_info->ppst,sizeof(struct pstruct));
+X
+X    alloc_pam(MAXSQ, MAXSQ, my_ppst);
+X
+X    npam = (my_pst.ext_sq_set) ? my_pst.nsqx : my_pst.nsq;
+X
+X    for (i=0; i<=npam; i++) {
+X      for (j=0; j<=npam; j++) {
+X        my_pst.pam2[0][i][j] = work_info->ppst->pam2[0][i][j];
+X        my_pst.pam2[1][i][j] = work_info->ppst->pam2[1][i][j];
+X      }
+X    }
+X
+X    if (work_info->ppst->pam_pssm && work_info->ppst->pam2p[0]) {
+X      my_ppst->pam2p[0] = alloc_pam2p(n0,npam);
+X      my_ppst->pam2p[1] = alloc_pam2p(n0,npam);
+X      if (my_ppst->pam2p[0]==NULL || my_ppst->pam2p[1]==NULL) {
+X        fprintf(stderr," cannot allocate pam2p[%d] for worker %d\n",
+X                n0, my_worker);
+X        exit(1);
+X      }
+X      own_pam2p = 1;
+X      for (i=0; i<n0; i++) {
+X        for (j=0; j <= npam; j++) {
+X          my_pst.pam2p[0][i][j] = work_info->ppst->pam2p[0][i][j];
+X          my_pst.pam2p[1][i][j] = work_info->ppst->pam2p[1][i][j];
+X        }
+X      }
+X    }
+X  }
+X  else my_ppst=work_info->ppst;
+X
+X  /* note that aa[5,4,3,2] are never used, but are provided so that frame
+X     can range from 0 .. 5; likewise for f_str[5..2] */
+X
+X  aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
+X  init_work (aa0[0], n0, my_ppst, &f_str[0]);
+X
+X  f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
+X
+X  if (work_info->qframe == 2) {
+X    if ((aa0[1]=(unsigned char *)calloc((size_t)n0+2,sizeof(unsigned char)))==NULL) {
+X      fprintf(stderr," cannot allocate aa01[%d] for worker %d\n",
+X              n0, my_worker);
+X      exit(1);	/* the buffer is dereferenced immediately below */
+X    }
+X    *aa0[1]='\0';
+X    aa0[1]++;
+X    memcpy(aa0[1],work_info->aa0,n0+1);
+X    revcomp(aa0[1],n0,my_ppst->c_nt);
+X    init_work (aa0[1], n0, my_ppst, &f_str[1]);
+X  }
+X
+X  if (work_info->qshuffle) {
+X    if ((aa0s=(unsigned char *)calloc(n0+2,sizeof(char)))==NULL) {
+X      fprintf(stderr,"cannot allocate aa0s[%d]\n",n0+2);
+X      exit(1);
+X    }
+X    *aa0s='\0';
+X    aa0s++;
+X    memcpy(aa0s,aa0[0],n0);
+X    qshuffle(aa0s,n0,nm0);
+X    init_work (aa0s, n0, my_ppst, &qf_str);
+X  }
+X
+X  ix_score = my_ppst->score_ix;
+X  debug_lib = my_ppst->debug_lib;
+X  zsflag = my_ppst->zsflag;
+X  zs_win = my_ppst->zs_win;
+X
+X  if (zsflag >= 10) {
+X    if((aa1s=calloc(work_info->max_tot+1,sizeof(char))) == NULL) {
+X      /* not fatal: do_shuffle stays 0 and shuffled scores are skipped */
+X      fprintf(stderr,"unable to allocate shuffled library sequence\n");
+X    }
+X    else {
+X      *aa1s=0;
+X      aa1s++;
+X      do_shuffle =1;
+X      irand(0);
+X    }
+X  }
+X
+X  /* main work loop */
+X  while (get_wbuf(&cur_buf,work_info->max_work_buf)) {
+X
+X    cur_cnt = cur_buf->buf_cnt;
+X    if (cur_cnt == -1) break;
+X    cur_buf_p = cur_buf->buf;
+X
+X    while (cur_cnt--) { /* count down the number of sequences */
+X      p_rbuf = cur_buf_p++; /* step through each sequence */
+X      p_rbuf->rst.score[0] = p_rbuf->rst.score[1] = p_rbuf->rst.score[2] = 0;
+X      frame = p_rbuf->frame;
+X
+#ifdef DEBUG
+X      if (debug_lib) {
+X        if (frame >= 2) fprintf(stderr,"* frame: %d\n",frame);
+X        for (i=0; i<p_rbuf->n1; i++)
+X          if (p_rbuf->aa1b[i]>=my_ppst->nsqx) {
+X            fprintf(stderr,
+X                    "%s residue[%d/%d] %d range (%d)\n",
+X                    p_rbuf->libstr,i,p_rbuf->n1,p_rbuf->aa1b[i],my_ppst->nsqx);
+X            p_rbuf->aa1b[i]=0;
+X            p_rbuf->n1=i-1;
+X            break;
+X          }
+X      }
+#endif
+X
+X      do_work (aa0[frame], n0, p_rbuf->aa1b, p_rbuf->n1, frame,
+X               my_ppst, f_str[frame], 0, &p_rbuf->rst);
+X
+X      if (work_info->qshuffle) {
+X        do_work(aa0s,n0,p_rbuf->aa1b, p_rbuf->n1, frame,
+X                my_ppst, qf_str, 1, &rrst);
+X        p_rbuf->qr_score = rrst.score[ix_score];
+X        p_rbuf->qr_escore = rrst.escore;
+X      }
+X
+X      if (do_shuffle) {
+X        if (zs_win > 0) wshuffle(p_rbuf->aa1b,aa1s,p_rbuf->n1,zs_win,&ieven);
+X        else shuffle(p_rbuf->aa1b,aa1s,p_rbuf->n1);
+X
+X        do_work (aa0[frame], n0, aa1s, p_rbuf->n1, frame,
+X                 my_ppst, f_str[frame], 0, &rrst);
+X        p_rbuf->r_score = rrst.score[ix_score];
+X        p_rbuf->r_escore = rrst.escore;
+X      }
+X    }
+X
+X    cur_buf->have_results = 1;
+X
+X    put_wbuf(cur_buf,work_info->max_work_buf);
+X
+X  } /* end main while */
+X
+X  /* every buffer was advanced past its sentinel, hence the "-1" */
+X  close_work(aa0[0], n0, my_ppst, &f_str[0]);
+X  free(aa0[0]-1);
+X  if (work_info->qframe == 2) {
+X    close_work(aa0[1], n0, my_ppst, &f_str[1]);
+X    free(aa0[1]-1);
+X  }
+X
+X  if (work_info->qshuffle) {
+X    close_work(aa0s, n0, my_ppst, &qf_str);
+X    free(aa0s-1);
+X  }
+X
+X  if (do_shuffle) free(aa1s-1);
+X
+X  if (my_worker) {
+X    free(my_pst.pam2[1][0]);
+X    free(my_pst.pam2[0][0]);
+X    free(my_pst.pam2[1]);
+X    free(my_pst.pam2[0]);
+X
+X    /* free only profiles allocated here; otherwise my_pst.pam2p[] still
+X       holds the shared pointers copied from work_info->ppst */
+X    if (own_pam2p) {
+X      free_pam2p(my_pst.pam2p[0]);
+X      free_pam2p(my_pst.pam2p[1]);
+X    }
+X  }
+X
+#ifdef WIN32
+X  pthread_exit(&work_info->status);
+#else
+X  THR_EXIT(&work_info->status);
+#endif
+X
+}  /* end work_thread */
+X
+SHAR_EOF
+chmod 0644 work_thr.c ||
+echo 'restore of work_thr.c failed'
+Wc_c="`wc -c < 'work_thr.c'`"
+test 7001 -eq "$Wc_c" ||
+ echo 'work_thr.c: original size 7001, current size' "$Wc_c"
+fi
+# ============= workacc.c ==============
+# Member: workacc.c -- C source.  The payload defines w_init, then (only
+# when MPI_SRC is not defined) alloc_pam, alloc_pam2p, free_pam2p and
+# aancpy, then shuffle, qshuffle, wshuffle, initseq, freeseq and revcomp.
+# Extraction is skipped when the file already exists, unless the first
+# argument to the archive is -c.
+#
+# The payload is kept byte-for-byte as shipped: the check after the
+# here-document compares the restored file against a recorded 5262 bytes.
+#
+# NOTE(review): problems seen in the payload, to raise upstream rather than
+# patch inside the archive:
+#  - wshuffle: on the *ieven pass the window loop runs while k<(n-wsiz) and
+#    the tail loop covers n%wsiz bytes, so when n is a multiple of wsiz the
+#    last wsiz bytes are never shuffled; the other pass (k>=wsiz) has no
+#    such gap.
+#  - alloc_pam: a local err_str[128] shadows the file-scope err_str, so the
+#    formatted message is discarded on "return -1"; buffers obtained before
+#    the failing malloc are not freed.
+#  - initseq: *seqc0a, *seqc1 and *seqca are computed from the calloc result
+#    before that result is tested for NULL.
+#  - qshuffle: aa0start has nm0+1 slots, but k is incremented for every
+#    EOSEQ/ESS byte found in aa0[0..n0) with no upper bound.
+#  - alloc_pam and qshuffle are written without a return type; qshuffle
+#    returns no value.
+#  - the comment above wshuffle reads "from from to from"; the code copies
+#    from 'from' to 'to'.
+if test -f 'workacc.c' -a X"$1" != X"-c"; then
+ echo 'x - skipping workacc.c (File already exists)'
+else
+# The here-document delimiter is quoted, so the payload is not expanded by
+# the shell; sed removes one leading X from each payload line.  If sed or
+# the following chmod fails, the 'restore of ... failed' message is printed.
+echo 'x - extracting workacc.c (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'workacc.c' &&
+X
+/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
+X U. of Virginia */
+X
+/* $Name: fa_34_26_5 $ - $Id: workacc.c,v 1.19 2006/02/07 17:58:19 wrp Exp $ */
+X
+/* Concurrent read version */
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+X
+#include "param.h"
+X
+#define XTERNAL
+#include "uascii.h"
+#include "upam.h"
+#undef XTERNAL
+X
+char err_str[128];
+X
+/* Initialization - set up defaults - assume protein sequence */
+void w_init ()
+{
+X pascii=aascii;
+}
+X
+#ifndef MPI_SRC
+/* allocate memory for pam matrix - identical to version in initfa/sw.c */
+alloc_pam (int d1, int d2, struct pstruct *ppst)
+{
+X int i, *d2p;
+X char err_str[128];
+X
+X if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
+X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X return -1;
+X }
+X
+X if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
+X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X return -1;
+X }
+X
+X if ((d2p = pam12 = (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {
+X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
+X return -1;
+X }
+X for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[0][i] = d2p;
+X
+X if ((d2p=pam12x= (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {
+X sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2);
+X return -1;
+X }
+X for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[1][i] = d2p;
+X
+X return 1;
+}
+X
+int **
+alloc_pam2p(int len, int nsq) {
+X int i;
+X int **pam2p;
+X
+X if ((pam2p = (int **)calloc(len,sizeof(int *)))==NULL) {
+X fprintf(stderr," Cannot allocate pam2p: %d\n",len);
+X return NULL;
+X }
+X
+X if((pam2p[0] = (int *)calloc((nsq+1)*len,sizeof(int)))==NULL) {
+X fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len);
+X free(pam2p);
+X return NULL;
+X }
+X
+X for (i=1; i<len; i++) {
+X pam2p[i] = pam2p[0] + (i*(nsq+1));
+X }
+X
+X return pam2p;
+}
+X
+void free_pam2p(int **pam2p) {
+X if (pam2p) {
+X free(pam2p[0]);
+X free(pam2p);
+X }
+}
+X
+void
+aancpy(char *to, char *from, int count, struct pstruct pst)
+{
+X char *tp, *sq;
+X int nsq;
+X
+X tp=to;
+X
+X if (pst.ext_sq_set) {
+X nsq = pst.nsqx;
+X sq = pst.sqx;
+X }
+X else {
+X nsq = pst.nsq;
+X sq = pst.sq;
+X }
+X
+X while (count-- && *from) {
+X if (*from <= nsq) *tp++ = sq[*(from++)];
+X else *tp++ = *from++;
+X }
+X *tp='\0';
+}
+#endif
+X
+/* copies from from to to shuffling */
+X
+void
+shuffle(unsigned char *from, unsigned char *to, int n)
+{
+X int i,j; unsigned char tmp;
+X
+X if (from != to) memcpy((void *)to,(void *)from,(size_t)n);
+X
+X for (i=n; i>0; i--) {
+X j = nrand(i);
+X tmp = to[j];
+X to[j] = to[i-1];
+X to[i-1] = tmp;
+X }
+X to[n] = 0;
+}
+X
+/* this shuffle is for FASTS */
+/* convert ',' -> '\0', shuffle each of the substrings */
+qshuffle(unsigned char *aa0, int n0, int nm0)
+{
+X unsigned char **aa0start, *aap, tmp;
+X int i,j,k, ns;
+X
+X if ((aa0start=(unsigned char **)calloc(nm0+1,
+X sizeof(unsigned char *)))==NULL) {
+X fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);
+X exit(1);
+X }
+X aa0start[0]=aa0;
+X for (k=1,i=0; i<n0; i++) {
+X if (aa0[i]==EOSEQ || aa0[i]==ESS) {
+X aa0[i]='\0';
+X aa0start[k++] = &aa0[i+1];
+X }
+X }
+X
+X /* aa0start has the beginning of each substring */
+X for (k=0; k<nm0; k++) {
+X aap=aa0start[k];
+X ns = strlen((char *)aap);
+X for (i=ns; i>1; i--) {
+X j = nrand(i);
+X tmp = aap[j];
+X aap[j] = aap[i-1];
+X aap[i-1] = tmp;
+X }
+X aap[ns] = 0;
+X }
+X
+X for (k=1; k<nm0; k++) {
+/* aap = aa0start[k];
+X while (*aap) fputc(pst.sq[*aap++],stderr);
+X fputc('\n',stderr);
+*/
+X aa0start[k][-1]=ESS;
+X }
+X
+X free(aa0start);
+}
+X
+/* copies from from to from shuffling */
+void
+wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven)
+{
+X int i,j, k, mm;
+X unsigned char tmp, *top;
+X
+X memcpy((void *)to,(void *)from,n);
+X
+X mm = n%wsiz;
+X
+X if (*ieven) {
+X for (k=0; k<(n-wsiz); k += wsiz) {
+X top = &to[k];
+X for (i=wsiz; i>0; i--) {
+X j = nrand(i);
+X tmp = top[j];
+X top[j] = top[i-1];
+X top[i-1] = tmp;
+X }
+X }
+X top = &to[n-mm];
+X for (i=mm; i>0; i--) {
+X j = nrand(i);
+X tmp = top[j];
+X top[j] = top[i-1];
+X top[i-1] = tmp;
+X }
+X *ieven = 0;
+X }
+X else {
+X for (k=n; k>=wsiz; k -= wsiz) {
+X top = &to[k-wsiz];
+X for (i=wsiz; i>0; i--) {
+X j = nrand(i);
+X tmp = top[j];
+X top[j] = top[i-1];
+X top[i-1] = tmp;
+X }
+X }
+X top = &to[0];
+X for (i=mm; i>0; i--) {
+X j = nrand(i);
+X tmp = top[j];
+X top[j] = top[i-1];
+X top[i-1] = tmp;
+X }
+X *ieven = 1;
+X }
+X to[n] = 0;
+}
+X
+void initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz) /* initialize arrays */
+{
+X *seqc0=(char *)calloc((size_t)(seqsiz+1)*4,sizeof(char));
+X *seqc0a= *seqc0+seqsiz+1;
+X *seqc1= *seqc0a+seqsiz+1;
+X *seqca= *seqc1+seqsiz+1;
+X if (*seqc0==NULL)
+X {fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
+X exit(1);}
+}
+X
+void freeseq(char **seqc0, char **seqc1, char **seqca)
+{
+X free(*seqc0);
+}
+X
+#define ESS 49
+X
+void
+revcomp(unsigned char *seq, int n, int *c_nt)
+{
+X unsigned char tmp;
+X int i, ni;
+X
+X for (i=0, ni = n-1; i< n/2; i++,ni--) {
+X tmp = c_nt[seq[i]];
+X seq[i] = c_nt[seq[ni]];
+X seq[ni] = tmp;
+X }
+X if ((n%2)==1) {
+X i = n/2;
+X seq[i] = c_nt[seq[i]];
+X }
+X seq[n]=0;
+}
+SHAR_EOF
+chmod 0644 workacc.c ||
+echo 'restore of workacc.c failed'
+# Size check: compare the restored file's byte count with the recorded 5262.
+# A mismatch is only reported with echo; extraction continues.
+Wc_c="`wc -c < 'workacc.c'`"
+test 5262 -eq "$Wc_c" ||
+ echo 'workacc.c: original size 5262, current size' "$Wc_c"
+fi
+# ============= xurt8c.aa ==============
+# Member: xurt8c.aa -- text data: one '>' header line (XURT8C, glutathione
+# transferase 8, rat) followed by four lines of upper-case sequence letters.
+# NOTE(review): presumably a sample protein sequence for the FASTA programs
+# in this archive -- confirm against the package documentation.
+# Extraction is skipped when the file already exists, unless the first
+# argument to the archive is -c.
+if test -f 'xurt8c.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping xurt8c.aa (File already exists)'
+else
+# Quoted here-document: the payload is written literally after sed removes
+# one leading X per line (none of these payload lines start with X).
+echo 'x - extracting xurt8c.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'xurt8c.aa' &&
+>XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
+MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
+ILSYLAAKYNLYGKDLKERVRIDMYADGTQDLMMMIIGAPFKAPQEKEESLALAVKRAKNRYFPVFEKIL
+KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
+HYVDVVRTVLKF
+SHAR_EOF
+chmod 0644 xurt8c.aa ||
+echo 'restore of xurt8c.aa failed'
+# Size check against the recorded 302 bytes; a mismatch is only reported.
+Wc_c="`wc -c < 'xurt8c.aa'`"
+test 302 -eq "$Wc_c" ||
+ echo 'xurt8c.aa: original size 302, current size' "$Wc_c"
+fi
+# ============= xurt8c.lc ==============
+# Member: xurt8c.lc -- text data with the same header line and the same
+# sequence letters as xurt8c.aa, except that the second sequence line is
+# written in lower case.
+# NOTE(review): the lower-case run presumably marks a masked/low-complexity
+# region for the search programs -- confirm against the package docs.
+# Extraction is skipped when the file already exists, unless the first
+# argument to the archive is -c.
+if test -f 'xurt8c.lc' -a X"$1" != X"-c"; then
+ echo 'x - skipping xurt8c.lc (File already exists)'
+else
+# Quoted here-document: the payload is written literally after sed removes
+# one leading X per line (none of these payload lines start with X).
+echo 'x - extracting xurt8c.lc (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'xurt8c.lc' &&
+>XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
+MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
+ilsylaakynlygkdlkervridmyadgtqdlmmmiigapfkapqekeeslalavkraknryfpvfekil
+KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
+HYVDVVRTVLKF
+SHAR_EOF
+chmod 0644 xurt8c.lc ||
+echo 'restore of xurt8c.lc failed'
+# Size check against the recorded 302 bytes; a mismatch is only reported.
+Wc_c="`wc -c < 'xurt8c.lc'`"
+test 302 -eq "$Wc_c" ||
+ echo 'xurt8c.lc: original size 302, current size' "$Wc_c"
+fi
+# ============= xurtg.aa ==============
+# Member: xurtg.aa -- text data: one '>' header line (XURTG, glutathione
+# transferase Ya, rat) followed by four lines of upper-case sequence letters.
+# NOTE(review): presumably a sample protein sequence for the FASTA programs
+# in this archive -- confirm against the package documentation.
+# Extraction is skipped when the file already exists, unless the first
+# argument to the archive is -c.
+if test -f 'xurtg.aa' -a X"$1" != X"-c"; then
+ echo 'x - skipping xurtg.aa (File already exists)'
+else
+# Quoted here-document: the payload is written literally after sed removes
+# one leading X per line (none of these payload lines start with X).
+echo 'x - extracting xurtg.aa (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'xurtg.aa' &&
+>XURTG glutathione transferase (EC 2.5.1.18) Ya - rat
+MSGKPVLHYFNARGRMECIRWLLAAAGVEFDEKFIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRA
+ILNYIATKYDLYGKDMKERALIDMYTEGILDLTEMIMQLVICPPDQKEAKTALAKDRTKNRYLPAFEKVL
+KSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKLPMDA
+KQIEEARKIFKF
+SHAR_EOF
+chmod 0644 xurtg.aa ||
+echo 'restore of xurtg.aa failed'
+# Size check against the recorded 281 bytes; a mismatch is only reported.
+Wc_c="`wc -c < 'xurtg.aa'`"
+test 281 -eq "$Wc_c" ||
+ echo 'xurtg.aa: original size 281, current size' "$Wc_c"
+fi
+exit 0