+++ /dev/null
-#!/bin/sh
-# This is a shell archive (produced by shar 3.49)
-# To extract the files from this archive, save it to a file, remove
-# everything above the "!/bin/sh" line above, and type "sh file_name".
-#
-# made 04/29/2007 13:53 UTC by wrp@wrpsun2.bioch.Virginia.EDU
-# Source directory /home2.t2/users/wrp/fa_cvs/fasta-34.26.5
-#
-# existing files will NOT be overwritten unless -c is specified
-#
-# This shar contains:
-# length mode name
-# ------ ---------- ------------------------------------------
-# 1018 -rw-r--r-- COPYRIGHT
-# 373 -rw-r--r-- FASTA_LIST
-# 4638 -rw-r--r-- FileDlog.c
-# 1580 -rw-r--r-- Makefile
-# 831 -rw-r--r-- Makefile.NetBSD
-# 684 -rw-r--r-- Makefile.cray_pvp
-# 8454 -rw-r--r-- Makefile.fcom
-# 2135 -rw-r--r-- Makefile.freebsd
-# 1484 -rw-r--r-- Makefile.hpux_it
-# 740 -rw-r--r-- Makefile.ibm
-# 1580 -rw-r--r-- Makefile.linux
-# 1577 -rw-r--r-- Makefile.linux_mysql
-# 1581 -rw-r--r-- Makefile.linux_pgsql
-# 1548 -rw-r--r-- Makefile.linux_sql
-# 1671 -rw-r--r-- Makefile.linux_sse2
-# 13073 -rw-r--r-- Makefile.mpcom
-# 1600 -rw-r--r-- Makefile.mpi4
-# 1602 -rw-r--r-- Makefile.mpi4_bluegene
-# 1509 -rw-r--r-- Makefile.mpi4_sql
-# 8182 -rwxr-xr-x Makefile.nm_fcom
-# 27480 -rwxr-xr-x Makefile.nm_pcom
-# 905 -rwxr-xr-x Makefile.nmk_icl
-# 2116 -rw-r--r-- Makefile.os_x
-# 1917 -rw-r--r-- Makefile.os_x86
-# 1922 -rw-r--r-- Makefile.pLinux
-# 1946 -rw-r--r-- Makefile.pLinux_sql
-# 24893 -rw-r--r-- Makefile.pcom
-# 13214 -rw-r--r-- Makefile.pvcom
-# 1344 -rw-r--r-- Makefile.pvm4
-# 1264 -rw-r--r-- Makefile.pvm4_sql
-# 1238 -rw-r--r-- Makefile.sgi
-# 1150 -rw-r--r-- Makefile.sun
-# 1264 -rw-r--r-- Makefile.sun_x86
-# 9746 -rw-r--r-- Makefile.tc
-# 1304 -rw-r--r-- Makefile34.common
-# 1330 -rw-r--r-- Makefile34.common_sql
-# 765 -rwxr-xr-x Makefile34.nmk_com
-# 1311 -rw-r--r-- Makefile34m.common
-# 1395 -rw-r--r-- Makefile34m.common_mysql
-# 1407 -rw-r--r-- Makefile34m.common_pgsql
-# 1406 -rw-r--r-- Makefile34m.common_sql
-# 722 -rw-r--r-- README
-# 2614 -rw-r--r-- README.versions
-# 1332 -rw-r--r-- Readme.Mac
-# 321 -rw-r--r-- a_mark.h
-# 504 -rw-r--r-- aamap.h
-# 3021 -rw-r--r-- ag_stats.c
-# 758 -rw-r--r-- aln_structs.h
-# 10311 -rw-r--r-- alt_parms.h
-# 2319 -rw-r--r-- altlib.h
-# 10085 -rw-r--r-- apam.c
-# 1922 -rw-r--r-- blosum45.mat
-# 1921 -rw-r--r-- blosum50.mat
-# 1922 -rw-r--r-- blosum62.mat
-# 1924 -rw-r--r-- blosum80.mat
-# 2528 -rw-r--r-- bovgh.seq
-# 986 -rw-r--r-- bovprl.seq
-# 11467 -rw-r--r-- c_dispn.c
-# 3492 -rw-r--r-- checkevent.c
-# 55202 -rw-r--r-- comp_lib.c
-# 21270 -rw-r--r-- compacc.c
-# 536 -rw-r--r-- create_seq_demo.sql
-# 81 -rw-r--r-- cvs_id
-# 6955 -rw-r--r-- dec_pthr_subs.c
-# 1116 -rw-r--r-- dec_pthr_subs.h
-# 3530 -rw-r--r-- defs.h
-# 976 -rw-r--r-- dna.mat
-# 10740 -rw-r--r-- doinit.c
-# 3226 -rw-r--r-- drop_func.h
-# 48853 -rw-r--r-- dropff2.c
-# 59078 -rw-r--r-- dropfs2.c
-# 73324 -rw-r--r-- dropfx.c
-# 77360 -rw-r--r-- dropfz2.c
-# 55870 -rw-r--r-- dropgsw.c
-# 677 -rw-r--r-- dropgsw.h
-# 70110 -rw-r--r-- dropnfa.c
-# 1882 -rw-r--r-- dropnfa.h
-# 34172 -rw-r--r-- dropnsw.c
-# 1286 -rw-r--r-- egmsmg.aa
-# 13742 -rw-r--r-- faatran.c
-# 1959 -rw-r--r-- fast_new
-# 529 -rw-r--r-- fasta.defaults
-# 1670 -rw-r--r-- fasta.options
-# 49762 -rw-r--r-- fasta20.doc
-# 10345 -rw-r--r-- fasta3.1
-# 177 -rw-r--r-- fasta3.rsp
-# 41617 -rw-r--r-- fasta3x.doc
-# 39642 -rw-r--r-- fasta3x.me
-# 9645 -rw-r--r-- fasta_func.doc
-# 4824 -rw-r--r-- fastf3.1
-# 2173 -rw-r--r-- fastlibs
-# 4556 -rw-r--r-- fasts3.1
-# 203 -rw-r--r-- fasts3.rsp
-# 1036 -rw-r--r-- getenv.c
-# 1174 -rw-r--r-- getopt.c
-# 9431 -rw-r--r-- getseq.c
-# 806 -rw-r--r-- grou_drome.pseg
-# 18633 -rw-r--r-- gst.nlib
-# 1405 -rw-r--r-- gst.seq
-# 300 -rw-r--r-- gtm1_human.aa
-# 291 -rw-r--r-- gtt1_drome.aa
-# 247 -rw-r--r-- h10_human.aa
-# 691 -rw-r--r-- h_altlib.h
-# 225 -rw-r--r-- hahu.aa
-# 1466 -rw-r--r-- hostacc.c
-# 7118 -rw-r--r-- hsgstm1b.gcg
-# 2788 -rw-r--r-- hsgstm1b.seq
-# 674 -rw-r--r-- htime.c
-# 1323 -rw-r--r-- humgstd.seq
-# 2210 -rw-r--r-- idn_aa.mat
-# 54882 -rw-r--r-- initfa.c
-# 13727 -rw-r--r-- karlin.c
-# 4128 -rw-r--r-- last_tat.c
-# 271 -rw-r--r-- lcbo.aa
-# 7638 -rw-r--r-- lib_sel.c
-# 5150 -rw-r--r-- list_db.c
-# 10617 -rw-r--r-- llgetaa.c
-# 56 -rw-r--r-- m1r.aa
-# 50 -rw-r--r-- m2.aa
-# 312 -rwxr-xr-x make_osx_univ.sh
-# 948 -rw-r--r-- map_db.1
-# 10852 -rw-r--r-- map_db.c
-# 212 -rw-r--r-- mchu.aa
-# 2255 -rw-r--r-- md_10.mat
-# 2256 -rw-r--r-- md_20.mat
-# 2255 -rw-r--r-- md_40.mat
-# 284 -rw-r--r-- mgstm1.aa
-# 310 -rw-r--r-- mgstm1.aaa
-# 1220 -rw-r--r-- mgstm1.e05
-# 1122 -rw-r--r-- mgstm1.eeq
-# 1116 -rw-r--r-- mgstm1.esq
-# 406 -rw-r--r-- mgstm1.gcg
-# 282 -rw-r--r-- mgstm1.lc
-# 677 -rw-r--r-- mgstm1.nt
-# 160 -rw-r--r-- mgstm1.nts
-# 259 -rw-r--r-- mgstm1.raa
-# 1167 -rw-r--r-- mgstm1.rev
-# 1158 -rw-r--r-- mgstm1.seq
-# 1286 -rw-r--r-- mgtt2_x.seq
-# 3057 -rw-r--r-- mm_file.h
-# 21318 -rw-r--r-- mmgetaa.c
-# 43 -rw-r--r-- ms1.aa
-# 1085 -rw-r--r-- msg.h
-# 17780 -rw-r--r-- mshowalign.c
-# 14393 -rw-r--r-- mshowbest.c
-# 2361 -rw-r--r-- mu.lib
-# 953 -rw-r--r-- musplfm.aa
-# 1042 -rw-r--r-- mw.h
-# 2047 -rw-r--r-- mwkw.aa
-# 500 -rw-r--r-- mwrtc1.aa
-# 1294 -rw-r--r-- myosin_bp.aa
-# 340 -rw-r--r-- mysql_demo1.sql
-# 381 -rw-r--r-- mysql_demo_pv.sql
-# 16406 -rw-r--r-- mysql_lib.c
-# 26 -rw-r--r-- n0.aa
-# 47 -rw-r--r-- n1.aa
-# 692 -rw-r--r-- n2.aa
-# 1482 -rw-r--r-- n2_fs.lib
-# 178 -rw-r--r-- n2s.aa
-# 243 -rw-r--r-- n2t.aa
-# 330 -rw-r--r-- n_fs.lib
-# 882 -rw-r--r-- ncbl2_head.h
-# 42930 -rw-r--r-- ncbl2_mlib.c
-# 1034 -rw-r--r-- ncbl_head.h
-# 12694 -rw-r--r-- ncbl_lib.c
-# 217 -rw-r--r-- ngt.aa
-# 111 -rw-r--r-- ngts.aa
-# 36301 -rw-r--r-- nmgetlib.c
-# 2452 -rwxr-xr-x nr_to_sql.pl
-# 566 -rw-r--r-- nrand.c
-# 533 -rw-r--r-- nrand48.c
-# 532 -rw-r--r-- nrandom.c
-# 385 -rw-r--r-- oohu.aa
-# 401 -rw-r--r-- oohu.raa
-# 55578 -rw-r--r-- p2_complib.c
-# 37611 -rw-r--r-- p2_workcomp.c
-# 1096 -rw-r--r-- p_mw.h
-# 1922 -rw-r--r-- pam120.mat
-# 1923 -rw-r--r-- pam250.mat
-# 3002 -rw-r--r-- param.h
-# 16978 -rw-r--r-- pgsql_lib.c
-# 230 -rw-r--r-- pirpsd.sql
-# 11147 -rw-r--r-- print_pssm.c
-# 340 -rw-r--r-- prio_atepa.aa
-# 2741 -rw-r--r-- prot_test.lib
-# 2786 -rw-r--r-- prot_test.lseg
-# 4969 -rw-r--r-- prss3.1
-# 119 -rw-r--r-- prss3.rsp
-# 317 -rw-r--r-- psql_demo.sql
-# 366 -rw-r--r-- psql_demo1.sql
-# 336 -rw-r--r-- psql_demo_pv.sql
-# 26268 -rw-r--r-- pssm_asn_subs.c
-# 1301 -rw-r--r-- pthr_subs.h
-# 7689 -rw-r--r-- pthr_subs2.c
-# 6657 -rw-r--r-- pvcomp.1
-# 914 -rw-r--r-- qrhuld.aa
-# 339 -rw-r--r-- randtest.c
-# 1184 -rw-r--r-- re_getlib.c
-# 1994 -rw-r--r-- readme.mpi_3.3
-# 1404 -rw-r--r-- readme.pvm_3.2
-# 7535 -rw-r--r-- readme.pvm_3.3
-# 3539 -rw-r--r-- readme.pvm_3.4
-# 1070 -rw-r--r-- readme.v30
-# 1871 -rw-r--r-- readme.v30t6
-# 5283 -rw-r--r-- readme.v30t7
-# 4461 -rw-r--r-- readme.v31t0
-# 3632 -rw-r--r-- readme.v31t1
-# 15841 -rw-r--r-- readme.v32t0
-# 50697 -rw-r--r-- readme.v33t0
-# 66121 -rw-r--r-- readme.v34t0
-# 2402 -rw-r--r-- readme.w32
-# 16277 -rw-r--r-- res_stats.c
-# 998 -rw-r--r-- rna.mat
-# 1427 -rw-r--r-- sc_to_e.c
-# 69722 -rw-r--r-- scaleswn.c
-# 37581 -rw-r--r-- scaleswt.c
-# 5247 -rw-r--r-- search.html
-# 2033 -rw-r--r-- showrss.c
-# 12412 -rw-r--r-- showsum.c
-# 113815 -rw-r--r-- smith_waterman_altivec.c
-# 1144 -rw-r--r-- smith_waterman_altivec.h
-# 12106 -rw-r--r-- smith_waterman_sse2.c
-# 1723 -rwxr-xr-x smith_waterman_sse2.h
-# 4279 -rw-r--r-- structs.h
-# 12998 -rw-r--r-- tatstats.c
-# 4126 -rw-r--r-- tatstats.h
-# 2891 -rw-r--r-- test.bat
-# 2996 -rwxr-xr-x test.sh
-# 2775 -rwxr-xr-x test2.bat
-# 2429 -rwxr-xr-x test_osx.sh
-# 1597 -rwxr-xr-x test_s.sh
-# 1312 -rwxr-xr-x test_z.sh
-# 203 -rw-r--r-- tfasts3.rsp
-# 1144 -rw-r--r-- thr.h
-# 27376 -rw-r--r-- titin_hum.aa
-# 83286 -rw-r--r-- titin_hum.seq
-# 2006 -rw-r--r-- uascii.h
-# 16008 -rw-r--r-- upam.h
-# 3335 -rw-r--r-- url_subs.c
-# 1229 -rw-r--r-- uthr_subs.h
-# 2771 -rw-r--r-- vtml160.mat
-# 2899 -rw-r--r-- w_mw.h
-# 7001 -rw-r--r-- work_thr.c
-# 5262 -rw-r--r-- workacc.c
-# 302 -rw-r--r-- xurt8c.aa
-# 302 -rw-r--r-- xurt8c.lc
-# 281 -rw-r--r-- xurtg.aa
-#
-# ============= COPYRIGHT ==============
-# Restore COPYRIGHT unless a copy already exists; giving -c as the first
-# argument forces an existing copy to be overwritten.
-if [ -f 'COPYRIGHT' ] && [ X"$1" != X"-c" ]; then
-  echo 'x - skipping COPYRIGHT (File already exists)'
-else
-  echo 'x - extracting COPYRIGHT (Text)'
-  # Unpack the here-document (dropping the leading X guard) and set the mode;
-  # a failure of either step is reported but does not stop the archive.
-  if sed 's/^X//' << 'SHAR_EOF' > 'COPYRIGHT' &&
-X
-X Copyright 1988, 1991, 1992, 1993, 1994 1995, by William
-X R. Pearson and the University of Virginia. All rights
-X reserved. The FASTA program and documentation may not be sold or
-X incorporated into a commercial product, in whole or in part,
-X without written consent of William R. Pearson and the University
-X of Virginia. For further information regarding permission for
-X use or reproduction, please contact:
-X
-X David Hudson
-X Assistant Provost for Research
-X University of Virginia
-X P.O. Box 400301
-X Charlottesville, VA 22906-9025
-X
-X (434) 924-3606
-X
-X Code in the smith_waterman_sse2.c and smith_waterman_sse2.h files
-X is copyright (c) 2006 by Michael Farrar.
-X
-X This program may not be sold or incorporated into a commercial
-X product, in whole or in part, without written consent of Michael
-X Farrar. For further information regarding permission for use or
-X reproduction, please contact: Michael Farrar at
-X farrar.michael@gmail.com.
-X
-SHAR_EOF
-    chmod 0644 COPYRIGHT
-  then
-    :
-  else
-    echo 'restore of COPYRIGHT failed'
-  fi
-  # Compare the restored byte count with the size recorded at archive time.
-  Wc_c="$(wc -c < 'COPYRIGHT')"
-  if [ 1018 -eq "$Wc_c" ]; then
-    :
-  else
-    echo 'COPYRIGHT: original size 1018, current size' "$Wc_c"
-  fi
-fi
-# ============= FASTA_LIST ==============
-# Restore FASTA_LIST unless a copy already exists; giving -c as the first
-# argument forces an existing copy to be overwritten.
-if [ -f 'FASTA_LIST' ] && [ X"$1" != X"-c" ]; then
-  echo 'x - skipping FASTA_LIST (File already exists)'
-else
-  echo 'x - extracting FASTA_LIST (Text)'
-  # Unpack the here-document (dropping the leading X guard) and set the mode;
-  # a failure of either step is reported but does not stop the archive.
-  if sed 's/^X//' << 'SHAR_EOF' > 'FASTA_LIST' &&
-X
-X
-30 July 2003
-X
-If you regularly install the latest version of the fasta3 package from
-ftp://ftp.virginia.edu/pub/fasta, you may want to join the fasta_list
-majordomo mailing list. I plan to use this list to announce new
-releases and solicit bug reports.
-X
-To join the mailing list, go to the WWW page at:
-X
-X list.mail.virginia.edu/mailman/listinfo/fasta_list
-X
-Bill Pearson
-SHAR_EOF
-    chmod 0644 FASTA_LIST
-  then
-    :
-  else
-    echo 'restore of FASTA_LIST failed'
-  fi
-  # Compare the restored byte count with the size recorded at archive time.
-  Wc_c="$(wc -c < 'FASTA_LIST')"
-  if [ 373 -eq "$Wc_c" ]; then
-    :
-  else
-    echo 'FASTA_LIST: original size 373, current size' "$Wc_c"
-  fi
-fi
-# ============= FileDlog.c ==============
-if test -f 'FileDlog.c' -a X"$1" != X"-c"; then
- echo 'x - skipping FileDlog.c (File already exists)'
-else
-echo 'x - extracting FileDlog.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'FileDlog.c' &&
-X
-/* copyright (c) 1997 William R. Pearson */
-X
-/* used only in Mac versions for file selection */
-/* should use navigation services if available */
-X
-X
-#include <Dialogs.h>
-#include <Fonts.h>
-#include <Types.h>
-#include <Gestalt.h>
-#include <Resources.h>
-#include <Controls.h>
-#include <StandardFile.h>
-#include <Files.h>
-#include <Folders.h>
-X
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-X
-#define NIL nil
-#define PauseID 301
-#define ExitID 302
-#define FileDID 204
-#define SFileDID 205
-X
-void HiliteDlog(DialogPtr);
-X
-SFTypeList tlist={'TEXT',0L,0L,0L};
-X
-extern Point wpos;
-X
-FileDlog(prompt,freply) /* Ask the user to pick a file with the SFileDID custom dialog.
-X * prompt: C string shown through ParamText; it is converted to a Pascal
-X *         string in place for the call and converted back before returning.
-X * freply: reply record handed to CustomGetFile.
-X * Exits with status 1 when the SFileDID 'DLOG' resource cannot be loaded.
-X * Old-style (implicit int) definition; no value is returned. */
-X char *prompt;
-X StandardFileReply *freply;
-{
-X Point dpos={-1,-1}; /* passed as CustomGetFile's "where" argument */
-X if (GetResource('DLOG',SFileDID)==NIL) {
-X fprintf(stderr," cannot load %d DLOG resource\n",SFileDID); exit(1);
-X }
-X CtoPstr(prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText((StringPtr)prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-/* SFPGetFile(wpos, (StringPtr)prompt, 0L,(short)1, tlist, 0L, freply, FileDID, NIL); */
-X CustomGetFile(NIL,
-X -1, /* type count of -1 with a nil type list: presumably "show all files" - confirm against the Standard File docs */
-X nil,
-X freply,
-X SFileDID,
-X dpos,
-X nil,
-X nil,nil,nil,nil); /* the remaining hook/filter/data arguments are all nil */
-X
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X PtoCstr((StringPtr)prompt); /* give the caller its C string back */
-X }
-X
-TFileDlog(prompt,freply,plist,nl) /* Ask the user to pick a file, restricted by a caller-supplied type list.
-X * prompt: C string shown through ParamText; converted to a Pascal string
-X *         in place for the call and converted back before returning.
-X * freply: reply record handed to CustomGetFile.
-X * plist:  file-type list passed through to CustomGetFile.
-X * nl:     type count passed through to CustomGetFile with plist.
-X * Exits with status 1 when the SFileDID 'DLOG' resource cannot be loaded.
-X * Old-style (implicit int) definition; no value is returned. */
-X char *prompt;
-X StandardFileReply *freply;
-X SFTypeList plist;
-X int nl;
-{
-X Point dpos={-1,-1}; /* passed as CustomGetFile's "where" argument */
-X if (GetResource('DLOG',SFileDID)==NIL) {
-X fprintf(stderr," cannot load %d TFile DLOG resource\n",SFileDID); exit(1);
-X }
-X CtoPstr(prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText((StringPtr)prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-/* SFPGetFile(wpos,(StringPtr)prompt,0L,(short)nl,plist,0L,freply,FileDID,NIL); */
-X CustomGetFile(NIL,
-X nl, /* caller's type count ... */
-X plist, /* ... and type list */
-X freply,
-X SFileDID,
-X dpos,
-X nil,
-X nil,nil,nil,nil); /* the remaining hook/filter/data arguments are all nil */
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X PtoCstr((StringPtr)prompt); /* give the caller its C string back */
-X }
-X
-SFileDlog(prompt,freply) /* Ask the user to pick a file with the SFileDID custom dialog.
-X * Makes the same sequence of calls, with the same arguments, as FileDlog.
-X * prompt: C string shown through ParamText; converted to a Pascal string
-X *         in place for the call and converted back before returning.
-X * freply: reply record handed to CustomGetFile.
-X * Exits with status 1 when the SFileDID 'DLOG' resource cannot be loaded.
-X * Old-style (implicit int) definition; no value is returned. */
-X char *prompt;
-X StandardFileReply *freply;
-{
-X Point dpos={-1,-1}; /* passed as CustomGetFile's "where" argument */
-X
-X if (GetResource('DLOG',SFileDID)==NIL) {
-X fprintf(stderr," cannot load %d DLOG resource\n",SFileDID); exit(1);
-X }
-X
-X CtoPstr(prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText((StringPtr)prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-X
-/* StandardGetFile(NIL,(short)1,tlist,freply); */
-X CustomGetFile(NIL,
-X -1, /* type count of -1 with a nil type list: presumably "show all files" - confirm against the Standard File docs */
-X nil,
-X freply,
-X SFileDID,
-X dpos,
-X nil,
-X nil,nil,nil,nil); /* the remaining hook/filter/data arguments are all nil */
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X PtoCstr((StringPtr)prompt); /* give the caller its C string back */
-X }
-X
-STFileDlog(char *prompt, StandardFileReply *freply, /* Ask the user to pick a file with the SFileDID custom dialog.
-X * prompt: C string shown through ParamText; converted to a Pascal string
-X *         in place for the call and converted back before returning.
-X * freply: reply record handed to CustomGetFile.
-X * plist, nl: accepted but never used below - CustomGetFile is called with
-X *         -1 and nil instead.  NOTE(review): TFileDlog forwards the
-X *         equivalent arguments; confirm whether ignoring them is intended.
-X * Exits with status 1 when the SFileDID 'DLOG' resource cannot be loaded.
-X * No return type is declared and no value is returned. */
-X SFTypeList plist, int nl)
-{
-X Point dpos={-1,-1}; /* passed as CustomGetFile's "where" argument */
-X
-X if (GetResource('DLOG',SFileDID)==NIL) {
-X fprintf(stderr," cannot load %d TFile DLOG resource\n",SFileDID); exit(1);
-X }
-X CtoPstr(prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText((StringPtr)prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-X
-X CustomGetFile(NIL,
-X -1, /* not nl */
-X nil, /* not plist */
-X freply,
-X SFileDID,
-X dpos,
-X nil,
-X nil,nil,nil,nil); /* the remaining hook/filter/data arguments are all nil */
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X PtoCstr((StringPtr)prompt); /* give the caller its C string back */
-}
-X
-PauseAlert(unsigned char *prompt) /* Show prompt in the PauseID caution alert.
-X * prompt: C string; converted to a Pascal string in place.
-X *         NOTE(review): unlike the *FileDlog and IntroDlog routines in
-X *         this file, it is not converted back with PtoCstr before
-X *         returning - confirm callers do not reuse it as a C string.
-X * Exits with status 1 when the PauseID 'DLOG' resource cannot be loaded.
-X * No return type is declared and no value is returned. */
-{
-X if (GetResource('DLOG',PauseID)==NIL) {
-X fprintf(stderr," cannot load %d TFile DLOG resource\n",PauseID); exit(1); /* NOTE(review): message says "TFile" although the resource checked is PauseID */
-X }
-X CtoPstr((char *)prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText(prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-X CautionAlert(PauseID,NULL); /* no filter procedure is supplied */
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-}
-X
-IntroDlog(int DlogID, unsigned char *prompt) /* Show dialog DlogID with prompt as its text and wait for one item hit.
-X * DlogID: resource id of the 'DLOG' to display.
-X * prompt: C string; converted to a Pascal string in place for the call
-X *         and converted back before returning.
-X * Exits with status 1 when the 'DLOG' resource cannot be loaded.
-X * No return type is declared and no value is returned. */
-{
-X short itemHit; /* filled in by ModalDialog; not examined afterwards */
-X DialogPtr DP;
-X
-X CtoPstr((char *)prompt); /* in-place conversion: ParamText takes Pascal strings */
-X ParamText(prompt,"\p","\p","\p"); /* prompt is the first of the four substitution strings; the rest are empty */
-X
-X if (GetResource('DLOG',DlogID)==NIL) {
-X fprintf(stderr," cannot load %d Intro DLOG resource\n",DlogID); exit(1);
-X }
-X DP = GetNewDialog(DlogID,NULL,(WindowPtr)-1);
-X ShowWindow(DP);
-X SelectWindow(DP);
-X HiliteDlog(DP); /* draw the outline around dialog item 1 */
-X
-X ModalDialog(0L,&itemHit); /* no filter procedure is supplied */
-X DisposeDialog(DP);
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X PtoCstr(prompt); /* give the caller its C string back */
-}
-X
-/* Show dialog DlogID with four substitution strings and wait for one item hit.
-X *
-X * DlogID:       resource id of the 'DLOG' to display.
-X * p0,p1,p2,p3:  C strings passed to ParamText.  Every newline in each
-X *               of them is overwritten with a space, the strings are
-X *               converted to Pascal strings in place for the call, and
-X *               converted back before returning (the newline
-X *               replacement is not undone).
-X *
-X * Exits with status 1 when the 'DLOG' resource cannot be loaded.
-X * No return type is declared and no value is returned.
-X */
-NIntroDlog(int DlogID,unsigned char *p0,unsigned char *p1,
-X unsigned char *p2,unsigned char *p3)
-{
-X short itemHit; /* filled in by ModalDialog; not examined afterwards */
-X DialogPtr DP;
-X unsigned char *p;
-X
-X /* scrub newlines from all four strings (the last loop used to repeat p2,
-X    leaving p3 untouched) */
-X for (p=p0; *p; p++) if (*p=='\n') *p=' ';
-X for (p=p1; *p; p++) if (*p=='\n') *p=' ';
-X for (p=p2; *p; p++) if (*p=='\n') *p=' ';
-X for (p=p3; *p; p++) if (*p=='\n') *p=' ';
-X
-X /* ParamText takes Pascal strings: convert in place */
-X CtoPstr((char *)p0);
-X CtoPstr((char *)p1);
-X CtoPstr((char *)p2);
-X CtoPstr((char *)p3);
-X ParamText(p0,p1,p2,p3);
-X
-X if (GetResource('DLOG',DlogID)==NIL) {
-X fprintf(stderr," cannot load %d Intro DLOG resource\n",DlogID); exit(1);
-X }
-X DP = GetNewDialog(DlogID,NULL,(WindowPtr)-1);
-X ShowWindow(DP);
-X SelectWindow(DP);
-X HiliteDlog(DP); /* draw the outline around dialog item 1 */
-X
-X ModalDialog(0L,&itemHit); /* no filter procedure is supplied */
-X DisposeDialog(DP);
-X ParamText("\p","\p","\p","\p"); /* clear the substitution strings again */
-X /* give the caller its C strings back */
-X PtoCstr(p0);
-X PtoCstr(p1);
-X PtoCstr(p2);
-X PtoCstr(p3);
-}
-X
-void /* Draw a thick rounded outline around item 1 of dialog DP.  DP is made the current port and is left as the current port. */
-HiliteDlog(DialogPtr DP)
-{
-X Rect tRect; /* rectangle of item 1, then grown for the outline */
-X short tType; /* item type; fetched but not used */
-X Handle tItem; /* item handle; fetched but not used */
-X
-X SetPort(DP);
-X GetDialogItem(DP,1,&tType,&tItem,&tRect);
-X PenSize(3, 3); /* Change pen to draw thick default outline */
-X InsetRect(&tRect, -4, -4); /* Draw outside the button by 1 pixel */
-X FrameRoundRect(&tRect, 16, 16); /* Draw the outline */
-X PenSize(1, 1); /* Restore the pen size to the default value */
-}
-SHAR_EOF
-chmod 0644 FileDlog.c ||
-echo 'restore of FileDlog.c failed'
-Wc_c="`wc -c < 'FileDlog.c'`"
-test 4638 -eq "$Wc_c" ||
- echo 'FileDlog.c: original size 4638, current size' "$Wc_c"
-fi
-# ============= Makefile ==============
-if test -f 'Makefile' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile (File already exists)'
-else
-echo 'x - extracting Makefile (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-#
-# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -O3 breaks the alignment code
-#
-X
-CC= gcc -g -O2
-X
-#CC=gcc -Wall -pedantic -ansi -g -O
-#CC = gcc -g -DDEBUG
-#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# EBI uses the following with pgcc, -O3 does not work:
-# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
-X
-# this file works for x86 LINUX
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-LIB_M = -lm
-#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta33) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0644 Makefile ||
-echo 'restore of Makefile failed'
-Wc_c="`wc -c < 'Makefile'`"
-test 1580 -eq "$Wc_c" ||
- echo 'Makefile: original size 1580, current size' "$Wc_c"
-fi
-# ============= Makefile.NetBSD ==============
-if test -f 'Makefile.NetBSD' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.NetBSD (File already exists)'
-else
-echo 'x - extracting Makefile.NetBSD (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.NetBSD' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-# this file works for NetBSD
-#
-# provided by Marc Baudoin <babafou@babafou.eu.org>
-#
-X
-CC= cc -O
-#CC= cc -g -DDEBUG
-#CC= gcc -g -Wall
-#
-# standard line for normal searching
-CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your.host.here/fasta/cgi"' -DUSE_MMAP
-X
-# special options for SUPERFAMILIES
-#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DSFCHAR="'|'" -c -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP
-X
-LIB_M= -lm
-HFLAGS= -o
-NFLAGS= -o
-X
-# for NetBSD
-THR_SUBS = pthr_subs2
-THR_LIBS = -L/usr/pkg/pthreads/lib -lpthread
-THR_CC = -I/usr/pkg/pthreads/include
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-X
-include Makefile34m.common
-SHAR_EOF
-chmod 0644 Makefile.NetBSD ||
-echo 'restore of Makefile.NetBSD failed'
-Wc_c="`wc -c < 'Makefile.NetBSD'`"
-test 831 -eq "$Wc_c" ||
- echo 'Makefile.NetBSD: original size 831, current size' "$Wc_c"
-fi
-# ============= Makefile.cray_pvp ==============
-if test -f 'Makefile.cray_pvp' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.cray_pvp (File already exists)'
-else
-echo 'x - extracting Makefile.cray_pvp (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.cray_pvp' &&
-#
-# makefile for fasta33
-#
-# for more information on FASTA on CRAY's, see:
-#
-# http://home.cray.com/~cpsosa/ChemApps/BioInf/fasta/fasta.html
-# provided by: Carlos P. Sosa, cpsosa@cray.com
-#
-X
-CC= cc -h inline1,scalar3,task0,vector2
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-LIB_M=
-#
-X
-# One logical assignment: each physical line but the last must end in a
-# backslash, otherwise make ends CFLAGS at the first line and the remaining
-# options never reach the compiler.
-CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4 \
-X -DTHR_EXIT=pthread_exit -DPROGRESS \
-X -DFASTA_HOST='"crick.med.virginia.edu/fasta/cgi"' -DIS_BIG_ENDIAN
-X
-THR_SUBS = pthr_subs
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-X
-# renamed (fasta33) programs
-include Makefile33.nommap
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.cray_pvp ||
-echo 'restore of Makefile.cray_pvp failed'
-Wc_c="`wc -c < 'Makefile.cray_pvp'`"
-test 684 -eq "$Wc_c" ||
- echo 'Makefile.cray_pvp: original size 684, current size' "$Wc_c"
-fi
-# ============= Makefile.fcom ==============
-if test -f 'Makefile.fcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.fcom (File already exists)'
-else
-echo 'x - extracting Makefile.fcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.fcom' &&
-X
-#================ common .o files
-X
-doinit.o : doinit.c defs.h param.h upam.h structs.h uascii.h
-X $(CC) $(THR_CC) $(CFLAGS) -c doinit.c
-X
-init_sw.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DSSEARCH initfa.c -o init_sw.o
-X
-init_ssw.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DOSEARCH initfa.c -o init_ssw.o
-X
-init_rss.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS initfa.c -o init_rss.o
-X
-init_rfx.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c -o init_rfx.o
-X
-init_fa.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA initfa.c -o init_fa.o
-X
-init_ff.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF initfa.c -o init_ff.o
-X
-init_tf.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c -o init_tf.o
-X
-init_fs.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS initfa.c -o init_fs.o
-X
-init_fm.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM initfa.c -o init_fm.o
-X
-init_tfs.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -DTFAST initfa.c -o init_tfs.o
-X
-init_tfm.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -DTFAST initfa.c -o init_tfm.o
-X
-init_tfa.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c -o init_tfa.o
-X
-init_fx.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX initfa.c -o init_fx.o
-X
-init_tfx.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c -o init_tfx.o
-X
-init_fy.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY initfa.c -o init_fy.o
-X
-init_tfy.o : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c -o init_tfy.o
-X
-#================ miscellaneous
-X
-htime.o : htime.c
-X $(CC) $(THR_CC) $(CFLAGS) -c htime.c
-X
-compacc.o : compacc.c upam.h uascii.h param.h structs.h $(MWH) defs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c compacc.c
-X
-pssm_asn_subs.o : pssm_asn_subs.c defs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c pssm_asn_subs.c
-X
-#================ display list of best hits / alignments
-X
-showbest.o : $(SHOWBESTC) $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c $(SHOWBESTC) -o showbest.o
-X
-showrss.o : showrss.c $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c showrss.c
-X
-showun.o : mshowbest.c $(MWH) defs.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DSHOWUN mshowbest.c -o showun.o
-X
-showrel.o : $(SHOWBESTC) $(MWH) defs.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DSHOWREL $(SHOWBESTC) -o showrel.o
-X
-showsum.o : showsum.c $(MWH) defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c showsum.c
-X
-$(SHOWALIGN).o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c $(SHOWALIGN).c -o $(SHOWALIGN).o
-X
-$(SHOWALIGN)_u.o : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -DSHOWUN -c -o $(SHOWALIGN)_u.o $(SHOWALIGN).c
-re_getlib.o : re_getlib.c mw.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c re_getlib.c
-X
-lib_sel.o : lib_sel.c defs.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c lib_sel.c
-X
-c_dispn.o : c_dispn.c defs.h structs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c c_dispn.c
-X
-#================ statistical functions
-X
-karlin.o : karlin.c param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c karlin.c
-X
-scaleswn.o : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(THR_CC) $(CFLAGS) -c scaleswn.c
-X
-scaleswtf.o : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -c scaleswt.c -o scaleswtf.o
-X
-scaleswts.o : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(THR_CC) $(CFLAGS) -c scaleswt.c -o scaleswts.o
-X
-tatstats_fs.o : tatstats.c tatstats.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS tatstats.c -o tatstats_fs.o
-X
-tatstats_ff.o : tatstats.c tatstats.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF tatstats.c -o tatstats_ff.o
-X
-tatstats_fm.o : tatstats.c tatstats.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM tatstats.c -o tatstats_fm.o
-X
-last_tat.o : last_tat.c defs.h mm_file.h structs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c last_tat.c
-X
-#================ drop functions - actual scores/alignments
-X
-drop_nfa.o : dropnfa.c dropnfa.h param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropnfa.c -o drop_nfa.o
-X
-# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
-# that tatstats.h is built appropriately
-X
-drop_ff.o : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -c dropff2.c -o drop_ff.o
-X
-drop_tff.o : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -DFASTF -DTFAST -c dropff2.c -o drop_tff.o
-X
-drop_ff2.o : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF dropff2.c -o drop_ff2.o
-X
-# Same sources and flags as drop_tff.o; the object is written to the
-# target's own name so that this rule actually produces drop_tff2.o and does
-# not overwrite drop_tff.o.
-drop_tff2.o : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c -o drop_tff2.o
-X
-drop_fs.o : dropfs2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -DFASTS -c dropfs2.c -o drop_fs.o
-X
-drop_tfs.o : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c -o drop_tfs.o
-X
-drop_fm.o : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM dropfs2.c -o drop_fm.o
-X
-drop_tfm.o : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c -o drop_tfm.o
-X
-drop_tfa.o : dropnfa.c dropnfa.h upam.h param.h defs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DTFASTA dropnfa.c -o drop_tfa.o
-X
-drop_fx.o : dropfx.c upam.h param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropfx.c -o drop_fx.o
-X
-drop_tfx.o : dropfx.c upam.h param.h defs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfx.c -o drop_tfx.o
-X
-drop_fz.o : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropfz2.c -o drop_fz.o
-X
-drop_tfz.o : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DTFAST dropfz2.c -o drop_tfz.o
-X
-dropnsw.o : dropnsw.c upam.h param.h structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropnsw.c
-X
-dropgsw.o : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropgsw.c
-X
-smith_waterman_altivec.o : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw.h defs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c smith_waterman_altivec.c
-X
-smith_waterman_sse2.o : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw.h defs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c smith_waterman_sse2.c
-X
-dropnw.o : dropnw.c upam.h param.h structs.h drop_func.h
-X $(CC) $(THR_CC) $(CFLAGS) -c dropnw.c
-X
-#================ reading query, libraries
-X
-getseq.o : getseq.c defs.h uascii.h structs.h upam.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c getseq.c
-X
-llgetaa.o : llgetaa.c upam.h uascii.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DNOLIB llgetaa.c
-X
-lgetlib.o : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c $(NGETLIB).c -o lgetlib.o
-X
-lgetaa_m.o : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c mmgetaa.c -o lgetaa_m.o
-X
-ncbl_lib.o : ncbl_lib.c ncbl_head.h
-X $(CC) $(THR_CC) $(CFLAGS) -c ncbl_lib.c
-X
-ncbl2_mlib.o : ncbl2_mlib.c ncbl2_head.h mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c ncbl2_mlib.c
-X
-mysql_lib.o : mysql_lib.c mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c mysql_lib.c
-X
-pgsql_lib.o : pgsql_lib.c mm_file.h
-X $(CC) $(THR_CC) $(CFLAGS) -c pgsql_lib.c
-X
-#================ threading functions
-X
-pthr_subs2.o : pthr_subs2.c thr.h pthr_subs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c pthr_subs2.c
-X
-uthr_subs.o : uthr_subs.c thr.h uthr_subs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c uthr_subs.c
-X
-#================ translation
-X
-faatran.o : faatran.c upam.h uascii.h
-X $(CC) $(THR_CC) $(CFLAGS) -c faatran.c
-X
-url_subs.o : url_subs.c structs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c url_subs.c
-X
-$(NRAND).o : $(NRAND).c
-X $(CC) $(THR_CC) $(CFLAGS) -c $(NRAND).c
-#================ pvm/mpi specific functions
-X
-# -c is given explicitly, as in the other rules of this file, so the rule
-# still yields an object file when the including Makefile's CFLAGS has no -c.
-hostacc.o : hostacc.c upam.h uascii.h
-X $(CC) $(THR_CC) $(CFLAGS) -c hostacc.c
-X
-# Built with $(NCC) rather than $(CC).  -c is given explicitly, as in the
-# other rules of this file, so -o workacc.o names an object file even when
-# the including Makefile's CFLAGS has no -c.
-workacc.o : workacc.c upam.h uascii.h param.h
-X $(NCC) $(THR_CC) $(CFLAGS) -c workacc.c -o workacc.o
-SHAR_EOF
-chmod 0644 Makefile.fcom ||
-echo 'restore of Makefile.fcom failed'
-Wc_c="`wc -c < 'Makefile.fcom'`"
-test 8454 -eq "$Wc_c" ||
- echo 'Makefile.fcom: original size 8454, current size' "$Wc_c"
-fi
-# ============= Makefile.freebsd ==============
-if test -f 'Makefile.freebsd' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.freebsd (File already exists)'
-else
-echo 'x - extracting Makefile.freebsd (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.freebsd' &&
-#
-# Makefile for building fasta3 on FreeBSD
-#
-# Fernan Aguero - <fernan@iib.unsam.edu.ar>
-X
-# we take care of doing variable assignment using the '?=' and '+='
-# operators to preserve the value of variables if they are already
-# defined. In FreeBSD this happens when fasta3 is built from the port or
-# when the user has set these variables -- most notably CC and/or CFLAGS
-# -- in /etc/make.conf
-X
-# Compiler executable, and optional flags
-CC?= gcc
-CFLAGS?= -g -O2
-X
-# your FASTA host
-FASTA_HOST?= "your_fasta_host"
-X
-# common CFLAGS. These are the set of CFLAGS that are always used
-# The large-file macro is spelled _LARGEFILE_SOURCE, matching the spelling
-# used by the standard option sets of the other Makefiles in this package.
-COMMON_CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -c -DMAX_WORKERS=2 \
-X -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -D_REENTRANT \
-X -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO \
-X -DHAS_INTTYPES -DSAMP_STATS
-X
-# standard options, these will be added to the common CFLAGS if
-# selected below
-STANDARD_CFLAGS= -DSFCHAR="':'" -DFASTA_HOST='${FASTA_HOST}' \
-X -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DPGM_DOC
-X
-# options for superfamily validations, these will be added to the common
-# CFLAGS if selected below
-SUPERFAMILY_CFLAGS= -DSFCHAR="'|'" -DSUPERFAMNUM -DBIG_LIB64
-X
-# here we define CFLAGS to be the sum of common flags plus a subset of
-# optional flags that define our intended use.
-# The standard flags are selected by default, although the user
-# can override this if s/he wants
-CFLAGS+= ${COMMON_CFLAGS} ${STANDARD_CFLAGS}
-X
-XXDIR?= /usr/local/bin
-X
-LIB_M+= -lm
-X
-HFLAGS+= -o
-NFLAGS+= -o
-X
-# FreeBSD users BEWARE! Different threading models ahead!
-X
-# The threading model has changed along the way from FreeBSD-4 to
-# FreeBSD-6. If you're building fasta3 on your own, you will need to
-# adjust this accordingly. The default works in FreeBSD-6x (currently
-# the recommended major version for use in production). Or better yet,
-# use the biology/fasta3 port from the ports collection, which will use
-# the correct threading library for your OSVERSION
-X
-THR_SUBS?= pthr_subs2
-THR_LIBS?= -lpthread
-THR_CC?=
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-include Makefile34m.common
-SHAR_EOF
-chmod 0644 Makefile.freebsd ||
-echo 'restore of Makefile.freebsd failed'
-Wc_c="`wc -c < 'Makefile.freebsd'`"
-test 2135 -eq "$Wc_c" ||
- echo 'Makefile.freebsd: original size 2135, current size' "$Wc_c"
-fi
-# ============= Makefile.hpux_it ==============
-if test -f 'Makefile.hpux_it' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.hpux_it (File already exists)'
-else
-echo 'x - extracting Makefile.hpux_it (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.hpux_it' &&
-#
-# makefile for fasta3, fasta3_t
-#
-# flags for HP-UX #
-X
-CC= cc -g -O2 +Onolimit -Wl,+pi,1M -Wl,+pd,1M -Wl,+mergeseg
-#CC = gcc -g -DDEBUG
-X
-#CC=gcc -Wall -pedantic -ansi -g -O
-#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# EBI uses the following with pgcc, -O3 does not work:
-# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
-X
-# this file works for x86 LINUX
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-LIB_M = -lm
-#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta33) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0644 Makefile.hpux_it ||
-echo 'restore of Makefile.hpux_it failed'
-Wc_c="`wc -c < 'Makefile.hpux_it'`"
-test 1484 -eq "$Wc_c" ||
- echo 'Makefile.hpux_it: original size 1484, current size' "$Wc_c"
-fi
-# ============= Makefile.ibm ==============
-if test -f 'Makefile.ibm' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.ibm (File already exists)'
-else
-echo 'x - extracting Makefile.ibm (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.ibm' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-X
-CC= xlc_r -O3 -qarch=auto -qtune=auto -qcache=auto
-X
-# for IBM with current pthreads
-CFLAGS= -DUNIX -DTIMES -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DIS_BIG_ENDIAN -DUSE_MMAP -DIBM_AIX -D_LARGE_FILES -DHAS_INTTYPES -D_LARGE_FILES -UMAXSEG -DSAMP_STATS -DPGM_DOC
-X
-# consider -D_LARGE_FILE_API -D_LARGE_FILES for files > 2 GB
-X
-LIB_M = -lm
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthreads
-THR_CC =
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0644 Makefile.ibm ||
-echo 'restore of Makefile.ibm failed'
-Wc_c="`wc -c < 'Makefile.ibm'`"
-test 740 -eq "$Wc_c" ||
- echo 'Makefile.ibm: original size 740, current size' "$Wc_c"
-fi
-# ============= Makefile.linux ==============
-if test -f 'Makefile.linux' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.linux (File already exists)'
-else
-echo 'x - extracting Makefile.linux (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-#
-# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -03 breaks the alignment code
-#
-X
-CC= gcc -g -O
-#CC = gcc -g -DDEBUG
-X
-#CC=gcc -Wall -pedantic -ansi -g -O
-#CC= /usr/local/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# EBI uses the following with pgcc, -O3 does not work:
-# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
-X
-# this file works for x86 LINUX
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
-X
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-LIB_M = -lm
-#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPTFA_O = drop_tfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-X
-# renamed (fasta33) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0644 Makefile.linux ||
-echo 'restore of Makefile.linux failed'
-Wc_c="`wc -c < 'Makefile.linux'`"
-test 1580 -eq "$Wc_c" ||
- echo 'Makefile.linux: original size 1580, current size' "$Wc_c"
-fi
-# ============= Makefile.linux_mysql ==============
-if test -f 'Makefile.linux_mysql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.linux_mysql (File already exists)'
-else
-echo 'x - extracting Makefile.linux_mysql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_mysql' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-# On the version of linux that I use, MININT is defined and causes a warning
-# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
-# MININT causes the code to break. Do not change the MININT definition
-# in dropnfa.c (wrp 3/19/1998)
-#
-# for DEC/Compaq Alpha/LINUX, use gcc -mieee -g to avoid buggy compilers
-X
-CC= gcc -g -O2
-X
-#CC= gcc -g -DDEBUG
-#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# this file works for x86 LINUX
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/include/mysql -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
-X
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-#LIB_M = -lm
-#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common_mysql
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.linux_mysql ||
-echo 'restore of Makefile.linux_mysql failed'
-Wc_c="`wc -c < 'Makefile.linux_mysql'`"
-test 1577 -eq "$Wc_c" ||
- echo 'Makefile.linux_mysql: original size 1577, current size' "$Wc_c"
-fi
-# ============= Makefile.linux_pgsql ==============
-if test -f 'Makefile.linux_pgsql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.linux_pgsql (File already exists)'
-else
-echo 'x - extracting Makefile.linux_pgsql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_pgsql' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-# On the version of linux that I use, MININT is defined and causes a warning
-# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
-# MININT causes the code to break. Do not change the MININT definition
-# in dropnfa.c (wrp 3/19/1998)
-#
-# for DEC/Compaq Alpha/LINUX, use gcc -mieee -g to avoid buggy compilers
-X
-CC= gcc -g -O
-#CC= gcc -g -DDEBUG
-#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# this file works for x86 LINUX
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -DPGSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
-X
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-#LIB_M = -lm
-#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common_pgsql
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.linux_pgsql ||
-echo 'restore of Makefile.linux_pgsql failed'
-Wc_c="`wc -c < 'Makefile.linux_pgsql'`"
-test 1581 -eq "$Wc_c" ||
- echo 'Makefile.linux_pgsql: original size 1581, current size' "$Wc_c"
-fi
-# ============= Makefile.linux_sql ==============
-if test -f 'Makefile.linux_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.linux_sql (File already exists)'
-else
-echo 'x - extracting Makefile.linux_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_sql' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-# On the version of linux that I use, MININT is defined and causes a warning
-# as dropnfa.c is compiled. Unfortunately, using the Linux definition of
-# MININT causes the code to break. Do not change the MININT definition
-# in dropnfa.c (wrp 3/19/1998)
-#
-X
-X
-CC= gcc -g -O
-#CC= gcc -g -DDEBUG
-#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
-X
-# this file works for x86 LINUX
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -I/usr/local/pgsql/include -I/usr/include/mysql -DPGSQL_DB -DMYSQL_DB -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DM10_CONS
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DLINUX6 -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT
-X
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-#LIB_M = -lm
-#LIB_M = -L/usr/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common_sql
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.linux_sql ||
-echo 'restore of Makefile.linux_sql failed'
-Wc_c="`wc -c < 'Makefile.linux_sql'`"
-test 1548 -eq "$Wc_c" ||
- echo 'Makefile.linux_sql: original size 1548, current size' "$Wc_c"
-fi
-# ============= Makefile.linux_sse2 ==============
-if test -f 'Makefile.linux_sse2' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.linux_sse2 (File already exists)'
-else
-echo 'x - extracting Makefile.linux_sse2 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.linux_sse2' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-#
-# Dec 8, 2005 - with gcc4.0.2 (or .1) under Redhat Linux Fedora FC4 -03 breaks the alignment code
-#
-X
-CC= gcc -g -O -DSW_SSE2 -msse2
-#CC = gcc -g -DDEBUG -DSW_SSE2 -msse2
-X
-#CC=gcc -Wall -pedantic -ansi -g -O
-#CC= /usr/local/parasoft/bin/insure -g -DDEBUG
-X
-# EBI uses the following with pgcc, -O3 does not work:
-# CC= pgcc -O2 -pipe -mcpu=pentiumpro -march=pentiumpro -fomit-frame-pointer
-X
-# this file works for x86 LINUX
-X
-# standard options
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="':'" -c -DMAX_WORKERS=8 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"your_fasta_host_here"' -DUSE_MMAP -D_REENTRANT -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DSAMP_STATS -DPGM_DOC
-X
-# use options below for superfamily validations
-#CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DSFCHAR="'|'" -c -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DUSE_MMAP -D_REENTRANT -DBIG_LIB64 -D_LARGE_FILE_SOURCE -DUSE_FSEEKO -D_FILE_OFFSET_BITS=64 -DHAS_INTTYPES -DSAMP_STATS
-X
-# -I/usr/local/include/mysql -DMYSQL_DB
-#
-#(for mySQL databases) (also requires change to Makefile34.common)
-X
-LIB_M = -lm
-#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-# for Linux
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/bin
-#XDIR = ~/bin/LINUX
-X
-DROPNFA_O = drop_nfa.o
-DROPTFA_O = drop_tfa.o
-DROPGSW_O = dropgsw.o smith_waterman_sse2.o
-DROPRSS_O = dropgsw.o smith_waterman_sse2.o
-X
-# renamed (fasta33) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0644 Makefile.linux_sse2 ||
-echo 'restore of Makefile.linux_sse2 failed'
-Wc_c="`wc -c < 'Makefile.linux_sse2'`"
-test 1671 -eq "$Wc_c" ||
- echo 'Makefile.linux_sse2: original size 1671, current size' "$Wc_c"
-fi
-# ============= Makefile.mpcom ==============
-if test -f 'Makefile.mpcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.mpcom (File already exists)'
-else
-echo 'x - extracting Makefile.mpcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpcom' &&
-X
-PROGS= mp34compfa mp34compsw mp34compfx mp34comptfx mp34compfy mp34comptfy mp34compfs mp34comptfs
-X
-# ms34compfa, etc provides a summaries of effectiveness, require
-# superfamily annotated database. ms34compss uses dropnsw.c instead of
-# dropgsw.c, thus allowing high gap penalties.
-X
-SPROGS = ms34compfa ms34compsw ms34compss ms34compfx ms34compfy ms34comptfx ms34comptfy
-X
-# report highest unrelated sequences
-UPROGS = mu34compfa mu34compsw mu34compfx mu34comptfx mu34compfy mu34comptfy
-X
-vall : $(PROGS) $(WPROGS)
-X
-uall : $(UPROGS) $(WPROGS)
-X
-sall : $(SPROGS) $(WPROGS)
-X
-all : $(PROGS) $(UPROGS) $(SPROGS) $(WPROGS)
-X
-clean-up:
-X rm -f *.o $(PROGS) $(WPROGS) $(SPROGS) $(UPROGS)
-X
-install : $(PROGS) $(WPROGS)
-X cp $(PROGS) $(WPROGS) $(XDIR)
-X
-sinstall : $(SPROGS) $(WPROGS)
-X cp $(SPROGS) $(WPROGS) $(XDIR)
-X
-uinstall : $(UPROGS) $(WPROGS)
-X cp $(UPROGS) $(WPROGS) $(XDIR)
-X
-mp34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) mp34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-ms34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) ms34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-mu34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o c_dispn.o
-X $(LCC) $(LFLAGS) mu34compfa p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
-X
-mr34compfa : p2_complib.o compacc.o lib_sel.o url_subs.o manshowrel.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) mr34compfa p2_complib.o compacc.o lib_sel.o url_subs.o manshowrel.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fa.o scaleswn.o karlin.o p2_workcomp.o $(DROPNFA_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-mp34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) mp34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-ms34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) ms34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-mu34compsw : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o c_dispn.o
-X $(LCC) $(LFLAGS) mu34compsw p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o $(DROPGSW_O) workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
-X
-mp34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) mp34compss p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-ms34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) ms34compss p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-mu34compss : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o c_dispn.o
-X $(LCC) $(LFLAGS) mu34compss p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_sw.o scaleswn.o karlin.o p2_workcomp.o dropnsw.o workacc.o faatran.o $(NRAND).o c_dispn.o $(PLIB) $(LIB_M)
-X
-mp34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mp34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-ms34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) ms34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mu34compfx : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mu34compfx p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mp34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mp34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-ms34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) ms34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mu34compfy : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mu34compfy p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mp34compfs : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fs.o workacc.o faatran.o $(NRAND).o
-X $(LCC) $(LFLAGS) mp34compfs p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fs.o workacc.o faatran.o $(NRAND).o $(PLIB) $(LIB_M)
-X
-mp34comptfs : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mp34comptfs p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_fs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o c_dispn.o p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mp34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mp34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-ms34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) ms34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mu34comptfx : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mu34comptfx p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfx.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mp34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mp34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showbest.o $(SHOWALIGN).o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-ms34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) ms34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showsum.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-mu34comptfy : p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o
-X $(LCC) $(LFLAGS) mu34comptfy p2_complib.o compacc.o lib_sel.o url_subs.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o $(LGETLIB) $(NCBL_LIB) apam.o doinit.o init_tfy.o scaleswn.o karlin.o c_dispn.o p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o $(PLIB) $(LIB_M)
-X
-p2_complib.o : p2_complib.c msg.h defs.h upam.h uascii.h param.h structs.h
-X $(CC) $(CFLAGS) p2_complib.c -o p2_complib.o
-X
-p2_workcomp.o : p2_workcomp.c structs.h msg.h defs.h mw.h upam.h uascii.h param.h
-X $(NCC) $(CFLAGS) p2_workcomp.c
-SHAR_EOF
-chmod 0644 Makefile.mpcom ||
-echo 'restore of Makefile.mpcom failed'
-Wc_c="`wc -c < 'Makefile.mpcom'`"
-test 13073 -eq "$Wc_c" ||
- echo 'Makefile.mpcom: original size 13073, current size' "$Wc_c"
-fi
-# ============= Makefile.mpi4 ==============
-if test -f 'Makefile.mpi4' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.mpi4 (File already exists)'
-else
-echo 'x - extracting Makefile.mpi4 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4,v 1.26 2006/12/12 16:58:51 wrp Exp $
-#
-# 21 July, 2000
-#
-# Makefile for MPI versions of the parallel library comparison programs.
-# this file is derived from Makefile.pvm, with only a few differences:
-# (1) -DMPI_SRC instead of -DPVM_SRC
-# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
-# (3) MPI does not require/allow a "worker" program, thus no c3.work*
-#
-X
-# setenv MPI_CC gcc-3.3 for best performance
-X
-MPI_ROOT = /m0/xshare/mpich2
-MPICC = ${MPI_ROOT}/bin/mpicc
-X
-CC= ${MPICC} -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
-NCC= ${MPICC} -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
-LCC= ${MPICC}
-X
-#ARCH = ALPHAMP (get from $ARCH)
-X
-PLIB = -L${MPI_ROOT}/lib -lmpich
-XXDIR = /home/slib/mpi/bin/
-SDIR = .
-X
-CFLAGS= -DMPI_SRC -DUNIX -DPCOMPLIB -DBFR=120 -DSHOWSIM -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DSW_ALTIVEC
-# -DMYSQL_DB -I/usr/include/mysql
-# -DSFCHAR="'|'" -DSUPERFAMNUM
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = p_mw.h
-MWHP = p_mw.h w_mw.h
-X
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -lm
-X
-LFLAGS= -o
-X
-DROPGSW_O = dropgsw.o smith_waterman_altivec.o
-DROPNFA_O = drop_nfa.o
-X
-include Makefile.mpcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile.mpi4 ||
-echo 'restore of Makefile.mpi4 failed'
-Wc_c="`wc -c < 'Makefile.mpi4'`"
-test 1600 -eq "$Wc_c" ||
- echo 'Makefile.mpi4: original size 1600, current size' "$Wc_c"
-fi
-# ============= Makefile.mpi4_bluegene ==============
-if test -f 'Makefile.mpi4_bluegene' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.mpi4_bluegene (File already exists)'
-else
-echo 'x - extracting Makefile.mpi4_bluegene (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4_bluegene' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4_bluegene,v 1.1 2006/04/01 14:09:11 wrp Exp $
-#
-# 21 July, 2000
-#
-# Makefile for MPI versions of the parallel library comparison programs.
-# this file is derived from Makefile.pvm, with only a few differences:
-# (1) -DMPI_SRC instead of -DPVM_SRC
-# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
-# (3) MPI does not require/allow a "worker" program, thus no c3.work*
-#
-# 1-April-2006 - Makefile for IBM BlueGene - use -DMAXWRKR to set the
-# maximum number of workers.
-#
-CC= blrts_xlc -O3 -qsource -qlist -qarch=440d -qtune=440
-NCC= blrts_xlc -O3 -qsource -qlist -qarch=440d -qtune=440
-LCC= blrts_xlc -O3
-X
-MPI_ROOT = /bgl/BlueLight/ppcfloor/bglsys
-PLIB = -L${MPI_ROOT}/lib -lmpich.rts -lrts.rts -ldevices.rts -lmsglayer.rts -ldevices.440
-XXDIR = /home/slib/mpi/bin/
-SDIR = .
-X
-CFLAGS= -DMPI_SRC -DMAXWRKR=128 -DUNIX -DPCOMPLIB -DBFR=1200 -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS
-# -DMYSQL_DB -I/usr/include/mysql
-# -DSFCHAR="'|'" -DSUPERFAMNUM
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = p_mw.h
-MWHP = p_mw.h w_mw.h
-X
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -lm
-X
-LFLAGS= -o
-X
-DROPGSW_O = dropgsw.o
-DROPNFA_O = drop_nfa.o
-X
-include Makefile.mpcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile.mpi4_bluegene ||
-echo 'restore of Makefile.mpi4_bluegene failed'
-Wc_c="`wc -c < 'Makefile.mpi4_bluegene'`"
-test 1602 -eq "$Wc_c" ||
- echo 'Makefile.mpi4_bluegene: original size 1602, current size' "$Wc_c"
-fi
-# ============= Makefile.mpi4_sql ==============
-if test -f 'Makefile.mpi4_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.mpi4_sql (File already exists)'
-else
-echo 'x - extracting Makefile.mpi4_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.mpi4_sql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile.mpi4_sql,v 1.19 2004/11/19 15:28:26 wrp Exp $
-#
-# 21 July, 2000
-#
-# Makefile for MPI versions of the parallel library comparison programs.
-# this file is derived from Makefile.pvm, with only a few differences:
-# (1) -DMPI_SRC instead of -DPVM_SRC
-# (2) programs are mp34comp*, ms34comp*, and mu34comp* rather than pv34comp*, etc.
-# (3) MPI does not require/allow a "worker" program, thus no c3.work*
-#
-X
-CC= mpicc -g -O
-NCC= mpicc -O
-LCC= mpicc -O
-X
-#ARCH = ALPHAMP (get from $ARCH)
-X
-#MPI_ROOT = /opt/share/mpi
-#PLIB = -L${MPI_ROOT}/lib -lmpich
-#XDIR = /seqprg/pvm3/bin/LINUX
-XXDIR = ${HOME}/pvm3/bin/LINUX
-SDIR = .
-X
-CFLAGS= -DMPI_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -I${MPI_ROOT}/include -DSRAND=srand -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DMYSQL_DB -I/usr/include/mysql -DPGSQL_DB -I/usr/include/pgsql
-# -DMYSQL_DB -I/usr/include/mysql
-# -DSFCHAR="'|'" -DSUPERFAMNUM
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = p_mw.h
-MWHP = p_mw.h w_mw.h
-X
-NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-#NCBL_LIB=ncbl2_mlib.o
-LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm -L/usr/lib/pgsql -lpq -lcrypt -lssl
-#LIB_M= -lm
-X
-LFLAGS= -o
-X
-DROPGSW_O = dropgsw.o
-DROPNFA_O = drop_nfa.o
-X
-include Makefile.mpcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile.mpi4_sql ||
-echo 'restore of Makefile.mpi4_sql failed'
-Wc_c="`wc -c < 'Makefile.mpi4_sql'`"
-test 1509 -eq "$Wc_c" ||
- echo 'Makefile.mpi4_sql: original size 1509, current size' "$Wc_c"
-fi
-# ============= Makefile.nm_fcom ==============
-if test -f 'Makefile.nm_fcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.nm_fcom (File already exists)'
-else
-echo 'x - extracting Makefile.nm_fcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nm_fcom' &&
-X
-#================ common .obj files
-X
-doinit.obj : doinit.c defs.h param.h upam.h structs.h uascii.h
-X $(CC) $(CFLAGS) -c doinit.c
-X
-init_sw.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DSSEARCH initfa.c /Foinit_sw.obj
-X
-init_ssw.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DOSEARCH initfa.c /Foinit_ssw.obj
-X
-init_rss.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DPRSS initfa.c /Foinit_rss.obj
-X
-init_rfx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DPRSS -DFASTX initfa.c /Foinit_rfx.obj
-X
-init_fa.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTA initfa.c /Foinit_fa.obj
-X
-init_ff.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTF initfa.c /Foinit_ff.obj
-X
-init_tf.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTF -DTFAST initfa.c /Foinit_tf.obj
-X
-init_fs.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTS initfa.c /Foinit_fs.obj
-X
-init_fm.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTM initfa.c /Foinit_fm.obj
-X
-init_tfs.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTS -DTFAST initfa.c /Foinit_tfs.obj
-X
-init_tfm.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTM -DTFAST initfa.c /Foinit_tfm.obj
-X
-init_tfa.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTA -DTFAST initfa.c /Foinit_tfa.obj
-X
-init_fx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTX initfa.c /Foinit_fx.obj
-X
-init_tfx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTX -DTFAST initfa.c /Foinit_tfx.obj
-X
-init_fy.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTY initfa.c /Foinit_fy.obj
-X
-init_tfy.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c -DFASTY -DTFAST initfa.c /Foinit_tfy.obj
-X
-#================ miscellaneous
-X
-htime.obj : htime.c
-X $(CC) $(CFLAGS) -c htime.c
-X
-compacc.obj : compacc.c upam.h uascii.h param.h structs.h $(MWH) defs.h
-X $(CC) $(CFLAGS) -c compacc.c
-X
-pssm_asn_subs.obj : pssm_asn_subs.c defs.h
-X $(CC) $(CFLAGS) -c pssm_asn_subs.c
-X
-#================ display list of best hits / alignments
-X
-showbest.obj : $(SHOWBESTC) $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -c $(SHOWBESTC) /Foshowbest.obj
-X
-showrss.obj : showrss.c $(MWH) defs.h param.h structs.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -c showrss.c
-X
-showun.obj : mshowbest.c $(MWH) defs.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DSHOWUN mshowbest.c /Foshowun.obj
-X
-showrel.obj : $(SHOWBESTC) $(MWH) defs.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DSHOWREL $(SHOWBESTC) /Foshowrel.obj
-X
-showsum.obj : showsum.c $(MWH) defs.h drop_func.h
-X $(CC) $(CFLAGS) -c showsum.c
-X
-$(SHOWALIGN).obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -c $(SHOWALIGN).c /Fo$(SHOWALIGN).obj
-X
-$(SHOWALIGN)_u.obj : $(SHOWALIGN).c $(MWHP) defs.h structs.h param.h aln_structs.h drop_func.h
-X $(CC) $(CFLAGS) -DSHOWUN -c /Fo$(SHOWALIGN)_u.obj $(SHOWALIGN).c
-re_getlib.obj : re_getlib.c mw.h mm_file.h
-X $(CC) $(CFLAGS) -c re_getlib.c
-X
-lib_sel.obj : lib_sel.c defs.h structs.h
-X $(CC) $(CFLAGS) -c lib_sel.c
-X
-c_dispn.obj : c_dispn.c defs.h structs.h param.h
-X $(CC) $(CFLAGS) -c c_dispn.c
-X
-#================ statistical functions
-X
-karlin.obj : karlin.c param.h
-X $(CC) $(CFLAGS) -c karlin.c
-X
-scaleswn.obj : scaleswn.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(CFLAGS) -c scaleswn.c
-X
-scaleswtf.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(CFLAGS) -DFASTF -c scaleswt.c /Foscaleswtf.obj
-X
-scaleswts.obj : scaleswt.c defs.h param.h structs.h $(MWH) alt_parms.h
-X $(CC) $(CFLAGS) -c scaleswt.c /Foscaleswts.obj
-X
-tatstats_fs.obj : tatstats.c tatstats.h
-X $(CC) $(CFLAGS) -c -DFASTS tatstats.c /Fotatstats_fs.obj
-X
-tatstats_ff.obj : tatstats.c tatstats.h
-X $(CC) $(CFLAGS) -c -DFASTF tatstats.c /Fotatstats_ff.obj
-X
-tatstats_fm.obj : tatstats.c tatstats.h
-X $(CC) $(CFLAGS) -c -DFASTM tatstats.c /Fotatstats_fm.obj
-X
-last_tat.obj : last_tat.c defs.h mm_file.h structs.h param.h
-X $(CC) $(CFLAGS) -c last_tat.c
-X
-#================ drop functions - actual scores/alignments
-X
-drop_nfa.obj : dropnfa.c dropnfa.h param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c dropnfa.c /Fodrop_nfa.obj
-X
-# drop_ff, _fs, _fm must define FASTF, FASTS, and FASTM to ensure
-# that tatstats.h is built appropriately
-X
-drop_ff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(CFLAGS) -DFASTF -c dropff2.c /Fodrop_ff.obj
-X
-drop_tff.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(CFLAGS) -DFASTF -DTFAST -c dropff2.c /Fodrop_tff.obj
-X
-drop_ff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(CFLAGS) -c -DFASTF dropff2.c /Fodrop_ff2.obj
-X
-drop_tff2.obj : dropff2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(CFLAGS) -c -DFASTF -DTFAST dropff2.c /Fodrop_tff.obj
-X
-drop_fs.obj : dropfs2.c param.h defs.h tatstats.h drop_func.h
-X $(CC) $(CFLAGS) -DFASTS -c dropfs2.c /Fodrop_fs.obj
-X
-drop_tfs.obj : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DTFAST -DFASTS dropfs2.c /Fodrop_tfs.obj
-X
-drop_fm.obj : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DFASTM dropfs2.c /Fodrop_fm.obj
-X
-drop_tfm.obj : dropfs2.c param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DTFAST -DFASTM dropfs2.c /Fodrop_tfm.obj
-X
-drop_tfa.obj : dropnfa.c dropnfa.h upam.h param.h defs.h
-X $(CC) $(CFLAGS) -c -DTFASTA dropnfa.c /Fodrop_tfa.obj
-X
-drop_fx.obj : dropfx.c upam.h param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c dropfx.c /Fodrop_fx.obj
-X
-drop_tfx.obj : dropfx.c upam.h param.h defs.h drop_func.h
-X $(CC) $(CFLAGS) -c -DTFAST dropfx.c /Fodrop_tfx.obj
-X
-drop_fz.obj : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
-X $(CC) $(CFLAGS) -c dropfz2.c /Fodrop_fz.obj
-X
-drop_tfz.obj : dropfz2.c upam.h param.h defs.h aamap.h drop_func.h
-X $(CC) $(CFLAGS) -c -DTFAST dropfz2.c /Fodrop_tfz.obj
-X
-dropnsw.obj : dropnsw.c upam.h param.h structs.h drop_func.h
-X $(CC) $(CFLAGS) -c dropnsw.c
-X
-dropgsw.obj : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
-X $(CC) $(CFLAGS) -c dropgsw.c
-X
-dropgsw_sse2.obj : dropgsw.c dropgsw.h upam.h param.h structs.h drop_func.h
-X $(CC) $(CFLAGS) -DSW_SSE2 -c dropgsw.c /Fodropgsw_sse2.obj
-X
-smith_waterman_altivec.obj : smith_waterman_altivec.c smith_waterman_altivec.h dropgsw.h defs.h param.h
-X $(CC) $(CFLAGS) -c smith_waterman_altivec.c
-X
-smith_waterman_sse2.obj : smith_waterman_sse2.c smith_waterman_sse2.h dropgsw.h defs.h param.h
-X $(CC) $(CFLAGS) -DSW_SSE2 -c smith_waterman_sse2.c
-X
-dropnw.obj : dropnw.c upam.h param.h structs.h drop_func.h
-X $(CC) $(CFLAGS) -c dropnw.c
-X
-#================ reading query, libraries
-X
-getseq.obj : getseq.c defs.h uascii.h structs.h upam.h
-X $(CC) $(CFLAGS) -c getseq.c
-X
-llgetaa.obj : llgetaa.c upam.h uascii.h
-X $(CC) $(CFLAGS) -c -DNOLIB llgetaa.c
-X
-lgetlib.obj : $(NGETLIB).c altlib.h upam.h uascii.h mm_file.h
-X $(CC) $(CFLAGS) -c $(NGETLIB).c /Folgetlib.obj
-X
-lgetaa_m.obj : mmgetaa.c altlib.h ncbl2_head.h upam.h uascii.h mm_file.h
-X $(CC) $(CFLAGS) -c mmgetaa.c /Folgetaa_m.obj
-X
-ncbl_lib.obj : ncbl_lib.c ncbl_head.h
-X $(CC) $(CFLAGS) -c ncbl_lib.c
-X
-ncbl2_mlib.obj : ncbl2_mlib.c ncbl2_head.h mm_file.h
-X $(CC) $(CFLAGS) -c ncbl2_mlib.c
-X
-mysql_lib.obj : mysql_lib.c mm_file.h
-X $(CC) $(CFLAGS) -c mysql_lib.c
-X
-pgsql_lib.obj : pgsql_lib.c mm_file.h
-X $(CC) $(CFLAGS) -c pgsql_lib.c
-X
-#================ threading functions
-X
-pthr_subs2.obj : pthr_subs2.c thr.h pthr_subs.h
-X $(CC) $(CFLAGS) -c pthr_subs2.c
-X
-uthr_subs.obj : uthr_subs.c thr.h uthr_subs.h
-X $(CC) $(CFLAGS) -c uthr_subs.c
-X
-#================ translation
-X
-faatran.obj : faatran.c upam.h uascii.h
-X $(CC) $(CFLAGS) -c faatran.c
-X
-url_subs.obj : url_subs.c structs.h param.h
-X $(CC) $(CFLAGS) -c url_subs.c
-X
-$(NRAND).obj : $(NRAND).c
-X $(CC) $(CFLAGS) -c $(NRAND).c
-#================ pvm/mpi specific functions
-X
-hostacc.obj : hostacc.c upam.h uascii.h
-X $(CC) $(CFLAGS) hostacc.c
-X
-workacc.obj : workacc.c upam.h uascii.h param.h
-X $(NCC) $(CFLAGS) workacc.c /Foworkacc.obj
-X
-#================ windows getopt()
-X
-getopt.obj : getopt.c
-X $(CC) $(CFLAGS) -c getopt.c
-SHAR_EOF
-chmod 0755 Makefile.nm_fcom ||
-echo 'restore of Makefile.nm_fcom failed'
-Wc_c="`wc -c < 'Makefile.nm_fcom'`"
-test 8182 -eq "$Wc_c" ||
- echo 'Makefile.nm_fcom: original size 8182, current size' "$Wc_c"
-fi
-# ============= Makefile.nm_pcom ==============
-if test -f 'Makefile.nm_pcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.nm_pcom (File already exists)'
-else
-echo 'x - extracting Makefile.nm_pcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nm_pcom' &&
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = mw.h
-MWHP = mw.h
-X
-TPROGS = ssearch34_t.exe ssearch34sse2_t.exe fasta34_t.exe fasts34_t.exe fastx34_t.exe tfastx34_t.exe fasty34_t.exe tfasty34_t.exe tfasts34_t.exe fastm34_t.exe fastf34_t.exe tfastf34_t.exe prss34_t.exe prss34sse2_t.exe prfx34_t.exe
-X
-SPROGS = fasta34.exe ssearch34.exe ssearch34sse2.exe fasts34.exe fastx34.exe tfastx34.exe fasty34.exe tfasty34.exe tfasts34.exe fastm34.exe tfastm34.exe prss34.exe prss34sse2.exe prfx34.exe fastf34.exe tfastf34.exe
-X
-MAPROGS = map_db.exe
-X
-XXTPROGS = fastx34_t.exe tfastx34_t.exe fasty34_t.exe tfasty34_t.exe
-XXPROGS = fastx34.exe tfastx34.exe .exe fasty34 tfasty34.exe
-X
-PROGS = $(SPROGS) $(TPROGS)
-X
-all : $(PROGS)
-X
-tall: $(TPROGS)
-X
-sall: $(SPROGS)
-X
-xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS)
-X
-clean-up:
-X del *.obj $(PROGS)
-X
-install: $(PROGS)
-X copy $(PROGS) $(XDIR)
-X
-sinstall: $(SPROGS)
-X copy $(SPROGS) $(XDIR)
-X
-tinstall: $(TPROGS)
-X cp $(TPROGS) $(XDIR)
-X
-fasta34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefasta34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-fastx34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj scaleswn.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefastx34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-fasty34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj scaleswn.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefasty34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-fastf34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastf34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswts.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-fastf34u : $(COMP_LIBO) compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastf34u.exe $(COMP_LIBO) compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-fastf34s : $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastf34s.exe $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-fasts34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_fs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefasts34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-fastm34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj drop_fm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastm34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fm.obj drop_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-tfastx34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scaleswn.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfastx34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-tfasty34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scaleswn.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfasty34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-tfastf34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfastf34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-tfastf34s : $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfastf34s.exe $(COMP_LIBO) compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-tfasts34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj drop_tfs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfasts34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj drop_tfs.obj scaleswts.obj tatstats_fs.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-tfastm34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj drop_tfm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfastm34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfm.obj drop_tfm.obj scaleswts.obj tatstats_fm.obj last_tat.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X
-ssearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X $(CL) /Fessearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X
-ssearch34sse2.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X $(CL) /Fessearch34sse2.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X
-osearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Feosearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ssw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-usearch34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Feusearch34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-prss34.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj
-X $(CL) /Feprss34.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X
-prss34sse2.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_SSE2_O) llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj
-X $(CL) /Feprss34sse2.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X
-prfx34.exe : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj faatran.obj
-X $(CL) /Feprfx34.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-prss34o : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
-X $(CL) /Feprss34o.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_O) scaleswn.obj karlin.obj llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X
-prfx34o : rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showrss.obj lib_sel.obj $(NRAND).obj faatran.obj
-X $(CL) /Feprfx34o.exe rcomp_lib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showrss.obj lib_sel.obj faatran.obj $(NRAND).obj getopt.obj
-X
-ssearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X $(CL) /Fessearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
-X
-ssearch34sse2_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj $(DROPGSW_SSE2_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj
-X $(CL) /Fessearch34sse2_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj $(DROPGSW_SSE2_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
-X
-osearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Feosearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-usearch34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropnsw.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Feusearch34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_sw.obj dropnsw.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-fasta34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefasta34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-fasta34s_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefasta34s_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-fasta34u_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefasta34u_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showun.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fasta34r_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj $(DROPNFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefasta34r_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showrel.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fa.obj $(DROPNFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fastf34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastf34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fastf34s_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj scaleswtf.obj karlin.obj drop_ff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastf34s_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showsum.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_ff.obj drop_ff.obj scaleswtf.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fasts34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_fs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefasts34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fastm34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj drop_fm.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fefastm34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fs.obj drop_fm.obj scaleswts.obj last_tat.obj tatstats_fm.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-fastx34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fx.obj faatran.obj scaleswn.obj karlin.obj drop_fx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefastx34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fx.obj drop_fx.obj faatran.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-fasty34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_fy.obj faatran.obj scaleswn.obj karlin.obj drop_fz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fefasty34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_fy.obj drop_fz.obj faatran.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-tfasta34.exe : $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj scaleswn.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfasta34.exe $(COMP_LIBO) compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X
-tfasta34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfa.obj scaleswn.obj karlin.obj $(DROPTFA_O) $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfasta34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfa.obj $(DROPTFA_O) scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-tfastf34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tf.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj drop_tff.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfastf34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tf.obj drop_tff.obj scaleswtf.obj last_tat.obj tatstats_ff.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-tfasts34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj c_dispn.obj htime.obj apam.obj doinit.obj init_tfs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj drop_tfs.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj
-X $(CL) /Fetfasts34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfs.obj drop_tfs.obj scaleswts.obj last_tat.obj tatstats_fs.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj $(NRAND).obj url_subs.obj getopt.obj $(THR_LIBS)
-X
-tfastx34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj scaleswn.obj karlin.obj drop_tfx.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfastx34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfx.obj drop_tfx.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-tfasty34_t.exe : $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj scaleswn.obj karlin.obj drop_tfz.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj
-X $(CL) /Fetfasty34_t.exe $(COMP_THRO) work_thr.obj $(THR_SUBS).obj compacc.obj showbest.obj re_getlib.obj $(SHOWALIGN).obj htime.obj apam.obj doinit.obj init_tfy.obj drop_tfz.obj scaleswn.obj karlin.obj $(LGETLIB) c_dispn.obj $(NCBL_LIB) lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-prss34_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
-X $(CL) /Feprss34_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
-X
-prss34sse2_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj $(DROPRSS_O) llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj
-X $(CL) /Feprss34sse2_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj $(DROPRSS_SSE2_O) scaleswn.obj karlin.obj llgetaa.obj showbest.obj $(SHOWALIGN).obj c_dispn.obj url_subs.obj lib_sel.obj $(NRAND).obj pssm_asn_subs.obj getopt.obj $(THR_LIBS)
-X
-prfx34_t.exe : rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj url_subs.obj $(NRAND).obj faatran.obj
-X $(CL) /Feprfx34_t.exe rcomp_thr.obj work_thr.obj $(THR_SUBS).obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj drop_fx.obj scaleswn.obj karlin.obj llgetaa.obj showbest.obj mshowalign.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj $(NRAND).obj getopt.obj $(THR_LIBS)
-X
-comp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(CFLAGS) -c comp_lib.c
-X
-comp_mlib.obj : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib.c /Focomp_mlib.obj
-X
-rcomp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(CFLAGS) -c -DPRSS comp_lib.c /Forcomp_lib.obj
-X
-comp_thr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(CFLAGS) -DCOMP_THR -c comp_lib.c /Focomp_thr.obj
-X
-comp_mthr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib.c /Focomp_mthr.obj
-X
-rcomp_thr.obj : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(CFLAGS) -DPRSS -DCOMP_THR -c comp_lib.c /Forcomp_thr.obj
-X
-work_thr.obj : work_thr.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(CFLAGS) -c work_thr.c
-X
-print_pssm.exe : print_pssm.c getseq.c karlin.c apam.c
-X $(CC) /Feprint_pssm.exe $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c getopt.obj
-X
-map_db.exe : map_db.c uascii.h ncbl2_head.h
-X $(CC) /Femap_db.exe map_db.c
-X
-list_db.exe : list_db.c
-X $(CC) /Felist_db.exe list_db.c
-X
-SHAR_EOF
-chmod 0755 Makefile.nm_pcom ||
-echo 'restore of Makefile.nm_pcom failed'
-Wc_c="`wc -c < 'Makefile.nm_pcom'`"
-test 27480 -eq "$Wc_c" ||
- echo 'Makefile.nm_pcom: original size 27480, current size' "$Wc_c"
-fi
-# ============= Makefile.nmk_icl ==============
-if test -f 'Makefile.nmk_icl' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.nmk_icl (File already exists)'
-else
-echo 'x - extracting Makefile.nmk_icl (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.nmk_icl' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-# options for Intel C compiler (v9.1)
-#
-# must be compiled/linked with /MT (or /MTd for debugging) to ensure
-# multi-threaded staticly linked executables. /MD uses dynamic
-# linking to DLL's, which may not be available on the users machine
-X
-CC= icl /O2 /MT /W1
-#CC= icl /Zi /MTd /W1
-CL= icl /O2 /MT
-#CL= icl /Zi /MTd
-X
-# standard options
-CFLAGS= -DSHOWSIM -DWIN32 -DHZ=100 -DPROGRESS -DSAMP_STATS -DPGM_DOC -DTHR_EXIT=pthread_exit -D_CRT_SECURE_NO_WARNINGS=1
-X
-XXDIR = /seqprg/bin
-X
-THR_SUBS = pthr_subs2
-THR_LIBS= pthreadVC2.lib
-X
-DROPNFA_O = drop_nfa.obj
-DROPGSW_O = dropgsw.obj
-DROPGSW_SSE2_O = dropgsw_sse2.obj smith_waterman_sse2.obj
-DROPRSS_O = dropnsw.obj
-DROPRSS_SSE2_O = dropgsw_sse2.obj smith_waterman_sse2.obj
-#
-X
-# renamed (fasta33) programs
-include Makefile34.nmk_com
-# conventional (fasta3) names
-# include Makefile.common
-X
-SHAR_EOF
-chmod 0755 Makefile.nmk_icl ||
-echo 'restore of Makefile.nmk_icl failed'
-Wc_c="`wc -c < 'Makefile.nmk_icl'`"
-test 905 -eq "$Wc_c" ||
- echo 'Makefile.nmk_icl: original size 905, current size' "$Wc_c"
-fi
-# ============= Makefile.os_x ==============
-if test -f 'Makefile.os_x' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.os_x (File already exists)'
-else
-echo 'x - extracting Makefile.os_x (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.os_x' &&
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-# this file works for DEC Alphas
-#
-# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
-# for mmap()ed BLAST2 format.
-X
-# the -DDEBUG option provides additional debugging information, particularly
-# with -D on the command line.
-X
-# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
-X
-# changed to gcc-3.3 for MacOSX Tiger because of problems with Altivec
-#
-X
-# in my hands, gcc-4.0 is about 40% slower than gcc-3.3 on the Altivec code
-#CC= gcc-4.0 -g -falign-loops=32 -O3 -mcpu=7450 -maltivec -mpim-altivec -DSW_ALTIVEC
-X
-CC= gcc-3.3 -g -falign-loops=32 -O3 -mcpu=7450 -faltivec -DSW_ALTIVEC
-#CC= gcc-3.3 -g -DDEBUG -mcpu=7450 -faltivec -DSW_ALTIVEC
-#CC= cc -g -Wall -pedantic -faltivec -DSW_ALTIVEC
-#
-# standard line for normal searching
-CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
-X
-#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=2 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
-X
-# add for MySQL support
-# -I/usr/local/mysql/include -DMYSQL_DB
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-#for DEC Unix V4.0
-THR_SUBS = pthr_subs2
-THR_LIBS =
-THR_CC =
-X
-#for Sun
-#THR_SUBS = uthr_subs2
-#THR_LIBS = -lthread
-#THR_CC =
-#
-# for SGI with current pthreads
-#THR_SUBS = pthr_subs2
-#THR_LIBS = -lpthreads
-#THR_CC =
-#
-# for IBM with current pthreads
-#CC= xlc_r -v -g
-#THR_SUBS = ibm_pthr_subs2
-#THR_LIBS = -lpthreads
-#THR_CC =
-X
-X
-#XDIR = ${HOME}/bin
-#XDIR = /home/slib/bin/MACOSX/
-#XDIR = /Users/seqprg/bin
-XXDIR = /seqprg/bin
-#XDIR = ./ppc
-X
-DROPNFA_O = drop_nfa.o
-DROPTFA_O = drop_tfa.o
-DROPGSW_O = dropgsw.o smith_waterman_altivec.o
-DROPRSS_O = dropgsw.o smith_waterman_altivec.o
-#DROPGSW_O = dropgsw.o
-#DROPRSS_O = dropgsw.o
-X
-# provide mysql function
-#include Makefile34m.common_sql
-X
-# no mysql
-include Makefile34m.common
-SHAR_EOF
-chmod 0644 Makefile.os_x ||
-echo 'restore of Makefile.os_x failed'
-Wc_c="`wc -c < 'Makefile.os_x'`"
-test 2116 -eq "$Wc_c" ||
- echo 'Makefile.os_x: original size 2116, current size' "$Wc_c"
-fi
-# ============= Makefile.os_x86 ==============
-if test -f 'Makefile.os_x86' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.os_x86 (File already exists)'
-else
-echo 'x - extracting Makefile.os_x86 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.os_x86' &&
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-# this file works for DEC Alphas
-#
-# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
-# for mmap()ed BLAST2 format.
-X
-# the -DDEBUG option provides additional debugging information, particularly
-# with -D on the command line.
-X
-# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files
-X
-# changed to gcc-3.3 for MacOSX Tiger because of problems with Altivec
-#
-X
-CC= gcc -g -O3 -DSW_SSE2 -msse2 -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk
-#CC= gcc -g -DDEBUG
-#CC= cc -g -Wall -pedantic
-#
-# standard line for normal searching
-CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=100 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
-X
-#CFLAGS= -DSHOWSIM -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"xs00.achs.virginia.edu/fasta_www/cgi"' -DIS_LITTLE_ENDIAN -DUSE_MMAP -DUSE_FSEEKO -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC -DSUPERFAMNUM -DSFCHAR="'|'"
-X
-LDFLAGS= -arch i386
-X
-# add for MySQL support
-# -I/usr/local/mysql/include -DMYSQL_DB
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-#for DEC Unix V4.0
-THR_SUBS = pthr_subs2
-THR_LIBS =
-THR_CC =
-X
-#for Sun
-#THR_SUBS = uthr_subs2
-#THR_LIBS = -lthread
-#THR_CC =
-#
-# for SGI with current pthreads
-#THR_SUBS = pthr_subs2
-#THR_LIBS = -lpthreads
-#THR_CC =
-#
-# for IBM with current pthreads
-#CC= xlc_r -v -g
-#THR_SUBS = ibm_pthr_subs2
-#THR_LIBS = -lpthreads
-#THR_CC =
-X
-X
-#XDIR = ${HOME}/bin
-#XDIR = /home/slib/bin/MACOSX/
-#XDIR = /Users/seqprg/bin
-XXDIR = /seqprg/bin
-#XDIR = ./i386
-X
-DROPNFA_O = drop_nfa.o
-DROPTFA_O = drop_tfa.o
-DROPGSW_O = dropgsw.o smith_waterman_sse2.o
-DROPRSS_O = dropgsw.o smith_waterman_sse2.o
-X
-# provide mysql function
-#include Makefile34m.common_sql
-X
-# no mysql
-include Makefile34m.common
-SHAR_EOF
-chmod 0644 Makefile.os_x86 ||
-echo 'restore of Makefile.os_x86 failed'
-Wc_c="`wc -c < 'Makefile.os_x86'`"
-test 1917 -eq "$Wc_c" ||
- echo 'Makefile.os_x86: original size 1917, current size' "$Wc_c"
-fi
-# ============= Makefile.pLinux ==============
-if test -f 'Makefile.pLinux' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pLinux (File already exists)'
-else
-echo 'x - extracting Makefile.pLinux (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pLinux' &&
-# $Name: fa_34_26_5 $ - $Id: Makefile.pLinux,v 1.4 2004/11/19 15:28:26 wrp Exp $
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-# this file works for DEC Alphas
-#
-# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
-# for mmap()ed BLAST2 format.
-X
-# the -DDEBUG option provides additional debugging information, particularly
-# with -D on the command line.
-X
-# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
-# files
-X
-# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
-X
-CC= xlc_r
-X
-#CC= cc -g3 -O -std1
-#CC= insure -g -DDEBUG
-#CC= cc -g -DDEBUG -std1
-X
-#CC= gcc -g -Wall
-#
-# standard line for normal searching
-CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__
-#
-#(-DMYSQL_DB for mySQL databases) (also requires change to Makefile34.common)
-X
-# special options for SUPERFAMLIES
-#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
-X
-LIB_M = -lm
-#LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-#for DEC Unix V4.0
-#THR_SUBS = pthr_subs2
-#THR_LIBS = -lpthreads
-#THR_CC =
-X
-#for Sun
-#THR_SUBS = uthr_subs
-#THR_LIBS = -lthread
-#THR_CC =
-#
-# for SGI with current pthreads
-#THR_SUBS = pthr_subs
-#THR_LIBS = -lpthreads
-#THR_CC =
-#
-# for IBM with current pthreads
-#CC= xlc_r -v -g
-#THR_SUBS = ibm_pthr_subs
-#THR_LIBS = -lpthreads
-#THR_CC =
-X
-X
-# for IBM Linux with current pthreads
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-#include Makefile34m.common_sql
-include Makefile34m.common
-X
-SHAR_EOF
-chmod 0644 Makefile.pLinux ||
-echo 'restore of Makefile.pLinux failed'
-Wc_c="`wc -c < 'Makefile.pLinux'`"
-test 1922 -eq "$Wc_c" ||
- echo 'Makefile.pLinux: original size 1922, current size' "$Wc_c"
-fi
-# ============= Makefile.pLinux_sql ==============
-if test -f 'Makefile.pLinux_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pLinux_sql (File already exists)'
-else
-echo 'x - extracting Makefile.pLinux_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pLinux_sql' &&
-# $Name: fa_34_26_5 $ - $Id: Makefile.pLinux_sql,v 1.4 2004/11/19 15:28:26 wrp Exp $
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-# this file works for DEC Alphas
-#
-# this file supports mmap()'ed databases in BLAST2 format use -DUSE_MMAP
-# for mmap()ed BLAST2 format.
-X
-# the -DDEBUG option provides additional debugging information, particularly
-# with -D on the command line.
-X
-# use -DBIG_LIB64 to generate and use 64-bit offsets in map_db .xin
-# files
-X
-# for Tru64 4.0F, no "<inttypes.h>" 4.0G has inttypes.h
-X
-CC= xlc_r
-X
-#CC= cc -g3 -O -std1
-#CC= insure -g -DDEBUG
-#CC= cc -g -DDEBUG -std1
-X
-#CC= gcc -g -Wall
-#
-X
-CFLAGS= -O3 -qtune=auto -qarch=auto -DUNIX -DTIMES -DBIGMEM -DMAX_WORKERS=4 -DSFCHAR="':'" -DTHR_EXIT=pthread_exit -DPROGRESS -DUSE_MMAP -DIS_BIG_ENDIAN -DSAMP_STATS -DPGM_DOC -D_LARGE_FILES -DHAS_INTTYPES -D__pLinux__ -DFASTA_HOST='"fasta.bioch.virginia.edu/fasta/cgi"' -I/usr/include/mysql -DMYSQL_DB
-#
-#(-DMYSQL_DB for mySQL databases) (also requires change to Makefile34.common)
-X
-# special options for SUPERFAMLIES
-#CFLAGS= -DM10_CONS -DUNIX -DTIMES -DHZ=60 -DBIGMEM -DSFCHAR="'|'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DSUPERFAMNUM -DIS_LITTLE_ENDIAN -DUSE_MMAP -DMAXBEST=200000
-X
-#LIB_M = -lm
-LIB_M = -L/usr/local/lib/mysql -lmysqlclient -lm
-# for mySQL databases
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-#for DEC Unix V4.0
-#THR_SUBS = pthr_subs2
-#THR_LIBS = -threads
-#THR_CC =
-X
-#for Sun
-#THR_SUBS = uthr_subs
-#THR_LIBS = -lthread
-#THR_CC =
-#
-# for SGI with current pthreads
-#THR_SUBS = pthr_subs
-#THR_LIBS = -lpthreads
-#THR_CC =
-#
-# for IBM with current pthreads
-#CC= xlc_r -v -g
-#THR_SUBS = ibm_pthr_subs
-#THR_LIBS = -lpthreads
-#THR_CC =
-X
-# for IBM Linux with current pthreads
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common_sql
-X
-SHAR_EOF
-chmod 0644 Makefile.pLinux_sql ||
-echo 'restore of Makefile.pLinux_sql failed'
-Wc_c="`wc -c < 'Makefile.pLinux_sql'`"
-test 1946 -eq "$Wc_c" ||
- echo 'Makefile.pLinux_sql: original size 1946, current size' "$Wc_c"
-fi
-# ============= Makefile.pcom ==============
-if test -f 'Makefile.pcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pcom (File already exists)'
-else
-echo 'x - extracting Makefile.pcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pcom' &&
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = mw.h
-MWHP = mw.h
-X
-TPROGS = ssearch34_t fasta34_t fasts34_t tfasta34_t fastx34_t tfastx34_t fasty34_t tfasty34_t tfasts34_t fastm34_t fastf34_t tfastf34_t prss34_t prfx34_t
-X
-SPROGS = fasta34 ssearch34 fasts34 tfasta34 fastx34 tfastx34 fasty34 tfasty34 tfasts34 fastm34 tfastm34 prss34 prfx34 fastf34 tfastf34
-X
-APROGS = map_db
-X
-XXTPROGS = fastx34_t tfastx34_t fasty34_t tfasty34_t
-XXPROGS = fastx34 tfastx34 fasty34 tfasty34
-X
-PROGS = $(SPROGS) $(TPROGS)
-X
-all : $(PROGS)
-X
-tall: $(TPROGS)
-X
-sall: $(SPROGS)
-X
-xall: $(XTPROGS) $(XPROGS) $(ZTPROGS) $(ZPROGS)
-X
-clean-up:
-X rm -f *.o $(PROGS)
-X
-install: $(PROGS)
-X cp $(PROGS) $(XDIR)
-X
-sinstall: $(SPROGS)
-X cp $(SPROGS) $(XDIR)
-X
-tinstall: $(TPROGS)
-X cp $(TPROGS) $(XDIR)
-X
-fasta34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fasta34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
-X
-fasta34u : $(COMP_LIBO) compacc.o showun.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasta34u $(COMP_LIBO) compacc.o showun.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o$(LIB_M)
-X
-fasta34r : $(COMP_LIBO) compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasta34r $(COMP_LIBO) compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o$(LIB_M)
-X
-fasta34s : $(COMP_LIBO) compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasta34s $(COMP_LIBO) compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
-X
-fastx34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fastx34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o drop_fx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-fastx34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o c_dispn.o htime.o apam.o doinit.o init_fx.o faatran.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fastx34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-fasty34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o scaleswn.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasty34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o drop_fz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-fastf34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswts.o last_tat.o tatstats_ff.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastf34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswts.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
-X
-fastf34u : $(COMP_LIBO) compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastf34u $(COMP_LIBO) compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
-X
-fastf34s : $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastf34s $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
-X
-fasts34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_fs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fasts34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
-X
-fastm34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o drop_fm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastm34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fm.o drop_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M)
-X
-tfastx34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o scaleswn.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfastx34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-tfasty34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o scaleswn.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfasty34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-tfastf34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfastf34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
-X
-tfastf34s : $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o scaleswtf.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfastf34s $(COMP_LIBO) compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
-X
-tfasts34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o drop_tfs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfasts34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o drop_tfs.o scaleswts.o tatstats_fs.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
-X
-tfastm34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o drop_tfm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfastm34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfm.o drop_tfm.o scaleswts.o tatstats_fm.o last_tat.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M)
-X
-ssearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) ssearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
-X
-osearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ssw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) osearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ssw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
-X
-usearch34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) usearch34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M)
-X
-prss34 : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) prss34 rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
-X
-prfx34 : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o faatran.o
-X $(CC) $(HFLAGS) prfx34 rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-prss34o : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) prss34o rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M)
-X
-prfx34o : rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showrss.o lib_sel.o $(NRAND).o faatran.o
-X $(CC) $(HFLAGS) prfx34o rcomp_lib.o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o faatran.o $(NRAND).o $(LIB_M)
-X
-ssearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) ssearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
-X
-ssearch34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) ssearch34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-ssearch34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o $(DROPGSW_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) ssearch34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN)_u.o htime.o apam.o doinit.o init_sw.o $(DROPGSW_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-osearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) osearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-usearch34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o scaleswn.o karlin.o dropnsw.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) usearch34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_sw.o dropnsw.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-fasta34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasta34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-fasta34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasta34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-fasta34u_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fasta34u_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showun.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fasta34r_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o scaleswn.o karlin.o $(DROPNFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fasta34r_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showrel.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fa.o $(DROPNFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fastf34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastf34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fastf34s_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o scaleswtf.o karlin.o drop_ff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastf34s_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showsum.o $(SHOWALIGN).o htime.o apam.o doinit.o init_ff.o drop_ff.o scaleswtf.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fasts34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_fs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fasts34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fastm34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o scaleswts.o last_tat.o tatstats_fm.o karlin.o drop_fm.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) fastm34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fs.o drop_fm.o scaleswts.o last_tat.o tatstats_fm.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-fastx34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_fx.o faatran.o scaleswn.o karlin.o drop_fx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fastx34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fx.o drop_fx.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-fasty34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_fy.o faatran.o scaleswn.o karlin.o drop_fz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) fasty34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_fy.o drop_fz.o faatran.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-tfasta34 : $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o scaleswn.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfasta34 $(COMP_LIBO) compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M)
-X
-tfasta34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tfa.o scaleswn.o karlin.o $(DROPTFA_O) $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfasta34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfa.o $(DROPTFA_O) scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-tfastf34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tf.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o drop_tff.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfastf34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tf.o drop_tff.o scaleswtf.o last_tat.o tatstats_ff.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-tfasts34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o c_dispn.o htime.o apam.o doinit.o init_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o drop_tfs.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o
-X $(CC) $(HFLAGS) tfasts34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfs.o drop_tfs.o scaleswts.o last_tat.o tatstats_fs.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o $(NRAND).o url_subs.o $(LIB_M) $(THR_LIBS)
-X
-tfastx34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o scaleswn.o karlin.o drop_tfx.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfastx34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfx.o drop_tfx.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-tfasty34_t : $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o scaleswn.o karlin.o drop_tfz.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o
-X $(CC) $(HFLAGS) tfasty34_t $(COMP_THRO) work_thr.o $(THR_SUBS).o compacc.o showbest.o re_getlib.o $(SHOWALIGN).o htime.o apam.o doinit.o init_tfy.o drop_tfz.o scaleswn.o karlin.o $(LGETLIB) c_dispn.o $(NCBL_LIB) lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-prss34_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showbest.o $(SHOWALIGN).o c_dispn.o url_subs.o lib_sel.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) prss34_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showbest.o $(SHOWALIGN).o c_dispn.o url_subs.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
-X
-prss34o_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o scaleswn.o karlin.o $(DROPRSS_O) llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o
-X $(CC) $(HFLAGS) prss34o_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rss.o $(DROPRSS_O) scaleswn.o karlin.o llgetaa.o showrss.o lib_sel.o $(NRAND).o pssm_asn_subs.o $(LIB_M) $(THR_LIBS)
-X
-prfx34_t : rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rfx.o scaleswn.o karlin.o drop_fx.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o url_subs.o $(NRAND).o faatran.o
-X $(CC) $(HFLAGS) prfx34_t rcomp_thr.o work_thr.o $(THR_SUBS).o compacc.o htime.o apam.o doinit.o init_rfx.o drop_fx.o scaleswn.o karlin.o llgetaa.o showbest.o mshowalign.o c_dispn.o lib_sel.o faatran.o url_subs.o $(NRAND).o $(LIB_M) $(THR_LIBS)
-X
-comp_lib.o : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c comp_lib.c
-X
-comp_mlib.o : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_MLIB -c comp_lib.c -o comp_mlib.o
-X
-rcomp_lib.o : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS comp_lib.c -o rcomp_lib.o
-X
-comp_thr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -c comp_lib.c -o comp_thr.o
-X
-comp_mthr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(THR_CC) $(CFLAGS) -DCOMP_THR -DCOMP_MLIB -c comp_lib.c -o comp_mthr.o
-X
-rcomp_thr.o : comp_lib.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(THR_CC) $(CFLAGS) -DPRSS -DCOMP_THR -c comp_lib.c -o rcomp_thr.o
-X
-work_thr.o : work_thr.c mw.h structs.h defs.h param.h thr.h
-X $(CC) $(THR_CC) $(CFLAGS) -c work_thr.c
-X
-print_pssm : print_pssm.c getseq.c karlin.c apam.c
-X $(CC) -o print_pssm $(CFLAGS) print_pssm.c getseq.c karlin.c apam.c $(LIB_M)
-X
-map_db : map_db.c uascii.h ncbl2_head.h
-X $(CC) -o map_db map_db.c
-X
-list_db : list_db.c
-X $(CC) -o list_db list_db.c
-X
-SHAR_EOF
-chmod 0644 Makefile.pcom ||
-echo 'restore of Makefile.pcom failed'
-Wc_c="`wc -c < 'Makefile.pcom'`"
-test 24893 -eq "$Wc_c" ||
- echo 'Makefile.pcom: original size 24893, current size' "$Wc_c"
-fi
-# ============= Makefile.pvcom ==============
-if test -f 'Makefile.pvcom' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pvcom (File already exists)'
-else
-echo 'x - extracting Makefile.pvcom (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvcom' &&
-X
-SHOWBESTC = mshowbest.c
-SHOWALIGN = mshowalign
-MWH = p_mw.h
-MWHP = p_mw.h w_mw.h
-X
-# normal search programs are pv3compfa, etc.
-# each main program requires a worker pv3compfa/c34.workfa
-X
-PROGS= pv34compfa pv34compsw pv34compfx pv34comptfx pv34compfy pv34comptfy pv34compfs pv34comptfs
-X
-WPROGS = c34.workfa c34.worksw c34.workgsw c34.workfx c34.worktfx c34.workfy c34.worktfy c34.workfs c34.worktfs
-# ps4compfa, etc provides a summaries of effectiveness, require superfamily
-# annotated database. ps4compss uses c34.worksw instead of c34.workgsw, thus
-# allowing high gap penalties.
-X
-SPROGS = ps34compfa ps34compsw ps34compss ps34compfx ps34compfy ps34comptfx ps34comptfy
-X
-# report highest unrelated sequences
-UPROGS = pu34compfa pu34compsw pu34compfx pu34comptfx pu34compfy pu34comptfy
-X
-vall : $(PROGS) $(WPROGS)
-X
-uall : $(UPROGS) $(WPROGS)
-X
-sall : $(SPROGS) $(WPROGS)
-X
-all : $(PROGS) $(UPROGS) $(SPROGS) $(WPROGS)
-X
-clean-up:
-X rm -f *.o $(PROGS) $(WPROGS) $(SPROGS) $(UPROGS)
-X
-install : $(PROGS) $(WPROGS)
-X cp $(PROGS) $(WPROGS) $(XDIR)
-X
-sinstall : $(SPROGS) $(WPROGS)
-X cp $(SPROGS) $(WPROGS) $(XDIR)
-X
-uinstall : $(UPROGS) $(WPROGS)
-X cp $(UPROGS) $(WPROGS) $(XDIR)
-X
-pv34compfa : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o ${LGETLIB} $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compfa p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o ${LGETLIB} $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34compfa : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34compfa p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34compfa : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34compfa p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pr4compfa : p2_complib.o compacc.o showrel.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pr4compfa p2_complib.o compacc.o showrel.o htime.o hostacc.o apam.o doinit.o init_fa.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34compsw : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compsw p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34compsw : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34compsw p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34compsw : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34compsw p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_sw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o pssm_asn_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34compss : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compss p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34compss : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34compss p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34compss : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34compss p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_ssw.o scaleswn.o $(NRAND).o karlin.o lib_sel.o url_subs.o c_dispn.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34compfs : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compfs p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34compfx : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compfx p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34compfx : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34compfx p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34compfx : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34compfx p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34compfy : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34compfy p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34compfy : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34compfy p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34compfy : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34compfy p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_fy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34comptfx : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34comptfx p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34comptfx : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34comptfx p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34comptfx : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34comptfx p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfx.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34comptfy : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34comptfy p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-ps34comptfy : p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) ps34comptfy p2_complib.o compacc.o showsum.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pu34comptfy : p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pu34comptfy p2_complib.o compacc.o showun.o $(SHOWALIGN)_u.o htime.o hostacc.o apam.o doinit.o init_tfy.o scaleswn.o $(NRAND).o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-pv34comptfs : p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB)
-X $(CC) $(HFLAGS) pv34comptfs p2_complib.o compacc.o showbest.o $(SHOWALIGN).o htime.o hostacc.o apam.o doinit.o init_tfs.o scaleswts.o $(NRAND).o tatstats_fs.o last_tat.o karlin.o c_dispn.o lib_sel.o url_subs.o $(LGETLIB) $(NCBL_LIB) $(PLIB) $(LIB_M)
-X
-c34.workfa : p2_workcomp.o $(DROPNFA_O) workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.workfa p2_workcomp.o $(DROPNFA_O) workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
-X
-c34.worksw : p2_workcomp.o dropnsw.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.worksw p2_workcomp.o dropnsw.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
-X
-c34.workgsw : p2_workcomp.o $(DROPGSW_O) workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.workgsw p2_workcomp.o $(DROPGSW_O) workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
-X
-c34.worknw : p2_workcomp.o dropnw.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.worknw p2_workcomp.o dropnw.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
-X
-c34.workfx : p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.workfx p2_workcomp.o drop_fx.o workacc.o $(NRAND).o faatran.o karlin.o $(PLIB) $(LIB_WM)
-X
-c34.workfs : p2_workcomp.o drop_fs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o
-X $(NCC) $(NFLAGS) c34.workfs p2_workcomp.o drop_fs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o $(PLIB) $(LIB_WM)
-X
-c34.worktfs : p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o
-X $(NCC) $(NFLAGS) c34.worktfs p2_workcomp.o drop_tfs.o workacc.o $(NRAND).o tatstats_fs.o faatran.o $(PLIB) $(LIB_WM)
-X
-c34.workfy : p2_workcomp.o drop_fz.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.workfy p2_workcomp.o drop_fz.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
-X
-c34.worktfx : p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.worktfx p2_workcomp.o drop_tfx.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
-X
-c34.worktfy : p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o faatran.o karlin.o
-X $(NCC) $(NFLAGS) c34.worktfy p2_workcomp.o drop_tfz.o workacc.o $(NRAND).o karlin.o faatran.o $(PLIB) $(LIB_WM)
-X
-p2_complib.o : p2_complib.c msg.h defs.h upam.h uascii.h param.h structs.h
-X $(CC) -DWORKERPGM=\"c34.work\" $(CFLAGS) p2_complib.c
-X
-p2_workcomp.o : p2_workcomp.c structs.h msg.h defs.h p_mw.h w_mw.h upam.h uascii.h param.h
-X $(NCC) $(CFLAGS) p2_workcomp.c
-X
-SHAR_EOF
-chmod 0644 Makefile.pvcom ||
-echo 'restore of Makefile.pvcom failed'
-Wc_c="`wc -c < 'Makefile.pvcom'`"
-test 13214 -eq "$Wc_c" ||
- echo 'Makefile.pvcom: original size 13214, current size' "$Wc_c"
-fi
-# ============= Makefile.pvm4 ==============
-if test -f 'Makefile.pvm4' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pvm4 (File already exists)'
-else
-echo 'x - extracting Makefile.pvm4 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvm4' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile.pvm4,v 1.35 2006/12/06 16:53:12 wrp Exp $
-#
-# tested with pvm3.4.beta7 and pvm3.3.11. Tested on DEC Alpha, x86
-# and Alpha LINUX for DEC/Compaq Alpha/LINUX
-#
-X
-#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
-#CC= cc -O -ggdb -DDEBUG
-CC= cc -ggdb -O3 -falign-loops=32 -mcpu=7450 -DMacOSX -faltivec -DSW_ALTIVEC
-X
-X
-#NCC= cc -O3 -ggdb
-NCC= cc -g -falign-loops=32 -O3 -mcpu=7450 -DMacOSX -faltivec -DSW_ALTIVEC
-X
-#ARCH = NETBSDPOWERPC
-X
-PLIB = ${PVM_ROOT}/lib/$(ARCH)/libpvm3.a
-XXDIR = /home/slib/pvm3/bin/$(ARCH)
-#XDIR = /wrpx00.p0/users/wrp/pvm3/bin/$(ARCH)
-SDIR = .
-PVMSRC = ${PVM_ROOT}/src
-X
-CFLAGS= -DPVM_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -I${PVM_ROOT}/include -DSRAND=srandom -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DSHOWSIM
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -L/usr/lib/mysql -lmysqlclient -lm -lz
-LIB_M= -lm
-LIB_WM= -lm
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-DROPGSW_O = dropgsw.o smith_waterman_altivec.o
-DROPNFA_O = drop_nfa.o
-X
-# common pv34comp programs
-include Makefile.pvcom
-X
-# common *.o files for all environments
-include Makefile.fcom
-SHAR_EOF
-chmod 0644 Makefile.pvm4 ||
-echo 'restore of Makefile.pvm4 failed'
-Wc_c="`wc -c < 'Makefile.pvm4'`"
-test 1344 -eq "$Wc_c" ||
- echo 'Makefile.pvm4: original size 1344, current size' "$Wc_c"
-fi
-# ============= Makefile.pvm4_sql ==============
-if test -f 'Makefile.pvm4_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.pvm4_sql (File already exists)'
-else
-echo 'x - extracting Makefile.pvm4_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.pvm4_sql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile.pvm4_sql,v 1.26 2006/05/19 16:29:45 wrp Exp $
-#
-# tested with pvm3.4.beta7 and pvm3.3.11. Tested on DEC Alpha, x86
-# and Alpha LINUX for DEC/Compaq Alpha/LINUX
-#
-X
-#CC=/opt/parasoft/bin.linux2/insure -g -DDEBUG
-CC= cc -g
-NCC= cc -O -g
-X
-#ARCH = NETBSDPOWERPC
-X
-PLIB = ${PVM_ROOT}/lib/$(ARCH)/libpvm3.a
-XXDIR = /seqprg/pvm3/bin/$(ARCH)
-#XDIR = /wrpx00.p0/users/wrp/pvm3/bin/$(ARCH)
-SDIR = .
-PVMSRC = ${PVM_ROOT}/src
-X
-CFLAGS= -DPVM_SRC -DUNIX -DPCOMPLIB -DBFR=1200 -DBIGMEM -I${PVM_ROOT}/include -DSRAND=srandom -DRAND=random -c -DHAS_INTTYPES -DSAMP_STATS -DMYSQL_DB -I/usr/include/mysql -DM10_CONS -DSHOWSIM
-# -DSFCHAR="'|'" -DSUPERFAMNUM
-X
-HFLAGS= -o
-NFLAGS= -o
-X
-NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-# pgsql_lib.o
-#NCBL_LIB=ncbl2_mlib.o
-LIB_M= -L/usr/lib/mysql -lmysqlclient -lm
-#LIB_M= -lm
-LIB_WM= -lm
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# non-ALTIVEC versions
-DROPGSW_O = dropgsw.o
-DROPNFA_O = drop_nfa.o
-X
-# common pv34comp programs
-include Makefile.pvcom
-X
-# common *.o files for all environments
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile.pvm4_sql ||
-echo 'restore of Makefile.pvm4_sql failed'
-Wc_c="`wc -c < 'Makefile.pvm4_sql'`"
-test 1264 -eq "$Wc_c" ||
- echo 'Makefile.pvm4_sql: original size 1264, current size' "$Wc_c"
-fi
-# ============= Makefile.sgi ==============
-if test -f 'Makefile.sgi' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.sgi (File already exists)'
-else
-echo 'x - extracting Makefile.sgi (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sgi' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-#
-# for more information on FASTA on SGI's, see:
-#
-# http://www.sgi.com/chembio/resources/fasta/index.html
-#
-# use -DBIG_LIB64 to generate 64-bit offsets in map_db .xin files. This
-# only works on SGI's with the -64 option.
-X
-CC= cc -w -64 -mips4 -O2 -TENV:X=3 -DSGI_BUG -Wl,-multigot -DIRIX
-#CC= cc -64 -mips4 -g -DSGI_BUG -DDEBUG -DIRIX
-X
-HFLAGS= -64 -mips4 -o
-NFLAGS= -64 -mips4 -o
-X
-#CC= cc -g
-#HFLAGS= -o
-#NFLAGS= -o
-X
-LIB_M= -lm
-# For R2000/R3000 MIPS Processors, use -mips1
-#
-#CC= cc -mips1 -O2
-#HFLAGS= -mips1 -o
-#NFLAGS= -mips1 -o
-#
-# For R4000 MIPS Processors, use -mips2:
-#
-#CC = cc -mips2 -O2
-#HFLAGS= -mips2 -o
-#NFLAGS= -mips2 -o
-#
-X
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=4 -DTHR_EXIT=pthread_exit -DPROGRESS -DFASTA_HOST='"crick.med.virginia.edu/fasta/cgi"' -DIS_BIG_ENDIAN -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -DSAMP_STATS -DPGM_DOC
-X
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-XXDIR = /seqprg/slib/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.sgi ||
-echo 'restore of Makefile.sgi failed'
-Wc_c="`wc -c < 'Makefile.sgi'`"
-test 1238 -eq "$Wc_c" ||
- echo 'Makefile.sgi: original size 1238, current size' "$Wc_c"
-fi
-# ============= Makefile.sun ==============
-if test -f 'Makefile.sun' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.sun (File already exists)'
-else
-echo 'x - extracting Makefile.sun (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sun' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-X
-#CC= cc -g -xarch=v8plusa
-X
-# switches for 64-bit addressing
-CC= cc -fast -xO4 -xarch=v9
-#CC= cc -g -xarch=v9
-X
-# for SUNMP, use -DTHR_EXIT=thr_exit
-# HZ=100 for Solaris x86
-# -DIS_LITTLE_ENDIAN for Solaris x86
-X
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit -DPROGRESS -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
-HFLAGS= -o
-NFLAGS= -o
-X
-# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-# for files > 2 GB
-X
-#for Sun pthreads (preferred, pthreads used on all other platforms)
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-#for Sun threads (no longer necessary as Sun supports pthreads)
-#THR_SUBS = uthr_subs2
-#THR_LIBS = -lthread
-#THR_CC =
-X
-LIB_M= -lmopt
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPGSW_O = dropgsw.o
-DROPRSS_O = dropnsw.o
-DROPTFA_O = drop_tfa.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.sun ||
-echo 'restore of Makefile.sun failed'
-Wc_c="`wc -c < 'Makefile.sun'`"
-test 1150 -eq "$Wc_c" ||
- echo 'Makefile.sun: original size 1150, current size' "$Wc_c"
-fi
-# ============= Makefile.sun_x86 ==============
-if test -f 'Makefile.sun_x86' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.sun_x86 (File already exists)'
-else
-echo 'x - extracting Makefile.sun_x86 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.sun_x86' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-X
-# switches for 64-bit addressing - AMD64
-CC= cc -g -fast -xarch=amd64 -DSW_SSE2
-X
-# debugging options
-#CC= cc -g -DDEBUG -xarch=amd64 -DSW_SSE2
-X
-# for SUNMP, use -DTHR_EXIT=thr_exit
-# HZ=100 for Solaris x86
-# Solaris X86 is little endian - be certain IS_BIG_ENDIAN is not defined
-X
-CFLAGS= -DSHOWSIM -DUNIX -DTIMES -DHZ=100 -DBIGMEM -DSFCHAR="':'" -DMAX_WORKERS=2 -DTHR_EXIT=thr_exit -DPROGRESS -DFASTA_setscope -DUSE_MMAP -DBIG_LIB64 -DHAS_INTTYPES -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -DUSE_FSEEKO -DM10_CONS -DSAMP_STATS -DPGM_DOC
-HFLAGS= -o
-NFLAGS= -o
-X
-# use -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-# for files > 2 GB
-X
-#for Sun pthreads (preferred, pthreads used on all other platforms)
-THR_SUBS = pthr_subs2
-THR_LIBS = -lpthread
-THR_CC =
-X
-#for Sun threads (no longer necessary as Sun supports pthreads)
-#THR_SUBS = uthr_subs2
-#THR_LIBS = -lthread
-#THR_CC =
-X
-LIB_M= -lmopt
-XXDIR = /seqprg/bin
-X
-DROPNFA_O = drop_nfa.o
-DROPTFA_O = drop_tfa.o
-DROPGSW_O = dropgsw.o smith_waterman_sse2.o
-DROPRSS_O = dropnsw.o smith_waterman_sse2.o
-X
-# renamed (fasta34) programs
-include Makefile34m.common
-# conventional (fasta3) names
-# include Makefile.common
-SHAR_EOF
-chmod 0644 Makefile.sun_x86 ||
-echo 'restore of Makefile.sun_x86 failed'
-Wc_c="`wc -c < 'Makefile.sun_x86'`"
-test 1264 -eq "$Wc_c" ||
- echo 'Makefile.sun_x86: original size 1264, current size' "$Wc_c"
-fi
-# ============= Makefile.tc ==============
-if test -f 'Makefile.tc' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile.tc (File already exists)'
-else
-echo 'x - extracting Makefile.tc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile.tc' &&
-#
-# makefile for fasta3, fasta3_t. Use makefile.pvm for pvcompxx.
-X
-# MSDOS Borland C commands
-#CC= bcc -mm -w-rvl -w-pro -3 -O
-CC= bcc32 -WC -w-rvl -w-pro -3 -O
-#CFLAGS= -IC:\bc5\include -DFAR_PTR -DALLOCN0 -DMSDOS
-CFLAGS=-IC:\bc5\include -DBIGMEM -DALLOCN0 -DMSDOS
-#CL= bcc -mm
-CL= bcc32 -WC
-LFLAGS= -LC:\bc5\lib
-NRAND= nrand
-HZ=100
-X
-X
-XXDIR = /seqprg/slib/bin
-X
-SPROGS = fasta34.exe ssearch34.exe fastx34.exe tfastx34.exe fasty34.exe tfasty34.exe fasts34.exe tfasts34.exe prss34.exe prfx34.exe
-X
-PROGS = $(SPROGS)
-X
-all : $(PROGS)
-X
-sall: $(SPROGS)
-X
-clean-up:
-X del *.obj $(PROGS)
-X
-install:
-X cp $(PROGS) $(XDIR)
-X
-fasta34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fa.obj scaleswn.obj karlin.obj drop_nfa.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj
-X $(CL) $(LFLAGS) -efasta34.exe comp_lib.obj showalig.obj init_fa.obj drop_nfa.obj getseq.obj @fasta3.rsp -lm
-X
-fastx34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fx.obj scaleswn.obj karlin.obj drop_fx.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj getopt.obj
-X $(CL) $(LFLAGS) -efastx34.exe comp_lib.obj showalig.obj init_fx.obj drop_fx.obj faatran.obj getseq.obj @fasta3.rsp -lm
-X
-fasty34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fy.obj scaleswn.obj karlin.obj drop_fz.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
-X $(CL) $(LFLAGS) -efasty34.exe comp_lib.obj showalig.obj init_fy.obj drop_fz.obj faatran.obj getseq.obj @fasta3.rsp -lm
-X
-tfastx34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfx.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj scaleswn.obj karlin.obj tdropfx.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
-X $(CL) $(LFLAGS) -etfastx34.exe comp_lib.obj showalig.obj init_tfx.obj tdropfx.obj faatran.obj getseq.obj @fasta3.rsp -lm
-X
-tfasty34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfy.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj scaleswn.obj karlin.obj tdropfz.obj c_dispn.obj lib_sel.obj faatran.obj url_subs.obj nrand.obj
-X $(CL) $(LFLAGS) -etfasty34.exe comp_lib.obj showalig.obj init_tfy.obj tdropfz.obj faatran.obj getseq.obj @fasta3.rsp -lm
-X
-ssearch34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_sw.obj scaleswn.obj karlin.obj dropgsw.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj pssm_asn_subs.obj
-X $(CL) $(LFLAGS) -essearch34.exe comp_lib.obj showalig.obj init_sw.obj dropgsw.obj getseq.obj pssm_asn_subs.obj @fasta3.rsp -lm
-X
-fasts34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_fs.obj dropfs2.obj scaleswt.obj karlin.obj tatsta_s.obj last_tat.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj
-X $(CL) $(LFLAGS) -efasts34.exe comp_lib.obj showalig.obj init_fs.obj dropfs2.obj getseq.obj @fasts3.rsp -lm
-X
-tfasts34.exe : comp_lib.obj compacc.obj showbest.obj showalig.obj htime.obj apam.obj doinit.obj init_tfs.obj droptfs2.obj scaleswt.obj karlin.obj tatsttfs.obj last_tat.obj getseq.obj lgetlib.obj regetlib.obj ncbl2_lib.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj faatran.obj getopt.obj
-X $(CL) $(LFLAGS) -etfasts34.exe comp_lib.obj showalig.obj init_tfs.obj droptfs2.obj getseq.obj faatran.obj @tfasts3.rsp -lm
-X
-prss34.exe : rcomplib.obj compacc.obj htime.obj apam.obj doinit.obj init_rss.obj scaleswn.obj karlin.obj dropgsw.obj llgetaa.obj showrss.obj lib_sel.obj nrand.obj getopt.obj pssm_asn_subs.obj
-X $(CL) $(LFLAGS) -eprss34.exe rcomplib.obj init_rss.obj dropgsw.obj llgetaa.obj nrand.obj @prss3.rsp -lm
-X
-prfx34.exe : rcomplib.obj compacc.obj htime.obj apam.obj doinit.obj init_rfx.obj scaleswn.obj karlin.obj drop_fx.obj llgetaa.obj faatran.obj showrss.obj lib_sel.obj nrand.obj getopt.obj
-X $(CL) $(LFLAGS) -eprfx34.exe rcomplib.obj init_rfx.obj drop_fx.obj faatran.obj llgetaa.obj nrand.obj @prss3.rsp -lm
-X
-comp_lib.obj : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(CFLAGS) -DPGM_DOC -ocomp_lib.obj -c comp_lib.c
-X
-rcomplib.obj : comp_lib.c mw.h structs.h defs.h param.h
-X $(CC) $(CFLAGS) -DPRSS -orcomplib.obj -c comp_lib.c
-X
-htime.obj : htime.c
-X $(CC) $(CFLAGS) -c htime.c
-X
-hxgetaa.obj : hxgetaa.c altlib.h upam.h uascii.h
-X $(CC) $(CFLAGS) -c hxgetaa.c
-X
-init_sw.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DSSEARCH -oinit_sw.obj initfa.c
-X
-init_ssw.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DOSEARCH -oinit_ssw.obj initfa.c
-X
-init_rss.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DPRSS -oinit_rss.obj initfa.c
-X
-init_rfx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DPRFX -oinit_rfx.obj initfa.c
-X
-init_fa.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -oinit_fa.obj initfa.c
-X
-init_ff.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -oinit_ff.obj initfa.c
-X
-init_tf.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTF -DTFAST -oinit_tf.obj initfa.c
-X
-init_fs.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -oinit_fs.obj initfa.c
-X
-init_fm.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -oinit_fm.obj initfa.c
-X
-init_tfs.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTS -DTFAST -oinit_tfs.obj initfa.c
-X
-init_tfm.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTM -DTFAST -oinit_tfm.obj initfa.c
-X
-init_tfa.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTA -DTFAST -oinit_tfa.obj initfa.c
-X
-init_fx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -oinit_fx.obj initfa.c
-X
-init_tfx.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTX -DTFAST -oinit_tfx.obj initfa.c
-X
-init_fy.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -oinit_fy.obj initfa.c
-X
-init_tfy.obj : initfa.c defs.h param.h upam.h structs.h
-X $(CC) $(THR_CC) $(CFLAGS) -c -DFASTY -DTFAST -oinit_tfy.obj initfa.c
-X
-doinit.obj : doinit.c defs.h param.h upam.h structs.h
-X $(CC) $(CFLAGS) -c doinit.c
-X
-compacc.obj : compacc.c upam.h uascii.h param.h structs.h mw.h defs.h
-X $(CC) $(CFLAGS) -c compacc.c
-X
-showbest.obj : mshowbest.c mw.h defs.h param.h structs.h mm_file.h
-X $(CC) $(CFLAGS) -oshowbest.obj -c mshowbest.c
-X
-showrss.obj : showrss.c mw.h defs.h param.h structs.h
-X $(CC) $(CFLAGS) -c showrss.c
-X
-showalig.obj : mshowalign.c mw.h defs.h structs.h param.h
-X $(CC) $(CFLAGS) -oshowalig.obj -c mshowalign.c
-X
-c_dispn.obj : c_dispn.c defs.h structs.h param.h
-X $(CC) $(CFLAGS) -c c_dispn.c
-X
-lib_sel.obj : lib_sel.c defs.h structs.h
-X $(CC) $(CFLAGS) -c lib_sel.c
-X
-scaleswn.obj : scaleswn.c defs.h mw.h alt_parms.h
-X $(CC) $(CFLAGS) -c scaleswn.c
-X
-scaleswt.obj : scaleswt.c defs.h mw.h alt_parms.h
-X $(CC) $(CFLAGS) -c scaleswt.c
-X
-tatsta_s.obj : tatstats.c defs.h mw.h alt_parms.h tatstats.h
-X $(CC) $(CFLAGS) -DFASTS -otatsta_s.obj -c tatstats.c
-X
-tatsttfs.obj : tatstats.c defs.h mw.h alt_parms.h tatstats.h
-X $(CC) $(CFLAGS) -DTFAST -DFASTS -otatsttfs.obj -c tatstats.c
-X
-karlin.obj : karlin.c param.h
-X $(CC) $(CFLAGS) -c karlin.c
-X
-scaleswg.obj : scaleswg.c defs.h mw.h alt_parms.h
-X $(CC) $(CFLAGS) -c scaleswg.c
-X
-drop_nfa.obj : dropnfa.c param.h defs.h
-X $(CC) $(CFLAGS) -odrop_nfa.obj -c dropnfa.c
-X
-drop_ff.obj : dropffa.c mw.h param.h defs.h
-X $(CC) $(CFLAGS) -odrop_ff.obj -c dropffa.c
-X
-drop_tff.obj : dropffa.c mw.h param.h defs.h
-X $(CC) $(CFLAGS) -DTFAST -odrop_tff.obj -c dropffa.c
-X
-drop_fx.obj : dropfx.c mw.h upam.h param.h defs.h
-X $(CC) $(CFLAGS) -DFASTX -odrop_fx.obj -c dropfx.c
-X
-dropfs2.obj : dropfs2.c mw.h upam.h param.h defs.h tatstats.h
-X $(CC) $(CFLAGS) -DFASTS -c dropfs2.c
-X
-droptfs2.obj : dropfs2.c mw.h upam.h param.h defs.h tatstats.h
-X $(CC) $(CFLAGS) -DTFAST -DFASTS -c -odroptfs2.obj dropfs2.c
-X
-tdropfx.obj : dropfx.c mw.h upam.h param.h defs.h
-X $(CC) $(CFLAGS) -DTFAST -otdropfx.obj -c dropfx.c
-X
-drop_fz.obj : dropfz2.c mw.h upam.h param.h defs.h aamap.h
-X $(CC) $(CFLAGS) -odrop_fz.obj -c dropfz2.c
-X
-tdropfz.obj : dropfz2.c mw.h upam.h param.h defs.h aamap.h
-X $(CC) $(CFLAGS) -DTFAST -otdropfz.obj -c dropfz2.c
-X
-dropnsw.obj : dropnsw.c mw.h upam.h param.h structs.h
-X $(CC) $(CFLAGS) -c dropnsw.c
-X
-dropgsw.obj : dropgsw.c mw.h upam.h param.h structs.h
-X $(CC) $(CFLAGS) -c dropgsw.c
-X
-dropnw.obj : dropnw.c mw.h upam.h param.h structs.h
-X $(CC) $(CFLAGS) -c dropnw.c
-X
-llgetaa.obj : llgetaa.c altlib.h upam.h uascii.h
-X $(CC) $(CFLAGS) -DNOLIB -c llgetaa.c
-X
-lgetlib.obj : nmgetlib.c altlib.h upam.h uascii.h
-X $(CC) $(CFLAGS) -olgetlib.obj -c nmgetlib.c
-X
-regetlib.obj : re_getlib.c mw.h mm_file.h
-X $(CC) $(CFLAGS) -oregetlib.obj -c re_getlib.c
-X
-getseq.obj : getseq.c defs.h uascii.h structs.h upam.h
-X $(CC) $(CFLAGS) -c getseq.c
-X
-ncbl_lib.obj : ncbl_lib.c ncbl_head.h
-X $(CC) $(CFLAGS) -c ncbl_lib.c
-X
-ncbl2_lib.obj : ncbl2_mlib.c ncbl2_head.h
-X $(CC) $(CFLAGS) -c ncbl2_mlib.c
-X
-faatran.obj : faatran.c upam.h uascii.h
-X $(CC) $(CFLAGS) -c faatran.c
-X
-url_subs.obj : url_subs.c structs.h param.h
-X $(CC) $(CFLAGS) -c url_subs.c
-X
-nrand48.obj : nrand48.c
-X $(CC) $(CFLAGS) -c nrand48.c
-X
-nrand.obj : nrand.c
-X $(CC) $(CFLAGS) -c nrand.c
-X
-getopt.obj : getopt.c
-X $(CC) $(CFLAGS) -c getopt.c
-SHAR_EOF
-chmod 0644 Makefile.tc ||
-echo 'restore of Makefile.tc failed'
-Wc_c="`wc -c < 'Makefile.tc'`"
-test 9746 -eq "$Wc_c" ||
- echo 'Makefile.tc: original size 9746, current size' "$Wc_c"
-fi
-# ============= Makefile34.common ==============
-if test -f 'Makefile34.common' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34.common (File already exists)'
-else
-echo 'x - extracting Makefile34.common (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.common' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34.common,v 1.9 2004/02/19 18:29:43 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-# COMP_LIBO=comp_lib.o
-# COMP_THRO=comp_thr.o
-# GETSEQO = getseq.o
-X
-# use for multiple query sequences
-# work with prss34 (yet)
-COMP_LIBO=comp_mlib.o
-COMP_THRO=comp_mthr.o
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-LGETLIB=getseq.o lgetlib.o
-NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -lm
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-SHAR_EOF
-chmod 0644 Makefile34.common ||
-echo 'restore of Makefile34.common failed'
-Wc_c="`wc -c < 'Makefile34.common'`"
-test 1304 -eq "$Wc_c" ||
- echo 'Makefile34.common: original size 1304, current size' "$Wc_c"
-fi
-# ============= Makefile34.common_sql ==============
-if test -f 'Makefile34.common_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34.common_sql (File already exists)'
-else
-echo 'x - extracting Makefile34.common_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.common_sql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34.common_sql,v 1.10 2005/12/07 17:22:02 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-COMP_LIBO=comp_lib.o
-COMP_THRO=comp_thr.o
-GETSEQO = getseq.o
-# use for multiple query sequences, requires "-n" for DNA fasta, does not
-# work with prss34 (yet)
-#COMP_LIB=comp_mlib.o
-#COMP_THRO=comp_mthr.o
-#
-# standard nxgetaa, no memory mapping for 0 - 6
-LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-# LGETLIB=getseq.o lgetlib.o lgetaa_m.o
-# NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
-#LIB_M= -lm
-NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-#NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile34.common_sql ||
-echo 'restore of Makefile34.common_sql failed'
-Wc_c="`wc -c < 'Makefile34.common_sql'`"
-test 1330 -eq "$Wc_c" ||
- echo 'Makefile34.common_sql: original size 1330, current size' "$Wc_c"
-fi
-# ============= Makefile34.nmk_com ==============
-if test -f 'Makefile34.nmk_com' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34.nmk_com (File already exists)'
-else
-echo 'x - extracting Makefile34.nmk_com (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34.nmk_com' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34.nmk_com,v 1.2 2006/10/06 17:26:47 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-# COMP_LIBO=comp_lib.obj
-# COMP_THRO=comp_thr.obj
-# GETSEQO = getseq.obj
-X
-# use for multiple query sequences
-COMP_LIBO=comp_mlib.obj
-COMP_THRO=comp_mthr.obj
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-LGETLIB=getseq.obj lgetlib.obj
-NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-# no memory mapping for Win32
-#LGETLIB= lgetlib.obj lgetaa_m.obj
-X
-NRAND=nrand
-X
-# normally use ncbl2_mlib.c
-NCBL_LIB=ncbl2_mlib.obj
-#LIB_M= -lm
-X
-include Makefile.nm_pcom
-X
-include Makefile.nm_fcom
-SHAR_EOF
-chmod 0755 Makefile34.nmk_com ||
-echo 'restore of Makefile34.nmk_com failed'
-Wc_c="`wc -c < 'Makefile34.nmk_com'`"
-test 765 -eq "$Wc_c" ||
- echo 'Makefile34.nmk_com: original size 765, current size' "$Wc_c"
-fi
-# ============= Makefile34m.common ==============
-if test -f 'Makefile34m.common' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34m.common (File already exists)'
-else
-echo 'x - extracting Makefile34m.common (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34m.common,v 1.11 2003/02/27 14:26:14 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-# COMP_LIBO=comp_lib.o
-# COMP_THRO=comp_thr.o
-# GETSEQO = getseq.o
-X
-# use for multiple query sequences
-# work with prss34 (yet)
-COMP_LIBO=comp_mlib.o
-COMP_THRO=comp_mthr.o
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-#LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -lm
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-SHAR_EOF
-chmod 0644 Makefile34m.common ||
-echo 'restore of Makefile34m.common failed'
-Wc_c="`wc -c < 'Makefile34m.common'`"
-test 1311 -eq "$Wc_c" ||
- echo 'Makefile34m.common: original size 1311, current size' "$Wc_c"
-fi
-# ============= Makefile34m.common_mysql ==============
-if test -f 'Makefile34m.common_mysql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34m.common_mysql (File already exists)'
-else
-echo 'x - extracting Makefile34m.common_mysql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_mysql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_mysql,v 1.3 2005/12/07 17:22:02 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-#COMP_LIBO=comp_lib.o
-#COMP_THRO=comp_thr.o
-#GETSEQO = getseq.o
-X
-# use for multiple query sequences
-# work with prss34 (yet)
-COMP_LIBO=comp_mlib.o
-COMP_THRO=comp_mthr.o
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
-#LIB_M= -L/usr/lib/pgsql/ -lpq -lm -lcrypto -lssl
-# LIB_M= -lm
-NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-#NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
-# NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile34m.common_mysql ||
-echo 'restore of Makefile34m.common_mysql failed'
-Wc_c="`wc -c < 'Makefile34m.common_mysql'`"
-test 1395 -eq "$Wc_c" ||
- echo 'Makefile34m.common_mysql: original size 1395, current size' "$Wc_c"
-fi
-# ============= Makefile34m.common_pgsql ==============
-if test -f 'Makefile34m.common_pgsql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34m.common_pgsql (File already exists)'
-else
-echo 'x - extracting Makefile34m.common_pgsql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_pgsql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_pgsql,v 1.3 2005/12/07 17:22:02 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-#COMP_LIBO=comp_lib.o
-#COMP_THRO=comp_thr.o
-#GETSEQO = getseq.o
-X
-# use for multiple query sequences
-# work with prss34 (yet)
-COMP_LIBO=comp_mlib.o
-COMP_THRO=comp_mthr.o
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/local/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-# LIB_M= -L/usr/local/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
-# LIB_M= -lm
-#NCBL_LIB=ncbl2_mlib.o mysql_lib.o
-NCBL_LIB=ncbl2_mlib.o pgsql_lib.o
-# NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile34m.common_pgsql ||
-echo 'restore of Makefile34m.common_pgsql failed'
-Wc_c="`wc -c < 'Makefile34m.common_pgsql'`"
-test 1407 -eq "$Wc_c" ||
- echo 'Makefile34m.common_pgsql: original size 1407, current size' "$Wc_c"
-fi
-# ============= Makefile34m.common_sql ==============
-if test -f 'Makefile34m.common_sql' -a X"$1" != X"-c"; then
- echo 'x - skipping Makefile34m.common_sql (File already exists)'
-else
-echo 'x - extracting Makefile34m.common_sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Makefile34m.common_sql' &&
-#
-# $Name: fa_34_26_5 $ - $Id: Makefile34m.common_sql,v 1.14 2005/12/07 17:22:02 wrp Exp $
-#
-# commands common to all architectures
-# if your architecture does not support "include", append at the end.
-#
-X
-# use for "normal" fasta34(_t) programs - only one query
-#COMP_LIBO=comp_lib.o
-#COMP_THRO=comp_thr.o
-#GETSEQO = getseq.o
-X
-# use for multiple query sequences
-# work with prss34 (yet)
-COMP_LIBO=comp_mlib.o
-COMP_THRO=comp_mthr.o
-GETSEQO =
-X
-# standard nxgetaa, no memory mapping for 0 - 6
-#LGETLIB=getseq.o lgetlib.o
-#NGETLIB=nmgetlib
-X
-# memory mapping for 0FASTA, 5PIRVMS, 6GCGBIN
-LGETLIB= $(GETSEQO) lgetlib.o lgetaa_m.o
-NGETLIB=nmgetlib
-X
-NRAND=nrandom
-X
-# use ncbl_lib.c for BLAST1.4 support instead of ncbl2_mlib.c
-#NCBL_LIB=ncbl_lib.o
-X
-# this option should support both formats (BLAST1.4 not currently supported):
-#NCBL_LIB=ncbl_lib.o ncbl2_mlib.o
-X
-# normally use ncbl2_mlib.c
-#NCBL_LIB=ncbl2_mlib.o
-#LIB_M= -lm
-X
-# this option supports NCBI BLAST2 and mySQL
-# it requires "-I/usr/local/include/mysql -DMYSQL_DB" in CFLAGS
-# and "-L/usr/lib/mysql -lmysqlclient -lz" in LIB_M
-# some systems may also require a LD_LIBRARY_PATH change
-# LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -lm
-LIB_M= -L/usr/lib/mysql -lmysqlclient -lz -L/usr/local/pgsql/lib -lpq -lm -lcrypto -lssl
-# LIB_M= -lm
-NCBL_LIB=ncbl2_mlib.o mysql_lib.o pgsql_lib.o
-# NCBL_LIB=ncbl2_mlib.o
-X
-include Makefile.pcom
-X
-include Makefile.fcom
-X
-SHAR_EOF
-chmod 0644 Makefile34m.common_sql ||
-echo 'restore of Makefile34m.common_sql failed'
-Wc_c="`wc -c < 'Makefile34m.common_sql'`"
-test 1406 -eq "$Wc_c" ||
- echo 'Makefile34m.common_sql: original size 1406, current size' "$Wc_c"
-fi
-# ============= README ==============
-if test -f 'README' -a X"$1" != X"-c"; then
- echo 'x - skipping README (File already exists)'
-else
-echo 'x - extracting README (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'README' &&
-X
-Up to date release notes are available in the file readme.v34t0
-X
-Documentation on the fasta3 version programs is available in the files:
-X
-X fasta3.1 (unix man page)
-X ssearch3.1 (unix man page)
-X
-X readme.v34t0 (text descriptions of bug fixes and version history)
-X
-X fasta3x.me (unix -me nroff file)
-X fasta3x.doc (text version of fast3x.me)
-X
-The latter two files provide background information on installing the
-fasta programs (in particular, the FASTLIBS file), that new users of
-the fasta3 package may find useful. Note that many non-database
-searching programs are available in the fasta20 package.
-X
-X
-Documentation on the pvm3/mpi versions of the programs is available
-in:
-X readme.pvm_3.4
-X
-X
-Bill Pearson
-wrp@virginia.edu
-SHAR_EOF
-chmod 0644 README ||
-echo 'restore of README failed'
-Wc_c="`wc -c < 'README'`"
-test 722 -eq "$Wc_c" ||
- echo 'README: original size 722, current size' "$Wc_c"
-fi
-# ============= README.versions ==============
-if test -f 'README.versions' -a X"$1" != X"-c"; then
- echo 'x - skipping README.versions (File already exists)'
-else
-echo 'x - extracting README.versions (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'README.versions' &&
-X
-August, 2002
-X
-The latest versions of the FASTA search programs are in fasta3.shar.Z.
-This file contains the fasta34 series of programs. fasta34 also runs
-the exact same functions threaded (fasta33, fasta33_t) and in parallel
-using PVM and MPI.
-X
-Here is a list of the programs, and where they can be found:
-X
-program fasta2 fasta34 replaced by
-X
-fasta yes fasta34, fasta34_t
-X
-ssearch yes ssearch34, ssearch34_t
-X
-tfasta yes tfasta34, tfasta34_t (tfastx34 preferred)
-X
-fastx yes fastx34, fastx34_t
-fasty no fasty34, fasty34_t
-X
-tfastx yes tfastx34, tfastx34_t
-tfasty no tfasty34, tfasty34_t
-X
-fasts/tfasts no fasts34(_t), tfasts34(_t)
-X
-fastf/tfastf no fastf34(_t), tfastf34(_t)
-X
-prss yes prss34
-X
-prfx no prfx34
-X
-================
-X
-The following programs are part of the fasta2 program package. The
-latest version of fasta2 is fasta20u66.shar.Z.
-X
-The most useful fasta2 programs are lalign and plalign, which calculate
-multiple non-intersecting local alignments using Webb Miller's "sim"
-implementation of the Waterman-Eggert algorithm.
-X
-You should not use the fasta2 programs for library searching; the
-fasta3 programs are more sensitive and have better statistics.
-X
-lalign yes no
-X
-plalign yes no
-X
-flalign yes no
-X
-align yes no
-X
-align0 yes no
-X
-lfasta yes no
-X
-randseq yes no
-X
-crandseq yes no
-X
-aacomp yes no
-X
-bestscor yes no
-X
-grease yes no
-X
-tgrease yes no
-X
-garnier yes no
-X
-================
-X
-The fasta3.shar.Z and fasta2.shar.Z files a Unix "shell archive" files.
-To unpack them, go into an empty directory and type:
-X
-X zcat fasta3.shar.Z | sh
-X
-You can then make the programs by typing:
-X
-X make all
-X
-Makefile's are available for many platforms, e.g.
-X
-X make -f Makefile.linux
-X make -f Makefile.sun
-X
-etc. You are much better off using the pre-configured Makefile.???
-than trying to edit the Makefile (which is designed for a Compaq/HP
-Alpha).
-X
-Precompiled versions of the programs for Mac and Windows are available
-in the mac_fasta and win32_fasta directories. If you are running
-MacOSX from the command line, use the Unix version (fasta3.shar.Z and
-Makefile.os_x).
-X
-SHAR_EOF
-chmod 0644 README.versions ||
-echo 'restore of README.versions failed'
-Wc_c="`wc -c < 'README.versions'`"
-test 2614 -eq "$Wc_c" ||
- echo 'README.versions: original size 2614, current size' "$Wc_c"
-fi
-# ============= Readme.Mac ==============
-if test -f 'Readme.Mac' -a X"$1" != X"-c"; then
- echo 'x - skipping Readme.Mac (File already exists)'
-else
-echo 'x - extracting Readme.Mac (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'Readme.Mac' &&
-X
-X 1-January-2002
-X
-X
-This is the first release of the fasta34t10 distribution for the
-Macintosh. In addition to the traditional "classic" applications that have
-been available in the past for Macintosh FASTA distributions, this release
-also includes "Carbon" versions of the FASTA programs. Carbon
-applications are designed to work under both MacOSX and under MacOS8.6 and
-later.
-X
-This is the first "Carbon" implementation of the FASTA programs. In this
-first implementation, the Mac-like interface for selecting sequence files
-has been disabled, as the older file interface I used is not available
-under Carbon. This should be implemented in the future.
-X
-Today, the main advantage of the Carbon implementation is its ability to
-run under MacOSX without needing the classic environment. However, the
-unix version of the FASTA programs compiles and runs fine under MacOSX,
-simply type:
-X
-X make -f Makefile.os_x all
-X
-However, the Unix version of the FASTA programs expects sequence files and
-libraries to have lines that end with a linefeed character ('\n', \012),
-which is different from the traditional Mac return ('\r', \015) end-of-line
-character. If you work with Mac-like text files under MacOSX, try the "carbon"
-FASTA programs. If you work with Unix-like text files, use the Unix
-version.
-X
-Bill Pearson
-SHAR_EOF
-chmod 0644 Readme.Mac ||
-echo 'restore of Readme.Mac failed'
-Wc_c="`wc -c < 'Readme.Mac'`"
-test 1332 -eq "$Wc_c" ||
- echo 'Readme.Mac: original size 1332, current size' "$Wc_c"
-fi
-# ============= a_mark.h ==============
-if test -f 'a_mark.h' -a X"$1" != X"-c"; then
- echo 'x - skipping a_mark.h (File already exists)'
-else
-echo 'x - extracting a_mark.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'a_mark.h' &&
-/* a_mark.h - symbols used to indicate match/mismatch alignment code */
-X
-/* copyright (c) 2003 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: a_mark.h,v 1.1 2003/06/26 19:36:32 wrp Exp $ */
-X
-#define M_BLANK 0
-#define M_NEG 1
-#define M_ZERO 2
-#define M_POS 3
-#define M_IDENT 4
-#define M_DEL 5
-SHAR_EOF
-chmod 0644 a_mark.h ||
-echo 'restore of a_mark.h failed'
-Wc_c="`wc -c < 'a_mark.h'`"
-test 321 -eq "$Wc_c" ||
- echo 'a_mark.h: original size 321, current size' "$Wc_c"
-fi
-# ============= aamap.h ==============
-if test -f 'aamap.h' -a X"$1" != X"-c"; then
- echo 'x - skipping aamap.h (File already exists)'
-else
-echo 'x - extracting aamap.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'aamap.h' &&
-X
-/* aamap.gbl character and number translations */
-X
-/* $Name: fa_34_26_5 $ - $Id: aamap.h,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp $ */
-X
-char aacmap[64]={
-X 'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
-X 'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
-X 'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
-X 'X','Y','X','Y','S','S','S','S','X','C','W','C','L','F','L','F'
-X };
-X
-int aamap[64]; /* integer aa values */
-int aamapr[64]; /* reverse sequence map */
-X
-X
-SHAR_EOF
-chmod 0644 aamap.h ||
-echo 'restore of aamap.h failed'
-Wc_c="`wc -c < 'aamap.h'`"
-test 504 -eq "$Wc_c" ||
- echo 'aamap.h: original size 504, current size' "$Wc_c"
-fi
-# ============= ag_stats.c ==============
-if test -f 'ag_stats.c' -a X"$1" != X"-c"; then
- echo 'x - skipping ag_stats.c (File already exists)'
-else
-echo 'x - extracting ag_stats.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ag_stats.c' &&
-/* this procedure implements Altschul's pre-calculated values for lambda, K */
-X
-/* $Name: fa_34_26_5 $ - $Id: ag_stats.c,v 1.5 2006/04/12 18:00:01 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-X
-#include "alt_parms.h"
-X
-static double K, Lambda, H;
-X
-/* look_p() is defined further down in this file; declare it before use */
-int look_p(struct alt_p parm[], int gap, int ext,
-X           double *K, double *Lambda, double *H);
-X
-/* ag_parm - select pre-calculated K, Lambda, H for a scoring matrix and
-X   gap-penalty pair.
-X
-X   pam_type  matrix name: "BL50"/"BLOSUM50", "BL62"/"BLOSUM62", "P250",
-X             "P120", "MD_10", "MD_20", "MD_40", "DNA" or "+5/-4"
-X   gdelval   gap penalty; unless OLD_FASTA_GAP is defined, ggapval is added
-X             to it before the table lookup
-X   ggapval   gap extension penalty
-X
-X   On a successful lookup the file-scope K, Lambda and H are set from the
-X   matching table row.
-X
-X   returns the look_p() result (1 = row found, 0 = not found), or 0 when
-X   pam_type does not name one of the tables above.
-*/
-int
-ag_parm(char *pam_type, int gdelval, int ggapval)
-{
-X  struct alt_p *parm;
-X  int t_gdelval, t_ggapval;
-X
-#ifdef OLD_FASTA_GAP
-X  t_gdelval = gdelval;
-X  t_ggapval = ggapval;
-#else
-X  t_gdelval = gdelval+ggapval;
-X  t_ggapval = ggapval;
-#endif
-X
-X  /* map the matrix name onto its parameter table */
-X  if (strcmp(pam_type,"BL50")==0 || strcmp(pam_type,"BLOSUM50")==0)
-X    parm = bl50_p;
-X  else if (strcmp(pam_type,"BL62")==0 || strcmp(pam_type,"BLOSUM62")==0)
-X    parm = bl62_p;
-X  else if (strcmp(pam_type,"P250")==0) parm = p250_p;
-X  else if (strcmp(pam_type,"P120")==0) parm = p120_p;
-X  else if (strcmp(pam_type,"MD_10")==0) parm = md10_p;
-X  else if (strcmp(pam_type,"MD_20")==0) parm = md20_p;
-X  else if (strcmp(pam_type,"MD_40")==0) parm = md40_p;
-X  else if (strcmp(pam_type,"DNA")==0 || strcmp(pam_type,"+5/-4")==0)
-X    parm = nt54_p;
-X  else return 0;	/* unknown matrix: no statistics available */
-X
-X  return look_p(parm,t_gdelval,t_ggapval,&K,&Lambda,&H);
-}
-X
-/* look_p - find the table row matching a gap/extension penalty pair.
-X
-X   parm[]  parameter table; row 0 is used when the gap penalty exceeds
-X           parm[1].gap, rows 1.. are scanned until a row with gap <= 0
-X   gap,ext penalties as passed by the caller; both are negated before being
-X           compared with the table values
-X   K, Lambda, H  outputs, written only when a row is selected
-X
-X   returns 1 if a row was selected, 0 otherwise.
-*/
-int
-look_p(struct alt_p parm[], int gap, int ext,
-X       double *K, double *Lambda, double *H)
-{
-X  struct alt_p *hit = NULL;
-X  int want_gap = -gap;
-X  int want_ext = -ext;
-X  int row;
-X
-X  if (want_gap > parm[1].gap) {
-X    /* larger than any tabulated gap penalty: use the first row */
-X    hit = &parm[0];
-X  }
-X  else {
-X    for (row = 1; parm[row].gap > 0; row++) {
-X      if (parm[row].gap > want_gap) continue;	/* not there yet */
-X      if (parm[row].gap < want_gap) break;	/* passed it */
-X      /* gap matches - now compare the extension penalty */
-X      if (parm[row].ext > want_ext) continue;
-X      if (parm[row].ext == want_ext) hit = &parm[row];
-X      break;
-X    }
-X  }
-X
-X  if (hit == NULL) return 0;
-X
-X  *K = hit->K;
-X  *Lambda = hit->Lambda;
-X  *H = hit->H;
-X  return 1;
-}
-X
-/* E1_to_s - convert an expectation value into a score, using the
-X   file-scope K, Lambda and H.
-X
-X   e_val   expectation value
-X   n0, n1  the two sequence lengths
-X
-X   Solves  e_val = K * np * mp * exp(-Lambda * score)  for score, i.e.
-X   score = (log(K np mp) - log(e_val)) / Lambda, rounded by adding 0.5.
-X
-X   returns the score, never less than 0.
-*/
-int E1_to_s(double e_val, int n0, int n1) {
-X  double len0, len1, corr0, corr1;
-X  double eff0, eff1;
-X  int s;
-X
-X  len0 = (double)n0;
-X  corr0 = log(len0)/H;
-X
-X  len1 = (double)n1;
-X  corr1 = log(len1)/H;
-X
-X  /* each length is reduced by both log(n)/H terms, but not below 1.0 */
-X  eff0 = len0 - corr0 - corr1;
-X  eff1 = len1 - corr0 - corr1;
-X
-X  if (eff1 < 1.0) eff1 = 1.0;
-X  if (eff0 < 1.0) eff0 = 1.0;
-X
-X  s = (int)((log( K * eff0 * eff1) - log(e_val))/Lambda +0.5);
-X  return (s < 0) ? 0 : s;
-}
-X
-/* s_to_E4 - convert a score into a probability-like value scaled by
-X   10000.0, using the file-scope K, Lambda and H.
-X
-X   score   alignment score
-X   n0, n1  the two sequence lengths
-X
-X   Computes K * np * mp * exp(-Lambda * score); values above 0.01 are
-X   converted with 1 - exp(-x) before scaling.
-*/
-double s_to_E4(int score, int n0, int n1)
-{
-X  double len0, len1, corr0, corr1;
-X  double eff0, eff1;
-X  double pv;
-X
-X  len0 = (double)n0;
-X  corr0 = log(len0)/H;
-X
-X  len1 = (double)n1;
-X  corr1 = log(len1)/H;
-X
-X  /* each length is reduced by both log(n)/H terms, but not below 1.0 */
-X  eff0 = len0 - corr0 - corr1;
-X  eff1 = len1 - corr0 - corr1;
-X
-X  if (eff1 < 1.0) eff1 = 1.0;
-X  if (eff0 < 1.0) eff0 = 1.0;
-X
-X  pv = K * eff1 * eff0 * exp ( - Lambda * score);
-X
-X  if (pv > 0.01) pv = 1.0 - exp(-pv);
-X
-X  return pv * 10000.0;
-}
-X
-SHAR_EOF
-chmod 0644 ag_stats.c ||
-echo 'restore of ag_stats.c failed'
-Wc_c="`wc -c < 'ag_stats.c'`"
-test 3021 -eq "$Wc_c" ||
- echo 'ag_stats.c: original size 3021, current size' "$Wc_c"
-fi
-# ============= aln_structs.h ==============
-if test -f 'aln_structs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping aln_structs.h (File already exists)'
-else
-echo 'x - extracting aln_structs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'aln_structs.h' &&
-X
-/* include guard: the two alignment structures are declared only once */
-#ifndef A_STRUCT
-#define A_STRUCT
-X
-/* a_struct - display and alignment coordinates plus summary counts for one
-X   alignment.  Fields without an inline comment are not described by
-X   anything in this header - NOTE(review): document them from their users. */
-struct a_struct {
-X int smin0; /* coordinate of display start in seqc0 */
-X int smin1; /* coordinate of display start in seqc1 */
-X int amin0, amax0; /* coordinate of alignment start in seqc0 */
-X int amin1, amax1; /* coordinate of alignment start in seqc1 */
-X
-X int llen;
-X int llcntx, llcntx_flg, showall;
-X
-X int qlrev, qlfact;
-X int llrev, llfact, llmult;
-X int frame;
-X
-X int a_len; /* consensus alignment length */
-X int nident, nsim, ngap_q, ngap_l, nfs; /* number of identities, gaps in q, l */
-X long d_start0,d_stop0;
-X long d_start1,d_stop1;
-};
-X
-/* a_res_str - alignment boundaries in the two sequences together with an
-X   integer array res[] and its count nres (presumably the encoded alignment
-X   and its length - TODO confirm against the code that fills it). */
-struct a_res_str {
-X int min0, max0; /* boundaries of alignment in aa0 */
-X int min1, max1; /* boundaries of alignment in aa1 */
-X int *res;
-X int nres;
-};
-#endif
-SHAR_EOF
-chmod 0644 aln_structs.h ||
-echo 'restore of aln_structs.h failed'
-Wc_c="`wc -c < 'aln_structs.h'`"
-test 758 -eq "$Wc_c" ||
- echo 'aln_structs.h: original size 758, current size' "$Wc_c"
-fi
-# ============= alt_parms.h ==============
-if test -f 'alt_parms.h' -a X"$1" != X"-c"; then
- echo 'x - skipping alt_parms.h (File already exists)'
-else
-echo 'x - extracting alt_parms.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'alt_parms.h' &&
-/* tables of Altschul-Gish parameters */
-X
-/* $Name: fa_34_26_5 $ - $Id: alt_parms.h,v 1.4 2003/09/08 18:40:04 wrp Exp $ */
-X
-X
-/* first entry must be for (inf,inf) penalty */
-X
-/* alt_p - one row of a parameter table: a gap/extension penalty pair and
-X   the Lambda, K and H values tabulated for it.  Initializers in this file
-X   are written in field order: {gap, ext, Lambda, K, H}. */
-struct alt_p {
-X int gap; /* gap penalty (stored as a positive number in the tables) */
-X int ext; /* gap extension penalty (positive in the tables) */
-X float Lambda;
-X float K;
-X float H;
-};
-X
-/* BL80 1/2 bit */
-/* Rows are {gap, ext, Lambda, K, H}.  Like every other table in this file
-X   the list ends with a {-1, ...} row, so that a scan that stops at the
-X   first row with gap <= 0 cannot run past the end of the array. */
-struct alt_p bl80_p[] = {
-X {0, 0, 0.343, 0.177, 0.66},
-X {14, 2, 0.336, 0.150, 0.62},
-X {12, 2, 0.328, 0.130, 0.54},
-X {12, 1, 0.314, 0.096, 0.41},
-X {11, 2, 0.320, 0.110, 0.51},
-X {11, 1, 0.296, 0.066, 0.36},
-X {10, 2, 0.311, 0.097, 0.46},
-X {10, 1, 0.282, 0.052, 0.29},
-X { 9, 2, 0.292, 0.069, 0.33},
-X { 9, 1, 0.248, 0.026, 0.18},
-X { 8, 2, 0.271, 0.050, 0.27},
-X { 8, 1, 0.189, 0.0071, 0.07},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-/* BL62 1/2 bit */
-/* Rows are {gap, ext, Lambda, K, H}, ordered by decreasing gap and, within a
-X   gap, by decreasing ext; the {-1, ...} row terminates the table.
-X   The {9, 3} row replaces a second, identical {9, 4} row that left 9/3
-X   without an entry; it carries the same values as {9, 4}, as other adjacent
-X   ext pairs in this table do - NOTE(review): confirm against the published
-X   table. */
-struct alt_p bl62_p[] = {
-X {0, 0, 0.318, 0.13, 0.40},
-X {12, 3, 0.305, 0.10, 0.38},
-X {12, 2, 0.300, 0.09, 0.34},
-X {12, 1, 0.275, 0.05, 0.25},
-X {11, 3, 0.301, 0.09, 0.36},
-X {11, 2, 0.286, 0.07, 0.29},
-X {11, 1, 0.255, 0.035, 0.19},
-X {10, 4, 0.293, 0.08, 0.33},
-X {10, 3, 0.281, 0.06, 0.29},
-X {10, 2, 0.266, 0.04, 0.24},
-X {10, 1, 0.216, 0.014, 0.12},
-X {9, 5, 0.286, 0.08, 0.29},
-X {9, 4, 0.273, 0.06, 0.25},
-X {9, 3, 0.273, 0.06, 0.25},
-X {9, 2, 0.244, 0.030, 0.18},
-X {9, 1, 0.176, 0.008, 0.06},
-X {8, 8, 0.270, 0.06, 0.25},
-X {8, 7, 0.270, 0.06, 0.25},
-X {8, 6, 0.262, 0.05, 0.23},
-X {8, 5, 0.262, 0.05, 0.23},
-X {8, 4, 0.262, 0.05, 0.23},
-X {8, 3, 0.243, 0.035, 0.18},
-X {8, 2, 0.215, 0.021, 0.12},
-X {7, 7, 0.247, 0.05, 0.18},
-X {7, 6, 0.247, 0.05, 0.18},
-X {7, 5, 0.230, 0.030, 0.15},
-X {7, 4, 0.230, 0.030, 0.15},
-X {7, 3, 0.208, 0.021, 0.11},
-X {7, 2, 0.164, 0.009, 0.06},
-X {6, 6, 0.200, 0.021, 0.10},
-X {6, 5, 0.200, 0.021, 0.10},
-X {6, 4, 0.179, 0.014, 0.08},
-X {6, 3, 0.153, 0.010, 0.05},
-X {5, 5, 0.131, 0.009, 0.04},
-X {-1, -1, -1.0, -1.0, -1.0},
-};
-X
-/* BL50 1/3 bit */
-X
-struct alt_p bl50_p[] = {
-X {0, 0, 0.232, 0.11, 0.34},
-X {16, 4, 0.222, 0.08, 0.31},
-X {16, 3, 0.213, 0.06, 0.27},
-X {16, 2, 0.207, 0.05, 0.24},
-X {16, 1, 0.180, 0.024, 0.15},
-X {15, 8, 0.222, 0.09, 0.31},
-X {15, 7, 0.219, 0.08, 0.29},
-X {15, 6, 0.219, 0.08, 0.29},
-X {15, 5, 0.216, 0.07, 0.28},
-X {15, 4, 0.216, 0.07, 0.28},
-X {15, 3, 0.210, 0.06, 0.25},
-X {15, 2, 0.202, 0.05, 0.22},
-X {15, 1, 0.166, 0.018, 0.11},
-X {14, 8, 0.218, 0.08, 0.29},
-X {14, 7, 0.214, 0.07, 0.27},
-X {14, 6, 0.214, 0.07, 0.27},
-X {14, 5, 0.214, 0.07, 0.27},
-X {14, 4, 0.205, 0.05, 0.24},
-X {14, 3, 0.201, 0.05, 0.22},
-X {14, 2, 0.188, 0.034, 0.17},
-X {14, 1, 0.140, 0.009, 0.07},
-X {13, 8, 0.211, 0.06, 0.27},
-X {13, 7, 0.205, 0.05, 0.24},
-X {13, 6, 0.205, 0.05, 0.24},
-X {13, 5, 0.205, 0.05, 0.24},
-X {13, 4, 0.202, 0.05, 0.22},
-X {13, 3, 0.188, 0.034, 0.18},
-X {13, 2, 0.174, 0.025, 0.13},
-X {13, 1, 0.114, 0.006, 0.04},
-X {12, 7, 0.205, 0.06, 0.24},
-X {12, 6, 0.197, 0.05, 0.21},
-X {12, 5, 0.197, 0.05, 0.21},
-X {12, 4, 0.192, 0.04, 0.18},
-X {12, 3, 0.178, 0.028, 0.15},
-X {12, 2, 0.158, 0.019, 0.10},
-X {11, 8, 0.197, 0.05, 0.21},
-X {11, 7, 0.190, 0.04, 0.19},
-X {11, 6, 0.190, 0.04, 0.19},
-X {11, 5, 0.184, 0.04, 0.17},
-X {11, 4, 0.177, 0.031, 0.15},
-X {11, 3, 0.167, 0.028, 0.11},
-X {11, 2, 0.130, 0.009, 0.06},
-X {10, 8, 0.183, 0.04, 0.17},
-X {10, 7, 0.178, 0.035, 0.16},
-X {10, 6, 0.178, 0.035, 0.16},
-X {10, 5, 0.168, 0.026, 0.13},
-X {10, 4, 0.156, 0.020, 0.10},
-X {10, 3, 0.139, 0.013, 0.07},
-X {10, 2, 0.099, 0.007, 0.03},
-X {9, 7, 0.164, 0.029, 0.13},
-X {9, 6, 0.152, 0.021, 0.10},
-X {9, 5, 0.152, 0.021, 0.10},
-X {9, 4, 0.134, 0.014, 0.07},
-X {9, 3, 0.107, 0.008, 0.04},
-X {8, 8, 0.139, 0.017, 0.08},
-X {8, 7, 0.134, 0.015, 0.07},
-X {8, 6, 0.127, 0.013, 0.06},
-X {8, 5, 0.117, 0.011, 0.05},
-X {8, 4, 0.101, 0.009, 0.03},
-X {7, 7, 0.100, 0.010, 0.04},
-X {7, 6, 0.094, 0.010, 0.03},
-X {-1, -1, -1.0, -1.0, -1.0},
-};
-X
-struct alt_p p250_p[] = {
-X {0, 0, 0.229, 0.09, 0.23},
-X {16, 4, 0.217, 0.07, 0.21},
-X {16, 3, 0.208, 0.05, 0.18},
-X {16, 2, 0.200, 0.04, 0.16},
-X {16, 1, 0.172, 0.018, 0.09},
-X {15, 5, 0.215, 0.06, 0.20},
-X {15, 4, 0.208, 0.05, 0.18},
-X {15, 3, 0.203, 0.04, 0.16},
-X {15, 2, 0.193, 0.035, 0.14},
-X {15, 1, 0.154, 0.012, 0.07},
-X {14, 6, 0.212, 0.06, 0.19},
-X {14, 5, 0.204, 0.05, 0.17},
-X {14, 4, 0.204, 0.05, 0.17},
-X {14, 3, 0.194, 0.035, 0.14},
-X {14, 2, 0.180, 0.025, 0.11},
-X {14, 1, 0.131, 0.008, 0.04},
-X {13, 6, 0.206, 0.06, 0.17},
-X {13, 5, 0.196, 0.04, 0.14},
-X {13, 4, 0.196, 0.04, 0.14},
-X {13, 3, 0.184, 0.029, 0.12},
-X {13, 2, 0.163, 0.016, 0.08},
-X {13, 1, 0.110, 0.008, 0.03},
-X {12, 7, 0.199, 0.05, 0.15},
-X {12, 6, 0.191, 0.04, 0.13},
-X {12, 5, 0.191, 0.04, 0.13},
-X {12, 4, 0.181, 0.029, 0.12},
-X {12, 3, 0.170, 0.022, 0.10},
-X {12, 2, 0.145, 0.012, 0.06},
-X {11, 7, 0.186, 0.04, 0.13},
-X {11, 6, 0.180, 0.034, 0.11},
-X {11, 5, 0.180, 0.034, 0.11},
-X {11, 4, 0.165, 0.021, 0.09},
-X {11, 3, 0.153, 0.017, 0.07},
-X {11, 2, 0.122, 0.009, 0.04},
-X {10, 8, 0.175, 0.031, 0.11},
-X {10, 7, 0.171, 0.029, 0.10},
-X {10, 6, 0.165, 0.024, 0.09},
-X {10, 5, 0.158, 0.020, 0.08},
-X {10, 4, 0.148, 0.017, 0.07},
-X {10, 3, 0.129, 0.012, 0.05},
-X {9, 7, 0.151, 0.020, 0.07},
-X {9, 6, 0.146, 0.019, 0.06},
-X {9, 5, 0.137, 0.015, 0.05},
-X {9, 4, 0.121, 0.011, 0.04},
-X {9, 3, 0.102, 0.010, 0.03},
-X {8, 8, 0.123, 0.014, 0.05},
-X {8, 7, 0.123, 0.014, 0.05},
-X {8, 6, 0.115, 0.012, 0.04},
-X {8, 5, 0.107, 0.011, 0.03},
-X {7, 7, 0.090, 0.014, 0.02},
-X {-1, -1, -1.0, -1.0, -1.0},
-};
-X
-struct alt_p p120_p[] = {
-X {0, 0, 0.342, 0.19, 0.63},
-X {12, 4, 0.334, 0.14, 0.60},
-X {12, 3, 0.330, 0.13, 0.57},
-X {12, 2, 0.330, 0.13, 0.57},
-X {12, 1, 0.219, 0.11, 0.46},
-X {11, 3, 0.330, 0.13, 0.57},
-X {11, 2, 0.323, 0.12, 0.51},
-X {11, 1, 0.296, 0.06, 0.38},
-X {10, 5, 0.323, 0.12, 0.54},
-X {10, 4, 0.314, 0.09, 0.50},
-X {10, 3, 0.314, 0.09, 0.50},
-X {10, 2, 0.301, 0.07, 0.42},
-X {10, 1, 0.273, 0.04, 0.28},
-X {9, 5, 0.316, 0.11, 0.49},
-X {9, 4, 0.311, 0.10, 0.45},
-X {9, 3, 0.311, 0.10, 0.45},
-X {9, 2, 0.284, 0.05, 0.35},
-X {9, 1, 0.239, 0.023, 0.18},
-X {8, 6, 0.307, 0.10, 0.43},
-X {8, 5, 0.295, 0.08, 0.39},
-X {8, 4, 0.295, 0.08, 0.39},
-X {8, 3, 0.284, 0.06, 0.34},
-X {8, 2, 0.262, 0.04, 0.26},
-X {8, 1, 0.183, 0.009, 0.08},
-X {7, 7, 0.286, 0.08, 0.34},
-X {7, 6, 0.286, 0.08, 0.34},
-X {7, 5, 0.276, 0.06, 0.31},
-X {7, 4, 0.276, 0.06, 0.31},
-X {7, 3, 0.255, 0.04, 0.24},
-X {7, 2, 0.224, 0.023, 0.16},
-X {6, 6, 0.248, 0.04, 0.23},
-X {6, 5, 0.248, 0.04, 0.23},
-X {6, 4, 0.234, 0.033, 0.19},
-X {6, 3, 0.216, 0.025, 0.15},
-X {6, 2, 0.160, 0.009, 0.06},
-X {5, 5, 0.191, 0.019, 0.11},
-X {5, 4, 0.173, 0.013, 0.09},
-X {5, 3, 0.134, 0.006, 0.05},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-struct alt_p bl55_p[] = {
-X {0, 0, 0.224, 0.12, 0.36},
-X {16, 4, 0.213, 0.08, 0.32},
-X {16, 3, 0.205, 0.07, 0.28},
-X {16, 2, 0.198, 0.06, 0.23},
-X {16, 1, 0.164, 0.020, 0.12},
-X {15, 8, 0.212, 0.09, 0.31},
-X {15, 7, 0.209, 0.08, 0.30},
-X {15, 6, 0.209, 0.08, 0.30},
-X {15, 5, 0.205, 0.07, 0.28},
-X {15, 4, 0.205, 0.07, 0.28},
-X {15, 3, 0.199, 0.06, 0.25},
-X {15, 2, 0.190, 0.05, 0.20},
-X {15, 1, 0.146, 0.013, 0.09},
-X {14, 7, 0.207, 0.08, 0.29},
-X {14, 6, 0.203, 0.07, 0.27},
-X {14, 5, 0.203, 0.07, 0.27},
-X {14, 4, 0.195, 0.05, 0.24},
-X {14, 3, 0.189, 0.04, 0.21},
-X {14, 2, 0.175, 0.030, 0.16},
-X {14, 1, 0.119, 0.006, 0.05},
-X {13, 8, 0.201, 0.07, 0.27},
-X {13, 7, 0.196, 0.06, 0.24},
-X {13, 6, 0.196, 0.06, 0.24},
-X {13, 5, 0.196, 0.06, 0.24},
-X {13, 4, 0.191, 0.05, 0.21},
-X {13, 3, 0.176, 0.032, 0.17},
-X {13, 2, 0.158, 0.020, 0.12},
-X {12, 8, 0.195, 0.06, 0.24},
-X {12, 7, 0.188, 0.05, 0.21},
-X {12, 6, 0.188, 0.05, 0.21},
-X {12, 5, 0.188, 0.05, 0.21},
-X {12, 4, 0.180, 0.04, 0.18},
-X {12, 3, 0.165, 0.026, 0.14},
-X {12, 2, 0.140, 0.014, 0.08},
-X {11, 8, 0.185, 0.05, 0.20},
-X {11, 7, 0.179, 0.04, 0.18},
-X {11, 6, 0.179, 0.04, 0.18},
-X {11, 5, 0.171, 0.033, 0.16},
-X {11, 4, 0.163, 0.027, 0.13},
-X {11, 3, 0.151, 0.022, 0.10},
-X {11, 2, 0.110, 0.008, 0.04},
-X {10, 10, 0.173, 0.04, 0.16},
-X {10, 9, 0.173, 0.04, 0.16},
-X {10, 8, 0.167, 0.035, 0.15},
-X {10, 7, 0.167, 0.035, 0.15},
-X {10, 6, 0.167, 0.035, 0.15},
-X {10, 5, 0.155, 0.025, 0.12},
-X {10, 4, 0.142, 0.017, 0.09},
-X {10, 3, 0.121, 0.011, 0.06},
-X {9, 9, 0.152, 0.026, 0.11},
-X {9, 8, 0.152, 0.026, 0.11},
-X {9, 7, 0.152, 0.026, 0.11},
-X {9, 6, 0.137, 0.018, 0.08},
-X {9, 5, 0.137, 0.018, 0.08},
-X {9, 4, 0.117, 0.011, 0.05},
-X {9, 3, 0.090, 0.007, 0.03},
-X {8, 8, 0.125, 0.014, 0.07},
-X {8, 7, 0.119, 0.013, 0.06},
-X {8, 6, 0.113, 0.012, 0.05},
-X {8, 5, 0.102, 0.010, 0.04},
-X {8, 4, 0.085, 0.009, 0.03},
-X {7, 7, 0.087, 0.010, 0.03},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-struct alt_p nt54_p[] =
-{
-X {0, 0, 0.192, 0.173, 0.36},
-X {16, 4, 0.192, 0.177, 0.36},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-struct alt_p rnt54_p[] =
-{
-X {0, 0, 0.192, 0.173, 0.36},
-X {16, 4, 0.192, 0.177, 0.36},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-struct alt_p nt32_p[] = {
-X {0, 0, 0.2712, 0.131, 0.22},
-X {18, 2, 0.2620, 0.100, 0.22},
-X {16, 4, 0.2600, 0.098, 0.22},
-X {16, 2, 0.2540, 0.081, 0.19},
-X {12, 4, 0.2340, 0.054, 0.15},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-struct alt_p nt13_p[] = {
-X {0, 0, 1.374, 0.711, 1.31},
-X {4, 1, 1.36, 0.67, 1.30},
-X {3, 1, 1.34, 0.58, 1.19},
-X {2, 1, 1.21, 0.34, 0.77},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-/* PAM-10 (1/10 Hartley ~ 1/3 bit scale) */
-X
-struct alt_p md10_p[] = {
-X {0, 0, 0.2299, 0.309, 3.45},
-X {20, 4, 0.222, 0.21, 3.1},
-X {20, 2, 0.218, 0.18, 2.9},
-X {18, 4, 0.220, 0.20, 2.9},
-X {18, 2, 0.217, 0.18, 2.7},
-X {16, 4, 0.217, 0.19, 2.8},
-X {16, 2, 0.212, 0.17, 2.3},
-X {14, 4, 0.212, 0.17, 2.5},
-X {14, 2, 0.205, 0.15, 1.9},
-X {12, 4, 0.206, 0.16, 2.1},
-X {12, 2, 0.190, 0.11, 1.3},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-/* PAM-20 (1/10 Hartley ~ 1/3 bit scale) */
-struct alt_p md20_p[] = {
-X {0, 0, 0.230, 0.287, 2.94},
-X {20, 4, 0.221, 0.19, 2.6},
-X {20, 2, 0.219, 0.18, 2.5},
-X {18, 4, 0.220, 0.19, 2.5},
-X {18, 2, 0.218, 0.18, 2.3},
-X {16, 4, 0.218, 0.18, 2.4},
-X {16, 2, 0.213, 0.17, 2.0},
-X {14, 4, 0.213, 0.17, 2.1},
-X {14, 2, 0.204, 0.14, 1.6},
-X {12, 4, 0.207, 0.17, 1.8},
-X {12, 2, 0.187, 0.10, 1.1},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-X
-/* PAM-40 (1/10 Hartley ~ 1/3 bit scale) */
-struct alt_p md40_p[] = {
-X {0, 0, 0.2293, 0.257, 2.22},
-X {20, 4, 0.225, 0.22, 2.1},
-X {20, 2, 0.222, 0.20, 1.9},
-X {18, 4, 0.224, 0.22, 2.0},
-X {18, 2, 0.220, 0.20, 1.8},
-X {16, 4, 0.219, 0.19, 1.8},
-X {16, 2, 0.212, 0.16, 1.5},
-X {14, 4, 0.211, 0.15, 1.6},
-X {14, 2, 0.199, 0.11, 1.2},
-X {12, 4, 0.203, 0.14, 1.3},
-X {12, 2, 0.177, 0.064, 0.7},
-X {-1, -1, -1.0, -1.0, -1.0}
-};
-SHAR_EOF
-chmod 0644 alt_parms.h ||
-echo 'restore of alt_parms.h failed'
-Wc_c="`wc -c < 'alt_parms.h'`"
-test 10311 -eq "$Wc_c" ||
- echo 'alt_parms.h: original size 10311, current size' "$Wc_c"
-fi
-# ============= altlib.h ==============
-if test -f 'altlib.h' -a X"$1" != X"-c"; then
- echo 'x - skipping altlib.h (File already exists)'
-else
-echo 'x - extracting altlib.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'altlib.h' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: altlib.h,v 1.9 2006/02/07 17:52:06 wrp Exp $ */
-X
-/* #ifdef UNIX */
-/* ncbi blast 1.3 format */
-/*
-#define NCBIBL13 11
-extern int ncbl_getliba();
-extern void ncbl_ranlib();
-void ncbl_closelib();
-*/
-#define NCBIBL20 12
-/* #endif */
-X
-/* LASTLIB is one more than the highest library-format index compiled in.
-X   The replacement text is parenthesized so that the macro is safe inside
-X   larger expressions. */
-#ifdef MYSQL_DB
-#define MYSQL_LIB 16
-#define LASTLIB (MYSQL_LIB+1)
-#endif
-X
-#ifdef PGSQL_DB
-#define PGSQL_LIB 17
-/* PGSQL_LIB is the larger index: replace a LASTLIB set for MYSQL_DB rather
-X   than redefining the macro with different text */
-#ifdef LASTLIB
-#undef LASTLIB
-#endif
-#define LASTLIB (PGSQL_LIB+1)
-#endif
-X
-#if !defined (LASTLIB) && defined(NCBIBL20)
-#define LASTLIB (NCBIBL20+1)
-#endif
-#if !defined (LASTLIB)
-#define LASTLIB 10
-#endif
-X
-#define FASTA_F 0
-#define DEFAULT 0
-#define FULLGB 1
-#define UNIXPIR 2
-#define EMBLSWISS 3
-#define INTELLIG 4
-#define VMSPIR 5
-#define GCGBIN 6
-#define LASTTXT 6
-X
-int agetlib(); void aranlib(); /* pearson fasta format */
-int lgetlib(); void lranlib(); /* full uncompressed GB FULLGB*/
-int pgetlib(); void pranlib(); /* PIR UNIX protein UNIXPIR */
-int egetlib(); void eranlib(); /* EMBL/SWISS-PROT EMBLSWISS */
-int igetlib(); void iranlib(); /* Intelligenetics INTELLIG */
-int vgetlib(); void vranlib(); /* PIR VMS format */
-int gcg_getlib(); void gcg_ranlib(); /* GCG 2bit format */
-X
-#ifdef NCBIBL20
-extern int ncbl2_getliba(); /* ncbi blast 2.0 format */
-extern void ncbl2_ranlib();
-void ncbl2_closelib();
-#endif
-X
-#ifdef MYSQL_DB
-extern int mysql_getlib();
-extern void mysql_ranlib();
-int mysql_closelib();
-#endif
-X
-/* getliba[] - table of library "get" functions, one slot per library format
-X   index.  Slots 0-6 follow the FASTA_F .. GCGBIN codes defined above;
-X   slots 7-9 fall back to agetlib.  The remaining slots exist only when
-X   UNIX is defined. */
-int (*getliba[LASTLIB])()={
-X agetlib,lgetlib,pgetlib,egetlib,
-X igetlib,vgetlib,gcg_getlib,agetlib,
-X agetlib,agetlib
-#ifdef UNIX
-X ,agetlib
-/* slot 11: the blast 1.3 reader when NCBIBL13 is defined, otherwise the
-X   blast 2.0 reader */
-#ifdef NCBIBL13
-X ,ncbl_getliba
-#else
-X ,ncbl2_getliba
-#endif
-/* slot 12 == NCBIBL20 */
-#ifdef NCBIBL20
-X ,ncbl2_getliba
-#endif
-/* slots 13-15 are agetlib placeholders so that mysql_getlib lands in
-X   slot 16 == MYSQL_LIB */
-#ifdef MYSQL_DB
-X ,agetlib
-X ,agetlib
-X ,agetlib
-X ,mysql_getlib
-#endif
-#endif
-};
-X
-/* ranliba[] - table of library "ran" functions, laid out slot-for-slot like
-X   the getliba[] table: slots 0-6 follow the FASTA_F .. GCGBIN codes,
-X   slots 7-9 fall back to aranlib, and the remaining slots exist only when
-X   UNIX is defined. */
-void (*ranliba[LASTLIB])()={
-X aranlib,lranlib,pranlib,eranlib,
-X iranlib,vranlib,gcg_ranlib,aranlib,
-X aranlib,aranlib
-#ifdef UNIX
-X ,aranlib
-/* slot 11: blast 1.3 when NCBIBL13 is defined, otherwise blast 2.0 */
-#ifdef NCBIBL13
-X ,ncbl_ranlib
-#else
-X ,ncbl2_ranlib
-#endif
-/* slot 12 == NCBIBL20 */
-#ifdef NCBIBL20
-X ,ncbl2_ranlib
-#endif
-/* slots 13-15 are aranlib placeholders so that mysql_ranlib lands in
-X   slot 16 == MYSQL_LIB */
-#ifdef MYSQL_DB
-X ,aranlib
-X ,aranlib
-X ,aranlib
-X ,mysql_ranlib
-#endif
-#endif
-};
-X
-X
-/* mmap()ed functions */
-/* Compiled only with USE_MMAP.  Both tables have seven slots, indexed like
-X   the text formats FASTA_F (0) .. GCGBIN (6); a NULL slot has no mmap()ed
-X   function. */
-#ifdef USE_MMAP
-int agetlibm(); void aranlibm();
-int lgetlibm(); void lranlibm();
-void vranlibm();
-int gcg_getlibm();
-X
-/* slot 5 (VMSPIR) uses agetlibm */
-int (*getlibam[])()={
-X agetlibm,lgetlibm, NULL, NULL,NULL,agetlibm,gcg_getlibm
-};
-X
-/* NOTE(review): slot 6 (GCGBIN) uses vranlibm although getlibam[6] is
-X   gcg_getlibm and no gcg_ranlibm is declared here - confirm intended */
-void (*ranlibam[])()={
-X aranlibm,lranlibm,NULL,NULL,NULL,vranlibm,vranlibm
-};
-#endif
-SHAR_EOF
-chmod 0644 altlib.h ||
-echo 'restore of altlib.h failed'
-Wc_c="`wc -c < 'altlib.h'`"
-test 2319 -eq "$Wc_c" ||
- echo 'altlib.h: original size 2319, current size' "$Wc_c"
-fi
-# ============= apam.c ==============
-if test -f 'apam.c' -a X"$1" != X"-c"; then
- echo 'x - skipping apam.c (File already exists)'
-else
-echo 'x - extracting apam.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'apam.c' &&
-/* pam.c 19-June-86
-X copyright (c) 1987 William R. Pearson
-X read in the alphabet and pam matrix data
-X designed for universal matcher
-X
-X This version reads BLAST format (square) PAM files
-*/
-X
-/* $Name: fa_34_26_5 $ - $Id: apam.c,v 1.41 2007/03/31 18:47:20 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-X
-#include "defs.h"
-#include "param.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#undef XTERNAL
-X
-extern void alloc_pam (int d1, int d2, struct pstruct *ppst);
-X
-/* pam_opts - strip an option suffix from a scoring matrix name.
-X
-X   smstr  matrix name, modified in place: it is cut at the first '-', or,
-X          when there is no '-', at the first '+'
-X   ppst   pam_ms and pamoff are reset to 0 and then set from the suffix:
-X            "-MS" / "-ms"  ->  pam_ms = 1
-X            "-<n>"         ->  pamoff = atoi(n)
-X            "+<n>"         ->  pamoff = -atoi(n)
-*/
-void
-pam_opts(char *smstr, struct pstruct *ppst) {
-X  char *opt;
-X
-X  ppst->pam_ms = 0;
-X  ppst->pamoff = 0;
-X
-X  opt = strchr(smstr,'-');
-X  if (opt != NULL) {
-X    if (strncmp(opt+1,"MS",2)==0 || strncmp(opt+1,"ms",2)==0)
-X      ppst->pam_ms = 1;
-X    else
-X      ppst->pamoff = atoi(opt+1);
-X    *opt = '\0';
-X    return;
-X  }
-X
-X  /* no '-' suffix: look for a '+' offset instead */
-X  opt = strchr(smstr,'+');
-X  if (opt != NULL) {
-X    ppst->pamoff = -atoi(opt+1);
-X    *opt = '\0';
-X  }
-}
-X
-/* modified 13-Oct-2005 to accommodate asymmetrical matrices */
-X
-/* initpam - read a BLAST-format (square) scoring matrix file.
-X
-X   mfname  matrix file name; pam_opts() is applied to it first, so the
-X           string is cut in place at the first '-' (or '+')
-X   ppst    parameter structure; sq[], hsq[], sqx[], pam2[0][][], nsq, nsqx,
-X           pam_h, pam_l, nt_align and pamfile are filled in here
-X
-X   The first line that does not start with '#' is the alphabet; each of the
-X   following nsq lines is a residue letter followed by nsq integer scores.
-X   The global nascii[] (nucleotide) or aascii[] (protein) map is rebuilt to
-X   match the alphabet that was read.
-X
-X   returns 1 on success, 0 if the file cannot be opened.  A missing or
-X   short matrix line ends the program with exit(1).
-*/
-int
-initpam (char *mfname, struct pstruct *ppst)
-{
-X  char line[512], *lp;
-X  int i, j, iaa, pval;
-X  int *hsq, nsq;
-X  int *sascii;
-X  char *sq;
-X  int ess_tmp, max_val, min_val;
-X  int have_es = 0;
-X  FILE *fmat;
-X
-X  pam_opts(mfname, ppst);
-X
-X  if ((fmat = fopen (mfname, "r")) == NULL)
-X    {
-X      printf ("***WARNING*** cannot open scoring matrix file %s\n", mfname);
-X      fprintf (stderr,"***WARNING*** cannot open scoring matrix file %s\n", mfname);
-X      return 0;
-X    }
-X
-/*
-X   the size of the alphabet is determined in advance
-*/
-X  hsq = ppst->hsq;
-X  sq = ppst->sq;
-X
-X  ppst->nt_align = (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA);
-X
-/*
-X   look for alphabet line, skipping the comments
-X   alphabet ends up in line[]
-*/
-X  while (fgets (line, sizeof(line), fmat) != NULL && line[0]=='#');
-X
-X  /* decide whether this is a protein or DNA matrix */
-X  if (ppst->nt_align) sascii = &nascii[0];
-X  else sascii = &aascii[0];
-X
-/*
-X   re-initialize sascii[] for matrix alphabet
-*/
-X
-X  /* save ',' value used by FASTS/FASTM/FASTF */
-X  ess_tmp = sascii[','];
-X
-/* clear out sascii */
-X  for (i = 0; i <= AAMASK; i++) sascii[i] = NA;
-X
-/* set end of line stop */
-X  sascii[0] = sascii['\r'] = sascii['\n'] = EL;
-X
-X  sascii[','] = ess_tmp;
-X
-/* read the alphabet - determine alphabet nsq */
-X  sq[0] = '\0';
-X  for (i = 0, nsq = 1; line[i]; i++) {
-X    if (line[i] == '*') have_es = 1;
-X    if (line[i] > ' ') sq[nsq++] = toupper (line[i]);
-X  }
-X  sq[nsq]='\0';
-X  nsq--;
-X
-/* report the alphabet that was read */
-X  fprintf(stderr,"sq[%d]: %s\n",nsq,sq+1);
-X
-/* initialize sascii */
-X  for (iaa = 1; iaa <= nsq; iaa++) {
-X    sascii[sq[iaa]] = iaa;
-X  }
-X  if (ppst->dnaseq==SEQT_DNA) {
-X    sascii['U'] = sascii['T'];
-X    sascii['u'] = sascii['t'];
-X  }
-X  else if (ppst->dnaseq==SEQT_RNA) {
-X    sascii['T'] = sascii['U'];
-X    sascii['t'] = sascii['u'];
-X  }
-X
-/*
-X   finished with sascii[]
-*/
-X
-/*
-X   setup hnt (ambiguous nt hash) values
-*/
-X  hsq[0] = 0;
-X  for (iaa = 1; iaa <= nsq; iaa++) {
-X    hsq[iaa]=iaa;
-X  }
-X  if (ppst->nt_align) {	/* DNA ambiguitities */
-X    hsq[sascii['R']]=hsq[sascii['M']]=hsq[sascii['W']]=hsq[sascii['A']];
-X    hsq[sascii['D']]=hsq[sascii['H']]=hsq[sascii['V']]=hsq[sascii['A']];
-X    hsq[sascii['N']]=hsq[sascii['X']]=hsq[sascii['A']];
-X    hsq[sascii['Y']]=hsq[sascii['S']]=hsq[sascii['B']]=hsq[sascii['C']];
-X    hsq[sascii['K']]=hsq[sascii['G']];
-X  }
-X  else	/* protein ambiguities */
-X    if (ppst->dnaseq == SEQT_UNK || ppst->dnaseq == SEQT_PROT ||
-X	(ppst->nsq >= 20 && ppst->nsq <= 24)) {
-X      hsq[sascii['B']] = hsq[sascii['N']];
-X      hsq[sascii['Z']] = hsq[sascii['E']];
-X      hsq[sascii['X']] = hsq[sascii['A']];
-X    }
-X  /* here if non-DNA, non-protein sequence */
-X    else ppst->dnaseq = SEQT_OTHER;
-X
-/*
-X   check for 2D pam - if not found, allocate it
-*/
-X
-X  if (!ppst->have_pam2) {
-X    alloc_pam (MAXSQ, MAXSQ, ppst);
-X    ppst->have_pam2 = 1;
-X  }
-X
-/*
-X   read the scoring matrix values
-*/
-X
-X  max_val = -1;
-X  min_val = 1;
-X  /* NOTE(review): this stops at column nsq-1 although the rows below fill
-X     columns 1..nsq - confirm whether column nsq of row 0 should be set */
-X  for (j=0; j < nsq; j++) ppst->pam2[0][0][j] = -BIGNUM;
-X  for (iaa = 1; iaa <= nsq; iaa++) {	/* read pam value line */
-X    if (fgets(line,sizeof(line),fmat)==NULL) {
-X      fprintf (stderr," error reading pam line: %s\n",line);
-X      exit (1);
-X    }
-X    /*     fprintf(stderr,"%d/%d %s",iaa,nsq,line); */
-X    strtok(line," \t\n");		/* skip the letter (residue) */
-X    /* column 0 of the current row (index iaa, not the stale scan index i) */
-X    ppst->pam2[0][iaa][0] = -BIGNUM;
-X    for (j = 1; j <= nsq; j++) {
-X      lp=strtok(NULL," \t\n");		/* get the number string */
-X      if (lp == NULL) {		/* row has fewer than nsq values */
-X	fprintf (stderr," error reading pam line: too few values for %c\n",
-X		 sq[iaa]);
-X	exit (1);
-X      }
-X      pval=ppst->pam2[0][iaa][j]=atoi(lp);	/* convert to integer */
-X      if (pval > max_val) max_val = pval;
-X      if (pval < min_val) min_val = pval;
-X    }
-X  }
-X
-X  /* alphabet had no '*': append one, with -1 off-diagonal and max_val/2 on
-X     the diagonal of its row */
-X  if (have_es==0) {
-X    sascii['*']=nsq;
-X    nsq++;
-X    sq[nsq]='*';
-X    sq[nsq+1]='\0';
-X    for (j=1; j<=nsq; j++) ppst->pam2[0][nsq][j]= -1;
-X    ppst->pam2[0][nsq][nsq]= max_val/2;
-X  }
-X
-X  ppst->sqx[0]='\0';	/* initialize sqx[] */
-X  for (i=1; i<= nsq; i++) {
-X    ppst->sqx[i] = sq[i];
-X    ppst->sqx[i+nsq] = tolower(sq[i]);
-X    /* NOTE(review): indexes with the global aa[] while testing sq[] -
-X       confirm whether sq[i] was intended throughout */
-X    if (sascii[aa[i]] < NA && sq[i] >= 'A' && sq[i] <= 'Z')
-X      sascii[aa[i] - 'A' + 'a'] = sascii[aa[i]]+nsq;
-X  }
-X
-X  ppst->nsq = nsq;	/* save new nsq */
-X  ppst->nsqx = nsq*2;	/* save new nsqx */
-X
-X  ppst->pam_h = max_val;
-X  ppst->pam_l = min_val;
-X
-X  strncpy (ppst->pamfile, mfname, MAX_FN);
-X  ppst->pamfile[MAX_FN-1]='\0';
-X
-X  if (ppst->pam_ms) {
-X    strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1);
-X  }
-X  ppst->pamfile[MAX_FN-1]='\0';
-X  fclose (fmat);
-X  return 1;
-}
-X
-/* make a DNA scoring from +match/-mismatch values */
-X
-/* mk_n_pam - rescale a DNA scoring array in place.
-X
-X   arr   scores, walked as the first nnt-1 rows of a lower triangle
-X         (row i has i+1 cells)
-X   siz   not used
-X   mat   new match score: replaces every cell equal to +5
-X   mis   new mismatch score: replaces every cell equal to -4
-X
-X   Cells equal to -1 are left alone; any other value is scaled linearly
-X   from the +5/-4 range onto the mat/mis range.
-*/
-void mk_n_pam(int *arr,int siz, int mat, int mis)
-{
-X  int row, col;
-X  int *cell;
-X  /* current default match/mismatch values */
-X  int max_mat = +5;
-X  int min_mis = -4;
-X  float f_val, f_scale;
-X
-X  f_scale = (float)(mat - mis)/(float)(max_mat - min_mis);
-X
-X  cell = arr;
-X  for (row = 0; row<nnt-1; row++) {
-X    for (col = 0; col <= row; col++, cell++) {
-X      if (*cell == max_mat) *cell = mat;
-X      else if (*cell == min_mis) *cell = mis;
-X      else if (*cell != -1) {
-X	f_val = (*cell - min_mis)*f_scale + 0.5;
-X	*cell = f_val + mis;
-X      }
-X    }
-X  }
-}
-X
-/* std_pam_str - one built-in scoring matrix and its default settings */
-struct std_pam_str {
-X char abbrev[6]; /* name matched against the user's matrix string */
-X char name[10]; /* name copied into ppst->pamfile */
-X int *pam; /* matrix data; assigned to the global pam */
-X float scale; /* copied into ppst->pamscale */
-X int gdel, ggap; /* default gap penalties, used when the caller set none */
-};
-X
-/* Built-in matrices looked up by standard_pam().  Several abbreviations map
-X   to the same matrix; the entry with an empty abbrev ends the table. */
-static
-struct std_pam_str std_pams[] = {
-X {"P120", "PAM120", apam120, 0.346574, -20, -3},
-X {"P250", "PAM250", apam250, 0.231049, -12, -2},
-X {"P10", "MD10", a_md10, 0.346574, -27, -4},
-X {"M10", "MD10", a_md10, 0.346574, -27, -4},
-X {"MD10", "MD10", a_md10, 0.346574, -27, -4},
-X {"P20", "MD20", a_md20, 0.346574, -26, -4},
-X {"M20", "MD20", a_md20, 0.346574, -26, -4},
-X {"MD20", "MD20", a_md20, 0.346574, -26, -4},
-X {"P40", "MD40", a_md40, 0.346574, -25, -4},
-X {"M40", "MD40", a_md40, 0.346574, -25, -4},
-X {"MD40", "MD40", a_md40, 0.346574, -25, -4},
-X {"BL50", "BL50", abl50, 0.231049, -12, -2},
-X {"BL62", "BL62", abl62, 0.346574, -8, -1},
-X {"BP62", "BL62", abl62, 0.346574, -12, -1},
-X {"BL80", "BL80", abl80, 0.346574, -12, -2},
-X {"\0", "\0", NULL, 0.0, 0, 0}
-};
-X
-/* standard_pam - select one of the built-in scoring matrices by name.
-X
-X   smstr    matrix abbreviation; pam_opts() strips any option suffix from
-X            it in place before the lookup
-X   ppst     receives pamfile, pamscale and, unless already set by the
-X            caller, the default gdelval / ggapval for the matrix
-X   del_set  non-zero if the caller already chose gdelval
-X   gap_set  non-zero if the caller already chose ggapval
-X
-X   The global pam is pointed at the selected matrix.
-X
-X   returns 1 if smstr names a built-in matrix, 0 otherwise.
-*/
-int
-standard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set) {
-X
-X  struct std_pam_str *entry;
-X
-X  pam_opts(smstr, ppst);
-X
-X  /* linear search; the table ends at the entry with an empty abbrev */
-X  entry = std_pams;
-X  while (entry->abbrev[0] && strcmp(smstr,entry->abbrev)!=0) entry++;
-X
-X  if (!entry->abbrev[0]) return 0;
-X
-X  pam = entry->pam;
-X  strncpy(ppst->pamfile,entry->name,MAX_FN);
-X  ppst->pamfile[MAX_FN-1]='\0';
-X  if (ppst->pam_ms) {
-X    strncat(ppst->pamfile,"-MS",MAX_FN-strlen(ppst->pamfile)-1);
-X  }
-X  ppst->pamfile[MAX_FN-1]='\0';
-X
-X  /* defaults apply only where the caller did not choose a penalty */
-#ifdef OLD_FASTA_GAP
-X  if (!del_set) ppst->gdelval = entry->gdel;
-#else
-X  if (!del_set) ppst->gdelval = entry->gdel-entry->ggap;
-#endif
-X  if (!gap_set) ppst->ggapval = entry->ggap;
-X  ppst->pamscale = entry->scale;
-X  return 1;
-}
-X
-/* ESS must match uascii.h */
-#define ESS 49
-X
-/* build_xascii - rebuild a character map so that every letter of the
-X   aax[] and ntx[] alphabets maps to itself.
-X
-X   qascii    map to rebuild; entries 1..127 are first set to NA
-X   save_str  characters whose current qascii values must survive the
-X             rebuild (at most MAX_SSTR of them are honoured)
-X
-X   The values for ',' and '*' are preserved as well, and NUL, '\n' and '\r'
-X   are set to EL.
-*/
-void
-build_xascii(int *qascii, char *save_str) {
-X  int kept[MAX_SSTR];
-X  int n_kept, k;
-X  int comma_val = qascii[','];
-X  int term_val = qascii['*'];
-X
-X  /* preserve special characters */
-X  n_kept = 0;
-X  while (n_kept < MAX_SSTR && save_str[n_kept]) {
-X    kept[n_kept] = qascii[save_str[n_kept]];
-X    n_kept++;
-X  }
-X
-X  for (k=1; k<128; k++) qascii[k]=NA;
-X
-X  /* range of values in aax, ntx is from 1..naax,nntx -
-X     do not zero-out qascii[0] - 9 Oct 2002 */
-X  for (k=1; k<naax; k++) qascii[aax[k]]=aax[k];
-X  for (k=1; k<nntx; k++) qascii[ntx[k]]=ntx[k];
-X
-X  qascii['\n']=qascii['\r']=qascii[0] = EL;
-X
-X  qascii[','] = comma_val;
-X  qascii['*'] = term_val;
-X
-X  /* put the saved values back */
-X  for (k=0; k < n_kept; k++) qascii[save_str[k]]=kept[k];
-}
-X
-/* init_ascii - fill a character map from one of the built-in alphabets.
-X
-X   is_ext  non-zero selects the extended alphabet (ntx[]/aax[]) instead of
-X           the plain one (nt[]/aa[])
-X   sascii  map to fill: sascii[letter] = position of the letter (1-based)
-X   is_dna  sequence type; SEQT_UNK leaves sascii untouched, SEQT_DNA and
-X           SEQT_RNA select a nucleotide alphabet, anything else protein
-X
-X   If the alphabet contains no lower-case letters, each lower-case letter
-X   is mapped like its upper-case form.  The entry for '*' is preserved.
-*/
-void
-init_ascii(int is_ext, int *sascii, int is_dna) {
-X
-X  char *alphabet, saved_term;
-X  int n_letters, k;
-X  int saw_lower = 0;
-X
-X  if (is_dna==SEQT_UNK) return;
-X
-X  saved_term = sascii['*'];
-X
-X  if (is_dna==SEQT_DNA || is_dna == SEQT_RNA) {
-X    alphabet = is_ext ? &ntx[0] : &nt[0];
-X    n_letters = is_ext ? nntx : nnt;
-X  }
-X  else {
-X    alphabet = is_ext ? &aax[0] : &aa[0];
-X    n_letters = is_ext ? naax : naa;
-X  }
-X
-X  /* positions 1..n_letters; note whether any letter is lower case */
-X  for (k = 1; k <= n_letters; k++) {
-X    sascii[alphabet[k]] = k;
-X    if (alphabet[k] >= 'a' && alphabet[k] <= 'z') saw_lower = 1;
-X  }
-X
-X  /* no lower case letters in alphabet, map lower case to upper */
-X  if (!saw_lower) {
-X    for (k = 1; k <= n_letters; k++) {
-X      if (alphabet[k] >= 'A' && alphabet[k] <= 'Z')
-X	sascii[alphabet[k]-'A'+'a'] = k;
-X    }
-X    if (is_dna==1) sascii['u'] = sascii['t'];
-X  }
-X
-X  sascii['*']=saved_term;
-}
-X
-/* print_pam - debugging aid: write the alphabet and the diagonal scores of
-X   the scoring matrix to stderr.
-X
-X   ppst  parameter structure; sq/nsq and pam2[0] are reported, or sqx/nsqx
-X         and pam2[1] when ext_sq_set is non-zero
-X
-X   The alphabets are 1-based (index 0 is set to '\0' when a matrix is
-X   read), so they are printed starting at index 1.
-X
-X   returns 0.
-*/
-int
-print_pam(struct pstruct *ppst) {
-X  int i, nsq, ip;
-X  char *sq;
-X
-X  fprintf(stderr," ext_sq_set: %d\n",ppst->ext_sq_set);
-X
-X  nsq = ppst->nsq;
-X  ip = 0;
-X  sq = ppst->sq;
-X
-X  fprintf(stderr," sq[%d]: %s\n",nsq, sq+1);
-X
-X  if (ppst->ext_sq_set) {
-X    nsq = ppst->nsqx;
-X    ip = 1;
-X    sq = ppst->sqx;
-X    fprintf(stderr," sq[%d]: %s\n",nsq, sq+1);
-X  }
-X
-X  /* one line per residue: its score against itself */
-X  for (i=1; i<=nsq; i++) {
-X    fprintf(stderr," %c:%c - %3d\n",sq[i], sq[i], ppst->pam2[ip][i][i]);
-X  }
-X  return 0;
-}
-SHAR_EOF
-chmod 0644 apam.c ||
-echo 'restore of apam.c failed'
-Wc_c="`wc -c < 'apam.c'`"
-test 10085 -eq "$Wc_c" ||
- echo 'apam.c: original size 10085, current size' "$Wc_c"
-fi
-# ============= blosum45.mat ==============
-if test -f 'blosum45.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping blosum45.mat (File already exists)'
-else
-echo 'x - extracting blosum45.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'blosum45.mat' &&
-# Matrix made by matblas from blosum45.iij
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 45
-# Entropy = 0.3795, Expected = -0.2789
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 0
-R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 0 -1
-N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 4 0 -1
-D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 5 1 -1
-C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2
-Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 4 -1
-E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 4 -1
-G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
-H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 0 -1
-I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 -3 -1
-L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 -2 -1
-K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 1 -1
-M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 -1 -1
-F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 -3 -1
-P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -1 -1
-S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 0 0
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 0
-W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2
-Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 -2 -1
-V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 -3 -1
-B -1 -1 4 5 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 4 2 -1
-Z -1 0 0 1 -3 4 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 2 4 -1
-XX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -2 -1 -1 -1 -1 -1
-X
-SHAR_EOF
-chmod 0644 blosum45.mat ||
-echo 'restore of blosum45.mat failed'
-Wc_c="`wc -c < 'blosum45.mat'`"
-test 1922 -eq "$Wc_c" ||
- echo 'blosum45.mat: original size 1922, current size' "$Wc_c"
-fi
-# ============= blosum50.mat ==============
-if test -f 'blosum50.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping blosum50.mat (File already exists)'
-else
-echo 'x - extracting blosum50.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'blosum50.mat' &&
-# Matrix made by matblas from blosum50.iij
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 50
-# Entropy = 0.4808, Expected = -0.3573
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1
-R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1
-N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1
-D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1
-C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2
-Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1
-E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1
-G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2
-H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1
-I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1
-L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1
-K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1
-M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1
-F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2
-P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2
-S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0
-W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3
-Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1
-V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1
-B -2 -1 4 5 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1
-Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1
-XX -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 blosum50.mat ||
-echo 'restore of blosum50.mat failed'
-Wc_c="`wc -c < 'blosum50.mat'`"
-test 1921 -eq "$Wc_c" ||
- echo 'blosum50.mat: original size 1921, current size' "$Wc_c"
-fi
-# ============= blosum62.mat ==============
-if test -f 'blosum62.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping blosum62.mat (File already exists)'
-else
-echo 'x - extracting blosum62.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'blosum62.mat' &&
-# Matrix made by matblas from blosum62.iij
-# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 62
-# Entropy = 0.6979, Expected = -0.5209
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0
-R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1
-N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1
-D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1
-C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2
-Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1
-E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1
-G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
-H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1
-I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1
-L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1
-K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1
-M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1
-F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1
-P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2
-S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0
-W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2
-Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1
-V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1
-B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1
-Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1
-XX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1
-X
-SHAR_EOF
-chmod 0644 blosum62.mat ||
-echo 'restore of blosum62.mat failed'
-Wc_c="`wc -c < 'blosum62.mat'`"
-test 1922 -eq "$Wc_c" ||
- echo 'blosum62.mat: original size 1922, current size' "$Wc_c"
-fi
-# ============= blosum80.mat ==============
-if test -f 'blosum80.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping blosum80.mat (File already exists)'
-else
-echo 'x - extracting blosum80.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'blosum80.mat' &&
-# Matrix made by matblas from blosum80_3.iij
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 80
-# Entropy = 0.9868, Expected = -0.7442
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1
-R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2
-N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2
-D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3
-C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4
-Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2
-E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2
-G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3
-H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2
-I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2
-L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2
-K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2
-M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2
-F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3
-P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3
-S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1
-T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1
-W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5
-Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3
-V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2
-B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3
-Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1
-XX -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2
-X
-SHAR_EOF
-chmod 0644 blosum80.mat ||
-echo 'restore of blosum80.mat failed'
-Wc_c="`wc -c < 'blosum80.mat'`"
-test 1924 -eq "$Wc_c" ||
- echo 'blosum80.mat: original size 1924, current size' "$Wc_c"
-fi
-# ============= bovgh.seq ==============
-if test -f 'bovgh.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping bovgh.seq (File already exists)'
-else
-echo 'x - extracting bovgh.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'bovgh.seq' &&
->BOVGH bovine growth hormone (presomatotropin) gene and flanks.
-X AAAACCTATG GGGTGGGCTC TCAAGCTGAG ACCCTGTGTG CACAGCCCTC TGGCTGGTGG
-X CAGTGGAGAC GGGATNNNAT GACAAGCCTG GGGGACATGA CCCCAGAGAA GGAACGGGAA
-X CAGGATGAGT GAGAGGAGGT TCTAAATTAT CCATTAGCAC AGGCTGCCAG TGGTCCTTGC
-X ATAAATGTAT AGAGCACACA GGTGGGGGGA AAGGGAGAGA GAGAAGAAGC CAGGGTATAA
-X AAATGGCCCA GCAGGGACCA ATTCCAGGAT CCCAGGACCC AGTTCACCAG ACGACTCAGG
-X GTCCTGTGGA CAGCTCACCA GCTATGATGG CTGCAGGTAA GCTCGCTAAA ATCCCCTCCA
-X TTCGCGTGTC CTAAAGGGGT AATGCGGGGG GCCCTGCCGA TGGATGTGTT CAGAGCTTTG
-X GGCTTTAGGG CTTCCGAATG TGAACATAGG TATCTACACC CAGACATTTG GCCAAGTTTG
-X AAATGTTCTC AGTCCCTGGA GGGAAGGGTA GGTGGGGGCT GGCAGGAGAT CAGGCGTCTA
-X GCTCCCTGGG GCCCTCCGTC GCGGCCCTCC TGGTCTCTCC CTAGGCCCCC GGACCTCCCT
-X GCTCCTGGCT TTCGCCCTGC TCTGCCTGCC CTGGACTCAG GTGGTGGGCG CCTTCCCAGC
-X CATGTCCTTG TCCGGCCTGT TTGCCAACGC TGTGCTCCGG GCTCAGCACC TGCATCAGCT
-X GGCTGCTGAC ACCTTCAAAG AGTTTGTAAG CTCCCGAGGG ATGCGTCCTA GGGGTGGGGA
-X GGCAGGAAGG GGTGAATCCA CACCCCCTCC ACACAGTGGG AGGAAACTGA GGAGTTCAGC
-X CGTATTTTAT CCAAGTAGGG ATGTGGTTAG GGGAGCAGAA ACGGGGGTGT GTGGGGTGGG
-X GAGGGTTCCG AATAAGGCGG GGAGGGGAAC CGCGCACCAG CTTAGACCTG GGTGGGTGTG
-X TTCTTCCCCC AGGAGCGCAC CTACATCCCG GAGGGACAGA GATACTCCAT CCAGAACACC
-X CAGGTTGCCT TCTGCTTCTC TGAAACCATC CCGGCCCCCA CGGGCAAGAA TGAGGCCCAG
-X CAGAAATCAG TGAGTGGCAA CCTCGGACCG AGGAGCAGGG GACCTCCTTC ATCCTAAGTA
-X GGCTGCCCCA GCTCTCCGCA CCGGGCCTGG GGCGGCCTTC TCCCCGAGGT GGCGGAGGTT
-X GTTGGATGGC AGTGGAGGAT GATGGTGGGC GGTGGTGGCA GGAGGTCCTC GGGCAGAGGC
-X CGACCTTGCA GGGCTGCCCC AAGCCCGCGG CACCCACCGA CCACCCATCT GCCAGCAGGA
-X CTTGGAGCTG CTTCGCATCT CACTGCTCCT CATCCAGTCG TGGCTTGGGC CCCTGCAGTT
-X CCTCAGCAGA GTCTTCACCA ACAGCTTGGT GTTTGGCACC TCGGACCGTG TCTATGAGAA
-X GCTGAAGGAC CTGGAGGAAG GCATCCTGGC CCTGATGCGG GTGGGGATGG CGTTGTGGGT
-X CCCTTCCATG CTGGGGGCCA TGCCCGCCCT CTCCTGGCTT AGCCAGGAGA ATGCACGTGG
-X GCTTGGGGAG ACAGATCCCT GCTCTCTCCC TCTTTCTAGC AGTCCAGCCT TGACCCAGGG
-X GAAACCTTTT CCCCTTTTGA AACCTCCTTC CTCGCCCTTC TCCAAGCCTG TAGGGGAGGG
-X TGGAAAATGG AGCGGGCAGG AGGGAGCTGC TCCTGAGGGC CCTTCGGCCT CTCTGTCTCT
-X CCCTCCCTTG GCAGGAGCTG GAAGATGGCA CCCCCCGGGC TGGGCAGATC CTCAAGCAGA
-X CCTATGACAA ATTTGACACA AACATGCGCA GTGACGACGC GCTGCTCAAG AACTACGGTC
-X TGCTCTCCTG CTTCCGGAAG GACCTGCATA AGACGGAGAC GTACCTGAGG GTCATGAAGT
-X GCCGCCGCTT CGGGGAGGCC AGCTGTGCCT TCTAGTTGCC AGCCATCTGT TGTTTGCCCC
-X TCCCCCGTGC CTTCCTTGAC CCTGGAAGGT GCCACTCCCA CTGTCCTTTC CTAATAAAAT
-X GAGGAAATTG CATCGCATTG TCTGAGTAGG TGTCATTCTA TTCTGGGGGG TGGGGTGGGG
-X CAGGACAGCA AGGGGGAGGA TTGGGAAGAC AATAGCAGGC ATGCTGGGGA TGCGGTGGGC
-X TCTATGGGTA CCCAGGTGCT GAAGAATTGA CCCGGTTCCT CCTGGG
-SHAR_EOF
-chmod 0644 bovgh.seq ||
-echo 'restore of bovgh.seq failed'
-Wc_c="`wc -c < 'bovgh.seq'`"
-test 2528 -eq "$Wc_c" ||
- echo 'bovgh.seq: original size 2528, current size' "$Wc_c"
-fi
-# ============= bovprl.seq ==============
-if test -f 'bovprl.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping bovprl.seq (File already exists)'
-else
-echo 'x - extracting bovprl.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'bovprl.seq' &&
->BOVPRL GenBank entry BOVPRL from omam file. 907 nucleotides.
-TGCTTGGCTGAGGAGCCATAGGACGAGAGCTTCCTGGTGAAGTGTGTTTCTTGAAATCAT
-CACCACCATGGACAGCAAAGGTTCGTCGCAGAAAGGGTCCCGCCTGCTCCTGCTGCTGGT
-GGTGTCAAATCTACTCTTGTGCCAGGGTGTGGTCTCCACCCCCGTCTGTCCCAATGGGCC
-TGGCAACTGCCAGGTATCCCTTCGAGACCTGTTTGACCGGGCAGTCATGGTGTCCCACTA
-CATCCATGACCTCTCCTCGGAAATGTTCAACGAATTTGATAAACGGTATGCCCAGGGCAA
-AGGGTTCATTACCATGGCCCTCAACAGCTGCCATACCTCCTCCCTTCCTACCCCGGAAGA
-TAAAGAACAAGCCCAACAGACCCATCATGAAGTCCTTATGAGCTTGATTCTTGGGTTGCT
-GCGCTCCTGGAATGACCCTCTGTATCACCTAGTCACCGAGGTACGGGGTATGAAAGGAGC
-CCCAGATGCTATCCTATCGAGGGCCATAGAGATTGAGGAAGAAAACAAACGACTTCTGGA
-AGGCATGGAGATGATATTTGGCCAGGTTATTCCTGGAGCCAAAGAGACTGAGCCCTACCC
-TGTGTGGTCAGGACTCCCGTCCCTGCAAACTAAGGATGAAGATGCACGTTATTCTGCTTT
-TTATAACCTGCTCCACTGCCTGCGCAGGGATTCAAGCAAGATTGACACTTACCTTAAGCT
-CCTGAATTGCAGAATCATCTACAACAACAACTGCTAAGCCCACATTCCATCCTATCCATT
-TCTGAGATGGTTCTTAATGATCCATTCCCTGGCAAACTTCTCTGAGCTTTATAGCTTTGT
-AATGCATGCTTGGCTCTAATGGGTTTCATCTTAAATAAAAACAGACTCTGTAGCGATGTC
-AAAATCT
-SHAR_EOF
-chmod 0644 bovprl.seq ||
-echo 'restore of bovprl.seq failed'
-Wc_c="`wc -c < 'bovprl.seq'`"
-test 986 -eq "$Wc_c" ||
- echo 'bovprl.seq: original size 986, current size' "$Wc_c"
-fi
-# ============= c_dispn.c ==============
-if test -f 'c_dispn.c' -a X"$1" != X"-c"; then
- echo 'x - skipping c_dispn.c (File already exists)'
-else
-echo 'x - extracting c_dispn.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'c_dispn.c' &&
-/* dispn.c associated subroutines for matching sequences */
-X
-/* $Name: fa_34_26_5 $ - $Id: c_dispn.c,v 1.21 2005/10/25 20:22:52 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-X
-#include "defs.h"
-#include "structs.h"
-#include "param.h"
-X
-#define XTERNAL
-X
-#define YES 1
-#define NO 0
-X
-#define MAXOUT 201
-X
-/* the seqca[] array has the following codes:
-X 0 - no alignment symbol
-X 1 - align; pam < 0
-X 2 - align; pam == 0
-X 3 - align; pam > 0
-X 4 - align; ident
-X 5 - align; del
-X
-X the map_sym arrays determine the value to be displayed with each
-X type of aligned residue
-*/
-X
-static char *map_sym_0 =" ..: ";
-static char *map_sym_1 =" Xxx ";
-static char *map_sym_2 =" . ";
-#ifdef M10_CONS_L
-static char *map_sym_10=" mzp=-";
-#else
-static char *map_sym_10=" ..:-";
-#endif
-X
-void
-discons(FILE *fd, struct mngmsg m_msg, struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
-X int n0, int n1, char *name0, char *name1, int nml,
-X struct a_struct *aln, long loffset)
-{
-X char line[3][MAXOUT], cline[2][MAXOUT+10], *clinep[2];
-X int il, i, lend, loff, id;
-X int del0, del1, ic, ll0, ll1, ll01, cl0, cl1, rl0, rl1;
-X int ic_save;
-X char *map_sym_p;
-X int l_llen;
-X int ioff0, ioff00, ioff1, ioff10;
-X long qqoff, lloff, qoffset;
-X int llsgn, llfact, qlsgn, qlfact, qfx0, qfxn, lfx0, lfxn;
-X int have_res;
-X char *name01, *sq;
-X char blank[MAX_UID], afmt[32];
-X
-X memset(blank,' ',sizeof(blank)-1);
-X blank[sizeof(blank)-1]='\0';
-X
-X if (nml > 6) {
-X blank[nml-6]='\0';
-X sprintf(afmt,"%%-%ds %%s\n",nml);
-X }
-X else {
-X blank[0]='\0';
-X strncpy(afmt,"%-6s %s\n",sizeof(afmt));
-X }
-X if (pst.ext_sq_set) sq = pst.sqx; else sq = pst.sq;
-X
-X clinep[0]=cline[0]+1;
-X clinep[1]=cline[1]+1;
-X
-X if (aln->qlfact == 0) {qlfact = 1;}
-X else qlfact = aln->qlfact;
-X if (aln->qlrev == 1) {
-X qoffset = n0;
-X qlsgn = -1;
-X qfx0 = 0;
-X qfxn = 1;
-X }
-X else {
-X qoffset = 0;
-X qlsgn = 1;
-X qfx0 = 1;
-X qfxn = 0;
-X }
-X
-X if (aln->llfact == 0) {llfact = 1;}
-X else llfact = aln->llfact;
-X
-X if (aln->llrev == 1) {
-X loffset += n1;
-X llsgn = -1;
-X lfx0 = 0;
-X lfxn = 1;
-X }
-X else {
-X llsgn = 1;
-X lfx0 = 1;
-X lfxn = 0;
-X }
-X
-X l_llen = aln->llen;
-X if ((m_msg.markx & MX_M9SUMM) && m_msg.show_code != 1) { l_llen += 40; }
-X
-X if ((m_msg.markx & MX_ATYPE)==2) name01=name1;
-X else name01 = "\0";
-X
-X ioff0=aln->smin0;
-X ioff00 = ioff0;
-X ioff1=aln->smin1;
-X ioff10 = ioff1;
-X
-X if (m_msg.markx& MX_AMAP && (m_msg.markx & MX_ATYPE)==7) return;
-X
-X /* set *map_sym_p to correct match symbol */
-X if ((m_msg.markx&MX_ATYPE)==1) {map_sym_p = map_sym_1;}
-X else if ((m_msg.markx&MX_ATYPE)==2) {map_sym_p = map_sym_2;}
-X else if (m_msg.markx&MX_M10FORM) {map_sym_p = map_sym_10;}
-X else {map_sym_p = map_sym_0;}
-X
-X if (m_msg.markx & MX_ASEP) {
-X fprintf(fd,">%s ..\n",name0);
-X for (i=0; i<nc && seqc0[i]; i++) {
-X /* if (seqc0[i]=='-') fputc('.',fd); else */
-X fputc(seqc0[i],fd);
-X if (i%50 == 49) fputc('\n',fd);
-X }
-X if ((i-1)%50 != 49) fputc('\n',fd);
-X fprintf(fd,">%s ..\n",name1);
-X for (i=0; i<nc && seqc1[i]; i++) {
-X /* if (seqc1[i]=='-') fputc('.',fd); else */
-X fputc(seqc1[i],fd);
-X if (i%50 == 49) fputc('\n',fd);
-X }
-X if ((i-1)%50 != 49) fputc('\n',fd);
-X return;
-X }
-X
-X if (m_msg.markx & MX_M10FORM) {
-X fprintf(fd,">%s ..\n",name0);
-X fprintf(fd,"; sq_len: %d\n",n0);
-X fprintf(fd,"; sq_offset: %ld\n",m_msg.sq0off);
-X fprintf(fd,"; sq_type: %c\n",m_msg.sqtype[0]);
-X fprintf(fd,"; al_start: %ld\n",aln->d_start0);
-X fprintf(fd,"; al_stop: %ld\n",aln->d_stop0);
-X fprintf(fd,"; al_display_start: %ld\n",
-X qoffset+qlsgn*ioff0*aln->llmult+qfx0);
-X
-X have_res = 0;
-X for (i=0; i<nc && seqc0[i]; i++) {
-X if (!have_res && seqc0[i]==' ') fputc('-',fd);
-X else if (seqc0[i]==' ') break;
-X else {
-X have_res = 1;
-X fputc(seqc0[i],fd);
-X }
-X if (i%50 == 49) fputc('\n',fd);
-X }
-X if ((i-1)%50!=49 || seqc0[i-1]==' ') fputc('\n',fd);
-X fprintf(fd,">%s ..\n",name1);
-X fprintf(fd,"; sq_len: %d\n",n1);
-X fprintf(fd,"; sq_type: %c\n",m_msg.sqtype[0]);
-X fprintf(fd,"; al_start: %ld\n",aln->d_start1);
-X fprintf(fd,"; al_stop: %ld\n",aln->d_stop1);
-X fprintf(fd,"; al_display_start: %ld\n",loffset+llsgn*ioff1+lfx0);
-X
-X have_res = 0;
-X for (i=0; i<nc && seqc1[i]; i++) {
-X if (!have_res && seqc1[i]==' ') fputc('-',fd);
-X else if (seqc1[i]==' ') break;
-X else {
-X have_res = 1;
-X fputc(seqc1[i],fd);
-X }
-X if (i%50 == 49) fputc('\n',fd);
-X }
-X if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
-#ifdef M10_CONS
-X fprintf(fd,"; al_cons:\n");
-X for (i=0,del0=0,id=ioff0; id-del0<aln->amax0 && i < nc; i++,id++) {
-X if (seqc0[i] == '\0' || seqc1[i] == '\0') break;
-X if (seqc0[i]=='-' || seqc0[i]==' ' || seqc0[i]=='\\') del0++;
-X else if (seqc0[i]=='/') del0++;
-X if (id-del0<aln->amin0) fputc(' ',fd);
-X else if (seqc0[i]=='-'||seqc1[i]=='-') fputc('-',fd);
-X else fputc(map_sym_10[seqca[i]],fd);
-X
-X if (i%50 == 49) fputc('\n',fd);
-X }
-X if ((i-1)%50!=49 || seqc1[i-1]==' ') fputc('\n',fd);
-#endif
-X return;
-X }
-X
-X memset(line[0],' ',MAXOUT);
-X memset(line[1],' ',MAXOUT);
-X memset(line[2],' ',MAXOUT);
-X
-X /* cl0 indicates whether a coordinate should be printed over the first
-X sequence; cl1 indicates a coordinate for the second;
-X */
-X
-X ic = 0; del0=del1=0;
-X for (il=0; il<(nc+l_llen-1)/l_llen; il++) {
-X loff=il*l_llen;
-X lend=min(l_llen,nc-loff);
-X
-X ll0 = NO; ll1 = NO;
-X
-X memset(cline[0],' ',MAXOUT+1);
-X memset(cline[1],' ',MAXOUT+1);
-X
-X ic_save = ic;
-X for (i=0; i<lend; i++, ic++,ioff0++,ioff1++) {
-X cl0 = cl1 = rl0 = rl1 = YES;
-X if ((line[0][i]=seqc0[ic])=='-' || seqc0[ic]=='\\') {
-X del0++; cl0=rl0=NO;
-X }
-X else if (seqc0[ic]=='/') {
-X del0++; cl0=rl0=NO;
-X }
-X if ((line[2][i]=seqc1[ic])=='-' || seqc1[ic]=='\\') {
-X del1++; cl1=rl1=NO;
-X }
-X else if (seqc1[ic]=='/') {
-X del1++; cl1=rl1=NO;
-X }
-X
-X if (seqc0[ic]==' ') {del0++; cl0=rl0=NO;}
-X else ll0 = YES;
-X if (seqc1[ic]==' ') {del1++; cl1=rl1=NO;}
-X else ll1 = YES;
-X
-X qqoff = m_msg.sq0off - 1 + qoffset + (long)qlsgn*ioff00 +
-X (long)qlsgn*qlfact*(ioff0-del0-ioff00);
-X if (cl0 && qqoff%10 == 9) {
-X sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1l);
-X clinep[0][i+8-qfxn]=' ';
-X rl0 = NO;
-X }
-X else if (cl0 && qqoff== -1) {
-X sprintf(&clinep[0][i-qfxn],"%8ld",0l);
-X clinep[0][i+8-qfxn]=' ';
-X rl0 = NO;
-X }
-X else if (rl0 && (qqoff+1)%10 == 0) {
-X sprintf(&clinep[0][i-qfxn],"%8ld",qqoff+1);
-X clinep[0][i+8-qfxn]=' ';
-X }
-X
-X /* the lloff coordinate of a residue is the sum of:
-X m_msg.sq1off-1 - the user defined coordinate
-X loffset - the offset into the library sequence
-X llsgn*ioff10 - the offset into the beginning of the alignment
-X (given in the "natural" coordinate system,
-X except for tfasta3 which provides context)
-X llsgn*llfact*(ioff1-del1-ioff10)
-X - the position in the consensus aligment, -gaps
-X */
-X
-X lloff = m_msg.sq1off-1 + loffset + aln->frame +
-X (long)llsgn*aln->llmult*ioff10 +
-X (long)llsgn*llfact*(ioff1-del1-ioff10);
-X
-X if (cl1 && lloff%10 == 9) {
-X sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1l);
-X clinep[1][i+8-lfxn]=' ';
-X rl1 = NO;
-X }
-X else if (cl1 && lloff== -1) {
-X sprintf(&clinep[1][i],"%8ld",0l);
-X clinep[1][i+8-lfxn]=' ';
-X rl1 = NO;
-X }
-X else if (rl1 && (lloff+1)%10 == 0) {
-X sprintf(&clinep[1][i-lfxn],"%8ld",lloff+1);
-X clinep[1][i+8-lfxn]=' ';
-X }
-X
-X line[1][i] = ' ';
-X if (ioff0-del0 >= aln->amin0 && ioff0-del0 <= aln->amax0) {
-X if (seqca[ic]==4) {line[1][i]=map_sym_p[4];}
-X else if ((m_msg.markx&MX_ATYPE)==2) line[1][i]=line[2][i];
-X else line[1][i] = map_sym_p[seqca[ic]];
-X }
-X else if ((m_msg.markx&MX_ATYPE)==2) line[1][i]=line[2][i];
-X }
-X
-X if (m_msg.ann_flg) {
-X for (ic=ic_save,i=0; i<lend; ic++,i++) {
-X if (seqc0a[ic]!= ' ') clinep[0][i+7-qfxn] = seqc0a[ic];
-X }
-X }
-X
-X line[0][lend]=line[1][lend]=line[2][lend]=0;
-X clinep[0][lend+7]=clinep[1][lend+7]=0;
-X
-X ll01 = ll0&&ll1;
-X if ((m_msg.markx&MX_ATYPE)==2 && (!aln->showall || ll0)) ll1=0;
-X fprintf(fd,"\n");
-X if (ll0) fprintf(fd,"%s%s\n",blank,clinep[0]);
-X if (ll0) fprintf(fd,afmt,name0,line[0]);
-X if (ll01) fprintf(fd,afmt,name01,line[1]);
-X if (ll1) fprintf(fd,afmt,name1,line[2]);
-X if (ll1) fprintf(fd,"%s%s\n",blank,clinep[1]);
-X }
-}
-X
-static float gscale= -1.0;
-X
-void
-disgraph(FILE *fd, int n0,int n1, float percent, int score,
-X int min0, int min1, int max0, int max1, long sq0off,
-X char *name0, char *name1, int nml,
-X int mlen, int markx)
-{
-X int i, gstart, gstop, gend;
-X int llen;
-X char line[MAXOUT+1];
-X char afmt[16], afmtf[64];
-X
-X if (nml > 6) {
-X sprintf(afmt,"%%-%ds",nml);
-X }
-X else {
-X strncpy(afmt,"%-6s",sizeof(afmt));
-X }
-X strncpy(afmtf,afmt,sizeof(afmtf));
-X strncat(afmtf," %4ld-%4ld: %5.1f%%:%s:\n",sizeof(afmtf));
-X
-X llen = mlen - 10;
-X memset(line,' ',llen);
-X
-X line[llen-1]='\0';
-X if (gscale < 0.0) {
-X gscale = (float)llen/(float)n0;
-X if ((markx&MX_ATYPE) == 7 )
-X fprintf(fd,afmtf,name0,sq0off,sq0off+n0-1,100.0,line);
-X }
-X
-X gstart = (int)(gscale*(float)min0+0.5);
-X gstop = (int)(gscale*(float)max0+0.5);
-X gend = gstop+(int)(gscale*(float)(n1-max1));
-X
-X if (gstop >= llen) gstop = llen-1;
-X if (gend >= llen) gend = llen-1;
-X for (i=0; i<gstart; i++) line[i]=' ';
-X for (; i<gstop; i++) line[i]='-';
-X for (; i<llen; i++) line[i]=' ';
-X
-X line[gend]=':';
-X line[llen]='\0';
-X
-X if (markx & MX_AMAP) {
-X if ((markx & MX_ATYPE)==7) { /* markx==4 - no alignment */
-X strncpy(afmtf,afmt,sizeof(afmtf));
-X strncat(afmtf," %4ld-%4ld:%4d %5.1f%%:%s\n",sizeof(afmtf));
-X fprintf(fd,afmtf,name1,min0+sq0off,max0+sq0off-1,score,percent,line);
-X }
-X else {
-X afmtf[0]='>';
-X strncpy(&afmtf[1],afmt,sizeof(afmtf)-1);
-X strncat(afmtf," %4ld-%4ld:%s\n",sizeof(afmtf));
-X fprintf(fd,afmtf, name1,min0+sq0off,max0+sq0off-1,line);
-X }
-X }
-}
-X
-void
-aancpy(char *to, char *from, int count, struct pstruct pst)
-{
-X char *tp, *sq;
-X int nsq;
-X
-X if (pst.ext_sq_set) {
-X nsq = pst.nsqx;
-X sq = pst.sqx;
-X }
-X else {
-X nsq = pst.nsq;
-X sq = pst.sq;
-X }
-X
-X tp=to;
-X while (count-- && *from) {
-X if (*from <= nsq) *tp++ = sq[*(from++)];
-X else *tp++ = *from++;
-X }
-X *tp='\0';
-}
-X
-void
-r_memcpy(dest,src,cnt)
-X char *dest, *src;
-X int cnt;
-{
-X while (cnt--) *dest++ = *src++;
-}
-X
-void
-l_memcpy(dest,src,cnt)
-X char *dest, *src;
-X int cnt;
-{
-X dest = dest+cnt;
-X src = src+cnt;
-X while (cnt--) *--dest = *--src;
-}
-X
-/* this routine now indexs from 1 (rather than 0) because sq starts
-X with a 0 */
-X
-#define MAXSQ 50 /* must be same as upam.h */
-X
-void cal_coord(int n0, int n1, long sq0off, long loffset,
-X struct a_struct *aln)
-{
-X long qoffset;
-X int llsgn, qlsgn, qfx0, qfxn, lfx0, lfxn;
-X
-X if (aln->qlrev == 1) {
-X qoffset = sq0off -1 + n0;
-X qlsgn = -1;
-X qfx0 = 0;
-X qfxn = 1;
-X }
-X else {
-X qoffset = sq0off - 1;
-X qlsgn = 1;
-X qfx0 = 1;
-X qfxn = 0;
-X }
-X
-X if (aln->llrev == 1) {
-X loffset += n1;
-X llsgn = -1;
-X lfx0 = 0;
-X lfxn = 1;
-X }
-X else {
-X llsgn = 1;
-X lfx0 = 1;
-X lfxn = 0;
-X }
-X aln->d_start0 = qoffset+qlsgn*aln->amin0+qfx0;
-X aln->d_stop0 = qoffset+qlsgn*aln->amax0+qfxn;
-X aln->d_start1 = loffset+llsgn*aln->amin1*aln->llmult+lfx0+aln->frame;
-X aln->d_stop1 = loffset+llsgn*aln->amax1*aln->llmult+lfxn+aln->frame;
-}
-SHAR_EOF
-chmod 0644 c_dispn.c ||
-echo 'restore of c_dispn.c failed'
-Wc_c="`wc -c < 'c_dispn.c'`"
-test 11467 -eq "$Wc_c" ||
- echo 'c_dispn.c: original size 11467, current size' "$Wc_c"
-fi
-# ============= checkevent.c ==============
-if test -f 'checkevent.c' -a X"$1" != X"-c"; then
- echo 'x - skipping checkevent.c (File already exists)'
-else
-echo 'x - extracting checkevent.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'checkevent.c' &&
-X
-/* Copyright 1995 William R. Pearson */
-X
-/* used only in Mac versions to provide mac multitasking */
-X
-#include <stdlib.h>
-X
-#ifdef __MWERKS__
-#include <sioux.h>
-#endif
-X
-#define SLEEP 2L
-#define NIL_MOUSE_REGION 0L
-X
-#define WNE_TRAP_NUM 0x60
-#define UNIMPL_TRAP_NUM 0x9F
-#define SUSPEND_RESUME_BIT 0x0001
-#define ACTIVATING 1
-#define RESUMING 1
-X
-Boolean gDone, gWNEImplemented=0;
-EventRecord gTheEvent;
-Rect gDragRect, gSizeRect;
-X
-void
-InitEvent()
-{
-X gWNEImplemented=(NGetTrapAddress(WNE_TRAP_NUM,ToolTrap)!=
-X NGetTrapAddress(UNIMPL_TRAP_NUM,ToolTrap));
-X }
-X
-X
-#define hiword(x) (((short *) &(x))[0])
-#define loword(x) (((short *) &(x))[1])
-static MenuHandle aMenu;
-X
-/*
-ChkEvent()
-{}
-*/
-X
-#ifdef TPLOT
-extern WindowPtr gDrawWindow;
-extern PicHandle aPic;
-#endif
-X
-static long checkTime=0;
-X
-void
-ChkEvent()
-{
-X EventRecord event;
-X WindowPeek wp;
-X Boolean gotEvent, SIOUXDidEvent;
-X long choice;
-X Str255 buf;
-X
-X if (TickCount() < checkTime) return;
-X checkTime = TickCount()+60L;
-X
-X if (gWNEImplemented)
-X gotEvent=WaitNextEvent(everyEvent-diskMask,&event,SLEEP,NIL_MOUSE_REGION);
-X else {
-X SystemTask();
-X gotEvent=GetNextEvent(everyEvent-diskMask,&event);
-X }
-X
-X if (gotEvent) SIOUXDidEvent=SIOUXHandleOneEvent(&event);
-X if (SIOUXDidEvent) return;
-X
-X if (event.what == nullEvent) {
-X if (FrontWindow() == 0) InitCursor();
-X return;
-X }
-X
-X if (SystemEvent(&event)) return;
-X
-X if (event.what == mouseDown) {
-X switch (FindWindow(event.where, (WindowPtr *)&wp)) {
-X case inMenuBar:
-X InitCursor();
-X choice = MenuSelect(event.where);
-X goto doMenu;
-X case inDrag :
-X DragWindow((WindowPtr)wp, event.where, &gDragRect);
-X break;
-X case inSysWindow:
-X SystemClick(&event, (WindowPtr)wp);
-X break;
-X }
-X }
-X
-X return;
-X
-doMenu:
-X switch (hiword(choice)) {
-X case 1:
-X GetMenuItemText(aMenu, loword(choice), buf);
-X OpenDeskAcc(buf);
-X break;
-X case 2:
-X exit(0);
-X
-X case 3:
-X SystemEdit(loword(choice) - 1);
-X break;
-X }
-X HiliteMenu(0);
-}
-X
-#ifdef TPLOT
-X
-Waitkey(keyval)
-X int keyval;
-{
-X int key;
-X EventRecord event;
-X WindowPeek wp;
-X long choice;
-X Str255 buf;
-X
-X SystemTask();
-X if (gWNEImplemented)
-X WaitNextEvent(everyEvent-diskMask,&event,SLEEP,NIL_MOUSE_REGION);
-X else {
-X SystemTask();
-X GetNextEvent(everyEvent-diskMask,&event);
-X }
-X
-X
-X InitCursor();
-X if (event.what == nullEvent) {
-X return 0;
-X }
-X
-X if (SystemEvent(&event)) return 0;
-X
-X if (event.what == updateEvt) {
-X if ((WindowPtr)event.message == gDrawWindow) {
-X BeginUpdate((WindowPtr)event.message);
-X DrawPicture(aPic,&gDrawWindow->portRect);
-X EndUpdate((WindowPtr)event.message);
-X }
-X else {
-X BeginUpdate((WindowPtr)event.message);
-X EndUpdate((WindowPtr)event.message);
-X }
-X return 0;
-X }
-X
-X if (event.what == keyDown) return 1;
-X if (event.what == mouseDown) {
-X switch (FindWindow(event.where, (WindowPtr *)&wp)) {
-X case inMenuBar:
-X InitCursor();
-X choice = MenuSelect(event.where);
-X goto doMenu;
-X case inDrag :
-X DragWindow((WindowPtr)wp, event.where, &gDragRect);
-X break;
-X case inSysWindow:
-X SystemClick(&event, (WindowPtr)wp);
-X break;
-X case inGoAway :
-X return 1;
-X case inContent:
-X SelectWindow((WindowPtr)wp);
-X SetPort(gDrawWindow);
-X DrawPicture(aPic,&gDrawWindow->portRect);
-X break;
-X }
-X }
-X
-X return 0;
-X
-doMenu:
-X switch (hiword(choice)) {
-X case 1:
-X GetItem(aMenu, loword(choice), buf);
-X OpenDeskAcc(buf);
-X break;
-X case 2:
-X return 1;
-X
-X case 3:
-X SystemEdit(loword(choice) - 1);
-X break;
-X }
-X HiliteMenu(0);
-X return 0;
-}
-#endif
-X
-X
-SHAR_EOF
-chmod 0644 checkevent.c ||
-echo 'restore of checkevent.c failed'
-Wc_c="`wc -c < 'checkevent.c'`"
-test 3492 -eq "$Wc_c" ||
- echo 'checkevent.c: original size 3492, current size' "$Wc_c"
-fi
-# ============= comp_lib.c ==============
-if test -f 'comp_lib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping comp_lib.c (File already exists)'
-else
-echo 'x - extracting comp_lib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'comp_lib.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999, 2002 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: comp_lib.c,v 1.100 2007/04/26 18:36:36 wrp Exp $ */
-X
-/*
-X * Concurrent read version
-X *
-X * Feb 20, 1998 modifications for prss3
-X *
-X * December, 1998 - DNA searches are now down with forward and reverse
-X * strands
-X */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <time.h>
-X
-#include <limits.h>
-#include <float.h>
-#include <math.h>
-X
-#ifdef UNIX
-#include <unistd.h>
-#include <sys/types.h>
-#include <signal.h>
-#endif
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#include "mw.h" /* defines beststr */
-#include "structs.h" /* mngmsg, libstruct */
-#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
-X
-#define XTERNAL
-#include "uascii.h"
-X
-char *mp_verstr="34.26";
-X
-/********************************/
-/* global variable declarations */
-/********************************/
-char gstring2[MAX_STR]; /* string for label */
-char gstring3[MAX_STR];
-char hstring1[MAX_STR];
-X
-extern int max_workers;
-X
-#ifdef SUPERFAMNUM
-int nsfnum;
-int sfnum[10];
-extern int sfn_cmp(int *q, int *s);
-int nsfnum_n;
-int sfnum_n[10];
-#endif
-X
-/********************************/
-/* extern variable declarations */
-/********************************/
-extern char *prog_func; /* function label */
-extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
-X
-/********************************/
-/*extern function declarations */
-/********************************/
-/* open sequence file (getseq.c) */
-extern int getseq(char *filen, int *sascii,
-X unsigned char *seq, int maxs,
-X char *libstr, int n_libstr,
-X long *sq0ff);
-X
-struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
-X
-void set_shuffle(struct mngmsg m_msg);
-void closelib(struct lmf_str *m_fptr);
-X
-void irand(int);
-int nrand(int);
-X
-extern int ann_scan(unsigned char *, int, struct mngmsg *, int );
-extern int scanseq(unsigned char *seq, int n, char *str);
-extern void re_ascii(int *qascii, int *sascii);
-extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
-extern void revcomp(unsigned char *seq, int n, int *c_nt);
-X
-extern void init_ascii(int is_ext, int *sascii, int is_dna);
-extern void qshuffle(unsigned char *aa0, int n0, int nm0);
-extern void free_pam2p(int **);
-X
-/* initialize environment (doinit.c) */
-extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
-X struct pstruct *ppst, unsigned char **aa0);
-X
-/* print timing information */
-extern void ptime (FILE *, time_t);
-X
-#ifdef COMP_MLIB
-#define QGETLIB (q_file_p->getlib)
-#endif
-X
-#define GETLIB (m_file_p->getlib)
-X
-/* calculation functions */
-extern void
-init_work(unsigned char *aa0, int n0,
-X struct pstruct *ppst, void **f_arg );
-#ifndef COMP_THR
-extern void
-do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame,
-X struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
-#endif
-X
-extern void
-close_work(unsigned char *aa0, int n0, struct pstruct *ppst, void **f_arg);
-extern void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2);
-X
-#ifdef COMP_THR
-#ifndef PRSS
-void
-save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
-X FILE *fdata, int *, struct hist_str *, void **);
-#else
-void
-save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
-X FILE *fdata, int *, struct hist_str *, void **, int *, int *);
-#endif
-#endif
-X
-/* statistics functions */
-extern int
-process_hist(struct stat_str *sptr, int nstat,
-X struct mngmsg m_msg,
-X struct pstruct pst,
-X struct hist_str *hist, void **, int);
-extern void addhistz(double, struct hist_str *); /* scaleswn.c */
-void selectbestz(struct beststr **, int, int );
-extern double (*find_zp)(int score, double escore, int length, double comp,void *);
-X
-void last_stats(const unsigned char *, int,
-X struct stat_str *sptr, int nstats,
-X struct beststr **bestp_arr, int nbest,
-X struct mngmsg m_msg, struct pstruct pst,
-X struct hist_str *histp, void *);
-X
-int last_calc( unsigned char **a0, unsigned char *a1, int maxn,
-X struct beststr **bestp_arr, int nbest,
-X struct mngmsg m_msg, struct pstruct *ppst,
-X void **f_str, void *rs_str);
-X
-void scale_scores(struct beststr **bestp_arr, int nbest,
-X struct db_str,struct pstruct pst, void *);
-X
-#ifndef COMP_THR
-extern int shuffle(unsigned char *, unsigned char *, int);
-extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
-#endif
-X
-extern void set_db_size(int, struct db_str *, struct hist_str *);
-X
-/* display functions */
-extern void
-showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1,
-X int maxn, struct beststr **bestp_arr, int nbest,
-X int qlib, struct mngmsg *m_msg,struct pstruct pst,
-X struct db_str db, char *gstring2, void **f_str);
-X
-extern void
-showalign (FILE *fp, unsigned char **aa0, unsigned char *aa1,
-X int maxn, struct beststr **bestp_arr, int nbest,
-X int qlib, struct mngmsg m_msg,struct pstruct pst,
-X char *gstring2, void **f_str);
-X
-/* misc functions */
-void h_init(struct pstruct *, struct mngmsg *, char *); /* doinit.c */
-void last_init(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
-void last_params(unsigned char *, int, struct mngmsg *, struct pstruct *);
-X
-void s_abort(char *, char *); /* compacc.c */
-X
-/* initfa/sw.c */
-void resetp(struct mngmsg *, struct pstruct *);
-X
-void gettitle(char *, char *, int); /* nxgetaa.c */
-void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
-void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
-void query_parm(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
-void selectbestz(struct beststr **, int, int);
-X
-/* compacc.c */
-void prhist(FILE *, struct mngmsg, struct pstruct,
-X struct hist_str hist, int nstats, struct db_str, char *);
-void printsum(FILE *, struct db_str db);
-int reset_maxn(struct mngmsg *, int); /* set m_msg.maxt, maxn from maxl */
-X
-FILE *outfd; /* Output file; main() points it at stdout before each search */
-X
-/* this information is global for fsigint() */
-extern time_t s_time(); /* fetches time */
-time_t tstart, tscan, tprev, tdone; /* Timing: tstart/tscan set once at startup, tprev just before the library scan */
-#ifdef COMP_MLIB
-time_t ttscan, ttdisp;
-#endif
-time_t tdstart, tddone; /* tdstart is set from time(NULL) at startup */
-X
-static struct db_str qtt = {0l, 0l, 0}; /* running totals over queries: main() adds m_msg.n0 to length and bumps entries */
-X
-#ifdef COMP_THR
-/***************************************/
-/* thread global variable declarations */
-/***************************************/
-X
-/* functions for getting/sending buffers to threads (thr_sub.c) */
-extern void init_thr(int , struct thr_str *, struct mngmsg, struct pstruct *,
-X unsigned char *, int);
-extern void start_thr(void);
-extern void get_rbuf(struct buf_head **cur_buf, int max_wor_buf);
-extern void put_rbuf(struct buf_head *cur_buf, int max_work_buf);
-extern void put_rbuf_done(int nthreads, struct buf_head *cur_buf,
-X int max_work_buf);
-#undef XTERNAL
-#include "thr.h"
-struct buf_head buf_list[NUM_WORK_BUF];
-#endif
-X
-/* these variables must be file-scope so that save_best() (declared
-X above for COMP_THR builds) can use them as well as main() */
-X
-static struct beststr
-X *best, /* array of best scores; main() allocates MAXBEST+1 and advances it so best[-1] is a sentinel */
-X *bestp, /* entry currently being filled in */
-X **bestp_arr; /* array of pointers into best[]; reordered by selectbestz() */
-static int nbest; /* number of best scores */
-X
-static struct stat_str *stats, *qstats; /* array of scores for statistics (MAXSTATS entries); qstats holds shuffled-query scores */
-X
-/* these variables are file-scope so they can be set both by the main()
-X program and save_best() in threaded mode.
-*/
-static int nstats, nqstats, kstats; /* entries used in stats[], in qstats[], and the running count fed to nrand() under SAMP_STATS */
-static double zbestcut; /* cut off for best z-score; starts at -FLT_MAX, raised after each selectbestz() pass */
-static int bestfull; /* index for selectbestz(): nbest-MAXBEST/4 when best[] fills up */
-static int stats_done=0; /* flag for z-value processing; set to 1 once process_hist() has run */
-void fsigint(); /* NOTE(review): neither defined nor called in the visible part of this file */
-X
-int
-main (int argc, char *argv[])
-{
-X unsigned char *aa0[6], *aa0s, *aa1, *aa1ptr, *aa1s;
-X int n1, n1s; /* n1s needed for PRSS so that when getlib() returns -1 (because no more
-X library sequences, we have a valid n1 for shuffling */
-X
-X int *n1tot_ptr=NULL, *n1tot_cur;
-X int n1tot_cnt=0;
-X int n1tot_v, aa1_loff;
-X
-X long qoffset; /* qoffset is the equivalent of loffset */
-X /* m_msg.sq0off is the l_off equivalent */
-X
-X long loffset, l_off; /* loffset is the coordinate of first residue
-X when lcont > 0; l_off is not used in the
-X main loop, only in showbest and showalign */
-X char lib_label[MAX_FN];
-X char pgm_abbr[MAX_SSTR];
-X char qlabel[MAX_FN];
-#ifdef COMP_MLIB
-X char q_bline[MAX_STR];
-X fseek_t qseek;
-X int qlib;
-X struct lmf_str *q_file_p;
-X int sstart, sstop, is;
-#endif
-X int id;
-X struct lmf_str *m_file_p;
-X
-X int t_best, t_rbest, t_qrbest; /* best score of two/six frames */
-X double t_escore, t_rescore, t_qrescore; /* best evalues of two/six frames */
-X int i_score;
-#ifdef PRSS
-X int s_score[3];
-X int s_n1;
-#endif
-X
-X struct pstruct pst;
-X void *f_str[6], *qf_str; /* different f_str[]'s for different
-X translation frames, or forward,reverse */
-X int have_f_str=0;
-X
-#ifdef COMP_THR
-X long ntbuff;
-X int max_buf_cnt, ave_seq_len, buf_siz;
-X int max_work_buf;
-X struct buf_head *cur_buf;
-X struct buf_str *cur_buf_p;
-X int nseq;
-X struct thr_str *work_info;
-#endif
-X
-X struct mngmsg m_msg; /* Message from host to manager */
-X int iln, itt; /* index into library names */
-X char rline[MAX_FN];
-X char argv_line[MAX_STR];
-X int t_quiet;
-X
-X struct rstruct rst; /* results structure */
-X struct rstruct rrst; /* results structure for shuffle*/
-X int i;
-X
-X FILE *fdata=NULL; /* file for full results */
-X char libstr[MAX_UID]; /* string for labeling full results */
-X char *libstr_p; /* choose between libstr and ltitle */
-X int n_libstr; /* length of libstr */
-X int jstats;
-X int leng; /* leng is length of the descriptive line */
-X int maxn; /* size of the library sequence examined */
-X int maxl; /* size of library buffer */
-X fseek_t lmark; /* seek into library of current sequence */
-X int qlcont; /* continued query sequence */
-X int lcont, ocont, maxt; /* continued sequence */
-X int igncnt=0; /* count for ignoring sequences warning */
-X int ieven=0; /* tmp for wshuffle */
-X double zscore; /* tmp value */
-X char *bp; /* general purpose string ptr */
-X
-X /* Initialization */
-X
-#if defined(UNIX)
-X m_msg.quiet= !isatty(1);
-#else
-X m_msg.quiet = 0;
-#endif
-X
-#ifdef PGM_DOC
-X argv_line[0]='#'; argv_line[1]='\0';
-X for (i=0; i<argc; i++) {
-X strncat(argv_line," ",sizeof(argv_line)-strlen(argv_line)-1);
-X if (strchr(argv[i],' ')) {
-X strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
-X strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
-X strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
-X }
-X else {
-X strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
-X }
-X }
-X argv_line[sizeof(argv_line)-1]='\0';
-#endif
-X
-X /* first initialization routine - nothing is known */
-X h_init(&pst, &m_msg, pgm_abbr);
-X
-X m_msg.db.length = qtt.length = 0l;
-X m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
-X m_msg.pstat_void = NULL;
-X m_msg.hist.entries = 0;
-X
-X for (iln=0; iln<MAX_LF; iln++) m_msg.lb_mfd[iln]=NULL;
-X
-X f_str[0] = f_str[1] = NULL;
-X
-X aa0[0] = NULL;
-X /* second initialization - get command line arguments */
-X initenv (argc, argv, &m_msg, &pst,&aa0[0]);
-X
-#ifdef COMP_THR
-X /* now have max_workers - allocate work_info[] */
-X if (max_workers >= MAX_WORKERS) max_workers = MAX_WORKERS;
-X if ((work_info=
-X (struct thr_str *)calloc(max_workers,sizeof(struct thr_str)))==NULL) {
-X fprintf(stderr, " cannot allocate work_info[%d]\n",max_workers);
-X exit(1);
-X }
-#else
-X max_workers = 1;
-#endif
-X
-#ifndef PRSS
-X /* label library size limits */
-X if (m_msg.n1_low > 0 && m_msg.n1_high < BIGNUM)
-X sprintf(lib_label,"library (range: %d-%d)",m_msg.n1_low,m_msg.n1_high);
-X else if (m_msg.n1_low > 0)
-X sprintf(lib_label,"library (range: >%d)",m_msg.n1_low);
-X else if (m_msg.n1_high < BIGNUM)
-X sprintf(lib_label,"library (range: <%d)",m_msg.n1_high);
-X else
-X strncpy(lib_label,"library",sizeof(lib_label));
-#else
-X sprintf(lib_label,"shuffled sequence");
-#endif
-X lib_label[sizeof(lib_label)-1]='\0';
-X
-X tstart = tscan = s_time();
-X tdstart = time(NULL);
-X
-X /* Allocate space for the query and library sequences */
-X /* pad aa0[] with an extra 32 chars for ALTIVEC padding */
-X if (aa0[0]==NULL) {
-X if ((aa0[0] = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof(unsigned char)))
-X == NULL)
-X s_abort ("Unable to allocate query sequence", "");
-X *aa0[0]=0;
-X aa0[0]++;
-X }
-X aa0[5]=aa0[4]=aa0[3]=aa0[2]=aa0[1]=aa0[0];
-X
-X /* make room for random sequence -
-X also used as storage for COMP_THR library overlaps
-X */
-X if ((aa1s = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof (char))) == NULL) {
-X s_abort ("Unable to allocate shuffled library sequence", "");
-X }
-X *aa1s=0;
-X aa1s++;
-X
-X irand(0);
-X
-X if (m_msg.markx & MX_HTML) {
-#ifdef HTML_HEAD
-X fprintf(stdout,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
-#endif
-X fprintf(stdout,"<pre>\n");
-X }
-X
-#ifdef PGM_DOC
-X fputs(argv_line,stdout);
-X fputc('\n',stdout);
-#endif
-X
-X fprintf(stdout,"%s\n",iprompt0);
-X fprintf(stdout," %s%s\n",verstr,refstr);
-X if (m_msg.markx & MX_HTML) fputs("</pre>\n",stdout);
-X
-X /* Query library */
-X if (m_msg.tname[0] == '\0') {
-X if (m_msg.quiet == 1)
-X s_abort("Query sequence undefined","");
-X l1: fputs (iprompt1, stdout);
-X fflush (stdout);
-X if (fgets (m_msg.tname, MAX_FN, stdin) == NULL)
-X s_abort ("Unable to read query library name","");
-X m_msg.tname[MAX_FN-1]='\0';
-X if ((bp=strchr(m_msg.tname,'\n'))!=NULL) *bp='\0';
-X if (m_msg.tname[0] == '\0') goto l1;
-X }
-X
-X /* Fetch first sequence */
-X qoffset = 0l;
-X qlcont = 0;
-#ifdef COMP_MLIB
-X /* Open query library */
-X if ((q_file_p= openlib(m_msg.tname, m_msg.qdnaseq,qascii,!m_msg.quiet,NULL))==NULL) {
-X s_abort(" cannot open library ",m_msg.tname);
-X }
-X qlib = 0;
-X m_msg.n0 =
-X QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
-X &qseek, &qlcont,q_file_p,&m_msg.sq0off);
-X if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
-X strncpy(qlabel,m_msg.qtitle,sizeof(qlabel));
-X if (bp != NULL) *bp = ' ';
-X qlabel[sizeof(qlabel)-1]='\0';
-X
-X /* if annotations are included in sequence, remove them */
-X if (m_msg.ann_flg) {
-X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
-X }
-X
-X if (m_msg.term_code && !(m_msg.qdnaseq==SEQT_DNA || m_msg.qdnaseq==SEQT_RNA) &&
-X aa0[0][m_msg.n0-1]!='*') {
-X aa0[0][m_msg.n0++]='*';
-X aa0[0][m_msg.n0]=0;
-X }
-X
-X /* check for subset */
-X if (q_file_p->opt_text[0]!='\0') {
-X if (q_file_p->opt_text[0]=='-') {
-X sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
-X }
-X else {
-X sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
-X sstart--;
-X if (sstop <= 0 ) sstop = BIGNUM;
-X }
-X for (id=0,is=sstart; is<min(m_msg.n0,sstop); ) aa0[0][id++]=aa0[0][is++];
-X aa0[0][id]=0;
-X m_msg.n0 = min(m_msg.n0,sstop)-sstart;
-X if (m_msg.sq0off==1) m_msg.sq0off = sstart+1;
-X }
-X
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X /* for ALTIVEC, must pad with 15 NULL's */
-X for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
-#endif
-X
-X if (qlcont) {
-X qoffset += m_msg.n0 - m_msg.sq0off;
-X }
-X else {
-X qoffset = 0l;
-X }
-X
-#else
-X m_msg.n0 = getseq (m_msg.tname, qascii, aa0[0], m_msg.max_tot,
-X m_msg.qtitle, sizeof(m_msg.qtitle),
-X &m_msg.sq0off);
-X strncpy(qlabel,m_msg.tname,sizeof(qlabel));
-X qlabel[sizeof(qlabel)-1]='\0';
-X
-X /* if annotations are included in sequence, remove them */
-X if (m_msg.ann_flg) {
-X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
-X }
-#endif
-X
-X if (m_msg.n0 > MAXTST) {
-X fprintf(stderr," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
-X fprintf(stdout," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
-X aa0[0][MAXTST]='\0';
-X m_msg.n0=MAXTST;
-X }
-X
-X if (m_msg.qdnaseq == SEQT_UNK) {
-X
-X /* do automatic sequence recognition,but only for sequences > 20 residues */
-X if (m_msg.n0 > 20 &&
-X (float)scanseq(aa0[0],m_msg.n0,"ACGTUNacgtun")/(float)m_msg.n0 >0.85) {
-X pascii = nascii;
-X m_msg.qdnaseq = SEQT_DNA;
-X }
-X else { /* its protein */
-X pascii = aascii;
-X m_msg.qdnaseq = SEQT_PROT;
-X }
-X /* modify qascii to use encoded version
-X cannot use memcpy() because it loses annotations
-X */
-X re_ascii(qascii,pascii);
-X init_ascii(pst.ext_sq_set,qascii,m_msg.qdnaseq);
-X m_msg.n0 = recode(aa0[0],m_msg.n0,qascii, pst.nsqx);
-X }
-X
-X if (m_msg.n0 <= 0)
-X s_abort ("Query sequence length <= 0: ", m_msg.tname);
-X
-#ifdef SUPERFAMNUM
-X m_msg.nqsfnum = nsfnum;
-X for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
-X m_msg.nqsfnum_n = nsfnum_n;
-X for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
-#endif
-X
-X resetp (&m_msg, &pst);
-X
-#ifndef COMP_MLIB
-X gettitle(m_msg.tname,m_msg.qtitle,sizeof(m_msg.qtitle));
-X if (m_msg.tname[0]=='-' || m_msg.tname[0]=='@') {
-X strncmp(m_msg.tname,m_msg.qtitle,sizeof(m_msg.tname));
-X if ((bp=strchr(m_msg.tname,' '))!=NULL) *bp='\0';
-X }
-#endif
-X
-X /* get library file names */
-X
-#ifndef PRSS
-X if (strlen (m_msg.lname) == 0) {
-X if (m_msg.quiet == 1) s_abort("Library name undefined","");
-X libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
-X }
-X
-X libselect(m_msg.lname, &m_msg);
-#else
-X if (strlen (m_msg.lname) == 0) {
-X if (m_msg.quiet == 1) s_abort("Shuffle sequence undefined","");
-l2: fputs(iprompt2,stdout);
-X fflush(stdout);
-X if (fgets (m_msg.lname, MAX_FN, stdin) == NULL)
-X s_abort ("Unable to read shuffle file name","");
-X m_msg.lname[MAX_FN-1]='\0';
-X if ((bp=strchr(m_msg.lname,'\n'))!=NULL) *bp='\0';
-X if (m_msg.lname[0] == '\0') goto l2;
-X }
-X m_msg.lbnames[0]= m_msg.lname;
-X m_msg.nln = 1;
-X m_msg.nshow = 0;
-#endif
-X
-X /* Get additional parameters here */
-X if (!m_msg.quiet) query_parm (&m_msg, &pst);
-X
-X last_init(&m_msg, &pst);
-X
-X /* Allocate space for saved scores */
-X if ((best =
-X (struct beststr *)calloc((MAXBEST+1),sizeof(struct beststr)))==NULL)
-X s_abort ("Cannot allocate best struct","");
-X if ((bestp_arr =
-X (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
-X s_abort ("Cannot allocate bestp_arr","");
-X
-X /* Initialize bestp_arr */
-X for (nbest = 0; nbest < MAXBEST+1; nbest++)
-X bestp_arr[nbest] = &best[nbest];
-X best++; bestp_arr++;
-X best[-1].score[0]=best[-1].score[1]=best[-1].score[2]= INT_MAX;
-X best[-1].zscore=FLT_MAX; /* for Z-scores, bigger is best */
-X best[-1].escore=FLT_MIN; /* for E()-values, lower is best */
-X
-X if ((stats =
-X (struct stat_str *)calloc(MAXSTATS,sizeof(struct stat_str)))==NULL)
-X s_abort ("Cannot allocate stats struct","");
-X
-#ifdef UNIX
-X /* set up signals now that input is done */
-X signal(SIGHUP,SIG_IGN);
-#endif
-X
-#ifdef COMP_THR
-X /* Set up buffers for reading the library:
-X
-X We will start by using a 2 Mbyte buffer for each worker. For
-X proteins, that means 5,000 sequences of length 400 (average).
-X For DNA, that means 2,000 sequences of length 1000. At the
-X moment, those are good averages.
-X */
-X
-X if (m_msg.ldnaseq== SEQT_DNA) {
-X max_buf_cnt = MAX_NT_BUF;
-X ave_seq_len = AVE_NT_LEN;
-X }
-X else {
-X max_buf_cnt = MAX_AA_BUF;
-X ave_seq_len = AVE_AA_LEN;
-X }
-X
-X /* however - buffer sizes should be a function of the number of
-X workers so that all the workers are kept busy. Assuming a 10,000
-X entry library is the smallest we want to schedule, then
-X */
-X
-X if (max_buf_cnt > 10000/max_workers)
-X max_buf_cnt = 10000/(2*max_workers);
-X
-X max_buf_cnt /= m_msg.thr_fact;
-X
-X /* finally, max_buf_cnt should be a multiple of 6 for tfasta */
-X max_buf_cnt -= (max_buf_cnt % 6);
-X
-X max_work_buf = 2*max_workers;
-X
-X /* allocate space for library buffers and results */
-X
-X buf_siz=max_buf_cnt*ave_seq_len;
-X if (buf_siz < m_msg.max_tot) buf_siz = m_msg.max_tot;
-X for (i=0; i<max_work_buf; i++) {
-X if ((buf_list[i].buf =(struct buf_str *)calloc((size_t)(max_buf_cnt+1),
-X sizeof(struct buf_str)))
-X ==NULL) {
-X fprintf(stderr," cannot allocate buffer struct %d %d\n",i,max_buf_cnt+1);
-X exit(1);
-X }
-X buf_list[i].buf_cnt=0;
-X buf_list[i].have_results=0;
-X if ((buf_list[i].start =
-X (unsigned char *)calloc((size_t)(buf_siz),sizeof(unsigned char)))
-X ==NULL) {
-X fprintf(stderr," cannot allocate buffer %d\n",i);
-X exit(1);
-X }
-X
-X /* make certain there is a '\0' at the beginning */
-X buf_list[i].start++;
-X
-X reader_buf[i] = &buf_list[i];
-X }
-X
-X /* initialization of global variables for threads/buffers */
-X
-X num_worker_bufs = 0;
-X num_reader_bufs = max_work_buf;
-X reader_done = 0;
-X worker_buf_workp = 0;
-X worker_buf_readp = 0;
-X reader_buf_workp = 0;
-X reader_buf_readp = 0;
-X
-X start_thread = 1; /* keeps threads from starting */
-#endif
-X
-X /* Label the output */
-X if ((bp = (char *) strchr (m_msg.lname, ' ')) != NULL) *bp = '\0';
-X if (m_msg.ltitle[0] == '\0') {
-X strncpy(m_msg.ltitle,m_msg.lname,sizeof(m_msg.ltitle));
-X m_msg.ltitle[sizeof(m_msg.ltitle)-1]='\0';
-X }
-X
-#ifdef COMP_MLIB
-X printf("Query library %s vs %s library\n", m_msg.tname,m_msg.lname);
-X if (m_msg.nln > 0) printf("searching %s library\n\n",m_msg.lbnames[0]);
-#endif
-X
-#ifdef COMP_MLIB
-X while(1) {
-X m_msg.db.length = 0l;
-X m_msg.db.entries = m_msg.db.carry = 0;
-X qlib++;
-X stats_done = 0;
-#endif
-X
-X maxl = m_msg.max_tot - m_msg.n0 -2; /* maxn = max library sequence space */
-X
-X maxn = reset_maxn(&m_msg,maxl);
-X pst.maxlen = maxn;
-X
-X outfd = stdout;
-X nbest = 0;
-X zbestcut = -FLT_MAX;
-X nstats = 0;
-X
-X /* get the last parameters */
-X last_params(aa0[0],m_msg.n0, &m_msg, &pst);
-X
-X /*
-X if our function returns approximate E()-scores, we do not need to
-X work with raw scores and later calculate z-scores. When
-X approx. E()-scores are calculated, we still need various
-X statistics structures, but we can get them immediately. In this
-X case, find_zp() must produce a z_score (large positive is good)
-X from an e_score.
-X */
-X
-X if (m_msg.escore_flg) {
-X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
-X &m_msg.hist,&m_msg.pstat_void,0);
-X stats_done=1;
-X }
-X
-#ifndef COMP_THR
-X if (m_msg.qshuffle) {
-X if ((aa0s=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
-X fprintf(stderr,"cannot allocate aa0s[%d]\n",m_msg.n0+2);
-X exit(1);
-X }
-X *aa0s='\0';
-X aa0s++;
-X memcpy(aa0s,aa0[0],m_msg.n0);
-X qshuffle(aa0s,m_msg.n0,m_msg.nm0);
-X }
-X
-X /* previous versions of FASTA have stored the reverse complement in
-X the same array as the forward query sequence. This version
-X changes that, by allocating separate space for the reverse complement,
-X and thus reducing the demand for a large MAXLIB/MAXTRN for long queries
-X */
-X if (m_msg.qframe == 2) {
-X if ((aa0[1]=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
-X fprintf(stderr,"cannot allocate aa0[1][%d]\n",m_msg.n0+2);
-X exit(1);
-X }
-X *aa0[1] = '\0';
-X aa0[1]++;
-X memcpy(aa0[1],aa0[0],m_msg.n0+1);
-X revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
-X }
-X /* set aa1 for serial - threaded points aa1 to buffer */
-X
-X aa1 = aa0[0] + m_msg.n0+1; /* modified now that aa0[1] is done separately */
-X *aa1++='\0';
-#else
-X init_thr(max_workers, work_info, m_msg, &pst, aa0[0], max_work_buf);
-#endif
-X
-X if (m_msg.qshuffle && qstats==NULL) {
-X if ((qstats =
-X (struct stat_str *)calloc(m_msg.shuff_max+1,sizeof(struct stat_str)))==NULL)
-X s_abort ("Cannot allocate qstats struct","");
-X }
-X nqstats = 0;
-X
-X if (m_msg.markx & MX_HTML) fputs("<pre>\n",stdout);
-#ifndef PRSS
-X /* rline[] is a tmp string */
-X if (m_msg.qdnaseq == SEQT_DNA || m_msg.qdnaseq == SEQT_RNA) {
-X strncpy(rline,(m_msg.qframe==1)? " (forward-only)" : "\0",sizeof(rline));
-X rline[sizeof(rline)-1]='\0';
-X }
-X else rline[0]='\0';
-X
-X leng = (int)strlen(m_msg.qtitle);
-X if (leng > 50) leng -= 10;
-X
-X sprintf (&m_msg.qtitle[leng], " %d %s", m_msg.n0, m_msg.sqnam);
-X m_msg.seqnm = 0;
-X
-X
-#ifdef COMP_MLIB
-X printf("%3d>>>%s - %d %s%s\n vs %.60s %s\n", qlib,
-X m_msg.qtitle, m_msg.n0, m_msg.sqnam,
-X (m_msg.revcomp ? " (reverse complement)" : rline),
-X m_msg.ltitle,lib_label);
-#else
-X printf("%.50s: %d %s%s\n %s\n vs %.60s %s\n",
-X qlabel, m_msg.n0, m_msg.sqnam,
-X (m_msg.revcomp ? " (reverse complement)" : rline),
-X m_msg.qtitle,m_msg.ltitle,lib_label);
-#endif
-X libstr_p = &libstr[0];
-X n_libstr=sizeof(libstr);
-#else /* PRSS */
-X libstr_p = &m_msg.ltitle[0];
-X n_libstr= sizeof(m_msg.ltitle);
-X set_shuffle(m_msg); /* set count/width parameters in llgetaa.c */
-#endif
-X
-X fflush (outfd);
-X
-X tprev = s_time();
-X
-X if (m_msg.dfile[0] && (fdata=fopen(m_msg.dfile,"w"))!=NULL)
-X fprintf(fdata,"%3d\t%-50s\n",m_msg.n0,m_msg.qtitle);
-X
-X qtt.length += m_msg.n0;
-X qtt.entries++;
-X
-#ifdef COMP_THR
-X start_thr();
-X
-X /* now open the library and start reading */
-X /* get a buffer and fill it up */
-X get_rbuf(&cur_buf,max_work_buf);
-X
-X cur_buf->buf_cnt = 0;
-X cur_buf->have_results = 0;
-X cur_buf->buf[0].aa1b = cur_buf->start;
-X ntbuff = 0;
-X nseq = 0;
-#else /* ! COMP_THR */
-X /* initialize the comparison function, returning f_str */
-X init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
-X have_f_str=1;
-X
-X f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
-X if (m_msg.qframe == 2) {
-X init_work ( aa0[1], m_msg.n0, &pst, &f_str[1]);
-X }
-X if (m_msg.qshuffle) {
-X init_work ( aa0s, m_msg.n0, &pst, &qf_str);
-X }
-#endif /* COMP_THR */
-X
-X /* open the library - start the search */
-X
-X for (iln = 0; iln < m_msg.nln; iln++) {
-X if ((m_msg.lb_mfd[iln] = m_file_p=
-X openlib(m_msg.lbnames[iln], m_msg.ldnaseq, lascii, !m_msg.quiet, m_msg.lb_mfd[iln]))
-X ==NULL) {
-X fprintf(stderr," cannot open library %s\n",m_msg.lbnames[iln]);
-X continue;
-X }
-#if !defined(PRSS) && !defined(COMP_MLIB)
-X else
-X printf ("searching %s %s\n",m_msg.lbnames[iln],lib_label);
-#endif
-X
-X loffset = 0l;
-X lcont = 0;
-X ocont = 0;
-X n1tot_v = n1tot_cnt = 0;
-X n1tot_cur = n1tot_ptr = NULL;
-X
-X /* get next buffer to read into */
-X maxt = maxn;
-X
-#ifndef COMP_THR
-X aa1ptr = aa1;
-#else
-X /* read sequence directly into buffer */
-X aa1ptr = aa1 = cur_buf->buf[nseq].aa1b;
-#endif
-X
-X while ((n1=GETLIB(aa1ptr,maxt,libstr_p,n_libstr,&lmark,&lcont,m_file_p,&l_off))>=0) {
-X
-X if (n_libstr <= MAX_UID) {
-X if ((bp=strchr(libstr_p,' '))!=NULL) *bp='\0';
-X }
-X
-X if (m_msg.term_code && !lcont &&
-X m_msg.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg.term_code) {
-X aa1ptr[n1++]=m_msg.term_code;
-X aa1ptr[n1]=0;
-X }
-X
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X /* for ALTIVEC, must pad with 15 NULL's */
-X for (id=0; id<SEQ_PAD; id++) {aa1ptr[n1+id]=0;}
-#endif
-X
-#ifdef DEBUG
-X if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') {
-X fprintf(stderr,"%s: aa1[%d] missing NULL boundaries: %d %d\n",libstr_p,n1,aa1[-1],aa1ptr[n1]);
-X }
-#endif
-X
-X /* check for a continued sequence and provide a pointer to
-X the n1_tot array if lcont || ocont */
-X n1tot_v += n1;
-X if (lcont && !ocont) { /* get a new pointer */
-X if (n1tot_cnt <= 0) {
-X if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
-X fprintf(stderr," cannot allocate n1tot_ptr\n");
-X exit(1);
-X }
-X else {n1tot_cnt=1000;}
-X }
-X n1tot_cnt--;
-X n1tot_cur = n1tot_ptr++;
-X }
-X
-X if (n1tot_v < m_msg.n1_low || n1tot_v > m_msg.n1_high) {
-X goto loop2;
-X }
-X
-X m_msg.db.entries++;
-X m_msg.db.length += n1;
-X if (m_msg.db.length > LONG_MAX) {
-X m_msg.db.length -= LONG_MAX; m_msg.db.carry++;
-X }
-X
-#ifdef DEBUG
-X /* This finds most reasons for core dumps */
-X if (pst.debug_lib)
-X for (i=0; i<n1; i++)
-X if (aa1[i]>=pst.nsqx)
-X {fprintf(stderr,
-X "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
-X libstr,i,n1,aa1[i],pst.nsqx,lcont,ocont,aa1ptr+i);
-X aa1[i]=0;
-X n1=i-1;
-X break;
-X }
-#endif
-X
-X /* don't count long sequences more than once */
-X if (aa1!=aa1ptr) {n1 += m_msg.loff; m_msg.db.entries--;}
-X
-#ifdef PROGRESS
-X if (!m_msg.quiet)
-X if (m_msg.db.entries % 200 == 199) {
-X fputc('.',stderr);
-X if (m_msg.db.entries % 10000 == 9999) fputc('\n',stderr);
-X else if (m_msg.db.entries % 1000 == 999) fputc(' ',stderr);
-X
-X }
-#endif
-X
-X if (n1<=1) {
-X /* if (igncnt++ <10)
-X fprintf(stderr,"Ignoring: %s\n",libstr);
-X */
-X goto loop2;
-X }
-X
-#ifdef PRSS
-X if (lmark==0) {
-X n1s = n1;
-X memcpy(aa1s,aa1,n1s);
-X m_msg.db.entries=0;
-X m_msg.db.length=0;
-X }
-#endif
-X
-X /* if COMP_THR - fill and empty buffers */
-#ifdef COMP_THR
-X ntbuff += n1+1;
-X
-X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
-X
-X cur_buf->buf_cnt++;
-X cur_buf_p = &(cur_buf->buf[nseq++]);
-X cur_buf_p->n1 = n1;
-X cur_buf_p->n1tot_p = n1tot_cur;
-X cur_buf_p->lseek = lmark;
-X cur_buf_p->cont = ocont+1;
-X cur_buf_p->m_file_p = (void *)m_file_p;
-X cur_buf_p->frame = itt;
-X memcpy(cur_buf_p->libstr,libstr,MAX_UID);
-#ifdef SUPERFAMNUM
-X cur_buf_p->nsfnum = nsfnum;
-X if ((cur_buf_p->sfnum[0]=sfnum[0])>0 &&
-X (cur_buf_p->sfnum[1]=sfnum[1])>0 &&
-X (cur_buf_p->sfnum[2]=sfnum[2])>0 &&
-X (cur_buf_p->sfnum[3]=sfnum[3])>0 &&
-X (cur_buf_p->sfnum[4]=sfnum[4])>0 &&
-X (cur_buf_p->sfnum[5]=sfnum[5])>0 &&
-X (cur_buf_p->sfnum[6]=sfnum[6])>0 &&
-X (cur_buf_p->sfnum[7]=sfnum[7])>0 &&
-X (cur_buf_p->sfnum[8]=sfnum[8])>0 &&
-X (cur_buf_p->sfnum[9]=sfnum[9])>0) ;
-#endif
-X
-X /* this assumes that max_buf_cnt is guaranteed %6=0 so that
-X additional pointers to the same buffer can be used
-X nseq now points to next buffer
-X */
-X
-X cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b;
-X } /* for (itt .. */
-X
-X /* make a copy of the overlap (threaded only) */
-X if (lcont) {
-X memcpy(aa1s,&aa1[n1-m_msg.loff],m_msg.loff);
-X }
-X
-X /* if the buffer is filled */
-X if (nseq >= max_buf_cnt || ntbuff >= buf_siz - maxn) {
-X
-X /* provide filled buffer to workers */
-X put_rbuf(cur_buf,max_work_buf);
-X
-X /* get an empty buffer to fill */
-X get_rbuf(&cur_buf,max_work_buf);
-X
-X /* "empty" buffers have results that must be processed */
-X if (cur_buf->buf_cnt && cur_buf->have_results) {
-X save_best(cur_buf,m_msg,pst,fdata,m_msg.qsfnum,&m_msg.hist,
-X &m_msg.pstat_void
-#ifdef PRSS
-X ,s_score,&s_n1
-#endif
-X );
-X
-X }
-X
-X /* now the buffer is truly empty, fill it up */
-X cur_buf->buf_cnt = 0;
-X cur_buf->have_results = 0;
-X /* point the first aa1 ptr to the buffer start */
-X aa1=cur_buf->buf[0].aa1b = cur_buf->start;
-X ntbuff = 0;
-X nseq=0;
-X }
-X else { /* room left in current buffer, increment ptrs */
-X aa1=cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b+n1+1;
-X }
-#else /* if !COMP_THR - do a bunch of searches */
-X
-X /* t_best and t_rbest are used to save the best score or shuffled
-X score from all the frames */
-X
-X t_best = t_rbest = t_qrbest = -1;
-X t_escore = t_rescore = t_qrescore = FLT_MAX;
-X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
-X
-X rst.score[0] = rst.score[1] = rst.score[2] = 0;
-X do_work (aa0[itt], m_msg.n0,aa1,n1,itt,&pst,f_str[itt],0,&rst);
-X
-X if (rst.score[pst.score_ix] > t_best) {
-X t_best = rst.score[pst.score_ix];
-X }
-X
-X if (fdata) {
-X fprintf(fdata,
-X "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8lld\n",
-X libstr,
-#ifdef SUPERFAMNUM
-X sfn_cmp(m_msg.qsfnum,sfnum),
-#else
-X 0,
-#endif
-X n1,itt,
-X rst.comp,rst.H,
-X rst.score[0],rst.score[1],rst.score[2],
-X rst.escore, rst.segnum, rst.seglen, lmark);
-X fflush(fdata);
-X }
-X
-#ifdef PRSS
-X if (lmark==0) {
-X s_score[0] = rst.score[0];
-X s_score[1] = rst.score[1];
-X s_score[2] = rst.score[2];
-X
-X s_n1 = n1;
-X aa1_loff = l_off;
-X }
-X t_best = t_rbest = rst.score[pst.score_ix];
-X t_escore = t_rescore = rst.escore;
-#else
-X if (m_msg.qshuffle) {
-X do_work (aa0s, m_msg.n0,aa1,n1,itt,&pst,qf_str,1,&rrst);
-X
-X if (rrst.score[pst.score_ix] > t_qrbest)
-X t_qrbest = rrst.score[pst.score_ix];
-X if (rrst.escore < t_qrescore)
-X t_qrescore = rrst.escore;
-X
-X if (itt==m_msg.nitt1 && nqstats < m_msg.shuff_max) {
-X qstats[nqstats].n1 = n1; /* save the best score */
-X qstats[nqstats].comp = rst.comp;
-X qstats[nqstats].H = rst.H;
-X qstats[nqstats].escore = t_qrescore;
-X qstats[nqstats++].score = t_qrbest;
-X t_qrbest = -1; /* reset t_qrbest, t_qrescore */
-X t_qrescore = FLT_MAX;
-X }
-X }
-X
-X if (pst.zsflag >= 10) {
-X if (pst.zs_win > 0) wshuffle(aa1,aa1s,n1,pst.zs_win,&ieven);
-X else shuffle(aa1,aa1s,n1);
-X do_work (aa0[itt], m_msg.n0, aa1s, n1,itt,&pst,f_str[itt],0,&rrst);
-X if (rrst.score[pst.score_ix] > t_rbest) {
-X t_rbest = rrst.score[pst.score_ix];
-X t_rescore = rrst.escore;
-X }
-X }
-#endif
-X i_score = rst.score[pst.score_ix];
-X
-/* this section saves scores for statistics calculations. For
-X comparisons that can be from one of 2 or 6 frames, it should only
-X be run once, for the best of the 2 or 6 scores. t_rbest,t_rescore
-X have the best of the 2 or 6 scores from the frames. For proteins,
-X this is run for every score.
-X
-*/
-#ifdef PRSS /* don't save the first score (unshuffled) with PRSS */
-X if (lmark > 0) {
-#endif
-X
-X if (itt == m_msg.nitt1) {
-X if (nstats < MAXSTATS) {
-X stats[nstats].n1 = n1; /* save the best score */
-X stats[nstats].comp = rst.comp;
-X stats[nstats].H = rst.H;
-X if (pst.zsflag >=10) {
-X t_best = t_rbest;
-X t_escore = t_rescore;
-X }
-X stats[nstats].escore = t_escore;
-X stats[nstats++].score = t_best;
-X t_best = t_rbest = -1; /* reset t_rbest, t_best */
-X t_escore = t_rescore = FLT_MAX;
-X }
-X else if (pst.zsflag >= 0) {
-X if (!stats_done) {
-X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
-X &m_msg.hist,&m_msg.pstat_void,0);
-X stats_done = 1;
-X kstats = nstats;
-X for (i=0; i<MAXBEST; i++) {
-X bestp_arr[i]->zscore =
-X (*find_zp)(bestp_arr[i]->score[pst.score_ix],
-X bestp_arr[i]->escore, bestp_arr[i]->n1,
-X bestp_arr[i]->comp, m_msg.pstat_void);
-X }
-X zbestcut = bestp_arr[nbest-1]->zscore;
-X }
-X
-#ifdef SAMP_STATS
-/* older versions saved the first MAXSTATS scores, and ignored the
-X rest in the statistics. With SAMP_STATS, scores after MAX_STATS
-X are sampled at random, and included in the sample set and the
-X statistics parameters are re-derived at the end of the run using
-X the sampled scores.
-X
-X It would be faster not to do the nrand(); if(jstats < MAXSTATS)
-X less often.
-*/
-X if (!m_msg.escore_flg) { /* only for zscores */
-X jstats = nrand(++kstats); /* no mod % 0 */
-X if (jstats < MAXSTATS) {
-X stats[jstats].n1 = n1; /* save the best score */
-X stats[jstats].comp = rst.comp;
-X stats[jstats].H = rst.H;
-X if (pst.zsflag >=10) t_best = t_rbest;
-X stats[jstats].score = t_best;
-X }
-X }
-#endif
-X } /* ( nstats >= MAXSTATS) && zsflag >= 0 */
-X } /* itt1 == nitt1 */
-#ifdef PRSS
-X }
-#endif
-X
-X /* this section completes work on the current score */
-X if (stats_done) { /* stats_done > 0 => nstats >= MAXSTATS */
-X zscore=(*find_zp)(i_score, rst.escore, n1, rst.comp,
-X m_msg.pstat_void);
-X
-X if (itt == m_msg.nitt1) {
-X if (pst.zsflag >= 10) t_best = t_rbest;
-X
-X addhistz((*find_zp)(t_best, t_escore, n1, rst.comp,
-X m_msg.pstat_void),
-X &m_msg.hist);
-X t_best = t_rbest = -1;
-X }
-X }
-X else zscore = (double) i_score;
-X
-#ifndef PRSS
-X if (zscore > zbestcut ) {
-X if (nbest >= MAXBEST) {
-X bestfull = nbest-MAXBEST/4;
-X selectbestz(bestp_arr,bestfull-1,nbest);
-X zbestcut = bestp_arr[bestfull-1]->zscore;
-X nbest = bestfull;
-X }
-X
-X bestp = bestp_arr[nbest++];
-X bestp->score[0] = rst.score[0];
-X bestp->score[1] = rst.score[1];
-X bestp->score[2] = rst.score[2];
-X bestp->comp = rst.comp;
-X bestp->H = rst.H;
-X bestp->zscore = zscore;
-X bestp->escore = rst.escore;
-X bestp->segnum = rst.segnum;
-X bestp->seglen = rst.seglen;
-X bestp->lseek = lmark;
-X bestp->cont = ocont+1;
-X bestp->m_file_p = m_file_p;
-X bestp->n1 = n1;
-X bestp->n1tot_p=n1tot_cur;
-X bestp->frame = itt;
-X memcpy(bestp->libstr,libstr,MAX_UID);
-#ifdef SUPERFAMNUM
-X bestp->nsfnum = nsfnum;
-X if ((bestp->sfnum[0]=sfnum[0])>0 &&
-X (bestp->sfnum[1]=sfnum[1])>0 &&
-X (bestp->sfnum[2]=sfnum[2])>0 &&
-X (bestp->sfnum[3]=sfnum[3])>0 &&
-X (bestp->sfnum[4]=sfnum[4])>0 &&
-X (bestp->sfnum[5]=sfnum[5])>0 &&
-X (bestp->sfnum[6]=sfnum[6])>0 &&
-X (bestp->sfnum[7]=sfnum[7])>0 &&
-X (bestp->sfnum[8]=sfnum[8])>0 &&
-X (bestp->sfnum[9]=sfnum[9])>0) ;
-#endif
-X }
-#else /* PRSS */
-X if (lmark == 0) {
-X bestp = bestp_arr[nbest++];
-X bestp->score[0] = rst.score[0];
-X bestp->score[1] = rst.score[1];
-X bestp->score[2] = rst.score[2];
-X bestp->comp = rst.comp;
-X bestp->H = rst.H;
-X bestp->zscore = zscore;
-X bestp->escore = rst.escore;
-X bestp->segnum = rst.segnum;
-X bestp->seglen = rst.seglen;
-X bestp->lseek = lmark;
-X bestp->cont = 0;
-X bestp->m_file_p = m_file_p;
-X bestp->n1 = n1;
-X bestp->n1tot_p=n1tot_cur;
-X bestp->frame = itt;
-X memcpy(bestp->libstr,libstr,MAX_UID);
-X bestp->nsfnum = 0;
-X }
-#endif
-X }
-#endif
-X
-X loop2:
-X if (lcont) {
-X maxt = m_msg.maxt3;
-#ifndef COMP_THR
-X memcpy(aa1,&aa1[n1-m_msg.loff],m_msg.loff);
-#else
-X memcpy(aa1,aa1s,m_msg.loff);
-#endif
-X aa1ptr= &aa1[m_msg.loff];
-X loffset += n1 - m_msg.loff;
-X ocont = lcont;
-X }
-X else {
-X maxt = maxn;
-X aa1ptr=aa1;
-X if (ocont) *n1tot_cur = n1tot_v;
-X ocont = 0;
-X loffset = 0l;
-X n1tot_v = 0;
-X n1tot_cur = NULL;
-X }
-X } /* end while((n1=getlib())) */
-X } /* end iln=1..nln */
-X
-X /* all done */
-X
-#ifdef COMP_THR
-X /* check last buffers for any results */
-X put_rbuf_done(max_workers,cur_buf,max_work_buf);
-X
-X for (i=0; i < num_reader_bufs; i++) {
-X reader_buf_readp = (reader_buf_readp+1)%(max_work_buf);
-X if (reader_buf[reader_buf_readp]->buf_cnt > 0 &&
-X reader_buf[reader_buf_readp]->have_results) {
-X save_best(reader_buf[reader_buf_readp],m_msg,pst,fdata,m_msg.qsfnum,
-X &m_msg.hist, &m_msg.pstat_void
-#ifdef PRSS
-X ,s_score,&s_n1
-#endif
-X );
-X }
-X }
-#endif
-X
-#ifdef PROGRESS
-X if (!m_msg.quiet)
-X if (m_msg.db.entries >= 200) {fprintf(stderr," Done!\n");}
-#endif
-X
-X m_msg.nbr_seq = m_msg.db.entries;
-X get_param(&pst, gstring2,gstring3);
-X
-/* *************************** */
-/* analyze the last results */
-/* *************************** */
-X
-#ifndef PRSS
-#ifndef SAMP_STATS
-X if (!stats_done && nstats > 0) {
-#endif
-X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,&m_msg.hist,
-X &m_msg.pstat_void,stats_done);
-X if (m_msg.pstat_void != NULL) {
-X stats_done = 1;
-X for (i = 0; i < nbest; i++) {
-X bestp_arr[i]->zscore =
-X (*find_zp)(bestp_arr[i]->score[pst.score_ix],
-X bestp_arr[i]->escore, bestp_arr[i]->n1,
-X bestp_arr[i]->comp, m_msg.pstat_void);
-X }
-#ifndef SAMP_STATS
-X }
-X else pst.zsflag = -1;
-#endif
-X }
-#else /* PRSS */
-X if (pst.zsflag < 10) pst.zsflag += 10;
-X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
-X &m_msg.hist, &m_msg.pstat_void,0);
-X stats_done = 1;
-X for (i = 0; i < nbest; i++) {
-X bestp_arr[i]->zscore = (*find_zp)(bestp_arr[i]->score[pst.score_ix],
-X bestp_arr[i]->escore, bestp_arr[i]->n1,
-X bestp_arr[i]->comp, m_msg.pstat_void);
-X }
-#endif
-X
-X if (pst.zdb_size <= 1) pst.zdb_size = m_msg.db.entries;
-X
-#ifdef COMP_THR
-X /* before I call last_calc/showbest/showalign, I need init_work() to
-X get an f_str. This duplicates some code above, which is used in
-X the non-threaded version
-X */
-X
-X if (!have_f_str) {
-X init_work(aa0[0],m_msg.n0,&pst,&f_str[0]);
-X have_f_str = 1;
-X f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
-X
-X if (m_msg.qframe == 2) {
-X if ((aa0[1]=(unsigned char *)calloc((size_t)m_msg.n0+2,
-X sizeof(unsigned char)))==NULL) {
-X fprintf(stderr," cannot allocate aa0[1][%d] for alignments\n",
-X m_msg.n0+2);
-X }
-X *aa0[1]='\0';
-X aa0[1]++;
-X memcpy(aa0[1],aa0[0],m_msg.n0+1);
-X revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
-X init_work(aa0[1],m_msg.n0,&pst,&f_str[1]);
-X }
-X
-X /* I also need a "real" aa1 */
-X aa1 = buf_list[0].start;
-#ifdef PRSS
-X /* for PRSS - I need the original second (non-shuffled) sequence */
-X memcpy(aa1,aa1s,n1s+1);
-#endif
-X }
-#endif
-X
-/* now we have one set of scaled scores in bestp_arr -
-X for FASTS/F, we need to do some additional processing */
-X
-X if (!m_msg.qshuffle) {
-X last_stats(aa0[0], m_msg.n0, stats,nstats, bestp_arr,nbest,
-X m_msg, pst, &m_msg.hist, &m_msg.pstat_void);
-X }
-X else {
-X last_stats(aa0[0], m_msg.n0,
-X qstats,nqstats, bestp_arr,nbest, m_msg, pst,
-X &m_msg.hist, &m_msg.pstat_void);
-X }
-X
-X /* here is a contradiction: if pst.zsflag < 0, then m_msg.pstat_void
-X should be NULL; if it is not, then process_hist() has been called */
-X if (pst.zsflag < 0 && m_msg.pstat_void != NULL) pst.zsflag = 1;
-X
-X if (m_msg.last_calc_flg) {
-X /* last_calc may need coefficients from last_stats() */
-X nbest = last_calc(aa0, aa1, maxn, bestp_arr, nbest, m_msg, &pst,
-X f_str, m_msg.pstat_void);
-X }
-X
-X scale_scores(bestp_arr,nbest,m_msg.db,pst,m_msg.pstat_void);
-X
-X get_param(&pst, gstring2,gstring3);
-X
-#ifdef PRSS
-X /* gettitle(m_msg.lname,m_msg.ltitle,sizeof(m_msg.ltitle)); */
-X printf("%.50s - %s %d %s%s\n vs %.60s - %s shuffled sequence\n",
-X m_msg.tname, m_msg.qtitle,m_msg.n0, m_msg.sqnam,
-X (m_msg.revcomp ? " (reverse complement)" : "\0"),
-X m_msg.lname,m_msg.ltitle);
-#endif
-X
-X prhist (stdout, m_msg, pst, m_msg.hist, nstats, m_msg.db, gstring2);
-X
-X tscan = s_time();
-X printf (" Scan time: ");
-X ptime(stdout,tscan-tprev);
-X printf ("\n");
-#ifdef COMP_MLIB
-X ttscan += tscan-tprev;
-#endif
-X
-X l3:
-X if (!m_msg.quiet) {
-X printf("Enter filename for results [%s]: ", m_msg.outfile);
-X fflush(stdout);
-X }
-X
-X rline[0]='\0';
-X if (!m_msg.quiet && fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
-X if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0';
-X if (rline[0]!='\0') strncpy(m_msg.outfile,rline,sizeof(m_msg.outfile));
-X if (m_msg.outfile[0]!='\0') {
-X if ((outfd=fopen(m_msg.outfile,"w"))==NULL) {
-X fprintf(stderr," could not open %s\n",m_msg.outfile);
-X if (!m_msg.quiet) goto l3;
-X else goto l4;
-X }
-X
-#ifdef PGM_DOC
-X fputs(argv_line,outfd);
-X fputc('\n',outfd);
-#endif
-X fputs(iprompt0,outfd);
-X fprintf(outfd," %s%s\n",verstr,refstr);
-X
-X fprintf(outfd," %s%s, %d %s\n vs %s %s\n",
-X qlabel, (m_msg.revcomp ? "-" : "\0"), m_msg.n0,
-X m_msg.sqnam, m_msg.ltitle, lib_label);
-X
-X prhist(outfd,m_msg,pst,m_msg.hist, nstats, m_msg.db, gstring2);
-X }
-X
-X l4:
-X if (m_msg.markx & MX_HTML) {
-X fputs("</pre>\n<p>\n<hr>\n<p>\n",outfd);
-X }
-X
-X /* code from p2_complib.c to pre-calculate -m 9 alignment info -
-X requires -q with -m 9 */
-X
-X if (m_msg.quiet || m_msg.markx & MX_M9SUMM) {
-X
-X /* to determine how many sequences to re-align (either for
-X do_opt() or calc_id() we need to modify m_msg.mshow to get
-X the correct number of alignments */
-X
-X if (m_msg.mshow_flg != 1 && pst.zsflag >= 0) {
-X for (i=0; i<nbest && bestp_arr[i]->escore< m_msg.e_cut; i++) {}
-X m_msg.mshow = i;
-X }
-X
-#ifndef PRSS
-X if (m_msg.mshow <= 0) { /* no results to display */
-X fprintf(outfd,"!! No sequences with E() < %f\n",m_msg.e_cut);
-X m_msg.nshow = 0;
-X goto end_l;
-X }
-#endif
-X }
-X
-#ifdef PRSS
-X memcpy(aa1,aa1s,n1s);
-X maxn = n1s;
-X nbest = 1;
-#endif
-X
-X showbest (stdout, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
-X m_msg.db, gstring2, f_str);
-X
-X if (outfd != stdout) {
-X t_quiet = m_msg.quiet;
-X m_msg.quiet = -1; /* should guarantee 1..nbest shown */
-X showbest (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
-X m_msg.db, gstring2, f_str);
-X m_msg.quiet = t_quiet;
-X }
-X
-X if (m_msg.nshow > 0) {
-X rline[0]='N';
-X if (!m_msg.quiet){
-X printf(" Display alignments also? (y/n) [n] "); fflush(stdout);
-X if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
-X }
-X else rline[0]='Y';
-X
-X if (toupper((int)rline[0])=='Y') {
-X if (!m_msg.quiet) {
-X printf(" number of alignments [%d]? ",m_msg.nshow);
-X fflush(stdout);
-X if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
-X if (rline[0]!=0) sscanf(rline,"%d",&m_msg.nshow);
-X m_msg.ashow=m_msg.nshow;
-X }
-X
-X if (m_msg.markx & (MX_AMAP+ MX_HTML + MX_M9SUMM)) {
-X fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
-X qlabel,(m_msg.revcomp ? "_rev":"\0"), m_msg.n0,
-X m_msg.sqnam,m_msg.lname);
-X }
-X
-X if (m_msg.markx & MX_M10FORM) {
-X fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
-X qlabel,(m_msg.revcomp ? "-":"\0"), m_msg.n0, m_msg.sqnam,
-X m_msg.lname);
-X fprintf(outfd,"; pg_name: %s\n",argv[0]);
-X fprintf(outfd,"; pg_ver: %s\n",mp_verstr);
-X fprintf(outfd,"; pg_argv:");
-X for (i=0; i<argc; i++)
-X fprintf(outfd," %s",argv[i]);
-X fputc('\n',outfd);
-X fputs(gstring3,outfd);
-X fputs(hstring1,outfd);
-X }
-X
-#ifndef PRSS
-X showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
-X m_msg, pst, gstring2, f_str);
-#else
-X if (pst.sw_flag > 0 || (!m_msg.quiet && m_msg.nshow>0)) {
-X showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
-X m_msg, pst, gstring2, f_str);
-X }
-#endif
-X
-X fflush(outfd);
-X }
-X }
-X
-X end_l:
-#if defined(COMP_THR) && defined(COMP_MLIB)
-X for (i=0; i<max_work_buf; i++) {
-X buf_list[i].buf_cnt=0;
-X buf_list[i].have_results=0;
-X }
-X
-X num_worker_bufs = 0;
-X num_reader_bufs = max_work_buf;
-X reader_done = 0;
-X worker_buf_workp = 0;
-X worker_buf_readp = 0;
-X reader_buf_workp = 0;
-X reader_buf_readp = 0;
-X
-X start_thread = 1; /* stop thread from starting again */
-#endif
-X
-X /* clean up alignment encodings */
-X for (i=0; i < m_msg.nshow; i++) {
-X if (bestp_arr[i]->have_ares) {
-X free(bestp_arr[i]->a_res.res);
-X bestp_arr[i]->a_res.res = NULL;
-X bestp_arr[i]->have_ares = 0;
-X }
-X }
-X
-X if (m_msg.qframe == 2) free(aa0[1]-1);
-X
-X if (have_f_str) {
-X if (f_str[1]!=f_str[0]) {
-X close_work (aa0[1], m_msg.n0, &pst, &f_str[1]);
-X }
-X close_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
-X have_f_str = 0;
-#ifndef COMP_THR
-X if (m_msg.qshuffle) close_work (aa0s, m_msg.n0, &pst, &qf_str);
-#endif
-X if (pst.pam_pssm) {
-X free_pam2p(pst.pam2p[0]);
-X free_pam2p(pst.pam2p[1]);
-X }
-X }
-X
-X for (iln=0; iln < m_msg.nln; iln++) {
-X if (m_msg.lb_mfd[iln]!=NULL) closelib(m_msg.lb_mfd[iln]);
-X }
-X
-X tddone = time(NULL);
-X tdone = s_time();
-X fflush(outfd);
-X
-X if (fdata) {
-X fprintf(fdata,"/** %s **/\n",gstring2);
-X fprintf(fdata,"%3ld%-50s\n",qtt.entries-1,m_msg.qtitle);
-X fflush(fdata);
-X }
-X
-#ifdef COMP_MLIB
-X ttdisp += tdone-tscan;
-X
-X maxn = m_msg.max_tot;
-X m_msg.n0 =
-X QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
-X &qseek, &qlcont,q_file_p,&m_msg.sq0off);
-X if (m_msg.n0 <= 0) break;
-X if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
-X strncpy(qlabel, m_msg.qtitle,sizeof(qlabel));
-X if (bp != NULL) *bp=' ';
-X qlabel[sizeof(qlabel)-1]='\0';
-X
-X if (m_msg.ann_flg) {
-X m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
-X }
-X
-X if (m_msg.term_code && m_msg.qdnaseq==SEQT_PROT &&
-X aa0[0][m_msg.n0-1]!=m_msg.term_code) {
-X aa0[0][m_msg.n0++]=m_msg.term_code;
-X aa0[0][m_msg.n0]=0;
-X }
-X
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X /* for ALTIVEC, must pad with 15 NULL's */
-X for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
-#endif
-X
-#ifdef SUPERFAMNUM
-X m_msg.nqsfnum = nsfnum;
-X for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
-X m_msg.nqsfnum_n = nsfnum_n;
-X for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
-#endif
-X }
-#endif
-X if (m_msg.markx & MX_M10FORM)
-X fprintf(outfd,">>><<<\n");
-X
-X tdone = s_time();
-X if ( m_msg.markx & MX_HTML) fputs("<p><pre>\n",outfd);
-X printsum(outfd, m_msg.db);
-X if ( m_msg.markx & MX_HTML) fputs("</pre>\n",outfd);
-#ifdef HTML_HEAD
-X if (m_msg.markx & MX_HTML) fprintf(outfd,"</body>\n</html>\n");
-#endif
-X if (outfd!=stdout) printsum(stdout,m_msg.db);
-X
-X exit(0);
-} /* End of main program */
-X
-/* printsum() - write the end-of-run summary to fd.
-X
-X   Reports the query totals (from the file-level qtt), the library
-X   totals passed in ntt, the program version with start/done times,
-X   the scan and display timings, and the comparison function used.
-X
-X   fd  - stream that receives the summary
-X   ntt - library totals; when ntt.carry is non-zero the residue count
-X         is printed as carry*LONG_MAX + length using a double
-*/
-void
-printsum(FILE *fd, struct db_str ntt)
-{
-X  double db_tt;
-X  char tstr1[26], tstr2[26];
-X  const char *t_txt;
-X
-X  /* ctime() can return NULL for a time it cannot represent; fall back
-X     to an empty string instead of handing NULL to strncpy().  Each
-X     buffer is bounded by its own size. */
-X  t_txt = ctime(&tdstart);
-X  strncpy(tstr1, (t_txt != NULL) ? t_txt : "", sizeof(tstr1));
-X  t_txt = ctime(&tddone);
-X  strncpy(tstr2, (t_txt != NULL) ? t_txt : "", sizeof(tstr2));
-X  /* cut the text at column 24, where ctime() puts its newline */
-X  tstr1[24]=tstr2[24]='\0';
-X
-X  /* Print timing to output file as well */
-X  fprintf(fd, "\n\n%ld residues in %ld query sequences\n", qtt.length, qtt.entries);
-X  if (ntt.carry == 0)
-X    fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
-X  else {
-X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
-X    fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt.entries);
-X  }
-X
-#ifndef COMP_THR
-X  fprintf(fd," Scomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
-#else
-X  fprintf(fd," Tcomplib [%s] (%d proc)\n start: %s done: %s\n", mp_verstr,
-X    max_workers,tstr1,tstr2);
-#endif
-#ifndef COMP_MLIB
-X  fprintf(fd," Scan time: ");
-X  ptime(fd, tscan - tprev);
-X  fprintf (fd," Display time: ");
-X  ptime (fd, tdone - tscan);
-#else
-X  fprintf(fd," Total Scan time: ");
-X  ptime(fd, ttscan);
-X  fprintf (fd," Total Display time: ");
-X  ptime (fd, ttdisp);
-#endif
-X  fprintf (fd,"\n");
-X  fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
-}
-X
-/* fsigint() - report an interrupted run and stop.
-X
-X   Stamps the finish times, writes the "interrupted" notice to stdout,
-X   to outfd (only when it is a different stream) and to stderr, prints
-X   the run summary with all-zero library totals, then exits with
-X   status 1. */
-void fsigint()
-{
-X  struct db_str zero_db;
-X  int to_file;
-X
-X  zero_db.carry = 0;
-X  zero_db.length = 0;
-X  zero_db.entries = 0;
-X
-X  tdone = s_time();
-X  tddone = time(NULL);
-X
-X  /* outfd gets its own copy only when results go somewhere else */
-X  to_file = (outfd != stdout);
-X
-X  printf(" /*** interrupted ***/\n");
-X  if (to_file) {
-X    fprintf(outfd,"/*** interrupted ***/\n");
-X  }
-X  fprintf(stderr,"/*** interrupted ***/\n");
-X
-X  printsum(stdout,zero_db);
-X  if (to_file) {
-X    printsum(outfd,zero_db);
-X  }
-X
-X  exit(1);
-}
-X
-#ifdef COMP_THR
-/* save_best() - drain one buffer of worker results (threaded build).
-X
-X For every result in cur_buf this function
-X (1) optionally logs the raw scores to fdata;
-X (2) tracks the best score/escore of the current set of frames and,
-X when the last frame (frame == m_msg.nitt1) arrives, stores it in
-X stats[] (with SAMP_STATS, samples it once stats[] is full);
-X (3) once statistics exist, adds the z-score to the histogram;
-X (4) stores the result in bestp_arr[] when its z-score exceeds
-X zbestcut, pruning bestp_arr[] with selectbestz() when it is full.
-X
-X cur_buf - results buffer; cur_buf->buf_cnt is decremented here
-X m_msg, pst - run parameters, passed by value (the pst.zsflag_f
-X assignment below therefore only changes the local copy)
-X fdata - optional score log, may be NULL
-X qsfnum - query superfamily numbers (used with SUPERFAMNUM)
-X histp, pstat_voidp - histogram and statistics state to update
-X s_score, s_n1 - (PRSS only) receive the unshuffled scores and length
-*/
-void save_best(struct buf_head *cur_buf, struct mngmsg m_msg, struct pstruct pst,
-X FILE *fdata, int *qsfnum, struct hist_str *histp,
-X void **pstat_voidp
-#ifdef PRSS
-X , int *s_score, int *s_n1
-X
-#endif
-X )
-{
-X double zscore;
-X int i_score;
-X struct buf_str *p_rbuf, *cur_buf_p;
-X int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
-X double e_score, tm_escore, t_rescore, t_qrescore;
-X int jstats;
-X
-X sc_ix = pst.score_ix;
-X
-X cur_buf_p = cur_buf->buf;
-X
-X /* tm_best is reset too, so it is never read before being assigned */
-X tm_best = t_best = t_rbest = t_qrbest = -1;
-X tm_escore = t_rescore = t_qrescore = FLT_MAX;
-X
-X while (cur_buf->buf_cnt--) { /* count down the number of results */
-X p_rbuf = cur_buf_p++; /* step through the results buffer */
-X
-X i_score = p_rbuf->rst.score[sc_ix];
-X e_score = p_rbuf->rst.escore;
-X
-X /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
-X if (fdata) {
-X fprintf(fdata,
-X "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8ld\n",
-X p_rbuf->libstr,
-#ifdef SUPERFAMNUM
-X sfn_cmp(qsfnum,p_rbuf->sfnum),
-#else
-X 0,
-#endif
-X p_rbuf->n1,p_rbuf->frame,p_rbuf->rst.comp,p_rbuf->rst.H,
-X p_rbuf->rst.score[0],p_rbuf->rst.score[1],p_rbuf->rst.score[2],
-X p_rbuf->rst.escore, p_rbuf->rst.segnum, p_rbuf->rst.seglen, p_rbuf->lseek);
-X }
-X
-#ifdef PRSS
-X /* lseek==0 marks the unshuffled sequence: keep it as a "best" entry
-X but do not let it enter the statistics */
-X if (p_rbuf->lseek==0) {
-X s_score[0] = p_rbuf->rst.score[0];
-X s_score[1] = p_rbuf->rst.score[1];
-X s_score[2] = p_rbuf->rst.score[2];
-X *s_n1 = p_rbuf->n1;
-X
-X bestp = bestp_arr[nbest++];
-X bestp->score[0] = s_score[0];
-X bestp->score[1] = s_score[1];
-X bestp->score[2] = s_score[2];
-X bestp->n1 = *s_n1;
-X /* comp and H are read when the z-scores are recalculated from
-X bestp_arr[] after the scan, so they must be stored here, as the
-X non-threaded PRSS code does */
-X bestp->comp = (double) p_rbuf->rst.comp;
-X bestp->H = (double) p_rbuf->rst.H;
-X bestp->escore = p_rbuf->rst.escore;
-X bestp->segnum = p_rbuf->rst.segnum;
-X bestp->seglen = p_rbuf->rst.seglen;
-X /* zscore has not been calculated for this result yet; store the
-X raw score (what the non-threaded code stores before statistics
-X are available) rather than an unset value */
-X bestp->zscore = (double) i_score;
-X bestp->lseek = p_rbuf->lseek;
-X bestp->cont = 0;
-X bestp->m_file_p = p_rbuf->m_file_p;
-X memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
-X bestp->n1tot_p = p_rbuf->n1tot_p;
-X bestp->frame = p_rbuf->frame;
-X bestp->nsfnum = 0;
-X
-X continue;
-X }
-#endif
-X
-X t_n1 = p_rbuf->n1;
-X if (i_score > t_best) tm_best = t_best = i_score;
-X if (e_score < tm_escore) tm_escore = e_score;
-X
-X if (m_msg.qshuffle) {
-X if (p_rbuf->qr_score > t_qrbest)
-X t_qrbest = p_rbuf->qr_score;
-X if (p_rbuf->qr_escore < t_qrescore)
-X t_qrescore = p_rbuf->qr_escore;
-X
-X if (p_rbuf->frame == m_msg.nitt1 && nqstats < m_msg.shuff_max) {
-X qstats[nqstats].n1 = p_rbuf->n1; /* save the best score */
-X qstats[nqstats].comp = p_rbuf->rst.comp;
-X qstats[nqstats].H = p_rbuf->rst.H;
-X qstats[nqstats].escore = t_qrescore;
-X qstats[nqstats++].score = t_qrbest;
-X t_qrbest = -1; /* reset t_qrbest, t_qrescore */
-X t_qrescore = FLT_MAX;
-X }
-X }
-X
-X if (pst.zsflag >= 10 && p_rbuf->r_score > t_rbest) {
-X t_rbest = p_rbuf->r_score;
-X t_rescore = p_rbuf->r_escore;
-X }
-X
-X /* statistics done for best score of set */
-X
-X
-X if (p_rbuf->frame == m_msg.nitt1) {
-X if (nstats < MAXSTATS ) {
-X stats[nstats].n1 = t_n1;
-X stats[nstats].comp = p_rbuf->rst.comp;
-X stats[nstats].H = p_rbuf->rst.H;
-X if (pst.zsflag >= 10) {
-X tm_best = t_rbest;
-X tm_escore = t_rescore;
-X t_rbest = -1;
-X t_rescore = FLT_MAX;
-X }
-X stats[nstats].escore = tm_escore;
-X stats[nstats++].score = tm_best;
-X t_best = -1;
-X tm_escore = FLT_MAX;
-X }
-X else if (pst.zsflag > 0) {
-X if (!stats_done) {
-X /* stats[] has just filled up: fit the statistics once and
-X convert the scores saved so far to z-scores */
-X pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
-X histp, pstat_voidp,0);
-X kstats = nstats;
-X stats_done = 1;
-X for (i=0; i<MAXBEST; i++) {
-X bestp_arr[i]->zscore =
-X (*find_zp)(bestp_arr[i]->score[pst.score_ix],
-X bestp_arr[i]->escore, bestp_arr[i]->n1,
-X bestp_arr[i]->comp, *pstat_voidp);
-X }
-X }
-#ifdef SAMP_STATS
-X else {
-X if (!m_msg.escore_flg) {
-X jstats = nrand(++kstats);
-X if (jstats < MAXSTATS) {
-X stats[jstats].n1 = t_n1;
-X stats[jstats].comp = p_rbuf->rst.comp;
-X stats[jstats].H = p_rbuf->rst.H;
-X if (pst.zsflag >= 10) {
-X tm_best = t_rbest;
-X }
-X stats[jstats].score = tm_best;
-X }
-X }
-X }
-#endif
-X }
-X }
-X
-X /* best saved for every score */
-X if (stats_done) {
-X
-X zscore=(*find_zp)(i_score, e_score, p_rbuf->n1,(double)p_rbuf->rst.comp,
-X *pstat_voidp);
-X
-X if (p_rbuf->frame == m_msg.nitt1) {
-X addhistz((*find_zp)(t_best, tm_escore, p_rbuf->n1, (double) p_rbuf->rst.comp,
-X *pstat_voidp), histp);
-X t_best = t_rbest = -1;
-X tm_escore = t_rescore = FLT_MAX;
-X }
-X }
-X else zscore = (double) i_score;
-X
-#ifndef PRSS
-X if (zscore > zbestcut) {
-X if (nbest >= MAXBEST) {
-X /* bestp_arr[] is full: keep the top three quarters and raise the
-X cutoff to the lowest z-score that was kept */
-X bestfull = nbest-MAXBEST/4;
-X selectbestz(bestp_arr,bestfull-1,nbest);
-X zbestcut = bestp_arr[bestfull-1]->zscore;
-X nbest = bestfull;
-X }
-X bestp = bestp_arr[nbest++];
-X bestp->score[0] = p_rbuf->rst.score[0];
-X bestp->score[1] = p_rbuf->rst.score[1];
-X bestp->score[2] = p_rbuf->rst.score[2];
-X bestp->comp = (double) p_rbuf->rst.comp;
-X bestp->H = (double) p_rbuf->rst.H;
-X bestp->escore = p_rbuf->rst.escore;
-X bestp->segnum = p_rbuf->rst.segnum;
-X bestp->seglen = p_rbuf->rst.seglen;
-X bestp->zscore = zscore;
-X bestp->lseek = p_rbuf->lseek;
-X memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
-X bestp->cont = p_rbuf->cont; /* not cont+1 because incremented already */
-X bestp->m_file_p = p_rbuf->m_file_p;
-X bestp->n1 = p_rbuf->n1;
-X bestp->n1tot_p = p_rbuf->n1tot_p;
-X bestp->frame = p_rbuf->frame;
-X bestp->nsfnum = p_rbuf->nsfnum;
-#ifdef SUPERFAMNUM
-X /* copy superfamily numbers up to and including the first one that
-X is not positive; the && chain stops there */
-X if ((bestp->sfnum[0] = p_rbuf->sfnum[0])>0 &&
-X (bestp->sfnum[1] = p_rbuf->sfnum[1])>0 &&
-X (bestp->sfnum[2] = p_rbuf->sfnum[2])>0 &&
-X (bestp->sfnum[3] = p_rbuf->sfnum[3])>0 &&
-X (bestp->sfnum[4] = p_rbuf->sfnum[4])>0 &&
-X (bestp->sfnum[5] = p_rbuf->sfnum[5])>0 &&
-X (bestp->sfnum[6] = p_rbuf->sfnum[6])>0 &&
-X (bestp->sfnum[7] = p_rbuf->sfnum[7])>0 &&
-X (bestp->sfnum[8] = p_rbuf->sfnum[8])>0 &&
-X (bestp->sfnum[9] = p_rbuf->sfnum[9])>0) ;
-#endif
-X }
-#endif
-X }
-}
-#endif
-SHAR_EOF
-chmod 0644 comp_lib.c ||
-echo 'restore of comp_lib.c failed'
-Wc_c="`wc -c < 'comp_lib.c'`"
-test 55202 -eq "$Wc_c" ||
- echo 'comp_lib.c: original size 55202, current size' "$Wc_c"
-fi
-# ============= compacc.c ==============
-if test -f 'compacc.c' -a X"$1" != X"-c"; then
- echo 'x - skipping compacc.c (File already exists)'
-else
-echo 'x - extracting compacc.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'compacc.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: compacc.c,v 1.61 2007/04/26 18:37:18 wrp Exp $ */
-X
-/* Concurrent read version */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#if defined(UNIX) || defined(WIN32)
-#include <sys/types.h>
-#endif
-X
-#include <limits.h>
-#include <float.h>
-X
-#include <string.h>
-#include <time.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "structs.h"
-X
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#undef XTERNAL
-X
-#ifdef PCOMPLIB
-#include "msg.h"
-extern int nnodes;
-#ifdef PVM_SRC
-#include "pvm3.h"
-extern int pinums[],hosttid;
-#endif
-#ifdef MPI_SRC
-#include "mpi.h"
-#endif
-#endif
-X
-extern time_t tdone, tstart; /* Timing */
-extern void abort ();
-extern void ptime ();
-X
-/* because it is used to pre-allocate space, maxn has various
-X constraints. For "simple" comparisons, it is simply the length of
-X the longest library sequence. But for translated comparisons, it
-X must be 3 or 6X the length of the query sequence.
-X
-X In addition, however, it can be reduced to make certain that
-X sequences are read in smaller chunks. And, maxn affects how large
-X overlaps must be when sequences are read in chunks.
-*/
-X
-/* reset_maxn() - fit the library buffer length to the current query.
-X
-X   m_msg - query/library settings; loff and maxt3 are always set here,
-X           and maxn is set when the segment length has to be raised
-X   maxn  - proposed library segment length
-X
-X   Returns the segment length to use.  Calls exit(1), after a message
-X   on stderr, when the query is too long for the available space.
-*/
-int
-reset_maxn(struct mngmsg *m_msg, int maxn) {
-X
-X  /* reduce maxn if requested */
-X  if (m_msg->maxn > 0 && m_msg->maxn < maxn) maxn = m_msg->maxn;
-X
-X  if (m_msg->qdnaseq==m_msg->ldnaseq || m_msg->qdnaseq==SEQT_DNA ||
-X      m_msg->qdnaseq == SEQT_RNA) {/* !TFAST - either FASTA or FASTX*/
-X
-X    if (m_msg->n0> m_msg->max_tot/3) {
-X      fprintf(stderr," query sequence is too long %d > %d %s\n",
-X        m_msg->n0,
-X        m_msg->max_tot/3,
-X        m_msg->sqnam);
-X      exit(1);
-X    }
-X    m_msg->loff = m_msg->n0;
-X    m_msg->maxt3 = maxn-m_msg->loff;
-X  }
-X  else { /* is TFAST */
-X    if (m_msg->n0 > MAXTST) {
-X      fprintf(stderr," query sequence is too long %d %s\n",m_msg->n0,m_msg->sqnam);
-X      exit(1);
-X    }
-X
-X    if (m_msg->n0*3 > maxn ) { /* n0*3 for the three frames - this
-X                                  will only happen if maxn has been
-X                                  set low manually */
-X
-X      if (m_msg->n0*4+2 < m_msg->max_tot) { /* n0*3 + n0 */
-X        fprintf(stderr,
-X          " query sequence too long for library segment: %d - resetting to %d\n",
-X          maxn,m_msg->n0*3);
-X        maxn = m_msg->maxn = m_msg->n0*3;
-X      }
-X      else {
-X        /* the limit that failed is max_tot, so report that value
-X           rather than the (already too small) maxn */
-X        fprintf(stderr," query sequence too long for translated search: %d * 4 > %d %s\n",
-X          m_msg->n0,m_msg->max_tot, m_msg->sqnam);
-X        exit(1);
-X      }
-X    }
-X
-X    /* set up some constants for overlaps */
-X    m_msg->loff = 3*m_msg->n0;
-X    m_msg->maxt3 = maxn-m_msg->loff-3;
-X    m_msg->maxt3 -= m_msg->maxt3%3;
-X    m_msg->maxt3++;
-X
-X    /* same rounding for maxn: drop 3, round down to a multiple of 3,
-X       add 1 */
-X    maxn = maxn - 3; maxn -= maxn%3; maxn++;
-X  }
-X  return maxn;
-}
-X
-X
-/* scanseq() - count the residues of seq[0..n-1] whose code appears in
-X   str.
-X
-X   seq - encoded sequence (values are used as indices into a 128-entry
-X         membership table)
-X   n   - number of residues in seq
-X   str - NUL-terminated list of residue letters; each letter is mapped
-X         to its code through qascii[]
-X
-X   Returns the number of positions in seq that hold one of those codes.
-*/
-int
-scanseq(unsigned char *seq, int n, char *str) {
-X  int tot, i, code;
-X  char aaray[128]; /* this must be set > nsq */
-X  const unsigned char *sp;
-X
-X  memset(aaray, 0, sizeof(aaray));
-X
-X  /* walk str once; read it as unsigned char so that a letter with the
-X     high bit set cannot become a negative index */
-X  for (sp = (const unsigned char *)str; *sp != '\0'; sp++) {
-X    /* NOTE(review): assumes qascii[] only covers 7-bit characters -
-X       confirm against its declaration */
-X    if (*sp > 127) continue;
-X    code = qascii[*sp];
-X    if (code >= 0 && code < (int)sizeof(aaray)) aaray[code] = 1;
-X  }
-X
-X  /* codes outside the table cannot have been marked, so skip them
-X     instead of reading past the end of aaray */
-X  for (i = tot = 0; i < n; i++) {
-X    if (seq[i] < sizeof(aaray)) tot += aaray[seq[i]];
-X  }
-X  return tot;
-}
-X
-/* subs_env_cat() - append at most n characters of s to dest, never
-X   writing beyond dest_size bytes; dest stays NUL-terminated. */
-static void
-subs_env_cat(char *dest, int dest_size, const char *s, size_t n) {
-X  size_t used, room;
-X
-X  used = strlen(dest);
-X  if (used + 1 >= (size_t)dest_size) return; /* already full */
-X  room = (size_t)dest_size - 1 - used;
-X  if (n > room) n = room;
-X  strncat(dest, s, n);
-}
-X
-/* subs_env takes a string, possibly with ${ENV}, and looks up all the
-X   potential environment variables and substitutes them into the
-X   string
-X
-X   dest      - receives the expanded string, always NUL-terminated and
-X               truncated to dest_size-1 characters
-X   src       - template; text outside ${...} is copied unchanged, a '$'
-X               that is not followed by '{' is copied as is, and a
-X               variable that is not set expands to nothing
-X   dest_size - size of dest in bytes; nothing is written if it is <= 0
-X
-X   An unterminated "${" is reported on stderr and the rest of src is
-X   copied unchanged.  src is modified only temporarily (to terminate
-X   the variable name for getenv()) and is restored before returning,
-X   so it must be writable.
-*/
-void subs_env(char *dest, char *src, int dest_size) {
-X  char *last_src, *bp, *bp1, *env_val;
-X
-X  if (dest_size <= 0) return;
-X
-X  *dest = '\0';
-X  last_src = src;
-X
-X  while (strlen(dest) < (size_t)(dest_size - 1) &&
-X         (bp = strchr(last_src,'$')) != NULL) {
-X
-X    /* append (not overwrite) the text in front of this '$' */
-X    subs_env_cat(dest, dest_size, last_src, (size_t)(bp - last_src));
-X
-X    if (*(bp+1) != '{') { /* plain '$' - keep it */
-X      subs_env_cat(dest, dest_size, "$", 1);
-X      last_src = bp + 1;
-X    }
-X    else if ((bp1 = strchr(bp+2,'}'))==NULL) {
-X      fprintf(stderr, "Unterminated ENV: %s\n",src);
-X      /* the text before bp is already in dest; continue from bp so it
-X         is not copied a second time */
-X      last_src = bp;
-X      break;
-X    }
-X    else { /* have ${ENV} - put it in */
-X      *bp1 = '\0';
-X      env_val = getenv(bp+2);
-X      *bp1 = '}'; /* restore src whether or not ENV is set */
-X      if (env_val != NULL) {
-X        subs_env_cat(dest, dest_size, env_val, strlen(env_val));
-X      }
-X      last_src = bp1 + 1; /* bump even if getenv == NULL */
-X    }
-X  }
-X
-X  /* now copy the last stuff */
-X  subs_env_cat(dest, dest_size, last_src, strlen(last_src));
-}
-X
-X
-/* selectbest() - partial sort of bptr[0..n-1] by score[0], descending.
-X
-X   Repeatedly partitions around the last element of the current range
-X   until position k holds the entry of rank k: entries before it score
-X   at least as high, entries after it at most as high.
-X
-X   bptr - array of n entry pointers, reordered in place
-X   k    - rank (array index) that must end up in its sorted position
-X   n    - number of entries
-*/
-void selectbest(bptr,k,n) /* k is rank in array */
-X     struct beststr **bptr;
-X     int k,n;
-{
-X  int v, i, j, l, r;
-X  struct beststr *tmptr;
-X
-X  l=0; r=n-1;
-X
-X  while ( r > l ) {
-X    v = bptr[r]->score[0];
-X    i = l-1;
-X    j = r;
-X    do {
-X      /* the upward scan always stops at r, where the score equals v */
-X      while (bptr[++i]->score[0] > v) ;
-X      /* the downward scan has no such sentinel: when the pivot is the
-X         largest entry it would run below l (bptr[-1] for l==0), so
-X         stop it at l */
-X      while (j > l && bptr[--j]->score[0] < v) ;
-X      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
-X    } while (j > i);
-X    /* undo the final crossed swap and move the pivot to position i */
-X    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
-X    if (i>=k) r = i-1;
-X    if (i<=k) l = i+1;
-X  }
-}
-X
-/* selectbestz() - partial sort of bptr[0..n-1] by zscore, descending.
-X
-X   Repeatedly partitions around the last element of the current range
-X   until position k holds the entry of rank k: entries before it have
-X   a z-score at least as high, entries after it at most as high.
-X
-X   bptr - array of n entry pointers, reordered in place
-X   k    - rank (array index) that must end up in its sorted position
-X   n    - number of entries
-*/
-void selectbestz(bptr,k,n) /* k is rank in array */
-X     struct beststr **bptr;
-X     int k,n;
-{
-X  int i, j, l, r;
-X  struct beststr *tmptr;
-X  double v;
-X
-X  l=0; r=n-1;
-X
-X  while ( r > l ) {
-X    v = bptr[r]->zscore;
-X    i = l-1;
-X    j = r;
-X    do {
-X      /* the upward scan always stops at r, where the z-score equals v */
-X      while (bptr[++i]->zscore > v) ;
-X      /* the downward scan has no such sentinel: when the pivot is the
-X         largest entry it would run below l (bptr[-1] for l==0), so
-X         stop it at l */
-X      while (j > l && bptr[--j]->zscore < v) ;
-X      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
-X    } while (j > i);
-X    /* undo the final crossed swap and move the pivot to position i */
-X    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
-X    if (i>=k) r = i-1;
-X    if (i<=k) l = i+1;
-X  }
-}
-X
-/* improved shellsort with high-performance increments */
-/*
-shellsort(itemType a[], int l, int r)
-{ int i, j, k, h; itemType v;
-X int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
-X 13776, 4592, 1968, 861, 336,
-X 112, 48, 21, 7, 3, 1 };
-X for ( k = 0; k < 16; k++)
-X for (h = incs[k], i = l+h; i <= r; i++)
-X {
-X v = a[i]; j = i;
-X while (j > h && a[j-h] > v)
-X { a[j] = a[j-h]; j -= h; }
-X a[j] = v;
-X }
-}
-*/
-X
-/* sortbestz() - shell sort of bptr[0..nbest-1] into descending zscore
-X   order, using a fixed table of decreasing gaps that ends in 1.  Each
-X   entry is lifted out, larger-gap neighbours with a smaller z-score
-X   are shifted up, and the entry is dropped into the hole (one store
-X   per shift instead of a full exchange). */
-void sortbestz(struct beststr **bptr, int nbest)
-{
-X  static const int gaps[16] = { 1391376, 463792, 198768, 86961, 33936,
-X                                13776, 4592, 1968, 861, 336,
-X                                112, 48, 21, 7, 3, 1 };
-X  int g, gap, idx, pos;
-X  struct beststr *held;
-X  double key;
-X
-X  for (g = 0; g < 16; g++) {
-X    gap = gaps[g];
-X    for (idx = gap; idx < nbest; idx++) {
-X      held = bptr[idx];
-X      key = held->zscore;
-X      /* slide lower-scoring entries up by one gap */
-X      for (pos = idx; pos >= gap && bptr[pos-gap]->zscore < key; pos -= gap) {
-X        bptr[pos] = bptr[pos-gap];
-X      }
-X      bptr[pos] = held;
-X    }
-X  }
-}
-X
-X
-/* sortbeste() - shell sort of bptr[0..nbest-1] into ascending escore
-X   order (same gap table as sortbestz()).  Afterwards the leading run
-X   of entries whose escore is <= 2.0*DBL_MIN is re-sorted by zscore,
-X   because such entries cannot be ranked by escore alone. */
-void sortbeste(struct beststr **bptr, int nbest)
-{
-X  static const int gaps[16] = { 1391376, 463792, 198768, 86961, 33936,
-X                                13776, 4592, 1968, 861, 336,
-X                                112, 48, 21, 7, 3, 1 };
-X  int g, gap, idx, pos, n_zero;
-X  struct beststr *held;
-X  double key;
-X
-X  for (g = 0; g < 16; g++) {
-X    gap = gaps[g];
-X    for (idx = gap; idx < nbest; idx++) {
-X      held = bptr[idx];
-X      key = held->escore;
-X      /* slide entries with a larger escore up by one gap */
-X      for (pos = idx; pos >= gap && bptr[pos-gap]->escore > key; pos -= gap) {
-X        bptr[pos] = bptr[pos-gap];
-X      }
-X      bptr[pos] = held;
-X    }
-X  }
-X
-X  /* sometimes there are many high scores with E()==0.0, sort
-X     those by z() score */
-X  n_zero = 0;
-X  while (n_zero < nbest && bptr[n_zero]->escore <= 2.0*DBL_MIN) {
-X    n_zero++;
-X  }
-X  if (n_zero > 1) {
-X    sortbestz(bptr,n_zero);
-X  }
-}
-X
-extern double zs_to_Ec(double zs, long entries);
-X
-/*
-extern double ks_dev;
-extern int ks_df; */
-extern char hstring1[];
-X
-/* prhist() - print the score histogram and the statistics summary.
-X
-X fd - output stream
-X m_msg - run options (nohist and markx are read here)
-X pst - scoring parameters; if pst.zsflag_f < 0 only the library
-X totals and gstring2 are printed
-X hist - histogram counts and binning (hist_a, maxh, min_hist,
-X histint, entries, stat_info)
-X nstats - number of scores used for statistics; the histogram is
-X drawn only when nstats > 20
-X ntt - library totals
-X gstring2 - parameter summary line printed last
-X
-X With MX_M10FORM set in m_msg.markx, the statistics are also formatted
-X into the file-level hstring1.
-*/
-void
-prhist(FILE *fd, struct mngmsg m_msg,
-X struct pstruct pst,
-X struct hist_str hist,
-X int nstats,
-X struct db_str ntt,
-X char *gstring2)
-{
-X int i,j,hl,hll, el, ell, ev;
-X char hline[80], pch, *bp;
-X int mh1, mht;
-X int maxval, maxvalt, dotsiz, ddotsiz,doinset;
-X double cur_e, prev_e, f_int;
-X /* max_dev, n_chi_sq and max_i are reported after the histogram
-X section, which may be skipped entirely (nstats <= 20 or nohist), so
-X they need defined values from the start */
-X double max_dev = 0.0, x_tmp;
-X double db_tt;
-X int n_chi_sq = 0, cum_hl=0, max_i = 0;
-X
-X
-X fprintf(fd,"\n");
-X
-X if (pst.zsflag_f < 0) {
-X fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);
-X fprintf(fd,"\n%s\n",gstring2);
-X return;
-X }
-X
-X if (nstats > 20) {
-X max_dev = 0.0;
-X mh1 = hist.maxh-1;
-X mht = (3*hist.maxh-3)/4 - 1;
-X
-X if (!m_msg.nohist && mh1 > 0) {
-X /* maxval scales the main plot, maxvalt (bins from mht up) the inset */
-X for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
-X if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
-X if (i >= mht && hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
-X }
-X n_chi_sq = 0;
-X cum_hl = -hist.hist_a[0];
-X dotsiz = (maxval-1)/60+1;
-X ddotsiz = (maxvalt-1)/50+1;
-X doinset = (ddotsiz < dotsiz && dotsiz > 2);
-X
-X if (pst.zsflag_f>=0)
-X fprintf(fd," opt E()\n");
-X else
-X fprintf(fd," opt\n");
-X
-X prev_e = zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);
-X for (i=0; i<=mh1; i++) {
-X pch = (i==mh1) ? '>' : ' ';
-X pch = (i==0) ? '<' : pch;
-X hll = hl = hist.hist_a[i];
-X if (pst.zsflag_f>=0) {
-X cum_hl += hl;
-X f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
-X cur_e = zs_to_Ec(f_int,hist.entries);
-X ev = el = ell = (int)(cur_e - prev_e + 0.5);
-X /* track the largest gap between observed and expected
-X cumulative counts */
-X if (hl > 0 && i > 5 && i < (90-hist.min_hist)/hist.histint) {
-X x_tmp = fabs(cum_hl - cur_e);
-X if ( x_tmp > max_dev) {
-X max_dev = x_tmp;
-X max_i = i;
-X }
-X n_chi_sq++;
-X }
-X if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
-X if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
-X fprintf(fd,"%c%3d %5d %5d:",
-X pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
-X mh1*hist.histint+hist.min_hist,hl,ev);
-X }
-X else fprintf(fd,"%c%3d %5d :",
-X pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
-X mh1*hist.histint+hist.min_hist,hl);
-X
-X if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
-X if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
-X for (j=0; j<hl; j++) hline[j]='=';
-X if (pst.zsflag_f>=0) {
-X /* '*' marks the expected count on the bar */
-X if (el <= hl ) {
-X if (el > 0) hline[el-1]='*';
-X hline[hl]='\0';
-X }
-X else {
-X for (j = hl; j < el; j++) hline[j]=' ';
-X hline[el-1]='*';
-X hline[hl=el]='\0';
-X }
-X }
-X else hline[hl] = 0;
-X if (i==1) {
-X for (j=hl; j<10; j++) hline[j]=' ';
-X sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
-X }
-X if (doinset && i == mht-2) {
-X for (j = hl; j < 10; j++) hline[j]=' ';
-X sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
-X }
-X if (i >= mht&& doinset ) {
-X for (j = hl; j < 10; j++) hline[j]=' ';
-X hline[10]=':';
-X for (j = 11; j<11+hll; j++) hline[j]='=';
-X hline[11+hll]='\0';
-X if (pst.zsflag_f>=0) {
-X if (ell <= hll) hline[10+ell]='*';
-X else {
-X for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
-X hline[10+ell] = '*';
-X hline[11+ell] = '\0';
-X }
-X }
-X }
-X
-X fprintf(fd,"%s\n",hline);
-X prev_e = cur_e;
-X }
-X }
-X }
-X
-X if (ntt.carry==0) {
-X fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length, ntt.entries);
-X }
-X else {
-X db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
-X fprintf(fd, "%.0f residues in %5ld library sequences\n", db_tt, ntt.entries);
-X }
-X
-X if (pst.zsflag_f>=0) {
-X if (MAXSTATS < hist.entries)
-#ifdef SAMP_STATS
-X fprintf(fd," statistics sampled from %d to %ld sequences\n",
-X MAXSTATS,hist.entries);
-#else
-X fprintf(fd," statistics extrapolated from %d to %ld sequences\n",
-X MAXSTATS,hist.entries);
-#endif
-X /* summ_stats(stat_info); */
-X fprintf(fd," %s\n",hist.stat_info);
-X if (!m_msg.nohist && cum_hl > 0)
-X fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%d) at %3d\n",
-X max_dev/(float)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
-X if (m_msg.markx & MX_M10FORM) {
-X while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
-X if (cum_hl <= 0) cum_hl = -1;
-X sprintf(hstring1,"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
-X MAXSTATS,hist.entries,hist.stat_info,max_dev/(float)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
-X }
-X }
-X fprintf(fd,"\n%s\n",gstring2);
-X fflush(fd);
-}
-X
-extern char prog_name[], *verstr;
-X
-/* s_abort: print the fatal message p followed by p1, tagged with prog_name,
-X   on stderr; when a parallel library is compiled in, kill the PVM tasks in
-X   pinums[FIRSTNODE..nnodes-1] or abort the MPI job; finally exit(1). */
-void s_abort (char *p, char *p1)
-{
-X  int node;    /* only referenced by the PVM shutdown loop */
-X
-X  fprintf (stderr, "\n***[%s] %s%s***\n", prog_name, p, p1);
-X
-#if defined(PCOMPLIB) && defined(PVM_SRC)
-X  node = FIRSTNODE;
-X  while (node < nnodes) {
-X    pvm_kill(pinums[node]);
-X    node++;
-X  }
-X  pvm_exit();
-#endif
-#if defined(PCOMPLIB) && defined(MPI_SRC)
-X  MPI_Abort(MPI_COMM_WORLD,1);
-X  MPI_Finalize();
-#endif
-X
-X  exit (1);
-}
-X
-#ifndef MPI_SRC
-void w_abort (char *p, char *p1) /* print p then p1, tagged with prog_name, on stderr and exit(1) */
-{
-X fprintf (stderr, "\n***[%s] %s%s***\n\n", prog_name, p, p1);
-X exit (1); /* does not return */
-}
-#endif
-X
-#ifndef PCOMPLIB
-/* copies n residues from from[] to to[] (unless they are the same buffer), then shuffles to[] in place */
-X
-extern int nrand(int);
-X
-void
-shuffle(unsigned char *from, unsigned char *to, int n)
-{
-X  /* Shuffle n residues into to[]: copy them from from[] first unless both
-X     arguments name the same buffer, then walk down from the last position,
-X     swapping each one with the position chosen by nrand().  to[n] is set
-X     to 0 afterwards, so to[] must have room for n+1 bytes.
-X     nrand(k) is presumably uniform on 0..k-1 - confirm against its definition. */
-X  int remaining, pick;
-X  unsigned char *last, held;
-X
-X  if (from != to) {
-X    memcpy((void *)to,(void *)from,n);
-X  }
-X
-X  remaining = n;
-X  while (remaining > 0) {
-X    pick = nrand(remaining);
-X    last = &to[remaining-1];
-X    held = to[pick];
-X    to[pick] = *last;
-X    *last = held;
-X    remaining--;
-X  }
-X
-X  to[n] = 0;
-}
-X
-/* wshuffle_span: shuffle the len residues starting at top in place, making
-X   one nrand() call per residue while counting down from len. */
-static void
-wshuffle_span(unsigned char *top, int len)
-{
-X  int i, j;
-X  unsigned char tmp;
-X
-X  for (i=len; i>0; i--) {
-X    j = nrand(i);
-X    tmp = top[j];
-X    top[j] = top[i-1];
-X    top[i-1] = tmp;
-X  }
-}
-X
-/* wshuffle: copy from[0..n-1] into to[] and shuffle it in windows of wsiz
-X   residues, so a residue only moves inside its own window.
-X
-X   *ieven selects how the windows are anchored and is flipped on every call:
-X     non-zero - full windows are laid out from the start of the sequence and
-X                the n%wsiz left-over residues form a short window at the end;
-X     zero     - full windows are laid out from the end and the left-over
-X                residues form a short window at the start.
-X
-X   to[n] is set to 0, so to[] must have room for n+1 bytes.  wsiz must be
-X   greater than 0 (it is used as a divisor). */
-void
-wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven)
-{
-X  int k, mm;
-X
-X  memcpy((void *)to,(void *)from,n);
-X
-X  mm = n%wsiz;    /* residues left over after the full windows */
-X
-X  if (*ieven) {
-X    /* k <= n-wsiz (not <): when n is an exact multiple of wsiz the final
-X       full window starts at n-wsiz and must be shuffled as well */
-X    for (k=0; k<=(n-wsiz); k += wsiz) wshuffle_span(&to[k], wsiz);
-X    wshuffle_span(&to[n-mm], mm);
-X    *ieven = 0;
-X  }
-X  else {
-X    for (k=n; k>=wsiz; k -= wsiz) wshuffle_span(&to[k-wsiz], wsiz);
-X    wshuffle_span(&to[0], mm);
-X    *ieven = 1;
-X  }
-X  to[n] = 0;
-}
-X
-#endif
-X
-int
-sfn_cmp(int *q, int *s)
-{
-X  /* Return the first value found in both zero-terminated lists q and s, or
-X     0 when there is none.  The lists are walked like a merge (the pointer
-X     at the smaller value advances), which presumes ascending order. */
-X  for (;;) {
-X    if (*q == *s) return *q;            /* also yields 0 when both lists end together */
-X    if (*q == 0 || *s == 0) return 0;   /* one list is exhausted */
-X    if (*q < *s) q++;
-X    else s++;
-X  }
-}
-X
-#ifndef MPI_SRC
-X
-#define ESS 49
-X
-void
-revcomp(unsigned char *seq, int n, int *c_nt)
-{
-X  /* Reverse-complement seq[0..n-1] in place: the two ends are exchanged
-X     while each residue is mapped through c_nt[], and the middle residue of
-X     an odd-length sequence is mapped where it stands.  seq[n] is set to 0,
-X     so seq[] must have room for n+1 bytes. */
-X  int lo, hi;
-X  unsigned char held;
-X
-X  lo = 0;
-X  hi = n-1;
-X  while (lo < hi) {
-X    held = c_nt[seq[lo]];
-X    seq[lo] = c_nt[seq[hi]];
-X    seq[hi] = held;
-X    lo++;
-X    hi--;
-X  }
-X  if (lo == hi) {
-X    seq[lo] = c_nt[seq[lo]];
-X  }
-X  seq[n]=0;
-}
-#endif
-X
-#ifdef PCOMPLIB
-X
-/* init_stage2 sets up the data structures necessary to send a subset
-X of sequences to the nodes, and then collects the results
-*/
-X
-/* wstage2[] FIRSTNODE .. nnodes-1 holds the number of sequences each worker must do_opt()/do_walign() */
-/* wstage2p[] is a list of sequence numbers/frames, to be sent to workers */
-/* wstage2b[] is a list of bptr's that shares the index with wstage2p[] */
-X
-static int wstage2[MAXWRKR +1]; /* count of second stage scores */
-static struct stage2_str *wstage2p[MAXWRKR+1]; /* list of second stage sequences */
-static int wstage2i[MAXWRKR+1]; /* index into second stage sequences */
-static struct beststr *bbptr,
-X **wstage2b[MAXWRKR+1]; /* reverse pointers to bestr */
-X
-void
-do_stage2(struct beststr **bptr, int nbest, struct mngmsg m_msg0,
-X int s_func, struct qmng_str *qm_msp) { /* send each worker the seqnm/frame list of its entries in bptr[0..nbest-1], then copy the returned scores and alignment data back into those entries */
-X
-X int i, is, ib, iw, nres;
-X int node, snode, node_done;
-X int bufid, numt, tid;
-X char errstr[120];
-X struct comstr2 bestr2[BFR2+1]; /* temporary structure array; element BFR2 carries the result count and the FINISHED flag */
-X char *seqc_buff, *seqc;
-X int seqc_buff_len, aln_code_n;
-#ifdef MPI_SRC
-X MPI_Status mpi_status;
-#endif
-X
-X /* initialize the counter for each worker to 0 */
-X for (iw = FIRSTNODE; iw < nnodes; iw++) wstage2[iw] = 0;
-X
-X /* for each result, bump the counter for the worker that has
-X the sequence */
-X for (ib = 0; ib < nbest; ib++ ) { wstage2[bptr[ib]->wrkr]++; }
-X
-X /* now allocate enough space to send each worker a
-X list of its sequences stage2_str {seqnm, frame} */
-X for (iw = FIRSTNODE; iw < nnodes; iw++) {
-X if (wstage2[iw]>0) {
-X if ((wstage2p[iw]=
-X (struct stage2_str *)
-X calloc(wstage2[iw],sizeof(struct stage2_str)))==NULL) {
-X sprintf(errstr," cannot allocate sequence listp %d %d",
-X iw,wstage2[iw]);
-X s_abort(errstr,"");
-X }
-X
-X /* allocate space to remember the bptr's for each result */
-X if ((wstage2b[iw]=(struct beststr **)
-X calloc(wstage2[iw],sizeof(struct beststr *)))==NULL) {
-X sprintf(errstr," cannot allocate sequence listb %d %d",
-X iw,wstage2[iw]);
-X s_abort(errstr,"");
-X }
-X wstage2i[iw]=0;
-X }
-X else {
-X wstage2p[iw] = NULL;
-X wstage2b[iw] = NULL;
-X }
-X }
-X
-X /* for each result, set wstage2p[worker][result_index_in_worker] */
-X for (is = 0; is < nbest; is++) {
-X iw=bptr[is]->wrkr;
-X wstage2p[iw][wstage2i[iw]].seqnm = bptr[is]->seqnm;
-X wstage2p[iw][wstage2i[iw]].frame = bptr[is]->frame;
-X wstage2b[iw][wstage2i[iw]] = bptr[is];
-X wstage2i[iw]++;
-X }
-X
-X
-X /* at this point, wstage2i[iw] should equal wstage2[iw] */
-X node_done = 0;
-X for (node = FIRSTNODE; node < nnodes; node++) {
-X
-X /* fprintf(stderr,"node: %d stage2: %d\n",node,wstage2[node]); */
-X
-X /* if a worker has no results, move on */
-X if (wstage2[node]<=0) { node_done++; continue;}
-X
-X qm_msp->slist = wstage2[node]; /* set number of results to return */
-X qm_msp->s_func = s_func; /* set s_funct for do_opt/do_walign */
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)qm_msp,sizeof(struct qmng_str),1);
-X pvm_send(pinums[node],MSEQTYPE); /* send qm_msp */
-X pvm_initsend(PvmDataRaw); /* send the list of seqnm/frame */
-X pvm_pkbyte((char *)wstage2p[node],wstage2[node]*sizeof(struct stage2_str),1);
-X pvm_send(pinums[node],LISTTYPE);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(qm_msp,sizeof(struct qmng_str),MPI_BYTE,node,MSEQTYPE,
-X MPI_COMM_WORLD);
-X MPI_Send((char *)wstage2p[node],wstage2[node]*
-X sizeof(struct stage2_str),MPI_BYTE,node,LISTTYPE,
-X MPI_COMM_WORLD);
-#endif
-X }
-X
-X /* all the workers have their list of sequences */
-X /* reset the index of results to obtain */
-X for (iw = 0; iw < nnodes; iw++) wstage2i[iw]=0;
-X
-X while (node_done < nnodes-FIRSTNODE) { /* one message per pass; ends once every worker was either skipped above or has sent a chunk flagged FINISHED */
-#ifdef PVM_SRC
-X bufid = pvm_recv(-1,LISTRTYPE); /* wait for results */
-X pvm_bufinfo(bufid,NULL,NULL,&tid);
-X /* get a chunk of comstr2 results */
-X pvm_upkbyte((char *)&bestr2[0],sizeof(struct comstr2)*(BFR2+1),1);
-X snode = (iw=tidtonode(tid));
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv((char *)&bestr2[0],sizeof(struct comstr2)*(BFR2+1),
-X MPI_BYTE,MPI_ANY_SOURCE,LISTRTYPE,MPI_COMM_WORLD,
-X &mpi_status);
-X snode = mpi_status.MPI_SOURCE;
-X iw = snode;
-#endif
-X
-X seqc_buff = NULL;
-X if (s_func == DO_OPT_FLG && m_msg0.show_code==SHOW_CODE_ALIGN) { /* encoded alignments follow the scores in a second message from the same worker */
-#ifdef PVM_SRC
-X bufid = pvm_recv(tid,CODERTYPE);
-X pvm_upkint(&seqc_buff_len,1,1); /* get the code string length */
-#endif
-#ifdef MPI_SRC
-X MPI_Recv((char *)&seqc_buff_len,1,MPI_INT, snode,
-X CODERTYPE,MPI_COMM_WORLD, &mpi_status);
-#endif
-X
-X seqc=seqc_buff = NULL;
-X if (seqc_buff_len > 0) { /* allocate space for it */
-X if ((seqc=seqc_buff=calloc(seqc_buff_len,sizeof(char)))==NULL) {
-X fprintf(stderr,"Cannot allocate seqc_buff: %d\n",seqc_buff_len);
-X seqc_buff_len=0; /* NOTE(review): under MPI_SRC the pending CODERTYPE data message is not received on this path - confirm */
-X }
-X else {
-#ifdef PVM_SRC
-X pvm_upkbyte(seqc_buff,seqc_buff_len*sizeof(char),1);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv((char *)seqc_buff,seqc_buff_len*sizeof(char),
-X MPI_BYTE,snode,CODERTYPE,MPI_COMM_WORLD, &mpi_status);
-#endif
-X }
-X }
-#ifdef PVM_SRC
-X pvm_freebuf(bufid);
-#endif
-X }
-X
-X /* get number of results in this message */
-X nres = bestr2[BFR2].seqnm & ~FINISHED;
-X /* check to see if finished */
-X if (bestr2[BFR2].seqnm&FINISHED) {node_done++;}
-X
-X seqc = seqc_buff;
-X
-X /* count through results from a specific worker */
-X for (i=0,is=wstage2i[iw]; i < nres; i++,is++) {
-X
-X /* get the (saved) bptr for this result */
-X bbptr=wstage2b[iw][is];
-X /* consistency check seqnm's must agree */
-X if (wstage2p[iw][is].seqnm == bbptr->seqnm) {
-X if (s_func == DO_CALC_FLG && m_msg0.last_calc_flg) {
-X bbptr->score[0] = bestr2[i].score[0];
-X bbptr->score[1] = bestr2[i].score[1];
-X bbptr->score[2] = bestr2[i].score[2];
-X bbptr->escore = bestr2[i].escore;
-X bbptr->segnum = bestr2[i].segnum;
-X bbptr->seglen = bestr2[i].seglen;
-X }
-X else if (m_msg0.stages > 1) {
-X bbptr->score[0] = bestr2[i].score[0];
-X bbptr->score[1] = bestr2[i].score[1];
-X bbptr->score[2] = bestr2[i].score[2];
-X }
-X
-X if (s_func == DO_OPT_FLG && m_msg0.markx & MX_M9SUMM) {
-X /* get score, alignment information, percents */
-X bbptr->sw_score = bestr2[i].sw_score;
-X memcpy(bbptr->aln_d,&bestr2[i].aln_d,sizeof(struct a_struct));
-X bbptr->percent = bestr2[i].percent;
-X bbptr->gpercent = bestr2[i].gpercent;
-X
-X if (m_msg0.show_code == 2) { /* if show code */
-X /* length of encoding */
-X aln_code_n = bbptr->aln_code_n = bestr2[i].aln_code_n;
-X if (aln_code_n > 0) {
-X if ((bbptr->aln_code =
-X (char *)calloc(aln_code_n+1,sizeof(char)))==NULL) {
-X fprintf(stderr,"cannot allocate seq_code[%d:%d]: %d\n",
-X bbptr->wrkr,bbptr->seqnm,aln_code_n);
-X seqc += aln_code_n+1; /* still step over this encoding so the next result reads its own */
-X bbptr->aln_code_n = 0;
-X }
-X else {
-X strncpy(bbptr->aln_code,seqc,aln_code_n); /* NOTE(review): seqc is NULL here if seqc_buff_len was <= 0 or the seqc_buff calloc above failed */
-X bbptr->aln_code[aln_code_n]='\0';
-X seqc += aln_code_n+1;
-X }
-X }
-X else {
-X fprintf(stderr," aln_code_n <=0: %d\n",aln_code_n);
-X }
-X }
-X }
-X }
-X else fprintf(stderr,"phase error in phase II return %d %d", iw,i); /* NOTE(review): message has no trailing newline */
-X }
-X if (seqc_buff != NULL) {
-X free(seqc_buff);
-X seqc_buff = NULL;
-X }
-X wstage2i[iw] += nres; /* the next chunk from this worker continues where this one stopped */
-X }
-X
-X for (iw=FIRSTNODE; iw < nnodes; iw++) {
-X if ((void *)wstage2p[iw]!=NULL) free((void *)wstage2p[iw]);
-X if ((void *)wstage2b[iw]!=NULL) free((void *)wstage2b[iw]);
-X }
-}
-X
-#endif
-SHAR_EOF
-chmod 0644 compacc.c ||
-echo 'restore of compacc.c failed'
-Wc_c="`wc -c < 'compacc.c'`"
-test 21270 -eq "$Wc_c" ||
- echo 'compacc.c: original size 21270, current size' "$Wc_c"
-fi
-# ============= create_seq_demo.sql ==============
-if test -f 'create_seq_demo.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping create_seq_demo.sql (File already exists)'
-else
-echo 'x - extracting create_seq_demo.sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'create_seq_demo.sql' &&
-X
-/* Rebuild the seq_demo database from scratch.  IF EXISTS lets the script
-X   run on a server that does not have seq_demo yet; a plain DROP DATABASE
-X   fails there with an error. */
-DROP DATABASE IF EXISTS seq_demo;
-CREATE DATABASE seq_demo;
-X
-USE seq_demo;
-X
-/* prot: one row per sequence, keyed by an auto-assigned id */
-CREATE TABLE prot (
-id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
-seq TEXT NOT NULL,
-bin BLOB NOT NULL,
-len INT UNSIGNED NOT NULL
-);
-X
-/* annot: one row per gi; prot_id presumably refers to prot.id (no foreign
-X   key is declared) - confirm.  Indexed on prot_id and on db. */
-CREATE TABLE annot (
-prot_id INT UNSIGNED NOT NULL,
-gi INT UNSIGNED NOT NULL PRIMARY KEY,
-db ENUM("gb","emb","dbj","prf","ref","pdb","pir","sp") NOT NULL,
-descr TEXT NOT NULL,
-X
-INDEX (prot_id),
-INDEX (db)
-);
-X
-/* sp: accession and name for a gi */
-CREATE TABLE sp (
-X gi INT UNSIGNED NOT NULL,
-X acc VARCHAR(10),
-X name VARCHAR(10),
-X
-X PRIMARY KEY (gi)
-);
-SHAR_EOF
-chmod 0644 create_seq_demo.sql ||
-echo 'restore of create_seq_demo.sql failed'
-Wc_c="`wc -c < 'create_seq_demo.sql'`"
-test 536 -eq "$Wc_c" ||
- echo 'create_seq_demo.sql: original size 536, current size' "$Wc_c"
-fi
-# ============= cvs_id ==============
-if test -f 'cvs_id' -a X"$1" != X"-c"; then # keep an existing file unless the archive is run with -c
- echo 'x - skipping cvs_id (File already exists)'
-else
-echo 'x - extracting cvs_id (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'cvs_id' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: cvs_id,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
-SHAR_EOF
-chmod 0644 cvs_id ||
-echo 'restore of cvs_id failed'
-Wc_c="`wc -c < 'cvs_id'`" # byte count of the file just written
-test 81 -eq "$Wc_c" ||
- echo 'cvs_id: original size 81, current size' "$Wc_c"
-fi
-# ============= dec_pthr_subs.c ==============
-if test -f 'dec_pthr_subs.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dec_pthr_subs.c (File already exists)'
-else
-echo 'x - extracting dec_pthr_subs.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dec_pthr_subs.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: dec_pthr_subs.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* this file isolates the pthreads calls from the main program */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/types.h>
-#include <signal.h>
-X
-#include "param.h"
-X
-#include <pthread.h>
-#define XTERNAL
-#include "thr.h"
-#undef XTERNAL
-#include "pthr_subs.h"
-X
-extern void work_thread (struct thr_str *work_info);
-X
-/* start the threads working */
-X
-void init_thr(int nthreads, struct thr_str *work_info) /* initialise the reader, worker and start mutexes and condition variables, then create nthreads threads running work_thread(); exits if nthreads > MAX_WORKERS */
-{
-X int status, i; /* i is not used in this function */
-X pthread_attr_t thread_attr;
-X
-X if (nthreads > MAX_WORKERS) {
-X fprintf ( stderr," cannot start %d threads, max: %d\n",
-X nthreads, MAX_WORKERS);
-X exit(1);
-X }
-X
-X /* mutex and condition variable initialisation */
-X /* NOTE(review): pthread_mutexattr_default, pthread_condattr_default and pthread_attr_create are not POSIX.1c names; they look like the draft-4 (DCE) pthreads API - confirm */
-X
-X status = pthread_mutex_init(&reader_mutex, pthread_mutexattr_default);
-X check(status,"Reader_mutex init bad status\n");
-X
-X status = pthread_mutex_init(&worker_mutex, pthread_mutexattr_default);
-X check(status,"Worker_mutex init bad status\n");
-X
-X status = pthread_cond_init(&reader_cond_var, pthread_condattr_default);
-X check(status,"Reader_cond_var init bad status\n");
-X
-X status = pthread_cond_init(&worker_cond_var, pthread_condattr_default);
-X check(status,"Worker_cond_var init bad status\n");
-X
-X status = pthread_mutex_init(&start_mutex, pthread_mutexattr_default);
-X check(status,"Start_mutex init bad status\n");
-X
-X status = pthread_cond_init(&start_cond_var, pthread_condattr_default);
-X check(status,"Start_cond_var init bad status\n");
-X
-X /* change stacksize on threads */ /***************************/
-X
-X status = pthread_attr_create( &thread_attr );
-X check(status,"attribute create bad status\n");
-X
-X status = pthread_attr_setstacksize( &thread_attr, 1000000); /* request a 1000000-byte stack for each worker */
-X check(status,"stacksize change bad status\n");
-X
-X /* start the worker threads */
-X
-X for (work_info->worker=0; work_info->worker < nthreads;
-X work_info->worker++) { /* the loop counter lives in *work_info, and every thread is handed the same work_info pointer */
-X /**********************/
-X status=pthread_create(&threads[work_info->worker],thread_attr,
-X (pthread_startroutine_t)&work_thread,
-X (pthread_addr_t)work_info);
-X check(status,"Pthread_create failed\n");
-X }
-}
-X
-/* start_thr: clear the start_thread predicate under start_mutex and
-X   broadcast start_cond_var, releasing every thread waiting on it. */
-void start_thr()
-{
-X  int rc;
-X
-X  rc = pthread_mutex_lock(&start_mutex);
-X  check(rc,"Start_mutex lock bad status in main\n");
-X
-X  /* lower the predicate first, then wake the waiters; the broadcast
-X     result is deliberately left unchecked */
-X  start_thread = 0;
-X  (void)pthread_cond_broadcast(&start_cond_var);
-X
-X  rc = pthread_mutex_unlock(&start_mutex);
-X  check(rc,"Start_mutex unlock bad status in main\n");
-}
-X
-void get_rbuf(struct buf_head **cur_buf, int max_work_buf) /* block until reader_buf[] holds a buffer, then return the next one in *cur_buf */
-{
-X int status;
-X
-X status = pthread_mutex_lock(&reader_mutex); /* lock reader_buf structure */
-X
-X check(status,"Reader_mutex lock in master bad status\n");
-X
-X /* no reader bufs: wait for signal to proceed */
-X while (num_reader_bufs == 0) {
-X pthread_cond_wait(&reader_cond_var,&reader_mutex);
-X }
-X
-X *cur_buf = reader_buf[reader_buf_readp]; /* get the buffer address */
-X reader_buf_readp = (reader_buf_readp+1)%(max_work_buf); /* increment index, wrapping at max_work_buf */
-X num_reader_bufs--;
-X
-X status = pthread_mutex_unlock(&reader_mutex); /* unlock structure */
-X check(status,"Reader_mutex unlock in master bad status\n");
-}
-X
-void put_rbuf(struct buf_head *cur_buf, int max_work_buf) /* append cur_buf to worker_buf[] and wake one waiting worker */
-{
-X int status;
-X
-X /* hand the buffer to the workers; this function itself does not wait for anything but the mutex */
-X status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
-X check(status,"Worker_mutex lock in master bad status\n");
-X
-X /* Put buffer onto available for workers list */
-X worker_buf[worker_buf_readp] = cur_buf; /* worker_buf_readp is the insertion index here; get_wbuf() removes at worker_buf_workp */
-X worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
-X num_worker_bufs++; /* increment number of buffers available to workers */
-X
-X /* Signal one worker to wake and start work */
-X status = pthread_cond_signal(&worker_cond_var); /* result is overwritten below without being checked */
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"Worker_mutex unlock in master bad status\n");
-}
-X
-void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf) /* queue the last buffer, set reader_done, wake all workers, then join threads[0..nthreads-1] */
-{
-X int status, i;
-X void *exit_value;
-X
-X /* hand the final buffer to the workers */
-X status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
-X check(status,"Worker_mutex lock in master bad status\n");
-X
-X /* Put buffer onto available for workers list */
-X worker_buf[worker_buf_readp] = cur_buf;
-X worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
-X num_worker_bufs++; /* increment number of buffers available to workers */
-X
-X /* mark the reader as finished and wake every waiting worker */
-X
-X reader_done = 1;
-X status = pthread_cond_broadcast(&worker_cond_var);
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"Worker_mutex unlock in master bad status\n");
-X
-X /* wait for every worker thread to exit */
-X
-X for (i=0; i < nthreads; i++) {
-X status = pthread_join( threads[i], &exit_value);
-X check(status,"Pthread_join bad status\n");
-X
-X status = pthread_detach( &threads[i]); /* NOTE(review): passes the address of the thread handle after joining it; POSIX pthread_detach takes a pthread_t - presumably the draft-4 API, confirm */
-X check(status,"Pthread_detach bad status\n");
-X }
-}
-X
-/* wait_thr: block the calling thread on start_cond_var until the
-X   start_thread predicate has been cleared. */
-void wait_thr()
-{
-X  int rc;
-X
-X  rc = pthread_mutex_lock(&start_mutex);
-X  check(rc,"Start_mutex lock bad status in worker\n");
-X
-X  /* re-test the predicate after every wake-up */
-X  for (;;) {
-X    if (!start_thread) break;
-X    rc = pthread_cond_wait(&start_cond_var, &start_mutex);
-X    check(rc,"Start_cond_wait bad status in worker\n");
-X  }
-X
-X  rc = pthread_mutex_unlock(&start_mutex);
-X  check(rc,"Start_mutex unlock bad status in worker\n");
-}
-X
-int get_wbuf(struct buf_head **cur_buf, int max_work_buf) /* returns 1 with the next queued buffer in *cur_buf, or 0 when the queue is empty and reader_done is set */
-{
-X int status;
-X
-X /* get a buffer to work on */
-X status = pthread_mutex_lock(&worker_mutex);
-X check(status,"First worker_mutex lock in worker bad status\n");
-X
-X /* No worker_bufs available: wait for reader to produce some */
-X while (num_worker_bufs == 0) {
-X /* Exit if reader has finished */
-X if (reader_done) {
-X pthread_mutex_unlock(&worker_mutex); /* release the lock before the early return; result not checked */
-X return 0;
-X }
-X pthread_cond_wait(&worker_cond_var,&worker_mutex);
-X } /* end while */
-X
-X /* Get the buffer from list */
-X *cur_buf = worker_buf[worker_buf_workp];
-X worker_buf_workp = (worker_buf_workp+1)%(max_work_buf);
-X num_worker_bufs--;
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"First worker_mutex unlock in worker bad status\n");
-X return 1;
-}
-X
-void put_wbuf(struct buf_head *cur_buf, int max_work_buf) /* return cur_buf to reader_buf[] and wake the reader if that list had been empty */
-{
-X int status;
-X
-X /* put buffer back on list for reader */
-X status = pthread_mutex_lock(&reader_mutex);
-X check(status,"Reader_mutex lock in worker bad status\n");
-X
-X reader_buf[reader_buf_workp] = cur_buf;
-X reader_buf_workp = (reader_buf_workp+1)%(max_work_buf);
-X num_reader_bufs++;
-X
-X /* the list was empty before this buffer was added: wake the reader */
-X if (num_reader_bufs == 1) {
-X pthread_cond_signal(&reader_cond_var);
-X }
-X
-X status = pthread_mutex_unlock(&reader_mutex);
-X check(status,"Reader_mutex unlock in worker bad status\n");
-}
-SHAR_EOF
-chmod 0644 dec_pthr_subs.c ||
-echo 'restore of dec_pthr_subs.c failed'
-Wc_c="`wc -c < 'dec_pthr_subs.c'`"
-test 6955 -eq "$Wc_c" ||
- echo 'dec_pthr_subs.c: original size 6955, current size' "$Wc_c"
-fi
-# ============= dec_pthr_subs.h ==============
-if test -f 'dec_pthr_subs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping dec_pthr_subs.h (File already exists)'
-else
-echo 'x - extracting dec_pthr_subs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dec_pthr_subs.h' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: dec_pthr_subs.h,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp $ */
-X
-#include <pthread.h>
-X
-#define check(status,string) \
-X if (status == -1) perror(string) /* error macro for thread calls: reports only status == -1, through perror(); expands to a bare if, so never follow a check() with else */
-X
-#ifndef XTERNAL /* NOTE(review): there is no #else below, so the definitions and the extern declarations are both compiled when XTERNAL is undefined and neither when it is defined - confirm this is intended */
-pthread_t threads[MAX_WORKERS];
-X
-/* mutex stuff */
-X
-pthread_mutex_t reader_mutex; /* empty buffer pointer structure lock */
-pthread_mutex_t worker_mutex; /* full buffer pointer structure lock */
-X
-/* condition variable stuff */
-X
-pthread_cond_t reader_cond_var; /* condition variable for reader */
-pthread_cond_t worker_cond_var; /* condition variable for workers */
-X
-pthread_mutex_t start_mutex; /* start-up synchronisation lock */
-pthread_cond_t start_cond_var; /* start-up synchronisation condition variable */
-X
-extern pthread_t threads[];
-X
-/* mutex stuff */
-X
-extern pthread_mutex_t reader_mutex;
-extern pthread_mutex_t worker_mutex;
-X
-/* condition variable stuff */
-X
-extern pthread_cond_t reader_cond_var;
-extern pthread_cond_t worker_cond_var;
-X
-extern pthread_mutex_t start_mutex;
-extern pthread_cond_t start_cond_var;
-extern int start_thread; /* declared only; this header does not define it */
-X
-#endif
-SHAR_EOF
-chmod 0644 dec_pthr_subs.h ||
-echo 'restore of dec_pthr_subs.h failed'
-Wc_c="`wc -c < 'dec_pthr_subs.h'`"
-test 1116 -eq "$Wc_c" ||
- echo 'dec_pthr_subs.h: original size 1116, current size' "$Wc_c"
-fi
-# ============= defs.h ==============
-if test -f 'defs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping defs.h (File already exists)'
-else
-echo 'x - extracting defs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'defs.h' &&
-/* Concurrent read version */
-X
-/* $Name: fa_34_26_5 $ - $Id: defs.h,v 1.26 2006/06/22 02:35:05 wrp Exp $ */
-X
-#ifdef SUNOS
-#include <sys/stdtypes.h>
-#endif
-X
-#ifndef IS_BIG_ENDIAN
-#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
-#define IS_BIG_ENDIAN
-#else
-#undef IS_BIG_ENDIAN
-#endif
-#endif
-X
-#if !defined(MAX_WORKERS) && !defined(PCOMPLIB)
-#define MAX_WORKERS 1
-#endif
-X
-/* 3-Oct-2003 - we can now have 2 nucleotide query types, DNA
-X and RNA. pst.dnaseq can also be SEQT_RNA.
-X ldnaseq can only be DNA */
-X
-#define SEQT_DNA 1
-#define SEQT_RNA 3 /* DNA and RNA seqtypes must be odd */
-X
-#define SEQT_PROT 0
-#define SEQT_UNK -1
-#define SEQT_OTHER 2
-X
-#ifndef DEF_NMLEN
-#define DEF_NMLEN 6
-#endif
-X
-/* unfortunately, there is an important relationship between MAXTRN and
-X MAXTST+MAXLIB embedded here. MAXTRN must be >= (MAXTST+MAXLIB)/3
-X or it will be possible for a translated DNA sequence to be longer
-X than the translation space available */
-X
-#define MAX_STR 512 /* standard label/message buffer */
-#define MAX_SSTR 32 /* short string */
-#define MAX_FN 120 /* maximum size of a file name */
-#define MAX_CH 40 /* maximum number of library choices */
-#ifndef SMALLMEM
-#define MAX_LF 500 /* maximum number of library files */
-#else
-#define MAX_LF 80 /* maximum number of library files (SMALLMEM build) */
-#endif
-X
-/* padding at the end of sequences for ALTIVEC, other vector
-X processors */
-#define SEQ_PAD 16
-X
-#define MAX_UID 20 /* length of libstr, used for character keys with SQL */
-X
-#define AVE_AA_LEN 400
-#define AVE_NT_LEN 5000
-#define MAX_AA_BUF 5000 /* 5000 later */
-#define MAX_NT_BUF 1000 /* 2000 later */
-X
-#ifndef SMALLMEM
-#define MAXTST 40000 /* longest query */
-#define MAXLIB 120000 /* longest library */
-#define MAXPLIB 600000 /* longest library with p_comp* */
-#define MIN_RES 2000 /* minimum amount allocated for alignment */
-#ifndef TFAST
-#define MAXTRN 80000 /* buffer for fastx translation */
-#else
-#define MAXTRN 180000 /* buffer for tfastx translation */
-#endif
-#define SEQDUP 1200 /* future - overlap */
-#ifndef PCOMPLIB
-#ifndef MAXBEST
-#define MAXBEST 60000 /* max number of best scores */
-#endif
-#define MAXSTATS 60000
-#else
-#ifndef MAXBEST
-#define MAXBEST 60000 /* max number of best scores */
-#endif
-#define MAXSTATS 60000
-#endif
-#define BIGNUM 1000000000
-#ifndef MAXINT
-#define MAXINT 2147483647
-#endif
-#define MAXLN 120 /* size of a library name */
-#else
-#define MAXTST 1500
-#define MAXLIB 10000
-#define MAXPLIB 100000 /* longest library with p_comp* */
-#define MIN_RES 1000
-#ifndef TFAST
-#define MAXTRN 6000
-#else
-#define MAXTRN 11500
-#endif
-#define SEQDUP 300
-#define MAXBEST 2000
-#define MAXSTATS 20000
-#define BIGNUM 32767
-#define MAXINT 32767
-#define MAXLN 40 /* size of a library name */
-#endif
-#if !defined(TFAST)
-#define MAXTOT (MAXTST+MAXLIB)
-#define MAXDIAG (MAXTST+MAXLIB)
-#else
-#define MAXTOT (MAXTST+MAXTRN)
-#define MAXDIAG (MAXTST+MAXTRN)
-#endif
-X
-#define MAXPAM 600 /* maximum allowable size of the pam matrix */
-#define PROF_MAX 500
-#define ALF_MAX 30
-X
-#ifdef SUPERFAMNUM
-#define NSFCHAR '!'
-#endif
-X
-#define max(a,b) (((a) > (b)) ? (a) : (b)) /* larger of a and b; the chosen argument is evaluated twice, so avoid side effects */
-#define min(a,b) (((a) < (b)) ? (a) : (b)) /* smaller of a and b; the chosen argument is evaluated twice, so avoid side effects */
-X
-#define MX_ATYPE 7 /* markx==0,1,2 7=> no alignment */
-#define MX_ASEP 8 /* markx==3 - separate lines */
-#define MX_AMAP 16 /* markx==4,5 - graphic map */
-#define MX_HTML 32 /* markx==6 - HTML */
-#define MX_M9SUMM 64 /* markx==9(c) */
-#define MX_M10FORM 128 /* markx==10 */
-X
-/* codes for -m 9 */
-#define SHOW_CODE_ID 1 /* identity only */
-#define SHOW_CODE_ALIGN 2 /* encoded alignment */
-SHAR_EOF
-chmod 0644 defs.h ||
-echo 'restore of defs.h failed'
-Wc_c="`wc -c < 'defs.h'`"
-test 3530 -eq "$Wc_c" ||
- echo 'defs.h: original size 3530, current size' "$Wc_c"
-fi
-# ============= dna.mat ==============
-if test -f 'dna.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping dna.mat (File already exists)'
-else
-echo 'x - extracting dna.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dna.mat' &&
-# Sample dna matrix
-X A C G T U R Y M W S K D H V B N X
-A 5 -4 -4 -4 -4 2 -1 2 2 -1 -1 1 1 1 -2 -1 -1
-C -4 5 -4 -4 -4 -1 2 2 -1 2 -1 -2 1 1 1 -1 -1
-G -4 -4 5 -4 -4 2 -1 -1 -1 2 2 1 -2 1 1 -1 -1
-T -4 -4 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
-U -4 -4 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
-R 2 -1 2 -1 -1 2 -2 -1 1 1 1 1 -1 1 -1 -1 -1
-Y -1 2 -1 2 2 -2 2 -1 1 1 1 -1 1 -1 1 -1 -1
-M 2 2 -1 -1 -1 -1 -1 2 1 1 -1 -1 1 1 -1 -1 -1
-W 2 -1 -1 2 2 1 1 1 2 -1 1 1 1 -1 -1 -1 -1
-S -1 2 2 -1 -1 1 1 1 -1 2 1 -1 -1 1 1 -1 -1
-K -1 -1 2 2 2 1 1 -1 1 1 2 1 -1 -1 1 -1 -1
-D 1 -2 1 1 1 1 -1 -1 1 -1 1 1 -1 -1 -1 -1 -1
-H 1 1 -2 1 1 -1 1 1 1 -1 -1 -1 1 -1 -1 -1 -1
-V 1 1 1 -2 -2 1 -1 1 -1 1 -1 -1 -1 1 -1 -1 -1
-B -2 1 1 1 1 -1 1 -1 -1 1 1 -1 -1 -1 1 -1 -1
-N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 dna.mat ||
-echo 'restore of dna.mat failed'
-Wc_c="`wc -c < 'dna.mat'`"
-test 976 -eq "$Wc_c" ||
- echo 'dna.mat: original size 976, current size' "$Wc_c"
-fi
-# ============= doinit.c ==============
-if test -f 'doinit.c' -a X"$1" != X"-c"; then
- echo 'x - skipping doinit.c (File already exists)'
-else
-echo 'x - extracting doinit.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'doinit.c' &&
-/* doinit.c general and function-specific initializations */
-X
-/* copyright (c) 1996, 1997, 1998 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: doinit.c,v 1.62 2007/01/08 15:38:46 wrp Exp $ */
-X
-/* this file performs general initializations of search parameters
-X
-X In addition, it calls several functions in init??.c that provide
-X program-specific initializations:
-X
-X f_initenv() - called from initenv()
-X f_getopt() - called from initenv() during a getopt() scan
-X f_getarg() - called from initenv() after the getopt() scan
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "upam.h" /* required for 'U' option change of nascii */
-X
-#include "structs.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#undef XTERNAL
-X
-extern char *s_optstr;
-extern int optind; /* used by getopt() */
-X
-#ifdef PCOMPLIB
-#define PARALLEL
-#include "p_mw.h"
-extern char pgmdir[];
-extern char managepgm[];
-extern char workerpgm[];
-extern int max_buf_cnt;
-#define MAX_WORKERS MAXWRKR
-#endif
-X
-char prog_name[MAX_FN];
-X
-extern void f_initenv(struct mngmsg *, struct pstruct *, unsigned char **);
-extern void f_lastenv(struct mngmsg *, struct pstruct *);
-extern void f_getopt(char, char *, struct mngmsg *, struct pstruct *);
-extern void f_getarg(int, char **, int, struct mngmsg *, struct pstruct *);
-void ann_ascii(int *qascii, char *ann_arr);
-int set_markx(int markx, int val);
-X
-int optcnt;
-int max_workers=MAX_WORKERS;
-#ifdef PCOMPLIB
-int worker_1=0;
-int worker_n=0;
-#endif
-extern char *optarg;
-X
-/* initenv () initializes the environment */
-void initenv (int argc, char **argv, struct mngmsg *m_msg,
-X struct pstruct *ppst, unsigned char **aa0)
-{
-X char *cptr, ctmp;
-X int copt, itmp;
-X
-X /* options for all search functions */
-X char *g_optstr = "ab:BC:d:DE:F:HiIJ:K:l:Lm:M:N:O:QqR:T:v:V:w:W:X:z:Z:";
-X char optstring[MAX_STR];
-X
-/* these initializations will be used by all functions */
-X
-X /* prog_name[] is only used for error messages */
-X strncpy(prog_name,argv[0],sizeof(prog_name));
-X prog_name[sizeof(prog_name)-1]='\0';
-X
-#ifdef PARALLEL
-X if ((cptr = getenv ("MANAGEPGM")) != NULL) strncpy (managepgm, cptr, 120);
-X if ((cptr = getenv ("WORKERPGM")) != NULL) strncpy (workerpgm, cptr, 120);
-X if ((cptr = getenv ("PGMDIR")) != NULL) strncpy (pgmdir, cptr, 120);
-#endif
-X
-X m_msg->ltitle[0] = '\0';
-X
-X if ((cptr=getenv("FASTLIBS"))!=NULL) {
-X strncpy(m_msg->flstr,cptr,MAX_FN);
-X m_msg->flstr[MAX_FN-1] = '\0';
-X }
-X else m_msg->flstr[0]='\0';
-X
-X m_msg->hist.hist_a = NULL;
-X m_msg->outfile[0] = '\0';
-X m_msg->ldnaseq = SEQT_PROT; /* library is protein */
-X m_msg->n1_low = 0;
-X m_msg->n1_high = BIGNUM;
-X m_msg->ql_start = 1; /* start with first query sequence */
-X m_msg->ql_stop = BIGNUM; /* end with the last query sequence */
-X
-X m_msg->pamd1 = MAXSQ;
-X m_msg->pamd2 = MAXSQ;
-X
-X m_msg->term_code = 0;
-X ppst->tr_type = 0;
-X ppst->debug_lib = 0;
-X m_msg->nshow = 20;
-#if defined(PCOMPLIB)
-X m_msg->nohist = 1;
-X m_msg->mshow = 20;
-#else
-X m_msg->nohist = 0;
-X m_msg->mshow = 50;
-#endif
-X m_msg->ashow = -1;
-X m_msg->nmlen = DEF_NMLEN;
-X m_msg->z_bits = 1;
-X m_msg->mshow_flg = 0;
-X m_msg->aln.llen = 0;
-X m_msg->aln.llcntx = 30;
-X m_msg->aln.llcntx_flg = 0;
-X m_msg->e_cut = 10.0;
-X m_msg->e_low = 0.0;
-X m_msg->e_cut_set = 0;
-X m_msg->revcomp = 0;
-X m_msg->self = 0;
-X m_msg->long_info = 0;
-X m_msg->maxn = 0;
-X m_msg->dupn = SEQDUP;
-X m_msg->dfile[0] = '\0';
-X m_msg->tname[0] = '\0';
-X m_msg->lname[0] = '\0';
-X m_msg->show_code = 0;
-X m_msg->aln.showall = 0;
-X m_msg->markx = 0;
-X m_msg->sq0off = m_msg->sq1off = 1;
-X strncpy(m_msg->sqnam,"aa",4);
-X strncpy(m_msg->sqtype,"protein",10);
-X m_msg->ann_flg = 0;
-X m_msg->ann_arr[0] = '\0';
-X m_msg->aa0a = NULL;
-X
-X ppst->zsflag = ppst->zsflag_f = 1;
-X ppst->zs_win = 0;
-X
-X ppst->zdb_size = -1;
-X ppst->dnaseq = SEQT_PROT; /* default is protein */
-X ppst->nt_align = 0;
-X
-X f_initenv (m_msg, ppst, aa0);
-X
-X strncpy (optstring, g_optstr, sizeof (optstring));
-X strncat (optstring, s_optstr, sizeof (optstring));
-X
-X while ((copt = getopt (argc, argv, optstring)) != EOF)
-X {
-X if (strchr (g_optstr, copt) != NULL)
-X {
-X switch (copt) { /* switches for all options */
-X case 'a': m_msg->aln.showall = 1; break;
-X case 'b':
-X if (optarg[0] == '$') {
-X m_msg->mshow = -1;
-X m_msg->e_cut = 10000000.0;
-X break;
-X }
-X else sscanf (optarg, "%d", &m_msg->mshow);
-X m_msg->e_cut = 10000000.0;
-X m_msg->e_cut_set = 1;
-X m_msg->mshow_flg = 1;
-X break;
-X case 'B': m_msg->z_bits = 0; break;
-X case 'C': sscanf(optarg,"%d",&m_msg->nmlen);
-X if (m_msg->nmlen > MAX_UID-1) m_msg->nmlen = MAX_UID-1;
-X break;
-X case 'd': sscanf(optarg,"%d",&m_msg->ashow);
-X if (m_msg->ashow > m_msg->mshow) m_msg->mshow=m_msg->ashow;
-X /* m_msg->ashow_flg = 1; (ashow_flg not in structs.h, not used)*/
-X break;
-X case 'D': ppst->debug_lib = 1;
-X break;
-X case 'E':
-X sscanf(optarg,"%lf",&m_msg->e_cut);
-X m_msg->e_cut_set = 1;
-X break;
-X case 'F':
-X sscanf(optarg,"%lg",&m_msg->e_low);
-X m_msg->e_cut_set = 1;
-X break;
-X case 'H':
-#if defined(PCOMPLIB)
-X m_msg->nohist = 0; break;
-#else
-X m_msg->nohist = 1; break;
-#endif
-X case 'i':
-X m_msg->revcomp = 1; break;
-#ifdef PARALLEL
-X case 'I':
-X m_msg->self = 1; break;
-X case 'J':
-X if (optarg[0]==':') {
-X m_msg->ql_start = 0;
-X sscanf(optarg,":%d",&m_msg->ql_stop);
-X m_msg->ql_stop++;
-X }
-X else if (!strchr(optarg,':')) {
-X m_msg->ql_stop = BIGNUM;
-X sscanf(optarg,"%d",&m_msg->ql_start);
-X }
-X else {
-X sscanf(optarg,"%d:%d",&m_msg->ql_start,&m_msg->ql_stop);
-X m_msg->ql_stop++;
-X }
-X break;
-X case 'K':
-X sscanf(optarg,"%d",&max_buf_cnt);
-X break;
-#endif
-X case 'l':
-X strncpy(m_msg->flstr,optarg,MAX_FN);
-X m_msg->flstr[MAX_FN-1]='\0';
-X break;
-X case 'L':
-X m_msg->long_info = 1; break;
-X case 'm':
-X sscanf(optarg,"%d%c",&itmp,&ctmp);
-X if (itmp==9 && ctmp=='c') {
-X m_msg->show_code = SHOW_CODE_ALIGN;
-X }
-X else if (itmp==9 && ctmp=='i') {
-X m_msg->show_code = SHOW_CODE_ID;
-X }
-X if (itmp > 6 && itmp != 10 && itmp != 9) itmp = 0;
-X m_msg->markx = set_markx(m_msg->markx,itmp);
-X break;
-X case 'M':
-X sscanf(optarg,"%d-%d",&m_msg->n1_low,&m_msg->n1_high);
-X if (m_msg->n1_low < 0) {
-X m_msg->n1_high = -m_msg->n1_low;
-X m_msg->n1_low = 0;
-X }
-X if (m_msg->n1_high == 0) m_msg->n1_high = BIGNUM;
-X if (m_msg->n1_low > m_msg->n1_high) {
-X fprintf(stderr," low cutoff %d greater than high %d\n",
-X m_msg->n1_low, m_msg->n1_high);
-X m_msg->n1_low = 0;
-X m_msg->n1_high = BIGNUM;
-X }
-X break;
-X case 'N':
-X sscanf(optarg,"%d",&m_msg->maxn);
-X break;
-X case 'p':
-X m_msg->qdnaseq = SEQT_PROT;
-X ppst->dnaseq = SEQT_PROT;
-X strncpy(m_msg->sqnam,"aa",4);
-X break;
-X case 'O':
-X strncpy(m_msg->outfile,optarg,MAX_FN);
-X m_msg->outfile[MAX_FN-1]='\0';
-X break;
-X case 'q':
-X case 'Q':
-X m_msg->quiet = 1;
-X break;
-X case 'R':
-X strncpy (m_msg->dfile, optarg, MAX_FN);
-X m_msg->dfile[MAX_FN-1]='\0';
-X break;
-X case 'T':
-#ifdef PCOMPLIB
-X if (strchr(optarg,'-') != NULL) {
-X sscanf(optarg,"%d-%d",&worker_1,&worker_n);
-X if (worker_1 > worker_n) {
-X worker_1 = worker_n = 0;
-X }
-X }
-X else
-#endif
-X sscanf (optarg, "%d", &max_workers);
-X if (max_workers < 0) max_workers=1;
-X break;
-X case 'v':
-X sscanf (optarg,"%d",&ppst->zs_win);
-X break;
-X case 'V':
-X strncpy(m_msg->ann_arr+1,optarg,MAX_FN-2);
-X m_msg->ann_arr[0]='\0';
-X m_msg->ann_arr[MAX_FN-2]='\0';
-X m_msg->ann_flg = 1;
-X ann_ascii(qascii, m_msg->ann_arr);
-X break;
-/*
-X case 'V':
-X fprintf(stderr," -V option not currently supported in parallel\n");
-X break;
-*/
-X case 'w':
-X sscanf (optarg,"%d",&m_msg->aln.llen);
-X if (m_msg->aln.llen < 10) m_msg->aln.llen = 10;
-X if (m_msg->aln.llen > 200) m_msg->aln.llen = 200;
-X if (!m_msg->aln.llcntx_flg) m_msg->aln.llcntx = m_msg->aln.llen/2;
-X break;
-X case 'W':
-X sscanf (optarg,"%d",&m_msg->aln.llcntx);
-X m_msg->aln.llcntx_flg = 1;
-X break;
-X case 'X':
-X sscanf (optarg,"%ld %ld",&m_msg->sq0off,&m_msg->sq1off); break;
-X case 'z':
-X sscanf(optarg,"%d",&ppst->zsflag);
-X break;
-X case 'Z':
-X sscanf(optarg,"%ld",&ppst->zdb_size);
-X break;
-X }
-X }
-X else if (strchr (s_optstr, copt))
-X f_getopt (copt, optarg, m_msg, ppst);
-X }
-X optind--;
-X
-X f_lastenv (m_msg, ppst);
-X
-X if (argc - optind < 3) return;
-X m_msg->tnamesize = sizeof (m_msg->tname);
-X if (argc - optind > 1) strncpy (m_msg->tname, argv[optind + 1],MAX_FN);
-X if (argc - optind > 2) { strncpy(m_msg->lname, argv[optind + 2],MAX_FN); }
-X f_getarg (argc, argv, optind, m_msg, ppst);
-}
-X
-/* ann_scan() - separate annotation symbols from the query sequence.
-X *
-X * aa0[0..n0-1] holds residues with annotation symbols interleaved.
-X * The residues are compacted in place at the front of aa0 and, when
-X * the allocation succeeds, m_msg->aa0a gets one entry per residue:
-X * the last annotation symbol (minus NANN) seen after that residue,
-X * or 0.  An annotation symbol seen before any residue is dropped.
-X *
-X * With seqtype==SEQT_UNK a residue is any byte > '@' or equal to ESS;
-X * otherwise a residue is any byte < NANN.
-X *
-X * Returns the number of residues.  If m_msg->aa0a cannot be allocated,
-X * m_msg->ann_flg is cleared and only the compaction is done.
-X */
-int
-ann_scan(unsigned char *aa0, int n0, struct mngmsg *m_msg, int seqtype)
-{
-X  const int raw_query = (seqtype == SEQT_UNK);
-X  unsigned char *res_out = aa0;   /* next free slot for a residue */
-X  unsigned char *ann_out;         /* next free slot in m_msg->aa0a */
-X  unsigned char ch;
-X  int i, is_res;
-X  int n_res = 0;
-X
-X  /* first pass: count the "real" residues */
-X  for (i = 0; i < n0; i++) {
-X    ch = aa0[i];
-X    is_res = raw_query ? (ch > '@' || ch == ESS) : (ch < NANN);
-X    if (is_res) n_res++;
-X  }
-X
-X  /* n_res has the real sequence length */
-X  m_msg->aa0a = calloc(n_res+2, sizeof(char));
-X
-X  if (m_msg->aa0a == NULL) {
-X    /* no room for annotations: just squeeze them out of aa0 */
-X    fprintf(stderr," cannot allocate annotation sequence: %d\n",n_res);
-X    m_msg->ann_flg = 0;
-X    for (i = 0; i < n0; i++) {
-X      ch = aa0[i];
-X      is_res = raw_query ? (ch > '@' || ch == ESS) : (ch < NANN);
-X      if (is_res) *res_out++ = ch;
-X    }
-X    *res_out = '\0';
-X    return n_res;
-X  }
-X
-X  /* second pass: compact residues, attach annotations to the
-X     residue that precedes them */
-X  ann_out = m_msg->aa0a;
-X  for (i = 0; i < n0; i++) {
-X    ch = aa0[i];
-X    is_res = raw_query ? (ch > '@' || ch == ESS) : (ch < NANN);
-X    if (is_res) {
-X      *res_out++ = ch;
-X      *ann_out++ = '\0';
-X    }
-X    else if (ann_out > m_msg->aa0a) {
-X      ann_out[-1] = ch - NANN;
-X    }
-X  }
-X  *ann_out = *res_out = '\0';
-X  return n_res;
-}
-X
-/* ann_ascii() - give each annotation character its own code in qascii[].
-X *
-X * ann_arr[1..] lists the annotation characters; ann_arr[0] is set
-X * to ' '.  If '*' is one of them, qascii['*'] is first reset to NA.
-X * Then every listed character whose qascii[] entry is NA is given the
-X * next unused code, starting at NANN+1.
-X */
-void
-ann_ascii(int *qascii, char *ann_arr)
-{
-X  int next_code = NANN+1;
-X  int i;
-X
-X  ann_arr[0] = ' ';
-X
-X  if (strchr(&ann_arr[1],'*') != NULL) qascii['*'] = NA;
-X
-X  for (i = 1; ann_arr[i] != '\0'; i++) {
-X    if (qascii[ann_arr[i]] != NA) continue;
-X    qascii[ann_arr[i]] = next_code++;
-X  }
-}
-X
-/* set_markx() - fold the numeric output-format option val into markx.
-X *
-X * val < 3 returns markx with (MX_ATYPE & val) or'ed in; 3, 4, 5, 6, 9
-X * and 10 each or in their own MX_ flag combination; any other value
-X * returns markx unchanged.
-X */
-int
-set_markx(int markx, int val) {
-X
-X  if (val < 3) return markx | (MX_ATYPE & val);
-X
-X  switch (val) {
-X  case 3:
-X    markx |= (MX_ATYPE + MX_ASEP);
-X    break;
-X  case 4:
-X    markx |= (MX_ATYPE + MX_AMAP);
-X    break;
-X  case 5:
-X    markx |= MX_AMAP;
-X    break;
-X  case 6:
-X    markx |= (MX_HTML) ;
-X    break;
-X  case 9:
-X    markx |= MX_M9SUMM;
-X    break;
-X  case 10:
-X    markx |= MX_M10FORM;
-X    break;
-X  default:
-X    break;
-X  }
-X
-X  return markx;
-}
-SHAR_EOF
-chmod 0644 doinit.c ||
-echo 'restore of doinit.c failed'
-Wc_c="`wc -c < 'doinit.c'`"
-test 10740 -eq "$Wc_c" ||
- echo 'doinit.c: original size 10740, current size' "$Wc_c"
-fi
-# ============= drop_func.h ==============
-if test -f 'drop_func.h' -a X"$1" != X"-c"; then
- echo 'x - skipping drop_func.h (File already exists)'
-else
-echo 'x - extracting drop_func.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'drop_func.h' &&
-/* drop_func.h */
-X
-/* $Name: fa_34_26_5 $ - $Id: drop_func.h,v 1.7 2006/05/31 15:31:45 wrp Exp $ */
-X
-/* functions provided by each of the drop files */
-X
-/* Copyright (c) 2005 William R. Pearson and the University of Virginia */
-X
-X
-void /* initializes f_struct **f_arg */
-init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-#ifndef DROP_INTERN /* includers that do not define DROP_INTERN see f_arg as an opaque pointer */
-X void **f_arg
-#else /* DROP_INTERN is defined by a drop file (e.g. dropff2.c) before including this header */
-X struct f_struct **f_arg
-#endif
-);
-X
-X
-void /* frees memory allocated in f_struct */
-close_work (const unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-#ifndef DROP_INTERN
-X void **f_arg
-#else
-X struct f_struct **f_arg
-#endif
-);
-X
-void /* documents search function, parameters */
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2);
-X
-void /* calculates alignment score(s), returns them in rst */
-do_work (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-#ifndef DROP_INTERN
-X void *f_arg,
-#else
-X struct f_struct *f_arg,
-#endif
-X int qr_flg, struct rstruct *rst);
-X
-void /* calculates optimal alignment score */
-do_opt (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-#ifndef DROP_INTERN
-X void *f_arg,
-#else
-X struct f_struct *f_arg,
-#endif
-X struct rstruct *rst
-X );
-X
-int /* produces encoding of alignment */
-do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-#ifndef DROP_INTERN
-X void *f_arg,
-#else
-X struct f_struct *f_arg,
-#endif
-X struct a_res_str *a_res,
-X int *have_ares);
-X
-void
-pre_cons(const unsigned char *aa, int n, int frame,
-#ifndef DROP_INTERN
-X void *f_arg
-#else
-X struct f_struct *f_arg
-#endif
-X );
-X
-void
-aln_func_vals(int frame, struct a_struct *aln);
-X
-int /* takes aa0, aa1, a_res, and produces seqc0, seqc1 */
-calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln, /* seqc0/seqc1 coordinates */
-X struct a_res_str a_res, /* aa0,aa1, coord, inc. res, nres */
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-#ifndef DROP_INTERN
-X void *f_arg
-#else
-X struct f_struct *f_arg
-#endif
-X );
-X
-/* calcons_a - takes aa0, aa1, a_res, and produces seqc0, seqc1,
-X * and seqc0a, seqc1a - the annotated sequences
-X */
-int
-calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr,
-#ifndef DROP_INTERN
-X void *f_arg
-#else
-X struct f_struct *f_arg
-#endif
-X );
-X
-int /* returns lenc - length of alignment */
-calc_code(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n,
-#ifndef DROP_INTERN
-X void *f_arg
-#else
-X struct f_struct *f_arg
-#endif
-X );
-X
-int /* returns lenc - length of alignment */
-calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-#ifndef DROP_INTERN
-X void *f_arg
-#else
-X struct f_struct *f_arg
-#endif
-X );
-SHAR_EOF
-chmod 0644 drop_func.h ||
-echo 'restore of drop_func.h failed'
-Wc_c="`wc -c < 'drop_func.h'`"
-test 3226 -eq "$Wc_c" ||
- echo 'drop_func.h: original size 3226, current size' "$Wc_c"
-fi
-# ============= dropff2.c ==============
-if test -f 'dropff2.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropff2.c (File already exists)'
-else
-echo 'x - extracting dropff2.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropff2.c' &&
-X
-/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
-X
-/* - dropffa.c,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp */
-X
-/* this code implements the "fastf" algorithm, which is designed to
-X deconvolve mixtures of protein sequences derived from mixed-peptide
-X Edman sequencing. The expected input is:
-X
-X >test | 40001 90043 | mgstm1
-X MGCEN,
-X MIDYP,
-X MLLAY,
-X MLLGY
-X
-X Where the ','s indicate the length/end of the sequencing cycle
-X data. Thus, in this example, the sequence is from a mixture of 4
-X peptides, M was found in the first position, G,I, and L(2) at the second,
-X C,D, L(2) at the third, etc.
-X
-X Because the sequences are derived from mixtures, there need not be
-X any partial sequence "MGCEN", the actual deconvolved sequence might be
-X "MLDGN".
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctype.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "structs.h"
-#include "tatstats.h"
-X
-#define EOSEQ 0
-#define ESS 49
-#define MAXHASH 32
-/* parenthesized so NMAP expands as one value inside any expression */
-#define NMAP (MAXHASH+1)
-#define NMAP_X 23 /* re-code NMAP for 'X' */
-#define NMAP_Z 24 /* re-code NMAP for '*' */
-X
-#ifndef MAXSAV
-#define MAXSAV 10
-#endif
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-static char *verstr="4.21 May 2006 (ajm/wrp)";
-X
-int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
-void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
-extern void aancpy(char *to, char *from, int count, struct pstruct pst);
-X
-#ifdef TFAST
-extern int aatran(const unsigned char *ntseq, unsigned char *aaseq,
-X const int maxs, const int frame);
-#endif
-X
-struct hlstr { int next, pos;};
-X
-void savemax(struct dstruct *, struct f_struct *);
-X
-static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
-X int **, struct f_struct *);
-static int m1_spam(unsigned char *, int,
-X const unsigned char *, int,
-X struct savestr *, int **, int, struct f_struct *);
-X
-int sconn(struct savestr **v, int nsave, int cgap,
-X struct f_struct *, struct rstruct *, struct pstruct *,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int opt_prob);
-X
-void kpsort(struct savestr **, int);
-void kssort(struct savestr **, int);
-void kpsort(struct savestr **, int);
-X
-int
-sconn_a(unsigned char *, int, int, struct f_struct *,
-X struct a_res_str *);
-X
-/* init_work() - build the per-query state used by the fastf search.
-X *
-X * Allocates a struct f_struct and returns it through *f_arg.  The
-X * query aa0[0..n0-1] is copied into f_str->aa0 and all later work is
-X * done on the copy; in the copy each ESS separator is replaced by 0.
-X * The copy is hashed one residue at a time into harr[]/link[], with
-X * the hash position restarting at 0 after every separator, so that
-X * residues from different fragments share positions.  nm0 counts the
-X * fragments and nmoff is the index just past the first separator
-X * (n0 if there is none).
-X *
-X * Side effects on *ppst: sw_flag is cleared, hsq[NMAP_X] and
-X * hsq[NMAP_Z] are set to 1 if they were NMAP, and param_u.fa.cgap is
-X * set to shscore() of the first nmoff-1 residues divided by 3, capped
-X * at param_u.fa.bestmax/4.
-X *
-X * Any allocation failure prints a message and calls exit(1).
-X */
-void
-init_work (unsigned char *aa0, int n0,
-X           struct pstruct *ppst,
-X           struct f_struct **f_arg)
-{
-X  int mhv;
-X  int hmax;
-X  int i0, ii0, hv;
-X  struct f_struct *f_str;
-X
-X  int maxn0;
-X  int i;
-X  struct savestr *vmptr;
-X  int *res;
-X
-X  f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
-X  if(f_str == NULL) {
-X    fprintf(stderr, "Couldn't calloc f_str\n");
-X    exit(1);
-X  }
-X
-X  ppst->sw_flag = 0;
-X
-X  /* fastf3 cannot work with lowercase symbols as low complexity;
-X     thus, NMAP must be disabled; this depends on aascii['X'] */
-X  if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
-X  if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
-X
-X  /* this does not work for share ppst structs, as in threads */
-X  /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
-X
-X  /* largest hash code below NMAP decides how many bits a code needs */
-X  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
-X    if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
-X
-X  if (mhv <= 0) {
-X    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
-X    exit (1);
-X  }
-X
-X  for (f_str->kshft = 0; mhv > 0; mhv /= 2)
-X    f_str->kshft++;
-X
-/*      kshft = 2; */
-X  hmax = hv = (1 << f_str->kshft);
-X  /* NOTE(review): hmax == 1<<kshft, so this always yields 0; in this
-X     function hmask is only printed in an error message - confirm */
-X  f_str->hmask = (hmax >> f_str->kshft) - 1;
-X
-X  /* private, writable copy of the query (one spare byte, zeroed) */
-X  if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
-X    fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
-X    exit (1);
-X  }
-X  for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
-X  aa0 = f_str->aa0;
-X
-X  if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
-X    fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
-X    exit (1);
-X  }
-X  f_str->aa0ix = 0;
-X
-X  if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
-X    fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
-X             hmax,f_str->hmask);
-X    exit (1);
-X  }
-X  if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate pamh1 array\n");
-X    exit (1);
-X  }
-X  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate pamh2 array\n");
-X    exit (1);
-X  }
-X  if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
-X    fprintf (stderr, " cannot allocate hash link array\n");
-X    exit (1);
-X  }
-X
-X  for (i0 = 0; i0 < hmax; i0++) {
-X    f_str->harr[i0].next = -1;
-X    f_str->harr[i0].pos = -1;
-X  }
-X
-X  for (i0 = 0; i0 < n0; i0++) {
-X    f_str->link[i0].next = -1;
-X    f_str->link[i0].pos = -1;
-X  }
-X
-X  /* encode the aa0 array */
-X  /*
-X    this code has been modified to allow for mixed peptide sequences
-X     aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
-X    the 'NULL' character resets the hash position counter, to indicate that
-X    any of several residues can be in the same position.
-X    We also need to keep track of the number of times this has happened, so that
-X    we can redivide the sequence later
-X
-X    i0 counts through the sequence
-X    ii0 counts through the hashed sequence
-X
-X    */
-X
-X  f_str->nm0 = 1;
-X  f_str->nmoff = -1;
-X  hv = 0;
-X  for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
-X    /* reset the counter and start hashing again */
-X    if (aa0[i0] == ESS || aa0[i0] == 0) {
-X      aa0[i0] = 0;      /* set ESS to 0 */
-X      /*    fprintf(stderr," converted ',' to 0\n");*/
-X      i0++;             /* skip over the blank */
-X      f_str->nm0++;
-X      if (f_str->nmoff < 0) f_str->nmoff = i0;
-X      hv = 0;
-X      ii0 = 0;
-X      /* a separator in the last position leaves nothing to hash;
-X         stop rather than write link[n0], one past the n0-entry array */
-X      if (i0 >= n0) break;
-X    }
-X    hv = ppst->hsq[aa0[i0]];
-X    /* push (i0, ii0) on the front of the chain for hash code hv */
-X    f_str->link[i0].next = f_str->harr[hv].next;
-X    f_str->link[i0].pos = f_str->harr[hv].pos;
-X    f_str->harr[hv].next = i0;
-X    f_str->harr[hv].pos = ii0;
-X    f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
-X  }
-X  if (f_str-> nmoff < 0) f_str->nmoff = n0;
-X
-X
-#ifdef DEBUG
-X  /*
-X  fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
-X  */
-#endif
-X
-/*
-#ifdef DEBUG
-X  fprintf(stderr," hmax: %d\n",hmax);
-X  for ( hv=0; hv<hmax; hv++)
-X    fprintf(stderr,"%2d %c %3d %3d\n",hv,
-X            (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
-X            f_str->harr[hv].pos,f_str->harr[hv].next);
-X  fprintf(stderr,"----\n");
-X  for ( hv=0; hv<n0; hv++)
-X    fprintf(stderr,"%2d: %3d %3d\n",hv,
-X            f_str->link[hv].pos,f_str->link[hv].next);
-#endif
-*/
-X
-X  f_str->maxsav = MAXSAV;
-X  if ((f_str->vmax = (struct savestr *)
-X       calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
-X    fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
-X    exit(1);
-X  }
-X
-X  if ((f_str->vptr = (struct savestr **)
-X       calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
-X    fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
-X    exit(1);
-X  }
-X
-X  /* one used[] flag per query position for every saved segment */
-X  for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
-X    vmptr->used = (int *) calloc(n0, sizeof(int));
-X    if(vmptr->used == NULL) {
-X      fprintf(stderr, "Couldn't alloc vmptr->used\n");
-X      exit(1);
-X    }
-X  }
-X
-/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
-X   pam2[0][0] is now undefined for consistency with blast
-*/
-X
-X  for (i0 = 1; i0 <= ppst->nsq; i0++)
-X    f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
-X
-X  ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
-X  if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
-X    ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
-X
-X  f_str->ndo = 0;
-X  f_str->noff = n0-1;
-X  if (f_str->diag==NULL)
-X    f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
-X                                             sizeof (struct dstruct));
-X
-X  if (f_str->diag == NULL)
-X    {
-X      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
-X               (long) MAXDIAG * (long) (sizeof (struct dstruct)));
-X      exit (1);
-X    }
-X
-#ifdef TFAST
-X  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
-X                                            sizeof(unsigned char)))
-X      == NULL) {
-X    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
-X    exit (1);
-X  }
-X  f_str->aa1x++;        /* close_work frees aa1x - 1 */
-#endif
-X
-X  /* allocate space for the scoring arrays */
-X  maxn0 = max(3*n0/2,MIN_RES);
-X  if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X    fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X    exit(1);
-X  }
-X  f_str->res = res;
-X  f_str->max_res = maxn0;
-X
-X  /* Tatusov Statistics Setup */
-X
-X  /* initialize priors array. */
-X  if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate priors array.\n");
-X    exit(1);
-X  }
-X  calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
-X
-X  f_str->dotat = 0;
-X  f_str->shuff_cnt = ppst->shuff_node;
-X
-X  /* End of Tatusov Statistics Setup */
-X
-X  *f_arg = f_str;
-}
-X
-X
-/* get_param() - describe the search function and its parameters.
-X *
-X * pstring1 receives a one-line summary for the manager (that buffer
-X * is currently 512 bytes); " init1" is appended when
-X * param_u.fa.iniflag is set.  pstring2, when not NULL, receives the
-X * same information in markx==10 format.
-X */
-void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-#ifdef TFAST
-X  char *pg_str="TFASTF";
-#else
-X  char *pg_str="FASTF";
-#endif
-X
-X  sprintf (pstring1,
-X           "%s (%s) function [%s matrix (%d:%d)] join: %d",
-X           pg_str, verstr, pstr->pamfile,
-X           pstr->pam_h, pstr->pam_l,
-X           pstr->param_u.fa.cgap);
-X
-X  if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
-X
-X  if (pstring2 == NULL) return;
-X
-X  sprintf (pstring2,
-X           "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n"
-X           "; pg_join: %d\n",
-X           pg_str, verstr, pstr->pamfile,
-X           pstr->pam_h, pstr->pam_l,
-X           pstr->param_u.fa.cgap);
-}
-X
-/* close_work() - release everything init_work() attached to *f_arg.
-X *
-X * Does nothing when *f_arg is NULL; otherwise frees each buffer held
-X * by the f_struct, then the struct itself, and sets *f_arg to NULL.
-X * aa0, n0 and ppst are not referenced.
-X */
-void
-close_work (const unsigned char *aa0, const int n0,
-X            struct pstruct *ppst,
-X            struct f_struct **f_arg)
-{
-X  struct f_struct *fs = *f_arg;
-X  int k;
-X
-X  if (fs == NULL) return;
-X
-X  for (k = 0; k < MAXSAV; k++)
-X    free(fs->vmax[k].used);
-X
-X  free(fs->res);
-#ifdef TFAST
-X  free(fs->aa1x - 1); /* allocated, then aa1x++'ed */
-#endif
-X  free(fs->diag);
-X  free(fs->link);
-X  free(fs->pamh2);
-X  free(fs->pamh1);
-X  free(fs->harr);
-X  free(fs->aa0t);
-X  free(fs->aa0);
-X  free(fs->priors);
-X  free(fs);
-X
-X  *f_arg = NULL;
-}
-X
-int do_fastf (unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst, struct f_struct *f_str,
-X struct rstruct *rst, int *hoff, int opt_prob)
-{ /* do_fastf() - score library sequence aa1[0..n1-1] against the query
-X    hash tables held in f_str and store the scores in rst->score[].
-X    Returns 1, or -1 (scores -1, escore 2.0) when n0+n1+1 >= MAXDIAG.
-X    hoff is not referenced in this function; opt_prob is only passed
-X    on to sconn(). */
-X int nd; /* diagonal array size */
-X int lhval;
-X int kfact;
-X register struct dstruct *dptr;
-X register int tscor;
-X register struct dstruct *diagp;
-X struct dstruct *dpmax;
-X register int lpos;
-X int tpos, npos;
-X struct savestr *vmptr;
-X int scor, tmp;
-X int im, ib, nsave;
-X int cmps (); /* comparison routine for ksort */
-X int *hsq;
-X
-X hsq = ppst->hsq;
-X
-X if (n1 < 1) { /* empty library sequence: zero scores, nothing to scan */
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X rst->escore = 1.0;
-X rst->segnum = 0;
-X rst->seglen = 0;
-X return 1;
-X }
-X
-X if (n0+n1+1 >= MAXDIAG) {
-X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
-X rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X rst->escore = 2.0;
-X rst->segnum = 0;
-X rst->seglen = 0;
-X return -1;
-X }
-X
-X nd = n0 + n1;
-X
-X dpmax = &f_str->diag[nd];
-X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) { /* only diag[ndo..nd-1] need clearing: the reset loop after the scan clears every diagonal it used and then sets ndo = nd */
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X
-X /* initialize the saved segment structures */
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
-X vmptr->score = 0;
-X memset(vmptr->used, 0, n0 * sizeof(int));
-X }
-X
-X f_str->lowmax = f_str->vmax;
-X f_str->lowscor = 0;
-X
-X /* start hashing */
-X
-X diagp = &f_str->diag[f_str->noff]; /* diagp advances with lpos; diagp[-tpos] is the diagonal for query hash position tpos */
-X for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
-X if (hsq[aa1[lpos]]>=NMAP) { /* skip over library residues whose hash code is >= NMAP */
-X lpos++ ; diagp++;
-X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
-X if (lpos >= n1) break;
-X lhval = 0;
-X }
-X lhval = hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
-X tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) { /* walk the chain of query positions with this hash code */
-X /* tscor gets position of end of current lpos diag run */
-X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
-X tscor++; /* move forward one */
-X if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
-X /* includes implicit -1 mismatch penalty */
-X scor = dptr->score; /* current score of this run */
-X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
-X f_str->lowscor < scor) /* if updating tscor makes run worse, */
-X savemax (dptr, f_str); /* save it */
-X
-X if ((tscor += scor) >= kfact) { /* add to current run if continuing */
-X /* is better than restart (kfact) */
-X dptr->score = tscor;
-X dptr->stop = lpos;
-X }
-X else {
-X dptr->score = kfact; /* starting over is better */
-X dptr->start = (dptr->stop = lpos);
-X }
-X }
-X else { /* continue current run */
-X dptr->score += f_str->pamh1[aa0[tpos]];
-X dptr->stop = lpos;
-X }
-X }
-X else { /* no diagonal run yet */
-X dptr->score = f_str->pamh2[lhval];
-X dptr->start = (dptr->stop = lpos);
-X }
-X } /* end tpos */
-X } /* end lpos */
-X
-X for (dptr = f_str->diag; dptr < dpmax;) { /* save any run still above lowscor, then reset the diagonal for the next call */
-X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X f_str->ndo = nd;
-X
-/*
-X at this point all of the elements of aa1[lpos]
-X have been searched for elements of aa0[tpos]
-X with the results in diag[dpos]
-*/
-X
-X /* set up pointers for sorting */
-X
-X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
-X if (vmptr->score > 0) {
-X vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
-X f_str->vptr[nsave++] = vmptr;
-X }
-X }
-X
-X /* sort them */
-X kssort (f_str->vptr, nsave);
-X
-X
-#ifdef DEBUG
-X /*
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
-X fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
-X ppst->sq[aa1[im]]);
-X fputc('\n',stderr);
-X }
-X fprintf(stderr,"---\n");
-X */
-X /* now use m_spam to re-evaluate */
-X /*
-X for (tpos = 0; tpos < n0; tpos++) {
-X fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
-X if (tpos %10 == 9) fputc('\n',stderr);
-X }
-X fputc('\n',stderr);
-X */
-#endif
-X
-X f_str->aa0ix = 0;
-X for (ib=0; ib < nsave; ib++) { /* re-score in sorted order; m1_spam marks each query residue it uses by adding 32 to its code */
-X if ((vmptr=f_str->vptr[ib])->score > 0) {
-X vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
-X ppst->pam2[0], ppst->pam_l, f_str);
-X }
-X }
-X /* reset aa0 - modified by m1_spam */
-X for (tpos = 0; tpos < n0; tpos++) {
-X if (aa0[tpos] >= 32) aa0[tpos] -= 32;
-X }
-X
-X kssort(f_str->vptr,nsave);
-X
-X for ( ; nsave > 0; nsave--) /* drop trailing entries whose score is not positive */
-X if (f_str->vptr[nsave-1]->score >0) break;
-X
-X if (nsave <= 0) {
-X f_str->nsave = 0;
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X rst->escore = 1.0;
-X
-X return 1;
-X }
-X else f_str->nsave = nsave;
-X
-X
-#ifdef DEBUG
-X /*
-X fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
-X fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
-X ppst->sq[aa1[im]]);
-X fputc('\n',stderr);
-X }
-X
-X fprintf(stderr,"---\n");
-X */
-#endif
-X
-X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
-X rst, ppst, aa0, n0, aa1, n1, opt_prob);
-X
-X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++) /* find the highest-scoring single segment */
-X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
-X
-X rst->score[1] = vmptr->score; /* best single-segment score */
-X rst->score[0] = rst->score[2] = max (scor, vmptr->score); /* larger of the sconn() result and the best single segment */
-X
-X return 1;
-}
-X
-/* do_work() - score one library sequence and store the result in rst.
-X *
-X * A request with qr_flg==1 is refused (scores -1, escore 2.0) once
-X * f_str->shuff_cnt has run out; every accepted request with a
-X * non-zero qr_flg decrements shuff_cnt.  opt_prob, handed on to
-X * do_fastf(), is 1 when qr_flg is set, 0 when zsflag is 2 or 12, and
-X * otherwise 1 only if f_str->dotat is set or zsflag is 4 or 14.
-X * With TFAST defined, aa1 is first translated by aatran() into
-X * f_str->aa1x.  rst->comp and rst->H are set to -1.0 after the search.
-X */
-void do_work (const unsigned char *aa0, int n0,
-X              const unsigned char *aa1, int n1,
-X              int frame,
-X              struct pstruct *ppst, struct f_struct *f_str,
-X              int qr_flg, struct rstruct *rst)
-{
-X  int opt_prob;
-X  int hoff, n10, i;
-X
-X  /* no shuffles left for this request */
-X  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
-X    rst->score[0]=rst->score[1]=rst->score[2]= -1;
-X    rst->escore = 2.0;
-X    return;
-X  }
-X
-X  if (qr_flg) {
-X    opt_prob = 1;
-X    /*    if (frame==1) */
-X    f_str->shuff_cnt--;
-X  }
-X  else if (ppst->zsflag == 2 || ppst->zsflag == 12) {
-X    opt_prob = 0;
-X  }
-X  else {
-X    opt_prob = (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14) ? 1 : 0;
-X  }
-X
-X  if (n1 < 1) {
-X    rst->escore = 2.0;
-X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X    return;
-X  }
-X
-#ifdef TFAST
-X  n10=aatran(aa1,f_str->aa1x,n1,frame);
-X  if (ppst->debug_lib) {
-X    for (i=0; i<n10; i++) {
-X      if (f_str->aa1x[i]>ppst->nsq) {
-X        fprintf(stderr,
-X                "residue[%d/%d] %d range (%d)\n",i,n1,
-X                f_str->aa1x[i],ppst->nsq);
-X        f_str->aa1x[i]=0;
-X        n10=i-1;
-X      }
-X    }
-X  }
-X
-X  do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
-#else   /* FASTF */
-X  do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
-#endif
-X
-X  rst->comp = rst->H = -1.0;
-}
-X
-/* do_opt() - re-run do_fastf() with opt_prob forced to 1.
-X *
-X * ppst->param_u.fa.optflag is set to 1 for the duration of the call
-X * and put back to its previous value afterwards.  With TFAST defined,
-X * aa1 is first translated by aatran() into f_str->aa1x.
-X */
-void do_opt (const unsigned char *aa0, int n0,
-X             const unsigned char *aa1, int n1,
-X             int frame,
-X             struct pstruct *ppst,
-X             struct f_struct *f_str,
-X             struct rstruct *rst)
-{
-X  int saved_optflag = ppst->param_u.fa.optflag;
-X  int hoff, n10;
-X
-X  ppst->param_u.fa.optflag = 1;
-X
-#ifdef TFAST
-X  n10 = aatran(aa1, f_str->aa1x, n1, frame);
-X  do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1);
-#else   /* FASTF */
-X  do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1);
-#endif
-X
-X  ppst->param_u.fa.optflag = saved_optflag;
-}
-X
-/* savemax() - record the run on diagonal dptr among the MAXSAV saved runs.
-X *
-X * If dptr->dmax already points at a saved entry for this same
-X * diagonal and start, that entry's stop is updated and its score is
-X * raised when the run has improved.  Otherwise the run overwrites the
-X * current lowest-scoring entry (f_str->lowmax) and dptr->dmax is
-X * pointed at it.  Whenever the lowest entry may have changed, vmax[]
-X * is rescanned to refresh f_str->lowmax and f_str->lowscor.
-X *
-X * Written with a prototype-style parameter list (the old-style
-X * identifier-list form is obsolete) so that the definition matches
-X * the declaration of savemax earlier in this file.
-X */
-void
-savemax (register struct dstruct *dptr, struct f_struct *f_str)
-{
-X  register int dpos;
-X  register struct savestr *vmptr;
-X  register int i;
-X
-X  /* index of this diagonal within f_str->diag[] */
-X  dpos = (int) (dptr - f_str->diag);
-X
-/* check to see if this is the continuation of a run that is already saved */
-X
-X  if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
-X      vmptr->start == dptr->start)
-X    {
-X      vmptr->stop = dptr->stop;
-X      if ((i = dptr->score) <= vmptr->score)
-X        return;
-X      vmptr->score = i;
-X      /* only a change to the lowest entry can move lowmax/lowscor */
-X      if (vmptr != f_str->lowmax)
-X        return;
-X    }
-X  else
-X    {
-X      /* new run: replace the current lowest-scoring saved entry */
-X      i = f_str->lowmax->score = dptr->score;
-X      f_str->lowmax->dp = dpos;
-X      f_str->lowmax->start = dptr->start;
-X      f_str->lowmax->stop = dptr->stop;
-X      dptr->dmax = f_str->lowmax;
-X    }
-X
-X  /* find the new lowest-scoring saved entry */
-X  for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X    if (vmptr->score < i)
-X      {
-X        i = vmptr->score;
-X        f_str->lowmax = vmptr;
-X      }
-X  f_str->lowscor = i;
-}
-X
-/* this version of spam() is designed to work with a collection of
-X subfragments, selecting the best amino acid at each position so
-X that, from each subfragment, each position is only used once.
-X
-X As a result, m_spam needs to know the number of fragments.
-X
-X In addition, it now requires a global alignment to the fragment
-X and resets the start and stop positions
-X
-X m1_spam() in detail: for each library position from dmax->start to
-X dmax->stop it looks at the nm0 query residues found at offsets
-X 0, nmoff, 2*nmoff, ... from the current query position, ignores any
-X whose code is already >= 32, and adds the best pam2[][] score to the
-X total if it is above pam_l (otherwise pam_l itself is added). The
-X residue that supplied the score gets 32 added to its code in aa0 and
-X its index flagged in dmax->used[]; do_fastf() subtracts the 32 again
-X after its m1_spam loop. Returns the summed score. n0 is not
-X referenced.
-X
-X */
-X
-static int
-m1_spam (unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct savestr *dmax, int **pam2, int pam_l,
-X struct f_struct *f_str)
-{
-X int tpos, lpos, im, ii, nm, ci;
-X int tot, ctot, pv;
-X
-X struct {
-X int start, stop, score;
-X } curv, maxv; /* only the .score members are assigned; neither is read in this function */
-X unsigned char *aa0p;
-X const unsigned char *aa1p;
-X
-X lpos = dmax->start; /* position in library sequence */
-X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
-X /* force global alignment, reset start*/
-X if (tpos < lpos) {
-X lpos = dmax->start -= tpos;
-X tpos = 0;
-X }
-X else {
-X tpos -= lpos;
-X lpos = dmax->start = 0;
-X }
-X
-X dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
-X if (dmax->stop > n1) dmax->stop = n1; /* NOTE(review): the loop below runs while lpos <= dmax->stop, so this clamp still lets aa1[n1] be read; m0_spam clamps its end to n1-1 - confirm which is intended */
-X
-X /*
-X if (dmax->start < 0) {
-X tpos = -dmax->start;
-X lpos = dmax->start=0;
-X }
-X else tpos = 0;
-X */
-X
-X aa1p = &aa1[lpos];
-X aa0p = &aa0[tpos];
-X
-X nm = f_str->nm0;
-X
-X tot = curv.score = maxv.score = 0;
-X for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
-X ctot = pam_l; /* score to beat; used as-is when no unused residue scores higher */
-X ci = -1;
-X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) { /* same position in each of the nm fragments */
-X if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
-X ctot = pv;
-X ci = ii;
-/* fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
-X }
-X }
-X tot += ctot;
-X if (ci >= 0 && aa0p[ci] < 32) {
-#ifdef DEBUG
-/* fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
-#endif
-X aa0p[ci] += 32; /* mark the residue as used so it is not chosen again */
-X dmax->used[&aa0p[ci] - aa0] = 1;
-X }
-X }
-X return tot;
-}
-X
-int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
-X struct savestr *dmax, struct pstruct *ppst,
-X struct f_struct *f_str)
-{ /* ma_spam() - scan library positions dmax->start..dmax->stop, at each
-X    one taking the best-scoring unused (code < 32) query residue among
-X    the nm0 fragments, and copy the chosen residues in order into
-X    f_str->aa0t starting at f_str->aa0ix. Each chosen residue has 32
-X    added to its code in aa0. If at least one residue was copied,
-X    dmax->dp is reduced by aa0ix, a 0 is appended (unless the write
-X    position is already past n0) and aa0ix advances by nmoff.
-X    Returns the summed score. */
-X int **pam2;
-X int tpos, lpos, im, ii, nm, ci, lp0;
-X int tot, ctot, pv;
-X struct {
-X int start, stop, score;
-X } curv, maxv; /* assigned below but not read in this function */
-X const unsigned char *aa1p;
-X unsigned char *aa0p, *aa0pt;
-X int aa0t_flg;
-X
-X pam2 = ppst->pam2[0];
-X aa0t_flg = 0;
-X
-X lpos = dmax->start; /* position in library sequence */
-X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
-X lp0 = lpos = dmax->start;
-X aa1p = &aa1[lpos];
-X aa0p = &aa0[tpos]; /* real aa0 sequence */
-X
-X /* the destination aa0 sequence (without nulls) */
-X aa0pt = &f_str->aa0t[f_str->aa0ix];
-X
-X curv.start = lpos;
-X nm = f_str->nm0;
-X
-X /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
-X if (lpos == 0 && tpos > 0)
-X for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31; /* filler character */
-X
-X tot = curv.score = maxv.score = 0;
-X for (; lpos <= dmax->stop; lpos++) {
-X ctot = ppst->pam_l; /* score to beat; used as-is when no unused residue scores higher */
-X ci = -1;
-X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) { /* same position in each of the nm fragments */
-X if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
-X ctot = pv;
-X ci = ii;
-X }
-X }
-X tot += ctot;
-X if (ci >= 0) {
-X if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
-X else {
-X *aa0pt++ = aa0p[ci];
-X aa0p[ci] += 32; /* mark the residue as used */
-X aa0t_flg=1;
-X }
-X }
-X aa0p++; aa1p++;
-X }
-X
-X if (aa0t_flg) {
-X dmax->dp -= f_str->aa0ix; /* shift ->dp for aa0t */
-X if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
-X fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
-X }
-X else
-X *aa0pt++ = 0; /* skip over NULL */
-X
-X aa0pt = &f_str->aa0t[f_str->aa0ix];
-X aa1p = &aa1[lp0];
-X
-X /*
-X for (im = 0; im < f_str->nmoff; im++)
-X fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
-X fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
-X */
-X
-X f_str->aa0ix += f_str->nmoff; /* update offset into aa0t */
-X }
-X /*
-X fprintf(stderr," ma_spam returning: %d\n",tot);
-X */
-X return tot;
-}
-X
-/* m0_spam - score the region *dmax without marking any query residue.
-X
-X The region is first adjusted so that it begins at the start of the query
-X (or at library position 0 when the library is too short on the left);
-X dmax->start, and in one case dmax->stop, are rewritten accordingly.  For
-X every library position the best pam2 score among the nm0 query residues
-X at stride f_str->nmoff is added, with no lower threshold.
-X Returns the summed score. */
-static int
-m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
-X struct savestr *dmax, int **pam2,
-X struct f_struct *f_str)
-{
-X int tpos, lpos, lend, im, ii, nm;
-X int tot, ctot, pv;
-X struct {
-X int start, stop, score;
-X } curv, maxv; /* only assigned below, never read */
-X const unsigned char *aa0p, *aa1p;
-X
-X lpos = dmax->start; /* position in library sequence */
-X tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
-X if (tpos > 0) {
-X if (lpos-tpos >= 0) {
-X lpos = dmax->start -= tpos; /* force global alignment, reset start*/
-X tpos = 0;
-X }
-X else {
-X /* not enough library to the left: start at library 0 instead */
-X tpos -= lpos;
-X lpos = dmax->start = 0;
-X }
-X }
-X
-X nm = f_str->nm0;
-X lend = dmax->stop;
-X /* if the library ends before lpos + nmoff-2, recompute the stop and
-X clamp it to the last library residue */
-X if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
-X lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
-X if (lend >= n1) lend = n1-1;
-X }
-X
-X aa1p = &aa1[lpos];
-X aa0p = &aa0[tpos];
-X
-X curv.start = lpos;
-X
-X tot = curv.score = maxv.score = 0;
-X for (; lpos <= lend; lpos++) {
-X ctot = -10000; /* lower than any pam2 entry is assumed to be */
-X for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
-X if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
-X ctot = pv;
-X }
-X }
-X tot += ctot;
-X aa0p++; aa1p++;
-X }
-X
-X /* reset dmax if necessary */
-X
-X return tot;
-}
-X
-/* sconn links up non-overlapping alignments and calculates the score.
-X
-X v[0..n-1] are the saved regions; they are re-sorted here by ->start.
-X Without opt_prob, regions scoring below cgap are ignored and chains are
-X ranked by summed score.  With opt_prob, every region is considered and
-X chains are ranked by the probability returned from calc_tatusov().
-X
-X On return rst->escore, rst->segnum and rst->seglen describe the best
-X chain (escore is 2.0 without opt_prob; 1.0 with opt_prob when nothing was
-X chained).  The return value is the best chain's score, or 0 if there is
-X none. */
-X
-int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
-X struct rstruct *rst, struct pstruct *ppst,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int opt_prob)
-{
-X int i, si, cmpp ();
-X struct slink *start, *sl, *sj, *so, sarr[MAXSAV];
-X int lstart, plstop;
-X double tatprob;
-X
-X /* sarr[] saves each alignment score/position, and provides a link
-X back to the previous alignment that maximizes the score */
-X /* NOTE(review): sarr has MAXSAV slots and si is not bounds-checked here;
-X presumably callers guarantee n <= MAXSAV - confirm */
-X
-X /* sort the score left to right in lib pos */
-X kpsort (v, n);
-X
-X start = NULL; /* head of the candidate list, best chain first */
-X
-X /* for the remaining runs, see if they fit */
-X for (i = 0, si = 0; i < n; i++) {
-X
-X /* if the score is less than the gap penalty, it never helps */
-X if (!opt_prob && (v[i]->score < cgap) ){ continue; }
-X
-X lstart = v[i]->start;
-X
-X /* put the run in the group */
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].next = NULL;
-X sarr[si].prev = NULL;
-X sarr[si].tat = NULL;
-X
-X if(opt_prob) {
-X /* probability of this region on its own; calc_tatusov() is assumed to
-X leave its freshly allocated state in ->newtat - TODO confirm */
-X sarr[si].tatprob =
-X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0],ppst->nsq, f_str,
-X ppst->pseudocts, opt_prob,ppst->zsflag);
-X sarr[si].tat = sarr[si].newtat;
-X }
-X
-X /* if it fits, then increase the score */
-X for (sl = start; sl != NULL; sl = sl->next) {
-X plstop = sl->vp->stop;
-X /* join only when the earlier chain stops before this region starts */
-X if (plstop < lstart ) {
-X if(!opt_prob) {
-X sarr[si].score = sl->score + v[i]->score;
-X sarr[si].prev = sl;
-X /*
-X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
-X i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
-X */
-X break;
-X } else {
-X tatprob =
-X calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0], ppst->nsq, f_str,
-X ppst->pseudocts, opt_prob, ppst->zsflag);
-X /* if our tatprob gets worse when we add this, forget it */
-X if(tatprob > sarr[si].tatprob) {
-X free(sarr[si].newtat->probs); /* get rid of new tat struct */
-X free(sarr[si].newtat);
-X continue;
-X } else {
-X sarr[si].tatprob = tatprob;
-X free(sarr[si].tat->probs); /* get rid of old tat struct */
-X free(sarr[si].tat);
-X sarr[si].tat = sarr[si].newtat;
-X sarr[si].prev = sl;
-X sarr[si].score = sl->score + v[i]->score;
-X /*
-X fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
-X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
-X */
-X break;
-X }
-X }
-X }
-X }
-X
-X /* now recalculate where the score fits - resort the scores */
-X /* note: an entry that is not better than any entry already in the
-X list is not linked into the list by either loop below */
-X if (start == NULL) {
-X start = &sarr[si];
-X } else {
-X if(!opt_prob) { /* sort by scores */
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
-X if (sarr[si].score > sj->score) { /* if new score > best score */
-X sarr[si].next = sj; /* previous best linked to best */
-X if (so != NULL)
-X so->next = &sarr[si]; /* old best points to new best */
-X else
-X start = &sarr[si];
-X break;
-X }
-X so = sj; /* old-best saved in so */
-X }
-X } else { /* sort by tatprobs */
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
-X /* lower probability first; ties broken by higher score */
-X if ( sarr[si].tatprob < sj->tatprob ||
-X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
-X sarr[si].next = sj;
-X if (so != NULL)
-X so->next = &sarr[si];
-X else
-X start = &sarr[si];
-X break;
-X }
-X so = sj;
-X }
-X }
-X }
-X si++;
-X }
-X
-X /* release the per-entry tat state kept during the opt_prob pass */
-X if(opt_prob) {
-X for (i = 0 ; i < si ; i++) {
-X free(sarr[i].tat->probs);
-X free(sarr[i].tat);
-X }
-X }
-X
-X if (start != NULL) {
-X
-X if(opt_prob)
-X rst->escore = start->tatprob;
-X else
-X rst->escore = 2.0;
-X
-X /* walk the best chain backwards, counting segments and their lengths */
-X rst->segnum = rst->seglen = 0;
-X for(sj = start ; sj != NULL; sj = sj->prev) {
-X rst->segnum++;
-X rst->seglen += sj->vp->stop - sj->vp->start + 1;
-X }
-X return (start->score);
-X } else {
-X
-X if(opt_prob)
-X rst->escore = 1.0;
-X else
-X rst->escore = 2.0;
-X
-X rst->segnum = rst->seglen = 0;
-X return (0);
-X }
-}
-X
-/* kssort - shell sort of v[0..n-1] into descending order of ->score */
-void
-kssort (struct savestr **v, int n)
-{
-X  int step, hi, lo;
-X  struct savestr *held;
-X
-X  step = n / 2;
-X  while (step > 0) {
-X    for (hi = step; hi < n; hi++) {
-X      lo = hi - step;
-X      /* bubble the larger score toward the front, one stride at a time */
-X      while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
-X        held = v[lo + step];
-X        v[lo + step] = v[lo];
-X        v[lo] = held;
-X        lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-/* kpsort - shell sort of v[0..n-1] into ascending order of ->start
-X (left to right in the library sequence).
-X Defined in prototype form, like kssort(), instead of the obsolescent
-X old-style (K&R) parameter list. */
-void
-kpsort (struct savestr **v, int n)
-{
-X  int gap, i, j;
-X  struct savestr *tmp;
-X
-X  for (gap = n / 2; gap > 0; gap /= 2)
-X    for (i = gap; i < n; i++)
-X      for (j = i - gap; j >= 0; j -= gap)
-X	{
-X	  /* already in order at this stride: stop sifting */
-X	  if (v[j]->start <= v[j + gap]->start)
-X	    break;
-X	  tmp = v[j];
-X	  v[j] = v[j + gap];
-X	  v[j + gap] = tmp;
-X	}
-}
-X
-/* krsort - shell sort of v[0..n-1] from right to left (back to front):
-X descending order of ->stop.  Entries with equal ->stop are still swapped,
-X exactly as before.
-X Defined in prototype form, like kssort(), instead of the obsolescent
-X old-style (K&R) parameter list. */
-X
-void
-krsort (struct savestr **v, int n)
-{
-X  int gap, i, j;
-X  struct savestr *tmp;
-X
-X  for (gap = n / 2; gap > 0; gap /= 2)
-X    for (i = gap; i < n; i++)
-X      for (j = i - gap; j >= 0; j -= gap)
-X	{
-X	  /* strictly larger stop already in front: stop sifting */
-X	  if (v[j]->stop > v[j + gap]->stop)
-X	    break;
-X	  tmp = v[j];
-X	  v[j] = v[j + gap];
-X	  v[j + gap] = tmp;
-X	}
-}
-X
-/* do_walign - produce the alignment encoding for one query/library pair.
-X
-X Re-runs do_fastf() on the pair, rescores the saved regions with ma_spam()
-X (which also fills f_str->aa0t), removes the +32 "used" marks ma_spam()
-X left in f_str->aa0, and then lets sconn_a() chain the regions and write
-X the alignment ops to f_str->res.
-X
-X On return a_res->nres holds sconn_a()'s result, a_res->res points at
-X f_str->res and *have_ares is set to 0.  Returns rst.score[0] as filled in
-X by do_fastf().  Exits the program if the scratch buffer cannot be
-X allocated. */
-int do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares)
-{
-X int hoff, n10;
-X struct rstruct rst;
-X int ib;
-X unsigned char *aa0t;
-X const unsigned char *aa1p;
-X
-#ifdef TFAST
-X /* translated search: compare against the translation kept in f_str->aa1x */
-X f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
-X aa1p = f_str->aa1x;
-#else
-X n10 = n1;
-X aa1p = aa1;
-#endif
-X
-X /* note: the query used is f_str->aa0, not the aa0 argument */
-X do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff, 1);
-X
-X /* the alignment portion takes advantage of the information left
-X over in f_str after do_fastf is done. in particular, it is
-X easy to run a modified sconn() to produce the alignments.
-X
-X unfortunately, the alignment display routine wants to have
-X things encoded as with bd_align and sw_align, so we need to do that.
-X */
-X
-X /* scratch query buffer handed to sconn_a(); freed below */
-X if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
-X fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
-X exit(1);
-X }
-X
-X kssort (f_str->vptr, f_str->nsave);
-X f_str->aa0ix = 0;
-X if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
-X /* NOTE(review): the loop runs to nm0 even when nsave < nm0; presumably
-X every vptr[] slot below nm0 is a valid pointer - confirm */
-X for (ib=0; ib < f_str->nm0; ib++) {
-X if (f_str->vptr[ib]->score > 0) {
-X f_str->vptr[ib]->score =
-X ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
-X }
-X }
-X
-X /* after ma_spam is over, we need to reset aa0 */
-X for (ib = 0; ib < n0; ib++) {
-X if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
-X }
-X
-X /* scores changed above, so sort by score again */
-X kssort(f_str->vptr,f_str->nsave);
-X
-X /* drop trailing regions whose score is not positive */
-X for ( ; f_str->nsave > 0; f_str->nsave--)
-X if (f_str->vptr[f_str->nsave-1]->score >0) break;
-X
-X a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
-X free(aa0t);
-X
-X a_res->res = f_str->res;
-X *have_ares = 0;
-X return rst.score[0];
-}
-X
-/* this version of sconn is modified to provide alignment information.
-X
-X The saved regions f_str->vptr[0..nsave-1] are sorted right to left by
-X ->stop, chained so that each chain runs left to right in the library,
-X and the best-scoring chain is written to f_str->res as alignment ops:
-X one 0 per aligned residue pair and a positive count for the library
-X residues skipped between two chained regions.  The query residues of the
-X chain are gathered from f_str->aa0t into aa0 and then copied back over
-X f_str->aa0t[0..n0-1].  a_res->min0/min1/max0/max1 are set, and each
-X chained region's ->dp is adjusted.
-X
-X Returns the number of alignment columns covered (aligned pairs plus
-X skipped library residues), or 0 when no region reaches cgap. */
-X
-int sconn_a (unsigned char *aa0, int n0, int cgap,
-X struct f_struct *f_str,
-X struct a_res_str *a_res)
-{
-X int i, si, cmpp (), n;
-X unsigned char *aa0p;
-X int sx, dx, doff;
-X
-X struct savestr **v;
-X /* local list node: snext orders candidates by score, aprev links a
-X region to the chain that follows it in the library */
-X struct slink {
-X int score;
-X struct savestr *vp;
-X struct slink *snext;
-X struct slink *aprev;
-X } *start, *sl, *sj, *so, sarr[MAXSAV];
-X int lstop, plstart;
-X int *res, nres, tres;
-X
-/* sort the regions right to left in lib pos */
-X
-X v = f_str->vptr;
-X n = f_str->nsave;
-X
-X krsort (v, n); /* sort from right to left in library (descending stop) */
-X
-X start = NULL;
-X
-/* for each alignment, see if it fits */
-X
-X for (i = 0, si = 0; i < n; i++) {
-X
-/* if the score is less than the join threshold, skip it */
-X if (v[i]->score < cgap) continue;
-X
-X lstop = v[i]->stop; /* right end of this region in the library */
-X
-/* put the alignment in the group */
-X
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].snext = NULL;
-X sarr[si].aprev = NULL;
-X
-/* if it fits, then increase the score */
-/* start points to a sorted (by total score) list of candidate
-X overlaps */
-X
-X for (sl = start; sl != NULL; sl = sl->snext) {
-X plstart = sl->vp->start;
-X /* the candidate chain must begin to the right of this region's end */
-X if (plstart > lstop ) {
-X sarr[si].score = sl->score + v[i]->score;
-X sarr[si].aprev = sl;
-X break; /* quit as soon as the alignment has been added */
-X }
-X }
-X
-/* now recalculate the list of best scores */
-X if (start == NULL)
-X start = &sarr[si]; /* put the first one in the list */
-X else
-X for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
-X if (sarr[si].score > sj->score) { /* new score better than old */
-X sarr[si].snext = sj; /* snext best after new score */
-X if (so != NULL)
-X so->snext = &sarr[si]; /* prev_best->snext points to best */
-X else start = &sarr[si]; /* start points to best */
-X break; /* stop looking */
-X }
-X so = sj; /* previous candidate best */
-X }
-X si++; /* increment to snext alignment */
-X }
-X
-X /* we have the best set of alignments, write them to *res */
-X if (start != NULL) {
-X res = f_str->res; /* set a destination for the alignment ops */
-X tres = nres = 0; /* alignment op length = 0 */
-X aa0p = aa0; /* point into query (needed for calcons later) */
-X a_res->min1 = start->vp->start; /* start in library */
-X a_res->min0 = 0; /* start in query */
-X for (sj = start; sj != NULL; sj = sj->aprev ) {
-X /* doff: how far this region's residues move when packed into aa0 */
-X doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
-X /*
-X fprintf(stderr,"doff: %3d\n",doff);
-X */
-X for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
-X dx <= sj->vp->stop; dx++) {
-X *aa0p++ = f_str->aa0t[sx++]; /* copy residue into aa0 */
-X tres++; /* bump alignment counter */
-X res[nres++] = 0; /* put 0-op in res */
-X }
-X sj->vp->dp -= doff;
-X if (sj->aprev != NULL) {
-X if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
-X /* put an insert op into res to get to next aligned block */
-X tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
-X }
-X /*
-X fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
-X sj->vp->start - sj->vp->dp + f_str->noff,
-X sj->vp->stop - sj->vp->dp + f_str->noff,
-X sj->vp->start,sj->vp->stop,sj->vp->dp,
-X f_str->noff,sj->vp->score);
-X fprintf(stderr,"%3d - %3d: %3d\n",
-X sj->vp->start,sj->vp->stop,sj->vp->score);
-X */
-X /* overwritten on every pass, so the last region in the chain wins */
-X a_res->max1 = sj->vp->stop;
-X a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
-X }
-X
-X /*
-X fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
-X a_res->min0,a_res->max0,a_res->min1,a_res->max1);
-X */
-X
-X /* now replace f_str->aa0t with aa0 */
-X for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
-X
-X return tres;
-X }
-X else return (0);
-}
-X
-/* shscore - score of the query aligned against itself (100% identity):
-X the sum of pam2[r][r] over every residue r of aa0[0..n0-1] that is
-X non-zero and not larger than nsq */
-int
-shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
-{
-X  int pos, self_total = 0;
-X  unsigned char res;
-X
-X  for (pos = 0; pos < n0; pos++) {
-X    res = aa0[pos];
-X    /* skip separators (0) and codes outside the scoring alphabet */
-X    if (res == 0 || res > nsq) continue;
-X    self_total += pam2[res][res];
-X  }
-X  return self_total;
-}
-X
-/* pre_cons - prepare the library sequence for the display routines.
-X With TFAST the sequence is translated into f_str->aa1x and the value
-X returned by aatran() is stored in f_str->n10; otherwise nothing is done. */
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
-{
-#ifdef TFAST
-X  int n_translated;
-X
-X  n_translated = aatran(aa1, f_str->aa1x, n1, frame);
-X  f_str->n10 = n_translated;
-#endif
-}
-X
-/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
-/* call from calcons, calc_id, calc_code */
-/* Every field is assigned on every call.  With TFAST, llrev is 1 for
-X frame > 3 and 0 otherwise; previously it was only written for frame > 3,
-X which left a stale (or uninitialised) value for the other frames. */
-void
-aln_func_vals(int frame, struct a_struct *aln) {
-X
-#ifdef TFAST
-X  aln->qlrev = 0;
-X  aln->qlfact = 1;
-X  aln->llfact = aln->llmult = 3;
-X  aln->frame = 0;
-X  aln->llrev = (frame > 3) ? 1 : 0;
-#else	/* FASTF */
-X  aln->llfact = aln->qlfact = aln->llmult = 1;
-X  aln->llrev = aln->qlrev = 0;
-X  aln->frame = 0;
-#endif
-}
-X
-#include "a_mark.h"
-X
-/* calcons - build the three display strings for an alignment.
-X
-X seqc0 receives the query line, seqc1 the library line and seqca the
-X match-symbol line (M_IDENT, M_POS, M_ZERO, M_NEG, M_DEL, M_BLANK).  The
-X query residues are read from f_str->aa0t, not from the aa0 argument.
-X The op stream a_res.res is decoded as: 0 = aligned pair, positive count =
-X library residues opposite '-' in the query, negative count = query
-X residues opposite '-' in the library.
-X
-X aln->amin0/amin1/amax0/amax1, smin0/smin1, nident and nsim are updated;
-X *nc is set to the number of aligned (non-gap) columns.
-X Returns the total number of columns written (left context + alignment +
-X right context). */
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{
-X int i0, i1, nn1, n0t;
-X int op, lenc, len_gap, nd, ns, itmp;
-X const unsigned char *aa1p;
-X char *sp0, *sp1, *sq, *spa;
-X int *rp;
-X int mins, smins;
-X
-X /* residue code -> display character table */
-X sq = pst.sq;
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X /* first fill in the ends */
-X /* a_res.min0--; a_res.min1--; */
-X n0 -= (f_str->nm0-1); /* n0 is not used again below */
-X
-X aln->amin0 = a_res.min0;
-X aln->amin1 = a_res.min1;
-X aln->amax0 = a_res.max0;
-X aln->amax1 = a_res.max1;
-X
-X /* left context: mins columns are written before the alignment proper */
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
-X /* will we show all the start ?*/
-X smins=0;
-X mins = min(a_res.min1,aln->llen/2);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X /* query starts later than the library: pad the query line with blanks */
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X
-X memset(seqca,M_BLANK,mins);
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X op = *rp++;
-X
-X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X *sp0 = sq[f_str->aa0t[i0++]];
-X *sp1 = sq[aa1p[i1++]];
-X n0t++;
-X lenc++;
-X /* identity is case-insensitive and overrides the similarity mark */
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) { op = *rp++;}
-X if (op>0) {
-X /* gap in the query: consume one library residue */
-X *sp0++ = '-';
-X *sp1++ = sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X len_gap++;
-X lenc++;
-X }
-X else {
-X /* gap in the library: consume one query residue */
-X *sp0++ = sq[f_str->aa0t[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X n0t++;
-X len_gap++;
-X lenc++;
-X }
-X }
-X }
-X
-X *spa = '\0';
-X *nc = lenc-len_gap;
-X
-X /* now we have the middle, get the right end */
-X /* ns is amount to be shown */
-X /* nd is amount remaining to be shown */
-X ns = mins + lenc + aln->llen;
-X ns -= (itmp = ns %aln->llen);
-X if (itmp>aln->llen/2) ns += aln->llen;
-X nd = ns - (mins+lenc);
-X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
-X
-X /* NOTE(review): n0t counts query residues consumed above (it starts at
-X 0), while a_res.max0 is a query coordinate; n0t-a_res.max0 is only
-X non-negative when a_res.min0 is 0, as sconn_a() sets it - confirm */
-X if (aln->showall==1) {
-X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X /* there isn't any aa0 to get */
-X memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X memset(seqc0+mins+lenc,' ',nd);
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X
-X return mins+lenc+nd;
-}
-X
-/* calcons_a - calcons() with an additional query-annotation line.
-X
-X Builds seqc0 (query), seqc1 (library) and seqca (match symbols) exactly
-X as calcons() does, and additionally writes seqc0a, which this version
-X fills only with blanks (the aa0a and ann_arr arguments are not read).
-X Query residues come from f_str->aa0t.  *nc receives the number of
-X aligned (non-gap) columns; the return value is the total number of
-X columns written. */
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{
-X int i0, i1, nn1, n0t;
-X int op, lenc, len_gap, nd, ns, itmp;
-X const unsigned char *aa1p;
-X char *sp0, *sp0a, *sp1, *sq, *spa;
-X int *rp;
-X int mins, smins;
-X
-X /* residue code -> display character table */
-X sq = pst.sq;
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amin1 = a_res.min1;
-X aln->amax0 = a_res.max0;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X n0 -= (f_str->nm0-1); /* n0 is not used again below */
-X
-X /* left context: mins columns are written before the alignment proper */
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
-X /* will we show all the start ?*/
-X smins=0;
-X mins = min(a_res.min1,aln->llen/2);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X /* query starts later than the library: pad the query line with blanks */
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X
-X memset(seqca,M_BLANK,mins);
-X memset(seqc0a,' ',mins);
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp0a = seqc0a+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X op = *rp++;
-X
-X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X *sp0a++ = ' ';
-X *sp0 = sq[f_str->aa0t[i0++]];
-X *sp1 = sq[aa1p[i1++]];
-X n0t++;
-X lenc++;
-X /* identity is case-insensitive and overrides the similarity mark */
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) { op = *rp++;}
-X if (op>0) {
-X /* gap in the query: consume one library residue */
-X *sp0++ = '-';
-X *sp0a++ = ' ';
-X *sp1++ = sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X len_gap++;
-X lenc++;
-X }
-X else {
-X /* gap in the library: consume one query residue */
-X *sp0++ = sq[f_str->aa0t[i0++]];
-X *sp0a++ = ' ';
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X n0t++;
-X len_gap++;
-X lenc++;
-X }
-X }
-X }
-X
-X *sp0a = *spa = '\0';
-X *nc = lenc-len_gap;
-X
-X /* now we have the middle, get the right end */
-X /* ns is amount to be shown */
-X /* nd is amount remaining to be shown */
-X ns = mins + lenc + aln->llen;
-X ns -= (itmp = ns %aln->llen);
-X if (itmp>aln->llen/2) ns += aln->llen;
-X nd = ns - (mins+lenc);
-X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
-X
-X /* NOTE(review): n0t counts query residues consumed above (it starts at
-X 0), while a_res.max0 is a query coordinate; n0t-a_res.max0 is only
-X non-negative when a_res.min0 is 0, as sconn_a() sets it - confirm */
-X if (aln->showall==1) {
-X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X /* there isn't any aa0 to get */
-X memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X memset(seqc0+mins+lenc,' ',nd);
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X
-X return mins+lenc+nd;
-}
-X
-/* aa0shuffle - reverse the column order of the query mixture in place.
-X aa0 is treated as f_str->nm0 rows, each f_str->nmoff bytes apart; columns
-X 0 .. nmoff-2 of every row are reversed.  n0 is not used. */
-void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
-X
-X  int lo, hi, row, base;
-X  unsigned char held;
-X
-X  /* walk inward from both ends, exchanging whole columns */
-X  for (lo = 0, hi = f_str->nmoff - 2; lo < hi; lo++, hi--) {
-X    for (row = 0; row < f_str->nm0; row++) {
-X      base = row * f_str->nmoff;
-X      held = aa0[base + hi];
-X      aa0[base + hi] = aa0[base + lo];
-X      aa0[base + lo] = held;
-X    }
-X  }
-}
-X
-/* build an array of match/ins/del - length strings */
-/* calc_code - walk the alignment ops in a_res.res and append a run-length
-X description to al_str through update_code(): '=' runs for aligned pairs
-X (tagged with f_str->aa0ti[]+1 for the query position where the run
-X began), '-' runs for gaps in the query and '+' runs for gaps in the
-X library.  al_str is appended to, so it must already hold a terminated
-X string; al_str_n is its total size.  aln->nident and aln->nsim are
-X recomputed.  Returns the number of aligned (non-gap) columns. */
-int calc_code(const unsigned char *aa0, const int n0,
-X const unsigned char *aa1, const int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc, len_gap;
-X int p_op, op_cnt;
-X const unsigned char *aa1p;
-X char tmp_cnt[20];
-X char sp0, sp1, *sq;
-X int *rp;
-X int mins, smins;
-X int fnum = 0;
-X
-X /* NOTE(review): sq is selected here, but the loop below indexes pst.sq
-X directly; calc_id() uses sq - confirm which is intended */
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X rp = a_res.res;
-X lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
-X op_cnt = 0;
-X
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X tmp_cnt[0]='\0';
-X
-X /* p_op is the kind of the run being accumulated (0 match, 1 gap in
-X query, 2 gap in library) and op_cnt its length so far */
-X fnum = f_str->aa0ti[i0] + 1;
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X if (p_op == 0) { op_cnt++;}
-X else {
-X /* run kind changed: flush the previous run and start a match run */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt,fnum);
-X op_cnt = 1; p_op = 0;
-X fnum = f_str->aa0ti[i0] + 1;
-X }
-X op = *rp++;
-X lenc++;
-X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
-X sp0 = pst.sq[f_str->aa0t[i0++]];
-X sp1 = pst.sq[aa1p[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X /* gap in the query: one library residue consumed */
-X if (p_op == 1) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
-X op_cnt = 1; p_op = 1; fnum = f_str->aa0ti[i0] + 1;
-X }
-X op--; lenc++; i1++; len_gap++;
-X }
-X else {
-X /* gap in the library: one query residue consumed */
-X if (p_op == 2) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
-X op_cnt = 1; p_op = 2; fnum = f_str->aa0ti[i0] + 1;
-X }
-X op++; lenc++; i0++; len_gap++;
-X }
-X }
-X }
-X /* flush the final run */
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
-X
-X return lenc - len_gap;
-}
-X
-/* update_code - append one run of the alignment encoding to al_str.
-X
-X op selects the run kind (0 '=', 1 '-', 2 '+'); op_cnt is the run length.
-X For op 0 the text is "=<op_cnt>[<fnum>]", otherwise "<c><op_cnt>".
-X al_str_max is the space still free in al_str INCLUDING the byte needed
-X for the terminating NUL (callers pass size - strlen(al_str)).  Text that
-X does not fit is truncated; nothing is written when there is no room. */
-void
-update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
-X
-X  char op_char[4]={"=-+"};
-X  /* "%c%d[%d]" can need 1+11+1+11+1 characters plus the NUL for 32-bit
-X     ints, which does not fit in the 20 bytes used previously */
-X  char tmp_cnt[32];
-X
-X  if (op == 0)
-X    sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
-X  else
-X    sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X
-X  /* strncat() appends up to n characters and THEN a NUL, so only
-X     al_str_max-1 characters may be requested; a zero or negative
-X     al_str_max must not reach strncat(), where it would be converted to
-X     a huge unsigned count */
-X  if (al_str_max > 1)
-X    strncat(al_str,tmp_cnt,al_str_max-1);
-}
-X
-/* calc_id - count identities and similarities for an alignment without
-X building any display strings.
-X
-X Walks the op stream a_res.res (0 = aligned pair, positive = gap in the
-X query, negative = gap in the library) from (a_res.min0, a_res.min1) to
-X (a_res.max0, a_res.max1), reading query residues from f_str->aa0t.
-X aln->nident counts case-insensitive identical pairs and aln->nsim counts
-X pairs with a pam2 score >= 0; aln->ngap_q and aln->ngap_l are reset to 0.
-X Returns the number of aligned (non-gap) columns. */
-int calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X int i0, i1, nn1, n0t;
-X int op, lenc, len_gap;
-X const unsigned char *aa1p;
-X int sp0, sp1;
-X int *rp;
-X int mins, smins;
-X char *sq;
-X
-X /* use the extended alphabet table when one is set */
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X /* first fill in the ends */
-X /* a_res.min0--; a_res.min1--; */
-X n0 -= (f_str->nm0-1); /* n0 is not used again below */
-X
-X /* now get the middle */
-X rp = a_res.res;
-X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X op = *rp++;
-X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
-X sp0 = sq[f_str->aa0t[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X n0t++;
-X lenc++;
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X }
-X else {
-X if (op==0) { op = *rp++;}
-X if (op>0) {
-X /* gap in the query: skip one library residue */
-X i1++;
-X op--;
-X len_gap++;
-X lenc++;
-X }
-X else {
-X /* gap in the library: skip one query residue */
-X i0++;
-X op++;
-X n0t++;
-X len_gap++;
-X lenc++;
-X }
-X }
-X }
-X return lenc-len_gap;
-}
-X
-#ifdef PCOMPLIB
-X
-#include "structs.h"
-#include "p_mw.h"
-X
-/* update_params - copy the per-query values received in *qm_msg into the
-X local message and parameter structures: n0 goes to both ppst and m_msg;
-X nm0, escore_flg and qshuffle go to m_msg. */
-void
-update_params(struct qmng_str *qm_msg,
-X	      struct mngmsg *m_msg, struct pstruct *ppst)
-{
-X  /* query length, kept in both structures */
-X  ppst->n0 = qm_msg->n0;
-X  m_msg->n0 = ppst->n0;
-X
-X  m_msg->qshuffle = qm_msg->qshuffle;
-X  m_msg->escore_flg = qm_msg->escore_flg;
-X  m_msg->nm0 = qm_msg->nm0;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropff2.c ||
-echo 'restore of dropff2.c failed'
-Wc_c="`wc -c < 'dropff2.c'`"
-test 48853 -eq "$Wc_c" ||
- echo 'dropff2.c: original size 48853, current size' "$Wc_c"
-fi
-# ============= dropfs2.c ==============
-if test -f 'dropfs2.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropfs2.c (File already exists)'
-else
-echo 'x - extracting dropfs2.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropfs2.c' &&
-/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropfs2.c,v 1.40 2007/02/26 21:56:59 wrp Exp $ */
-X
-/* changed to return 2.0, rather than -1.0, for failure */
-X
-/* Feb 4, 2005 - modifications to allow searches with ktup=2 for very
-X long queries. This is a temporary solution to savemax(), spam()
-X which do not preserve exact matches
-X
-X do_fasts() has been modified to allow higher maxsav for do_walign
-X than for do_work (2*nsegs, 6*nsegs)
-X */
-X
-/* this code implements the "fasts" algorithm, which compares a set of
-X protein fragments to a protein sequence. Commas are used to separate
-X the sequence fragments, which need not be the same length.
-X
-X The expected input is:
-X
-X >mgstm1
-X MGDAPDFD,
-X MILGYW,
-X MLLEYTDS
-X
-X The fragments do not need to be in the correct order (which is
-X presumably unknown from the peptide sequencing).
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "tatstats.h"
-X
-#define EOSEQ 0
-#define ESS 49
-#define NMAP_X 23 /* for 'X' */
-#define NMAP_Z 24 /* for '*' */
-#define MAXHASH 32
-#define NMAP MAXHASH+1
-X
-static char *verstr="4.32 Feb 2007";
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-int shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq);
-static void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
-extern void aancpy(char *to, char *from, int count, struct pstruct pst);
-X
-#ifdef TFAST
-extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);
-#endif
-X
-void savemax(struct dstruct *, struct f_struct *, int maxsav, int exact,int t_end);
-X
-int spam(const unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);
-int sconn(struct savestr **v,
-X int nsave,
-X struct f_struct *,
-X struct rstruct *,
-X struct pstruct *,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int opt_prob);
-X
-void kpsort(struct savestr **, int);
-void kssort(struct savestr **, int); /* sort by score */
-int sconn_a(unsigned char *, int,
-X const unsigned char *, int,
-X struct f_struct *,
-X struct a_res_str *,
-X struct pstruct *);
-void kpsort(struct savestr **, int);
-X
-/* initialize for fasta */
-X
-void
-init_work (unsigned char *aa0, const int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg
-X )
-{
-X int mhv, phv;
-X int hmax, nsegs;
-X int i0, ib, hv, old_hv;
-X int pamfact;
-X struct f_struct *f_str;
-X /* these used to be globals, but do not need to be */
-X int ktup, fact, kt1;
-X
-X int maxn0;
-X int stmp; /* temporary score */
-X int i, j, q;
-X int tat_size;
-X int *res;
-X
-X unsigned char *query;
-X int k, l, m, n, N, length, index;
-X
-X double *tatprobptr;
-X
-X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
-X
-X ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
-X ktup = ppst->param_u.fa.ktup;
-X if ( ktup > 2 ) {
-X ktup = ppst->param_u.fa.ktup = 2;
-X }
-X fact = ppst->param_u.fa.scfact;
-X
-X /* fasts3 cannot work with lowercase symbols as low complexity;
-X thus, NMAP must be disabled; this depends on aascii['X'] */
-X if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
-X if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
-X /* this does not work in a threaded environment */
-X /* else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
-X
-X for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
-X if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
-X
-X if (mhv <= 0) {
-X fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
-X exit (1);
-X }
-X
-X for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
-X
-/* kshft = 2; */
-X kt1 = ktup-1;
-X hv = 1;
-X for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
-X hmax = hv;
-X f_str->hmask = (hmax >> f_str->kshft) - 1;
-X
-X if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0ti = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0ti array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0b = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0b array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0e = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0e array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0i = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0i array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0s = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0s array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->aa0l = (int *) calloc(n0+1, sizeof(int))) == NULL) {
-X fprintf (stderr, " cannot allocate f_str0->aa0l array; %d\n",n0+1);
-X exit (1);
-X }
-X
-X if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
-X hmax, f_str->hmask);
-X exit (1);
-X }
-X if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh1 array\n");
-X exit (1);
-X }
-X if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh2 array\n");
-X exit (1);
-X }
-X
-X if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash link array");
-X exit (1);
-X }
-X
-X /* for FASTS/FASTM, we want to know when we get to the end of a peptide,
-X so we can ensure that we set the end and restart */
-X
-X if ((f_str->l_end = (int *) calloc (n0, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate link end array");
-X exit (1);
-X }
-X
-X for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
-X for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
-X for (i0 = 0; i0 < n0; i0++) f_str->l_end[i0] = 0;
-X
-X /* count the number of peptides */
-X nsegs = 1;
-X for (i0 = 0; i0 < n0; i0++) {
-X if (aa0[i0] == ESS || aa0[i0] == 0) nsegs++;
-X }
-X
-X /* allocate space for peptides offsets, nm_u */
-X if ((f_str->nmoff = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
-X fprintf(stderr, " cannot allocat nmoff array: %d\n", nsegs);
-X exit(1);
-X }
-X
-X if ((f_str->nm_u = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
-X fprintf(stderr, " cannot allocat nm_u array: %d\n", nsegs);
-X exit(1);
-X }
-X
-X phv = hv = 0;
-X f_str->nmoff[0] = 0;
-X f_str->nm0 = 1;
-X
-X /* encode the aa0 array */
-X if (kt1 > 0) {
-X hv = ppst->hsq[aa0[0]];
-X phv = ppst->pam2[0][aa0[0]][aa0[0]];
-X }
-X
-X for (i0=kt1 ; i0 < n0; i0++) {
-X if (aa0[i0] == ESS || aa0[i0] == 0) {
-X /* fprintf(stderr," converted %d to 0\n",aa0[i0]); */
-X aa0[i0] = EOSEQ; /* set ESS to 0 */
-X f_str->nmoff[f_str->nm0++] = i0+1;
-X f_str->l_end[i0-1] = 1;
-X phv = hv = 0;
-X if (kt1 > 0) {
-X i0++;
-X hv = ppst->hsq[aa0[i0]];
-X phv = ppst->pam2[0][aa0[i0]][aa0[i0]];
-X }
-X continue;
-X }
-X
-X hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
-X f_str->link[i0] = f_str->harr[hv];
-X f_str->harr[hv] = i0;
-X f_str->pamh2[hv] = (phv += ppst->pam2[0][aa0[i0]][aa0[i0]]);
-X phv -= ppst->pam2[0][aa0[i0 - kt1]][aa0[i0 - kt1]];
-X }
-X f_str->l_end[n0-1] = 1;
-X
-X f_str->nmoff[f_str->nm0] = n0+1;
-X
-X /*
-#ifdef DEBUG
-X fprintf(stderr, ">>%s\n",qtitle);
-X for (j=0; j<f_str->nm0; j++) {
-X for (i=f_str->nmoff[j]; i < f_str->nmoff[j+1]-1; i++) {
-X fprintf(stderr,"%c",ppst->sq[aa0[i]]);
-X }
-X fprintf(stderr," %d\n",aa0[i]);
-X }
-X
-X for (j=1; j<=ppst->nsq; j++) {
-X fprintf(stderr, "%c %d\n", ppst->sq[j], f_str->harr[j]);
-X }
-X
-X for (j=0; j<=n0; j++) {
-X fprintf(stderr, "%c %d\n", ppst->sq[aa0[j]], f_str->link[j]);
-X }
-X
-#endif
-X */
-X
-X /* build an integer array of the max score that can be achieved
-X from that position - use in savemax to mark some segments as
-X fixed */
-X
-X /* setup aa0b[], aa0e[], which specify the begining and end of each
-X segment */
-X
-X stmp = 0;
-X q = -1;
-X for (ib = i0 = 0; i0 < n0; i0++) {
-X f_str->aa0l[i0] = i0 - q;
-X if (aa0[i0]==EOSEQ) {
-X f_str->aa0b[i0] = -1;
-X f_str->aa0e[i0] = -1;
-X f_str->aa0i[i0] = -1;
-X f_str->aa0l[i0] = -1;
-X q = i0;
-X if (i0 > 0)f_str->aa0s[i0-1] = stmp;
-X stmp = 0;
-X ib++;
-X }
-X else {
-X stmp += ppst->pam2[0][aa0[i0]][aa0[i0]];
-X }
-X
-X f_str->aa0b[i0] = f_str->nmoff[ib];
-X f_str->aa0e[i0] = f_str->nmoff[ib+1]-2;
-X f_str->aa0i[i0] = ib;
-X
-X /*
-X fprintf(stderr,"%2d %c: %2d %2d %2d\n",i0,ppst->sq[aa0[i0]],
-X f_str->aa0b[i0],f_str->aa0e[i0],f_str->aa0i[i0]);
-X */
-X }
-X f_str->aa0s[n0-1]=stmp; /* save last best possible score */
-X
-X /* maxsav - maximum number of peptide alignments saved in search */
-X /* maxsav_w - maximum number of peptide alignments saved in
-X alignment */
-X
-X f_str->maxsav = max(MAXSAV,2*f_str->nm0);
-X f_str->maxsav_w = max(MAXSAV,6*f_str->nm0);
-X
-X if ((f_str->vmax = (struct savestr *)
-X calloc(f_str->maxsav_w,sizeof(struct savestr)))==NULL) {
-X fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav_w);
-X exit(1);
-X }
-X
-X if ((f_str->vptr = (struct savestr **)
-X calloc(f_str->maxsav_w,sizeof(struct savestr *)))==NULL) {
-X fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav_w);
-X exit(1);
-X }
-X
-X if ((f_str->sarr = (struct slink *)
-X calloc(f_str->maxsav_w,sizeof(struct slink)))==NULL) {
-X fprintf(stderr, "Couldn't allocate sarr[%d].\n",f_str->maxsav_w);
-X exit(1);
-X }
-X
-X /* Tatusov Statistics Setup */
-X
-X /* initialize priors array. */
-X if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
-X fprintf(stderr, "Couldn't allocate priors array.\n");
-X exit(1);
-X }
-X
-X calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
-X
-X /* pre-calculate the Tatusov probability array for each full segment */
-X
-X if(ppst->zsflag >= 1 && ppst->zsflag <= 3 && f_str->nm0 <= 10) {
-X
-X tat_size = (1<<f_str->nm0) -1;
-X f_str->dotat = 1;
-X f_str->tatprobs = (struct tat_str **) malloc((size_t)tat_size*sizeof(struct tat_str *));
-X if (f_str->tatprobs == NULL) {
-X fprintf (stderr, " cannot allocate tatprobs array: %ld\n",
-X tat_size * sizeof(struct tat_str *));
-X exit (1);
-X }
-X
-X f_str->intprobs = (double **) malloc((size_t)tat_size * sizeof(double *));
-X if(f_str->intprobs == NULL) {
-X fprintf(stderr, "Couldn't allocate intprobs array.\n");
-X exit(1);
-X }
-X
-X for(k = 0, l = f_str->nm0 ; k < l ; k++) {
-X query = &(aa0[f_str->nmoff[k]]);
-X length = f_str->nmoff[k+1] - f_str->nmoff[k] - 1;
-X
-X /* this segment alone */
-X index = (1 << k) - 1;
-X generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), NULL);
-X
-X /* integrate the probabilities */
-X N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
-X tatprobptr = (double *) calloc(N+1, sizeof(double));
-X if(tatprobptr == NULL) {
-X fprintf(stderr, "Couldn't calloc tatprobptr.\n");
-X exit(1);
-X }
-X f_str->intprobs[index] = tatprobptr;
-X
-X for (i = 0; i <= N ; i++ ) {
-X tatprobptr[i] = f_str->tatprobs[index]->probs[i];
-X for (j = i + 1 ; j <= N ; j++ ) {
-X tatprobptr[i] += f_str->tatprobs[index]->probs[j];
-X }
-X }
-X
-X /* this segment built on top of all other subcombinations */
-X for(i = 0, j = (1 << k) - 1 ; i < j ; i++) {
-X index = (1 << k) + i;
-X generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), f_str->tatprobs[i]);
-X
-X /* integrate the probabilities */
-X N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
-X tatprobptr = (double *) calloc(N+1, sizeof(double));
-X if(tatprobptr == NULL) {
-X fprintf(stderr, "Couldn't calloc tatprobptr.\n");
-X exit(1);
-X }
-X f_str->intprobs[index] = tatprobptr;
-X
-X for (m = 0; m <= N ; m++ ) {
-X tatprobptr[m] = f_str->tatprobs[index]->probs[m];
-X for (n = m + 1 ; n <= N ; n++ ) {
-X tatprobptr[m] += f_str->tatprobs[index]->probs[n];
-X }
-X }
-X }
-X }
-X } else {
-X f_str->dotat = 0;
-X f_str->shuff_cnt = ppst->shuff_node;
-X }
-X
-X /* End of Tatusov Statistics Setup */
-X
-X /*
-X for (i0=1; i0<=ppst->nsq; i0++) {
-X fprintf(stderr," %c: %2d ",ppst->sq[i0],f_str->harr[i0]);
-X hv = f_str->harr[i0];
-X while (hv >= 0) {
-X fprintf(stderr," %2d",f_str->link[hv]);
-X hv = f_str->link[hv];
-X }
-X fprintf(stderr,"\n");
-X }
-X */
-X
-/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
-X pam2[0][0] is now undefined for consistency with blast
-*/
-X for (i0 = 1; i0 <= ppst->nsq; i0++)
-X f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
-X
-X f_str->ndo = 0;
-X f_str->noff = n0-1;
-X if (f_str->diag==NULL)
-X f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
-X sizeof (struct dstruct));
-X if (f_str->diag == NULL) {
-X fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
-X (long) MAXDIAG * (long) (sizeof (struct dstruct)));
-X exit (1);
-X }
-X
-#ifdef TFAST
-X if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
-X exit (1);
-X }
-X f_str->aa1x++;
-#endif
-X
-X maxn0 = max(3*n0/2,MIN_RES);
-X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X exit(1);
-X }
-X f_str->res = res;
-X f_str->max_res = maxn0;
-X
-X *f_arg = f_str;
-}
-X
-X
-/* get_param() - describe the comparison function and its parameters.
-X
-X   pstring1 receives a one-line summary (a message to the manager; the
-X   original note gives its size as "currently 512").  pstring2, when it
-X   is not NULL, receives the same information as "; pg_..." records in
-X   the markx==10 format.  The program name is fixed at compile time by
-X   FASTS/FASTM and TFAST. */
-void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-#ifdef FASTS
-#ifdef TFAST
-X  char *pg_str="TFASTS";
-#else
-X  char *pg_str="FASTS";
-#endif
-#endif
-X
-#ifdef FASTM
-#ifdef TFAST
-X  char *pg_str="TFASTM";
-#else
-X  char *pg_str="FASTM";
-#endif
-#endif
-X
-X  sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)] ktup=%d",
-X           pg_str, verstr, pstr->pamfile,
-X           pstr->pam_h, pstr->pam_l,
-X           pstr->param_u.fa.ktup);
-X
-X  if (pstr->param_u.fa.iniflag) {
-X    strcat (pstring1, " init1");
-X  }
-X
-X  /*
-X  if (pstr->zsflag==0) strcat(pstring1," not-scaled");
-X  else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
-X  */
-X
-X  /* the markx==10 form is optional */
-X  if (pstring2 == NULL) return;
-X
-X  sprintf (pstring2,
-X           "; pg_name: %s\n"
-X           "; pg_ver: %s\n"
-X           "; pg_matrix: %s (%d:%d)\n"
-X           "; pg_gap-pen: %d %d\n"
-X           "; pg_ktup: %d\n",
-X           pg_str, verstr, pstr->pamfile,
-X           pstr->pam_h, pstr->pam_l,
-X           pstr->gdelval, pstr->ggapval,
-X           pstr->param_u.fa.ktup);
-}
-X
-/* close_work() - release the per-query work area referenced by *f_arg.
-X
-X   Frees every array hung off the f_struct (including the Tatusov
-X   probability tables when f_str->dotat is set), then the f_struct
-X   itself, and sets *f_arg to NULL.  Does nothing when *f_arg is
-X   already NULL.  aa0, n0 and ppst are not used here. */
-void
-close_work (const unsigned char *aa0, const int n0,
-X            struct pstruct *ppst,
-X            struct f_struct **f_arg)
-{
-X  struct f_struct *f_str;
-X  int i, j;
-X
-X  f_str = *f_arg;
-X
-X  if (f_str != NULL) {
-X
-X    free(f_str->res);
-#ifdef TFAST
-X    free(f_str->aa1x - 1); /* because f_str->aa1x got ++'ed when allocated! */
-#endif
-X    free(f_str->diag);
-X    free(f_str->l_end);
-X    free(f_str->link);
-X    free(f_str->pamh2);
-X    free(f_str->pamh1);
-X    free(f_str->harr);
-X    free(f_str->vmax);
-X    free(f_str->vptr);
-X    free(f_str->sarr);
-X    /* aa0s[] and aa0l[] are calloc()'ed next to the other aa0* arrays
-X       during setup; without these two calls they leak once f_str
-X       itself is freed below */
-X    free(f_str->aa0l);
-X    free(f_str->aa0s);
-X    free(f_str->aa0i);
-X    free(f_str->aa0e);
-X    free(f_str->aa0b);
-X    free(f_str->aa0ti);
-X    free(f_str->aa0t);
-X    free(f_str->nmoff);
-X    free(f_str->nm_u);
-X
-X    if(f_str->dotat) {
-X      /* one table per non-empty subset of the nm0 segments:
-X         (1 << nm0) - 1 entries */
-X      for(i = 0, j = (1 << f_str->nm0) - 1 ; i < j ; i++) {
-X        free(f_str->tatprobs[i]->probs);
-X        free(f_str->tatprobs[i]);
-X        free(f_str->intprobs[i]);
-X      }
-X      free(f_str->tatprobs);
-X      free(f_str->intprobs);
-X    }
-X
-X    free(f_str->priors);
-X    free(f_str);
-X    *f_arg = NULL;
-X  }
-}
-X
-/* do_fasts() - compare the query peptides in aa0[0..n0-1] with one
-X   library sequence aa1[0..n1-1] using the lookup tables in f_str.
-X
-X   Steps, in order:
-X     1. reset the diagonal entries diag[ndo..n0+n1-1] and the first
-X        maxsav entries of vmax[];
-X     2. hash each library position into harr[]/link[] and extend or
-X        restart the run on the matching diagonal, saving good runs
-X        with savemax();
-X     3. flush the diagonals, saving any run better than lowscor;
-X     4. rescore each saved run with spam();
-X     5. sort by score and keep only the best run for each query
-X        segment (nm_u[] marks segments already taken);
-X     6. join the surviving runs with sconn().
-X
-X   Results: rst->score[0..2], rst->escore, rst->segnum, rst->seglen,
-X   and f_str->nsave (number of entries of f_str->vptr[] that are
-X   valid, capped at f_str->nm0).  hoff is not used by this function.
-X
-X   f_str->nsave is now cleared on the two early-return paths as well,
-X   so a caller that inspects f_str->vptr[0..nsave-1] afterwards (as
-X   do_walign() does) never sees a count left over from an earlier
-X   sequence.
-*/
-void do_fasts (const unsigned char *aa0, const int n0,
-X               const unsigned char *aa1, const int n1,
-X               struct pstruct *ppst, struct f_struct *f_str,
-X               struct rstruct *rst, int *hoff, int opt_prob,
-X               int maxsav)
-{
-X  int nd;		/* diagonal array size */
-X  int lhval;
-X  int kfact;
-X  register struct dstruct *dptr;
-X  register int tscor;
-X  register struct dstruct *diagp;
-X  struct dstruct *dpmax;
-X  register int lpos;
-X  int tpos;
-X  struct savestr *vmptr, *vmaxmax;
-X  int scor;
-X  int ib, nsave;
-X  int ktup;
-X  int doffset;
-X
-X
-X  vmaxmax = &f_str->vmax[maxsav];
-X
-X  ktup = ppst->param_u.fa.ktup;
-X
-X  /* library sequence shorter than one word: nothing can match */
-X  if (n1 < ktup) {
-X    rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X    rst->escore = 1.0;
-X    rst->segnum = 0;
-X    rst->seglen = 0;
-X    f_str->nsave = 0;
-X    return;
-X  }
-X
-X  /* the diagonal array has MAXDIAG entries; refuse pairs that need more */
-X  if (n0+n1+1 >= MAXDIAG) {
-X    fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
-X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X    rst->escore = 2.0;
-X    rst->segnum = 0;
-X    rst->seglen = 0;
-X    f_str->nsave = 0;
-X    return;
-X  }
-X
-X  nd = n0 + n1;
-X
-X  /* entries below ndo were already reset at the end of the previous call */
-X  dpmax = &f_str->diag[nd];
-X  for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
-X    {
-X      dptr->stop = -1;
-X      dptr->dmax = NULL;
-X      dptr++->score = 0;
-X    }
-X
-X  for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) {
-X    vmptr->score = 0;
-X    vmptr->exact = 0;
-X  }
-X  f_str->lowmax = f_str->vmax;
-X  f_str->lowscor = 0;
-X
-X  /* start hashing */
-X  diagp = &f_str->diag[f_str->noff];
-X  for (lhval=lpos=0; lpos < n1; lpos++, diagp++) {
-X    if (ppst->hsq[aa1[lpos]]>=NMAP) {	/* skip residue */
-X      lpos++ ; diagp++;
-X      while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
-X      if (lpos >= n1) break;
-X      lhval = 0;
-X    }
-X
-X    lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
-X
-X    /* visit every query position whose word hashes to lhval */
-X    for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X
-X      dptr = &diagp[-tpos];
-X
-X      if (f_str->l_end[tpos]) {
-X        /* tpos is the last residue of a query segment */
-X        if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) {
-X          dptr->stop = lpos;
-X          dptr->score = f_str->aa0s[tpos];
-X          savemax(dptr, f_str, maxsav, 1, tpos);
-X          dptr->dmax = NULL;
-X        }
-X
-X        else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) {
-X          /*
-X          fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n",
-X                  tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]],
-X                  dptr->start, dptr->stop,
-X                  dptr->stop - dptr->start, f_str->aa0l[tpos]);
-X          */
-X          dptr->stop = lpos;
-X          dptr->start = lpos - f_str->aa0l[tpos];
-X          dptr->score = f_str->aa0s[tpos];
-X          savemax(dptr, f_str, maxsav, 1, tpos);
-X          dptr->dmax = NULL;
-X        }
-X      }
-X      else if ((tscor = dptr->stop) >= 0) {
-X        tscor++;		/* tscor is stop of current, increment it */
-X        if ((tscor -= lpos) <= 0) { /* tscor, the end of the current
-X                                       match, is before lpos, so there
-X                                       is a mismatch - this is also the
-X                                       mismatch cost */
-X          tscor *= 2;
-X          scor = dptr->score;	/* save the run score on the diag */
-X          if ((tscor += (kfact = f_str->pamh2[lhval])) < 0
-X              && f_str->lowscor < scor) {
-X            /* if what we will get (tscor + kfact) is < 0 and the
-X               score is better than the worst savemax() score, save
-X               it */
-X            savemax (dptr, f_str, maxsav,0,-1);
-X          }
-X
-X          /* if extending is better than starting over, extend */
-X          if ((tscor += scor) >= kfact) {
-X            dptr->score = tscor;
-X            dptr->stop = lpos;
-X            /* NOTE(review): l_end[tpos] was already tested false to get
-X               into this branch, so this test cannot succeed here */
-X            if (f_str->l_end[tpos]) {
-X              if (dptr->score == f_str->aa0s[tpos]) {
-X                savemax(dptr, f_str, maxsav,1,tpos);
-X                dptr->dmax = NULL;
-X              }
-X              else if (dptr->score > f_str->lowscor)
-X                savemax(dptr, f_str, maxsav,0,tpos);
-X            }
-X          }
-X          else {		/* otherwise, start new */
-X            dptr->score = kfact;
-X            dptr->start = dptr->stop = lpos;
-X          }
-X        }
-X        else {		/* tscor is after lpos, so extend one residue */
-X          dptr->score += f_str->pamh1[aa0[tpos]];
-X          dptr->stop = lpos;
-X          /* NOTE(review): same as above - not reachable with l_end set */
-X          if (f_str->l_end[tpos]) {
-X            if (dptr->score == f_str->aa0s[tpos]) {
-X              savemax(dptr, f_str, maxsav,1,tpos);
-X              dptr->dmax = NULL;
-X            }
-X            else if (dptr->score > f_str->lowscor)
-X              savemax(dptr, f_str, maxsav,0,tpos);
-X          }
-X        }
-X      }
-X      else {		/* start new */
-X        dptr->score = f_str->pamh2[lhval];
-X        dptr->start = dptr->stop = lpos;
-X      }
-X    }				/* end tpos */
-X  }				/* end lpos */
-X
-X  /* flush: save any run still better than the worst saved one, and
-X     leave diag[0..nd-1] reset for the next call */
-X  for (dptr = f_str->diag; dptr < dpmax;) {
-X    if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1);
-X    dptr->stop = -1;
-X    dptr->dmax = NULL;
-X    dptr++->score = 0;
-X  }
-X  f_str->ndo = nd;
-X
-/*
-X        at this point all of the elements of aa1[lpos]
-X        have been searched for elements of aa0[tpos]
-X        with the results in diag[dpos]
-*/
-X
-X  /* rescore the saved runs; only those still positive are kept */
-X  for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) {
-X    if (vmptr->score > 0) {
-X      /*
-X
-X      fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d",
-X              (vmptr->exact ? 'x' : ' '),
-X              f_str->noff+vmptr->start-vmptr->dp,
-X              f_str->noff+vmptr->stop-vmptr->dp,
-X              vmptr->start,vmptr->stop,
-X              vmptr->dp,vmptr->score);
-X      */
-X      vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
-X      /*
-X      fprintf(stderr," sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop);
-X      */
-X      if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr;
-X    }
-X  }
-X
-X  if (nsave <= 0) {
-X    rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X    rst->escore = 1.0;
-X    rst->segnum = 0;
-X    rst->seglen = 0;
-X    f_str->nsave = 0;
-X    return;
-X  }
-X
-X  /*
-X  fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
-X  for (ib=0; ib<nsave; ib++) {
-X    fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X            f_str->vptr[ib]->exact ? 'x' : ' ',
-X            f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X            f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X            f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X            f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X  }
-X
-X  fprintf(stderr,"---\n");
-X  */
-X  kssort(f_str->vptr,nsave);
-X
-X  /* make certain each seg is used only once */
-X
-X  for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0;
-X  for (ib=0; ib < nsave; ib++) {
-X    doffset = f_str->vptr[ib]->dp - f_str->noff;
-X    tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset];
-X    if (f_str->nm_u[tpos] == 0) {
-X      f_str->nm_u[tpos]=1;
-X    } else {
-X      /* a better run already claimed this segment; mark for removal */
-X      f_str->vptr[ib]->score = -1;
-X    }
-X  }
-X
-X  /* re-sort so the runs marked -1 fall to the end, then drop them */
-X  kssort(f_str->vptr,nsave);
-X  for (ib = nsave-1; ib >= 0; ib--)
-X    if (f_str->vptr[ib]->score > -1) break;
-X  nsave = ib+1;
-X
-X  scor = sconn (f_str->vptr, nsave,
-X                f_str, rst, ppst, aa0, n0, aa1, n1,
-X                opt_prob);
-X
-X  if (rst->escore < 0.0) rst->escore = 2.0;
-X  /* sconn() re-orders vptr[] by library position; restore score order */
-X  kssort(f_str->vptr,nsave);
-X
-X  /* here we should use an nsave that is consistent with sconn and nm0 */
-X
-X  f_str->nsave = nsave;
-X  if (nsave > f_str->nm0) f_str->nsave = f_str->nm0;
-X
-X  rst->score[1] = f_str->vptr[0]->score;
-X  rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);
-X
-}
-X
-/* do_work() - score one library sequence during the search phase.
-X
-X   Chooses opt_prob (passed on to do_fasts()), translates aa1 first
-X   when built with TFAST, and calls do_fasts() with f_str->maxsav.
-X   rst->comp and rst->H are set to -1.0 after a comparison.  When
-X   qr_flg is set the shuffle budget f_str->shuff_cnt is consumed; a
-X   qr_flg==1 call with no budget left, or a library sequence shorter
-X   than ktup, returns scores of -1 and escore 2.0 without comparing. */
-void do_work (const unsigned char *aa0, const int n0,
-X              const unsigned char *aa1, const int n1,
-X              int frame,
-X              struct pstruct *ppst, struct f_struct *f_str,
-X              int qr_flg, struct rstruct *rst)
-{
-X  int opt_prob;
-X  int hoff, n10, i;
-X
-X  if (qr_flg==1 && f_str->shuff_cnt <= 0) {
-X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X    rst->escore = 2.0;
-X    return;
-X  }
-X
-X  if (qr_flg) {
-X    /* shuffled comparisons always use opt_prob */
-X    opt_prob = 1;
-X    /* if (frame==1) */
-X    f_str->shuff_cnt--;
-X  }
-X  else if (ppst->zsflag == 2 || ppst->zsflag == 12) {
-X    opt_prob = 0;
-X  }
-X  else if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14) {
-X    opt_prob = 1;
-X  }
-X  else {
-X    opt_prob = 0;
-X  }
-X
-X  if (n1 < ppst->param_u.fa.ktup) {
-X    rst->escore = 2.0;
-X    rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X    return;
-X  }
-X
-#ifdef TFAST
-X  n10 = aatran(aa1, f_str->aa1x, n1, frame);
-X  if (ppst->debug_lib) {
-X    /* truncate the translation at the first out-of-range residue code */
-X    for (i = 0; i < n10; i++) {
-X      if (f_str->aa1x[i] > ppst->nsq) {
-X        fprintf(stderr,
-X                "residue[%d/%d] %d range (%d)\n",i,n1,
-X                f_str->aa1x[i],ppst->nsq);
-X        f_str->aa1x[i] = 0;
-X        n10 = i-1;
-X      }
-X    }
-X  }
-X
-X  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
-#else	/* FASTA */
-X  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
-#endif
-X
-X  rst->comp = rst->H = -1.0;
-}
-X
-/* do_opt() - re-run do_fasts() for one library sequence with opt_prob
-X   forced to 1 and f_str->maxsav saved runs.  With TFAST the library
-X   sequence is translated into f_str->aa1x first. */
-void do_opt (const unsigned char *aa0, const int n0,
-X             const unsigned char *aa1, const int n1,
-X             int frame,
-X             struct pstruct *ppst, struct f_struct *f_str,
-X             struct rstruct *rst)
-{
-X  int hoff;		/* required by do_fasts(), not examined here */
-#ifdef TFAST
-X  int n10;		/* length of the translated sequence */
-X
-X  n10 = aatran(aa1, f_str->aa1x, n1, frame);
-X  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1, f_str->maxsav);
-#else	/* FASTA */
-X  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1, f_str->maxsav);
-#endif
-}
-X
-X
-/* modify savemax() so that full length 100% matches are marked
-X so that they cannot be removed - if we have a 100% match, mark "exact"
-X
-X modify savemax() to split alignments that include a comma
-*/
-X
-/* savemax() - record the diagonal run held in *dptr in f_str->vmax[].
-X
-X   If dptr->dmax still points at a saved run on the same diagonal with
-X   the same start, that entry is extended (and its score/exact flag
-X   updated when the score improved).  Otherwise the run overwrites
-X   f_str->lowmax, the entry currently holding the lowest score.
-X   Whenever the lowest entry may have changed, the first maxsav
-X   entries are rescanned to find the new f_str->lowmax/lowscor;
-X   entries flagged "exact" are never chosen as lowmax by that rescan.
-X   tpos is accepted for the callers' benefit but is not used. */
-void
-savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav,
-X         int exact, int tpos)
-{
-X  int diag_no;			/* index of dptr in f_str->diag[] */
-X  int worst;			/* lowest score seen in the rescan */
-X  struct savestr *slot;		/* entry being written */
-X  struct savestr *scan, *scan_end;
-X
-X  diag_no = (int) (dptr - f_str->diag);
-X  slot = dptr->dmax;
-X
-X  if (slot != NULL && slot->dp == diag_no && slot->start == dptr->start) {
-X    /* continuation of a run that is already saved: move its end */
-X    slot->stop = dptr->stop;
-X
-X    /* a score that did not improve leaves the entry alone; if it gets
-X       bad enough the diagonal scan will restart the run */
-X    worst = dptr->score;
-X    if (worst <= slot->score) return;
-X
-X    slot->score = worst;
-X    slot->exact = exact;
-X
-X    /* only a change to the lowest entry requires a rescan */
-X    if (slot != f_str->lowmax) return;
-X  }
-X  else {
-X    /* new run: it takes over the lowest-scoring entry */
-X    slot = f_str->lowmax;
-X    slot->exact = exact;
-X    worst = slot->score = dptr->score;
-X    slot->dp = diag_no;
-X    slot->start = dptr->start;
-X    slot->stop = dptr->stop;
-X    dptr->dmax = slot;
-X  }
-X
-X  /* find the new lowest non-exact entry, bounded above by 'worst' */
-X  scan_end = &f_str->vmax[maxsav];
-X  for (scan = f_str->vmax; scan < scan_end; scan++) {
-X    if (!scan->exact && scan->score < worst) {
-X      worst = scan->score;
-X      f_str->lowmax = scan;
-X    }
-X  }
-X
-X  f_str->lowscor = worst;
-}
-X
-/* this version of spam scans the diagonal to find the best local score,
-X then resets the boundaries for a global alignment and re-scans */
-X
-/* NOOVERHANG allows one to score any overhanging alignment as zero.
-X Useful for SAGE alignments. Normally, one allows overhangs because
-X of the possibility of partial sequences.
-*/
-X
-#undef NOOVERHANG
-X
-/*
-X May, 2005 - spam() has an interesting bug that occurs when two
-X peptides match in order, separated by one position (the comma). In
-X this case, spam() splits the match, and only returns the better of
-X the two matches. So, if spam splits an alignment at a comma, it
-X needs the ability to insert the missing match.
-X
-*/
-X
-/* spam() - rescore the saved run *dmax and snap it to a whole query
-X   segment.
-X
-X   Pass 1 walks the run from dmax->start to dmax->stop (library
-X   coordinates) and finds the best-scoring local stretch.  If that
-X   score is not positive, 0 is returned and *dmax is unchanged.
-X
-X   Pass 2 replaces the boundaries by the start and end of the query
-X   segment containing the end of that stretch (f_str->aa0b[] and
-X   aa0e[], shifted by the diagonal offset), clamps them to
-X   0..n1-1, and rescans to obtain the score of the full segment.
-X   dmax->start and dmax->stop are updated and the new score is
-X   returned.
-X
-X   pam2 is indexed pam2[query residue][library residue].
-X
-X   The upper clamp uses ">= n1": the scan below is inclusive of
-X   maxv.stop, so a stop equal to n1 would read aa1[n1], one past the
-X   last residue.
-*/
-int spam (const unsigned char *aa0, const unsigned char *aa1,int n1,
-X          struct savestr *dmax, int **pam2,
-X          struct f_struct *f_str)
-{
-X  int lpos, doffset;
-X  int tot;
-X  struct {
-X    int start, stop, score;
-X  } curv, maxv;
-X  register const unsigned char *aa0p, *aa1p;
-X
-X  /* library position = query position + doffset on this diagonal */
-X  doffset = dmax->dp - f_str->noff;
-X  curv.start = dmax->start;
-X  aa1p = &aa1[dmax->start];
-X  aa0p = &aa0[dmax->start - doffset];
-X
-X  tot = curv.score = maxv.score = 0;
-X  for (lpos = dmax->start; lpos <= dmax->stop; lpos++) {
-X    tot += pam2[*aa0p++][*aa1p++];
-X    if (tot > curv.score) {
-X      curv.stop = lpos;	/* here, curv.stop is actually curv.max */
-X      curv.score = tot;
-X    }
-X    else if (tot < 0) {
-X      /* running total went negative: close this stretch, start anew */
-X      if (curv.score > maxv.score) {
-X        maxv.start = curv.start;
-X        maxv.stop = curv.stop;
-X        maxv.score = curv.score;
-X      }
-X      tot = curv.score = 0;
-X      curv.start = lpos+1;
-X    }
-X  }
-X
-X  if (curv.score > maxv.score) {
-X    maxv.start = curv.start;
-X    maxv.stop = curv.stop;
-X    maxv.score = curv.score;
-X  }
-X
-X  if (maxv.score <= 0) return 0;
-X
-X  /* now, reset the boundaries of the alignment using aa0b[]
-X     and aa0e[], which specify the residues that start and end
-X     the segment */
-X
-X  maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset;
-X  if (maxv.start < 0) {
-X    maxv.start = 0;
-#ifdef NOOVERHANG
-X    return 0;
-#endif
-X  }
-X
-X  maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset;
-X  if (maxv.stop >= n1) {
-X    maxv.stop = n1-1;
-#ifdef NOOVERHANG
-X    return 0;
-#endif
-X  }
-X  aa1p = &aa1[lpos = maxv.start];
-X  aa0p = &aa0[lpos - doffset];
-X
-X  for (tot=0; lpos <= maxv.stop; lpos++) {
-X    tot += pam2[*aa0p++][*aa1p++];
-X  }
-X
-X  maxv.score = tot;
-X
-/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
-X		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
-X			dmax->start,maxv.stop,dmax->stop);
-*/
-X  dmax->start = maxv.start;
-X  dmax->stop = maxv.stop;
-X
-X  return maxv.score;
-}
-X /* sconn() - join the n rescored runs in v[] into chains and report
-X    the best chain.
-X
-X    v[] is first sorted by library start (kpsort).  Each run in turn is
-X    copied into f_str->sarr[si] and offered to the chains already on
-X    the list headed by "start": it is attached (via ->prev) to the
-X    first chain whose last run ends before it in the library and does
-X    not conflict with it in the query.  Without opt_prob the chain
-X    score is the sum of the run scores; with opt_prob calc_tatusov()
-X    is consulted and the join is kept only if tatprob does not get
-X    worse.  The new entry is then linked into the list, which is kept
-X    ordered by score (or by tatprob, then score, with opt_prob).
-X
-X    On return rst->escore, rst->segnum and rst->seglen describe the
-X    chain at the head of the list; the function value is that chain's
-X    score, or 0 when the list is empty.  rst->score[0] is only cleared
-X    here.  With opt_prob the tat structures of all sarr[] entries are
-X    freed before returning.
-X */
-int sconn (struct savestr **v, int n,
-X struct f_struct *f_str,
-X struct rstruct *rst, struct pstruct *ppst,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1, int opt_prob)
-{
-X int i, si, cmpp (); /* cmpp is declared but not called in this function */
-X struct slink *start, *sl, *sj, *so, *sarr;
-X int lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop;
-X double tatprob;
-X int dotat; /* not referenced in this function */
-X
-X sarr = f_str->sarr;
-X
-X /* sort the score left to right in lib pos */
-X kpsort (v, n);
-X
-X start = NULL;
-X rst->score[0] = 0;
-X rst->escore = 2.0;
-X
-/* for the remaining runs, see if they fit */
-/* lstart/lstop -> start/stop in library sequence
-X tstart/tstop -> start/stop in query sequence
-X plstart/plstop ->
-*/
-X
-X for (i = 0, si = 0; i < n; i++) {
-X
-X /* the segment is worth adding; find out where? */
-X lstart = v[i]->start;
-X ltmp = v[i]->stop;
-X tstart = lstart - v[i]->dp + f_str->noff;
-X tstop = ltmp - v[i]->dp + f_str->noff;
-X
-X /* put the run in the group */
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].next = NULL;
-X sarr[si].prev = NULL;
-X sarr[si].tat = NULL;
-X
-/*
-X opt_prob for FASTS only has to do with using aa1 for priors,
-X i.e. we always calculate tatprobs for segments in FASTS (unlike
-X FASTF)
-*/
-X if(opt_prob) {
-X sarr[si].tatprob =
-X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0], ppst->nsq, f_str,
-X ppst->pseudocts, opt_prob, ppst->zsflag);
-X if (sarr[si].tatprob < 0.0) {
-X fprintf(stderr," negative tatprob: %lg\n",sarr[si].tatprob);
-X sarr[si].tatprob = 1.0;
-X }
-X sarr[si].tat = sarr[si].newtat; /* presumably newtat was filled in by calc_tatusov() - confirm there */
-X }
-X
-/* if it fits, then increase the score
-X
-X start points to the highest scoring run
-X -> next is the second highest, etc.
-X put the segment into the highest scoring run that it fits into
-*/
-X for (sl = start; sl != NULL; sl = sl->next) {
-X ltmp = sl->vp->start;
-X /* plstop -> previous lstop */
-X plstop = sl->vp->stop;
-X /* ptstart -> previous t(query) start */
-X ptstart = ltmp - sl->vp->dp + f_str->noff;
-X /* ptstop -> previous t(query) stop */
-X ptstop = plstop - sl->vp->dp + f_str->noff;
-#ifndef FASTM
-X /* if the previous library stop is before the current library start
-X    and the two query ranges do not overlap (either order) */
-X if (plstop < lstart && ( ptstop < tstart || ptstart > tstop))
-#else
-X /* if the previous library stop is before the current library start
-X    and the previous query range also ends before the current one */
-X if (plstop < lstart && ptstop < tstart)
-#endif
-X {
-X if(!opt_prob) {
-X sarr[si].score = sl->score + v[i]->score;
-X sarr[si].prev = sl;
-X break;
-X } else {
-X tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0], ppst->nsq, f_str,
-X ppst->pseudocts, opt_prob, ppst->zsflag);
-X /* if our tatprob gets worse when we add this, forget it */
-X if(tatprob > sarr[si].tatprob) {
-X free(sarr[si].newtat->probs); /* get rid of new tat struct */
-X free(sarr[si].newtat);
-X continue; /* reuse this sarr[si] */
-X } else {
-X sarr[si].tatprob = tatprob;
-X free(sarr[si].tat->probs); /* get rid of old tat struct */
-X free(sarr[si].tat);
-X sarr[si].tat = sarr[si].newtat;
-X sarr[si].prev = sl;
-X sarr[si].score = sl->score + v[i]->score;
-X /*
-X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
-X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
-X */
-X break;
-X }
-X }
-X }
-X }
-X
-X /* now recalculate where the score fits: sarr[si] is linked in front
-X    of the first list member it beats; if it beats none of them it is
-X    not linked into the list at all */
-X if (start == NULL) start = &sarr[si];
-X else {
-X if(!opt_prob) {
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
-X if (sarr[si].score > sj->score) {
-X sarr[si].next = sj;
-X if (so != NULL)
-X so->next = &sarr[si];
-X else
-X start = &sarr[si];
-X break;
-X }
-X so = sj;
-X }
-X } else {
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
-X if ( sarr[si].tatprob < sj->tatprob ||
-X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
-X sarr[si].next = sj;
-X if (so != NULL)
-X so->next = &sarr[si];
-X else
-X start = &sarr[si];
-X break;
-X }
-X so = sj;
-X }
-X }
-X }
-X
-X si++;
-X }
-X
-X if(opt_prob) { /* every sarr[] entry owns one tat structure at this point */
-X for (i = 0 ; i < si ; i++) {
-X free(sarr[i].tat->probs);
-X free(sarr[i].tat);
-X }
-X }
-X
-X if (start != NULL) {
-X if(opt_prob) {
-X rst->escore = start->tatprob;
-X } else {
-X rst->escore = 2.0;
-X }
-X
-X rst->segnum = rst->seglen = 0;
-X for(sj = start ; sj != NULL; sj = sj->prev) { /* walk the runs joined into the best chain */
-X rst->segnum++;
-X rst->seglen += sj->vp->stop - sj->vp->start + 1;
-X }
-X return (start->score);
-X } else {
-X rst->escore = 1.0;
-X }
-X
-X rst->segnum = rst->seglen = 0;
-X return (0);
-}
-X
-/* kssort() - Shell sort of the pointer array v[0..n-1] into order of
-X   decreasing ->score.  Entries with equal scores are not exchanged.
-X   (Defined with a prototype, like the other functions in this file,
-X   so that the arguments are type-checked.) */
-void
-kssort (struct savestr *v[], int n)
-{
-X  int gap, i, j;
-X  struct savestr *tmp;
-X
-X  for (gap = n / 2; gap > 0; gap /= 2) {
-X    for (i = gap; i < n; i++) {
-X      /* sift v[i] back through its gap-spaced predecessors */
-X      for (j = i - gap; j >= 0; j -= gap) {
-X        if (v[j]->score >= v[j + gap]->score)
-X          break;
-X        tmp = v[j];
-X        v[j] = v[j + gap];
-X        v[j + gap] = tmp;
-X      }
-X    }
-X  }
-}
-X
-/* kpsort() - Shell sort of the pointer array v[0..n-1] into order of
-X   increasing ->start (left to right).  Entries with equal starts are
-X   not exchanged.  (Defined with a prototype, like the other functions
-X   in this file, so that the arguments are type-checked.) */
-void
-kpsort (struct savestr *v[], int n)
-{
-X  int gap, i, j;
-X  struct savestr *tmp;
-X
-X  for (gap = n / 2; gap > 0; gap /= 2) {
-X    for (i = gap; i < n; i++) {
-X      /* sift v[i] back through its gap-spaced predecessors */
-X      for (j = i - gap; j >= 0; j -= gap) {
-X        if (v[j]->start <= v[j + gap]->start)
-X          break;
-X        tmp = v[j];
-X        v[j] = v[j + gap];
-X        v[j + gap] = tmp;
-X      }
-X    }
-X  }
-}
-X
-/* shscore() - score of a 100% identical match: the sum of the
-X   diagonal entries pam2[r][r] over the residues r of aa0[0..n0-1].
-X   Positions holding EOSEQ, or a code greater than nsq, add nothing. */
-int
-shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq)
-{
-X  int pos;
-X  int total = 0;
-X
-X  for (pos = 0; pos < n0; pos++) {
-X    const unsigned char res = aa0[pos];
-X
-X    if (res == EOSEQ || res > nsq) continue;
-X    total += pam2[res][res];
-X  }
-X  return total;
-}
-X
-/* krsort() - Shell sort of the pointer array v[0..n-1] into order of
-X   decreasing ->stop, i.e. alignments are arranged from right to left
-X   (back to front).  (Defined with a prototype, like the other
-X   functions in this file, so that the arguments are type-checked.) */
-X
-void
-krsort (struct savestr *v[], int n)
-{
-X  int gap, i, j;
-X  struct savestr *tmp;
-X
-X  for (gap = n / 2; gap > 0; gap /= 2) {
-X    for (i = gap; i < n; i++) {
-X      /* sift v[i] back through its gap-spaced predecessors; note the
-X         strict '>' - entries with equal stops are exchanged */
-X      for (j = i - gap; j >= 0; j -= gap) {
-X        if (v[j]->stop > v[j + gap]->stop)
-X          break;
-X        tmp = v[j];
-X        v[j] = v[j + gap];
-X        v[j + gap] = tmp;
-X      }
-X    }
-X  }
-}
-X
-/* do_walign() - produce the alignment for one library sequence.
-X
-X   Re-runs do_fasts() with opt_prob 1 and the larger f_str->maxsav_w
-X   save list, discards trailing saved runs whose score is not
-X   positive, and calls sconn_a() on a private copy of the query to
-X   build the alignment encoding in f_str->res.  The query is also
-X   copied into f_str->aa0t.  On return a_res->nres holds the value
-X   returned by sconn_a(), a_res->res points at f_str->res,
-X   *have_ares is 0, and the function value is rst.score[0] from
-X   do_fasts().  With TFAST the translated library sequence in
-X   f_str->aa1x (length stored in f_str->n10) is used instead of aa1. */
-int do_walign (const unsigned char *aa0, int n0,
-X               const unsigned char *aa1, int n1,
-X               int frame,
-X               struct pstruct *ppst,
-X               struct f_struct *f_str,
-X               struct a_res_str *a_res,
-X               int *have_ares)
-{
-X  struct rstruct rst;
-X  const unsigned char *lib_seq;	/* sequence that is actually compared */
-X  unsigned char *q_copy;	/* scratch copy of the query */
-X  int lib_len, hoff, pos;
-X
-#ifdef TFAST
-X  f_str->n10 = lib_len = aatran(aa1,f_str->aa1x,n1,frame);
-X  lib_seq = f_str->aa1x;
-#else
-X  lib_seq = aa1;
-X  lib_len = n1;
-#endif
-X
-X  do_fasts(aa0, n0, lib_seq, lib_len, ppst, f_str, &rst, &hoff, 1, f_str->maxsav_w);
-X
-X  /* the alignment portion takes advantage of the information left
-X     over in f_str after do_fasts is done.  in particular, it is
-X     easy to run a modified sconn() to produce the alignments.
-X
-X     unfortunately, the alignment display routine wants to have
-X     things encoded as with bd_align and sw_align, so we need to do that.
-X  */
-X
-X  /* f_str->vptr[] was left sorted by score by do_fasts(); drop the
-X     entries at the tail that did not keep a positive score */
-X  while (f_str->nsave > 0 && f_str->vptr[f_str->nsave-1]->score <= 0) {
-X    f_str->nsave--;
-X  }
-X
-X  q_copy = (unsigned char *)calloc(n0+1,sizeof(unsigned char));
-X  if (q_copy == NULL) {
-X    fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
-X    exit(1);
-X  }
-X
-X  /* copy aa0[] into both q_copy[] and f_str->aa0t[], then terminate */
-X  for (pos = 0; pos < n0; pos++) {
-X    q_copy[pos] = aa0[pos];
-X    f_str->aa0t[pos] = q_copy[pos];
-X  }
-X  q_copy[pos] = '\0';
-X  f_str->aa0t[pos] = q_copy[pos];
-X
-X  a_res->nres = sconn_a (q_copy,n0,lib_seq,lib_len,f_str, a_res, ppst);
-X
-X  free(q_copy);
-X
-X  a_res->res = f_str->res;
-X  *have_ares = 0;
-X  return rst.score[0];
-}
-X
-/* this version of sconn is modified to provide alignment information */
-/* in addition, it needs to know whether a segment has been used before */
-X
-/* sconn_a fills in the res[nres] array, but this is passed implicitly
-X through f_str->res[f_str->nres]
-X
-X What the code below does:
-X - returns 0 at once when there are no saved segments (f_str->nsave <= 0)
-X or when v[0]->score <= 0
-X - otherwise sorts f_str->vptr with krsort(), then gives every segment
-X with a non-negative score one sarr[] entry, scores it alone with
-X calc_tatusov(), and tries to chain it (through ->prev) to a candidate
-X already on the "start" list; the chain is kept only when the combined
-X tatprob is not worse than the entry's current tatprob
-X - walks the ->prev chain from the head of the list, copying the query
-X residues of each segment from f_str->aa0t[] into aa0[] and their
-X indices from f_str->aa0i[] into f_str->aa0ti[], and writing res[]:
-X one 0 per aligned residue, one positive count per gap between
-X consecutive segments in the library sequence
-X - sets a_res->min0, min1, max0, max1 and returns tres (aligned residues
-X plus the gap lengths)
-X
-X Side effects: aa0[] is overwritten, vp->dp of each segment used is
-X adjusted, and f_str->aa0t[] is reloaded from the rewritten aa0[]
-X before returning.
-X */
-X
-int sconn_a (unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X struct pstruct *ppst)
-{
-X int i, si, cmpp (), n;
-X unsigned char *aa0p;
-X int sx, dx, doff, *aa0tip;
-X
-X struct savestr **v;
-X struct slink *start, *sl, *sj, *so, *sarr;
-X int lstart, lstop, ltmp, plstart, tstart, plstop, ptstop, ptstart, tstop;
-X
-X int *res, nres, tres;
-X
-X double tatprob;
-X
-/* sort the score left to right in lib pos */
-X
-X v = f_str->vptr;
-X n = f_str->nsave;
-X sarr = f_str->sarr;
-X
-X /* set things up in case nothing fits */
-X if (n <=0 || v[0]->score <= 0) return 0;
-X
-X if (v[0]->score < 0) { /* NOTE(review): cannot be true here, v[0]->score <= 0 has already returned above; si is not assigned on this path */
-X sarr[0].vp = v[0];
-X sarr[0].score = v[0]->score;
-X sarr[0].next = NULL;
-X sarr[0].prev = NULL;
-X start = &sarr[0];
-X }
-X else {
-X
-X krsort (v, n); /* sort from left to right in library */
-X
-X start = NULL;
-X
-X /* for each alignment, see if it fits */
-X
-X
-X for (i = 0, si = 0; i < n; i++) {
-X /* if the score is less than the join threshold, skip it */
-X
-X if (v[i]->score < 0) continue;
-X
-X lstart = v[i]->start;
-X lstop = v[i]->stop;
-X tstart = lstart - v[i]->dp + f_str->noff;
-X tstop = lstop - v[i]->dp + f_str->noff;
-X
-X /* put the alignment in the group */
-X
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].next = NULL;
-X sarr[si].prev = NULL;
-X sarr[si].tat = NULL;
-X
-X sarr[si].tatprob =
-X calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0], ppst->nsq, f_str,
-X ppst->pseudocts, 1, ppst->zsflag);
-X sarr[si].tat = sarr[si].newtat; /* the code relies on calc_tatusov() leaving its result struct in ->newtat */
-X
-X
-X /* if it fits, then increase the score */
-X /* start points to a sorted (by total score) list of candidate
-X overlaps */
-X
-X for (sl = start; sl != NULL; sl = sl->next) {
-X plstart = sl->vp->start;
-X plstop = sl->vp->stop;
-X ptstart = plstart - sl->vp->dp + f_str->noff;
-X ptstop = plstop - sl->vp->dp + f_str->noff;
-#ifndef FASTM
-X if (plstart > lstop && (ptstop < tstart || ptstart > tstop)) {
-#else
-X if (plstop > lstart && ptstart > tstop) {
-#endif
-X /* alignment always uses probabilistic scoring ... */
-X /* sarr[si].score = sl->score + v[i]->score;
-X sarr[si].prev = sl;
-X break; */ /* quit as soon as the alignment has been added */
-X
-X tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
-X ppst->pam2[0], ppst->nsq, f_str,
-X ppst->pseudocts, 1, ppst->zsflag);
-X /* if our tatprob gets worse when we add this, forget it */
-X if(tatprob > sarr[si].tatprob) {
-X free(sarr[si].newtat->probs); /* get rid of new tat struct */
-X free(sarr[si].newtat);
-X continue; /* reuse this sarr[si] */
-X } else {
-X sarr[si].tatprob = tatprob;
-X free(sarr[si].tat->probs); /* get rid of old tat struct */
-X free(sarr[si].tat);
-X sarr[si].tat = sarr[si].newtat;
-X sarr[si].prev = sl;
-X sarr[si].score = sl->score + v[i]->score;
-X /*
-X fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
-X i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
-X */
-X break;
-X }
-X }
-X }
-X
-X /* now recalculate the list of best scores */
-X if (start == NULL)
-X start = &sarr[si]; /* put the first one in the list */
-X else
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
-X /* if (sarr[si].score > sj->score) { */ /* new score better than old */
-X if ( sarr[si].tatprob < sj->tatprob ||
-X ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
-X sarr[si].next = sj; /* next best after new score */
-X if (so != NULL)
-X so->next = &sarr[si]; /* prev_best->next points to best */
-X else start = &sarr[si]; /* start points to best */
-X break; /* stop looking */
-X }
-X so = sj; /* previous candidate best */
-X } /* NOTE(review): an entry that beats nothing on the list is not linked in at the tail - confirm this is intended */
-X si++; /* increment to next alignment */
-X }
-X }
-X
-X for (i = 0 ; i < si ; i++) { /* the tat structs were only needed while chaining */
-X free(sarr[i].tat->probs);
-X free(sarr[i].tat);
-X }
-X
-X res = f_str->res;
-X tres = nres = 0;
-X aa0p = aa0;
-X aa0tip = f_str->aa0ti; /* point to temporary index */
-X a_res->min1 = start->vp->start;
-X a_res->min0 = 0;
-X
-X for (sj = start; sj != NULL; sj = sj->prev ) {
-X doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff); /* write position in aa0[] minus read position in f_str->aa0t[] */
-X
-X /* fprintf(stderr,"doff: %3d\n",doff); */
-X
-X for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
-X dx <= sj->vp->stop; dx++) {
-X *aa0tip++ = f_str->aa0i[sx]; /* save index */
-X *aa0p++ = f_str->aa0t[sx++]; /* save sequence at index */
-X tres++;
-X res[nres++] = 0;
-X }
-X sj->vp->dp -= doff; /* keep dp consistent with the residues' new position in aa0[] */
-X if (sj->prev != NULL) {
-X if (sj->prev->vp->start - sj->vp->stop - 1 > 0 )
-X tres += res[nres++] = (sj->prev->vp->start - sj->vp->stop - 1); /* library residues between this segment and the next one */
-X }
-X
-X /*
-X fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
-X sj->vp->start - sj->vp->dp + f_str->noff,
-X sj->vp->stop - sj->vp->dp + f_str->noff,
-X sj->vp->start,sj->vp->stop,sj->vp->dp,
-X f_str->noff,sj->vp->score);
-X
-X fprintf(stderr,"%3d - %3d: %3d\n",
-X sj->vp->start,sj->vp->stop,sj->vp->score);
-X */
-X a_res->max1 = sj->vp->stop+1;
-X a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
-X }
-X
-X /*
-X fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
-X a_res->min0,a_res->max0,a_res->min1,a_res->max1);
-X */
-X
-X /* now replace f_str->aa0t with aa0
-X (f_str->aa0t is permanent, aa0 is not)*/
-X for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
-X
-X return tres;
-}
-X
-/* for fasts (and fastf), pre_cons needs to set up f_str as well as do
-X necessary translations - for right now, simply do do_walign
-X
-X as written: when TFAST is defined, aa1 is translated with aatran()
-X into f_str->aa1x and the value aatran() returns is stored in
-X f_str->n10; when TFAST is not defined the function does nothing */
-X
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
-X
-#ifdef TFAST
-X f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
-#endif
-X
-}
-X
-/* aln_func_vals - fill in the coordinate-scaling fields of *aln:
-X qlfact, qlrev, llfact, llmult, llrev and frame.
-X Called from calcons, calc_id, calc_code.
-X
-X The query-side fields and aln->frame get the same values in every
-X build; only the library-side fields depend on TFAST. */
-void
-aln_func_vals(int frame, struct a_struct *aln) {
-X
-X  /* query side and frame: identical for TFAST and FASTS builds */
-X  aln->qlfact = 1;
-X  aln->qlrev = 0;
-X  aln->frame = 0;
-X
-#ifdef TFAST
-X  /* library side: factor 3, reversed for frames above 3 */
-X  aln->llmult = 3;
-X  aln->llfact = 3;
-X  aln->llrev = (frame > 3) ? 1 : 0;
-#else /* FASTS */
-X  aln->llmult = 1;
-X  aln->llfact = 1;
-X  aln->llrev = 0;
-#endif
-}
-X
-#include "a_mark.h"
-X
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{ /* calcons: build the three display strings for one alignment.
-X seqc0 receives the query (read from f_str->aa0t, not from aa0),
-X seqc1 the library sequence (aa1, or f_str->aa1x when TFAST is
-X defined) and seqca the match symbols (M_IDENT, M_POS, M_ZERO, M_NEG,
-X M_DEL; M_BLANK over the left flank).
-X a_res.res is read one entry at a time: 0 means an aligned pair,
-X n > 0 means n library residues shown opposite '-' in the query.
-X *nc is set to the number of aligned pairs (lenc-len_gap); the return
-X value is mins+lenc+nd (left flank + aligned region + right flank).
-X aln->amin0/amin1/amax0/amax1 and smin0/smin1 are set, nident and nsim
-X are counted; ngap_q and ngap_l are only reset to 0 here. */
-X int i0, i1, nn1, n0t;
-X int op, lenc, len_gap, nd, ns, itmp;
-X const unsigned char *aa1p;
-X char *sp0, *sp1, *spa;
-X int *rp;
-X int mins, smins;
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amin1 = a_res.min1;
-X aln->amax0 = a_res.max0;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X n0 -= (f_str->nm0-1); /* NOTE(review): n0 is not read again in this function */
-X
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
-X /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0; /* smins is assigned in every branch but never read here */
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llen/2);
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else {
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llen/2);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X
-X memset(seqca,M_BLANK,mins);
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* op is the previous "match/insert" operator; *rp is the current
-X operator or repeat count */
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
-X op = *rp++; /* get the next match/insert operator */
-X
-X /* get the alignment symbol */
-X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
-X *sp1 = pst.sq[aa1p[i1++]];
-X n0t++; /* n0t counts aligned pairs; the right-end code below uses it as the query length */
-X lenc++;
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X }
-X else { /* either op != 0 (previous was insert) or *rp != 0
-X (current is insert) */
-X if (op==0) { op = *rp++;} /* previous was match, start insert */
-X /* previous was insert - count through gap */
-X *sp0++ = '-';
-X *sp1++ = pst.sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X len_gap++;
-X lenc++;
-X }
-X }
-X
-X *spa = '\0';
-X *nc = lenc-len_gap;
-/* now we have the middle, get the right end */
-X
-X ns = mins + lenc + aln->llen; /* round the total length to a multiple of aln->llen */
-X ns -= (itmp = ns %aln->llen);
-X if (itmp>aln->llen/2) ns += aln->llen;
-X nd = ns - (mins+lenc);
-X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0t-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
-X n0t-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X
-X return mins+lenc+nd;
-}
-X
-int
-calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{ /* calcons_a: same construction as calcons() (seqc0 = query from
-X f_str->aa0t, seqc1 = library sequence, seqca = match symbols), plus
-X an annotation line seqc0a for the query: blanks over the left flank
-X and over gaps, ann_arr[code] for each aligned query residue, where
-X code is read from aa0a[] through aa0ap.
-X aa0ap is re-pointed at aa0a[f_str->nmoff[fnum]] whenever the
-X fragment number f_str->aa0ti[i0] changes or a gap has just ended.
-X *nc is set to lenc-len_gap; the return value is mins+lenc+nd. */
-X int i0, i1, nn1, n0t;
-X int op, lenc, len_gap, nd, ns, itmp, p_ac, fnum, o_fnum;
-X const unsigned char *aa1p;
-X unsigned char *aa0ap;
-X char *sp0, *sp0a, *sp1, *spa;
-X int *rp;
-X int mins, smins;
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amin1 = a_res.min1;
-X aln->amax0 = a_res.max0;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X n0 -= (f_str->nm0-1); /* NOTE(review): n0 is not read again in this function */
-X
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
-X /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0; /* smins is assigned in every branch but never read here */
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llen/2);
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else {
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llen/2);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X
-X memset(seqca,M_BLANK,mins);
-X memset(seqc0a,' ', mins);
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp0a = seqc0a+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X n0t=lenc=len_gap=aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=op=p_ac= 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* op is the previous "match/insert" operator; *rp is the current
-X operator or repeat count */
-X
-X o_fnum = f_str->aa0ti[i0];
-X aa0ap = &aa0a[f_str->nmoff[o_fnum]+i0];
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X fnum = f_str->aa0ti[i0];
-X if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
-X if (p_ac == 0) { /* previous code was a match */
-X if (fnum != o_fnum) { /* continuing a match, but with a different fragment */
-X aa0ap = &aa0a[f_str->nmoff[fnum]];
-X o_fnum = fnum;
-X }
-X }
-X else { /* first match after a gap: restart the annotation pointer for this fragment */
-X p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0];
-X aa0ap = &aa0a[f_str->nmoff[fnum]];
-X }
-X op = *rp++; /* get the next match/insert operator */
-X
-X /* get the alignment symbol */
-X if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
-X *sp0a++ = ann_arr[*aa0ap++];
-X *sp1 = pst.sq[aa1p[i1++]];
-X n0t++; /* n0t counts aligned pairs; the right-end code below uses it as the query length */
-X lenc++;
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X }
-X else { /* either op != 0 (previous was insert) or *rp != 0
-X (current is insert) */
-X if (op==0) { op = *rp++;} /* previous was match, start insert */
-X /* previous was insert - count through gap */
-X if (p_ac != 1) {
-X p_ac = 1; fnum = f_str->aa0ti[i0]; /* fnum is reassigned at the top of the next iteration anyway */
-X }
-X
-X *sp0++ = '-';
-X *sp1++ = pst.sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X *sp0a++ = ' ';
-X op--;
-X len_gap++;
-X lenc++;
-X }
-X }
-X
-X *sp0a = *spa = '\0';
-X *nc = lenc-len_gap;
-/* now we have the middle, get the right end */
-X
-X ns = mins + lenc + aln->llen; /* round the total length to a multiple of aln->llen */
-X ns -= (itmp = ns %aln->llen);
-X if (itmp>aln->llen/2) ns += aln->llen;
-X nd = ns - (mins+lenc);
-X if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0t-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
-X n0t-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X return mins+lenc+nd;
-}
-X
-/* aaptrshuffle - despite the name, reverses res[0..n-1] in place
-X (the random shuffle is commented out in the original code).
-X Nothing is touched when n is less than 2. */
-void aaptrshuffle(unsigned char *res, int n) {
-X
-X  unsigned char *lo, *hi;
-X  unsigned char held;
-X
-X  if (n < 2) return;	/* empty or single residue: nothing to swap */
-X
-X  lo = res;
-X  hi = res + (n - 1);
-X
-X  /* swap the outermost pair and move both ends inwards */
-X  while (lo < hi) {
-X    held = *hi;
-X    *hi-- = *lo;
-X    *lo++ = held;
-X  }
-}
-X
-/* aa0shuffle - apply aaptrshuffle() to each of the f_str->nm0 query
-X fragments in aa0[]. Fragment k starts at f_str->nmoff[k]; the length
-X passed on is nmoff[k+1] - nmoff[k] - 1, so the last position before
-X the next fragment's offset is left alone. n0 is not used. */
-void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
-X
-X  int frag = 0;
-X
-X  while (frag < f_str->nm0) {
-X    aaptrshuffle(aa0 + f_str->nmoff[frag],
-X		 f_str->nmoff[frag+1] - f_str->nmoff[frag] - 1);
-X    frag++;
-X  }
-}
-X
-/* build an array of match/ins/del - length strings
-X
-X The alignment in a_res.res (0 = aligned pair, n > 0 = n library
-X residues opposite a gap) is walked from (a_res.min0, a_res.min1) to
-X (a_res.max0, a_res.max1). Each time the operation changes, or a run
-X of matches moves on to a different query fragment, the finished run
-X is appended to al_str with update_code(): match runs carry the
-X fragment number f_str->aa0ti[i0]+1, gap runs do not. The last run is
-X flushed after the loop.
-X aln->amin0/amin1/amax0/amax1 are set, aln->nident and aln->nsim are
-X counted, ngap_q, ngap_l and nfs are reset to 0.
-X Returns the number of aligned pairs (lenc - len_gap).
-X al_str must already hold a NUL-terminated string on entry, because
-X strlen(al_str) is taken before every append. */
-int
-calc_code(const unsigned char *aa0, const int n0,
-X const unsigned char *aa1, const int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc, len_gap;
-X int p_ac, op_cnt;
-X const unsigned char *aa1p;
-X char tmp_cnt[20];
-X char sp0, sp1, *sq;
-X int *rp;
-X int mins, smins;
-X int o_fnum,fnum = 0;
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;} /* NOTE(review): sq is set here but pst.sq is what the loop below reads */
-X else {sq = pst.sq;}
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amin1 = a_res.min1;
-X aln->amax0 = a_res.max0;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = len_gap =aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=aln->nfs=op=p_ac = 0;
-X op_cnt = 0;
-X
-X i0 = a_res.min0; /* start in aa0 (f_str->aa0t) */
-X i1 = a_res.min1; /* start in aa1 */
-X tmp_cnt[0]='\0';
-X
-X o_fnum = f_str->aa0ti[i0] + 1; /* fragment number */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X fnum = f_str->aa0ti[i0]+1;
-X if (op == 0 && *rp == 0) { /* previous was match, this is match */
-X if (p_ac == 0) { /* previous code was a match */
-X if (fnum == o_fnum) { op_cnt++;}
-X else { /* continuing a match, but with a different fragment */
-X update_code(al_str,al_str_n-strlen(al_str), p_ac, op_cnt, o_fnum);
-X o_fnum = fnum;
-X op_cnt=1;
-X }
-X }
-X else { /* a gap run has just ended: flush it and start a new match run */
-X update_code(al_str,al_str_n-strlen(al_str),p_ac,op_cnt,o_fnum);
-X op_cnt = 1; p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0] + 1;
-X }
-X op = *rp++;
-X lenc++;
-X if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
-X sp0 = pst.sq[f_str->aa0t[i0++]];
-X sp1 = pst.sq[aa1p[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (p_ac == 1) { op_cnt++;}
-X else { /* a match run has just ended: flush it and start a new gap run */
-X update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum);
-X op_cnt = 1; p_ac = 1; fnum = f_str->aa0ti[i0] + 1;
-X }
-X op--; lenc++; i1++; len_gap++;
-X }
-X }
-X update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum);
-X
-X return lenc - len_gap;
-}
-X
-/* update_code(): if "op" == 0, this is the end of a match of length
-X "op_cnt" involving fragment "fnum"
-X otherwise, this is an insertion (op==1) or deletion (op==2)
-X
-X The run is formatted as "=<op_cnt>[<fnum>]" for a match and as
-X "-<op_cnt>" / "+<op_cnt>" otherwise, and appended to al_str.
-X
-X al_str     NUL-terminated string being built
-X al_str_max room left in al_str INCLUDING the terminating NUL (callers
-X            pass buffer_size - strlen(al_str))
-X op         0, 1 or 2; indexes op_char[]
-X
-X strncat() writes up to n characters and then a NUL, so the count
-X given to it must be one less than the room left; when less than two
-X bytes are left nothing is appended. The text is truncated, never
-X written past the end of the caller's buffer.
-*/
-X
-void
-update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
-X
-X  char op_char[4]={"=-+"};
-X  char tmp_cnt[40];	/* sign char + two full-width ints + "[]" + NUL */
-X
-X  if (al_str_max <= 1) return;	/* room for the NUL only (or none) */
-X
-X  if (op == 0)
-X    sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
-X  else
-X    sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X
-X  /* leave one byte for the NUL that strncat adds after the copy */
-X  strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
-}
-X
-/* calc_id - count identities and similarities over one alignment
-X without building any display strings.
-X
-X The alignment is a_res.res[]: a 0 entry is one aligned pair, an
-X entry n > 0 means n library residues with no query partner. The
-X walk runs from (a_res.min0, a_res.min1) until both a_res.max0 and
-X a_res.max1 have been reached. The query is read from f_str->aa0t;
-X the library sequence is aa1, or f_str->aa1x when TFAST is defined.
-X
-X aln->amin0/amin1/amax0/amax1 are copied from a_res; aln->nident and
-X aln->nsim are counted; aln->ngap_q, ngap_l and nfs are reset to 0.
-X Returns the number of aligned pairs. aa0, n0 and n1 do not affect
-X the result. */
-int
-calc_id(const unsigned char *aa0, int n0,
-X	const unsigned char *aa1, int n1,
-X	struct a_struct *aln,
-X	struct a_res_str a_res,
-X	struct pstruct pst,
-X	struct f_struct *f_str)
-{
-X  const unsigned char *lib_seq;
-X  int *code_p;
-X  int q_pos, l_pos;
-X  int gap_left;		/* library residues still to skip in the current gap */
-X  int n_aligned;	/* aligned pairs seen so far */
-X  int q_ch, l_ch;
-X
-#ifndef TFAST
-X  lib_seq = aa1;
-#else
-X  lib_seq = f_str->aa1x;
-#endif
-X
-X  aln->amin0 = a_res.min0;
-X  aln->amin1 = a_res.min1;
-X  aln->amax0 = a_res.max0;
-X  aln->amax1 = a_res.max1;
-X
-X  aln->nfs = 0;
-X  aln->ngap_l = 0;
-X  aln->ngap_q = 0;
-X  aln->nsim = 0;
-X  aln->nident = 0;
-X
-X  code_p = a_res.res;
-X  q_pos = a_res.min0;
-X  l_pos = a_res.min1;
-X  gap_left = 0;
-X  n_aligned = 0;
-X
-X  while (q_pos < a_res.max0 || l_pos < a_res.max1) {
-X
-X    if (gap_left == 0 && *code_p == 0) {
-X      /* aligned pair: consume the 0 code and one residue from each side */
-X      code_p++;
-X
-X      if (pst.pam2[0][f_str->aa0t[q_pos]][lib_seq[l_pos]] >= 0) {
-X	aln->nsim++;
-X      }
-X
-X      q_ch = pst.sq[f_str->aa0t[q_pos]];
-X      l_ch = pst.sq[lib_seq[l_pos]];
-X      q_pos++;
-X      l_pos++;
-X      n_aligned++;
-X
-X      if (toupper(q_ch) == toupper(l_ch)) aln->nident++;
-X      continue;
-X    }
-X
-X    /* gap: load a new repeat count if none is active, then skip one
-X       library residue */
-X    if (gap_left == 0) {
-X      gap_left = *code_p++;
-X    }
-X    gap_left--;
-X    l_pos++;
-X  }
-X
-X  return n_aligned;
-}
-X
-#ifdef PCOMPLIB
-X
-#include "structs.h"
-#include "p_mw.h"
-X
-void
-update_params(struct qmng_str *qm_msg,
-X struct mngmsg *m_msg, struct pstruct *ppst)
-{ /* copy the per-query values n0, nm0, escore_flg and qshuffle from
-X qm_msg into m_msg; n0 is stored in ppst->n0 as well.
-X Compiled only when PCOMPLIB is defined. */
-X m_msg->n0 = ppst->n0 = qm_msg->n0;
-X m_msg->nm0 = qm_msg->nm0;
-X m_msg->escore_flg = qm_msg->escore_flg;
-X m_msg->qshuffle = qm_msg->qshuffle;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropfs2.c ||
-echo 'restore of dropfs2.c failed'
-Wc_c="`wc -c < 'dropfs2.c'`"
-test 59078 -eq "$Wc_c" ||
- echo 'dropfs2.c: original size 59078, current size' "$Wc_c"
-fi
-# ============= dropfx.c ==============
-if test -f 'dropfx.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropfx.c (File already exists)'
-else
-echo 'x - extracting dropfx.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropfx.c' &&
-X
-/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropfx.c,v 1.68 2007/04/26 18:37:18 wrp Exp $ */
-X
-/* implements the fastx algorithm, see:
-X
-X W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
-X DNA sequences with protein sequences" Genomics 46:24-36
-X
-X see dropnfa.c for better variable descriptions and comments
-*/
-X
-/* 18-Sept-2006 - remove global variables used for alignment */
-X
-/* 22-June-2006 - correct incorrect alignment coordinates generated
-X after pro_dna() on projected DNA region.
-*/
-X
-/* 9-May-2003 -> 3.46 changed lx_band to use projected protein
-X boundary end. this fixes some addressing issues on MacOSX, and
-X speeds up alignment on very long proteins
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-#define XTERNAL
-#include "upam.h"
-X
-/* this must be consistent with upam.h */
-#define MAXHASH 32
-#define NMAP MAXHASH+1
-X
-/* globals for fasta */
-#define MAXWINDOW 64
-X
-#ifndef MAXSAV
-#define MAXSAV 10
-#endif
-X
-#ifndef ALLOCN0
-static char *verstr="3.5 Sept 2006";
-#else
-static char *verstr="3.5an0 May 2006";
-#endif
-X
-struct dstruct /* diagonal structure for saving current run;
-X init_work() allocates the array of these that f_struct.diag
-X points to */
-{
-X int score; /* hash score of current match */
-X int start; /* start of current match */
-X int stop; /* end of current match */
-X struct savestr *dmax; /* location in vmax[] where best score data saved */
-};
-X
-struct savestr /* one saved match; f_struct holds MAXSAV of these in
-X vmax[] and MAXSAV pointers to them in vptr[] */
-{
-X int score; /* pam score with segment optimization */
-X int score0; /* pam score of best single segment */
-X int gscore; /* score from global match */
-X int dp; /* diagonal of match */
-X int start; /* start of match in lib seq */
-X int stop; /* end of match in lib seq */
-};
-X
-struct swstr { int H, E;}; /* NOTE(review): field meanings are not documented here; the file header points to dropnfa.c for variable descriptions */
-X
-struct bdstr { int CC, DD, CP, DP;}; /* NOTE(review): field meanings are not documented here; the file header points to dropnfa.c for variable descriptions */
-X
-void savemax();
-void kpsort();
-X
-struct sx_s {int C1, C2, C3, I1, I2, I3, flag; }; /* element type of f_struct.cur; NOTE(review): field meanings are not documented here - confirm against lx_band() */
-X
-struct f_struct { /* per-query work area; allocated and filled by init_work(), which hands it back through its f_arg parameter */
-X struct dstruct *diag; /* diagonal array: MAXDIAG entries, or n0 entries when ALLOCN0 is defined */
-X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
-X struct savestr *vptr[MAXSAV]; /* presumably pointers into vmax[] used for sorting - TODO confirm */
-X struct savestr *lowmax;
-X int ndo; /* set to 0 in init_work(); "used to save time on diagonals with long queries" */
-X int noff;
-X int hmask; /* hash constants */
-X int *pamh1; /* pam based array */
-X int *pamh2; /* pam based kfact array */
-X int *link, *harr; /* hash arrays */
-X int kshft; /* shift width */
-X int nsav, lowscor; /* number of saved runs, worst saved run */
-#ifndef TFAST
-X unsigned char *aa0x; /* contains translated codons 111222333*/
-X unsigned char *aa0y; /* contains translated codons 123123123*/
-#else
-X unsigned char *aa1x; /* contains translated codons 111222333 */
-X unsigned char *aa1y; /* contains translated codons 123123123 */
-#endif
-X struct sx_s *cur;
-X int *waa0; /* (nsq+1)*n0 table filled in init_work() from ppst->pam2[ip] */
-X int *waa1; /* (nsq+1)*n0 table filled in init_work() from ppst->pam2[0] */
-X int *res; /* alignment results array, max_res ints */
-X int max_res; /* number of ints allocated for res[] */
-};
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-static int dmatchx(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int hoff, int window,
-X int **pam2, int gdelval, int ggapval, int gshift,
-X struct f_struct *f_str);
-X
-int shscore(unsigned char *aa0, int n0, int **pam2);
-int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
-int spam (const unsigned char *aa0, const unsigned char *aa1,
-X struct savestr *dmax, int **pam2,
-X struct f_struct *f_str);
-int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
-int lx_band(const unsigned char *prot_seq, int len_prot,
-X const unsigned char *dna_prot_seq, int len_dna_prot,
-X int **pam_matrix, int gopen, int gext,
-X int gshift, int start_diag, int width, struct f_struct *f_str);
-X
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
-X
-extern void w_abort (char *p, char *p1);
-X
-/* initialize for fasta
-X
-X init_work() allocates one f_struct for the query aa0[0..n0-1] and
-X returns it through *f_arg. In order, it:
-X - derives the join/optimization thresholds and stores them in
-X ppst->param_u.fa (cgap, optcut unless optcut_set == 1, pgap)
-X - works out the hash shift (kshft) and mask (hmask) from the largest
-X hsq[] value below NMAP and from ktup
-X - allocates harr, pamh1, pamh2 and link
-X - FASTX (TFAST not defined): translates the query in three frames
-X with saatran() into aa0x, interleaves the frames into aa0y, and
-X hashes the translated copy instead of aa0
-X TFAST: allocates aa1x/aa1y for ppst->maxlen+2 residues
-X - fills the hash chains (harr/link) and the pamh1/pamh2 score tables
-X - allocates diag, builds the waa0/waa1 tables, allocates res[]
-X
-X Any allocation failure prints a message on stderr and calls exit(1).
-*/
-X
-void
-init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X int mhv, phv;
-X int hmax;
-X int i0, hv;
-X int pamfact;
-X int btemp;
-X struct f_struct *f_str;
-X int ktup; /* word size examined */
-X int fact; /* factor used to scale ktup match value */
-X int kt1; /* ktup-1 */
-X int lkt; /* last ktup - initially kt1, but can be increased
-X for hsq >= NMAP */
-X
-X int maxn0;
-X int *pwaa;
-X int i, j, q;
-X struct swstr *ss, *r_ss;
-X int *waa;
-X int *res;
-X int nsq, ip, *hsq;
-#ifndef TFAST
-X int last_n0, itemp;
-X unsigned char *fd, *fs, *aa0x, *aa0y, *aa0s;
-X int n0x, n0x3;
-#endif
-X
-X if (ppst->ext_sq_set) {
-X nsq = ppst->nsqx; ip = 1;
-X hsq = ppst->hsqx;
-X }
-X else {
-X nsq = ppst->nsq; ip = 0;
-X hsq = ppst->hsq;
-X }
-X
-X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
-X /* f_str is dereferenced below; fail the same way as every other
-X allocation in this function */
-X if (f_str == NULL) {
-X fprintf (stderr, " cannot allocate f_struct\n");
-X exit (1);
-X }
-X
-X btemp = 2 * ppst->param_u.fa.bestoff / 3 +
-X n0 / ppst->param_u.fa.bestscale +
-X ppst->param_u.fa.bkfact *
-X (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
-X btemp = min (btemp, ppst->param_u.fa.bestmax);
-X if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
-X
-X ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
-X if (ppst->param_u.fa.optcut_set != 1)
-#ifndef TFAST
-X ppst->param_u.fa.optcut = (btemp*5)/4;
-#else
-X ppst->param_u.fa.optcut = (btemp*4)/3;
-#endif
-X
-#ifdef OLD_FASTA_GAP
-X ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
-#else
-X ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
-#endif
-X pamfact = ppst->param_u.fa.pamfact;
-X ktup = ppst->param_u.fa.ktup;
-X fact = ppst->param_u.fa.scfact * ktup;
-X
-X if (pamfact == -1)
-X pamfact = 0;
-X else if (pamfact == -2)
-X pamfact = 1;
-X
-X /* largest hash code among the residues that take part in hashing */
-X for (i0 = 1, mhv = -1; i0 <=nsq; i0++)
-X if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
-X
-X if (mhv <= 0) {
-X fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
-X exit (1);
-X }
-X
-X /* kshft = number of bits needed to hold mhv */
-X for (f_str->kshft = 0; mhv > 0; mhv /= 2)
-X f_str->kshft++;
-X
-/* kshft = 2; */
-X kt1 = ktup - 1;
-X hv = 1;
-X for (i0 = 0; i0 < ktup; i0++) {
-X hv = hv << f_str->kshft;
-X }
-X hmax = hv;
-X f_str->hmask = (hmax >> f_str->kshft) - 1;
-X
-X
-X if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash array\n");
-X exit (1);
-X }
-X if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh1 array\n");
-X exit (1);
-X }
-X if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh2 array\n");
-X exit (1);
-X }
-X if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash link array\n");
-X exit (1);
-X }
-X
-#ifdef TFAST
-X if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
-X exit (1);
-X }
-X f_str->aa1x++;
-X
-X if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);
-X exit (1);
-X }
-X f_str->aa1y++;
-#else /* FASTX */
-X maxn0 = n0 + 2;
-X if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
-X exit (1);
-X }
-X aa0x++;
-X f_str->aa0x = aa0x;
-X
-X if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);
-X exit (1);
-X }
-X aa0y++;
-X f_str->aa0y = aa0y;
-X
-X /* three forward frames, stored one after another in aa0x, each
-X followed by one separator position */
-X last_n0 = 0;
-X for (itemp=0; itemp<3; itemp++) {
-X n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);
-X /*
-X for (i=0; i<n0x; i++) {
-X fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
-X if ((i%60)==59) fprintf(stderr,"\n");
-X }
-X fprintf(stderr,"\n");
-X */
-X last_n0 += n0x+1;
-X }
-X
-X /* fprintf(stderr,"\n"); */
-X
-X /* interleave the three frames from aa0x into aa0y (frame k goes to
-X positions k, k+3, k+6, ...); each frame is read up to its EOSEQ */
-X for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {
-X for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
-X *fd=EOSEQ;
-X }
-X
-X /* now switch aa0 and aa0x for hashing functions */
-X /* this seems dangerous in threaded code, but only the pointer is changed,
-X not the data itself */
-X
-X fs = aa0;
-X aa0 = aa0x;
-X aa0x = fs;
-X
-#endif
-X
-X for (i0 = 0; i0 < hmax; i0++)
-X f_str->harr[i0] = -1;
-X for (i0 = 0; i0 < n0; i0++)
-X f_str->link[i0] = -1;
-X
-X /* encode the aa0 array */
-X
-X phv = hv = 0;
-X lkt = kt1;
-X for (i0 = 0; i0 < min(lkt,n0); i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}
-X hv = (hv << f_str->kshft) + hsq[aa0[i0]];
-X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
-X }
-X
-X for (; i0 < n0; i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {
-X hv=phv=0;
-X lkt = i0+ktup;
-X /* restart hv, phv calculation */
-X for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}
-X hv = (hv << f_str->kshft) + hsq[aa0[i0]];
-X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
-X }
-X }
-X if (i0 >= n0) break;
-X hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
-X /* chain this position in front of the previous one with the same word */
-X f_str->link[i0] = f_str->harr[hv];
-X f_str->harr[hv] = i0;
-X if (pamfact) {
-X f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
-X /* this check should always be true, but just in case */
-X if (hsq[aa0[i0-kt1]]<NMAP)
-X phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
-X }
-X else f_str->pamh2[hv] = fact * ktup;
-X }
-X
-#ifndef TFAST
-X /* done hashing, now switch aa0, aa0x back */
-X fs = aa0;
-X aa0 = aa0x;
-X aa0x = fs;
-#endif
-X
-/* this has been modified from 0..<nsq to 1..<=nsq because the
-X pam2[0][0] is now undefined for consistency with blast
-*/
-X
-X if (pamfact)
-X for (i0 = 1; i0 <= nsq; i0++)
-X f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
-X else
-X for (i0 = 1; i0 <= nsq; i0++)
-X f_str->pamh1[i0] = fact;
-X
-X f_str->ndo = 0; /* used to save time on diagonals with long queries */
-X
-#ifndef ALLOCN0
-X if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
-X sizeof (struct dstruct)))==NULL) {
-X fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
-X (long)(MAXDIAG *sizeof (struct dstruct)));
-X exit (1);
-X };
-#else
-X if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
-X sizeof (struct dstruct)))==NULL) {
-X fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
-X (long)(n0*sizeof (struct dstruct)));
-X exit (1);
-X };
-#endif
-X
-X
-X /* waa0: row i holds pam2[ip][i][aa0[j]] for every query position j */
-X if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
-X fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
-X exit(1);
-X }
-X
-X pwaa = waa;
-X for (i=0; i<=nsq; i++) {
-X for (j=0;j<n0; j++) {
-X *pwaa = ppst->pam2[ip][i][aa0[j]];
-X pwaa++;
-X }
-X }
-X f_str->waa0 = waa;
-X
-X /* waa1: the same table built from pam2[0] */
-X if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
-X fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
-X exit(1);
-X }
-X
-X pwaa = waa;
-X for (i=0; i<=nsq; i++) {
-X for (j=0;j<n0; j++) {
-X *pwaa = ppst->pam2[0][i][aa0[j]];
-X pwaa++;
-X }
-X }
-X f_str->waa1 = waa;
-X
-#ifndef TFAST
-X maxn0 = max(2*n0,MIN_RES);
-#else
-X /* maxn0 needs to be large enough to accommodate introns
-X for TFASTX. For all other functions, it will be
-X more reasonable. */
-X maxn0 = max(4*n0,MIN_RES);
-#endif
-X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X exit(1);
-X }
-X f_str->res = res;
-X f_str->max_res = maxn0;
-X
-X *f_arg = f_str;
-}
-X
-X
-/* get_param() - describe the current search parameters as text.
-X
-X pstr - parameter structure; only read here
-X pstring1 - receives the human readable summary (a message to the
-X manager, currently 512); " init1" is appended when
-X param_u.fa.iniflag is set
-X pstring2 - when not NULL, receives the same information in
-X markx==10 ("; pg_...") format
-X
-X Both buffers are filled with sprintf()/strcat() without any length
-X check, so the caller has to provide enough room.
-*/
-void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-#ifndef TFAST
-X char *pg_str="FASTX";
-#else
-X char *pg_str="TFASTX";
-#endif
-X
-X /* Every pstring1 format must consume the same argument list, which
-X carries four matrix integers (pam_h, pam_l, pam_xx, pam_xm). The
-X OLD_FASTA_GAP formats used to have only three %d, which shifted
-X every later conversion and fed an int to %s. */
-X if (!pstr->param_u.fa.optflag)
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d:%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d width: %3d",pg_str,verstr,
-#else
-X sprintf (pstring1, "%s (%s) function [%s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d width: %3d",pg_str,verstr,
-#endif
-X pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx,pstr->pam_xm,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->gdelval, pstr->ggapval, pstr->gshift,
-X pstr->param_u.fa.optwid);
-X else
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d shift: %3d, width: %3d",pg_str,verstr,
-#else
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d shift: %3d, width: %3d",pg_str,verstr,
-#endif
-X pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx, pstr->pam_xm,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
-X pstr->gshift,pstr->param_u.fa.optwid);
-X
-X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
-X /*
-X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
-X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
-X */
-X
-X /* the markx==10 form only reports pam_h and pam_l for the matrix */
-X if (pstring2 != NULL) {
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
-; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#else
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
-; pg_open_ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#endif
-X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
-X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
-X pstr->param_u.fa.cgap);
-X }
-}
-X
-/* close_work() - free every buffer hanging off *f_arg, then the
-X structure itself, and reset *f_arg to NULL. Nothing happens when
-X *f_arg is already NULL. aa0, n0 and ppst are not used here. */
-void
-close_work (const unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X struct f_struct *work = *f_arg;
-X
-X if (work == NULL) return;
-X
-X free(work->cur);
-#ifndef TFAST
-X /* these two pointers are stepped back one element before free() */
-X free(--work->aa0y);
-X free(--work->aa0x);
-#else
-X /* these two pointers are stepped back one element before free() */
-X free(--work->aa1y);
-X free(--work->aa1x);
-#endif
-X free(work->res);
-X free(work->waa1);
-X free(work->waa0);
-X free(work->diag);
-X free(work->link);
-X free(work->pamh2);
-X free(work->pamh1);
-X free(work->harr);
-X free(work);
-X
-X *f_arg = NULL;
-}
-X
-/* do_fastx() - scan aa1 against the lookup tables in f_str (harr/link),
-X accumulate scores per diagonal, keep the best runs in f_str->vmax,
-X rescore them with spam() and join them with sconn().
-X
-X Results:
-X rst->score[1] - best single rescored run (init1)
-X rst->score[0] - max of the sconn() score and score[1] (initn)
-X rst->score[2] - copy of score[0], or the dmatchx() score when
-X param_u.fa.optflag is set and score[0] exceeds
-X param_u.fa.optcut
-X *hoff - my_hoff, derived from f_str->noff and the dp of
-X the best run
-X
-X Early returns (which leave *hoff unwritten): all scores are set to 0
-X when n1 < ktup or when no run was saved; all scores are set to -1,
-X with a message on stderr, when n0+n1+1 >= MAXDIAG.
-*/
-void do_fastx (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst, struct f_struct *f_str,
-X struct rstruct *rst, int *hoff)
-{
-X int nd; /* diagonal array size */
-X int lhval;
-X int kfact;
-X int i;
-X int my_hoff;
-X register struct dstruct *dptr;
-X register int tscor;
-X
-#ifndef ALLOCN0
-X register struct dstruct *diagp;
-#else
-X register int dpos;
-X int lposn0;
-#endif
-X struct dstruct *dpmax;
-X register int lpos;
-X int tpos;
-X struct savestr *vmptr;
-X int scor, tmp;
-X int im, ib, nsave;
-X int ktup, kt1, *hsq, ip, lkt;
-#ifndef TFAST
-X /* boundaries used below to map a query position to a frame */
-X int n0x31, n0x32;
-X n0x31 = (n0-2)/3;
-X n0x32 = n0x31+1+(n0-n0x31-1)/2;
-#else
-X const unsigned char *fs;
-X unsigned char *fd;
-X /* boundaries used below to map a library position to a frame */
-X int n1x31, n1x32, last_n1, itemp;
-X n1x31 = (n1-2)/3;
-X n1x32 = n1x31+1+(n1-n1x31-1)/2;
-#endif
-X
-X /* pick the residue mapping and pam2[] slice for the alphabet in use */
-X if (ppst->ext_sq_set) {
-X ip = 1;
-X hsq = ppst->hsqx;
-X }
-X else {
-X ip = 0;
-X hsq = ppst->hsq;
-X }
-X
-X ktup = ppst->param_u.fa.ktup;
-X kt1 = ktup-1;
-X
-X /* library sequence shorter than one word: nothing can match */
-X if (n1 < ktup) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-X if (n0+n1+1 >= MAXDIAG) {
-X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
-X rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X return;
-X }
-X
-X f_str->noff = n0 - 1;
-X
-#ifdef ALLOCN0
-X nd = n0;
-#endif
-X
-#ifndef ALLOCN0
-X nd = n0 + n1;
-#endif
-X
-X /* reset the diagonals from f_str->ndo up to nd; entries below ndo
-X were already reset at the end of the previous call */
-X dpmax = &f_str->diag[nd];
-X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
-X {
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X
-X /* empty the table of saved runs */
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X vmptr->score = 0;
-X f_str->lowmax = f_str->vmax;
-X f_str->lowscor = 0;
-X
-X /* start hashing */
-X /* prime lhval with the first ktup-1 usable residues; a residue that
-X maps to >= NMAP restarts the word.
-X NOTE(review): aa1[lpos] is read before the lpos<n1 test, so aa1[n1]
-X is touched when this loop runs to the end - presumably a terminator
-X is stored there; confirm.
-X NOTE(review): the ALLOCN0 lines after "continue" can never execute. */
-X lhval = 0;
-X lkt = kt1;
-X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lhval = 0; lkt=lpos+ktup; continue;
-#ifdef ALLOCN0 /* reinitialize dptr */
-X dptr = &f_str->diag[lpos % nd];
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr->score = 0;
-#endif
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X }
-X
-X /* main scan: for every word in aa1, walk the chain of query
-X positions (harr/link) and update the diagonal each one falls on */
-#ifndef ALLOCN0
-X diagp = &f_str->diag[f_str->noff + lkt];
-X for (; lpos < n1; lpos++, diagp++) {
-X /* if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; continue;} */
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lpos++ ; diagp++;
-X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
-X if (lpos >= n1) break;
-X lhval = 0;
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
-#else
-X lposn0 = f_str->noff + lpos;
-X for (; lpos < n1; lpos++, lposn0++) {
-X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X dpos = lposn0 - tpos;
-X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
-#endif
-X /* the diagonal already holds a run; tscor becomes
-X (previous stop + ktup - lpos), <= 0 when the new word does
-X not overlap the previous one */
-X tscor += ktup;
-X if ((tscor -= lpos) <= 0) { /* better to start over */
-X scor = dptr->score;
-X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
-#ifdef ALLOCN0
-X savemax (dptr, dpos, f_str);
-#else
-X savemax (dptr, f_str);
-#endif
-X if ((tscor += scor) >= kfact) {
-X dptr->score = tscor;
-X dptr->stop = lpos;
-X }
-X else {
-X dptr->score = kfact;
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X } /* continue current run in diagonal */
-X else {
-X dptr->score += f_str->pamh1[aa0[tpos]];
-X dptr->stop = lpos;
-X }
-X }
-X else {
-X /* first word seen on this diagonal: start a new run */
-X dptr->score = f_str->pamh2[lhval];
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X } /* end tpos */
-X
-#ifdef ALLOCN0
-X /* reinitialize diag structure */
-X loopl:
-X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor) {
-X savemax (dptr, lpos, f_str);
-X }
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr->score = 0;
-#endif
-X } /* end lpos */
-X
-X /* flush: save every diagonal that still beats the lowest saved score */
-#ifdef ALLOCN0
-X for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
-X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
-X savemax (dptr, dpos, f_str);
-X }
-#else
-X for (dptr = f_str->diag; dptr < dpmax;) {
-X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X f_str->ndo = nd;
-#endif
-X
-/*
-X at this point all of the elements of aa1[lpos]
-X have been searched for elements of aa0[tpos]
-X with the results in diag[dpos]
-*/
-X
-X /* rescore each saved run with spam() and collect them in vptr[] */
-X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X {
-X /*
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+vmptr->start-vmptr->dp,
-X f_str->noff+vmptr->stop-vmptr->dp,
-X vmptr->start,vmptr->stop,
-X vmptr->dp,vmptr->score);
-X */
-X if (vmptr->score > 0) {
-X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
-X f_str->vptr[nsave++] = vmptr;
-X }
-X }
-X
-X if (nsave <= 0) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-#ifndef TFAST
-X /* FASTX code here to modify the start, stop points for
-X the three phases of the translated protein sequence
-X */
-X /*
-X fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X
-X fprintf(stderr,"---\n");
-X */
-X /* the second test sees the dp already adjusted by the first one */
-X for (ib=0; ib<nsave; ib++) {
-X if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
-X f_str->vptr[ib]->dp += n0x32;
-X if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
-X f_str->vptr[ib]->dp += n0x31;
-X }
-X
-X /*
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X */
-#else
-X
-X /* TFASTX code here to modify the start, stop points for
-X the three phases of the translated protein sequence
-X TFASTX modifies library start points, rather than
-X query start points
-X */
-X
-X /*
-X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",n0, f_str->noff,n1,n1x31,n1x32);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X
-X fprintf(stderr,"---\n");
-X */
-X
-X /* the second test sees the start already adjusted by the first one */
-X for (ib=0; ib<nsave; ib++) {
-X if (f_str->vptr[ib]->start >= n1x32) {
-X f_str->vptr[ib]->start -= n1x32;
-X f_str->vptr[ib]->stop -= n1x32;
-X f_str->vptr[ib]->dp -= n1x32;
-X }
-X if (f_str->vptr[ib]->start >= n1x31) {
-X f_str->vptr[ib]->start -= n1x31;
-X f_str->vptr[ib]->stop -= n1x31;
-X f_str->vptr[ib]->dp -= n1x31;
-X }
-X }
-X
-X /*
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X */
-X
-#endif /* TFASTX */
-X
-X /* joined score over all saved runs */
-X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
-X ppst->param_u.fa.pgap, f_str);
-X
-X /* vmptr ends up on the highest scoring single run */
-X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
-X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
-X
-/* kssort (f_str->vptr, nsave); */
-X
-X rst->score[1] = vmptr->score; /* best single score - init1*/
-X rst->score[0] = max (scor, vmptr->score); /* initn */
-X rst->score[2] = rst->score[0]; /* initn */
-X
-X my_hoff=f_str->noff - vmptr->dp;
-X
-X /*
-X if (n1 > 5000) {
-X fprintf(stderr," Long n1: %d\n",n1);
-X }
-X */
-X
-X /* optional banded alignment around the best run's diagonal */
-X if (ppst->param_u.fa.optflag) {
-X if (rst->score[0] > ppst->param_u.fa.optcut) {
-#ifndef TFAST
-X rst->score[2] = dmatchx(aa0, n0,aa1,n1,my_hoff,
-X ppst->param_u.fa.optwid, ppst->pam2[ip],
-X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
-#else /* TFASTX */
-X /* generate f_str->aa1y */
-/*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[aa1[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-X fprintf(stderr,"\n-----\n");
-*/
-X /* interleave the three EOSEQ-terminated segments of aa1 into
-X aa1y at stride 3 (segment itemp goes to offsets itemp, itemp+3, ...) */
-X for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
-X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
-X *fd=EOSEQ;
-X }
-X
-/*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[f_str->aa1y[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-*/
-X /* note: my_hoff is recomputed here with the opposite sign */
-X rst->score[2] = dmatchx(aa0, n0, aa1, n1, my_hoff=vmptr->dp-f_str->noff,
-X ppst->param_u.fa.optwid, ppst->pam2[ip],
-X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
-#endif /* TFASTX */
-X }
-X }
-X *hoff = my_hoff;
-}
-X
-/* do_work() - score one library sequence.
-X
-X On return:
-X rst->score[0] - initn
-X rst->score[1] - init1
-X rst->score[2] - opt
-X rst->escore is set to 1.0 and rst->segnum, rst->seglen to 1 before
-X do_fastx() is called. qr_flg is not used.
-X
-X FASTX build: do_fastx() gets f_str->aa0x as the query and aa1 as is.
-X TFASTX build: aa1 (raw DNA) is translated with saatran() for frames
-X frame*3 .. frame*3+2; the three results are laid out one after the
-X other in f_str->aa1x, each followed by one separator slot.
-*/
-X
-void do_work (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst, struct f_struct *f_str,
-X int qr_flg, struct rstruct *rst)
-{
-X int hoff;
-#ifdef TFAST
-X unsigned char *xlated; /* destination for the translations */
-X int fr, fr_end, used, seg_len;
-X
-X /* aa0 has a protein sequence */
-X /* aa1 has a raw DNA sequence */
-X xlated = f_str->aa1x;
-X used = 0;
-X fr = frame*3;
-X fr_end = frame*3+3;
-X while (fr < fr_end) {
-X seg_len = saatran(aa1, xlated+used, n1, fr);
-X used += seg_len+1;
-X fr++;
-X }
-X /* total length without the slot after the last segment */
-X seg_len = used-1;
-#endif
-X
-X rst->score[2] = 0;
-X rst->score[1] = 0;
-X rst->score[0] = 0;
-X rst->escore = 1.0;
-X rst->seglen = 1;
-X rst->segnum = 1;
-X
-#ifdef TFAST
-X do_fastx (aa0, n0, f_str->aa1x, seg_len, ppst, f_str, rst, &hoff);
-#else
-X do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
-#endif
-}
-X
-/* do_opt() - run do_fastx() with param_u.fa.optflag forced to 1, then
-X put the caller's optflag value back. frame is not used. */
-void do_opt (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct rstruct *rst)
-{
-X int hoff; /* written by do_fastx(), not used here */
-X int saved_optflag = ppst->param_u.fa.optflag;
-X
-X ppst->param_u.fa.optflag = 1;
-X
-#ifdef TFAST
-X do_fastx (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
-#else
-X do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
-#endif
-X
-X ppst->param_u.fa.optflag = saved_optflag;
-}
-X
-/* savemax() - record the run held in *dptr in the f_str->vmax table.
-X
-X With ALLOCN0 the diagonal number dpos is passed in; otherwise it is
-X computed as the index of dptr within f_str->diag.
-X
-X If dptr->dmax already points at a saved entry for the same diagonal
-X and start, that entry is extended in place; otherwise the run
-X overwrites the current lowest-scoring entry (f_str->lowmax). When
-X the lowest entry may have changed, the table is rescanned to refresh
-X f_str->lowmax and f_str->lowscor.
-*/
-#ifdef ALLOCN0
-void
-savemax (dptr, dpos, f_str)
-X register struct dstruct *dptr;
-X int dpos;
-X struct f_struct *f_str;
-{
-X register struct savestr *vmptr;
-X register int i;
-X
-#else
-void
-savemax (dptr, f_str)
-X register struct dstruct *dptr;
-X struct f_struct *f_str;
-{
-X register int dpos;
-X register struct savestr *vmptr;
-X register int i;
-X
-X dpos = (int) (dptr - f_str->diag);
-X
-#endif
-X
-/* check to see if this is the continuation of a run that is already saved */
-X
-X if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
-X vmptr->start == dptr->start)
-X {
-X vmptr->stop = dptr->stop;
-X /* score did not improve: nothing else to update */
-X if ((i = dptr->score) <= vmptr->score)
-X return;
-X vmptr->score = i;
-X /* only a raised score on the lowest entry can change lowmax */
-X if (vmptr != f_str->lowmax)
-X return;
-X }
-X else
-X {
-X /* new run: replace the current lowest-scoring entry */
-X i = f_str->lowmax->score = dptr->score;
-X f_str->lowmax->dp = dpos;
-X f_str->lowmax->start = dptr->start;
-X f_str->lowmax->stop = dptr->stop;
-X dptr->dmax = f_str->lowmax;
-X }
-X
-X /* find the entry with the smallest score; i starts at the score just
-X stored, so lowmax only moves when a strictly smaller one exists */
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X if (vmptr->score < i)
-X {
-X i = vmptr->score;
-X f_str->lowmax = vmptr;
-X }
-X f_str->lowscor = i;
-}
-X
-/* spam() - rescore the run described by dmax with the pam2 matrix and
-X trim it to its best scoring sub-segment.
-X
-X aa0, aa1 - the two sequences; the aa0 position that pairs with
-X aa1[lpos] is lpos - dmax->dp + f_str->noff
-X dmax - run to rescore; start/stop are rewritten with the limits
-X of the best sub-segment
-X pam2 - scoring matrix, indexed [aa0 residue][aa1 residue]
-X
-X Returns the score of the best sub-segment, or 0 when no sub-segment
-X scores above 0; in that case dmax->start and dmax->stop are left as
-X they were (they used to be overwritten with uninitialized values).
-*/
-int spam (const unsigned char *aa0, const unsigned char *aa1,
-X struct savestr *dmax, int **pam2,
-X struct f_struct *f_str)
-{
-X int lpos;
-X int tot;
-X struct {
-X int start, stop, score;
-X } curv, maxv;
-X const unsigned char *aa0p, *aa1p;
-X
-X aa1p = &aa1[lpos = dmax->start];
-X aa0p = &aa0[lpos - dmax->dp + f_str->noff];
-X curv.start = curv.stop = lpos;
-X
-X /* default result when nothing scores above 0: keep the old limits */
-X maxv.start = dmax->start;
-X maxv.stop = dmax->stop;
-X
-X tot = curv.score = maxv.score = 0;
-X for (; lpos <= dmax->stop; lpos++) {
-X tot += pam2[*aa0p++][*aa1p++];
-X if (tot > curv.score) {
-X /* new best end point for the current segment */
-X curv.stop = lpos;
-X curv.score = tot;
-X }
-X else if (tot < 0) {
-X /* running total dropped below zero: close the current
-X segment and start a fresh one after this position */
-X if (curv.score > maxv.score) {
-X maxv.start = curv.start;
-X maxv.stop = curv.stop;
-X maxv.score = curv.score;
-X }
-X tot = curv.score = 0;
-X curv.start = lpos+1;
-X }
-X }
-X
-X /* the last open segment may be the best one */
-X if (curv.score > maxv.score) {
-X maxv.start = curv.start;
-X maxv.stop = curv.stop;
-X maxv.score = curv.score;
-X }
-X
-/* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
-X printf(" new region: %3d %3d %3d %3d\n",maxv.start,
-X dmax->start,maxv.stop,dmax->stop);
-*/
-X dmax->start = maxv.start;
-X dmax->stop = maxv.stop;
-X
-X return maxv.score;
-}
-X
-/* XFACT - overlap allowed (in both sequences) when two runs are joined */
-#define XFACT 10
-X
-/* sconn() - best score obtainable by joining the n runs in v[].
-X
-X v[] is reordered in place by kpsort() (ascending start). Runs whose
-X score is below cgap are ignored. Each remaining run is compared with
-X the runs already kept, in descending score order; the first one
-X that ends no more than XFACT-1 past this run's start in both
-X coordinates is joined to it: score = kept + this + pgap.
-X
-X Returns the highest (possibly joined) score, or 0 when no run
-X qualified.
-*/
-int sconn (struct savestr **v, int n,
-X int cgap, int pgap, struct f_struct *f_str)
-{
-X int i, si;
-X struct slink {
-X int score;
-X struct savestr *vp;
-X struct slink *next;
-X } *start, *sl, *sj, *so, sarr[MAXSAV];
-X int lstart, tstart, plstop, ptstop;
-X
-/* sort the score left to right in lib pos */
-X
-X kpsort (v, n);
-X
-X start = NULL;
-X
-/* for the remaining runs, see if they fit */
-X
-X for (i = 0, si = 0; i < n; i++)
-X {
-X
-/* if the score is less than the gap penalty, it never helps */
-X if (v[i]->score < cgap)
-X continue;
-X lstart = v[i]->start;
-X tstart = lstart - v[i]->dp + f_str->noff;
-X
-/* put the run in the group */
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].next = NULL;
-X
-/* if it fits, then increase the score */
-X for (sl = start; sl != NULL; sl = sl->next)
-X {
-X plstop = sl->vp->stop;
-X ptstop = plstop - sl->vp->dp + f_str->noff;
-X if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
-X sarr[si].score = sl->score + v[i]->score + pgap;
-X break;
-X }
-X }
-X
-/* now recalculate where the score fits */
-X /* NOTE(review): the node is linked in only in front of the first
-X entry with a strictly lower score; when no such entry exists the
-X loop ends without appending it, so it never takes part in later
-X joins - confirm this is intended */
-X if (start == NULL)
-X start = &sarr[si];
-X else
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next)
-X {
-X if (sarr[si].score > sj->score)
-X {
-X sarr[si].next = sj;
-X if (so != NULL)
-X so->next = &sarr[si];
-X else
-X start = &sarr[si];
-X break;
-X }
-X so = sj;
-X }
-X si++;
-X }
-X
-X /* the list head holds the highest score */
-X if (start != NULL)
-X return (start->score);
-X else
-X return (0);
-}
-X
-/* kssort() - shell sort of the n pointers in v[] into descending
-X score order; entries with equal scores are never exchanged. */
-void
-kssort (v, n)
-struct savestr *v[];
-int n;
-{
-X struct savestr *held;
-X int step, hi, lo;
-X
-X step = n / 2;
-X while (step > 0) {
-X for (hi = step; hi < n; hi++) {
-X lo = hi - step;
-X /* sift v[lo+step] towards the front while it scores higher */
-X while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
-X held = v[lo];
-X v[lo] = v[lo + step];
-X v[lo + step] = held;
-X lo -= step;
-X }
-X }
-X step /= 2;
-X }
-}
-X
-/* kpsort() - shell sort of the n pointers in v[] into ascending
-X start order; entries with equal starts are never exchanged. */
-void
-kpsort (v, n)
-struct savestr *v[];
-int n;
-{
-X struct savestr *held;
-X int step, hi, lo;
-X
-X step = n / 2;
-X while (step > 0) {
-X for (hi = step; hi < n; hi++) {
-X lo = hi - step;
-X /* sift v[lo+step] towards the front while it starts earlier */
-X while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
-X held = v[lo];
-X v[lo] = v[lo + step];
-X v[lo + step] = held;
-X lo -= step;
-X }
-X }
-X step /= 2;
-X }
-}
-X
-/* dmatchx() - call lx_band() on a band of the given window that starts
-X window/2 below hoff. The gap and frame-shift values are passed
-X negated; with OLD_FASTA_GAP the open value is -(gdelval-ggapval).
-X FASTX build: aa1 is the protein argument and f_str->aa0y the
-X translated one; TFASTX build: aa0 and f_str->aa1y. */
-static int
-dmatchx(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int hoff, int window,
-X int **pam2, int gdelval, int ggapval, int gshift,
-X struct f_struct *f_str)
-{
-X int open_val, band_start;
-X
-#ifdef OLD_FASTA_GAP
-X open_val = -(gdelval-ggapval);
-#else
-X open_val = -gdelval;
-#endif
-X band_start = hoff - window/2;
-X
-#ifdef TFAST
-X return lx_band(aa0,n0,f_str->aa1y,n1,
-X pam2, open_val, -ggapval, -gshift,
-X band_start, window, f_str);
-#else
-X return lx_band(aa1,n1,f_str->aa0y,n0,
-X pam2, open_val, -ggapval, -gshift,
-X band_start, window, f_str);
-#endif
-}
-X
-/* init_row() - zero the C1..C3, I1..I3 and flag members of the first
-X sp cells of row; nothing is touched when sp <= 0. */
-static void
-init_row(struct sx_s *row, int sp) {
-X struct sx_s *cell = row;
-X
-X while (sp-- > 0) {
-X cell->C1 = cell->C2 = cell->C3 = 0;
-X cell->I1 = cell->I2 = cell->I3 = 0;
-X cell->flag = 0;
-X cell++;
-X }
-}
-X
-/* lx_band() - score-only banded alignment of a protein against a
-X three-frame translated sequence.
-X
-X A single row of cells (f_str->cur) is reused for every protein
-X residue. Each cell keeps three match scores (C1, C2, C3) and three
-X gap scores (I1, I2, I3); the inner loop reads three consecutive
-X entries of dna_prot_seq per cell. Scores that fall to 0 or below
-X are reset to 0.
-X
-X The row is allocated on the first call only, with width+7 cells.
-X NOTE(review): it is not resized on later calls - presumably width
-X never grows between calls; confirm.
-X
-X Returns best+gopen+gext, where best is the largest value of
-X (cell score - gopen - gext) seen, i.e. the best cell score when
-X best > 0.
-*/
-int
-lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
-X int len_prot, /* length of prot. seq */
-X const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
-X int len_dna_prot, /* length trans. seq. */
-X int **pam_matrix, /* scoring matrix */
-X int gopen, int gext, /* gap open, gap extend penalties */
-X int gshift, /* frame-shift penalty */
-X int start_diag, /* start diagonal of band */
-X int width, /* width for band alignment */
-X struct f_struct *f_str)
-{
-X void *ckalloc();
-X int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;
-X int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
-X register int *wt;
-X const unsigned char *dp;
-X register struct sx_s *ap, *aq;
-X
-X sp = width+7;
-X gg = gopen+gext; /* cost of opening a gap and its first position */
-X /* sp = sp/3; */
-X if (f_str->cur == NULL)
-X f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
-X
-X init_row(f_str->cur, sp);
-X
-X /*
-X if (start_diag %3 !=0) start_diag = start_diag/3-1;
-X else start_diag = start_diag/3;
-X */
-X
-X /*
-X if (width % 3 != 0) width = width/3+1;
-X else width = width /3;
-X */
-X
-X /* currently, this code assumes that the DNA sequence is longer than the
-X protein sequence. This is not always true. len_prot in the loop below
-X should be decreased to the projection of the DNA on the protein */
-X
-X x1 = start_diag; /* x1 = lower bound of DNA */
-X
-X
-X end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
-X end_prot = min(end_prot,len_prot);
-X
-X /* i counts through protein sequence, x1 through DNAp */
-X
-X for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {
-X bd = min(x1+width, len_dna_prot/3); /* upper bound of band */
-X bd1 = max(0,x1); /* lower bound of band */
-X wt = pam_matrix[prot_seq[i]];
-X del = 1-x1; /*adjustment*/
-X /* shift both bounds so that band position x1 lands on cur[1] */
-X bd += del;
-X bd1 +=del;
-X
-X ap = &f_str->cur[bd1];
-X aq = ap+1;
-X e1 = f_str->cur[bd1-1].C3;
-X e2 = ap->C1;
-X cd1 = cd2= cd3= 0;
-X
-X /* e1, e2, e3 carry match scores read before they are overwritten;
-X cd1..cd3 carry the running gap score along the row for each
-X frame; aq is the cell one position ahead of ap */
-X for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
-X /* frame 1 */
-X sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];
-X if (cd1 > sc) sc = cd1;
-X cd1 -= gext;
-X if ((ci = aq->I1) > 0) {
-X if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
-X else {
-X ap->C1 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd1 < sc) cd1 = sc;
-X ap->I1 = max(ci-gext, sc);
-X } else ap->I1 = ci-gext;
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I1 = ap->C1 = 0;
-X } else {
-X ap->C1 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd1 < sc) cd1 = sc;
-X ap->I1 = sc;
-X } else ap->I1 = 0;
-X }
-X }
-X /* frame 2
-X NOTE(review): unlike frame 1, ap->I2 is left unchanged when
-X ci > 0, sc >= ci and sc-gg <= 0 (there is no "else ap->I2 =
-X ci-gext") - confirm whether that is intended */
-X sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];
-X if (cd2 > sc) sc = cd2;
-X cd2 -= gext;
-X if ((ci = aq->I2) > 0) {
-X if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
-X else {
-X ap->C2 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd2 < sc) cd2 = sc;
-X ap->I2 = max(ci-gext, sc);
-X }
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I2 = ap->C2 = 0;
-X } else {
-X ap->C2 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd2 < sc) cd2 = sc;
-X ap->I2 = sc;
-X } else ap->I2 = 0;
-X }
-X }
-X /* frame 3 (same remark as frame 2 for ap->I3); aq advances here */
-X sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];
-X if (cd3 > sc) sc = cd3;
-X cd3 -= gext;
-X if ((ci = aq++->I3) > 0) {
-X if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
-X else {
-X ap->C3 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd3 < sc) cd3 = sc;
-X ap->I3 = max(ci-gext, sc);
-X }
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I3 = ap->C3 = 0;
-X } else {
-X ap->C3 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd3 < sc) cd3 = sc;
-X ap->I3 = sc;
-X } else ap->I3 = 0;
-X }
-X }
-X }
-X }
-X /* printf("The best score is %d\n", best); */
-X /* best was recorded after subtracting gg, so add it back */
-X return best+gopen+gext;
-}
-X
-/* ckalloc - malloc() wrapper; w_abort("Ran out of memory.","") is
-X called when the request cannot be satisfied */
-void *ckalloc(size_t amount)
-{
-X void *mem = malloc(amount);
-X
-X if (mem == NULL) {
-X w_abort("Ran out of memory.","");
-X }
-X return mem;
-}
-X
-/* shscore - score of the first n0 residues of aa0 aligned against
-X themselves (the 100% identical score): the sum of the pam2 diagonal
-X entries for each residue. Returns 0 when n0 <= 0. */
-int
-shscore(unsigned char *aa0, int n0, int **pam2)
-{
-X int pos, total;
-X
-X total = 0;
-X pos = 0;
-X while (pos < n0) {
-X total += pam2[aa0[pos]][aa0[pos]];
-X pos++;
-X }
-X return total;
-}
-X
-/* NOTE(review): SGW1, SGW2 and WIDTH are not referenced in the part of
-X the file reviewed here - their use, if any, is elsewhere */
-#define SGW1 100
-#define SGW2 300
-#define WIDTH 60
-X
-/* code above is to convert sequence into numbers */
-X
-/* Types and forward declarations for the pro_dna() alignment code. */
-X
-typedef struct mat *match_ptr;
-X
-/* one node of the linked list that describes an alignment; pro_dna()
-X copies the l member of each node into the result array */
-typedef struct mat {
-X int i, j, l;
-X match_ptr next;
-} match_node;
-X
-/* pair of coordinates; local_align() uses it to remember where the
-X alignment ending in a cell started */
-typedef struct {
-X int i,j;
-} state;
-X
-typedef state *state_ptr;
-X
-/* one dynamic-programming cell with three scores; presumably C is the
-X best score and I/D the scores ending in an insertion/deletion (see
-X the array description inside local_align()) */
-typedef struct st_s { int C, I, D;} *st_ptr;
-X
-/* static st_ptr up=NULL, down, tp; */
-/* static int *st_up; */
-/* static int gop, gext, shift; */
-X
-/* old-style (unprototyped) forward declarations */
-void *ckalloc(size_t);
-static match_ptr small_global(), global();
-static int local_align(), find_best();
-static void init_row2(), init_ROW();
-X
-/* pro_dna() - align a protein against a translated DNA sequence.
-X
-X local_align() finds the end points of the best local alignment;
-X global() then builds the alignment between them as a linked list,
-X whose l values are copied into a_res->res (at most max_res of them).
-X
-X Writes a_res->min0/max0 (protein start/end), a_res->min1/max1
-X (DNA-protein start/end), a_res->res[] and a_res->nres.
-X
-X a_res->nres is the number of entries actually stored. When the
-X alignment has more than max_res entries a message is written to
-X stderr and nres is limited to max_res, so that it never describes
-X entries that were not stored.
-X
-X Returns the local_align() score.
-*/
-int
-pro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/
-X int len_prot, /* length of prot. seq */
-X const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
-X int len_dna_prot, /* length trans. seq. */
-X int **pam_matrix, /* scoring matrix */
-X int gopen, int gex, /* gap open, gap extend penalties */
-X int gshift, /* frame-shift penalty */
-X int max_res,
-X struct a_res_str *a_res) /* alignment info */
-{
-X match_ptr align, ap, aq;
-X int x, y, ex, ey, i, score;
-X int *alignment;
-X st_ptr up, down, tp;
-X
-X /* these globals removed */
-X /* gext = gex; gop = gopen; shift = gshift; */
-X
-X /* for fastx (but not tfastx), these could be moved into init_work(),
-X and done only once */
-X
-X up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X
-X /*local alignment find the best local alignment x (prot) and y (DNA)
-X is the starting position of the best local alignment
-X and ex (prot) ey (DNA) is the ending position */
-X score= local_align(&x, &y, &ex, &ey, pam_matrix,
-X gopen, gex, gshift,
-X dna_prot_seq, len_dna_prot,
-X prot_seq, len_prot, up, down);
-X
-X /* this is very strange, since local_align initialized up, down */
-X /* the rows are handed to global() offset by 3 cells; the offset is
-X undone before free() below */
-X up += 3; down += 3; tp += 3;
-X
-X /* x, y - start in prot, dna_prot */
-X a_res->min0 = x; /* prot */
-X a_res->max0 = ex; /* prot */
-X
-X a_res->min1 = y; /* DNA-prot */
-X a_res->max1 = ey; /* DNA-prot */
-X
-X align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift,
-X dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp);
-X
-X alignment = a_res->res;
-X
-X /* from earlier version */
-X /* alignment[0] = x; */ /* start of alignment in prot */
-X /* alignment[1] = y; */ /* start of alignment in DNA */
-X
-X /* copy what fits, but walk (and free) the whole list */
-X for (ap = align, i= 0; ap; i++) {
-X if (i < max_res) {alignment[i] = ap->l;}
-X aq = ap->next; free(ap); ap = aq;
-X }
-X
-X /* i == max_res fits exactly; entries were dropped only when i > max_res */
-X if (i > max_res) {
-X fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);
-X i = max_res;
-X }
-X
-X /* global() may have exchanged the three row pointers, but the set
-X is the same, so all three buffers are released */
-X up = &up[-3]; down = &down[-3]; tp = &tp[-3];
-X free(up); free(tp); free(down);
-X /* free(st_up); */ /* moved into local align */
-X
-X a_res->nres = i; /* number of alignment entries stored in res[] */
-X return score;
-}
-X
-/* swap() - exchange the two pointers that a and b point at */
-static void
-swap(void **a, void **b) {
-X void *held = *b;
-X
-X *b = *a;
-X *a = held;
-}
-X
-/*
-X local_align() finds the best local alignment of pro (length lp)
-X against dnap (length ld).
-X *x, *y receive the starting position of the best local alignment
-X and *ex, *ey its ending position; the best score is returned.
-X When no cell scores above 0, the return value is 0 and all four
-X positions are 0 (they used to be left uninitialized).
-X up and down are caller-supplied rows; they are zeroed here.
-*/
-static int
-local_align(int *x, int *y, int *ex, int *ey,
-X int **wgts, int gop, int gext, int shift,
-X unsigned char *dnap, int ld,
-X unsigned char *pro, int lp,
-X st_ptr up, st_ptr down) {
-X
-X int i, j, score, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1, e2 = 0, e3,
-X sc, del, e, best = 0, *wt, cd, ci;
-X state_ptr cur_st, last_st, cur_i_st;
-X st_ptr cur, last;
-X unsigned char *dp;
-X int *st_up, *cur_d_st;
-X
-/*
-X Array rowiC store the best scores of alignment ending at a position
-X Arrays rowiD, and rowiI store the best scores of alignment ending
-X at a position with a deletion or insertion
-X Arrays sti stores the starting position of the best alignment whose
-X score stored in the corresponding row array.
-X The program stores two rows to complete the computation, same is
-X for the global alignment routine.
-*/
-X
-X /* for fastx (but not tfastx), this could be moved into init_work(),
-X and done only once */
-X st_up = (int *) ckalloc(sizeof(int)*(ld+10));
-X init_row2(st_up, ld+5);
-X
-X /* columns 0 and 1 are padding; column j maps to dnap[j-2] */
-X ld += 2;
-X init_ROW(up, ld+1); /* set to zero */
-X init_ROW(down, ld+1); /* set to zero */
-X
-X
-X cur = up+1;
-X last = down+1;
-X
-X /* for fastx (but not tfastx), these could be moved into init_work(),
-X and done only once */
-X cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X
-X cur_d_st = st_up;
-X
-X dp = dnap-2;
-X for (i = 0; i < lp; i++) {
-X wt = &wgts[pro[i]][0];
-X for (j = 0; j < 2; j++) {
-X cur_st[j].i = i+1;
-X cur_st[j].j = j+1;
-X }
-X for (j = 2; j < ld; j++) {
-X score = wt[dp[j]];
-X /* del records which predecessor produced sc:
-X 2, 3, 4 - previous row, that many columns back
-X 0 - the I (insertion) score, 5 - the D (deletion) score
-X -1 - none: the alignment starts here */
-X del = -1;
-X if (j >= 3) {
-X sc = -score;
-X e3 = e2-shift; e2 = last[j-3].C;
-X e1 = last[j-2].C-shift;
-X if (e1 > sc) {sc = e1; del = 2;}
-X if (e2 > sc) {sc = e2; del = 3;}
-X if (e3 > sc) {sc = e3; del = 4;}
-X } else {
-X sc = e2 = 0;
-X if (sc < -score) sc=-score;
-X else del = 3;
-X }
-X sc += score;
-X if (sc < (ci=last[j].I)) {
-X sc = ci; del = 0;
-X }
-X if (sc < (cd=cur[j].D)) {
-X sc = cd; del = 5;
-X }
-X cur[j].C = sc;
-X e = sc - gop;
-X /* deletion score three columns on; cur_d_st keeps its length */
-X if (e > cd) {
-X cur[j+3].D = e-gext;
-X cur_d_st[j+3] = 3;
-X } else {
-X cur[j+3].D = cd-gext;
-X cur_d_st[j+3] = cur_d_st[j]+3;
-X }
-X /* propagate the start position of the chosen predecessor */
-X switch(del) {
-X case 5:
-X e1 = cur_d_st[j];
-X cur_st[j].i = cur_st[j-e1].i;
-X cur_st[j].j = cur_st[j-e1].j;
-X break;
-X case 0:
-X cur_st[j].i = cur_i_st[j].i;
-X cur_st[j].j = cur_i_st[j].j;
-X break;
-X case 2:
-X case 3:
-X case 4:
-X if (i) {
-X if (j-del >= 0) {
-X cur_st[j].i = last_st[j-del].i;
-X cur_st[j].j = last_st[j-del].j;
-X } else {
-X cur_st[j].i = i;
-X cur_st[j].j = 0;
-X }
-X } else {
-X cur_st[j].i = 0;
-X cur_st[j].j = max(0, j-del+1);
-X }
-X break;
-X case -1:
-X cur_st[j].i = i+1;
-X cur_st[j].j = j+1;
-X break;
-X }
-X if (e > ci) {
-X cur[j].I = e -gext;
-X cur_i_st[j].i = cur_st[j].i;
-X cur_i_st[j].j = cur_st[j].j;
-X } else {
-X cur[j].I = ci- gext;
-X }
-X /* remember start and end of the best cell seen so far */
-X if (sc > best) {
-X x1 = cur_st[j].i;
-X x2 = cur_st[j].j;
-X best =sc;
-X x3 = i;
-X x4 = j;
-X }
-X }
-X swap((void **)&last, (void **)&cur);
-X swap((void **)&cur_st, (void **)&last_st);
-X }
-X /* printf("The best score is %d\n", best); */
-X *x = x1; *y = x2; *ex = x3; *ey = x4;
-X free(cur_st); free(last_st); free(cur_i_st);
-X free(st_up);
-X return best;
-}
-X
-/*
-X Both global_up and global_down do linear space, score only global
-X alignments on the subsequences pro[x]...pro[ex] and dna[y]...dna[ey].
-X global_up works through the protein rows in increasing order
-X (from x towards ex); global_down works through them in decreasing
-X order (from ex towards x).
-*/
-X
-/* global_up() - forward score-only pass.
-X *row1 and *row2 are the two work rows; they are exchanged after each
-X protein residue. Before returning, the I values of the row written
-X last are copied into the other row, and *row1 / *row2 are exchanged
-X if needed so that *row1 is the row that received the copy and *row2
-X the row written last.
-X N selects the initial value of the I score in column 0: -gext when
-X non-zero, -gop-gext otherwise.
-X -10000 is used as "not reachable". cur[j+3] is written, so the rows
-X need at least three cells beyond column ey-y+1.
-*/
-static void
-global_up(st_ptr *row1, st_ptr *row2,
-X int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext, int shift,
-X unsigned char *dnap,
-X unsigned char *pro,
-X int N) {
-X int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score, *wt;
-X st_ptr cur, last;
-X
-X cur = *row1; last = *row2;
-X /* boundary row: only columns that are a multiple of 3 are reachable,
-X each at the cost of one more gap position */
-X sc = -gop-gext;
-X for (j = 1; j <= ey-y+1; j++) {
-X if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
-X else { last[j].I = last[j].C = -10000;}
-X cur[j].I = -10000;
-X }
-X last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
-X last[0].D = last[1].D = last[2].D = -10000;
-X if (N) last[0].I = -gext; else last[0].I = -gop-gext;
-X for (i = 1; i <= ex-x+1; i++) {
-X wt = &wgts[pro[i+x-1]][0]; e2 = last[0].C; e1 = -10000;
-X for (j = 0; j <= ey-y+1; j++) {
-X t = j+y;
-X sc = -10000;
-X if (t < 3) score = -10000;
-X else score = wt[dnap[t-3]];
-X /* e1, e2, e3 hold previous-row C values 2, 3 and 4 columns back;
-X the 2 and 4 column steps cost a frame shift */
-X if (j < 4) {
-X if (j == 3) sc = e2;
-X else if (j == 2) sc = e2-shift;
-X } else {
-X e3 = e2; e2 = e1;
-X e1 = last[j-2].C;
-X sc = max(max(e1, e3)-shift, e2);
-X }
-X sc += score;
-X sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
-X cur[j].C = sc;
-X cur[j+3].D = max(cd, sc-gop)-gext;
-X cur[j].I = max(ci, sc-gop)-gext;
-X }
-X swap((void **)&last, (void **)&cur);
-X }
-X /* after the final swap "last" is the row written most recently */
-X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
-X if (*row1 != last) swap((void **)row1, (void **)row2);
-}
-X
-/* global_down() - backward score-only pass over pro[x]...pro[ex] and
-X dna[y]...dna[ey]: rows run from ex down to x and columns from
-X ey-y+1 down to 0.
-X *row1 and *row2 are the two work rows; they are exchanged after each
-X protein residue. Before returning, the I values of the row written
-X last are copied into the other row, and *row1 / *row2 are exchanged
-X if needed so that *row1 is the row that received the copy and *row2
-X the row written last.
-X N selects the initial value of the I score in the last column:
-X -gext when non-zero, -gop-gext otherwise.
-X cur[j-3] is written for j down to 0, so the rows must be addressable
-X at indices -3..-1 (pro_dna() passes pointers advanced by 3 cells).
-X NOTE(review): dnap[t-1] is read with t = j+y, i.e. dnap[-1] when
-X y == 0 and j == 0 - confirm that callers never reach that case.
-X NOTE(review): the boundary row marks columns with
-X (ey-y+1-j) % 3 != 0 as reachable, whereas global_up() uses
-X j % 3 == 0 - confirm that the difference is intended.
-*/
-static void
-global_down(st_ptr *row1, st_ptr *row2,
-X int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext, int shift,
-X unsigned char *dnap, unsigned char *pro,
-X int N) {
-X int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, s1, s2, s3, *wt;
-X st_ptr cur, last;
-X
-X cur = (*row1); last = *row2;
-X sc = -gop-gext;
-X for (j = ey-y; j >= 0; j--) {
-X if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
-X else last[j].I = last[j].C = -10000;
-X }
-X last[ey-y+1].C = 0;
-X cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
-X last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
-X if (N) last[ey-y+1].I = -gext; else last[ey-y+1].I = -gop-gext;
-X for (i = ex-x; i >= 0; i--) {
-X wt = &wgts[pro[i+x]][0]; e2 = last[ey-y+1].C;
-X e1 = s2 = s3 = -10000;
-X for (j = ey-y+1; j >= 0; j--) {
-X t = j+y;
-X /* s1, s2, s3: match scores for this column and the two columns
-X visited before it; e1, e2, e3: previous-row C values 2, 3 and
-X 4 columns ahead */
-X s1 = wt[dnap[t-1]];
-X sc = -10000;
-X if (t+3 > ey) {
-X if (t+2==ey) sc = e2+s2;
-X else if (t+1==ey) sc = e2-shift+s1;
-X } else {
-X e3 = e2; e2 = e1;
-X e1 = last[j+2].C;
-X sc = max(max(e1+s1, e3+s3)-shift, e2+s2);
-X }
-X if (sc < (cd= cur[j].D)) {
-X sc = cd;
-X cur[j-3].D = cd-gext;
-X } else cur[j-3].D =max(cd, sc-gop)-gext;
-X if (sc < (ci= last[j].I)) {
-X sc = ci; del = 0;
-X cur[j].I = ci - gext;
-X } else cur[j].I = max(sc-gop,ci)-gext;
-X cur[j].C = sc;
-X s3 = s2; s2 = s1;
-X }
-X swap((void **)&last, (void **)&cur);
-X }
-X /* after the final swap "last" is the row written most recently */
-X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
-X if (*row1 != last) swap((void **)row1, (void **)row2);
-}
-X /* init_row2 - store 0 in row[0] .. row[ld-1]; does nothing when ld <= 0. */
-static void
-init_row2(int *row, int ld) {
-X int k = 0;
-X
-X while (k < ld) {
-X row[k] = 0;
-X k++;
-X }
-}
-X /* init_ROW - zero the C, I and D scores of row[0] .. row[ld-1]. */
-static void
-init_ROW(st_ptr row, int ld) {
-X int k;
-X
-X for (k = 0; k < ld; k++) {
-X row[k].C = 0;
-X row[k].D = 0;
-X row[k].I = 0;
-X }
-}
-X /*
-X combine - append list x2 to the tail of list x1 and return the head.
-X
-X If x1 is NULL, x2 is returned untouched (st is not applied).
-X If st is non-zero, the nodes of x2 are walked from the front: each
-X visited node has j incremented, the walk stops at the first node whose
-X l is 3 or 4, and that node's l is decremented. When x2 contains no
-X such node (or is empty) nothing is decremented; the previous code
-X dereferenced the NULL end of the list in that case.
-X */
-static match_ptr
-combine(match_ptr x1, match_ptr x2, int st) {
-X match_ptr x;
-X
-X if (x1 == NULL) return x2;
-X for (x = x1; x->next; x = x->next); /* find the tail of x1 */
-X x->next = x2;
-X if (st) {
-X for (x = x2; x; x = x->next) {
-X x->j++;
-X if (x->l == 3 || x->l == 4) break;
-X }
-X if (x != NULL) x->l--; /* x is NULL when the loop ran off the end */
-X }
-X return x1;
-}
-X
-/*
-X global - linear-space global alignment of protein rows x..ex against
-X DNA positions y..ey. It uses the two score-only routines (global_up
-X from the top, global_down from the bottom) to locate where the best
-X path crosses the middle row, then calls itself on the two halves and
-X joins the resulting lists with combine().
-X
-X wgts, gop, gext, shift, dnap, pro are passed through unchanged to the
-X scoring routines. N1 and N2 are handed to global_up / global_down
-X (and small_global) as their N flags. up_stp, dn_stp and tp_stp are
-X the row buffers shared by every level of the recursion.
-X
-X Returns the head of a match_node list; in the two degenerate cases
-X below only the l and next fields of the new nodes are assigned.
-*/
-X
-match_ptr
-global(int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext, int shift,
-X unsigned char *dnap,
-X unsigned char *pro,
-X int N1, int N2,
-X st_ptr *up_stp, st_ptr *dn_stp, st_ptr *tp_stp
-X )
-{
-X int m;
-X int m1, m2;
-X match_ptr x1, x2, mm1, mm2;
-X /*printf("%d %d %d %d\n", x,y, ex, ey);*/
-X /*
-X Degenerate regions are answered directly, and regions that fit the
-X SGW1 x SGW2 limits are handed to the quadratic-space small_global().
-X */
-X if (ex <= x) {
-X mm1 = NULL; mm2= NULL;
-X for (m = y+3; m <= ey; m+=3) { /* one l=5 node per whole codon; mm2 keeps the first one allocated, which ends up as the tail */
-X x1 = (match_ptr) ckalloc(sizeof(match_node));
-X x1->l = 5; x1->next = mm1;
-X if (mm1== NULL) mm2 = x1;
-X mm1 = x1;
-X }
-X if (ex == x) {
-X if ((ey-y) % 3 != 0) { /* left-over nucleotides: append a node with l = remainder+1 */
-X x1 = (match_ptr) ckalloc(sizeof(match_node));
-X x1->l = ((ey-y) % 3) +1; x1->next = NULL;
-X if (mm2) mm2->next = x1;
-X else mm1 = x1;
-X } else {
-X if (mm2) mm2->l = 4; /* no remainder: the tail node becomes l=4 */
-X }
-X }
-X return mm1;
-X }
-X if (ey <= y) {
-X mm1 = NULL;
-X for (m = x; m <= ex; m++) { /* one l=0 node per protein row */
-X x1 = (match_ptr) ckalloc(sizeof(match_node));
-X x1->l = 0; x1->next = mm1; mm1 = x1;
-X }
-X return mm1;
-X }
-X if (ex -x < SGW1-1 && ey-y < SGW2-1)
-X return small_global(x,y,ex,ey,
-X wgts, gop, gext, shift,
-X dnap, pro, N1, N2);
-X m = (x+ex)/2;
-X /*
-X Do the score only global alignment from row x to row m, m is
-X the middle row of x and ex. The scores of row m are left in
-X the buffer *up_stp.
-X */
-X global_up(up_stp, tp_stp, x, y, m, ey,
-X wgts, gop, gext, shift,
-X dnap, pro, N1);
-X
-X /*
-X Do the score only global alignment downwards from row ex
-X to row m+1; the scores of row m+1 are left in the buffer *dn_stp.
-X */
-X global_down(dn_stp, tp_stp, m+1, y, ex, ey,
-X wgts, gop, gext, shift,
-X dnap, pro, N2);
-X
-X /*
-X Use the information of rows m and m+1 to find the crossing
-X point of the best alignment with the middle row. The crossing
-X point is given by m1 and m2. Then we recursively call global
-X itself to compute alignments in the two smaller regions found by
-X the crossing point and combine the two alignments to form a
-X whole alignment. Return that alignment.
-X */
-X if (find_best(*up_stp, *dn_stp, &m1, &m2, ey-y+1, y, gop)) {
-X x1 = global(x, y, m, m1, wgts, gop, gext, shift, dnap, pro, N1, 0,
-X up_stp, dn_stp, tp_stp);
-X x2 = global(m+1, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 0, N2,
-X up_stp, dn_stp, tp_stp);
-X if (m1 == m2) x1 = combine(x1,x2,1);
-X else x1 = combine(x1, x2,0);
-X } else {
-X /* find_best returned 0: rows m and m+1 become two explicit l=0 nodes and the halves stop at rows m-1 and m+2 */
-X x1 = global(x, y, m-1, m1, wgts, gop, gext, shift, dnap, pro, N1, 1,
-X up_stp, dn_stp, tp_stp);
-X x2 = global(m+2, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 1, N2,
-X up_stp, dn_stp, tp_stp);
-X mm1 = (match_ptr) ckalloc(sizeof(match_node));
-X mm1->i = m; mm1->l = 0; mm1->j = m1;
-X mm2 = (match_ptr) ckalloc(sizeof(match_node));
-X mm2->i = m+1; mm2->l = 0; mm2->j = m1; /* both nodes carry j = m1 */
-X mm1->next = mm2; mm2->next = x2;
-X x1 = combine(x1, mm1, 0);
-X }
-X return x1;
-}
-X /*
-X find_best - choose the column where the best path crosses from the
-X upward row (up) to the downward row (down).
-X
-X For i = 1 .. ld-1 two candidates are scored, using the entry of up at
-X the same index i and the entry of down at index i (up is advanced by
-X one so that up[i-1] addresses it):
-X C + C -> selected with st = 1
-X I + I + gop -> selected with st = 0
-X The strictly best candidate wins; on ties the earlier one is kept.
-X
-X Outputs *m1 = j-1+y and *m2 = j+y for the winning index j (j stays 0
-X when no candidate beats the initial best of -100000).
-X Returns st. st now starts at 1 so the result is defined even when the
-X loop selects nothing; previously it was returned uninitialised then.
-X */
-static int
-find_best(st_ptr up, st_ptr down,
-X int *m1, int *m2,
-X int ld, int y, int gop) {
-X int i, best = -100000, j = 0, s2, s4, st = 1;
-X up++;
-X for (i = 1; i < ld; i++) {
-X s2 = up[i-1].C + down[i].C;
-X s4 = up[i-1].I + down[i].I + gop;
-X if (best < s2) {
-X best = s2; j = i; st = 1;
-X }
-X if (best < s4) {
-X best = s4; j = i; st = 0;
-X }
-X }
-X *m1 = j-1+y;
-X *m2 = j+y;
-X /*printf("find best score =%d\n", best);*/
-X return st;
-}
-X
-/*
-X An alignment is represented as a linked list whose elements
-X are of type match_node. Each element represents an edge in the
-X path of the alignment graph. The fields of match_node are
-X l --- gives the type of the edge.
-X i, j --- give the end position.
-X
-X small_global - full-table alignment of protein rows x..ex against DNA
-X positions y..ey, followed by a traceback that builds the list.
-X The score and traceback tables are function-static arrays sized by
-X SGW1/SGW2, so every call shares them and the region must fit; the
-X caller in global() checks the size before calling.
-X
-X st[][] holds, per cell, the winning move (0, 2, 3, 4 or 5) plus 10
-X when the D score was extended rather than opened and plus 20 when
-X the I score was extended rather than opened.
-*/
-X
-static match_ptr
-small_global(int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext, int shift,
-X unsigned char *dnap, unsigned char *pro,
-X int N1, int N2) {
-X static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1];
-X int i, j, e, sc, score, del, k, t, *wt, ci, cd;
-X int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
-X match_ptr mp, first;
-X
-X /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
-X sc = -gop-gext; C[0][0] = 0;
-X if (N1) I[0] = -gext; else I[0] = sc;
-X for (j = 1; j <= ey-y+1; j++) { /* row 0: only whole-codon columns get a finite score */
-X if (j % 3== 0) {
-X C[0][j] = sc; sc -= gext; I[j] = sc-gop;
-X } else I[j] = C[0][j] = -10000;
-X st[0][j] = 5;
-X }
-X lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
-X cI = I;
-X for (i = 1; i <= ex-x+1; i++) {
-X cC = &C[i][0];
-X wt = &wgts[pro[i+x-1]][0]; cst = &st[i][0];
-X for (j = 0; j <=ey-y+1; j++) {
-X sc = -10000; del = 0;
-X ci = cI[j];
-X cd= cD[j];
-X t = j+y;
-X if (t < 3) score = -10000;
-X else score = wt[dnap[t-3]];
-X if (j >= 4) { /* best of the 2-, 3- and 4-nucleotide steps from the previous row */
-X e2 = lC[j-2]-shift; sc = lC[j-3]; e4 = lC[j-4]-shift;
-X del = 3;
-X if (e2 > sc) { sc = e2; del = 2;}
-X if (e4 >= sc) { sc = e4; del = 4;}
-X } else {
-X if (j ==3) {sc= lC[0]; del = 3;}
-X else if (j == 2) {sc = lC[0]-shift; del = 2;}
-X }
-X sc = sc+score;
-X if (sc < ci) {
-X sc = ci; del = 0;
-X }
-X if (sc <= cd) {
-X sc = cd;
-X del = 5;
-X }
-X cC[j] = sc;
-X sc -= gop;
-X if (sc < cd) { /* extending D beats opening it: flag with +10 */
-X del += 10;
-X cD[j+3] = cd - gext;
-X } else cD[j+3] = sc -gext;
-X if (sc < ci) { /* extending I beats opening it: flag with +20 */
-X del += 20;
-X cI[j] = ci-gext;
-X } else cI[j] = sc-gext;
-X *(cst++) = del;
-X }
-X lC = cC;
-X }
-X if (N2 && ci +gop > cC[ey-y+1]) { /* ci still holds the I value read in the final cell */
-X st[ex-x+1][ey-y+1] = 0;
-X /*printf("small score = %d\n", ci+gop);*/
-X } /*else printf("small score =%d\n", cC[ey-y+1]);*/
-X first = NULL; e = 1;
-X /* traceback from the bottom-right cell; e remembers whether the edge just emitted was a 5 (e=5), a 0 (e=0) or anything else (e=1) */
-X for (i = ex+1, j = ey+1; i > x || j > y; i--) {
-X mp = (match_ptr) ckalloc(sizeof(match_node));
-X mp->i = i-1;
-X k = (t=st[i-x][j-y])%10;
-X mp->j = j-1;
-X if (e == 5 && (t/10)%2 == 1) k = 5;
-X if (e == 0 && (t/20)== 1) k = 0;
-X if (k == 5) { j -= 3; i++; e=5;} /* i++ cancels the loop's i--, so the row is unchanged */
-X else {j -= k;if (k==0) e= 0; else e = 1;}
-X mp->l = k;
-X mp->next = first;
-X first = mp;
-X }
-X
-X /* for (i = 0; i <= ex-x; i++) {
-X for (j = 0; j <= ey-y; j++)
-X printf("%d ", C[i][j]);
-X printf("\n");
-X }
-X */
-X return first;
-}
-X
-X
-#define XTERNAL
-#include "upam.h"
-X /*
-X display_alig - print an alignment as three text rows per chunk of
-X WIDTH columns, each chunk preceded by a numbered ruler line.
-X
-X a - a[0] and a[1] give the start positions (x = a[0],
-X y = a[1]-1); a[2] .. a[length-1] are the operation codes
-X dna, pro - the two sequences; characters are looked up through aa[]
-X length - number of entries in a
-X ld - number of dna entries converted into the scratch copy
-X
-X Rows are printed in the order line1 (DNA), line3 (markers), line2
-X (protein).
-X
-X Changes from the earlier version: the left shift of the line buffers
-X uses memmove() because source and destination overlap (undefined
-X with strncpy), and the look-ahead a[j+1] is only read while j+1 is
-X still below length.
-X
-X NOTE(review): dna1 comes from ckalloc() and is not released here;
-X how it should be freed depends on ckalloc(), which is defined elsewhere.
-X */
-extern void display_alig(a, dna, pro,length, ld)
-int *a;
-unsigned char *dna, *pro;
-int length, ld;
-{
-X int len = 0, i, j, x, y, lines, k;
-X static char line1[100], line2[100], line3[100],
-X tmp[10] = " ";
-X unsigned char *dna1, c1, c2, c3, *st;
-X
-X dna1 = ckalloc((size_t)ld);
-X for (st = dna, i = 0; i < ld; i++, st++) dna1[i] = aa[*st];
-X line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-1;
-X
-X for (len = 0, j = 2, lines = 0; j < length; j++) {
-X i = a[j];
-X /*printf("%d %d %d\n", i, len, b->j);*/
-X if (i > 0 && i < 5) tmp[i-2] = aa[pro[x++]];
-X if (i == 5) {
-X i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
-X if (j+1 < length && a[j+1] == 2) tmp[2] = ' '; /* do not look past the end of a[] */
-X }
-X if (i > 0) {
-X strncpy(&line1[len], (const char *)&dna1[y], i); y+=i;
-X } else {line1[len] = '-'; i = 1; tmp[0] = aa[pro[x++]];}
-X strncpy(&line2[len], tmp, i);
-X for (k = 0; k < i; k++) { /* turn tmp into the marker row: / | \ under a residue, blank elsewhere */
-X if (tmp[k] != ' ' && tmp[k] != '-') {
-X if (k == 2) tmp[k] = '\\';
-X else if (k == 1) tmp[k] = '|';
-X else tmp[k] = '/';
-X } else tmp[k] = ' ';
-X }
-X if (i == 1) tmp[0] = ' ';
-X strncpy(&line3[len], tmp, i);
-X tmp[0] = tmp[1] = tmp[2] = ' ';
-X len += i;
-X line1[len] = line2[len] =line3[len] = '\0';
-X if (len >= WIDTH) {
-X printf("\n%5d", WIDTH*lines++);
-X for (k = 10; k <= WIDTH; k+=10)
-X printf(" . :");
-X if (k-5 < WIDTH) printf(" .");
-X c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
-X line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
-X printf("\n %s\n %s\n %s\n", line1, line3, line2);
-X line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
-X /* move the unprinted tail (terminated at index len) to the front */
-X memmove(line1, &line1[WIDTH], strlen(&line1[WIDTH])+1);
-X memmove(line2, &line2[WIDTH], strlen(&line2[WIDTH])+1);
-X memmove(line3, &line3[WIDTH], strlen(&line3[WIDTH])+1);
-X len = len - WIDTH;
-X }
-X }
-X printf("\n%5d", WIDTH*lines);
-X for (k = 10; k < len; k+=10)
-X printf(" . :");
-X if (k-5 < len) printf(" .");
-X printf("\n %s\n %s\n %s\n", line1, line3, line2);
-}
-X
-X
-/* The alignment array stores the operations that align the protein and DNA
-X sequences. The codes stored in the array are as follows:
-X 0: delete of an amino acid.
-X 2: frame shift, 2 nucleotides match with an amino acid
-X 3: match an amino acid with a codon
-X 4: the other type of frame shift
-X 5: delete of a codon
-X
-X
-X The first two elements of the array store the starting points
-X in the protein and DNA sequences of the local alignment.
-X
-X The display looks like this, where WIDTH is assumed to be divisible by 10.
-X
-X 0 . : . : . : . : . : . :
-X CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG
-X P M I L G Y W N V R G L T H P I R M L L
-X
-X 60 . : . : . : . : . : . :
-X GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT
-X E Y T D S S Y D E K R Y T M G D A P D F
-*/
-X
-X
-/* fatal - write msg followed by a newline to stderr, then exit with status 1 */
-void fatal(char *msg)
-{
-X fprintf(stderr, "%s\n", msg);
-X exit(1);
-}
-X /*
-X do_walign - run pro_dna() on one sequence pair and record the result
-X in *a_res.
-X
-X Two builds share this function. Without TFAST (FASTX) the protein is
-X aa1/n1 and the DNA side is f_str->aa0y with length n0. With TFAST
-X (TFASTX) aa1 is first translated with saatran() in three frames into
-X f_str->aa1x, interleaved into f_str->aa1y, and aligned against the
-X protein aa0/n0.
-X
-X When the DNA length exceeds 6 times the protein length, do_fastx() is
-X called first to obtain hoff, and pro_dna() is restricted to the DNA
-X window nt_min..nt_max derived from it.
-X
-X a_res->res is pointed at f_str->res, nt_min is added back to
-X a_res->min1 and a_res->max1, and *have_ares is set to 1.
-X Returns the score from pro_dna().
-X */
-int do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares)
-{
-X int score;
-X int i, last_n1, itemp, n10;
-X int n_aa, n_nt, hoff, nt_min, nt_max, w_fact;
-X unsigned char *fs, *fd;
-X struct rstruct rst;
-X int itx;
-X
-#ifndef TFAST /* FASTX */
-X n_aa = n1;
-X n_nt = n0;
-X
-X /* check for large differences in sequence length */
-X nt_min = 0; nt_max = n_nt;
-X if (n_nt > 6 * n_aa) {
-X /* find out where the diagonal is - get hoff
-X hoff < 0 => seq0 is in the middle of seq1
-X */
-X do_fastx(f_str->aa0x, n0, aa1, n1, ppst, f_str, &rst, &hoff);
-X if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;} /* window is w_fact protein lengths wide */
-X else w_fact = 2;
-X
-X if (hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
-X nt_min = max(0,(hoff-w_fact*n_aa)*3);
-X nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
-X }
-X else {
-X nt_max = min(3*w_fact*n_aa,n_nt);
-X }
-X }
-X
-X a_res->res = f_str->res;
-X
-X score = pro_dna(aa1, n1, f_str->aa0y+nt_min, nt_max-nt_min, ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval - ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X -ppst->gshift,
-X f_str->max_res, a_res);
-X
-X /* correct for nt_min missing residues in alignment */
-X
-#else /* TFASTX */
-X
-X /*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[f_str->aa1x[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-X fprintf(stderr,"\n-----\n");
-X */
-X
-X last_n1 = 0;
-X for (itx=3*frame; itx<3+3*frame; itx++) { /* three translations, stored back to back in aa1x */
-X n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);
-/*
-X for (i=0; i<n10; i++) {
-X fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
-X if ((i%60)==59) fprintf(stderr,"\n");
-X }
-X fprintf(stderr,"\n");
-*/
-X last_n1 += n10+1;
-X }
-X n10 = last_n1-1;
-X
-X /* create aa1y from aa1x */
-X for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {
-X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
-X *fd=EOSEQ;
-X }
-X /*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[f_str->aa1y[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-X fprintf(stderr,"\n-----\n");
-X */
-X
-X n_aa = n0;
-X n_nt = n1;
-X
-X /* check for large differences in sequence length */
-X nt_min = 0; nt_max = n_nt;
-X if (n_nt > 6 * n_aa) {
-X /* find out where the diagonal is - get hoff
-X hoff < 0 => seq0 is in the middle of seq1
-X */
-X do_fastx(aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff);
-X if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}
-X else w_fact = 2;
-X
-X if ( hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
-X nt_min = max(0,(hoff-w_fact*n_aa)*3);
-X nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
-X }
-X else {
-X nt_max = min(3*w_fact*n_aa,n_nt);
-X }
-X }
-X
-X a_res->res = f_str->res;
-X
-X score = pro_dna(aa0, n0, f_str->aa1y+nt_min, nt_max-nt_min, ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval - ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X -ppst->gshift,
-X f_str->max_res, a_res);
-X
-#endif /* TFASTX */
-X
-X /* pro_dna always compares protein to DNA, and returns protein
-X coordinates in a_res->min0,max0 */
-X
-X a_res->min1 += nt_min; /* shift the DNA coordinates back by the window offset */
-X a_res->max1 += nt_min;
-X
-X /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0); */
-X
-X *have_ares = 1;
-X return score;
-}
-X
-/* aln_func_vals - fill in aln->qlfact, qlrev, llfact, llmult, frame, llrev.
-X llmult is always 1 and frame always 0; the factor of 3 and the
-X reverse flag (set when frame > 0) go to the q* fields in the FASTX
-X build and to the l* fields in the TFASTX build. */
-/* call from calcons, calc_id, calc_code */
-void
-aln_func_vals(int frame, struct a_struct *aln) {
-X
-X aln->llmult = 1;
-X aln->frame = 0;
-#ifndef TFAST
-X aln->qlfact = 3;
-X aln->llfact = 1;
-X aln->llrev = 0;
-X aln->qlrev = (frame > 0) ? 1 : 0;
-#else /* TFASTX */
-X aln->qlfact = 1;
-X aln->llfact = 3;
-X aln->qlrev = 0;
-X aln->llrev = (frame > 0) ? 1 : 0;
-#endif /* TFASTX */
-}
-X
-/* pre_cons - this function is required for programs like tfastx/y/s that
-X do translations on DNA sequences and save them in f_str->aa1??
-X
-X With TFAST defined it translates aa1 with saatran() in the three
-X frames 3*frame .. 3*frame+2 into f_str->aa1x, one after another, and
-X then interleaves the three runs into f_str->aa1y (frame k goes to
-X positions k, k+3, k+6, ...). Without TFAST the body is empty.
-*/
-X
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
-#ifdef TFAST
-X int i, last_n1, itemp, n10;
-X unsigned char *fs, *fd;
-X int itx;
-X
-X last_n1 = 0;
-X for (itx=3*frame; itx<3+3*frame; itx++) {
-X n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);
-/*
-X for (i=0; i<n10; i++) {
-X fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
-X if ((i%60)==59) fprintf(stderr,"\n");
-X }
-X fprintf(stderr,"\n");
-*/
-X last_n1 += n10+1; /* leave one slot between runs - presumably the EOSEQ terminator; confirm in saatran() */
-X }
-X n10 = last_n1-1; /* assigned but not used afterwards in this function */
-X
-X /* create aa1y from aa1x */
-X for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) { /* the outer fs++ steps over the EOSEQ that ended the inner loop */
-X for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
-X *fd=EOSEQ;
-X }
-#endif
-}
-X
-X
-/*
-X Alignment: stores the operations that align the protein and DNA sequences.
-X The codes stored in the array are as follows:
-X 0: delete of an amino acid.
-X 2: frame shift, 2 nucleotides match with an amino acid
-X 3: match an amino acid with a codon
-X 4: the other type of frame shift
-X 5: delete of a codon
-X
-X The first two elements of the array stores the starting point
-X in the protein and dna sequences in the local alignment.
-*/
-X
-#include "a_mark.h"
-X /*
-X calcons - turn the operation codes in a_res.res[0 .. nres-1] into the
-X three display strings: the DNA-side line, the protein-side line and
-X the marker line seqca.
-X
-X Which of seqc0/seqc1 receives the DNA side depends on the build:
-X FASTX writes DNA to seqc0 and protein to seqc1, TFASTX the reverse.
-X A frameshift (codes 2 and 4) produces two columns: the '/' or '\\'
-X marker against '-', then the residue pair.
-X
-X Also fills aln->amin0/amin1/smin0/smin1, amax0/amax1, nident, nsim,
-X ngap_q, ngap_l and nfs. *nc receives lenc, the number of columns
-X excluding the frameshift marker columns; the return value is lenc
-X plus the number of frameshift marker columns.
-X Only seqca is NUL-terminated here. n0, n1 and the locals i, j are
-X not used.
-X */
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{
-X int i0, i1, i, j;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char *sp0, *sp1, *spa, *sq;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X
-X
-#ifndef TFAST /* FASTX */
-X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
-X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
-X
-X ap0 = f_str->aa0y; /* translated DNA */
-X ap1 = aa1; /* protein */
-X
-X sp0 = seqc0;
-X sp1 = seqc1;
-#else /* TFASTX */
-X aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
-X aln->amin1 = aln->smin1 = a_res.min1; /* prot */
-X
-X ap1 = aa0; /* protein */
-X ap0 = f_str->aa1y; /* translated DNA */
-X
-X sp1 = seqc0;
-X sp0 = seqc1;
-#endif
-X
-X rp = a_res.res;
-X rpmax = rp+a_res.nres;
-X
-X spa = seqca;
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
-X i0 = a_res.min1; /* i0 indexes the translated DNA (ap0) */
-X i1 = a_res.min0; /* i1 indexes the protein (ap1) */
-X
-X while (rp < rpmax) {
-X /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
-X ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
-X */
-X switch (*rp++) {
-X case 0: /* aa insertion */
-X *sp0++ = '-';
-X *sp1++ = sq[ap1[i1++]];
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_d++;
-X break;
-X case 2: /* -1 frameshift */
-X nfs++;
-X *sp0++ = '/';
-X i0 -= 1;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 3: /* codon/aa match */
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 4: /* +1 frameshift */
-X nfs++;
-X *sp0++ = '\\';
-X i0 += 1;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 5: /* codon insertion */
-X *sp0++ = sq[ap0[i0]];
-X i0 += 3;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_p++;
-X break;
-X }
-X }
-X *spa = '\0';
-X
-#ifndef TFAST /* FASTX */
-X aln->amax0 = i0;
-X aln->amax1 = i1;
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0;
-X aln->amax0 = i1;
-X aln->amin1 = aln->smin1;
-X aln->amin0 = aln->smin0;
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X
-X if (lenc < 0) lenc = 1; /* lenc only counts up from 0 in this function */
-X *nc = lenc;
-/* now we have the middle, get the right end */
-X return lenc+not_c;
-}
-X /*
-X calcons_a - like calcons, but additionally writes an annotation line
-X seqc0a that runs in step with the output columns.
-X
-X The operation codes in a_res.res[0 .. nres-1] are turned into the
-X DNA-side line, the protein-side line and the marker line seqca
-X (FASTX: DNA in seqc0, protein in seqc1; TFASTX: the reverse). For a
-X residue pair the annotation character is ' ' in the FASTX build and
-X ann_arr[aa0a[i1]] in the TFASTX build; gap and frameshift marker
-X columns get ' '.
-X
-X Fix: a +1 frameshift (code 4) emits two columns, but only one
-X annotation character used to be written, leaving seqc0a one column
-X short from there on. It now writes both, as the -1 frameshift does.
-X
-X Also fills aln->amin0/amin1/smin0/smin1, amax0/amax1, nident, nsim,
-X ngap_q, ngap_l and nfs. *nc receives lenc; the return value is lenc
-X plus the number of frameshift marker columns. seqc0a and seqca are
-X NUL-terminated here.
-X */
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{
-X int i0, i1, i, j;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char *sp0, *sp0a, *sp1, *spa, *sq;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-#ifndef TFAST /* FASTX */
-X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
-X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
-X
-X ap0 = f_str->aa0y; /* translated DNA */
-X ap1 = aa1; /* protein */
-#else /* TFASTX */
-X aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
-X aln->amin1 = aln->smin1 = a_res.min1; /* prot */
-X
-X ap1 = aa0;
-X ap0 = f_str->aa1y;
-#endif
-X
-X rp = a_res.res;
-X rpmax = &a_res.res[a_res.nres];
-X
-#ifndef TFAST
-X sp0 = seqc0;
-X sp1 = seqc1;
-#else
-X sp1 = seqc0;
-X sp0 = seqc1;
-#endif
-X spa = seqca;
-X sp0a = seqc0a;
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
-X i0 = a_res.min1;
-X i1 = a_res.min0;
-X
-X while (rp < rpmax) {
-X /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
-X ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
-X */
-X switch (*rp++) {
-X case 0: /* aa insertion */
-X *sp0++ = '-';
-X *sp1++ = sq[ap1[i1++]];
-X *spa++ = M_DEL;
-X *sp0a++ = ' ';
-X lenc++;
-X ngap_d++;
-X break;
-X case 2: /* -1 frameshift */
-X nfs++;
-X *sp0++ = '/';
-X i0 -= 1;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X *sp0a++ = ' ';
-X not_c++;
-X
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 3: /* codon/aa match */
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 4: /* +1 frameshift */
-X nfs++;
-X *sp0a++ = ' '; /* annotation for the '\\' marker column */
-X *sp0++ = '\\';
-X i0 += 1;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X
-X if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
-X
-X /* annotation for the residue column, written before i1 advances */
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp0 = sq[ap0[i0]];
-X i0 += 3;
-X *sp1 = sq[ap1[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 5: /* codon insertion */
-X *sp0a++ = ' ';
-X *sp0++ = sq[ap0[i0]];
-X i0 += 3;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_p++;
-X break;
-X }
-X }
-X *sp0a = *spa = '\0';
-X
-#ifndef TFAST
-X aln->amax0 = i0;
-X aln->amax1 = i1;
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0;
-X aln->amax0 = i1;
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X
-X if (lenc < 0) lenc = 1;
-X *nc = lenc;
-/* now we have the middle, get the right end */
-X return lenc+not_c;
-}
-X
-/* calc_code - build an array of match/ins/del - length strings.
-X
-X Walks the operation codes in a_res.res[0 .. nres-1] and appends
-X run-length items (an op_char character followed by a count) to al_str
-X through update_code(). al_str must already hold a NUL-terminated
-X string on entry, because the remaining room is computed from
-X strlen(al_str); al_str_n is its total capacity.
-X
-X Runs of code 0, 3 and 5 are merged; a match involving '*' is emitted
-X under op 6; each frameshift flushes the current run and emits its own
-X item.
-X
-X Also fills aln->amin0/amin1/smin0/smin1, amax0/amax1, nident, nsim,
-X ngap_q, ngap_l and nfs. Returns lenc, the number of alignment columns
-X excluding frameshift marker columns.
-X
-X Fix: for a -1 frameshift the similarity lookup now happens after i0
-X has been moved back by one, so it scores the same residue that is
-X compared for identity (as calcons, calc_id and the +1 case here do).
-*/
-int calc_code(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, i, j;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char op_char[10];
-X int op, op_cnt;
-X char sp0, sp1, *sq;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X
-#ifndef TFAST /* FASTX */
-X strncpy(op_char,"- /=\\+*",sizeof(op_char));
-X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
-X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
-X
-X ap0 = f_str->aa0y;
-X ap1 = aa1;
-#else /* TFASTX */
-X strncpy(op_char,"+ /=\\-*",sizeof(op_char));
-X aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
-X aln->amin1 = aln->smin1 = a_res.min1; /* prot */
-X
-X ap1 = aa0;
-X ap0 = f_str->aa1y;
-#endif
-X
-X rp = a_res.res;
-X rpmax = &a_res.res[a_res.nres];
-X
-X op_cnt = lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
-X op = 3; /* code for a match - all alignments start with a match */
-X
-X i0 = a_res.min1;
-X i1 = a_res.min0;
-X
-X while (rp < rpmax) {
-X switch (*rp++) {
-X case 0: /* aa insertion */
-X if (op == 0) op_cnt++;
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 0; op_cnt = 1;
-X }
-X i1++;
-X lenc++;
-X ngap_d++;
-X break;
-X case 2: /* -1 frameshift */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 2; op_cnt = 1;
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 3; op_cnt = 1;
-X nfs++;
-X i0 -= 1;
-X not_c++;
-X /* score the residue at the shifted position */
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X lenc++;
-X break;
-X case 3: /* codon/aa match */
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X
-X if (op == 3 || op == 6) {
-X if (sp0 != '*' && sp1 != '*') {
-X if (op == 6 ) {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op_cnt = 1; op = 3;
-X }
-X else {op_cnt++;}
-X }
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op_cnt = 1; op = 6;
-X }
-X }
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X if (op == 2 || op == 4) op_cnt = 2;
-X else op_cnt = 1;
-X op = 3;
-X }
-X lenc++;
-X break;
-X case 4: /* +1 frameshift */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 4; op_cnt = 1;
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 3; op_cnt = 1;
-X
-X nfs++;
-X i0 += 1;
-X not_c++;
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X lenc++;
-X break;
-X case 5: /* codon insertion */
-X if (op == 5) op_cnt++;
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op = 5; op_cnt = 1;
-X }
-X i0 += 3;
-X lenc++;
-X ngap_p++;
-X break;
-X }
-X }
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char); /* flush the final run */
-X
-#ifndef TFAST
-X aln->amax0 = i0;
-X aln->amax1 = i1;
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0;
-X aln->amax0 = i1;
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X
-X if (lenc < 0) lenc = 1;
-X
-X return lenc;
-}
-X /*
-X update_code - append one run-length item, the character op_char[op]
-X followed by the decimal op_cnt, to the NUL-terminated string al_str.
-X
-X al_str_max is the room still left in al_str, counting the terminating
-X NUL (callers pass capacity minus strlen(al_str)). At most
-X al_str_max-1 characters are appended and strncat() always terminates
-X the result. Nothing is appended when al_str_max is 1 or less.
-X
-X Fix: the previous version also stored '\0' at al_str[al_str_max-1].
-X That index is measured from the start of the string, not from its
-X end, so it cut the string short as soon as it was more than half
-X full.
-X */
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
-X
-X char tmp_cnt[20];
-X
-X if (al_str_max <= 1) return; /* no room left for even one character */
-X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
-}
-X /*
-X calc_id - walk the operation codes in a_res.res[0 .. nres-1] and
-X count identities and similarities without producing any text.
-X
-X Fills aln->amin0/amin1/smin0/smin1, amax0/amax1, nident, nsim,
-X ngap_q, ngap_l and nfs (which of the two sequences is "0" and which
-X is "1" depends on whether TFAST is defined). A pair counts as
-X similar when its pst.pam2[0] entry is >= 0 and as identical when the
-X two characters are equal ignoring case.
-X
-X Returns lenc, the number of alignment columns excluding frameshift
-X marker columns. n0, n1 and the locals i, j, itmp are not used.
-X */
-int calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X int i0, i1, i, j;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char sp0, sp1, *sq;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X
-#ifndef TFAST /* FASTX */
-X aln->amin1 = aln->smin1 = a_res.min0; /* prot */
-X aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
-X
-X ap0 = f_str->aa0y;
-X ap1 = aa1;
-#else /* TFASTX */
-X aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
-X aln->amin1 = aln->smin1 = a_res.min1; /* prot */
-X
-X ap1 = aa0;
-X ap0 = f_str->aa1y;
-#endif
-X
-X rp = a_res.res;
-X rpmax = &a_res.res[a_res.nres];
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
-X i0 = a_res.min1; /* i0 indexes the translated DNA (ap0) */
-X i1 = a_res.min0; /* i1 indexes the protein (ap1) */
-X
-X while (rp < rpmax) {
-X /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
-X ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
-X */
-X switch (*rp++) {
-X case 0: /* aa insertion */
-X i1++;
-X lenc++;
-X ngap_d++;
-X break;
-X case 2: /* -1 frameshift */
-X nfs++;
-X i0 -= 1;
-X not_c++;
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X lenc++;
-X break;
-X case 3: /* codon/aa match */
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X lenc++;
-X break;
-X case 4: /* +1 frameshift */
-X nfs++;
-X i0 += 1;
-X not_c++;
-X if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
-X sp0 = sq[ap0[i0]];
-X i0 += 3;
-X sp1 = sq[ap1[i1++]];
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X lenc++;
-X break;
-X case 5: /* codon insertion */
-X i0 += 3;
-X lenc++;
-X ngap_p++;
-X break;
-X }
-X }
-X
-#ifndef TFAST
-X aln->amax0 = i0;
-X aln->amax1 = i1;
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0;
-X aln->amax0 = i1;
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X
-X if (lenc < 0) lenc = 1; /* lenc only counts up from 0 in this function */
-/* now we have the middle, get the right end */
-X return lenc;
-}
-X
-#ifdef PCOMPLIB
-#include "p_mw.h"
-void /* update_params - refresh *ppst from a received qm_msg; only compiled when PCOMPLIB is defined */
-update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
-{
-X ppst->n0 = qm_msg->n0; /* n0 is the only field copied */
-}
-#endif
-SHAR_EOF
-chmod 0644 dropfx.c ||
-echo 'restore of dropfx.c failed'
-Wc_c="`wc -c < 'dropfx.c'`"
-test 73324 -eq "$Wc_c" ||
- echo 'dropfx.c: original size 73324, current size' "$Wc_c"
-fi
-# ============= dropfz2.c ==============
-if test -f 'dropfz2.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropfz2.c (File already exists)'
-else
-echo 'x - extracting dropfz2.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropfz2.c' &&
-X
-/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropfz2.c,v 1.57 2007/04/26 18:37:19 wrp Exp $ */
-X
-/* 18-Sept-2006 - removed static global variables for alignment */
-X
-/* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
-X
-/* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
-X
-/* implements an improved version of the fasty algorithm, see:
-X
-X W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
-X DNA sequences with protein sequences" Genomics 46:24-36
-X
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <ctype.h>
-X
-#include "defs.h"
-#include "param.h"
-#define XTERNAL
-#include "upam.h"
-#include "uascii.h"
-X
-#define NT_N 16
-X
-/* globals for fasta */
-#define MAXWINDOW 64
-X
-#ifndef MAXSAV
-#define MAXSAV 10
-#endif
-X
-#ifndef ALLOCN0
-static char *verstr="3.5 Sept 2006";
-#else
-static char *verstr="3.5an0 Sept 2006";
-#endif
-X
-struct dstruct /* diagonal structure for saving current run */
-{
-X int score; /* hash score of current match */
-X int start; /* start of current match */
-X int stop; /* end of current match */
-X struct savestr *dmax; /* location in vmax[] where best score data saved */
-};
-X
-struct savestr
-{
-X int score; /* pam score with segment optimization */
-X int score0; /* pam score of best single segment */
-X int gscore; /* score from global match */
-X int dp; /* diagonal of match */
-X int start; /* start of match in lib seq */
-X int stop; /* end of match in lib seq */
-};
-X
-void savemax();
-void kpsort();
-X
-struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
-X
-struct wgt { int iii, ii, iv;};
-struct wgtc {char c2, c3, c4, c5;};
-X
-typedef struct st_s { int C, I, D;} *st_ptr;
-X
-struct f_struct {
-X struct dstruct *diag;
-X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
-X struct savestr *vptr[MAXSAV];
-X struct savestr *lowmax;
-X int ndo;
-X int noff;
-X int hmask; /* hash constants */
-X int *pamh1; /* pam based array */
-X int *pamh2; /* pam based kfact array */
-X int *link, *harr; /* hash arrays */
-X int kshft; /* shift width */
-X int nsav, lowscor; /* number of saved runs, worst saved run */
-#ifndef TFAST
-X unsigned char *aa0x, *aa0v; /* aa0x - 111122223333 */
-#else
-X unsigned char *aa1x, *aa1v; /* aa1x - 111122223333 */
-#endif /* aa1v - computed codons */
-X struct sx_s *cur;
-X struct wgt **weight0;
-X struct wgt **weight1;
-X struct wgtc **weight_c;
-X int *waa;
-X int *res;
-X int max_res;
-X st_ptr up, down, tp;
-};
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X /* prototypes for the scoring, joining and banded-alignment routines
-X    referenced in this file */
-static int dmatchx(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int hoff, int window,
-X int **pam2, int gdelval, int ggapval, int gshift,
-X struct f_struct *f_str);
-X
-int shscore(unsigned char *aa0, int n0, int **pam2);
-int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
-int spam (const unsigned char *aa0, const unsigned char *aa1,
-X struct savestr *dmax, int **pam2,
-X struct f_struct *f_str);
-int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
-int lx_band(const unsigned char *prot_seq, int len_prot,
-X const unsigned char *dna_prot_seq, int len_dna_prot,
-X int **pam_matrix, int gopen, int gext,
-X int gshift, int start_diag, int width, struct f_struct *f_str);
-static void update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
-extern void w_abort (char *p, char *p1);
-extern void aagetmap(char *to, int n);
-X
-/* initialize for fasta */
-/* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
-/* Assume naa=47, and that wgts[47][23] matches both upper- and lower-case
-amino acids with another amino acid. Also assume that DNA letters have
-no upper/lower-case distinction. If DNA sequences are also allowed to
-contain upper- and lower-case letters, more needs to be changed: not
-only here, but also in the alignment code, where the way a codon is
-packed into a number between 0 and 63 needs to be changed. */
-X
-/* modified so that if **weightci==NULL, do not fiddle with characters */
-X
-/* init_weights() - build the codon-versus-residue score tables.
-X *
-X * weighti   out: receives naa+1 rows of 256 struct wgt
-X * weightci  out: when non-NULL, receives a parallel table of struct wgtc
-X *           holding the letters behind each score; when NULL no letter
-X *           table is built
-X * wgts      residue scoring matrix; rows 0..naa are read
-X * gshift    subtracted from the ii and iv scores
-X * gsubs     subtracted once for every nucleotide position in which the
-X *           observed codon differs from the candidate codon
-X * naa       largest residue index
-X *
-X * The two intermediate tables used to be fixed [49][64] stack arrays
-X * indexed 0..naa, which overran for naa > 48; they are now sized from
-X * naa on the heap and released before returning.
-X * Exits with a message if any allocation fails.
-X */
-void
-init_weights(struct wgt ***weighti, struct wgtc ***weightci,
-X             int **wgts, int gshift, int gsubs, int naa)
-{
-X  int i, j, do_wgtc=0;
-X  int aa, b, a, x, y, z;
-X  int *wwt, e;
-X  struct wgt **weight;
-X  struct wgtc **weightc = NULL;
-X  char aacmap[64];
-X  int (*temp)[64];   /* temp[aa][codon]: best score reachable by substitution */
-X  char (*le)[64];    /* le[aa][codon]: the codon that produced that score */
-X
-X  if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
-X      ==NULL) {
-X    fprintf(stderr," cannot allocate weights array: %d\n",naa);
-X    exit(1);
-X  }
-X
-X  weight = *weighti;
-X  for (aa=0; aa <= naa; aa++) {
-X    if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
-X        ==NULL) {
-X      fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
-X      exit(1);
-X    }
-X  }
-X
-X  if (weightci !=NULL) {
-X    if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
-X                                          sizeof(struct wgtc *)))==NULL) {
-X      fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
-X      exit(1);
-X    }
-X    weightc = *weightci;
-X
-X    for (aa=0; aa <= naa; aa++) {
-X      if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
-X          ==NULL) {
-X        fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
-X        exit(1);
-X      }
-X    }
-X    do_wgtc = 1;
-X  }
-X  else do_wgtc = 0;
-X
-X  /* scratch tables, one row per residue index 0..naa */
-X  temp = (int (*)[64])calloc((size_t)(naa+1), sizeof(*temp));
-X  le = (char (*)[64])calloc((size_t)(naa+1), sizeof(*le));
-X  if (temp == NULL || le == NULL) {
-X    fprintf(stderr," cannot allocate codon score tables: %d\n",naa);
-X    exit(1);
-X  }
-X
-X  aagetmap(aacmap,64);
-X
-X  for (aa = 0; aa <= naa; aa++) {
-X    wwt = wgts[aa];
-X    for (i = 0; i < 64; i++) {        /* i is the codon actually seen */
-X      x = -1000;
-X      y = i;
-X      for (j = 0; j < 64; j++) {      /* j iterates through the codons */
-X        z = i ^ j;                    /* non-zero 2-bit fields mark differing bases */
-X        b = 0;                        /* score = 0 */
-X        if (z % 4) b -= gsubs;
-X        if (z / 16) b -= gsubs;
-X        if ((z / 4) % 4) b -= gsubs;
-X        b += wwt[aascii[aacmap[j]]];  /* add the match score for char j */
-X        if (b > x) {
-X          x = b;                      /* x has the score */
-X          y = j;                      /* y has the character */
-X        }
-X      }
-X      temp[aa][i] = x;
-X      le[aa][i] = y;
-X    }
-X  }
-X
-X  for (aa= 0; aa <= naa; aa++) {
-X    wwt = temp[aa];
-X    for (i = 0; i < 256; i++) {
-X      /* iv: remove each of the four 2-bit fields of i in turn, keep the best */
-X      for (x=-100,b = 0; b < 4; b++) {
-X        z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));
-X        if (x < (e=wwt[z])) {
-X          x = e;
-X          if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
-X        }
-X      }
-X      weight[aa][i].iv=x-gshift;
-X      weight[aa][i].iii = wwt[i%64];
-X
-X      if (do_wgtc) {
-X        weightc[aa][i].c5 = aacmap[le[aa][i%64]];
-X        weightc[aa][i].c3 = aacmap[i%64];
-X      }
-X      /* ii: insert each of four values at each of three places in i%16 */
-X      x = i %16;
-X      for (y = -100, b = 0; b < 3; b++) {
-X        z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2)));
-X        for (a = 0; a < 4; a++) {
-X          if ((e =wwt[z+(a<<(b*2))]) > y) {
-X            y = e;
-X            if (do_wgtc)
-X              weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
-X          }
-X        }
-X      }
-X      weight[aa][i].ii = y-gshift;
-X    }
-X  }
-X  /*106=CGGG*/
-X  for (aa = 0; aa <= naa; aa++) {
-X    weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
-X    weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
-X    if (do_wgtc) {
-X      weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
-X        = weightc[aa][106].c2 = 'X';
-X    }
-X  }
-X
-X  free(le);
-X  free(temp);
-}
-X
-/* free_weights() - release the tables built by init_weights().
-X *
-X * weighti0, weighti1, weightci each point at a table of naa+1 rows.
-X * A NULL argument or a NULL table is skipped, matching init_weights(),
-X * which accepts weightci == NULL.  Each table pointer is reset to NULL
-X * after it is freed so the caller is not left with a dangling pointer.
-X */
-void
-free_weights(struct wgt ***weighti0, struct wgt ***weighti1,
-X             struct wgtc ***weightci, int naa)
-{
-X  int aa;
-X
-X  if (weighti0 != NULL && *weighti0 != NULL) {
-X    for (aa=0; aa <= naa; aa++) {free((*weighti0)[aa]);}
-X    free(*weighti0);
-X    *weighti0 = NULL;
-X  }
-X
-X  if (weighti1 != NULL && *weighti1 != NULL) {
-X    for (aa=0; aa <= naa; aa++) {free((*weighti1)[aa]);}
-X    free(*weighti1);
-X    *weighti1 = NULL;
-X  }
-X
-X  if (weightci != NULL && *weightci != NULL) {
-X    for (aa=0; aa <= naa; aa++) {free((*weightci)[aa]);}
-X    free(*weightci);
-X    *weightci = NULL;
-X  }
-}
-X
-/* pre_com() - write a running 8-bit code for aa0 into aa0v[0..n0-3].
-X * The code is shifted left two bits and hnt[] of the next base added,
-X * keeping the low 8 bits.  If any of the three most recent bases is
-X * NT_N the slot gets 106; a computed 106 (CGGG) is stored as 42 (AGGG).
-X */
-static void
-pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v)
-{
-X  int code, pos;
-X
-X  code = (hnt[aa0[0]] << 2) + hnt[aa0[1]];
-X  pos = 2;
-X  while (pos < n0) {
-X    code = ((code << 2) + hnt[aa0[pos]]) & 255;
-X    if (aa0[pos] != NT_N && aa0[pos-1] != NT_N && aa0[pos-2] != NT_N) {
-X      if (code == 106/*CGGG*/) code = 42/*AGGG*/;
-X      aa0v[pos-2] = code;
-X    }
-X    else {
-X      aa0v[pos-2] = 106;
-X    }
-X    pos++;
-X  }
-}
-X
-/* pre_com_r() - like pre_com(), but walks aa0 from its last base to its
-X * first and uses 3 - hnt[] for each base.  Output slot k is derived
-X * from aa0[n0-3-k] and the two bases after it; 106 marks a window that
-X * contains NT_N, and a computed 106 is stored as 42.
-X */
-static void
-pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v)
-{
-X  int code, out, src;
-X
-X  code = ((3 - hnt[aa0[n0-1]]) << 2) + 3 - hnt[aa0[n0-2]];
-X  for (out = 0; out < n0 - 2; out++) {
-X    src = n0 - 3 - out;
-X    code = ((code << 2) + 3 - hnt[aa0[src]]) & 255;
-X    if (aa0[src] != NT_N && aa0[src+1] != NT_N && aa0[src+2] != NT_N) {
-X      if (code == 106) code = 42;
-X      aa0v[out] = code;
-X    }
-X    else {
-X      aa0v[out] = 106;
-X    }
-X  }
-}
-X
-/* init_work() - allocate and fill the per-query f_struct.
-X *
-X * aa0, n0  query sequence and its length
-X * ppst     search parameters; param_u.fa.cgap, param_u.fa.pgap and
-X *          (unless optcut_set is 1) param_u.fa.optcut are stored here
-X * f_arg    out: receives the newly allocated f_struct
-X *
-X * Unless TFAST is defined the query is translated in three frames by
-X * saatran() into f_str->aa0x, and that translation is what gets hashed.
-X * Exits with a message on any allocation failure, now including the
-X * f_struct itself.
-X */
-void
-init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X int mhv, phv;
-X int hmax;
-X int i0, hv;
-X int pamfact;
-X int btemp;
-X struct f_struct *f_str;
-X struct bdstr *bss;
-X /* these used to be globals, but do not need to be */
-X int ktup, fact, kt1, lkt;
-X
-X int maxn0;
-X int *pwaa;
-X int i, j, q;
-X struct swstr *ss, *r_ss;
-X int *waa;
-X int *res;
-X int nsq, ip, *hsq, naat;
-#ifndef TFAST
-X int last_n0, itemp, dnav;
-X unsigned char *fd, *fs, *aa0x, *aa0v;
-X int n0x, n0x3;
-#endif
-X
-X /* the message used to say 'X' although the test is against 'N' */
-X if (nt[NT_N] != 'N') {
-X fprintf(stderr," nt[NT_N] (%d) != 'N' (%c) - recompile\n",NT_N,nt[NT_N]);
-X exit(1);
-X }
-X
-X if (ppst->ext_sq_set) {
-X nsq = ppst->nsqx; ip = 1;
-X hsq = ppst->hsqx;
-X }
-X else {
-X nsq = ppst->nsq; ip = 0;
-X hsq = ppst->hsq;
-X }
-X
-X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
-X if (f_str == NULL) {
-X fprintf (stderr, " cannot allocate f_struct\n");
-X exit (1);
-X }
-X
-X btemp = 2 * ppst->param_u.fa.bestoff / 3 +
-X n0 / ppst->param_u.fa.bestscale +
-X ppst->param_u.fa.bkfact *
-X (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
-X btemp = min (btemp, ppst->param_u.fa.bestmax);
-X if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
-X
-X ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
-X if (ppst->param_u.fa.optcut_set != 1)
-#ifndef TFAST
-X ppst->param_u.fa.optcut = (btemp*5)/4;
-#else
-X ppst->param_u.fa.optcut = (btemp*4)/3;
-#endif
-X
-#ifdef OLD_FASTA_GAP
-X ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
-#else
-X ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
-#endif
-X pamfact = ppst->param_u.fa.pamfact;
-X ktup = ppst->param_u.fa.ktup;
-X fact = ppst->param_u.fa.scfact * ktup;
-X
-#ifndef TFAST
-X /* before hashing, we must set up some space and translate the sequence */
-X
-X maxn0 = n0 + 2;
-X if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
-X exit (1);
-X }
-X aa0x++;
-X f_str->aa0x = aa0x;
-X
-X
-X if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
-X exit (1);
-X }
-X aa0v++;
-X f_str->aa0v = aa0v;
-X
-X /* make a precomputed codon number series */
-X pre_com(aa0, n0, aa0v);
-X
-X last_n0 = 0;
-X for (itemp=0; itemp<3; itemp++) {
-X n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
-X /* for (i=0; i<n0x; i++) {
-X fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
-X if ((i%60)==59) fprintf(stderr,"\n");
-X }
-X fprintf(stderr,"\n");
-X */
-X last_n0 += n0x+1;
-X }
-X
-X /* fprintf(stderr,"\n"); */
-X n0x = n0;
-X n0x3 = n0x/3;
-X
-X /* now switch aa0 and aa0x for hashing functions */
-X fs = aa0;
-X aa0 = aa0x;
-X aa0x = fs;
-#endif
-X
-X if (ppst->ext_sq_set) naat = MAXLC;
-X else naat = MAXUC;
-X
-X init_weights(&f_str->weight0, NULL,
-X ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
-X init_weights(&f_str->weight1, &f_str->weight_c,
-X ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
-X
-X if (pamfact == -1)
-X pamfact = 0;
-X else if (pamfact == -2)
-X pamfact = 1;
-X
-X /* NOTE(review): the test reads hsq[] but the stored value comes from
-X ppst->hsq[]; the two differ when ext_sq_set - confirm this is intended */
-X for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
-X if (hsq[i0] < NMAP && hsq[i0] > mhv)
-X mhv = ppst->hsq[i0];
-X
-X if (mhv <= 0)
-X {
-X fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
-X exit (1);
-X }
-X
-X for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
-X
-/* kshft = 2; */
-X kt1 = ktup - 1;
-X hv = 1;
-X for (i0 = 0; i0 < ktup; i0++)
-X hv = hv << f_str->kshft;
-X hmax = hv;
-X f_str->hmask = (hmax >> f_str->kshft) - 1;
-X
-X if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash array\n");
-X exit (1);
-X }
-X if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh1 array\n");
-X exit (1);
-X }
-X if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate pamh2 array\n");
-X exit (1);
-X }
-X if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
-X fprintf (stderr, " cannot allocate hash link array");
-X exit (1);
-X }
-X
-X for (i0 = 0; i0 < hmax; i0++)
-X f_str->harr[i0] = -1;
-X for (i0 = 0; i0 < n0; i0++)
-X f_str->link[i0] = -1;
-X
-X /* encode the aa0 array */
-X phv = hv = 0;
-X lkt = kt1;
-X for (i0 = 0; i0 < min(n0,lkt); i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {
-X hv=phv=0; lkt = i0+ktup; continue;
-X }
-X hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
-X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
-X }
-X
-X for (; i0 < n0; i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {
-X hv=phv=0;
-X /* restart hv, phv calculation */
-X for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
-X if (hsq[aa0[i0]] >= NMAP) {
-X hv=phv=0;
-X lkt = i0+ktup;
-X continue;
-X }
-X hv = (hv << f_str->kshft) + hsq[aa0[i0]];
-X phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
-X }
-X }
-X if (i0 >= n0) break;
-X hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
-X f_str->link[i0] = f_str->harr[hv];
-X f_str->harr[hv] = i0;
-X if (pamfact) {
-X f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
-X if (hsq[aa0[i0-kt1]] < NMAP)
-X phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
-X }
-X else f_str->pamh2[hv] = fact * ktup;
-X }
-X
-/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
-X pam2[0][0] is now undefined for consistency with blast
-*/
-X
-X if (pamfact)
-X for (i0 = 1; i0 <= ppst->nsq; i0++)
-X f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
-X else
-X for (i0 = 1; i0 <= ppst->nsq; i0++)
-X f_str->pamh1[i0] = fact;
-X
-X f_str->ndo = 0; /* used to save time on diagonals with long queries */
-X
-X
-#ifndef ALLOCN0
-X if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
-X sizeof (struct dstruct)))==NULL) {
-X fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
-X MAXDIAG *sizeof (struct dstruct));
-X exit (1);
-X };
-#else
-X if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
-X sizeof (struct dstruct)))==NULL) {
-X fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
-X (long)n0*sizeof (struct dstruct));
-X exit (1);
-X };
-#endif
-X
-#ifndef TFAST
-X /* done hashing, now switch aa0, aa0x back */
-X fs = aa0;
-X aa0 = aa0x;
-X aa0x = fs;
-#else
-X if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
-X sizeof(unsigned char)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
-X exit (1);
-X }
-X f_str->aa1x++;
-X
-X if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
-X sizeof(unsigned char))) == NULL) {
-X fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
-X exit (1);
-X }
-X f_str->aa1v++;
-X
-#endif
-X
-X if ((waa= (int *)malloc (sizeof(int)*(ppst->nsq+1)*n0)) == NULL) {
-X fprintf(stderr,"cannot allocate waa struct %3d\n",ppst->nsq*n0);
-X exit(1);
-X }
-X
-X pwaa = waa;
-X for (i=0; i<=ppst->nsq; i++) {
-X for (j=0;j<n0; j++) {
-X *pwaa = ppst->pam2[ip][i][aa0[j]];
-X pwaa++;
-X }
-X }
-X f_str->waa = waa;
-X
-#ifndef TFAST
-X maxn0 = max(2*n0,MIN_RES);
-#else
-X maxn0 = max(4*n0,MIN_RES);
-#endif
-X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X exit(1);
-X }
-X f_str->res = res;
-X f_str->max_res = maxn0;
-X
-X *f_arg = f_str;
-}
-X
-/* get_param() - describe the search parameters as text.
-X   pstring1 is a message to the manager, currently 512; it is filled
-X   with sprintf()/strcat() and no length check is made here */
-/* pstring2 is the same information, but in a markx==10 format; it is
-X   skipped when NULL */
-void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-#ifndef TFAST
-X char *pg_str="FASTY";
-#else
-X char *pg_str="TFASTY";
-#endif
-X
-X if (!pstr->param_u.fa.optflag) /* plain form: no "opt:" field */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
-#else
-X sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
-#endif
-X pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->gdelval, pstr->ggapval, pstr->gshift, pstr->gsubs,
-X pstr->param_u.fa.optwid);
-X else /* optimized form: also reports optcut */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
-#else
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
-#endif
-X pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
-X pstr->gshift,pstr->gsubs,pstr->param_u.fa.optwid);
-X
-X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
-X /*
-X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
-X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
-X */
-X
-X if (pstring2 != NULL) {
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
-; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#else
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
-; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#endif
-X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
-X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
-X pstr->param_u.fa.cgap);
-X }
-}
-X
-/* close_work() - free the f_struct that *f_arg points at, together with
-X * every buffer hanging off it, and set *f_arg to NULL.  Does nothing
-X * when *f_arg is already NULL.  aa0 and n0 are not used.
-X */
-void
-close_work (const unsigned char *aa0, int n0,
-X            struct pstruct *ppst,
-X            struct f_struct **f_arg)
-{
-X  struct f_struct *work = *f_arg;
-X  int n_rows;
-X
-X  if (work == NULL) return;
-X
-X  n_rows = (ppst->ext_sq_set) ? MAXLC : MAXUC;
-X  free_weights(&work->weight0,&work->weight1,&work->weight_c,n_rows);
-X  free(work->cur);
-X
-X  /* these buffers were advanced by one after allocation, so step back */
-#ifndef TFAST
-X  free(--work->aa0v);
-X  free(--work->aa0x);
-#else /* TFAST */
-X  free(--work->aa1x);
-X  free(--work->aa1v);
-#endif
-X
-X  free(work->res);
-X  free(work->waa);
-X  free(work->diag);
-X  free(work->link);
-X  free(work->pamh2);
-X  free(work->pamh1);
-X  free(work->harr);
-X  free(work);
-X  *f_arg = NULL;
-}
-X
-/* do_fasta() - hash-based scan of aa1 against the query lookup table,
-X * followed by rescoring (spam), joining (sconn) and, when
-X * ppst->param_u.fa.optflag is set and the initial score exceeds optcut,
-X * a banded alignment (dmatchx).
-X *
-X * rst->score[1] = best single rescored region
-X * rst->score[0] = max(joined score, score[1])
-X * rst->score[2] = banded score when computed, otherwise score[0]
-X * *hoff         = offset handed to dmatchx (only written when it is called)
-X *
-X * All three scores are 0 when n1 < ktup or nothing was saved, and -1
-X * when n0+n1+1 >= MAXDIAG.
-X */
-void do_fasta (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst, struct f_struct *f_str,
-X struct rstruct *rst, int *hoff)
-{
-X int nd; /* diagonal array size */
-X int lhval;
-X int kfact;
-X int i;
-X register struct dstruct *dptr;
-X register int tscor;
-X int xdebug = 0;
-X
-#ifndef ALLOCN0
-X register struct dstruct *diagp;
-#else
-X register int dpos;
-X int lposn0;
-#endif
-X struct dstruct *dpmax;
-X register int lpos;
-X int tpos;
-X struct savestr *vmptr;
-X int scor, tmp;
-X int im, ib, nsave;
-X int ktup, kt1, *hsq, ip, lkt;
-#ifndef TFAST
-X int n0x31, n0x32;
-X n0x31 = (n0-2)/3;
-X n0x32 = n0x31+1+(n0-n0x31-1)/2;
-#else
-X unsigned char *fs, *fd;
-X int n1x31, n1x32, last_n1, itemp;
-X n1x31 = (n1-2)/3;
-X n1x32 = n1x31+1+(n1-n1x31-1)/2;
-#endif
-X
-X if (ppst->ext_sq_set) {
-X ip = 1;
-X hsq = ppst->hsqx;
-X }
-X else {
-X ip = 0;
-X hsq = ppst->hsq;
-X }
-X
-X ktup = ppst->param_u.fa.ktup;
-X kt1 = ktup-1;
-X
-X if (n1 < ktup) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-X if (n0+n1+1 >= MAXDIAG) {
-X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
-X rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X return;
-X }
-X
-X f_str->noff = n0 - 1;
-X
-#ifdef ALLOCN0
-X nd = n0;
-#endif
-X
-#ifndef ALLOCN0
-X nd = n0 + n1;
-#endif
-X
-X dpmax = &f_str->diag[nd];
-X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
-X {
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X vmptr->score = 0;
-X f_str->lowmax = f_str->vmax;
-X f_str->lowscor = 0;
-X
-X /* debugging probe; the length test used to be n1 > 1000, which let
-X aa1[1400] and aa1[1401] be read past the end of shorter sequences */
-X if (n1 > 1401 && aa1[0]==23 && aa1[100]==23 &&
-X aa1[1400]==23 && aa1[1401]!=23) {
-X xdebug = 1;
-X }
-X else xdebug = 0;
-X
-X /* start hashing */
-X lhval = 0;
-X lkt = kt1;
-X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
-X /* restart lhval calculation */
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lhval = 0; lkt=lpos+ktup;
-X continue;
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
-X }
-X
-#ifndef ALLOCN0
-X diagp = &f_str->diag[f_str->noff + lkt];
-X for (; lpos < n1; lpos++, diagp++) {
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lpos++ ; diagp++;
-X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
-X if (lpos >= n1) break;
-X lhval = 0;
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
-#else
-X lposn0 = f_str->noff + lpos;
-X for (; lpos < n1; lpos++, lposn0++) {
-X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X dpos = lposn0 - tpos;
-X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
-#endif
-X tscor += ktup;
-X if ((tscor -= lpos) <= 0) {
-X scor = dptr->score;
-X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
-#ifdef ALLOCN0
-X savemax (dptr, dpos, f_str);
-#else
-X savemax (dptr, f_str);
-#endif
-X if ((tscor += scor) >= kfact) {
-X dptr->score = tscor;
-X dptr->stop = lpos;
-X }
-X else {
-X dptr->score = kfact;
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X }
-X else {
-X dptr->score += f_str->pamh1[aa0[tpos]];
-X dptr->stop = lpos;
-X }
-X }
-X else {
-X dptr->score = f_str->pamh2[lhval];
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X } /* end tpos */
-X
-#ifdef ALLOCN0
-X /* reinitialize diag structure */
-X loopl:
-X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor)
-X savemax (dptr, lpos, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr->score = 0;
-#endif
-X } /* end lpos */
-X
-#ifdef ALLOCN0
-X for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
-X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
-X savemax (dptr, dpos, f_str);
-X }
-#else
-X for (dptr = f_str->diag; dptr < dpmax;) {
-X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X f_str->ndo = nd;
-#endif
-X
-/*
-X at this point all of the elements of aa1[lpos]
-X have been searched for elements of aa0[tpos]
-X with the results in diag[dpos]
-*/
-X /*
-X if (xdebug)
-X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",
-X n0, f_str->noff,n1,n1x31,n1x32);
-X */
-X
-X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X {
-X /*
-X if (xdebug)
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+vmptr->start-vmptr->dp,
-X f_str->noff+vmptr->stop-vmptr->dp,
-X vmptr->start,vmptr->stop,
-X vmptr->dp,vmptr->score);
-X */
-X if (vmptr->score > 0) {
-X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[0], f_str);
-X f_str->vptr[nsave++] = vmptr;
-X }
-X }
-X
-X if (nsave <= 0) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-#ifndef TFAST
-X /* FASTX code here to modify the start, stop points for
-X the three phases of the translated protein sequence
-X */
-X
-X /*
-X fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X
-X fprintf(stderr,"---\n");
-X */
-X
-X for (ib=0; ib<nsave; ib++) {
-X if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
-X f_str->vptr[ib]->dp += n0x32;
-X if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
-X f_str->vptr[ib]->dp += n0x31;
-X }
-X
-X /*
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X */
-#else
-X /* TFAST code here to modify the start, stop points for
-X the three phases of the translated protein sequence
-X TFAST modifies library start points, rather than
-X query start points
-X */
-X
-X /*
-X fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",n0, f_str->noff,n1,n1x31,n1x32);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X
-X fprintf(stderr,"---\n");
-X */
-X
-X for (ib=0; ib<nsave; ib++) {
-X if (f_str->vptr[ib]->start >= n1x32) {
-X f_str->vptr[ib]->start -= n1x32;
-X f_str->vptr[ib]->stop -= n1x32;
-X f_str->vptr[ib]->dp -= n1x32;
-X }
-X if (f_str->vptr[ib]->start >= n1x31) {
-X f_str->vptr[ib]->start -= n1x31;
-X f_str->vptr[ib]->stop -= n1x31;
-X f_str->vptr[ib]->dp -= n1x31;
-X }
-X }
-X
-X /*
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X */
-X
-#endif /* TFAST */
-X
-X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
-X ppst->param_u.fa.pgap, f_str);
-X
-X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
-X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
-X
-/* kssort (f_str->vptr, nsave); */
-X
-X rst->score[1] = vmptr->score;
-X rst->score[0] = max (scor, vmptr->score);
-X rst->score[2] = rst->score[0]; /* initn */
-X
-X if (ppst->param_u.fa.optflag) {
-X if (rst->score[0] > ppst->param_u.fa.optcut) {
-#ifndef TFAST
-X rst->score[2] = dmatchx(aa0, n0,aa1,n1,*hoff=f_str->noff - vmptr->dp,
-X ppst->param_u.fa.optwid, ppst->pam2[0],
-X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
-#else /* TFAST */
-X /* generate f_str->aa1x */
-/*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[aa1[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-X fprintf(stderr,"\n-----\n");
-*/
-/*
-X fprintf(stderr,"n1: %d, aa1x[n1]: %d; EOSEQ: %d\n",
-X n1,f_str->aa1x[n1],EOSEQ);
-X for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
-X for (fd= &f_str->aa1x[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
-X fprintf(stderr,"fs stopped at: %d\n",(int)(fs-f_str->aa1x));
-X *fd=EOSEQ;
-X }
-*/
-/*
-X for (i=0; i<n1; i++) {
-X fputc(ppst->sq[f_str->aa1x[i]],stderr);
-X if (i%60==59) fputc('\n',stderr);
-X }
-*/
-X rst->score[2] = dmatchx(aa0, n0, aa1, n1, *hoff=vmptr->dp-f_str->noff,
-X ppst->param_u.fa.optwid, ppst->pam2[0],
-X ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
-#endif /* TFAST */
-X }
-X }
-}
-X
-/* do_work() - score one library sequence against the query.
-X * Sets rst->escore to 1.0 and rst->segnum/seglen to 1, zeroes the three
-X * scores when n1 is shorter than ktup, and otherwise calls do_fasta().
-X * Without TFAST the stored query translation f_str->aa0x is compared
-X * with aa1 as given.  With TFAST aa1 is first packed into f_str->aa1v
-X * by pre_com() (frame 0) or pre_com_r(), then translated by saatran()
-X * in frames frame*3 .. frame*3+2 into f_str->aa1x.
-X */
-void do_work (const unsigned char *aa0, int n0,
-X              const unsigned char *aa1, int n1,
-X              int frame,
-X              struct pstruct *ppst,
-X              struct f_struct *f_str,
-X              int qr_flg, struct rstruct *rst)
-{
-X  int hoff;
-#ifdef TFAST
-X  int xl_len, next_off, k;
-X  unsigned char *xl_buf;
-#endif
-X
-X  rst->escore = 1.0;
-X  rst->seglen = 1;
-X  rst->segnum = 1;
-X
-X  if (n1 < ppst->param_u.fa.ktup) {
-X    rst->score[2] = 0;
-X    rst->score[1] = 0;
-X    rst->score[0] = 0;
-X    return;
-X  }
-X
-#ifdef TFAST
-X  /* make a precomputed codon number series */
-X  if (frame != 0)
-X    pre_com_r(aa1, n1, f_str->aa1v);
-X  else
-X    pre_com(aa1, n1, f_str->aa1v);
-X
-X  /* make translated sequence: three frames laid end to end, each
-X     followed by one separator slot */
-X  xl_buf = f_str->aa1x;
-X  next_off = 0;
-X  for (k = 0; k < 3; k++) {
-X    xl_len = saatran(aa1, &xl_buf[next_off], n1, frame*3 + k);
-X    next_off += xl_len + 1;
-X  }
-X
-X  do_fasta (aa0, n0, f_str->aa1x, next_off - 1, ppst, f_str, rst, &hoff);
-#else
-X  do_fasta (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
-#endif
-}
-X
-/* do_opt() - run do_fasta() with ppst->param_u.fa.optflag forced to 1,
-X * then put the caller's optflag back.  The query handed to do_fasta()
-X * is f_str->aa0x unless TFAST is defined, in which case it is aa0.
-X */
-void do_opt (const unsigned char *aa0, int n0,
-X             const unsigned char *aa1, int n1,
-X             int frame,
-X             struct pstruct *ppst,
-X             struct f_struct *f_str,
-X             struct rstruct *rst)
-{
-X  int saved_optflag, hoff;
-X  const unsigned char *query;
-X
-#ifndef TFAST
-X  query = f_str->aa0x;
-#else
-X  query = aa0;
-#endif
-X
-X  saved_optflag = ppst->param_u.fa.optflag;
-X  ppst->param_u.fa.optflag = 1;
-X  do_fasta (query, n0, aa1, n1, ppst, f_str, rst, &hoff);
-X  ppst->param_u.fa.optflag = saved_optflag;
-}
-X /* savemax() - record the run held in diagonal cell dptr in the
-X    f_str->vmax[] table.  With ALLOCN0 the diagonal number dpos is passed
-X    in; otherwise it is taken from dptr's offset within f_str->diag. */
-#ifdef ALLOCN0
-void
-savemax (dptr, dpos, f_str)
-X register struct dstruct *dptr;
-X int dpos;
-X struct f_struct *f_str;
-{
-X register struct savestr *vmptr;
-X register int i;
-X
-#else
-void
-savemax (dptr, f_str)
-X register struct dstruct *dptr;
-X struct f_struct *f_str;
-{
-X register int dpos;
-X register struct savestr *vmptr;
-X register int i;
-X
-X dpos = (int) (dptr - f_str->diag);
-X
-#endif
-X
-/* check to see if this is the continuation of a run that is already saved
-X   (same diagonal and same start); if so only stop and score are refreshed */
-X
-X if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
-X vmptr->start == dptr->start)
-X {
-X vmptr->stop = dptr->stop;
-X if ((i = dptr->score) <= vmptr->score)
-X return;
-X vmptr->score = i;
-X if (vmptr != f_str->lowmax) /* the lowest slot did not change */
-X return;
-X }
-X else
-X { /* new run: overwrite the slot that currently holds the lowest score */
-X i = f_str->lowmax->score = dptr->score;
-X f_str->lowmax->dp = dpos;
-X f_str->lowmax->start = dptr->start;
-X f_str->lowmax->stop = dptr->stop;
-X dptr->dmax = f_str->lowmax;
-X }
-X
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) /* find the new lowest slot */
-X if (vmptr->score < i)
-X {
-X i = vmptr->score;
-X f_str->lowmax = vmptr;
-X }
-X f_str->lowscor = i;
-}
-X
-/* spam() - rescore one saved run with the full scoring matrix.
-X *
-X * Walks library positions dmax->start..dmax->stop and the matching
-X * query positions (lpos - dmax->dp + f_str->noff), summing
-X * pam2[query][lib].  The running sum restarts whenever it drops below
-X * zero, and the best-scoring stretch is kept.
-X *
-X * On return dmax->start/stop bracket that stretch and its score is
-X * returned.  If no stretch ever scores above zero, 0 is returned and
-X * dmax->start/stop are left as they were; previously they were
-X * overwritten with uninitialised values in that case.
-X */
-int spam (const unsigned char *aa0,
-X          const unsigned char *aa1,
-X          struct savestr *dmax, int **pam2,
-X          struct f_struct *f_str)
-{
-X  int lpos;
-X  int tot;
-X  struct {
-X    int start, stop, score;
-X  } curv, maxv;
-X  const unsigned char *aa0p, *aa1p;
-X
-X  aa1p = &aa1[lpos = dmax->start];
-X  aa0p = &aa0[lpos - dmax->dp + f_str->noff];
-X  curv.start = lpos;
-X  curv.stop = lpos;
-X
-X  /* default to the incoming region so the fields are always defined */
-X  maxv.start = dmax->start;
-X  maxv.stop = dmax->stop;
-X
-X  tot = curv.score = maxv.score = 0;
-X  for (; lpos <= dmax->stop; lpos++) {
-X    tot += pam2[*aa0p++][*aa1p++];
-X    if (tot > curv.score) {
-X      curv.stop = lpos;
-X      curv.score = tot;
-X    }
-X    else if (tot < 0) {
-X      /* stretch ended: keep it if it is the best so far, then restart */
-X      if (curv.score > maxv.score) {
-X        maxv.start = curv.start;
-X        maxv.stop = curv.stop;
-X        maxv.score = curv.score;
-X      }
-X      tot = curv.score = 0;
-X      curv.start = lpos+1;
-X    }
-X  }
-X
-X  if (curv.score > maxv.score) {
-X    maxv.start = curv.start;
-X    maxv.stop = curv.stop;
-X    maxv.score = curv.score;
-X  }
-X
-/* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
-X    printf(" new region: %3d %3d %3d %3d\n",maxv.start,
-X    dmax->start,maxv.stop,dmax->stop);
-*/
-X  dmax->start = maxv.start;
-X  dmax->stop = maxv.stop;
-X
-X  return maxv.score;
-}
-X
-#define XFACT 10
-X
-/* sconn() - try to join saved runs into a higher-scoring chain.
-X *
-X * v[0..n-1] is first sorted by library start (kpsort).  Runs scoring
-X * below cgap are ignored.  Each remaining run starts with its own score;
-X * if an earlier run on the list ends less than XFACT past this run's
-X * start in both sequences, the score becomes that run's list score plus
-X * this run's score plus pgap (first such run on the list wins).  The
-X * run is then linked in ahead of the first list entry it outscores; if
-X * it outscores none it is not linked.  Returns the score at the head of
-X * the list, or 0 when the list is empty.
-X */
-int sconn (struct savestr **v, int n,
-X           int cgap, int pgap, struct f_struct *f_str)
-{
-X  struct slink {
-X    int score;
-X    struct savestr *vp;
-X    struct slink *next;
-X  } sarr[MAXSAV], *head, *node, *prev, **linkp;
-X  int idx, used;
-X  int lib_beg, qry_beg, lib_end, qry_end;
-X
-X  /* sort the runs left to right by library position */
-X  kpsort (v, n);
-X
-X  head = NULL;
-X  used = 0;
-X
-X  for (idx = 0; idx < n; idx++) {
-X    /* a run scoring less than the joining threshold never helps */
-X    if (v[idx]->score < cgap) continue;
-X
-X    lib_beg = v[idx]->start;
-X    qry_beg = lib_beg - v[idx]->dp + f_str->noff;
-X
-X    node = &sarr[used++];
-X    node->vp = v[idx];
-X    node->score = v[idx]->score;
-X    node->next = NULL;
-X
-X    /* look for an earlier run this one can follow */
-X    for (prev = head; prev != NULL; prev = prev->next) {
-X      lib_end = prev->vp->stop;
-X      qry_end = lib_end - prev->vp->dp + f_str->noff;
-X      if (lib_end < lib_beg+XFACT && qry_end < qry_beg+XFACT) {
-X        node->score = prev->score + v[idx]->score + pgap;
-X        break;
-X      }
-X    }
-X
-X    /* place the run in front of the first entry it beats */
-X    if (head == NULL) {
-X      head = node;
-X      continue;
-X    }
-X    for (linkp = &head; *linkp != NULL; linkp = &(*linkp)->next) {
-X      if (node->score > (*linkp)->score) {
-X        node->next = *linkp;
-X        *linkp = node;
-X        break;
-X      }
-X    }
-X  }
-X
-X  return (head != NULL) ? head->score : 0;
-}
-X
-/* kssort() - shell sort of v[0..n-1] into descending score order */
-void
-kssort (v, n)
-struct savestr *v[];
-int n;
-{
-X  int step, hi, lo;
-X  struct savestr *held;
-X
-X  step = n / 2;
-X  while (step > 0) {
-X    for (hi = step; hi < n; hi++) {
-X      lo = hi - step;
-X      /* sink the lower-scoring entry towards the end */
-X      while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
-X        held = v[lo + step];
-X        v[lo + step] = v[lo];
-X        v[lo] = held;
-X        lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-X
-/* kpsort() - shell sort of v[0..n-1] into ascending start order */
-void
-kpsort (v, n)
-struct savestr *v[];
-int n;
-{
-X  int step, hi, lo;
-X  struct savestr *held;
-X
-X  step = n / 2;
-X  while (step > 0) {
-X    for (hi = step; hi < n; hi++) {
-X      lo = hi - step;
-X      /* move the later-starting entry towards the end */
-X      while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
-X        held = v[lo + step];
-X        v[lo + step] = v[lo];
-X        v[lo] = held;
-X        lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-X
-/* dmatchx() - thin front end to lx_band().
-X * Moves the starting offset back by window/2, negates the gap, extension
-X * and shift values, and picks the argument order: without TFAST aa1 is
-X * the protein and f_str->aa0v (length n0-2) the codon series; with TFAST
-X * aa0 is the protein and f_str->aa1v (length n1-2) the codon series.
-X * With OLD_FASTA_GAP the open value passed is -(gdelval - ggapval).
-X */
-static int
-dmatchx(const unsigned char *aa0, int n0,
-X        const unsigned char *aa1, int n1,
-X        int hoff, int window,
-X        int **pam2, int gdelval, int ggapval, int gshift,
-X        struct f_struct *f_str)
-{
-X  int band_start, open_val;
-X
-X  band_start = hoff - window/2;
-X
-#ifdef OLD_FASTA_GAP
-X  open_val = -(gdelval - ggapval);
-#else
-X  open_val = -gdelval;
-#endif
-X
-#ifdef TFAST
-X  return lx_band(aa0,n0,f_str->aa1v,n1-2,
-X                 pam2, open_val, -ggapval,-gshift,
-X                 band_start,window,f_str);
-#else
-X  return lx_band(aa1,n1,f_str->aa0v,n0-2,
-X                 pam2, open_val, -ggapval,-gshift,
-X                 band_start,window,f_str);
-#endif
-}
-X
-/* init_row() - zero every member of the first sp cells of row */
-static void
-init_row(struct sx_s *row, int sp) {
-X  int k;
-X  struct sx_s *cell;
-X
-X  for (k = 0; k < sp; k++) {
-X    cell = &row[k];
-X    cell->C1 = cell->C2 = cell->C3 = 0;
-X    cell->I1 = cell->I2 = cell->I3 = 0;
-X    cell->flag = 0;
-X  }
-}
-X
-int lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
-X int len_prot, /* length of prot. seq */
-X const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
-X int len_dna_prot, /* length trans. seq. */
-X int **pam_matrix, /* scoring matrix */
-X int gopen, int gext, /* gap open, gap extend penalties */
-X int gshift, /* frame-shift penalty */
-X int start_diag, /* start diagonal of band */
-X int width, /* width for band alignment */
-X struct f_struct *f_str)
-{ /* lx_band - score-only banded comparison of prot_seq against dna_prot_seq.
-X One row of sx_s cells (f_str->cur) is updated in place for every protein
-X residue; each cell carries three score slots C1..C3 and three gap slots
-X I1..I3.  Substitution values come from f_str->weight0[residue][codon
-X number] through the fields iv, ii and iii.  The value returned is
-X best+gg, where best is the largest (cell score - gg) seen.
-X pam_matrix and gshift are not referenced in this body. */
-X void *ckalloc();
-X int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
-X struct sx_s *last, *tmp;
-X int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
-X const unsigned char *dp;
-X register struct sx_s *ap, *aq;
-X struct wgt *wt, *ww;
-X int aa, b, a,x,y,z;
-X
-X sp = width+7; /* number of row cells cleared before each call's scan */
-X gg = gopen+gext; /* subtracted from a cell score before it seeds a gap state */
-X /* sp = sp/3+1; */
-X
-X if (f_str->cur == NULL) { /* the row buffer is allocated on first use only */
-X f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp); /* NOTE(review): sized from this call's width and never regrown - confirm width is constant across calls */
-X }
-X
-X init_row(f_str->cur, sp);
-X
-X /*
-X if (start_diag %3 !=0) start_diag = start_diag/3-1;
-X else start_diag = start_diag/3;
-X if (width % 3 != 0) width = width/3+1;
-X else width = width /3;
-X */
-X
-X x1 = start_diag; /* x1 = lower bound of DNA */
-X x2 = 1; /* the amount of position shift from last row*/
-X
-X end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
-X end_prot = min(end_prot,len_prot); /* NOTE(review): end_prot is computed but the loop below is bounded by len_prot */
-X
-X /* i counts through protein sequence, x1 through DNAp */
-X
-X for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
-X bd = min(x1+width, (len_dna_prot+2)/3); /* upper bound of band */
-X bd1 = max(0,x1); /* lower bound of band */
-X wt = f_str->weight0[prot_seq[i]];
-X del = 1-x1; /*adjustment*/ /* shifts band coordinates so that x1 lands on cur[1] */
-X bd += del;
-X bd1 +=del;
-X
-X ap = &f_str->cur[bd1]; aq = ap+1; /* ap: cell being rewritten; aq: its right-hand neighbour */
-X e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0;
-X for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) { /* three dna_prot_seq entries are consumed per cell */
-X ww = &wt[(unsigned char) *dp++]; /* ---- slot 1 (C1/I1) ---- */
-X sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
-X if (cd1 > sc) sc = cd1;
-X cd1 -= gext;
-X if ((ci = aq->I1) > 0) {
-X if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
-X else {
-X ap->C1 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd1 < sc) cd1 = sc;
-X ap->I1 = max(ci-gext, sc);
-X } else ap->I1 = ci-gext;
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I1 = ap->C1 = 0;
-X } else {
-X ap->C1 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd1 < sc) cd1 = sc;
-X ap->I1 = sc;
-X } else ap->I1 = 0;
-X }
-X }
-X ww = &wt[(unsigned char) *dp++]; /* ---- slot 2 (C2/I2) ---- */
-X sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
-X if (cd2 > sc) sc = cd2;
-X cd2 -= gext;
-X if ((ci = aq->I2) > 0) {
-X if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
-X else {
-X ap->C2 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd2 < sc) cd2 = sc;
-X ap->I2 = max(ci-gext, sc);
-X } /* NOTE(review): unlike slot 1 there is no else storing ci-gext into I2 here - confirm intended */
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I2 = ap->C2 = 0;
-X } else {
-X ap->C2 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd2 < sc) cd2 = sc;
-X ap->I2 = sc;
-X } else ap->I2 = 0;
-X }
-X }
-X ww = &wt[(unsigned char)*dp++]; /* ---- slot 3 (C3/I3) ---- */
-X sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
-X if (cd3 > sc) sc = cd3;
-X cd3 -= gext;
-X if ((ci = aq++->I3) > 0) { /* aq advances here, once per cell */
-X if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
-X else {
-X ap->C3 = sc;
-X sc -= gg;
-X if (sc > 0) {
-X if (sc > best) best =sc;
-X if (cd3 < sc) cd3 = sc;
-X ap->I3 = max(ci-gext, sc);
-X } /* NOTE(review): unlike slot 1 there is no else storing ci-gext into I3 here - confirm intended */
-X }
-X } else {
-X if (sc <= 0) {
-X ap->I3 = ap->C3 = 0;
-X } else {
-X ap->C3 = sc; sc-=gg;
-X if (sc >0) {
-X if (sc > best) best =sc;
-X if (cd3 < sc) cd3 = sc;
-X ap->I3 = sc;
-X } else ap->I3 = 0;
-X }
-X }
-X }
-X }
-X /* printf("The best score is %d\n", best); */
-X return best+gg; /* best was recorded after gg had been subtracted, so add it back */
-}
-X
-/* ckalloc - malloc() wrapper; an allocation failure is reported via w_abort() */
-void *ckalloc(size_t amount)
-{
-X  void *block = (void *)malloc( (size_t)amount);
-X
-X  if (block == NULL) {
-X    w_abort("Ran out of memory.","");
-X  }
-X  return(block);
-}
-X
-/* shscore - self-comparison score: the sum of pam2[r][r] over aa0[0..n0-1] */
-int
-shscore(unsigned char *aa0, int n0, int **pam2)
-{
-X  const unsigned char *res = aa0;
-X  int total = 0;
-X  int left;
-X
-X  for (left = n0; left > 0; left--, res++) {
-X    total += pam2[*res][*res];
-X  }
-X  return total;
-}
-X
-#define SGW1 100 /* global(): protein span below which small_global() may be used; also the row count of its static tables */
-#define SGW2 300 /* global(): DNA span below which small_global() may be used; also the column count of its static tables */
-#define WIDTH 60 /* display_alig(): alignment columns printed per output line */
-X
-typedef struct mat *match_ptr;
-X
-typedef struct mat { /* one edge of an alignment path; edges are chained through next */
-X int i, j, l; /* i, j: end position of the edge; l: edge type code */
-X match_ptr next;
-} match_node;
-X
-typedef struct { int i,j;} state; /* coordinate pair; local_align() records alignment start points in these */
-typedef state *state_ptr;
-X
-X
-void *ckalloc();
-static match_ptr small_global(), global();
-static int local_align(), find_best();
-static void init_row2(), init_ROW();
-X
-/*
-X pro_dna - best local protein / translated-DNA alignment with traceback.
-X
-X local_align() finds the score and the end points of the best local
-X alignment; global() then rebuilds the path between those end points as a
-X list of edges, whose type codes are copied into a_res->res.
-X
-X prot_seq, len_prot          protein sequence numbers and length
-X dna_prot_seq, len_dna_prot  translated DNA sequence numbers and length
-X pam_matrix                  scoring matrix (passed through)
-X gopen, gext                 gap open / gap extend penalties
-X gshift                      frame-shift penalty (not referenced here)
-X f_str                       work area; up/down/tp rows are allocated here
-X                             for the duration of the call and freed again
-X max_res                     capacity of a_res->res, in entries
-X a_res                       receives min0/min1/max0/max1, res[] and nres
-X
-X Returns the local alignment score.
-X
-X At most max_res edge codes are stored.  When the path is longer a
-X message is written to stderr and nres is limited to the number of
-X entries actually stored, so that readers of res[0..nres-1] stay inside
-X the buffer.
-*/
-int
-pro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/
-X int len_prot, /* length of prot. seq */
-X const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
-X int len_dna_prot, /* length trans. seq. */
-X int **pam_matrix, /* scoring matrix */
-X int gopen, int gext, /* gap open, gap extend penalties */
-X int gshift, /* frame-shift penalty */
-X struct f_struct *f_str,
-X int max_res,
-X struct a_res_str *a_res) /* alignment info */
-{
-X  match_ptr align, ap, aq;
-X  int x, y, ex, ey, i, score;
-X  int *alignment;
-X
-X  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
-X
-X  /* local alignment: (x, y) is the start and (ex, ey) the end of the
-X     best local alignment */
-X  score= local_align(&x, &y, &ex, &ey,
-X                     pam_matrix, gopen, gext,
-X                     dna_prot_seq, len_dna_prot,
-X                     prot_seq, len_prot, f_str);
-X
-X  /* the global passes index the rows from -3, so step three cells in */
-X  f_str->up += 3; f_str->down += 3; f_str->tp += 3;
-X
-X  /* x, y - start in prot, dna_prot */
-X  a_res->min0 = x;	/* prot */
-X  a_res->min1 = y;	/* DNA */
-X  a_res->max0 = ex;	/* prot */
-X  a_res->max1 = ey;	/* DNA */
-X
-X  align = global(x, y, ex, ey,
-X                 pam_matrix, gopen, gext,
-X                 dna_prot_seq, prot_seq,
-X                 0, 0, f_str);
-X
-X  alignment = a_res->res;
-X
-X  /* copy the edge codes that fit and release every list node */
-X  for (ap = align, i= 0; ap; i++) {
-X    if (i < max_res) alignment[i] = ap->l;
-X    aq = ap->next; free(ap); ap = aq;
-X  }
-X  if (i > max_res) {
-X    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
-X    i = (max_res > 0) ? max_res : 0;	/* only this many entries were stored */
-X  }
-X
-X  /* undo the +3 offset before handing the rows back to free() */
-X  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
-X
-X  a_res->nres = i;
-X  return score;
-}
-X
-/* swap - exchange the two pointers stored at *a and *b */
-static void
-swap(void **a, void **b)
-{
-X  void *first = *a;
-X  void *second = *b;
-X
-X  *a = second;
-X  *b = first;
-}
-X
-/*
-X local_align - score-only search for the best local alignment.
-X
-X On return (*x, *y) is the starting position and (*ex, *ey) the ending
-X position of the best local alignment (protein index first, DNA index
-X second), and the function value is its score.  When no cell scores above
-X zero the score is 0 and all four positions are reported as 0 (they were
-X left uninitialised before).
-X
-X wgts        not referenced in this body
-X gop, gext   gap open / gap extend penalties
-X dnap, ld    translated DNA sequence numbers and length
-X pro, lp     protein sequence numbers and length
-X f_str       supplies weight1[][] and the two work rows up / down
-*/
-static int
-local_align(int *x, int *y, int *ex, int *ey,
-X int **wgts, int gop, int gext,
-X const unsigned char *dnap, int ld,
-X const unsigned char *pro, int lp,
-X struct f_struct *f_str)
-{
-X  int i, j, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1 = 0, e2 = 0, e3,
-X    sc, del, e, best = 0, cd, ci, c;
-X  struct wgt *wt, *ww;
-X  state_ptr cur_st, last_st, cur_i_st;
-X  st_ptr cur, last;
-X  const unsigned char *dp;
-X  int *cur_d_st, *st_up;
-X
-X  /*
-X     The C member of a row cell holds the best score of an alignment
-X     ending at that position; the D and I members hold the best score of
-X     an alignment ending there with a deletion or an insertion.
-X     cur_st / last_st / cur_i_st / cur_d_st remember where the alignment
-X     behind the corresponding score started.
-X     Only two rows are kept at any time; the global alignment routines
-X     work the same way.
-X  */
-X
-X  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
-X  init_row2(st_up, ld+5);
-X
-X  ld += 2;
-X
-X  init_ROW(f_str->up, ld+1);
-X  init_ROW(f_str->down, ld+1);
-X  cur = f_str->up+1;
-X  last = f_str->down+1;
-X
-X  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
-X  cur_d_st = st_up;
-X  dp = dnap-2;			/* so that dp[j] is dnap[j-2] */
-X  for (i = 0; i < lp; i++) {
-X    wt = f_str->weight1[pro[i]]; e2 =0; e1 = last[0].C;
-X    for (j = 0; j < 2; j++) {
-X      cur_st[j].i = i+1;
-X      cur_st[j].j = j+1;
-X    }
-X    for (j = 2; j < ld; j++) {
-X      ww = &wt[(unsigned char) dp[j]];
-X      del = -1;		/* -1: no positive predecessor, a new alignment starts here */
-X      if (j >= 3) {
-X        sc = 0;
-X        e3 = e2; e2 = e1;
-X        e1 = last[j-2].C;
-X        if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}
-X        if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}
-X        if ((e = e3+ww->iv) > sc) {sc = e; del = 4;}
-X      } else {
-X        sc = e2 = 0;
-X        if (ww->iii > 0) {sc = ww->iii; del = 3;}
-X      }
-X      if (sc < (ci=last[j].I)) {
-X        sc = ci; del = 0;
-X      }
-X      if (sc < (cd=cur[j].D)) {
-X        sc = cd; del = 5;
-X      }
-X      cur[j].C = sc;
-X      e = sc - gop;
-X      /* the D state is pushed three columns to the right */
-X      if (e > cd) {
-X        cur[j+3].D = e-gext;
-X        cur_d_st[j+3] = 3;
-X      } else {
-X        cur[j+3].D = cd-gext;
-X        cur_d_st[j+3] = cur_d_st[j]+3;
-X      }
-X      /* record where the alignment behind cur[j].C started */
-X      switch(del) {
-X      case 5:
-X        c = cur_d_st[j];
-X        cur_st[j].i = cur_st[j-c].i;
-X        cur_st[j].j = cur_st[j-c].j;
-X        break;
-X      case 0:
-X        cur_st[j].i = cur_i_st[j].i;
-X        cur_st[j].j = cur_i_st[j].j;
-X        break;
-X      case 2:
-X      case 3:
-X      case 4:
-X        if (i) {
-X          if (j-del >= 0) {
-X            cur_st[j].i = last_st[j-del].i;
-X            cur_st[j].j = last_st[j-del].j;
-X          } else {
-X            cur_st[j].i = i;
-X            cur_st[j].j = 0;
-X          }
-X        } else {
-X          cur_st[j].i = 0;
-X          cur_st[j].j = max(0, j-del+1);
-X        }
-X        break;
-X      case -1:
-X        cur_st[j].i = i+1;
-X        cur_st[j].j = j+1;
-X        break;
-X      }
-X      if (e > ci) {
-X        cur[j].I = e -gext;
-X        cur_i_st[j].i = cur_st[j].i;
-X        cur_i_st[j].j = cur_st[j].j;
-X      } else {
-X        cur[j].I = ci- gext;
-X      }
-X      if (sc > best) {
-X        x1 = cur_st[j].i;
-X        x2 = cur_st[j].j;
-X        best =sc;
-X        x3 = i;
-X        x4 = j;
-X      }
-X    }
-X    swap((void *)&last, (void *)&cur);
-X    swap((void *)&cur_st, (void *)&last_st);
-X  }
-X  /* printf("The best score is %d\n", best);*/
-X  *x = x1; *y = x2; *ex = x3; *ey = x4;
-X  free(cur_st); free(last_st); free(cur_i_st);
-X  free(st_up);
-X  return best;
-}
-X
-/*
-X Both global_up and global_down do linear space score only global
-X alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
-X global_up do the algorithm upwards, from row x towards row y.
-X global_down do the algorithm downwards, from row y towards x.
-*/
-X
-static void
-global_up(st_ptr *row1, st_ptr *row2,
-X int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext,
-X unsigned char *dnap, unsigned char *pro,
-X int N, struct f_struct *f_str)
-{ /* global_up - score-only global pass over protein rows x..ex and DNA
-X columns y..ey, working forwards with two rows that are swapped after
-X every residue.  On return *row1 points at the row computed last and its
-X I members have been refreshed from the other row.  When N is non-zero
-X the I score of the first cell starts at -gext.
-X wgts and the locals k, e and score are not referenced. */
-X int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
-X struct wgt *wt, *ww;
-X st_ptr cur, last;
-X
-X cur = *row1; last = *row2;
-X sc = -gop;
-X for (j = 0; j <= ey-y+1; j++) { /* boundary row: only columns at multiples of 3 get a finite score */
-X if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
-X else { last[j].I = last[j].C = -10000;} /* -10000 serves as minus infinity */
-X }
-X last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
-X last[0].D = last[1].D = last[2].D = -10000;
-X if (N) last[0].I = -gext;
-X for (i = 1; i <= ex-x+1; i++) {
-X wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
-X for (j = 0; j <= ey-y+1; j++) {
-X t = j+y;
-X sc = -10000;
-X ww = &wt[(unsigned char) dnap[t-3]]; /* NOTE(review): reads dnap[y-3] when j is 0 - presumably dnap is padded in front; confirm */
-X if (j < 4) {
-X if (j == 3) {
-X sc = e2+ww->iii;
-X } else if (j == 2) {
-X sc = e2 + ww->ii;
-X }
-X } else {
-X e3 = e2; e2 = e1; /* e1, e2, e3 hold last[j-2].C, last[j-3].C, last[j-4].C */
-X e1 = last[j-2].C;
-X sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
-X }
-X sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
-X cur[j].C = sc;
-X cur[j+3].D = max(cd, sc-gop)-gext; /* the D state is carried three columns to the right */
-X cur[j].I = max(ci, sc-gop)-gext;
-X }
-X swap((void *)&last, (void *)&cur);
-X }
-X /*printf("global up score =%d\n", last[ey-y+1].C);*/
-X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
-X if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 the row computed last */
-}
-X
-static void
-global_down(st_ptr *row1, st_ptr *row2,
-X int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext,
-X unsigned char *dnap, unsigned char *pro,
-X int N, struct f_struct *f_str)
-{ /* global_down - score-only global pass over protein rows ex..x and DNA
-X columns ey..y, working backwards with two rows that are swapped after
-X every residue.  On return *row1 points at the row computed last and its
-X I members have been refreshed from the other row.  When N is non-zero
-X the I score of the last cell starts at -gext.
-X wgts and the locals k, del, tmp, e and score are not referenced. */
-X int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, score;
-X struct wgt *wt, *w1, *w2, *w3;
-X st_ptr cur, last;
-X
-X cur = (*row1); last = *row2;
-X sc = -gop;
-X for (j = ey-y+1; j >= 0; j--) {
-X if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;} /* NOTE(review): finite scores go to distances that are NOT multiples of 3, the opposite of global_up's j % 3 == 0 test - confirm intended */
-X else last[j].I = last[j].C = -10000; /* -10000 serves as minus infinity */
-X cur[j].I = -10000;
-X }
-X last[ey-y+1].C = 0;
-X if (N) last[ey-y+1].I = -gext;
-X cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
-X last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
-X for (i = ex-x; i >= 0; i--) {
-X wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C;
-X e1 = -10000;
-X w3 = &wt[(unsigned char) dnap[ey]];
-X w2 = &wt[(unsigned char) dnap[ey-1]];
-X for (j = ey-y+1; j >= 0; j--) {
-X t = j+y;
-X w1 = &wt[(unsigned char) dnap[t-1]];
-X sc = -10000;
-X if (t+3 > ey) {
-X if (t+2 == ey) {
-X sc = e2+w2->iii;
-X } else if (t+1 == ey) {
-X sc = e2+w1->ii;
-X }
-X } else {
-X e3 = e2; e2 = e1; /* e1, e2, e3 hold last[j+2].C, last[j+3].C, last[j+4].C */
-X e1 = last[j+2].C;
-X sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
-X }
-X if (sc < (cd= cur[j].D)) {
-X sc = cd;
-X cur[j-3].D = cd-gext; /* index reaches -3 at j == 0; pro_dna() offsets the row pointers by 3 before the global passes */
-X } else cur[j-3].D =max(cd, sc-gop)-gext;
-X if (sc < (ci= last[j].I)) {
-X sc = ci;
-X cur[j].I = ci - gext;
-X } else cur[j].I = max(sc-gop,ci)-gext;
-X cur[j].C = sc;
-X w3 = w2; w2 = w1; /* slide the weight window one column to the left */
-X }
-X swap((void *)&last, (void *)&cur);
-X }
-X for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
-X if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 the row computed last */
-}
-X
-/* init_row2 - store 0 in row[0..ld-1]; nothing is written when ld <= 0 */
-static void
-init_row2(int *row, int ld) {
-X  int *slot = row;
-X  int left;
-X
-X  for (left = ld; left > 0; left--) {
-X    *slot++ = 0;
-X  }
-}
-X
-/* init_ROW - zero the C, D and I members of row[0..ld-1] */
-static void init_ROW(st_ptr row, int ld) {
-X  st_ptr cell = row;
-X  int left = ld;
-X
-X  while (left-- > 0) {
-X    cell->C = 0;
-X    cell->D = cell->C;
-X    cell->I = cell->D;
-X    cell++;
-X  }
-}
-X
-/*
-X combine - append edge list x2 to edge list x1 and return the head.
-X
-X When x1 is empty x2 is returned untouched, whatever st is.  Otherwise,
-X when st is non-zero, the j member of each leading x2 node is incremented
-X up to and including the first node of type 3 or 4, and that node's type
-X is decremented.  If x2 holds no such node only the j members change;
-X the type decrement is skipped instead of dereferencing the end of the
-X list.
-*/
-static match_ptr
-combine(match_ptr x1, match_ptr x2, int st) {
-X  match_ptr x;
-X
-X  if (x1 == NULL) return x2;
-X  for (x = x1; x->next; x = x->next);	/* find the tail of x1 */
-X  x->next = x2;
-X  if (st) {
-X    for (x = x2; x; x = x->next) {
-X      x->j++;
-X      if (x->l == 3 || x->l == 4) break;
-X    }
-X    if (x != NULL) x->l--;	/* x is NULL when the loop ran off the list */
-X  }
-X  return x1;
-}
-X
-/*
-X global use the two upwards and downwards score only linear
-X space global alignment subroutine to recursively build the
-X alignment.
-*/
-X
-/*
-X global - rebuild the alignment path between (x, y) and (ex, ey) as a
-X list of match_node edges, by divide and conquer over the protein rows.
-X
-X x, ex       first / last protein position
-X y, ey       first / last DNA position
-X wgts, gop, gext, dnap, pro, f_str   passed on to the scoring passes
-X N1, N2      flags handed to global_up() / global_down() / small_global()
-X
-X Degenerate regions are answered directly: no protein rows left gives a
-X run of type-5 edges (plus one closing edge when ex == x), no DNA columns
-X left gives a run of type-0 edges.  Regions smaller than SGW1 x SGW2 go
-X to small_global().  Everything else is split at the middle row m.
-X The caller owns the returned nodes.  Nodes built in the degenerate
-X branches carry a type and a link only; their i and j are not set.
-*/
-match_ptr
-global(int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext,
-X unsigned char *dnap, unsigned char *pro, int N1, int N2,
-X struct f_struct *f_str)
-{
-X  int m;
-X  int m1, m2;
-X  match_ptr x1, x2, mm1, mm2 = NULL;
-X
-X  /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
-X
-X  if (ex <= x) {
-X    /* one type-5 edge per whole codon; mm2 remembers the list tail */
-X    mm1 = NULL;
-X    for (m = y+3; m <= ey; m+=3) {
-X      x1 = (match_ptr) ckalloc(sizeof(match_node));
-X      x1->l = 5; x1->next = mm1;
-X      if (mm1== NULL) mm2 = x1;
-X      mm1 = x1;
-X    }
-X    if (ex == x) {
-X      if ((ey-y) % 3 != 0) {
-X        x1 = (match_ptr) ckalloc(sizeof(match_node));
-X        x1->l = ((ey-y) % 3) +1; x1->next = NULL;
-X        if (mm1) mm2->next = x1; else mm1 = x1;
-X      } else if (mm2 != NULL) {
-X        /* without this test an empty list (ey < y+3) meant a store
-X           through an uninitialised pointer */
-X        mm2->l = 4;
-X      }
-X    }
-X    return mm1;
-X  }
-X  if (ey <= y) {
-X    mm1 = NULL;
-X    for (m = x; m <= ex; m++) {
-X      x1 = (match_ptr) ckalloc(sizeof(match_node));
-X      x1->l = 0; x1->next = mm1; mm1 = x1;
-X    }
-X    return mm1;
-X  }
-X  /*
-X     If the region is small enough, a quadratic space algorithm finds the
-X     alignment directly.
-X  */
-X  if (ex -x < SGW1 && ey-y < SGW2)
-X    return small_global(x,y,ex,ey,wgts, gop, gext, dnap, pro, N1, N2,f_str);
-X  m = (x+ex)/2;
-X  /*
-X     Score-only pass from row x to the middle row m; the information for
-X     row m ends up in f_str->up.
-X  */
-X  global_up(&f_str->up, &f_str->tp, x, y, m, ey,
-X            wgts, gop, gext,
-X            dnap, pro, N1, f_str);
-X  /*
-X     Score-only pass backwards from row ex to row m+1; the information
-X     for row m+1 ends up in f_str->down.
-X  */
-X  global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
-X              wgts, gop, gext,
-X              dnap, pro, N2, f_str);
-X
-X  /*
-X     Use rows m and m+1 to find where the best alignment crosses the
-X     middle (m1, m2), solve the two smaller regions recursively and join
-X     the two lists.
-X  */
-X  if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {
-X    x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
-X    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
-X    if (m1 == m2) x1 = combine(x1,x2,1);
-X    else x1 = combine(x1, x2,0);
-X  } else {
-X    /* the crossing is inside a gap: rows m and m+1 become type-0 edges */
-X    x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
-X    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
-X    mm1 = (match_ptr) ckalloc(sizeof(match_node));
-X    mm1->i = m; mm1->l = 0; mm1->j = m1;
-X    mm2 = (match_ptr) ckalloc(sizeof(match_node));
-X    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
-X    mm1->next = mm2; mm2->next = x2;
-X    x1 = combine(x1, mm1, 0);
-X  }
-X  return x1;
-}
-X
-/*
-X find_best - choose the column where the forward row (up) and the
-X backward row (down) join with the highest combined score.
-X
-X For every column i in 1..ld-1 two candidates are compared against the
-X running best (which starts at -1000): up[i].C + down[i].C, and
-X up[i].I + down[i].I + gop.  *m1 and *m2 receive the winning column
-X translated by y (j-1+y and j+y).
-X
-X Returns 1 when the winner was a C+C join and 0 when it was an I+I join.
-X When no candidate beats the initial best, column 0 and the value 1 are
-X reported; the return value was uninitialised in that case before.
-*/
-static int
-find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
-X
-X  int i, best = -1000, j = 0, s2, s4, st = 1;
-X
-X  for (i = 1; i < ld; i++) {
-X    s2 = up[i].C + down[i].C;
-X    s4 = up[i].I + down[i].I + gop;
-X    if (best < s2) {
-X      best = s2; j = i; st = 1;
-X    }
-X    if (best < s4) {
-X      best = s4; j = i; st = 0;
-X    }
-X  }
-X  *m1 = j-1+y;
-X  *m2 = j+y;
-X  /*printf("score=%d\n", best);*/
-X  return st;
-}
-X
-/*
-X An alignment is represented as a linked list whose element
-X is of type match_node. Each element represent an edge in the
-X path of the alignment graph. The fields of match_node are
-X l --- gives the type of the edge.
-X i, j --- give the end position.
-*/
-X
-static match_ptr
-small_global(int x, int y, int ex, int ey,
-X int **wgts, int gop, int gext,
-X unsigned char *dnap, unsigned char *pro,
-X int N1, int N2, struct f_struct *f_str) {
-X
-X static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; /* small_global - full-table alignment of protein rows x..ex against DNA
-X columns y..ey followed by a traceback; returns the path as a list of
-X match_node edges that the caller owns.  The tables are static, so they
-X are shared by every call; their size is why global() only comes here
-X when ex-x < SGW1 and ey-y < SGW2.
-X wgts and the locals score, e3 and cI/cD aliases aside, nothing else is
-X taken from outside the arguments and f_str->weight1. */
-X int i, j, e, sc, score, del, k, t, ci, cd;
-X int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
-X match_ptr mp, first;
-X struct wgt *wt, *ww;
-X
-X /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
-X sc = -gop-gext; C[0][0] = 0;
-X if (N1) I[0] = -gext; else I[0] = sc;
-X
-X for (j = 1; j <= ey-y+1; j++) { /* boundary row: only columns at multiples of 3 get a finite score */
-X if (j % 3== 0) {
-X C[0][j] = sc; sc -= gext; I[j] = sc-gop;
-X } else I[j] = C[0][j] = -10000; /* -10000 serves as minus infinity */
-X st[0][j] = 5;
-X }
-X lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
-X cI = I;
-X for (i = 1; i <= ex-x+1; i++) {
-X cC = &C[i][0];
-X wt = f_str->weight1[pro[i+x-1]]; cst = &st[i][0];
-X for (j = 0; j <=ey-y+1; j++) {
-X ci = cI[j];
-X cd= cD[j];
-X t = j+y;
-X ww = &wt[(unsigned char) dnap[t-3]]; /* NOTE(review): reads dnap[y-3] when j is 0 - presumably dnap is padded in front; confirm */
-X if (j >= 4) {
-X sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;
-X e4 = lC[j-4]+ww->iv; del = 3;
-X if (e2 > sc) { sc = e2; del = 2;}
-X if (e4 >= sc) { sc = e4; del = 4;}
-X } else {
-X if (j == 3) {
-X sc = lC[0]+ww->iii; del =3;
-X } else if (j == 2) {
-X sc = lC[0]+ww->ii; del = 2;
-X } else {sc = -10000; del = 0;}
-X }
-X if (sc < ci) {
-X sc = ci; del = 0;
-X }
-X if (sc <= cd) {
-X sc = cd;
-X del = 5;
-X }
-X cC[j] = sc;
-X sc -= gop;
-X if (sc <= cd) {
-X del += 10; /* +10 marks that the D state was extended rather than opened */
-X cD[j+3] = cd - gext;
-X } else cD[j+3] = sc -gext;
-X if (sc < ci) {
-X del += 20; /* +20 marks that the I state was extended rather than opened */
-X cI[j] = ci-gext;
-X } else cI[j] = sc-gext;
-X *(cst++) = del; /* st[i][j] = edge type (del % 10) plus the two flags above */
-X }
-X lC = cC;
-X }
-X /*printf("small global score =%d\n", C[ex-x+1][ey-y+1]);*/
-X if (N2 && cC[ey-y+1] < ci+gop) st[ex-x+1][ey-y+1] =0; /* ci still holds the I value read for the last cell */
-X first = NULL; e = 1;
-X for (i = ex+1, j = ey+1; i > x || j > y; i--) { /* traceback from the far corner; nodes are prepended, so the list ends up in forward order */
-X mp = (match_ptr) ckalloc(sizeof(match_node));
-X mp->i = i-1;
-X k = (t=st[i-x][j-y])%10;
-X mp->j = j-1;
-X if (e == 5 && (t/10)%2 == 1) k = 5; /* stay in the D state while the cell says it was extended */
-X if (e == 0 && (t/20)== 1) k = 0; /* stay in the I state while the cell says it was extended */
-X if (k == 5) { j -= 3; i++; e=5;} /* i++ cancels the loop's i--: a type-5 edge consumes no protein residue */
-X else {j -= k;if (k==0) e= 0; else e = 1;}
-X mp->l = k;
-X mp->next = first;
-X first = mp;
-X }
-X
-X /* for (i = 0; i <= ex-x; i++) {
-X for (j = 0; j <= ey-y; j++)
-X printf("%d ", C[i][j]);
-X printf("\n");
-X }
-X */
-X return first;
-}
-X
-#define XTERNAL
-#include "upam.h"
-X
-/*
-X da_shift_left - move the tail of line that starts at offset n down to
-X offset 0, terminating NUL included.  Source and destination overlap,
-X which a front-to-back byte copy handles and strncpy() does not.
-*/
-static void
-da_shift_left(char *line, int n)
-{
-X  int k;
-X
-X  for (k = 0; (line[k] = line[n+k]) != '\0'; k++)
-X    ;
-}
-X
-/*
-X display_alig - print an alignment on stdout, WIDTH columns per block.
-X
-X a       a[0], a[1] are the start positions in pro and dna; a[2..length-1]
-X         are the edge codes (0, 2, 3, 4, 5)
-X dna     translated DNA sequence numbers
-X pro     protein sequence numbers
-X length  number of entries in a
-X ld      not referenced
-X f_str   supplies weight_c[][] with the display characters
-X
-X Three text rows are built per block: line1 (top), line3 (middle) and
-X line2 (bottom).  A frame-shift edge takes two columns, so a block can
-X reach WIDTH+1 columns; the extra column is carried into the next block.
-X The carried character of line3 used to be lost because it was saved but
-X never put back before the shift; all three rows are now restored.
-*/
-void
-display_alig(int *a, unsigned char *dna, unsigned char *pro,
-X int length, int ld, struct f_struct *f_str)
-{
-X  int len = 0, i, j, x, y, k, iaa;
-X  static char line1[100], line2[100], line3[100];
-X  char c1, c2, c3;
-X
-X  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
-X
-X  printf("\n%5d\n%5d", y+3, x);
-X  for (len = 0, j = 2; j < length; j++) {
-X    i = a[j];
-X    line3[len] = ' ';
-X    switch (i) {
-X    case 3:
-X      y += 3;
-X      line2[len] = aa[iaa=pro[x++]];
-X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
-X      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
-X        line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
-X      break;
-X    case 2:
-X      y += 2;
-X      line1[len] = '\\';
-X      line2[len++] = ' ';
-X      line2[len] = aa[iaa=pro[x++]];
-X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
-X      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
-X      break;
-X    case 4:
-X      y += 4;
-X      line1[len] = '/';
-X      line2[len++] = ' ';
-X      line2[len] = aa[iaa=pro[x++]];
-X      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
-X      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
-X      break;
-X    case 5:
-X      y += 3;
-X      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
-X      line2[len] = '-';
-X      break;
-X    case 0:
-X      line1[len] = '-';
-X      line2[len] = aa[pro[x++]];
-X      break;
-X    }
-X    len++;
-X    line1[len] = line2[len] = line3[len] = '\0';
-X    if (len >= WIDTH) {
-X      for (k = 10; k <= WIDTH; k+=10)
-X        printf(" . :");
-X      if (k-5 < WIDTH) printf(" .");
-X      /* cut the rows at WIDTH for printing, then put the cut byte back */
-X      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
-X      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
-X      printf("\n %s\n %s\n %s\n", line1, line3, line2);
-X      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
-X      da_shift_left(line1, WIDTH);
-X      da_shift_left(line2, WIDTH);
-X      da_shift_left(line3, WIDTH);
-X      len = len - WIDTH;
-X      printf("\n%5d\n%5d", y+3, x);
-X    }
-X  }
-X  for (k = 10; k < len; k+=10)
-X    printf(" . :");
-X  if (k-5 < len) printf(" .");
-X  printf("\n %s\n %s\n %s\n", line1, line3, line2);
-}
-X
-X
-/* alignment store the operation that align the protein and dna sequence.
-X The code of the number in the array is as follows:
-X 0: delete of an amino acid.
-X 2: frame shift, 2 nucleotides match with an amino acid
-X 3: match an amino acid with a codon
-X 4: the other type of frame shift
-X 5: delete of a codon
-X
-X
-X Also the first two element of the array stores the starting point
-X in the protein and dna sequences in the local alignment.
-X
-X Display looks like where WIDTH is assumed to be divisible by 10.
-X
-X 0 . : . : . : . : . : . :
-X AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
-X I S G S V F N R Q L A G S V F N R Q L A
-X AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
-X
-X 60 . : . : . : . : . : . :
-X /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
-X G S V F N R Q L A G S V F N R Q L A G S V F
-X G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
-X
-For a frame shift, the middle row shows the letter in the original sequence,
-and the letter in the top row is the amino acid that is chosen by the
-alignment (translated codon chosen from 4 nucleotides, or 2+1).
-*/
-X
-/* fatal - write msg followed by a newline to stderr, then exit with status 1 */
-void
-fatal(char *msg)
-{
-X  fprintf(stderr, "%s\n", msg);
-X  exit(1);
-}
-X
-int do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares)
-{ /* do_walign - produce the full alignment for one sequence pair through
-X pro_dna().  The edge codes go to f_str->res (exposed as a_res->res),
-X *have_ares is set to 1 and the pro_dna() score is returned.
-X Without TFAST the call is (aa1, f_str->aa0v, n0-2); with TFAST the codon
-X series f_str->aa1v is rebuilt for the requested frame first and the call
-X is (aa0, f_str->aa1v, n1-2).  Penalties are passed negated. */
-X int score;
-X int i, ir, last_n1, itemp, n10, itx, dnav;
-X unsigned char *aa1x;
-X
-X a_res->res = f_str->res;
-X
-#ifndef TFAST
-X score = pro_dna(aa1, n1, f_str->aa0v, n0-2, ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval - ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X -ppst->gshift,
-X f_str, f_str->max_res, a_res);
-X /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */
-X
-#else
-X /* make a precomputed codon number series */
-X if (frame==0) {
-X pre_com(aa1, n1, f_str->aa1v);
-X }
-X else { /* must do things backwards */
-X pre_com_r(aa1, n1, f_str->aa1v);
-X }
-X
-X /* make translated sequence */
-X last_n1 = 0;
-X aa1x = f_str->aa1x;
-X for (itx= frame*3; itx< frame*3+3; itx++) { /* the three translations are stored back to back in aa1x, one slot apart */
-X n10 = saatran(aa1,&aa1x[last_n1],n1,itx);
-X /*
-X fprintf(stderr," itt %d itx: %d\n",itt,itx);
-X for (i=0; i<n10; i++) {
-X fprintf(stderr,"%c",aa[f_str->aa1x[last_n1+i]]);
-X if ((i%60)==59) fprintf(stderr,"\n");
-X }
-X fprintf(stderr,"\n");
-X */
-X last_n1 += n10+1;
-X }
-X n10 = last_n1-1; /* not read again in this function */
-X
-X score = pro_dna(aa0, n0, f_str->aa1v, n1-2, ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval - ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X -ppst->gshift,
-X f_str, f_str->max_res, a_res);
-X /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0,f_str); */
-#endif
-X a_res->res = f_str->res; /* same assignment as at the top of the function */
-X *have_ares = 1;
-X
-X return score;
-}
-X
-/*
-X pre_cons - in TFAST builds, refill f_str->aa1v with the codon number
-X series for aa1: pre_com() for frame 0, pre_com_r() for any other frame.
-X In other builds the function does nothing.
-*/
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
-X
-#ifdef TFAST
-X  if (frame != 0) {	/* must do things backwards */
-X    pre_com_r(aa1, n1, f_str->aa1v);
-X  }
-X  else {
-X    pre_com(aa1, n1, f_str->aa1v);
-X  }
-#endif
-}
-X
-/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
-/* call from calcons, calc_id, calc_code */
-/* the side that holds the DNA gets factor 3 and the reverse flag (frame > 0) */
-void
-aln_func_vals(int frame, struct a_struct *aln) {
-X
-X  int is_rev = (frame > 0) ? 1 : 0;
-X
-X  aln->llmult = 1;
-X  aln->frame = 0;
-#ifndef TFAST
-X  aln->qlfact = 3;
-X  aln->qlrev = is_rev;
-X  aln->llfact = 1;
-X  aln->llrev = 0;
-#else /* TFASTX */
-X  aln->qlfact = 1;
-X  aln->qlrev = 0;
-X  aln->llfact = 3;
-X  aln->llrev = is_rev;
-#endif /* TFASTX */
-}
-X
-#include "structs.h"
-#include "a_mark.h"
-X
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{ /* calcons - turn the edge codes in a_res.res[0..nres-1] into display
-X strings: the codon side goes to sp0, the protein side to sp1 and one
-X match symbol per column to seqca (NUL terminated here).  Which of
-X seqc0/seqc1 is the codon side depends on TFAST.  Counts and end points
-X are stored in *aln, *nc receives the number of aligned columns (lenc),
-X and the return value is lenc plus the number of frame-shift marker
-X columns (not_c).  a_res and pst are received by value. */
-X int i0, i1;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char *sp0, *sp1, *spa, *sq;
-X unsigned char aap;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X int *res;
-X
-X /* don't fill in the ends */
-X
-X
-X res = a_res.res;
-X rpmax = &res[a_res.nres]; /* end of alignment info */
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X /* res[0] has start of protein sequence */
-X /* res[1] has start of translated DNA sequence */
-X
-#ifndef TFAST /* FASTX */
-X ap0 = f_str->aa0v; /* computed codons -> ap0*/
-X ap1 = aa1; /* protein sequence -> ap1 */
-X aln->smin1 = a_res.min0; /* start in protein sequence */
-X aln->smin0= a_res.min1; /* start in DNA/codon sequence */
-#else /* TFASTYZ */
-X ap0 = f_str->aa1v; /* computed codons -> ap0*/
-X ap1 = aa0; /* protein sequence */
-X aln->smin0 = a_res.min0; /* start in protein sequence */
-X aln->smin1 = a_res.min1; /* start in codon sequence */
-#endif
-X
-X rp = a_res.res; /* start of alignment info */
-X
-/* now get the middle */
-X spa = seqca;
-#ifndef TFAST
-X sp0 = seqc0; /* sp0/seqc0 is codon sequence */
-X sp1 = seqc1; /* sp1/seqc1 is protein sequence */
-#else
-X sp1 = seqc0; /* sp1/seqc0 is protein sequence */
-X sp0 = seqc1; /* sp0/seqc1 is codon sequence */
-#endif
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
-X i0 = a_res.min1-3; /* start of codon sequence */
-X i1 = a_res.min0; /* start of protein sequence */
-X
-X while (rp < rpmax ) {
-X switch (*rp++) {
-X case 3: /* match */
-X i0 += 3;
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
-X
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; } /* NOTE(review): pascii is indexed with a plain char - assumes the display characters are non-negative; confirm */
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 2: /* frame shift +2, then match */
-X nfs++;
-X i0 += 2;
-X *sp0++ = '/'; /* marker column; counted in not_c, not in lenc */
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;} /* NOTE(review): nsim is not incremented here, although case 3 and calcons_a() do - confirm intended */
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 4: /* frame shift, -1, then match */
-X nfs++;
-X i0 += 4;
-X *sp0++ = '\\'; /* marker column; counted in not_c, not in lenc */
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;} /* NOTE(review): nsim is not incremented here, although case 3 and calcons_a() do - confirm intended */
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 5: /* insertion in 1 */
-X i0 += 3;
-X *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_p++;
-X break;
-X case 0: /* insertion in 0 */
-X *sp0++ = '-';
-X *sp1++ = sq[ap1[i1++]];
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_d++;
-X break;
-X }
-X }
-X
-X *spa = '\0'; /* only seqca is terminated here; sp0 and sp1 are left unterminated */
-X
-#ifndef TFAST
-X aln->amax0 = i0+3; /* end of codon sequence */
-X aln->amax1 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0+3; /* end of codon sequence */
-X aln->amax0 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X aln->amin0 = aln->smin0;
-X aln->amin1 = aln->smin1;
-X
-X if (lenc < 0) lenc = 1; /* lenc starts at 0 and is only incremented above */
-X
-X *nc = lenc;
-/* now we have the middle, get the right end */
-X
-X return lenc+not_c;
-}
-X
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{ /* calcons_a - same job as calcons(), plus an annotation row written to
-X seqc0a: a blank in most columns and, in TFAST builds, ann_arr[aa0a[i1]]
-X in the columns that consume a protein residue of a frame-shift or
-X type-0 edge.  seqc0a and seqca are NUL terminated here.  *nc receives
-X the number of aligned columns (lenc) and the return value is lenc plus
-X the number of frame-shift marker columns (not_c).  a_res and pst are
-X received by value. */
-X int i0, i1;
-X int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
-X char *sp0, *sp0a, *sp1, *spa, *sq;
-X unsigned char aap;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X /* don't fill in the ends */
-X
-X rpmax = &a_res.res[a_res.nres]; /* end of alignment info */
-X
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X /* res[0] has start of protein sequence */
-X /* res[1] has start of translated DNA sequence */
-X
-#ifndef TFAST
-X ap0 = f_str->aa0v; /* computed codons -> ap0*/
-X ap1 = aa1; /* protein sequence -> ap1 */
-X aln->smin1 = a_res.min0; /* start in protein sequence */
-X aln->smin0= a_res.min1; /* start in DNA/codon sequence */
-#else /* TFASTYZ */
-X ap0 = f_str->aa1v; /* computed codons -> ap0*/
-X ap1 = aa0; /* protein sequence */
-X aln->smin0 = a_res.min0; /* start in protein sequence */
-X aln->smin1 = a_res.min1; /* start in codon sequence */
-#endif
-X
-X rp = a_res.res; /* start of alignment info */
-X
-X
-/* now get the middle */
-X spa = seqca;
-X sp0a = seqc0a;
-#ifndef TFAST
-X sp0 = seqc0; /* sp0/seqc0 is codon sequence */
-X sp1 = seqc1; /* sp1/seqc1 is protein sequence */
-#else
-X sp1 = seqc0; /* sp1/seqc0 is protein sequence */
-X sp0 = seqc1; /* sp0/seqc1 is codon sequence */
-#endif
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
-X i0 = a_res.min1-3; /* start of codon sequence */
-X i1 = a_res.min0; /* start of protein sequence */
-X
-X while (rp < rpmax ) {
-X switch (*rp++) {
-X case 3: /* match */
-X i0 += 3;
-X *sp0a++ = ' ';
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
-X
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; } /* NOTE(review): pascii is indexed with a plain char - assumes the display characters are non-negative; confirm */
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 2: /* frame shift +2, then match */
-X nfs++;
-X i0 += 2;
-X *sp0a++ = ' '; /* annotation for the marker column */
-X *sp0++ = '/';
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 4: /* frame shift, -1, then match */
-X nfs++;
-X i0 += 4;
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp0++ = '\\'; /* NOTE(review): this case writes two columns to sp0/sp1/spa but only one to sp0a, whereas case 2 writes two - confirm the annotation row stays aligned */
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X not_c++;
-X *sp1 = sq[aap=ap1[i1++]];
-X *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
-X if ((itmp=pst.pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X sp0++; sp1++; spa++;
-X lenc++;
-X break;
-X case 5: /* insertion in 1 */
-X i0 += 3;
-X *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X *sp0a++ = ' ';
-X lenc++;
-X ngap_p++;
-X break;
-X case 0: /* insertion in 0 */
-X *sp0++ = '-';
-#ifndef TFAST
-X *sp0a++ = ' ';
-#else
-X *sp0a++ = ann_arr[aa0a[i1]];
-#endif
-X *sp1++ = sq[ap1[i1++]];
-X *spa++ = M_DEL;
-X lenc++;
-X ngap_d++;
-X break;
-X }
-X }
-X
-X *sp0a = *spa = '\0'; /* sp0 and sp1 are left unterminated */
-X
-#ifndef TFAST
-X aln->amax0 = i0+3; /* end of codon sequence */
-X aln->amax1 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0+3; /* end of codon sequence */
-X aln->amax0 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X aln->amin0 = aln->smin0;
-X aln->amin1 = aln->smin1;
-X
-X if (lenc < 0) lenc = 1; /* lenc starts at 0 and is only incremented above */
-X
-X *nc = lenc;
-/* now we have the middle, get the right end */
-X
-X return lenc+not_c;
-}
-X
-/* update_code() - append one "<op-character><count>" token to al_str.
-X
-X al_str - NUL-terminated code string being built
-X al_str_max - space remaining in al_str; callers in this file pass
-X al_str_n - strlen(al_str), which is treated here as including
-X the slot for the terminating NUL (TODO confirm that al_str_n
-X is the full buffer size)
-X op - index into op_char[] selecting the operation character
-X op_cnt - run length printed after the operation character
-X op_char - table of operation characters
-X
-X When there is no room for at least one character plus the NUL,
-X nothing is appended.
-*/
-void
-update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
-X
-X char tmp_cnt[20];
-X
-X /* strncat() copies up to n characters and then ALWAYS adds a NUL,
-X so n must be one less than the space that is left; passing the
-X full remaining space can write one byte past the buffer. */
-X if (al_str_max <= 1) return;
-X
-X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X strncat(al_str,tmp_cnt,al_str_max-1);
-}
-X
-/* calc_code() - build a run-length encoded description of the
-X alignment in a_res.res[] and append it to al_str as a series of
-X "<op-character><count>" tokens (see update_code()).
-X
-X While walking the alignment it also fills in *aln: start/end
-X coordinates (smin0/1, amin0/1, amax0/1), nident, nsim, ngap_q,
-X ngap_l and nfs.
-X
-X n0 and n1 are not referenced in this function.
-X
-X Returns the number of alignment columns processed (lenc).
-*/
-int calc_code(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1;
-X int lenc, not_c, itmp, ngap_d, ngap_p, nfs;
-X int op, op_cnt;
-X char sp0, sp1, op_char[10];
-X unsigned char aap;
-X const unsigned char *ap0, *ap1;
-X int *rp, *rpmax;
-X
-X /* don't fill in the ends */
-X
-X /* op_char[] is indexed by operation code:
-X 0 and 5 are the two gap characters ('-' and '+', swapped for TFAST),
-X 2 is '/', 3 is '=', 4 is '\', 6 is '*'.
-X strncpy() zero-fills the rest of op_char[]. */
-#ifndef TFAST
-X strncpy(op_char,"- /=\\+*",sizeof(op_char));
-X ap0 = f_str->aa0v; /* computed codons -> ap0*/
-X ap1 = aa1; /* protein sequence -> ap1 */
-X aln->smin1 = a_res.min0; /* start in protein sequence */
-X aln->smin0= a_res.min1; /* start in DNA/codon sequence */
-#else /* TFASTYZ */
-X strncpy(op_char,"+ /=\\-*",sizeof(op_char));
-X ap0 = f_str->aa1v; /* computed codons -> ap0*/
-X ap1 = aa0; /* protein sequence */
-X aln->smin0 = a_res.min0; /* start in protein sequence */
-X aln->smin1 = a_res.min1; /* start in codon sequence */
-#endif
-X
-X rp = a_res.res; /* start of alignment info */
-X rpmax = &a_res.res[a_res.nres]; /* end of alignment info */
-X
-/* now get the middle */
-X
-X lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
-X /* op/op_cnt hold the run currently being accumulated; it starts as
-X an empty match run (op 3, count 0).
-X NOTE(review): if the first alignment code is not a plain match,
-X this empty run is still flushed, which emits a "=0" token. */
-X op_cnt = 0;
-X op = 3;
-X
-X /* i0 is pre-decremented by 3 because every codon-consuming case
-X advances i0 before using it */
-X i0 = a_res.min1-3; /* start of codon sequence */
-X i1 = a_res.min0; /* start of protein sequence */
-X
-X while (rp < rpmax ) {
-X switch (*rp++) {
-X case 3: /* match */
-X sp1 = pst.sq[aap=ap1[i1++]];
-X i0 += 3;
-X sp0 = f_str->weight_c[aap][ap0[i0]].c5;
-X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
-X
-X /* op 6 marks a column in which either character is '*' */
-X if (op == 3 || op == 6) {
-X if (sp0 != '*' && sp1 != '*') {
-X if (op == 6 ) {
-X /* leaving a '*' run: flush it and start a match run */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op_cnt = 1; op = 3;
-X }
-X else {op_cnt++;}
-X }
-X else {
-X /* a '*' column always flushes the current run and starts a
-X new '*' run of length 1 */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
-X op_cnt = 1; op = 6;
-X }
-X }
-X else {
-X /* previous run was a gap: flush it and start a match run */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op_cnt = 1; op = 3;
-X }
-X if (sp0 == sp1) aln->nident++;
-X lenc++;
-X break;
-X case 2: /* -1 frame shift */
-X /* flush the current run, emit the frameshift token with count 1,
-X then start a match run of length 1 for the residue that follows.
-X The codon index advances by 2 here instead of 3. */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 2; op_cnt = 1;
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 3; op_cnt = 1;
-X
-X nfs++;
-X i0 += 2;
-X not_c++;
-X sp1 = pst.sq[aap=ap1[i1++]];
-X sp0 = f_str->weight_c[aap][ap0[i0]].c2;
-X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
-X if (sp0 == sp1) aln->nident++;
-X lenc++;
-X break;
-X case 4: /* +1 frame shift */
-X /* same pattern as case 2, but the codon index advances by 4 */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 4; op_cnt = 1;
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 3; op_cnt = 1;
-X
-X nfs++;
-X i0 += 4;
-X not_c++;
-X sp1 = pst.sq[aap=ap1[i1++]];
-X sp0 = f_str->weight_c[aap][ap0[i0]].c4;
-X if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
-X if (sp0 == sp1) aln->nident++;
-X lenc++;
-X break;
-X case 5: /* insert in 1 */
-X /* extend the current op-5 run, or flush and start one */
-X if (op == 5) op_cnt++;
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 5; op_cnt = 1;
-X }
-X
-X i0 += 3;
-X lenc++;
-X ngap_p++;
-X break;
-X case 0: /* insert in 0 */
-X /* extend the current op-0 run, or flush and start one */
-X if (op == 0) op_cnt++;
-X else {
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X op = 0; op_cnt = 1;
-X }
-X
-X i1++;
-X lenc++;
-X ngap_d++;
-X break;
-X }
-X }
-X
-X /* flush the final run */
-X update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
-X
-#ifndef TFAST
-X aln->amax0 = i0+3; /* end of codon sequence */
-X aln->amax1 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_d;
-X aln->ngap_l = ngap_p;
-#else
-X aln->amax1 = i0+3; /* end of codon sequence */
-X aln->amax0 = i1; /* end of protein sequence */
-X aln->ngap_q = ngap_p;
-X aln->ngap_l = ngap_d;
-#endif
-X aln->nfs = nfs;
-X aln->amin0 = aln->smin0;
-X aln->amin1 = aln->smin1;
-X
-X if (lenc < 0) lenc = 1;
-X
-/* now we have the middle, get the right end */
-X
-X return lenc;
-}
-X
-/* calc_id() - walk the alignment codes in a_res.res[] and tally
-X identities, similarities, gaps and frameshifts into *aln without
-X producing any alignment text.
-X
-X Fills in aln->smin0/1, amin0/1, amax0/1, nident, nsim, ngap_q,
-X ngap_l and nfs. n0 and n1 are not referenced.
-X
-X Returns the number of alignment columns processed.
-*/
-int calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X const unsigned char *codons, *prot;
-X int *code_p, *code_end;
-X int cod_i, prot_i;
-X int n_col, n_shift, gaps_op0, gaps_op5;
-X int code;
-X unsigned char res;
-X char c_cod, c_prot;
-X
-X /* choose which input supplies codons and which supplies protein */
-#ifndef TFAST /* FASTYZ */
-X codons = f_str->aa0v;
-X prot = aa1;
-X aln->smin1 = a_res.min0; /* start in protein sequence */
-X aln->smin0 = a_res.min1; /* start in DNA/codon sequence */
-#else /* TFASTYZ */
-X codons = f_str->aa1v;
-X prot = aa0;
-X aln->smin0 = a_res.min0; /* start in protein sequence */
-X aln->smin1 = a_res.min1; /* start in codon sequence */
-#endif
-X
-X aln->nsim = 0;
-X aln->nident = 0;
-X n_col = n_shift = gaps_op0 = gaps_op5 = 0;
-X
-X /* the codon index is advanced before each use, so start 3 early */
-X cod_i = a_res.min1-3;
-X prot_i = a_res.min0;
-X
-X code_p = a_res.res;
-X code_end = &a_res.res[a_res.nres];
-X
-X while (code_p < code_end) {
-X code = *code_p++;
-X
-X if (code == 5) { /* gap: only the codon index moves */
-X cod_i += 3;
-X n_col++;
-X gaps_op5++;
-X continue;
-X }
-X if (code == 0) { /* gap: only the protein index moves */
-X prot_i++;
-X n_col++;
-X gaps_op0++;
-X continue;
-X }
-X if (code < 2 || code > 4) continue; /* other codes are ignored */
-X
-X /* code is 2, 3 or 4: a residue is aligned and the codon index
-X advances by exactly the code value; 2 and 4 are frameshifts */
-X if (code != 3) n_shift++;
-X cod_i += code;
-X
-X res = prot[prot_i++];
-X c_prot = pst.sq[res];
-X if (code == 3) c_cod = f_str->weight_c[res][codons[cod_i]].c5;
-X else if (code == 2) c_cod = f_str->weight_c[res][codons[cod_i]].c2;
-X else c_cod = f_str->weight_c[res][codons[cod_i]].c4;
-X
-X if (pst.pam2[0][res][pascii[c_cod]]>=0) aln->nsim++;
-X if (c_cod == c_prot) aln->nident++;
-X n_col++;
-X }
-X
-#ifndef TFAST
-X aln->amax0 = cod_i+3; /* end of codon sequence */
-X aln->amax1 = prot_i; /* end of protein sequence */
-X aln->ngap_q = gaps_op0;
-X aln->ngap_l = gaps_op5;
-#else
-X aln->amax1 = cod_i+3; /* end of codon sequence */
-X aln->amax0 = prot_i; /* end of protein sequence */
-X aln->ngap_q = gaps_op5;
-X aln->ngap_l = gaps_op0;
-#endif
-X aln->nfs = n_shift;
-X aln->amin0 = aln->smin0;
-X aln->amin1 = aln->smin1;
-X
-X return (n_col < 0) ? 1 : n_col;
-}
-X
-#ifdef PCOMPLIB
-#include "p_mw.h"
-/* update_params() - copy the query length n0 from the query message
-X qm_msg into the parameter structure ppst. Only compiled when
-X PCOMPLIB is defined. */
-void
-update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
-{
-X ppst->n0 = qm_msg->n0;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropfz2.c ||
-echo 'restore of dropfz2.c failed'
-Wc_c="`wc -c < 'dropfz2.c'`"
-test 77360 -eq "$Wc_c" ||
- echo 'dropfz2.c: original size 77360, current size' "$Wc_c"
-fi
-# ============= dropgsw.c ==============
-if test -f 'dropgsw.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropgsw.c (File already exists)'
-else
-echo 'x - extracting dropgsw.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropgsw.c' &&
-/* copyright (c) 1996 William R. Pearson */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropgsw.c,v 1.80 2006/10/19 15:12:11 wrp Exp $ */
-X
-/* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe */
-X
-/* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates */
-X
-/* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included */
-X
-/* 14-May-2003 - modified to return alignment start at 0, rather than
-X 1, for begin:end alignments
-X
-X 25-Feb-2003 - modified to support Altivec parallel Smith-Waterman
-X
-X 22-Sep-2003 - removed Altivec support at request of Sencel lawyers
-*/
-X
-/* the do_walign() code in this file is not thread_safe */
-/* init_work(), do_work(), are thread safe */
-X
-/* this code uses an implementation of the Smith-Waterman algorithm
-X designed by Phil Green, U. of Washington, that is 1.5 - 2X faster
-X than my Miller and Myers implementation. */
-X
-/* the shortcuts used in this program prevent it from calculating scores
-X that are less than the gap penalty for the first residue in a gap. As
-X a result this code cannot be used with very large gap penalties, or
-X with very short sequences, and probably should not be used with prss3.
-*/
-X
-/* version 3.2 fixes a subtle bug that was encountered while running
-X do_walign() interspersed with do_work(). This happens only with -m
-X 9 and pvcomplib. The fix was to more explicitly zero-out ss[] at
-X the beginning of do_work.
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-X
-static char *verstr="5.5 Sept 2006";
-X
-#include "dropgsw.h"
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-#ifdef SW_ALTIVEC
-#include "smith_waterman_altivec.h"
-#endif
-#ifdef SW_SSE2
-#include "smith_waterman_sse2.h"
-#endif
-X
-/* one cell of the single scoring row used by FLOCAL_ALIGN():
-X H holds the diagonal (match) score, E the up-gap score */
-struct swstr {int H, E;};
-X
-/* statistics helpers defined in another file; init_karlin() is called
-X from init_work() and do_karlin() from do_work() when zsflag is 6 or 16 */
-extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
-X double *aa0_f, double **kp);
-extern int do_karlin(const unsigned char *aa1, int n1,
-X int **pam2, struct pstruct *ppst,
-X double *aa0_f, double *kar_p, double *lambda, double *H);
-X
-/* forward declarations of file-local routines defined later in this file */
-static int
-ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int **W, int IW, int G, int H, int *res, int *nres,
-X struct f_struct *f_str);
-X
-/* score-only local alignment; used by do_work() when neither
-X SW_ALTIVEC nor SW_SSE2 is defined */
-static int
-FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
-X int n0, int n1, int low, int up,
-X int **W, int GG,int HH, int MW,
-X struct f_struct *f_str);
-X
-static
-void DISPLAY(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int *S, int AP, int BP, char *sq);
-X
-/* defined in another file */
-extern void aancpy(char *to, char *from, int count, struct pstruct pst);
-X
-/* init_work() - initialize for Smith-Waterman optimal score.
-X
-X Allocates and fills the per-query state (struct f_struct) used by
-X do_work() and the alignment code, and returns it through *f_arg.
-X
-X aa0, n0 - encoded query sequence and its length
-X ppst - search parameters: alphabet size (nsq, or nsqx when
-X ext_sq_set), scoring matrices (pam2[], or the position
-X specific pam2p[] when pam_pssm is set) and zsflag
-X f_arg - receives the pointer to the new f_struct
-X
-X Every allocation made here is checked; on failure a message is
-X written to stderr and the program exits with status 1.
-*/
-X
-void
-init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X int maxn0, ip;
-X int *pwaa_s, *pwaa_a;
-X int e, f, i, j, l;
-X int *res;
-X struct f_struct *f_str;
-X int **pam2p;
-X struct swstr *ss;
-X int nsq;
-X
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X int data,bias;
-X unsigned char * pc;
-X unsigned short * ps;
-X int overflow;
-X
-X int n_count;
-X int col_len;
-#endif
-X
-X /* ip selects which of the two scoring matrices is used for scanning */
-X if (ppst->ext_sq_set) {
-X nsq = ppst->nsqx; ip = 1;
-X }
-X else {
-X nsq = ppst->nsq; ip = 0;
-X }
-X
-X /* allocate space for function globals */
-X
-X f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
-X if (f_str == NULL) {
-X /* f_str is dereferenced immediately below */
-X fprintf(stderr,"cannot allocate f_struct\n");
-X exit(1);
-X }
-X
-X if(ppst->zsflag == 6 || ppst->zsflag == 16) {
-X f_str->kar_p = NULL;
-X init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
-X }
-X
-X /* allocate space for the scoring arrays */
-X if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate ss array %3d\n", n0);
-X exit (1);
-X }
-X /* skip the first element so that ss[-1] is valid; close_work()
-X undoes this before calling free() */
-X ss++;
-X
-X ss[n0].H = -1; /* this is used as a sentinel - normally H >= 0 */
-X ss[n0].E = 1;
-X f_str->ss = ss;
-X
-X /* initialize variable (-S) pam matrix */
-X if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X /* initialize pam2p[1] pointers */
-X if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
-X exit(1);
-X }
-X
-X pam2p = f_str->pam2p[1];
-X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X /* the rows share one contiguous allocation starting at pam2p[0] */
-X for (i=1; i<n0; i++) {
-X pam2p[i]= pam2p[0] + (i*(nsq+1));
-X }
-X
-X /* initialize universal (alignment) matrix */
-X if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X /* initialize pam2p[0] pointers */
-X if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
-X exit(1);
-X }
-X
-X pam2p = f_str->pam2p[0];
-X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X for (i=1; i<n0; i++) {
-X pam2p[i]= pam2p[0] + (i*(nsq+1));
-X }
-X
-X /*
-X pwaa effectively has a sequence profile --
-X pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
-X pwaa[n0..2n0-1] has pam scores for residue 1 (A)
-X pwaa[2n0..3n0-1] has pam scores for residue 2 (R), ...
-X
-X thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
-X *pwaa++ rapidly moves though the scores of the aa1p[] position
-X without further indexing
-X
-X For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
-X a different score in each position.
-X */
-X
-X if (ppst->pam_pssm) {
-X pwaa_s = f_str->waa_s;
-X pwaa_a = f_str->waa_a;
-X for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */
-X for (f = 0; f < n0; f++) { /* for each position in aa0 */
-X *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
-X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e];
-X }
-X }
-X }
-X else { /* initialize scanning matrix */
-X pwaa_s = f_str->waa_s;
-X pwaa_a = f_str->waa_a;
-X for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */
-X for (f = 0; f < n0; f++) { /* for each position in aa0 */
-X *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
-X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
-X }
-X }
-X
-#if defined(SW_ALTIVEC)
-X
-X /* First we allocate memory for the workspace - i.e. the single row
-X * of storage for H/F. Since this might be run on Linux or AIX too,
-X * we don't assume anything about the memory allocation but align
-X * it ourselves. We need two vectors (16 bytes each) per element,
-X * and some padding space to make it cache-line aligned.
-X
-X * MAXTST+MAXLIB is longest allowed database sequence length...
-X * this should be m_msg.max_tot, but m_msg is not available, but
-X * ppst->maxlen has maxn, which is appropriate.
-X */
-X
-X f_str->workspace_memory = (void *)malloc(2*16*(ppst->maxlen+SEQ_PAD)+256);
-X if (f_str->workspace_memory == NULL) {
-X fprintf(stderr,"cannot allocate Altivec workspace\n");
-X exit(1);
-X }
-X f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
-X
-X
-X
-X /* We always use a scoring profile in altivec, but the layout is a bit strange
-X * in order to optimize memory access order and thus cache efficiency.
-X * Normally we first try 8-bit scoring in altivec, and if this leads to overflow
-X * we recompute the score with 16-bit accuracy. Because of this we need to construct
-X * two score profiles.
-X * Since altivec always loads 16 bytes from aligned memory, corresponding to 8 or 16
-X * elements (for 16 and 8 bit scoring, respectively), we organize the scoring
-X * profile like this for 8-bit accuracy:
-X *
-X * 1. The profile starts on 256-byte aligned memory (cache line on G5 is 128 bytes).
-X * 2. First we have the score for the full alphabet for the first 16 residues of
-X * the query, i.e. positions 0-15 are the scores for the first 16 query letters
-X * vs. the first in the alphabet, positions 16-31 the scores for the same 16
-X * query positions against alphabet letter two, etc.
-X * 3. After alphabet_size*16bytes we start with the scores for residues 16-31 in
-X * the query, organized in the same way.
-X * 4. At the end of the query sequence, we pad the scoring to the next 16-tuple
-X * with neutral scores.
-X * 5. The total size of the profile is thus alphabet_size*N, where N is the
-X * size of the query rounded up to the next 16-tuple.
-X *
-X * The word (16-bit) profile is identical, but scores are stored as 8-tuples.
-X */
-X
-X f_str->word_score_memory = (void *)malloc(10*2*(nsq+2)*(n0+1+16)+256);
-X f_str->byte_score_memory = (void *)malloc(10*(nsq+2)*(n0+1+16)+256);
-X if (f_str->word_score_memory == NULL || f_str->byte_score_memory == NULL) {
-X fprintf(stderr,"cannot allocate Altivec score profiles\n");
-X exit(1);
-X }
-X
-X f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
-X f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
-X
-X overflow = 0;
-X
-X if (ppst->pam_pssm) {
-X /* Use a position-specific scoring profile.
-X * This is essentially what we are going to construct anyway, but we'll
-X * reorder it to suit altivec.
-X */
-X bias = 127;
-X for(i = 1; i <= nsq ; i++) {
-X for(j = 0; j < n0 ; j++) {
-X data = ppst->pam2p[ip][j][i];
-X if(data<bias) bias = data;
-X }
-X }
-X
-X /* Fill our specially organized byte- and word-size scoring arrays. */
-X ps = f_str->word_score;
-X for(f = 0; f<n0 ; f+=8) {
-X /* e=0 */
-X for(i=0 ; i<8 ; i++) {
-X *ps++ = (unsigned short) 0;
-X }
-X /* for each chunk of 8 residues in our query */
-X for(e = 1; e<=nsq; e++) {
-X for(i=0 ; i<8 ; i++) {
-X l = f + i;
-X if(l<n0) {
-X data = ppst->pam2p[ip][l][e] - bias;
-X }
-X else {
-X data = 0;
-X }
-X *ps++ = (unsigned short)data;
-X }
-X }
-X }
-X pc = f_str->byte_score;
-X for(f = 0; f<n0 ; f+=16) {
-X /* e=0 */
-X for(i=0 ; i<16 ; i++) {
-X *pc++ = (unsigned char)0;
-X }
-X
-X for(e = 1; e<=nsq; e++) {
-X for(i=0 ; i<16 ; i++) {
-X l = f + i;
-X if(l<n0) {
-X data = ppst->pam2p[ip][l][e] - bias;
-X }
-X else {
-X data = 0;
-X }
-X if(data>255) {
-X /*
-X printf("Fatal error. data: %d bias: %d, position: %d/%d, Score out of range for 8-bit Altivec/VMX datatype.\n",data,bias,l,e);
-X exit(1);
-X */
-X overflow = 1;
-X }
-X *pc++ = (unsigned char)data;
-X }
-X }
-X }
-X }
-X else {
-X /* Classical simple substitution matrix */
-X /* Find the bias to use in the substitution matrix */
-X bias = 127;
-X for(i = 1; i <= nsq ; i++) {
-X for(j = 1; j <= nsq ; j++) {
-X data = ppst->pam2[ip][i][j];
-X if(data<bias) bias = data;
-X }
-X }
-X /* Fill our specially organized byte- and word-size scoring arrays. */
-X ps = f_str->word_score;
-X for(f = 0; f<n0 ; f+=8) {
-X /* e=0 */
-X for(i=0 ; i<8 ; i++) {
-X *ps++ = (unsigned short) 0;
-X }
-X /* for each chunk of 8 residues in our query */
-X for(e = 1; e<=nsq; e++) {
-X for(i=0 ; i<8 ; i++) {
-X l = f + i;
-X if(l<n0) {
-X data = ppst->pam2[ip][aa0[l]][e] - bias;
-X }
-X else {
-X data = 0;
-X }
-X *ps++ = (unsigned short)data;
-X }
-X }
-X }
-X pc = f_str->byte_score;
-X for(f = 0; f<n0 ; f+=16) {
-X /* e=0 */
-X for(i=0 ; i<16 ; i++) {
-X *pc++ = (unsigned char)0;
-X }
-X
-X for(e = 1; e<=nsq; e++) {
-X for(i=0 ; i<16 ; i++) {
-X l = f + i;
-X if (l<n0) {
-X data = ppst->pam2[ip][aa0[l]][e] - bias;
-X }
-X else {
-X data = 0;
-X }
-X if(data>255) {
-X /*
-X printf("Fatal error. Score out of range for 8-bit Altivec/VMX datatype.\n");
-X exit(1);
-X */
-X overflow = 1;
-X }
-X *pc++ = (unsigned char)data;
-X }
-X }
-X }
-X }
-X
-X f_str->bias = (unsigned char) (-bias);
-X f_str->alphabet_size = nsq+1;
-X
-X /* Some variable to keep track of how many 8-bit runs we need to rerun
-X * in 16-bit accuracy. If there are too many reruns it can be faster
-X * to use 16-bit alignments directly.
-X */
-X
-X /* We can only do 8-bit alignments if the scores were small enough. */
-X if(overflow==0) f_str->try_8bit = 1;
-X else f_str->try_8bit = 0;
-X
-X f_str->done_8bit = 0;
-X f_str->done_16bit = 0;
-X
-#endif /* SW_ALTIVEC */
-X
-#if defined(SW_SSE2)
-X /* First we allocate memory for the workspace - i.e. two rows for H and
-X * one row for F. We also need enough space to hold a temporary
-X * scoring profile which will be query_length * 16 (sse2 word length).
-X * Since this might be run on Linux or AIX too, we don't assume
-X * anything about the memory allocation but align it ourselves.
-X */
-X f_str->workspace_memory = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256);
-X if (f_str->workspace_memory == NULL) {
-X fprintf(stderr,"cannot allocate SSE2 workspace\n");
-X exit(1);
-X }
-X f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
-X
-X /* We always use a scoring profile for the SSE2 implementation, but the layout
-X * is a bit strange. The scoring profile is parallel to the query, but is
-X * accessed in a striped pattern. The query is divided into equal length
-X * segments. The number of segments is equal to the number of elements
-X * processed in the SSE2 register. For 8-bit calculations, the query will
-X * be divided into 16 equal length parts. If the query is not long enough
-X * to fill the last segment, it will be filled with neutral weights. The
-X * first element in the SSE register will hold a value from the first segment,
-X * the second element of the SSE register will hold a value from the
-X * second segment and so on. So if the query length is 288, then each
-X * segment will have a length of 18. So the first 16 bytes will have
-X * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will
-X * have the following weights: Q2, Q20, Q38, ... Q272; and so on until
-X * all parts of all segments have been written. The last segment will
-X * have the following weights: Q18, Q36, Q54, ... Q288. This will be
-X * done for the entire alphabet.
-X */
-X
-X f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof (short) * (nsq + 1) + 256);
-X f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof (char) * (nsq + 1) + 256);
-X if (f_str->word_score_memory == NULL || f_str->byte_score_memory == NULL) {
-X fprintf(stderr,"cannot allocate SSE2 score profiles\n");
-X exit(1);
-X }
-X
-X f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
-X f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
-X
-X overflow = 0;
-X
-X if (ppst->pam_pssm) {
-X /* Use a position-specific scoring profile.
-X * This is essentially what we are going to construct anyway, but we'll
-X * reorder it to suit sse2.
-X */
-X bias = 127;
-X for (i = 1; i <= nsq ; i++) {
-X for (j = 0; j < n0 ; j++) {
-X data = ppst->pam2p[ip][j][i];
-X if (data < bias) {
-X bias = data;
-X }
-X }
-X }
-X
-X /* Fill our specially organized byte- and word-size scoring arrays. */
-X ps = f_str->word_score;
-X col_len = (n0 + 7) / 8;
-X n_count = (n0 + 7) & 0xfffffff8;
-X for (f = 0; f < n_count; ++f) {
-X *ps++ = 0;
-X }
-X for (f = 1; f <= nsq ; f++) {
-X for (e = 0; e < col_len; e++) {
-X for (i = e; i < n_count; i += col_len) {
-X if ( i < n0) { data = ppst->pam2p[ip][i][f];}
-X else {data = 0;}
-X *ps++ = (unsigned short)data;
-X }
-X }
-X }
-X pc = f_str->byte_score;
-X col_len = (n0 + 15) / 16;
-X n_count = (n0 + 15) & 0xfffffff0;
-X for (f = 0; f < n_count; ++f) {
-X *pc++ = 0;
-X }
-X for (f = 1; f <= nsq ; f++) {
-X for (e = 0; e < col_len; e++) {
-X for (i = e; i < n_count; i += col_len) {
-X if ( i < n0 ) { data = ppst->pam2p[ip][i][f] - bias;}
-X else {data = 0 - bias;}
-X if (data > 255) {
-X printf("Fatal error. data: %d bias: %d, position: %d/%d, "
-X "Score out of range for 8-bit SSE2 datatype.\n",
-X data, bias, f, e);
-X exit(1);
-X }
-X *pc++ = (unsigned char)data;
-X }
-X }
-X }
-X }
-X else
-X {
-X /* Classical simple substitution matrix */
-X /* Find the bias to use in the substitution matrix */
-X bias = 127;
-X for (i = 1; i <= nsq ; i++) {
-X for (j = 1; j <= nsq ; j++) {
-X data = ppst->pam2[ip][i][j];
-X if (data < bias) {
-X bias = data;
-X }
-X }
-X }
-X
-X /* Fill our specially organized byte- and word-size scoring arrays. */
-X ps = f_str->word_score;
-X col_len = (n0 + 7) / 8;
-X n_count = (n0 + 7) & 0xfffffff8;
-X for (f = 0; f < n_count; ++f) {
-X *ps++ = 0;
-X }
-X for (f = 1; f <= nsq ; f++) {
-X for (e = 0; e < col_len; e++) {
-X for (i = e; i < n_count; i += col_len) {
-X if (i >= n0) {
-X data = 0;
-X } else {
-X data = ppst->pam2[ip][aa0[i]][f];
-X }
-X *ps++ = (unsigned short)data;
-X }
-X }
-X }
-X
-X pc = f_str->byte_score;
-X col_len = (n0 + 15) / 16;
-X n_count = (n0 + 15) & 0xfffffff0;
-X for (f = 0; f < n_count; ++f) {
-X *pc++ = 0;
-X }
-X for (f = 1; f <= nsq ; f++) {
-X for (e = 0; e < col_len; e++) {
-X for (i = e; i < n_count; i += col_len) {
-X if (i >= n0) {
-X data = -bias;
-X } else {
-X data = ppst->pam2[ip][aa0[i]][f] - bias;
-X }
-X if (data > 255) {
-X printf("Fatal error. data: %d bias: %d, position: %d/%d, "
-X "Score out of range for 8-bit SSE2 datatype.\n",
-X data, bias, f, e);
-X exit(1);
-X }
-X *pc++ = (unsigned char)data;
-X }
-X }
-X }
-X }
-X
-X f_str->bias = (unsigned char) (-bias);
-X f_str->alphabet_size = nsq+1;
-X
-X /* Some variable to keep track of how many 8-bit runs we need to rerun
-X * in 16-bit accuracy. If there are too many reruns it can be faster
-X * to use 16-bit alignments directly.
-X */
-X
-X /* We can only do 8-bit alignments if the scores were small enough. */
-X f_str->try_8bit = (overflow == 0) ? 1 : 0;
-X
-X f_str->done_8bit = 0;
-X f_str->done_16bit = 0;
-#endif /* SW_SSE2 */
-X
-X /* these structures are used for producing alignments */
-X
-X maxn0 = max(3*n0/2,MIN_RES); /* minimum allocation for alignment */
-X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X exit(1);
-X }
-X f_str->res = res;
-X
-X
-X *f_arg = f_str;
-}
-X
-/* close_work() - release everything allocated by init_work() and set
-X *f_arg to NULL. Does nothing when *f_arg is already NULL.
-X aa0, n0 and ppst are not referenced. */
-void close_work (const unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X struct f_struct *fs = *f_arg;
-X int m;
-X
-X if (fs == NULL) return;
-X
-X if (fs->kar_p != NULL) free(fs->kar_p);
-X
-X /* init_work() stored ss one element past the start of its
-X allocation, so step back to the address calloc() returned */
-X free(fs->ss - 1);
-X free(fs->res);
-X
-X free(fs->waa_a);
-X free(fs->waa_s);
-X
-X /* each pam2p[m] is a row-pointer table whose rows share the single
-X allocation that begins at pam2p[m][0] */
-X for (m = 0; m < 2; m++) {
-X free(fs->pam2p[m][0]);
-X free(fs->pam2p[m]);
-X }
-X
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X free(fs->workspace_memory);
-X free(fs->word_score_memory);
-X free(fs->byte_score_memory);
-#endif
-X
-X free(fs);
-X *f_arg = NULL;
-}
-X
-X
-/* pstring1 is a message to the manager, currently 512 */
-/*void get_param(struct pstruct *pstr,char *pstring1)*/
-/* get_param() - format a description of the algorithm and scoring
-X parameters.
-X
-X pstr - search parameters (matrix name, pam_h/pam_l, gap penalties,
-X pam_pssm and ext_sq_set flags)
-X pstring1 - receives a one-line, human readable summary
-X pstring2 - if not NULL, receives the same information as
-X "; pg_...: value" lines
-X
-X Both output buffers are written with sprintf(); the caller must
-X supply buffers large enough for the text.
-*/
-void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-X char pg_str[120];
-X char psi_str[120];
-X
-#if defined(SW_ALTIVEC)
-X strncpy(pg_str,"Smith-Waterman (Altivec/VMX, Erik Lindahl 2004)",sizeof(pg_str));
-#endif
-#if defined(SW_SSE2)
-X strncpy(pg_str,"Smith-Waterman (SSE2, Michael Farrar 2006)",sizeof(pg_str));
-#endif
-#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
-X strncpy(pg_str,"Smith-Waterman (PGopt)",sizeof(pg_str));
-#endif
-X
-X /* suffix that marks a position-specific (PSSM) scoring matrix */
-X if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
-X else { psi_str[0]='\0';}
-X
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
-#else
-X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
-#endif
-X pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
-X /*
-X if (pstr->zsflag==0) strcat(pstring1," not-scaled\n");
-X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
-X */
-X if (pstring2 != NULL) {
-X /* pg_matrix must carry the matrix name (pamfile); it previously
-X received only psi_str, so the name was never printed. The PSSM
-X suffix is kept directly after the name. */
-#ifdef OLD_FASTA_GAP
-X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
-#else
-X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_open-ext: %d %d\n",
-#endif
-X pg_str,verstr,pstr->pamfile,psi_str,pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
-X }
-}
-X
-/* do_work() - compute the Smith-Waterman score of query aa0 against
-X library sequence aa1 and store it in rst->score[0].
-X
-X With SW_ALTIVEC or SW_SSE2 the vector routines are used: the 8-bit
-X version is tried first while f_str->try_8bit is set, and the 16-bit
-X version is run when 8-bit is disabled or the 8-bit score reaches 255.
-X Otherwise FLOCAL_ALIGN() is used.
-X
-X When ppst->zsflag is 6 or 16 and do_karlin() returns a positive
-X value, rst->comp is set to 1/lambda and rst->H to H; in every other
-X case both are set to -1.0.
-X
-X frame and qr_flg are not referenced.
-*/
-void do_work (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst, struct f_struct *f_str,
-X int qr_flg, struct rstruct *rst)
-{
-X int score;
-X double lambda, H;
-X int gap_open;
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X int gap_ext;
-X int used_8bit;
-#endif
-X
-X /* cost of the first residue in a gap, passed as a positive number */
-#ifndef OLD_FASTA_GAP
-X gap_open = -(ppst->gdelval + ppst->ggapval);
-#else
-X gap_open = -ppst->gdelval;
-#endif
-X
-#ifdef SW_ALTIVEC
-X gap_ext = -ppst->ggapval;
-X used_8bit = f_str->try_8bit ? 1 : 0;
-X
-X if (used_8bit) {
-X score = smith_waterman_altivec_byte(aa0, f_str->byte_score, n0,
-X aa1, n1, f_str->bias,
-X gap_open, gap_ext, f_str);
-X f_str->done_8bit++;
-X }
-X
-X /* 16-bit pass: either 8-bit is switched off, or it overflowed */
-X if (!used_8bit || score >= 255) {
-X score = smith_waterman_altivec_word(aa0, f_str->word_score, n0,
-X aa1, n1, f_str->bias,
-X gap_open, gap_ext, f_str);
-X
-X if (used_8bit) {
-X /* The 8 bit version is roughly 50% faster than the 16 bit version,
-X * so reruns are acceptable while fewer than about 1/3 of the runs
-X * need them. Past that, and after more than 500 sequences, the
-X * 8-bit mode is switched off.
-X */
-X f_str->done_16bit++;
-X if (f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
-X f_str->try_8bit = 0;
-X }
-X }
-#endif /* SW_ALTIVEC */
-X
-#if defined(SW_SSE2)
-X gap_ext = -ppst->ggapval;
-X used_8bit = f_str->try_8bit ? 1 : 0;
-X
-X if (used_8bit) {
-X score = smith_waterman_sse2_byte(aa0, f_str->byte_score, n0,
-X aa1, n1, f_str->bias,
-X gap_open, gap_ext, f_str);
-X f_str->done_8bit++;
-X }
-X
-X /* 16-bit pass: either 8-bit is switched off, or it overflowed */
-X if (!used_8bit || score >= 255) {
-X score = smith_waterman_sse2_word(aa0, f_str->word_score, n0,
-X aa1, n1,
-X gap_open, gap_ext, f_str);
-X
-X if (used_8bit) {
-X /* same rerun accounting as in the Altivec section */
-X f_str->done_16bit++;
-X if (f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
-X f_str->try_8bit = 0;
-X }
-X }
-#endif /* SW_SSE2 */
-X
-#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
-X /* note: the extension penalty is passed here without negation */
-X score = FLOCAL_ALIGN(aa0,aa1,n0,n1,0,0,
-X NULL, gap_open, ppst->ggapval,0,f_str);
-#endif
-X
-X rst->score[0] = score;
-X
-X /* default: no Karlin statistics available */
-X rst->comp = rst->H = -1.0;
-X
-X if (ppst->zsflag != 6 && ppst->zsflag != 16) return;
-X
-X if (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
-X f_str->kar_p, &lambda, &H)>0) {
-X rst->comp = 1.0/lambda;
-X rst->H = H;
-X }
-}
-X /* FLOCAL_ALIGN - score-only local comparison of a query profile with aa1[].
-X aa1 is scanned until a 0 residue is read; aa0, n1, low, up, W and MW
-X are not referenced in this body (the query enters only through the
-X f_str->waa_s profile, indexed as residue*n0 + position).
-X GG (n_gap_init) is subtracted from a cell score when a gap is started
-X from it, so only cells scoring more than GG can start a gap; HH
-X (gap_ext) is added to a gap score each time the gap is extended.
-X f_str->ss is the row workspace (entries 0..n0; ss[n0] is an
-X end-of-row sentinel).  Returns the largest cell score that exceeded
-X n_gap_init, or 0 if no cell did. */
-static int
-FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
-X int n0, int n1, int low, int up,
-X int **W, int GG,int HH, int MW,
-X struct f_struct *f_str) {
-X
-X register int *pwaa;
-X register struct swstr *ssj;
-X struct swstr *ss;
-X register int h, e, f, p;
-X int temp, score;
-X int gap_ext, n_gap_init;
-X
-X const unsigned char *aa1p;
-X ss = f_str->ss;
-X ss[n0].H = -1; /* end-of-row sentinel: H == -1 is only tested when E > 0 */
-X ss[n0].E = 1; /* E > 0 so that the sentinel test is reached */
-X
-X n_gap_init = GG;
-X gap_ext = HH;
-X
-X score = 0;
-X for (h=0; h<n0; h++) { /* initialize 0th row */
-X ss[h].H = ss[h].E = 0;
-X }
-X
-X aa1p=aa1;
-X while (*aa1p) { /* relies on aa1[n1]==0 for EOS flag */
-X /* waa_s has the offsets for each residue in aa0 into pam2 */
-X /* waa_s has complexity (-S) dependent scores */
-X pwaa = f_str->waa_s + (*aa1p++)*n0;
-X ssj = ss;
-X
-X e = f = h = p = 0;
-X zero_f: /* in this section left-gap f==0, and is never examined */
-X
-X while (1) { /* build until h > n_gap_init (f < 0 until h > n_gap_init) */
-X /* bump through the pam[][]'s for each of the aa1[] matches to
-X aa0[], because of the way *pwaa is set up */
-X
-X h = p + *pwaa++; /* increment diag value */
-X p = ssj->H; /* get next diag value */
-X if ((e = ssj->E) > 0 ) { /* >0 from up-gap */
-X if (p == -1) goto next_row; /* done, -1=ss[n0].H sentinel */
-X if (h < e) h = e; /* up-gap better than diag */
-X else
-X if (h > n_gap_init) { /* we won't be starting a new up-gap */
-X e += gap_ext; /* but we might be extending one */
-X goto transition; /* good h > n_gap_diag; scan f */
-X }
-X e += gap_ext; /* up-gap decreased */
-X ssj->E = (e > 0) ? e : 0; /* set to 0 if < 0 */
-X ssj++->H = h; /* diag match updated */
-X }
-X else { /* up-gap (->E) is 0 */
-X if ( h > 0) { /* diag > 0 */
-X if (h > n_gap_init) { /* we won't be starting a new up-gap */
-X e = 0; /* and we won't be extending one */
-X goto transition; /* good h > n_gap_diag; scan f */
-X }
-X ssj++->H = h; /* update diag */
-X }
-X else ssj++->H = 0; /* update diag to 0 */
-X }
-X }
-X
-X /* here h > n_gap_init and h > e, => the next f will be > 0 */
-X transition:
-#ifdef DEBUG
-X if ( h > 10000)
-X fprintf(stderr,"h: %d ssj: %d\n",h, (int)(ssj-ss));
-#endif
-X if ( score < h ) score = h; /* save best score, only when h > n_gap_init */
-X
-X temp = h - n_gap_init; /* best score for starting a new gap */
-X if ( f < temp ) f = temp; /* start a left-gap? */
-X if ( e < temp ) e = temp; /* start an up-gap? */
-X ssj->E = ( e > 0 ) ? e : 0; /* update up-gap */
-X ssj++->H = h; /* update diag */
-X e = 0;
-X
-X do { /* stay here until f <= 0 */
-X h = p + *pwaa++; /* diag + match/mismatch */
-X p = ssj->H; /* save next (right) diag */
-X
-X if ( h < f ) h = f; /* update diag using left gap */
-X f += gap_ext; /* update next left-gap */
-X
-X if ((e = ssj->E) > 0) { /* good up gap */
-X if (p == -1) goto next_row; /* at the end of the row */
-X if ( h < e ) h = e; /* update diag using up-gap */
-X else
-X if ( h > n_gap_init ) {
-X e += gap_ext; /* update up gap */
-X goto transition; /* good diag > n_gap_init, restart */
-X }
-X e += gap_ext; /* update up-gap */
-X ssj->E = (e > 0) ? e : 0; /* e must be >= 0 */
-X ssj++->H = h; /* update diag */
-X }
-X else { /* up-gap <= 0 */
-X if ( h > n_gap_init ) {
-X e = 0;
-X goto transition; /* good diag > n_gap_init; restart */
-X }
-X ssj++->H = h; /* update diag */
-X }
-X } while ( f > 0 ); /* while left gap f > 0 */
-X goto zero_f; /* otherwise, go to f==0 section */
-X next_row:
-X ;
-X } /* end while (*aa1p) {} */
-X
-X return score;
-X
-} /* here we should be all done */
-X /* do_opt - empty in this file: performs no computation and does not modify *rst or any other argument.  NOTE(review): presumably kept only so this module offers the same entry points as the other drop*.c files - confirm against drop_func.h. */
-void do_opt (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst, struct f_struct *f_str,
-X struct rstruct *rst)
-{
-}
-X /* do_walign - local alignment with traceback.
-X A forward pass over aa1 (scanned until a 0 residue is read) using the
-X f_str->waa_a profile finds the best score and the cell (I,J) where
-X it ends; a backward pass from (I,J) using f_str->pam2p[0] finds the
-X cell (K,L) where it starts; ALIGN() then writes the edit script.
-X On success a_res->res, a_res->nres and a_res->min0/max0/min1/max1
-X are filled in, *have_ares is set to 1 and the score is returned.
-X If the best score is <= 0 the function returns 0 without touching
-X a_res or *have_ares.  frame and n1 are not referenced. */
-int do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares)
-{
-X const unsigned char *aa0p, *aa1p;
-X register int *pwaa;
-X register int i, j;
-X register struct swstr *ssj;
-X struct swstr *ss;
-X int *res, *waa;
-X int e, f, h, p;
-X int q, r, m;
-X int score;
-X int cost, I, J, K, L;
-X
-X ss = f_str->ss;
-X
-X res = f_str->res;
-X waa = f_str->waa_a; /* this time use universal pam2[0] */
-X
-X /* q = cost of opening a gap; its definition depends on OLD_FASTA_GAP */
-#ifdef OLD_FASTA_GAP
-X q = -(ppst->gdelval - ppst->ggapval);
-#else
-X q = -ppst->gdelval;
-#endif
-X
-X r = -ppst->ggapval; /* cost of each gapped residue */
-X m = q + r; /* cost of a newly opened gap of one residue */
-X
-X /* initialize 0th row */
-X for (ssj=ss; ssj<ss+n0; ssj++) {
-X ssj->H = 0;
-X ssj->E = -q;
-X }
-X
-X score = 0;
-X aa1p = aa1;
-X i = 0;
-X while (*aa1p) {
-X h = p = 0;
-X f = -q;
-X pwaa = waa + (*aa1p++ * n0);
-X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
-X if ((h = h - m) > /* gap open from left best */
-X /* gap extend from left gapped */
-X (f = f - r)) f = h; /* if better, use new gap opened */
-X if ((h = ssj->H - m) > /* gap open from up best */
-X /* gap extend from up gap */
-X (e = ssj->E - r)) e = h; /* if better, use new gap opened */
-X h = p + *pwaa++; /* diagonal match */
-X if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
-X if (h < f ) h = f; /* left gap better, reset */
-X if (h < e ) h = e; /* up gap better, reset */
-X p = ssj->H; /* save previous best score */
-X ssj->H = h; /* save (new) up diag-matched */
-X ssj->E = e; /* save upper gap opened */
-X if (h > score) { /* ? new best score */
-X score = h; /* save best */
-X I = i; /* row */
-X J = (int)(ssj-ss); /* column */
-X }
-X }
-X i++;
-X } /* done with forward pass */
-X if (score <= 0) return 0; /* I and J are only set when score > 0 */
-X
-X /* to get the start point, go backwards */
-X
-X /* 18-June-2003 fix bug in backtracking code to identify start of
-X alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
-X pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
-X */
-X
-X cost = K = L = 0;
-X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1; /* reset workspace columns 0..J for the backward pass */
-X
-X for (i=I; i>=0; i--) {
-X h = f = -1;
-X p = (i == I) ? 0 : -1; /* only the end cell (I,J) may start the backward path */
-X for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
-X f = max (f,h-q)-r;
-X ssj->E=max(ssj->E,ssj->H-q)-r;
-X h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
-X p = ssj->H;
-X ssj->H=h;
-X if (h > cost) {
-X cost = h;
-X K = i;
-X L = (int)(ssj-ss);
-X if (cost >= score) goto found; /* backward score reached the forward score: start found */
-X }
-X }
-X }
-X
-found:
-X
-/* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
-X
-/* in the f_str version, the *res array is already allocated at 4*n0/3 */
-X
-X a_res->res = f_str->res;
-X *have_ares = 1;
-X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K; /* min = start cell, max = end cell + 1 */
-X
-/* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
-X
-X /* the sequences are passed offset by -1 because align() and CHECK_SCORE() index B from 1 */
-/* this code no longer refers to aa0[], it uses pam2p[0][L] instead */
-X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r,
-X a_res->res,&a_res->nres,f_str);
-X
-/* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
-X
-/* return *res and nres */
-X
-X return score;
-}
-X /* forward declaration: CHECK_SCORE() is defined after ALIGN(), which calls it */
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int *S, int **W, int IW, int G, int H, int *nres);
-X
-#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost; uses g and h from the scope where it is expanded */
-X
-/* Append "Delete k" op.  Deletes are stored as negative counts.  Uses the
-X variables sapp (int **, next free script slot) and last (int *, last op
-X written) from the scope where it is expanded: if the last op was also
-X a delete, k is folded into that entry, otherwise a new entry -k is
-X written and *sapp is advanced. */
-#define DEL(k) \
-{ if (*last < 0) \
-X *last = (*sapp)[-1] -= (k); \
-X else { \
-X *last = (*sapp)[0] = -(k); \
-X (*sapp)++; \
-X } \
-}
-X
-/* Append "Insert k" op.  Inserts are stored as positive counts.  Same
-X sapp/last convention as DEL: k is folded into the previous entry when
-X that entry is also an insert, otherwise a new entry k is written and
-X *sapp is advanced. */
-#define INS(k) \
-{ if (*last > 0) \
-X *last = (*sapp)[-1] += (k); \
-X else { \
-X *last = (*sapp)[0] = (k); \
-X (*sapp)++; \
-X } \
-}
-X
-/*
-#define XTERNAL
-#include "upam.h"
-X
-void
-print_seq_prof(unsigned char *A, int M,
-X unsigned char *B, int N,
-X int **w, int iw, int dir) {
-X char c_max;
-X int i_max, j_max, i,j;
-X
-X char *c_dir="LRlr";
-X
-X for (i=1; i<=min(60,M); i++) {
-X fprintf(stderr,"%c",aa[A[i]]);
-X }
-X fprintf(stderr, - %d\n,M);
-X
-X for (i=0; i<min(60,M); i++) {
-X i_max = -1;
-X for (j=1; j<21; j++) {
-X if (w[iw+i][j]> i_max) {
-X i_max = w[iw+i][j];
-X j_max = j;
-X }
-X }
-X fprintf(stderr,"%c",aa[j_max]);
-X }
-X fputc(':',stderr);
-X
-X for (i=1; i<=min(60,N); i++) {
-X fprintf(stderr,"%c",aa[B[i]]);
-X }
-X
-X fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
-}
-*/
-X
-/* align(A,B,M,N,tb,te,last) returns the cost of an optimum conversion between
-X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
-X and appends such a conversion to the current script.
-X
-X A is never dereferenced here: row i of A is represented by the score
-X row w[iw+i-1], indexed by the residue B[j].  g and h are the gap
-X costs used by gap(): g once per gap plus h per residue.  f_str
-X supplies the f_ss/r_ss work arrays, which must have valid entries
-X 0..N.  dir is only passed along (it is used by the commented-out
-X print_seq_prof call).  *sapp is the next free script slot and *last
-X the last op written; both are updated through DEL/INS. */
-X
-static int
-align(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int tb, int te, int **w, int iw, int g, int h,
-X struct f_struct *f_str, int dir,
-X int **sapp, int *last)
-{
-X
-X int midi, midj, type; /* Midpoint, type, and cost */
-X int midc;
-X int c1, c2;
-X
-X register int i, j;
-X register int c, e, d, s;
-X int m, t, *wa;
-X struct swstr *f_ss, *r_ss;
-X
-/* print_seq_prof(A,M,B,N,w,iw,dir); */
-X
-X m = g + h; /* cost of opening a gap of one residue */
-X
-X f_ss = f_str->f_ss;
-X r_ss = f_str->r_ss;
-X
-/* Boundary cases: M <= 1 or N == 0 */
-X
-X if (N <= 0) {
-X if (M > 0) {DEL(M)}
-X return -gap(M);
-X }
-X
-X if (M <= 1) {
-X if (M <= 0) {
-X INS(N)
-X return -gap(N);
-X }
-X /* M == 1: pair the single row with the best B[j], or delete it if that scores higher */
-X if (tb < te) tb = te;
-X midc = (tb-h) - gap(N);
-X midj = 0;
-/* wa = w[A[1]]; */
-X wa = w[iw];
-X for (j = 1; j <= N; j++) {
-X c = -gap(j-1) + wa[B[j]] - gap(N-j);
-X if (c > midc) { midc = c; midj = j;}
-X }
-X if (midj == 0) { DEL(1) INS(N) }
-X else {
-X if (midj > 1) { INS(midj-1)}
-X *last = (*sapp)[0] = 0;
-X (*sapp)++;
-X if (midj < N) { INS(N-midj)}
-X }
-X return midc;
-X }
-X
-/* Divide: Find optimum midpoint (midi,midj) of cost midc */
-X
-X midi = M/2; /* Forward phase: */
-X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
-X t = -g;
-X for (j = 1; j <= N; j++) {
-X f_ss[j].H = t = t-h;
-X f_ss[j].E = t-g;
-X }
-X t = tb;
-X for (i = 1; i <= midi; i++) {
-X s = f_ss[0].H;
-X f_ss[0].H = c = t = t-h;
-X e = t-g;
-/* wa = w[A[i]]; */
-X wa = w[iw+i-1];
-X for (j = 1; j <= N; j++) {
-X if ((c = c - m) > (e = e - h)) e = c;
-X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
-X c = s + wa[B[j]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = f_ss[j].H;
-X f_ss[j].H = c;
-X f_ss[j].E = d;
-X }
-X }
-X f_ss[0].E = f_ss[0].H;
-X
-X r_ss[N].H = 0; /* Reverse phase: */
-X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
-X
-X for (j = N-1; j >= 0; j--) {
-X r_ss[j].H = t = t-h;
-X r_ss[j].E = t-g;
-X }
-X
-X t = te;
-X for (i = M-1; i >= midi; i--) {
-X s = r_ss[N].H;
-X r_ss[N].H = c = t = t-h;
-X e = t-g;
-/* wa = w[A[i+1]]; */
-X wa = w[iw+i];
-X for (j = N-1; j >= 0; j--) {
-X if ((c = c - m) > (e = e - h)) { e = c; }
-X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) { d = c; }
-X c = s + wa[B[j+1]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = r_ss[j].H;
-X r_ss[j].H = c;
-X r_ss[j].E = d;
-X }
-X }
-X r_ss[N].E = r_ss[N].H;
-X
-X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
-X midj = 0;
-X type = 1;
-X
-X for (j = 0; j <= N; j++) {
-X if ((c = f_ss[j].H + r_ss[j].H) >= midc) {
-X if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)) {
-X midc = c;
-X midj = j;
-X }
-X }
-X }
-X /* type 2: both halves are in a delete at column j; adding g back counts the gap-open cost once */
-X for (j = N; j >= 0; j--) {
-X if ((c = f_ss[j].E + r_ss[j].E + g) > midc) {
-X midc = c;
-X midj = j;
-X type = 2;
-X }
-X }
-X
-/* Conquer: recursively around midpoint */
-X
-X if (type == 1)
-X { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0,sapp,last); /* c1 and c2 are stored but not read afterwards */
-X c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
-X }
-X else
-X { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp,last);
-X DEL(2); /* rows midi and midi+1 are deleted; the halves on either side are aligned separately */
-X align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
-X }
-X return midc;
-}
-X
-/* ALIGN - top-level driver for align().
-X Allocates the forward and reverse work arrays (N+2 zeroed entries
-X each, published in f_str->f_ss / f_str->r_ss one entry past the
-X start of the allocation), runs align() to write the edit script into
-X S, re-scores the script with CHECK_SCORE() (which also stores the
-X alignment length in *NC), reports any disagreement on both stdout
-X and stderr, releases the work arrays and returns align()'s score.
-X Exits the program if either allocation fails. */
-X
-static int
-ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int **W, int IW, int G, int H, int *S, int *NC,
-X struct f_struct *f_str)
-{
-X struct swstr *fwd_base, *rev_base; /* start of each allocation */
-X int *script_pos = S; /* next free slot in the edit script */
-X int last_op = 0; /* last op written by DEL/INS */
-X int aln_score, chk_score;
-X
-X fwd_base = (struct swstr *) calloc (N+2, sizeof (struct swstr));
-X if (fwd_base == NULL) {
-X   fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
-X   exit (1);
-X }
-X f_str->f_ss = fwd_base + 1;
-X
-X rev_base = (struct swstr *) calloc (N+2, sizeof (struct swstr));
-X if (rev_base == NULL) {
-X   fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
-X   exit (1);
-X }
-X f_str->r_ss = rev_base + 1;
-X
-X /* print_seq_prof(A,M,W,IW); */
-X aln_score = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&script_pos,&last_op);
-X
-X /* independent re-scoring of the script as a consistency check */
-X chk_score = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
-X if (aln_score != chk_score) {
-X   fprintf(stdout,"*** Check_score error. %d != %d ***\n",aln_score,chk_score);
-X   fprintf(stderr,"*** Check_score error. %d != %d ***\n",aln_score,chk_score);
-X }
-X
-X free(rev_base);
-X free(fwd_base);
-X
-X return aln_score;
-}
-X
-/* Alignment display routine.
-X Prints the alignment described by script S to stdout in blocks of
-X up to 50 columns: a ruler line, then the A row, a marker row and
-X the B row.  A and B are indexed from 1 (pre-increment), M and N are
-X their lengths, AP and BP are the position labels printed for the
-X first block, and sq maps residue codes to display characters.
-X Marker row: '|' identical characters, ' ' differing aligned pair,
-X '-' gap column.  Script entries: 0 aligned pair, > 0 residues of B
-X against blanks, < 0 residues of A against blanks.
-X Within this part of the file it is referenced only from a
-X commented-out call in do_walign(). */
-X
-static void
-DISPLAY(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int *S, int AP, int BP, char *sq)
-{ register char *a, *b, *c;
-X register int i, j, op;
-X int lines, ap, bp;
-X
-X char ALINE[51], BLINE[51], CLINE[51]; /* 50 columns plus the terminating NUL */
-X
-X i = j = op = lines = 0;
-X ap = AP;
-X bp = BP;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X while (i < M || j < N)
-X { if (op == 0 && *S == 0)
-X { op = *S++;
-X *a = sq[A[++i]];
-X *b = sq[B[++j]];
-X *c++ = (*a++ == *b++) ? '|' : ' ';
-X }
-X else
-X { if (op == 0)
-X op = *S++;
-X if (op > 0)
-X { *a++ = ' ';
-X *b++ = sq[B[++j]];
-X op--;
-X }
-X else
-X { *a++ = sq[A[++i]];
-X *b++ = ' ';
-X op++;
-X }
-X *c++ = '-';
-X }
-X if (a >= ALINE+50 || (i >= M && j >= N)) /* block full, or alignment finished: flush it */
-X { *a = *b = *c = '\0';
-X printf("\n%5d ",50*lines++);
-X for (b = ALINE+10; b <= a; b += 10)
-X printf(" . :");
-X if (b <= a+5)
-X printf(" .");
-X printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
-X ap = AP + i;
-X bp = BP + j;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X }
-X }
-}
-X
-/* CHECK_SCORE - return the score of the alignment stored in S.
-X The script is read until M rows and N residues of B have been
-X consumed.  Entry 0 pairs row w[iw+i] with the next residue of B
-X (B is indexed from 1); an entry k > 0 skips k residues of B and an
-X entry k < 0 skips -k rows, each costing g + |k|*h.  The number of
-X alignment columns is stored in *NC.  A is not dereferenced. */
-X
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int *S, int **w, int iw,
-X int g, int h, int *NC)
-{
-X int row = 0, col = 0; /* rows of w / residues of B consumed so far */
-X int n_cols = 0; /* alignment columns so far */
-X int total = 0;
-X int code;
-X
-X /* print_seq_prof(A,M,w,iw); */
-X
-X while (row < M || col < N) {
-X   code = *S++;
-X   if (code > 0) { /* code residues of B against a gap */
-X     total -= g + code*h;
-X     col += code;
-X     n_cols += code;
-X   }
-X   else if (code < 0) { /* -code rows against a gap */
-X     total -= g - code*h;
-X     row -= code;
-X     n_cols -= code;
-X   }
-X   else { /* aligned pair */
-X     col++;
-X     total += w[iw+row][B[col]];
-X     row++;
-X     n_cols++;
-X   }
-X }
-X *NC = n_cols;
-X return total;
-}
-X /* pre_cons - does nothing unless TFAST is defined, in which case it stores the result of aatran(aa1, f_str->aa1x, n1, frame) in f_str->n10.  NOTE(review): the struct f_struct declared in dropgsw.h in this archive has no aa1x or n10 member, and dropnfa.c tests TFASTA rather than TFAST - confirm this branch is ever compiled. */
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
-X
-#ifdef TFAST
-X f_str->n10 = aatran(aa1,f_str->aa1x,n1,frame);
-#endif
-X
-}
-X
-/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
-/* call from calcons, calc_id, calc_code */
-/* In this file every factor is 1 and every reverse flag and the frame */
-/* are 0; the frame argument is not used. */
-void
-aln_func_vals(int frame, struct a_struct *aln) {
-X
-X /* scaling factors */
-X aln->qlfact = 1;
-X aln->llmult = 1;
-X aln->llfact = 1;
-X
-X /* reverse flags */
-X aln->llrev = 0;
-X aln->qlrev = 0;
-X
-X aln->frame = 0;
-}
-X
-/* 29-June-2003 this version has been modified to use pst.pam2p
-X instead of pam2 to indicate similarity */
-X
-#include "a_mark.h"
-X /* calcons - build the displayable alignment rows for one result.
-X seqc0 and seqc1 receive the aa0 and aa1 residues (gaps as '-') and
-X seqca the per-column M_* codes from a_mark.h.  Up to three parts are
-X written: left context before (a_res.min0, a_res.min1), the aligned
-X region decoded from the script a_res.res, and - unless LFASTA is
-X defined - right context after (a_res.max0, a_res.max1).
-X Script entries: 0 = aligned pair, k > 0 = k residues of aa1 against
-X gaps in aa0, k < 0 = -k residues of aa0 against gaps in aa1.
-X Also sets aln->amin0/amax0/amin1/amax1, aln->smin0/smin1 and the
-X nident/nsim/ngap_q/ngap_l/nfs counters; *nc receives the length of
-X the aligned region.  Returns mins+lenc+nd, the number of columns
-X written.  a_res and pst are passed by value. */
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc, struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{
-X int i0, i1;
-X int op, lenc, nd, ns, itmp;
-X char *sp0, *sp1, *spa, *sq;
-X int mins, smins;
-X int *rp;
-X
-X if (pst.ext_sq_set) { sq = pst.sqx; }
-X else { sq = pst.sq; }
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X /* the else at "we are not showing the start" below pairs with this outer if; the inner if/else is its single statement */
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else {
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else { /* we are not showing the start */
-X /* mins has the amount of unaligned context to be shown */
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins; /* smins is assigned in every branch but never read in this function */
-X
-X aln->smin0=a_res.min0 - mins;
-X aln->smin1=a_res.min1 - mins;
-X
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X }
-X
-/* now get the middle */
-X
-X memset(seqca,M_BLANK,mins);
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X op = *rp++;
-X lenc++;
-X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1[i1++]];
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
-X (*sp0=='U' && *sp1=='T'))) { /* NOTE(review): unlike the toupper() identity test above, this T/U test is case-sensitive */
-X aln->nident++; *spa=M_IDENT;
-X }
-X
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X *sp0++ = '-';
-X *sp1++ = sq[aa1[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else {
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *spa = '\0'; /* only seqca is NUL-terminated here; seqc0/seqc1 continue with the right end below */
-/* now we have the middle, get the right end */
-X
-#ifndef LFASTA
-X /* how much extra to show at end ? */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X if (nd > max(n0-a_res.max0,n1-a_res.max1)) /* never show more than the longer remaining tail */
-X nd = max(n0-a_res.max0,n1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(n1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
-X }
-X
-#else /* LFASTA */
-X nd = 0;
-#endif
-X /* #undef LFASTA */
-X return mins+lenc+nd;
-}
-X /* calcons_a - like calcons(), but also builds an annotation row.
-X seqc0a receives ann_arr[aa0a[i]] for every aa0 residue placed in the
-X aligned region, a blank under every gap in aa0, and blanks for the
-X left context; it is NUL-terminated at the end of the aligned region.
-X seqc0, seqc1 and seqca are filled as in calcons(): left context,
-X the aligned region decoded from a_res.res (0 = aligned pair, k > 0 =
-X aa1 residues against gaps, k < 0 = aa0 residues against gaps), then
-X right context (here written unconditionally, with no LFASTA test).
-X Sets aln->amin0/amax0/amin1/amax1, smin0/smin1 and the counters,
-X stores the aligned length in *nc, and returns mins+lenc+nd. */
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{
-X int i0, i1;
-X int op, lenc, nd, ns, itmp;
-X char *sp0, *sp0a, *sp1, *spa, *sq;
-X int *rp;
-X int mins, smins;
-X
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X /* the final else below pairs with this outer if; the inner if/else is its single statement */
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else {
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else { /* we are not showing the start: mins is the amount of unaligned context shown */
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0 - smins;
-X aln->smin1=a_res.min1 - smins;
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X }
-X
-/* now get the middle */
-X
-X memset(seqca,M_BLANK,mins);
-X memset(seqc0a,' ',mins);
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp0a = seqc0a+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X op = *rp++;
-X lenc++;
-X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X *sp0a++ = ann_arr[aa0a[i0]];
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1[i1++]];
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
-X (*sp0=='U' && *sp1=='T'))) { /* NOTE(review): unlike the toupper() identity test above, this T/U test is case-sensitive */
-X aln->nident++; *spa=M_IDENT;
-X }
-X
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X *sp0++ = '-';
-X *sp1++ = sq[aa1[i1++]];
-X *spa++ = M_DEL;
-X *sp0a++ = ' ';
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else {
-X *sp0a++ = ann_arr[aa0a[i0]];
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *sp0a = *spa = '\0'; /* the annotation and marker rows end with the aligned region */
-/* now we have the middle, get the right end */
-X
-X /* how much extra to show at end ? */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X if (nd > max(n0-a_res.max0,n1-a_res.max1)) /* never show more than the longer remaining tail */
-X nd = max(n0-a_res.max0,n1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(n1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
-X }
-X
-X return mins+lenc+nd;
-}
-X /* forward declaration: update_code() is defined after calc_code(), which calls it */
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt);
-X
-/* build an array of match/ins/del - length strings.
-X Walks the script a_res.res from (a_res.min0, a_res.min1) to
-X (a_res.max0, a_res.max1) and appends run-length tokens to al_str
-X through update_code(): run kind 0 = aligned pairs, 1 = aa1 residues
-X against a gap (script entry > 0), 2 = aa0 residues against a gap
-X (script entry < 0), 3 = aligned pairs in which either displayed
-X character is '*'.  al_str_n is the caller's size for al_str; al_str
-X must already hold a NUL-terminated string because strlen() is taken
-X of it.  Also sets aln->amin0/amax0/amin1/amax1 and the
-X nident/nsim/ngap_q/ngap_l/nfs counters.  Returns the number of
-X alignment columns.  With TFAST defined the second sequence is
-X f_str->aa1x instead of aa1. */
-int calc_code(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc;
-X int p_op, op_cnt;
-X const unsigned char *aa1p;
-X char tmp_cnt[20];
-X char sp0, sp1, *sq;
-X int *rp;
-X
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
-X op_cnt = 0; /* length of the current run; p_op is its kind */
-X
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X tmp_cnt[0]='\0'; /* tmp_cnt is not used again in this function */
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
-X
-X sp0 = sq[aa0[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X
-X if (p_op == 0 || p_op==3) {
-X if (sp0 != '*' && sp1 != '*') {
-X if (p_op == 3) {
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X else {op_cnt++;}
-X }
-X else { /* a '*' pair always closes the current run, so each kind-3 run has length 1 */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 3;
-X }
-X }
-X else {
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X
-X op = *rp++;
-X lenc++;
-X
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X else if (pst.nt_align) {
-X if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
-X (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
-X else if (toupper(sp0) == 'N') aln->ngap_q++;
-X else if (toupper(sp1) == 'N') aln->ngap_l++;
-X }
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X if (p_op == 1) { op_cnt++;}
-X else { /* NOTE(review): if the script starts with a gap, this emits a kind-0 run of length 0 first */
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 1;
-X }
-X op--; lenc++; i1++; aln->ngap_q++;
-X }
-X else {
-X if (p_op == 2) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 2;
-X }
-X op++; lenc++; i0++; aln->ngap_l++;
-X }
-X }
-X }
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); /* flush the final run */
-X
-X return lenc;
-}
-X
-/* update_code - append one run-length token to al_str.
-X The token is one character selected by op ('=' for 0, '-' for 1,
-X '+' for 2, '*' for 3) followed by op_cnt in decimal.
-X al_str_max is the number of bytes still free in al_str INCLUDING the
-X byte needed for the terminating NUL (callers pass
-X size - strlen(al_str)).  strncat() writes up to n characters and
-X then a NUL, so n must be al_str_max-1; passing al_str_max itself
-X could write one byte past the end of the buffer.  A token that does
-X not fit is truncated; if no character fits, al_str is unchanged. */
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt) {
-X
-X char op_char[5]={"=-+*"};
-X char tmp_cnt[20];
-X
-X /* need room for at least one character plus the terminating NUL */
-X if (al_str_max <= 1) return;
-X
-X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X strncat(al_str,tmp_cnt,al_str_max-1);
-}
-X
-/* calc_id - count identities, similarities and gaps for one alignment
-X without building any display strings.
-X Walks the script a_res.res from (a_res.min0, a_res.min1) to
-X (a_res.max0, a_res.max1): entry 0 = aligned pair, k > 0 = k residues
-X of the second sequence against gaps (counted in aln->ngap_q), k < 0 =
-X -k residues of aa0 against gaps (counted in aln->ngap_l).
-X An aligned pair is similar when its pst.pam2[0] score is >= 0 and
-X identical when the displayed characters match ignoring case; for
-X nucleotide alignments (pst.nt_align) T and U are also treated as
-X identical.  The T/U test ignores case too, matching the identity
-X test and calc_code().
-X Sets aln->amin0/amax0/amin1/amax1, nsim, ngap_q, ngap_l, nfs and
-X nident.  Returns the number of alignment columns.  With TFAST
-X defined the second sequence is f_str->aa1x instead of aa1. */
-int calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X int i0, i1, nn1, n_id;
-X int op, lenc;
-X int sp0, sp1;
-X const unsigned char *aa1p;
-X int *rp;
-X char *sq;
-X
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFAST
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X op = *rp++;
-X lenc++;
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
-X
-X sp0 = sq[aa0[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X if (toupper(sp0) == toupper(sp1)) n_id++;
-X /* T/U equivalence must ignore case just like the test above */
-X else if (pst.nt_align &&
-X ((toupper(sp0)=='T' && toupper(sp1)=='U') ||
-X (toupper(sp0)=='U' && toupper(sp1)=='T'))) n_id++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {op--; lenc++; i1++; aln->ngap_q++; }
-X else {op++; lenc++; i0++; aln->ngap_l++; }
-X }
-X }
-X aln->nident = n_id;
-X return lenc;
-}
-X /* update_params (compiled only when PCOMPLIB is defined) - copies the n0 field of the message *qm_msg into ppst->n0; nothing else in *ppst is changed */
-#ifdef PCOMPLIB
-#include "p_mw.h"
-void
-update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
-{
-X ppst->n0 = qm_msg->n0;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropgsw.c ||
-echo 'restore of dropgsw.c failed'
-Wc_c="`wc -c < 'dropgsw.c'`"
-test 55870 -eq "$Wc_c" ||
- echo 'dropgsw.c: original size 55870, current size' "$Wc_c"
-fi
-# ============= dropgsw.h ==============
-if test -f 'dropgsw.h' -a X"$1" != X"-c"; then # keep an existing file unless the script was run with -c
- echo 'x - skipping dropgsw.h (File already exists)'
-else # extract: sed strips the leading X guard from each payload line
-echo 'x - extracting dropgsw.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropgsw.h' &&
-X
-/* global definitions shared by dropgsw.c and altivec.c */
-X
-/* definitions for SW */
-X
-struct f_struct {
-X struct swstr *ss;
-X struct swstr *f_ss, *r_ss;
-X int *waa_s, *waa_a;
-X int **pam2p[2];
-X int *res;
-X double aa0_f[MAXSQ];
-X double *kar_p;
-#if defined(SW_ALTIVEC) || defined(SW_SSE2)
-X unsigned char bias;
-X unsigned short * word_score;
-X unsigned char * byte_score;
-X void * workspace;
-X int alphabet_size;
-X void * word_score_memory;
-X void * byte_score_memory;
-X void * workspace_memory;
-X int try_8bit;
-X int done_8bit;
-X int done_16bit;
-#endif
-};
-X
-SHAR_EOF
-chmod 0644 dropgsw.h || # runs only if sed succeeded (the && above); a failure of either step prints the message below
-echo 'restore of dropgsw.h failed'
-Wc_c="`wc -c < 'dropgsw.h'`" # byte count of the extracted file
-test 677 -eq "$Wc_c" || # warn (without aborting) when the size differs from the archived size
- echo 'dropgsw.h: original size 677, current size' "$Wc_c"
-fi
-# ============= dropnfa.c ==============
-if test -f 'dropnfa.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropnfa.c (File already exists)'
-else
-echo 'x - extracting dropnfa.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropnfa.c' &&
-X
-/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropnfa.c,v 1.81 2007/04/26 18:37:19 wrp Exp $ */
-X
-/* 18-Sep-2006 - removed global variables for alignment from nw_align
-X and bg_align */
-X
-/* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates */
-X
-/* 14-May-2003 - modified to return alignment start at 0, rather than
-X 1, for begin:end alignments
-*/
-X
-/*
-X implements the fasta algorithm, see:
-X
-X W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological
-X sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448
-X
-X This version uses Smith-Waterman for final protein alignments
-X
-X W. R. Pearson (1996) "Effective protein sequence comparison"
-X Methods Enzymol. 266:227-258
-X
-X
-X 26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty
-X
-X 4-Nov-2001 - modify spam() while(1).
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-X
-/* this must be consistent with upam.h */
-#define MAXHASH 32
-#define NMAP MAXHASH+1
-X
-/* globals for fasta */
-#define MAXWINDOW 64
-X
-#ifndef MAXSAV
-#define MAXSAV 10
-#endif
-X
-#ifndef ALLOCN0
-static char *verstr="3.5 Sept 2006";
-#else
-static char *verstr="3.5an0 Sept 2006";
-#endif
-X
-extern void w_abort(char *, char *);
-int shscore(const unsigned char *aa0, int n0, int **pam2);
-extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
-X double *aa0_f, double **kp);
-extern void init_karlin_a(struct pstruct *, double *, double **);
-extern int do_karlin(const unsigned char *, int n1, int **,
-X struct pstruct *, double *, double *,
-X double *, double *);
-extern void aancpy(char *to, char *from, int count, struct pstruct pst);
-char *ckalloc(size_t);
-X
-#ifdef TFASTA
-extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
-#endif
-X
-#include "dropnfa.h"
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-struct swstr { int H, E;};
-X
-static int
-dmatch (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int hoff, int window,
-X int **pam2, int gdelval, int ggapval,
-X struct f_struct *f_str);
-X
-/* init_work - per-query initialization for fasta
-X
-X   aa0, n0 - encoded query sequence and its length
-X   ppst    - search parameters; cgap, pgap and (unless optcut_set == 1)
-X             optcut in ppst->param_u.fa are recomputed here
-X   f_arg   - receives the newly allocated struct f_struct holding the
-X             ktup hash tables (harr/link/pamh1/pamh2), the diagonal
-X             array, the band and Smith-Waterman work rows, the two
-X             query score profiles (waa0/waa1) and the alignment buffer
-X
-X   Every allocation is checked; on failure a message is written to
-X   stderr and the program exits with status 1.
-*/
-X
-void
-init_work (unsigned char *aa0, int n0,
-X           struct pstruct *ppst,
-X           struct f_struct **f_arg)
-{
-X  int mhv, phv;
-X  int hmax;
-X  int i0, hv;
-X  int pamfact;
-X  int btemp;
-X  struct f_struct *f_str;
-X  /* these used to be globals, but do not need to be */
-X  int ktup;             /* word size examined */
-X  int fact;             /* factor used to scale ktup match value */
-X  int kt1;              /* ktup-1 */
-X  int lkt;              /* last ktup - initially kt1, but can be increased
-X                           for hsq >= NMAP */
-X
-X  int maxn0;            /* used in band alignment */
-X  int *pwaa;            /* pam[aa0[]] profile */
-X  int i, j;
-X  struct swstr *ss;
-X  int *waa;
-X  int nsq, ip, *hsq;
-X
-X  /* pick the alphabet size, hash map and pam2[] index that go with
-X     ext_sq_set */
-X  if (ppst->ext_sq_set) {
-X    nsq = ppst->nsqx; ip = 1;
-X    hsq = ppst->hsqx;
-X  }
-X  else {
-X    nsq = ppst->nsq; ip = 0;
-X    hsq = ppst->hsq;
-X  }
-X
-X  /* f_str is dereferenced immediately below, so it must be checked */
-X  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct))) == NULL) {
-X    fprintf (stderr, " cannot allocate f_struct\n");
-X    exit (1);
-X  }
-X
-#ifndef TFASTA
-X  if((ppst->zsflag%10) == 6) {
-X    f_str->kar_p = NULL;
-X    init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
-X  }
-#endif
-X
-X  /* derive the joining/optimization thresholds from the query length
-X     and the ktup-dependent parameters */
-X  btemp = 2 * ppst->param_u.fa.bestoff / 3 +
-X    n0 / ppst->param_u.fa.bestscale +
-X    ppst->param_u.fa.bkfact *
-X    (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
-X
-X  if (ppst->nt_align)
-X    btemp = (btemp*ppst->pam_h)/5;      /* normalize to standard +5/-4 */
-X
-X  btemp = min (btemp, ppst->param_u.fa.bestmax);
-X  if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
-X
-X  ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
-X
-X  if (ppst->param_u.fa.optcut_set != 1)
-#ifndef TFASTA
-X    ppst->param_u.fa.optcut = btemp;
-#else
-X    ppst->param_u.fa.optcut = (btemp*3)/2;
-#endif
-X
-#ifndef OLD_FASTA_GAP
-X  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
-#else
-X  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
-#endif
-X  pamfact = ppst->param_u.fa.pamfact;
-X  ktup = ppst->param_u.fa.ktup;
-X  fact = ppst->param_u.fa.scfact * ktup;
-X
-X  if (pamfact == -1) pamfact = 0;
-X  else if (pamfact == -2) pamfact = 1;
-X
-X  /* largest hashable code decides how many bits each residue needs */
-X  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
-X    if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
-X
-X  if (mhv <= 0) {
-X    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
-X    exit (1);
-X  }
-X
-X  for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
-X
-/*      kshft = 2;      */
-X  kt1 = ktup - 1;
-X  hv = 1;
-X  for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
-X  hmax = hv;
-X  f_str->hmask = (hmax >> f_str->kshft) - 1;
-X
-X  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
-X             hmax, f_str->hmask);
-X    exit (1);
-X  }
-X
-X  if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate pamh1 array nsq=%d\n",nsq);
-X    exit (1);
-X  }
-X
-X  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate pamh2 array hmax=%d\n",hmax);
-X    exit (1);
-X  }
-X
-X  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
-X    fprintf (stderr, " cannot allocate hash link array n0=%d\n",n0);
-X    exit (1);
-X  }
-X
-X  /* -1 marks "no query position" in both the hash heads and the chains */
-X  for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
-X  for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
-X
-X  /* encode the aa0 array */
-X  phv = hv = 0;
-X  lkt = kt1;
-X  /* restart hv, phv calculation */
-X  for (i0 = 0; i0 < min(lkt,n0); i0++) {
-X    if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;}
-X    hv = (hv << f_str->kshft) + hsq[aa0[i0]];
-X    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
-X  }
-X
-X  for (; i0 < n0; i0++) {
-X    if (hsq[aa0[i0]] >= NMAP) {
-X      hv=phv=0;
-X      /* restart hv, phv calculation */
-X      for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
-X        if (hsq[aa0[i0]] >= NMAP) {
-X          hv=phv=0;
-X          lkt = i0+ktup;
-X          continue;
-X        }
-X        hv = (hv << f_str->kshft) + hsq[aa0[i0]];
-X        phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
-X      }
-X    }
-X    if (i0 >= n0) break;
-X    hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
-X    /* push position i0 on the front of the chain for word hv */
-X    f_str->link[i0] = f_str->harr[hv];
-X    f_str->harr[hv] = i0;
-X    if (pamfact) {
-X      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
-X      /* this check should always be true, but just in case */
-X      if (hsq[aa0[i0-kt1]]<NMAP)
-X        phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
-X    }
-X    else f_str->pamh2[hv] = fact * ktup;
-X  }
-X
-/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
-X   pam2[0][0] is now undefined for consistency with blast
-*/
-X
-X  if (pamfact)
-X    for (i0 = 1; i0 <= nsq; i0++)
-X      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
-X  else
-X    for (i0 = 1; i0 <= nsq; i0++)
-X      f_str->pamh1[i0] = fact;
-X
-X  f_str->ndo = 0;
-X  f_str->noff = n0-1;
-#ifndef ALLOCN0
-X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
-X                                                sizeof (struct dstruct)))==NULL) {
-X    fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
-X             (unsigned long)(MAXDIAG *sizeof (struct dstruct)));
-X    exit (1);
-X  };
-#else
-X  if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
-X                                                sizeof (struct dstruct)))==NULL) {
-X    fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
-X             (unsigned long)((size_t)n0*sizeof (struct dstruct)));
-X    exit (1);
-X  };
-#endif
-X
-X
-#ifdef TFASTA
-X  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
-X                                            sizeof(unsigned char)))
-X      == NULL) {
-X    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
-X    exit (1);
-X  }
-X  f_str->aa1x++;
-#endif
-X
-X  /* bss is indexed from [-1] by the band alignment, hence the ++ */
-X  if ((f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4,
-X                                            sizeof(struct bdstr))) == NULL) {
-X    fprintf (stderr, "cannot allocate bss array %d\n",
-X             ppst->param_u.fa.optwid*2+4);
-X    exit (1);
-X  }
-X  f_str->bss++;
-X
-X  /* allocate space for the scoring arrays */
-X  maxn0 = n0 + 4;
-X  if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
-X      == NULL) {
-X    fprintf (stderr, "cannot allocate ss array %3d\n", n0);
-X    exit (1);
-X  }
-X  ss++;
-X  f_str->ss = ss;
-X
-X  /* initialize the "variable" pam array */
-X
-X  if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) {
-X    fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
-X    exit(1);
-X  }
-X
-X  /* row i of the profile holds pam2[ip][aa0[j]][i] for every query
-X     position j */
-X  pwaa = waa;
-X  for (i=0; i<=nsq; i++) {
-X    for (j=0;j<n0; j++) {
-X      *pwaa = ppst->pam2[ip][aa0[j]][i];
-X      pwaa++;
-X    }
-X  }
-X  f_str->waa0 = waa;
-X
-X  /* initialize the "conventional" pam array used for alignments */
-X
-X  if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) {
-X    fprintf(stderr,"cannot allocate waa struct %3d\n",(nsq+1)*n0);
-X    exit(1);
-X  }
-X
-X  pwaa = waa;
-X  for (i=0; i<=nsq; i++) {
-X    for (j=0;j<n0; j++) {
-X      *pwaa = ppst->pam2[0][aa0[j]][i];
-X      pwaa++;
-X    }
-X  }
-X  f_str->waa1 = waa;
-X
-X  f_str->max_res = max(3*n0/2,MIN_RES);
-X
-X  /* now we need alignment storage - get it */
-X  if ((f_str->res = (int *)calloc((size_t)f_str->max_res,sizeof(int)))==NULL) {
-X    fprintf(stderr,"cannot allocate alignment results array %d\n",f_str->max_res);
-X    exit(1);
-X  }
-X
-X  *f_arg = f_str;
-}
-X
-X
-/* get_param - format the program name, version and search parameters as text.
-X pstring1 is a message to the manager, currently 512; it is filled with
-X sprintf/strcat and this function does no length checking of its own */
-/* pstring2 is the same information, but in a markx==10 format;
-X it is only written when pstring2 is not NULL */
-void
-get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-#ifndef TFASTA
-X char *pg_str="FASTA";
-#else
-X char *pg_str="TFASTA";
-#endif
-X
-X if (!pstr->param_u.fa.optflag) /* this form of the message has no "opt:" field */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, width: %3d",
-#else
-X sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, width: %3d",
-#endif
-X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->gdelval, pstr->ggapval, pstr->param_u.fa.optwid);
-X else /* optflag set: same message plus the optcut value */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d, width: %3d",
-#else
-X sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d, width: %3d",
-#endif
-X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set) ? "xS":"\0",
-X pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
-X pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
-X pstr->param_u.fa.optwid);
-X if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
-X /*
-X if (pstr->zsflag==0) strcat(pstring1," not-scaled");
-X else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
-X */
-X
-X if (pstring2 != NULL) { /* one "; pg_xxx: value" line per parameter */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
-; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#else
-X sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
-; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
-#endif
-X pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, pstr->gdelval,
-X pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
-X pstr->param_u.fa.cgap);
-X }
-}
-X
-void
-close_work (const unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X struct f_struct *f_str;
-X
-X
-X f_str = *f_arg;
-X
-X
-X if (f_str != NULL) {
-X if (f_str->kar_p!=NULL) free(f_str->kar_p);
-X f_str->ss--;
-X f_str->bss--;
-X
-X free(f_str->res);
-X free(f_str->waa1);
-X free(f_str->waa0);
-X free(f_str->ss);
-X free(f_str->bss);
-X free(f_str->diag);
-X free(f_str->link);
-X free(f_str->pamh2);
-X free(f_str->pamh1);
-X free(f_str->harr);
-X
-X free(f_str);
-X *f_arg = NULL;
-X }
-}
-X
-#ifdef ALLOCN0
-void savemax (struct dstruct *, int, struct f_struct *);
-#else
-void savemax (struct dstruct *, struct f_struct *);
-#endif
-X
-int spam (const unsigned char *, const unsigned char *, struct savestr *,
-X int **, int, int, int);
-int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff);
-void kpsort(struct savestr **, int);
-X
-static int
-ALIGN(const unsigned char *, const unsigned char *, int, int,
-X int **, int, int, int *, int *, struct f_struct *);
-X
-static int
-LOCAL_ALIGN(const unsigned char *, const unsigned char *,
-X int, int, int, int,
-X int **, int, int, int *, int *, int *, int *, int,
-X struct f_struct *);
-X
-static int
-B_ALIGN(const unsigned char *A, const unsigned char *B, int M,
-X int N, int low, int up, int **W, int G, int H, int *S,
-X int *nS, int MW, int MX, struct bdstr *bss);
-X /* do_fasta - ktup word scan of library sequence aa1 against the hashed query.
-X Each aa1 word is looked up in f_str->harr/link; matches extend or restart
-X the run kept for their diagonal in f_str->diag, and the best runs are
-X kept in f_str->vmax[MAXSAV] by savemax(). The saved runs are rescored
-X with spam() and joined with sconn().
-X On return rst->score[1] is the best single run, rst->score[0] the larger
-X of that and the joined score, and rst->score[2] equals score[0] unless
-X optflag is set and score[0] > optcut, in which case it is the dmatch()
-X band score. *hoff is only written when dmatch() is called.
-X All three scores are 0 when n1 < ktup or no run is saved, and -1 when
-X n0+n1+1 >= MAXDIAG. */
-static void
-do_fasta (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst, struct f_struct *f_str,
-X struct rstruct *rst, int *hoff)
-{
-X int nd; /* diagonal array size */
-X int lhval; /* rolling hash value of the current aa1 word */
-X int kfact; /* pamh2[] score of the current word */
-X register struct dstruct *dptr;
-X register int tscor;
-X
-#ifndef ALLOCN0
-X register struct dstruct *diagp;
-#else
-X register int dpos;
-X int lposn0;
-#endif
-X int noff; /* f_str->noff, set to n0-1 by init_work() */
-X struct dstruct *dpmax;
-X register int lpos; /* current position in aa1 */
-X int tpos; /* query position taken from the hash chain */
-X struct savestr *vmptr;
-X int scor, ib, nsave;
-X int xdrop, do_extend;
-X int ktup, kt1, lkt, *hsq, ip;
-X
-X if (ppst->ext_sq_set) {
-X ip = 1;
-X hsq = ppst->hsqx;
-X }
-X else {
-X ip = 0;
-X hsq = ppst->hsq;
-X }
-X
-X xdrop = -ppst->pam_l;
-X /* do extended alignment in spam iff protein or short sequences */
-X do_extend = !ppst->nt_align || (n0 < 50) || (n1 < 50);
-X
-X ktup = ppst->param_u.fa.ktup;
-X kt1 = ktup-1;
-X
-X if (n1 < ktup) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-X if (n0+n1+1 >= MAXDIAG) {
-X fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
-X rst->score[0] = rst->score[1] = rst->score[2] = -1;
-X return;
-X }
-X
-#ifdef ALLOCN0
-X nd = n0;
-#else
-X nd = n0 + n1;
-#endif
-X
-X dpmax = &f_str->diag[nd];
-X for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) /* clear only entries beyond those reset by the previous call */
-X {
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X
-X for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
-X vmptr->score = 0;
-X f_str->lowmax = f_str->vmax;
-X f_str->lowscor = 0;
-X
-X /* start hashing */
-X lhval = 0;
-X lkt = kt1;
-X for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos <n1; lpos++) {
-X /* restart lhval calculation */
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lhval = 0; lkt = lpos + ktup;
-X continue;
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X }
-X
-X noff = f_str->noff;
-#ifndef ALLOCN0
-X diagp = &f_str->diag[noff + lkt];
-X for (; lpos < n1; lpos++, diagp++) {
-X if (hsq[aa1[lpos]]>=NMAP) { /* skip a run of unhashable residues */
-X lpos++ ; diagp++;
-X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
-X if (lpos >= n1) break;
-X lhval = 0;
-X }
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
-#else
-X lposn0 = noff + lpos;
-X for (; lpos < n1; lpos++, lposn0++) {
-X if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
-X /*
-X if (hsq[aa1[lpos]]>=NMAP) {
-X lpos++; lposn0++;
-X while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; lposn0++;}
-X }
-X */
-X lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
-X for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
-X dpos = lposn0 - tpos;
-X if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
-#endif
-X tscor += ktup;
-X if ((tscor -= lpos) <= 0) { /* this word does not overlap the previous one on the diagonal */
-X scor = dptr->score;
-X if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
-#ifdef ALLOCN0
-X savemax (dptr, dpos, f_str);
-#else
-X savemax (dptr, f_str);
-#endif
-X if ((tscor += scor) >= kfact) { /* continuing the run scores at least as much as restarting */
-X dptr->score = tscor;
-X dptr->stop = lpos;
-X }
-X else { /* start a new run at this word */
-X dptr->score = kfact;
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X }
-X else { /* overlapping word: extend the run by one residue */
-X dptr->score += f_str->pamh1[aa0[tpos]];
-X dptr->stop = lpos;
-X }
-X }
-X else { /* first word seen on this diagonal */
-X dptr->score = f_str->pamh2[lhval];
-X dptr->start = (dptr->stop = lpos) - kt1;
-X }
-X } /* end tpos */
-X
-#ifdef ALLOCN0
-X /* reinitialize diag structure */
-X loopl:
-X if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor)
-X savemax (dptr, lpos, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr->score = 0;
-#endif
-X } /* end lpos */
-X
-#ifdef ALLOCN0
-X for (tpos = 0, dpos = noff + n1 - 1; tpos < n0; tpos++, dpos--) {
-X if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
-X savemax (dptr, dpos, f_str);
-X }
-#else
-X for (dptr = f_str->diag; dptr < dpmax;) { /* save remaining runs and reset the diagonals for the next call */
-X if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
-X dptr->stop = -1;
-X dptr->dmax = NULL;
-X dptr++->score = 0;
-X }
-X f_str->ndo = nd;
-#endif
-X
-/*
-X at this point all of the elements of aa1[lpos]
-X have been searched for elements of aa0[tpos]
-X with the results in diag[dpos]
-*/
-X for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
-X /*
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X noff+vmptr->start-vmptr->dp,
-X noff+vmptr->stop-vmptr->dp,
-X vmptr->start,vmptr->stop,
-X vmptr->dp,vmptr->score);
-X
-X */
-X if (vmptr->score > 0) { /* rescore each saved run with the full matrix */
-X vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], xdrop,
-X noff,do_extend);
-X f_str->vptr[nsave++] = vmptr;
-X }
-X }
-X
-X if (nsave <= 0) {
-X rst->score[0] = rst->score[1] = rst->score[2] = 0;
-X return;
-X }
-X
-X /*
-X fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,noff);
-X for (ib=0; ib<nsave; ib++) {
-X fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
-X noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
-X noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
-X f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
-X f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
-X }
-X fprintf(stderr,"---\n");
-X */
-X
-X scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
-X ppst->param_u.fa.pgap, noff);
-X
-X for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++) /* find the best single run */
-X if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
-X
-/* kssort (f_str->vptr, nsave); */
-X
-X rst->score[1] = vmptr->score;
-X rst->score[0] = max (scor, vmptr->score);
-X rst->score[2] = rst->score[0]; /* initn */
-X
-X if (ppst->param_u.fa.optflag) {
-X if (rst->score[0] > ppst->param_u.fa.optcut)
-X rst->score[2] = dmatch (aa0, n0, aa1, n1, *hoff=noff - vmptr->dp,
-X ppst->param_u.fa.optwid, ppst->pam2[ip],
-X ppst->gdelval,ppst->ggapval,f_str);
-X }
-}
-X
-/* do_work - score one library sequence against the query
-X
-X   Resets rst, returns at once when aa1 is shorter than ktup, otherwise
-X   runs do_fasta() (on the aatran() translation for TFASTA builds) and
-X   fills rst->comp / rst->H: from do_karlin() when zsflag%10 == 6 and it
-X   returns > 0 (FASTA builds only), else both are set to -1.0.
-X   qr_flg is not used here.
-*/
-void do_work (const unsigned char *aa0, int n0,
-X              const unsigned char *aa1, int n1,
-X              int frame,
-X              struct pstruct *ppst, struct f_struct *f_str,
-X              int qr_flg, struct rstruct *rst)
-{
-X  int hoff, n10;
-X  double lambda, H;
-X
-X  rst->score[2] = 0;
-X  rst->score[1] = 0;
-X  rst->score[0] = 0;
-X  rst->escore = 1.0;
-X  rst->seglen = 1;
-X  rst->segnum = 1;
-X
-X  if (n1 < ppst->param_u.fa.ktup) return;
-X
-#ifdef TFASTA
-X  n10=aatran(aa1,f_str->aa1x,n1,frame);
-X  do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
-X
-X  /* no composition statistics for translated searches */
-X  rst->H = -1.0;
-X  rst->comp = -1.0;
-#else   /* FASTA */
-X  do_fasta (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
-X
-X  /* do_karlin() is only consulted when zsflag%10 == 6 */
-X  if ((ppst->zsflag%10) != 6 ||
-X      do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
-X                f_str->kar_p, &lambda, &H) <= 0) {
-X    rst->H = -1.0;
-X    rst->comp = -1.0;
-X  }
-X  else {
-X    rst->comp = 1.0/lambda;
-X    rst->H = H;
-X  }
-#endif
-}
-X
-/* do_opt - rescore one library sequence with the band optimization on
-X
-X   Temporarily sets ppst->param_u.fa.optflag to 1, runs do_fasta() (on
-X   the aatran() translation for TFASTA builds) so that rst is filled in,
-X   then restores the caller's optflag.
-*/
-void do_opt (const unsigned char *aa0, int n0,
-X             const unsigned char *aa1, int n1,
-X             int frame,
-X             struct pstruct *ppst,
-X             struct f_struct *f_str,
-X             struct rstruct *rst)
-{
-X  int optflag, hoff;
-#ifdef TFASTA
-X  int n10;              /* length of the translated sequence */
-#endif
-X
-X  optflag = ppst->param_u.fa.optflag;
-X  ppst->param_u.fa.optflag = 1;
-X
-#ifdef TFASTA
-X  n10=aatran(aa1,f_str->aa1x,n1,frame);
-X  do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
-#else   /* FASTA */
-X  do_fasta(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff);
-#endif
-X  ppst->param_u.fa.optflag = optflag;
-}
-X
-/* savemax - record the run held in *dptr in the table of best runs
-X
-X   If dptr->dmax already points at a saved run on the same diagonal with
-X   the same start, that entry is extended (and its score raised when the
-X   new score is larger).  Otherwise the run overwrites f_str->lowmax, the
-X   lowest-scoring saved entry.  Whenever the lowest entry may have
-X   changed, f_str->lowmax and f_str->lowscor are recomputed.
-X
-X   With ALLOCN0 the diagonal number is passed in; otherwise it is the
-X   index of dptr within f_str->diag.
-*/
-#ifdef ALLOCN0
-void
-savemax (struct dstruct *dptr, int dpos, struct f_struct *f_str)
-{
-#else
-void
-savemax (struct dstruct *dptr, struct f_struct *f_str)
-{
-X  int dpos = (int) (dptr - f_str->diag);
-#endif
-X  struct savestr *run;
-X  int low;
-X
-X  run = dptr->dmax;
-X  if (run != NULL && run->dp == dpos && run->start == dptr->start) {
-X    /* continuation of a run that is already saved */
-X    run->stop = dptr->stop;
-X    low = dptr->score;
-X    if (low <= run->score) return;
-X    run->score = low;
-X    /* only a change to the lowest entry can move the minimum */
-X    if (run != f_str->lowmax) return;
-X  }
-X  else {
-X    /* new run: replace the current lowest entry */
-X    run = f_str->lowmax;
-X    low = run->score = dptr->score;
-X    run->dp = dpos;
-X    run->start = dptr->start;
-X    run->stop = dptr->stop;
-X    dptr->dmax = run;
-X  }
-X
-X  /* find the new lowest entry */
-X  for (run = f_str->vmax; run < &f_str->vmax[MAXSAV]; run++) {
-X    if (run->score < low) {
-X      low = run->score;
-X      f_str->lowmax = run;
-X    }
-X  }
-X  f_str->lowscor = low;
-}
-X
-/* spam - rescore one saved diagonal run with the full scoring matrix
-X
-X   aa0, aa1  - query and library sequences
-X   dmax      - the run: start/stop are library coordinates, dp the
-X               diagonal; start and stop are updated to the best
-X               sub-region found
-X   pam2      - scoring matrix, indexed pam2[query residue][library residue]
-X   xdrop     - score drop that ends an extension
-X   noff      - offset relating diagonal number and query position
-X               (query position = library position - dp + noff)
-X   do_extend - when non-zero (and NOSPAM_EXT is not defined) the best
-X               region may be extended beyond either end of the run
-X
-X   Returns the score of the best sub-region (0 if none is positive, in
-X   which case dmax->start/stop are left unchanged).
-X
-X   NOTE(review): the extension loops do no bounds checking of their own;
-X   they presumably rely on sequence terminators scoring low enough to
-X   trip the xdrop test - confirm against the callers.
-*/
-int spam (const unsigned char *aa0, const unsigned char *aa1,
-X          struct savestr *dmax, int **pam2, int xdrop,
-X          int noff, int do_extend)
-{
-X  register int lpos, tot;
-X  register const unsigned char *aa0p, *aa1p;
-X
-X  int drop_thresh;
-X
-X  struct {
-X    int start, stop, score;
-X  } curv, maxv;
-X
-X  aa1p = &aa1[lpos= dmax->start];       /* get the start of lib seq */
-X  aa0p = &aa0[lpos - dmax->dp + noff];  /* start of query */
-#ifdef DEBUG
-X  /* also add check in calling routine */
-X  if (aa0p < aa0) { return -99; }
-#endif
-X  curv.start = curv.stop = lpos;        /* start index in lib seq */
-X
-X  /* default to the unmodified run so that start/stop are never copied
-X     back uninitialized when no positive score is found */
-X  maxv.start = dmax->start;
-X  maxv.stop = dmax->stop;
-X
-X  tot = curv.score = maxv.score = 0;
-X
-X  for (; lpos <= dmax->stop; lpos++) {
-X    tot += pam2[*aa0p++][*aa1p++];
-X    if (tot > curv.score) {             /* update current score */
-X      curv.stop = lpos;
-X      curv.score = tot;
-X    }
-X    else if (tot < 0) {
-X      if (curv.score > maxv.score) {    /* save score, start, stop */
-X        maxv.start = curv.start;
-X        maxv.stop = curv.stop;
-X        maxv.score = curv.score;
-X      }
-X      tot = curv.score = 0;             /* reset running score */
-X      curv.start = lpos+1;              /* reset start */
-X      if(lpos >= dmax->stop) break;     /* if the zero is beyond stop, quit */
-X    }
-X  }
-X
-X  if (curv.score > maxv.score) {
-X    maxv.start = curv.start;
-X    maxv.stop = curv.stop;
-X    maxv.score = curv.score;
-X  }
-X
-#ifndef NOSPAM_EXT
-X
-X  /* now check to see if the score gets better by extending */
-X  if (do_extend && maxv.score > xdrop) {
-X
-X    /* scan forwards past the end of the run */
-X    if (maxv.stop == dmax->stop) {
-X      tot = maxv.score;
-X      drop_thresh = maxv.score - xdrop;
-X      aa1p = &aa1[lpos= dmax->stop];
-X      aa0p = &aa0[lpos - dmax->dp + noff];
-X      while (tot > drop_thresh ) {
-X        ++lpos;
-X        tot += pam2[*(++aa0p)][*(++aa1p)];
-X        if (tot > maxv.score) {
-X          /* a forward extension moves the END of the region; this
-X             used to overwrite maxv.start, leaving start > stop */
-X          maxv.stop = lpos;
-X          maxv.score = tot;
-X          drop_thresh = tot - xdrop;
-X        }
-X      }
-X    }
-X
-X    /* scan backwards now */
-X
-X    if (maxv.start == dmax->start) {
-X      tot = maxv.score;
-X      drop_thresh = maxv.score - xdrop;
-X      aa1p = &aa1[lpos= dmax->start];
-X      aa0p = &aa0[lpos - dmax->dp + noff];
-X      while (tot > drop_thresh) {
-X        --lpos;
-X        tot += pam2[*(--aa0p)][*(--aa1p)];
-X        if (tot > maxv.score) {
-X          maxv.start = lpos;
-X          maxv.score = tot;
-X          drop_thresh = tot - xdrop;
-X        }
-X      }
-X    }
-X  }
-#endif
-X
-/*      if (maxv.start != dmax->start || maxv.stop != dmax->stop)
-X        printf(" new region: %3d %3d %3d %3d\n",maxv.start,
-X               dmax->start,maxv.stop,dmax->stop);
-*/
-X  dmax->start = maxv.start;
-X  dmax->stop = maxv.stop;
-X
-X  return maxv.score;
-}
-X /* sconn - best score obtainable by joining saved runs.
-X v[0..n-1] are sorted by library start (kpsort). Runs scoring less than
-X cgap are ignored. Each remaining run may be chained after one run
-X already in the list that ends before it in both sequences, adding pgap
-X to the sum, and is then inserted in a list kept in decreasing score
-X order. noff converts a library position and diagonal to a query
-X position. Returns the score at the head of the list, or 0 when the
-X list is empty. */
-int sconn (struct savestr **v, int n, int cgap, int pgap, int noff)
-{
-X int i, si;
-X struct slink
-X {
-X int score;
-X struct savestr *vp;
-X struct slink *next;
-X } *start, *sl, *sj, *so, sarr[MAXSAV];
-X int lstart, tstart, plstop, ptstop;
-X
-/* sort the score left to right in lib pos */
-X
-X kpsort (v, n);
-X
-X start = NULL;
-X
-/* for the remaining runs, see if they fit */
-X
-X for (i = 0, si = 0; i < n; i++)
-X {
-X
-/* if the score is less than the gap penalty, it never helps */
-X if (v[i]->score < cgap)
-X continue;
-X lstart = v[i]->start;
-X tstart = lstart - v[i]->dp + noff;
-X
-/* put the run in the group */
-X sarr[si].vp = v[i];
-X sarr[si].score = v[i]->score;
-X sarr[si].next = NULL;
-X
-/* if it fits, then increase the score */
-X for (sl = start; sl != NULL; sl = sl->next) /* list is in decreasing score order, so the first fit is the best */
-X {
-X plstop = sl->vp->stop;
-X ptstop = plstop - sl->vp->dp + noff;
-X if (plstop < lstart && ptstop < tstart)
-X {
-X sarr[si].score = sl->score + v[i]->score + pgap;
-X break;
-X }
-X }
-X
-/* now recalculate where the score fits */
-X if (start == NULL)
-X start = &sarr[si];
-X else
-X for (sj = start, so = NULL; sj != NULL; sj = sj->next) { /* NOTE(review): an entry that beats no listed score is never linked in - confirm this is intended */
-X if (sarr[si].score > sj->score) {
-X sarr[si].next = sj;
-X if (so != NULL) so->next = &sarr[si];
-X else start = &sarr[si];
-X break;
-X }
-X so = sj;
-X }
-X si++;
-X }
-X
-X if (start != NULL)
-X return (start->score);
-X else
-X return (0);
-}
-X
-/* kssort - Shell sort of n savestr pointers, highest score first */
-void
-kssort (struct savestr *v[], int n)
-{
-X  int step, hi, lo;
-X  struct savestr *held;
-X
-X  for (step = n / 2; step > 0; step /= 2) {
-X    for (hi = step; hi < n; hi++) {
-X      /* sift v[hi] towards the front while it outscores the entry
-X         one step before it */
-X      lo = hi - step;
-X      while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
-X        held = v[lo];
-X        v[lo] = v[lo + step];
-X        v[lo + step] = held;
-X        lo -= step;
-X      }
-X    }
-X  }
-}
-X
-/* kpsort - Shell sort of n savestr pointers, lowest start first */
-void
-kpsort (struct savestr *v[], int n)
-{
-X  int step, hi, lo;
-X  struct savestr *held;
-X
-X  for (step = n / 2; step > 0; step /= 2) {
-X    for (hi = step; hi < n; hi++) {
-X      /* sift v[hi] towards the front while it starts before the entry
-X         one step before it */
-X      lo = hi - step;
-X      while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
-X        held = v[lo];
-X        v[lo] = v[lo + step];
-X        v[lo + step] = held;
-X        lo -= step;
-X      }
-X    }
-X  }
-}
-X
-/* dmatch - banded local score around the diagonal given by hoff
-X
-X   The band is min(n1, window) wide and runs from -window/2-hoff upward.
-X   Gap penalties are passed to FLOCAL_ALIGN() negated, with the open
-X   penalty computed as gdelval-ggapval for OLD_FASTA_GAP builds; both
-X   sequences are passed offset by -1.  Returns the FLOCAL_ALIGN() score.
-*/
-static int dmatch (const unsigned char *aa0, int n0,
-X                   const unsigned char *aa1, int n1,
-X                   int hoff, int window,
-X                   int **pam2, int gdelval, int ggapval,
-X                   struct f_struct *f_str)
-{
-X  int band_lo, band_hi;
-X  int open_pen;
-X
-#ifdef OLD_FASTA_GAP
-X  open_pen = -(gdelval-ggapval);
-#else
-X  open_pen = -gdelval;
-#endif
-X
-X  window = min (n1, window);
-X
-X  /* hoff is the offset found from aa1 to seq 2 by hmatch */
-X  band_lo = -window/2-hoff;
-X  band_hi = band_lo+window;
-X
-X  return FLOCAL_ALIGN(aa0-1,aa1-1,n0,n1, band_lo, band_hi,
-X                      pam2, open_pen, -ggapval,window,f_str);
-}
-X
-X
-/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
-X
-X To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,MW); in this file the
-X score-only entry point is FLOCAL_ALIGN(A,B,M,N,L,U,W,G,H,MW,f_str).
-X The parameters are explained as follows:
-X A, B : two sequences to be aligned
-X M : the length of sequence A
-X N : the length of sequence B
-X L : lower bound of the band
-X U : upper bound of the band
-X W : scoring table for matches and mismatches
-X G : gap-opening penalty
-X H : gap-extension penalty
-X MW : maximum window size
-X f_str : work structure; its bss array holds the band scores
-*/
-X
-#include <stdio.h>
-X
-#define MININT -9999999
-X /* FLOCAL_ALIGN - score-only local alignment restricted to a band.
-X Returns the best local score found for A[1..M] against B[1..N] on the
-X diagonals low..up (clipped to -M..N), or 0 when M <= 0, N <= 0 or the
-X clipped band is empty. A and B are indexed from 1. W is the scoring
-X table, G and H the gap-open and gap-extend penalties. The band cells
-X live in f_str->bss, which is indexed from leftd-1 to rightd+1. MW is
-X not referenced in the body. */
-#define MININT -9999999
-X
-static int
-FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N, int low, int up,
-X int **W, int G,int H, int MW,
-X struct f_struct *f_str)
-{
-X int band;
-X register struct bdstr *bssp;
-X int i, j, si, ei;
-X int c, d, e, m;
-X int leftd, rightd;
-X int best_score;
-X int *wa, curd;
-X int ib;
-X
-X bssp = f_str->bss;
-X
-X m = G+H; /* cost of opening a gap of length one */
-X low = max(-M, low);
-X up = min(N, up);
-X
-X if (N <= 0) return 0;
-X
-X if (M <= 0) return 0;
-X
-X band = up-low+1;
-X if (band < 1) {
-X fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
-X M, N, low, up);
-X return 0;
-X }
-X
-X if (low > 0) leftd = 1;
-X else if (up < 0) leftd = band;
-X else leftd = 1-low;
-X rightd = band;
-X si = max(0,-up); /* start index -1 */
-X ei = min(M,N-low); /* end index */
-X bssp[leftd].CC = 0;
-X for (j = leftd+1; j <= rightd; j++) {
-X bssp[j].CC = 0;
-X bssp[j].DD = -G;
-X }
-X
-X bssp[rightd+1].CC = MININT; /* sentinels just outside the band */
-X bssp[rightd+1].DD = MININT;
-X
-X best_score = 0;
-X bssp[leftd-1].CC = MININT;
-X bssp[leftd].DD = -G;
-X
-X for (i = si+1; i <= ei; i++) { /* one pass per row of A */
-X if (i > N-up) rightd--;
-X if (leftd > 1) leftd--;
-X wa = W[A[i]];
-X if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
-X if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
-X
-X if (d > c) c = d;
-X if (c < 0) c = 0; /* local alignment: scores never drop below 0 */
-X e = c-G;
-X bssp[leftd].DD = d;
-X bssp[leftd].CC = c;
-X if (c > best_score) best_score = c;
-X
-X for (curd=leftd+1; curd <= rightd; curd++) {
-X if ((c = c-m) > (e = e-H)) e = c;
-X if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
-X c = bssp[curd].CC + wa[B[curd+low-1+i]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X if (c < 0) c = 0;
-X bssp[curd].CC = c;
-X bssp[curd].DD = d;
-X if (c > best_score) best_score = c;
-X }
-X }
-X
-X return best_score;
-}
-X
-/* ckalloc - allocate space; check for success
-X
-X   amount - number of bytes requested; passed to malloc() unchanged (the
-X            former cast to unsigned could truncate requests where size_t
-X            is wider than unsigned int)
-X
-X   Returns the new block.  When malloc() fails w_abort() is called with
-X   an out-of-memory message and its return, if any, falls through to
-X   return NULL.
-*/
-char *ckalloc(size_t amount)
-{
-X  char *p;
-X
-X  if ((p = (char *) malloc(amount)) == NULL)
-X    w_abort("Ran out of memory.","");
-X  return(p);
-}
-X
-/* shscore - score of the sequence aligned against itself: the sum of
-X   pam2[r][r] over the first n0 residues r of aa0 (0 when n0 <= 0) */
-int
-shscore(const unsigned char *aa0, int n0, int **pam2)
-{
-X  int pos = 0;
-X  int total = 0;
-X
-X  while (pos < n0) {
-X    total += pam2[aa0[pos]][aa0[pos]];
-X    pos++;
-X  }
-X  return total;
-}
-X /* sw_walign - Smith-Waterman local alignment of aa0 against aa1.
-X The forward pass uses the query profile f_str->waa1 and the work row
-X f_str->ss to find the best score and where it ends (I in aa1, J in aa0);
-X it walks aa1 until a 0 residue, so n1 is not used to bound that loop.
-X A backward pass from (I,J) finds the start (K in aa1, L in aa0), and
-X ALIGN() then writes the edit script to a_res->res / a_res->nres.
-X The bounds are stored in a_res as min0=L, max0=J+1, min1=K, max1=I+1.
-X Returns the score, or 0 (a_res untouched) when no score is positive. */
-int sw_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res
-X )
-{
-X register const unsigned char *aa0p, *aa1p;
-X register int *pwaa;
-X register int i, j;
-X register struct swstr *ssj;
-X struct swstr *ss;
-X int *waa;
-X int e, f, h, p;
-X int q, r, m;
-X int score;
-X int cost, I, J, K, L;
-X
-X ss = f_str->ss;
-X waa = f_str->waa1;
-X
-#ifdef OLD_FASTA_GAP
-X q = -(ppst->gdelval - ppst->ggapval);
-#else
-X q = -ppst->gdelval;
-#endif
-X r = -ppst->ggapval;
-X m = q + r; /* cost of a gap of length one */
-X
-X /* initialize 0th row */
-X for (ssj=ss; ssj<ss+n0; ssj++) {
-X ssj->H = 0;
-X ssj->E = -q;
-X }
-X
-X score = I = J = 0;
-X aa1p = aa1;
-X i = 0;
-X while (*aa1p) {
-X h = p = 0;
-X f = -q;
-X pwaa = waa + (*aa1p++ * n0); /* profile row for this aa1 residue */
-X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
-X if ((h = h - m) > (f = f - r)) f = h;
-X if ((h = ssj->H - m) > (e = ssj->E - r)) e = h;
-X h = p + *pwaa++;
-X if (h < 0 ) h = 0;
-X if (h < f ) h = f;
-X if (h < e ) h = e;
-X p = ssj->H;
-X ssj->H = h;
-X ssj->E = e;
-X if (h > score) {
-X score = h;
-X I = i;
-X J = (int)(ssj-ss);
-X }
-X }
-X i++;
-X } /* done with forward pass */
-X if (score <= 0) return 0;
-X
-X /* to get the start point, go backwards */
-X
-X cost = K = L = 0;
-X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
-X
-X for (i=I; i>=0; i--) {
-X h = f = -1;
-X p = (i == I) ? 0 : -1;
-X ssj = ss+J; /* bug in compiler */
-X for (aa0p = &aa0[J]; ssj>=ss; ssj--,aa0p--) {
-X f = max (f,h-q)-r;
-X ssj->E=max(ssj->E,ssj->H-q)-r;
-X h = max(max(ssj->E,f),p+ppst->pam2[0][*aa0p][aa1[i]]);
-X p = ssj->H;
-X ssj->H=h;
-X if (h > cost) {
-X cost = h;
-X K = i;
-X L = (int)(ssj-ss);
-X if (cost >= score) goto found; /* reverse score has reached the forward score: start found */
-X }
-X }
-X }
-X
-found:
-X
-X /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
-X
-/* the script is written to a_res->res, which this function does not
-X allocate; init_work() sizes f_str->res at max(3*n0/2,MIN_RES) ints -
-X NOTE(review): confirm the caller points a_res->res at a buffer of
-X sufficient size */
-X
-X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
-X
-X /* the seq array arguments in this call have been reversed to allow
-X asymmetric scoring matrices - this affects the score decoding,
-X and allocation of the score row matrix */
-X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,ppst->pam2[0],q,r,a_res->res,&a_res->nres,f_str);
-X
-X /* DISPLAY(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,res,L,K,ppst->sq); */
-X
-X return score;
-}
-X
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int *S, int **W, int G, int H, int *nres);
-X
-#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost; uses the g and h visible where it is expanded */
-X
-/* static int *sapp; */ /* Current script append ptr */
-/* static int last; */ /* Last script op appended */
-X
-X /* Append "Delete k" op: when the previous op was also a delete
-X (*last < 0) the count is merged into that entry, otherwise -(k) is
-X stored as a new entry. Expects int **sapp and int *last to be in
-X scope where the macro is expanded. */
-#define DEL(k) \
-{ if (*last < 0) \
-X *last = (*sapp)[-1] -= (k); \
-X else { \
-X *last = (*sapp)[0] = -(k); \
-X (*sapp)++; \
-X } \
-}
-X /* Append "Insert k" op: merged into the previous entry when that was
-X also an insert (*last > 0), otherwise +(k) is stored as a new entry */
-#define INS(k) \
-{ if (*last > 0) \
-X *last = (*sapp)[-1] += (k); \
-X else { \
-X *last = (*sapp)[0] = (k); \
-X (*sapp)++; \
-X } \
-}
-X
-#define REP { *last = (*sapp)[0] = 0; (*sapp)++;} /* Append "Replace" op (a 0 entry) */
-X
-/* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
-X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
-X and appends such a conversion to the current script.
-X
-X Here the routine is nw_align(): w is the scoring table, g and h the
-X gap-open and gap-extend costs, f_str supplies the f_ss / r_ss work
-X rows (N+1 entries are used), and the script is appended through *sapp
-X with *last holding the previous op (see DEL, INS, REP).
-X It is divide and conquer: a forward pass over rows 1..M/2 and a reverse
-X pass over rows M..M/2+1 give the best column midj at which to cross
-X row M/2, and the two halves are then aligned recursively. */
-X
-static int
-nw_align(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int tb, int te, int **w, int g, int h,
-X struct f_struct *f_str,
-X int **sapp, int *last)
-{
-X int midi, midj, type; /* Midpoint, type, and cost */
-X int midc;
-X
-X register int i, j;
-X register int c, e, d, s;
-X int m, t, *wa;
-X struct swstr *f_ss, *r_ss;
-X
-X m = g + h; /* cost of a gap of length one */
-X
-X f_ss = f_str->f_ss;
-X r_ss = f_str->r_ss;
-X
-/* Boundary cases: M <= 1 or N == 0 */
-X
-X if (N <= 0) {
-X if (M > 0) {DEL(M)}
-X return -gap(M);
-X }
-X
-X if (M <= 1) {
-X if (M <= 0) {
-X INS(N);
-X return -gap(N);
-X }
-X if (tb < te) tb = te;
-X midc = (tb-h) - gap(N); /* cost of deleting A[1] and inserting all of B */
-X midj = 0;
-X wa = w[A[1]]; /* in the original version of this code, A[]
-X is the second sequence */
-X for (j = 1; j <= N; j++) { /* or replace A[1] by B[j] and insert the rest */
-X c = -gap(j-1) + wa[B[j]] - gap(N-j);
-X if (c > midc) {
-X midc = c;
-X midj = j;
-X }
-X }
-X if (midj == 0) { DEL(1) INS(N) }
-X else {
-X if (midj > 1) { INS(midj-1) }
-X REP
-X if (midj < N) { INS(N-midj) }
-X }
-X return midc;
-X }
-X
-/* Divide: Find optimum midpoint (midi,midj) of cost midc */
-X
-X midi = M/2; /* Forward phase: */
-X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
-X t = -g;
-X for (j = 1; j <= N; j++) {
-X f_ss[j].H = t = t-h;
-X f_ss[j].E = t-g;
-X }
-X t = tb;
-X for (i = 1; i <= midi; i++) {
-X s = f_ss[0].H;
-X f_ss[0].H = c = t = t-h;
-X e = t-g;
-X wa = w[A[i]];
-X for (j = 1; j <= N; j++) {
-X if ((c = c - m) > (e = e - h)) e = c;
-X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
-X c = s + wa[B[j]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = f_ss[j].H;
-X f_ss[j].H = c;
-X f_ss[j].E = d;
-X }
-X }
-X f_ss[0].E = f_ss[0].H;
-X
-X r_ss[N].H = 0; /* Reverse phase: */
-X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
-X for (j = N-1; j >= 0; j--)
-X { r_ss[j].H = t = t-h;
-X r_ss[j].E = t-g;
-X }
-X t = te;
-X for (i = M-1; i >= midi; i--)
-X { s = r_ss[N].H;
-X r_ss[N].H = c = t = t-h;
-X e = t-g;
-X wa = w[A[i+1]];
-X for (j = N-1; j >= 0; j--)
-X { if ((c = c - m) > (e = e - h)) e = c;
-X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c;
-X c = s + wa[B[j+1]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = r_ss[j].H;
-X r_ss[j].H = c;
-X r_ss[j].E = d;
-X }
-X }
-X r_ss[N].E = r_ss[N].H;
-X
-X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
-X midj = 0;
-X type = 1; /* type 1: cross row midi using the H values */
-X for (j = 0; j <= N; j++)
-X if ((c = f_ss[j].H + r_ss[j].H) >= midc)
-X if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E))
-X { midc = c;
-X midj = j;
-X }
-X for (j = N; j >= 0; j--)
-X if ((c = f_ss[j].E + r_ss[j].E + g) > midc) /* type 2: the two E values meet; g is added back once */
-X { midc = c;
-X midj = j;
-X type = 2;
-X }
-X
-X
-/* Conquer: recursively around midpoint */
-X
-X if (type == 1) {
-X nw_align(A,B,midi,midj,tb,-g,w,g,h,f_str, sapp, last);
-X nw_align(A+midi,B+midj,M-midi,N-midj,-g,te,w,g,h,f_str,sapp, last);
-X }
-X else {
-X nw_align(A,B,midi-1,midj,tb,0,w,g,h,f_str, sapp, last);
-X DEL(2); /* rows midi and midi+1 are deleted between the two halves */
-X nw_align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,g,h,f_str, sapp, last);
-X }
-X return midc;
-}
-X
-/* Interface and top level of comparator */
-X
-/* ALIGN - globally align A[1..M] with B[1..N] by calling nw_align().
-X
-X W is the scoring matrix, G the gap-open and H the gap-extend penalty.
-X The edit script is written to S; the number of alignment columns, as
-X counted by CHECK_SCORE(), is stored in *nS.
-X
-X Two scratch rows of N+2 swstr cells are allocated for the duration of
-X the call and published through f_str->f_ss / f_str->r_ss, which is
-X where nw_align() picks them up. Both published pointers are advanced
-X by one cell, so indices -1..N lie inside the allocation.
-X
-X Returns the score reported by nw_align(). A disagreement with the
-X score recomputed from the script is reported on stderr. The process
-X exits if either scratch row cannot be allocated.
-*/
-static int
-ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int **W, int G, int H, int *S, int *nS,
-X struct f_struct *f_str)
-{
-X int c, ck;
-X struct swstr *f_ss, *r_ss;
-X int *sapp, last;
-X
-X sapp = S;
-X last = 0;
-X
-X if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
-X exit (1);
-X }
-X f_ss++;
-X f_str->f_ss = f_ss;
-X
-X if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
-X exit (1);
-X }
-X r_ss++;
-X f_str->r_ss = r_ss;
-X
-X c = nw_align(A,B,M,N,-G,-G,W,G,H,f_str,&sapp, &last); /* OK, do it */
-X
-X ck = CHECK_SCORE(A,B,M,N,S,W,G,H,nS);
-X if (c != ck) fprintf(stderr,"Check_score error %d != %d\n",c,ck);
-X
-X /* the scratch rows die with this call: do not leave stale pointers
-X behind in the shared work structure */
-X f_str->f_ss = NULL;
-X f_str->r_ss = NULL;
-X
-X f_ss--; r_ss--; /* undo the one-cell offset before freeing */
-X free(r_ss); free(f_ss);
-X
-X return c;
-}
-X
-/* Alignment display routine */
-X
-/* DISPLAY - print the alignment of A[1..M] and B[1..N] described by the
-X script S on stdout, at most 50 columns per block.
-X
-X Script entries: 0 is an aligned pair, a positive value k puts k
-X residues of B opposite blanks, a negative value -k puts k residues of
-X A opposite blanks. sq maps residue codes to printable characters.
-X AP and BP are the position labels printed for the first block; later
-X blocks are labelled AP+i and BP+j.
-X The middle line holds '|' under identical characters, ' ' under other
-X aligned pairs and '-' under gap columns.
-*/
-X
-/* 50 columns plus the terminating NUL for each of the three output rows */
-static char ALINE[51], BLINE[51], CLINE[51];
-X
-void DISPLAY(unsigned char *A, unsigned char *B, int M, int N,
-X int *S, int AP, int BP, char *sq)
-{ register char *a, *b, *c;
-X register int i, j, op;
-X int lines, ap, bp;
-X
-X i = j = op = lines = 0;
-X ap = AP;
-X bp = BP;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X while (i < M || j < N)
-X { if (op == 0 && *S == 0)
-X { op = *S++; /* consumes the 0 entry; op stays 0 */
-X *a = sq[A[++i]];
-X *b = sq[B[++j]];
-X *c++ = (*a++ == *b++) ? '|' : ' ';
-X }
-X else
-X { if (op == 0)
-X op = *S++; /* start a new gap run */
-X if (op > 0)
-X { *a++ = ' ';
-X *b++ = sq[B[++j]];
-X op--;
-X }
-X else
-X { *a++ = sq[A[++i]];
-X *b++ = ' ';
-X op++;
-X }
-X *c++ = '-';
-X }
-X /* flush when the row is full or the whole alignment has been consumed */
-X if (a >= ALINE+50 || (i >= M && j >= N))
-X { *a = *b = *c = '\0';
-X printf("\n%5d ",50*lines++);
-X /* ruler: one tick group per 10 columns present in this block */
-X for (b = ALINE+10; b <= a; b += 10)
-X printf(" . :");
-X if (b <= a+5)
-X printf(" .");
-X printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
-X ap = AP + i;
-X bp = BP + j;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X }
-X }
-}
-X
-/* CHECK_SCORE - return the score of the alignment stored in S */
-X
-/* Walks the script S until A[1..M] and B[1..N] are both used up.
-X A 0 entry scores w[a][b] for the next residue pair; a positive entry k
-X charges g+k*h and skips k residues of B; a negative entry -k charges
-X g+k*h and skips k residues of A.
-X The number of alignment columns is stored in *nres.
-*/
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N, int *S, int **w, int g, int h,
-X int *nres)
-{
-X int ia = 0, ib = 0; /* residues of A and of B consumed so far */
-X int total = 0; /* running score */
-X int ncol = 0; /* alignment columns seen */
-X int step;
-X
-X for ( ; ia < M || ib < N; S++) {
-X step = *S;
-X if (step > 0) { /* step residues of B opposite a gap */
-X total -= g + step*h;
-X ib += step;
-X ncol += step;
-X }
-X else if (step < 0) { /* -step residues of A opposite a gap */
-X total -= g - step*h;
-X ia -= step;
-X ncol -= step;
-X }
-X else { /* aligned pair */
-X ia++;
-X ib++;
-X total += w[A[ia]][B[ib]];
-X ncol++;
-X }
-X }
-X *nres = ncol;
-X return total;
-}
-X
-X
-/* BCHECK_SCORE - return the score of the banded alignment stored in S.
-X
-X Walks the script S until A[1..M] and B[1..N] are both used up.
-X A 0 entry scores w[a][b] for the next residue pair; a positive entry k
-X charges g+k*h and skips k residues of B; a negative entry -k charges
-X g+k*h and skips k residues of A.
-X The number of alignment columns is stored in *nres.
-*/
-static int
-BCHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N, int *S, int **w, int g, int h,
-X int *nres)
-{
-X register int i, j, op, nc;
-X int score;
-X
-X score = i = j = op = nc = 0;
-X while (i < M || j < N) {
-X op = *S++;
-X if (op == 0) { /* aligned pair */
-X score = w[A[++i]][B[++j]] + score;
-X nc++;
-/* fprintf(stderr,"op0 %4d %4d %4d %4d\n",i,j,w[A[i]][B[i]],score); */
-X }
-X else if (op > 0) { /* op residues of B opposite a gap */
-X score = score - (g+op*h);
-/* fprintf(stderr,"op> %4d %4d %4d %4d %4d\n",i,j,op,-(g+op*h),score); */
-X j += op;
-X nc += op;
-X } else { /* -op residues of A opposite a gap */
-X score = score - (g-op*h);
-/* fprintf(stderr,"op< %4d %4d %4d %4d %4d\n",i,j,op,-(g-op*h),score); */
-X i -= op;
-X nc -= op;
-X }
-X }
-X *nres = nc;
-X return score;
-}
-X
-X
-/* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
-X
-X To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,&SI,&SJ,&EI,&EJ,MW,f_str).
-X (This version takes no script argument S and no dflag argument.)
-X The parameters are explained as follows:
-X A, B : two sequences to be aligned
-X M : the length of sequence A
-X N : the length of sequence B
-X L : lower bound of the band
-X U : upper bound of the band
-X W : scoring table for matches and mismatches
-X G : gap-opening penalty
-X H : gap-extension penalty
-X *SI : starting position of sequence A in the optimal local alignment
-X *SJ : starting position of sequence B in the optimal local alignment
-X *EI : ending position of sequence A in the optimal local alignment
-X *EJ : ending position of sequence B in the optimal local alignment
-X MW : maximum window size
-X f_str : work structure; f_str->bss is used as the band work array
-*/
-X
-/* bd_walign - banded local alignment of aa0[0..n0-1] against aa1[0..n1-1].
-X
-X A band of width min(n1, optwid) is centred using hoff. LOCAL_ALIGN()
-X finds the best-scoring local region inside that band, then B_ALIGN()
-X aligns just that region and writes the script to a_res->res and its
-X length to a_res->nres.
-X On success a_res->min0/min1 hold the 0-based start and max0/max1 the
-X end of the region, and the LOCAL_ALIGN score is returned.
-X If LOCAL_ALIGN reports a score <= 0, a diagnostic goes to stderr,
-X a_res->nres is set to 0 and 0 is returned.
-*/
-int bd_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct pstruct *ppst,
-X struct f_struct *f_str, int hoff,
-X struct a_res_str *a_res)
-{
-X int low, up, score;
-X int min0, min1, max0, max1;
-X int window;
-X
-X window = min (n1, ppst->param_u.fa.optwid);
-X /* hoff is the offset found from aa1 to seq 2 by hmatch */
-X
-X low = -window/2-hoff;
-X up = low+window;
-X
-X /* LOCAL_ALIGN indexes its sequences from 1, hence the -1 on both pointers */
-X score=LOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up,
-X ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval-ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X &min0,&min1,&max0,&max1,ppst->param_u.fa.optwid,f_str);
-X
-X if (score <=0) {
-X fprintf(stderr,"n0/n1: %d/%d hoff: %d window: %d\n",
-X n0, n1, hoff, window);
-X /* no alignment: leave an empty script rather than a stale length,
-X as do_walign() does when there is nothing to align */
-X a_res->nres = 0;
-X return 0;
-X }
-X
-/*
-X fprintf(stderr," ALIGN: start0: %d start1: %d stop0: %d stop1: %d, bot: %d top: %d, win: %d MX %d\n",
-X min0-1,min1-1,max0-min0+1,max1-min1+1,low-(min1-min0),up-(min1-min0),
-X ppst->param_u.fa.optwid,n0);
-*/
-X
-X a_res->min0 = min0-1; a_res->min1 = min1-1;
-X a_res->max0 = max0; a_res->max1 = max1;
-X
-X /* align only the region found above; the band is shifted so that it is
-X expressed relative to the new origin (min0,min1) */
-X B_ALIGN(aa0-1+min0-1,aa1-1+min1-1,max0-min0+1,max1-min1+1,
-X low-(min1-min0),up-(min1-min0),
-X ppst->pam2[0],
-#ifdef OLD_FASTA_GAP
-X -(ppst->gdelval-ppst->ggapval),
-#else
-X -ppst->gdelval,
-#endif
-X -ppst->ggapval,
-X a_res->res,&a_res->nres,ppst->param_u.fa.optwid,n0,f_str->bss);
-X
-X return score;
-}
-X
-/* LOCAL_ALIGN - best local alignment of A[1..M] and B[1..N] inside the
-X diagonal band [low,up].
-X
-X A forward pass finds the best score and the cell (endi,endj) where it
-X is reached; a backward pass from that cell finds where the same score
-X is first reproduced, which gives (starti,startj).
-X
-X W is the scoring table, G the gap-open and H the gap-extend penalty.
-X f_str->bss is the band work array (allocated by init_work()).
-X MW is accepted for interface compatibility but is not read here.
-X
-X *psi, *psj, *pei, *pej receive the start and end positions. They are
-X set to 0 on every path that does not produce an alignment.
-X Returns the best score; 0 if either sequence is empty or nothing
-X scores above zero; -1 if the band is empty. Exits if the backward
-X pass ends with a start position outside the sequences.
-*/
-static int
-LOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int low, int up, int **W, int G,int H,
-X int *psi, int *psj, int *pei, int *pej, int MW,
-X struct f_struct *f_str)
-{
-X int band;
-X register struct bdstr *bssp;
-X int i, j, si, ei;
-X int c, d, e, t, m;
-X int leftd, rightd;
-X int best_score, starti, startj, endi, endj;
-X int *wa, curd;
-X int ib;
-X char flag;
-X
-X /* define the outputs up front so that no early return hands the
-X caller indeterminate values */
-X *psi = *psj = *pei = *pej = 0;
-X
-X bssp = f_str->bss;
-X
-X m = G+H;
-X low = max(-M, low);
-X up = min(N, up);
-X
-X if (N <= 0 || M <= 0) {
-X return 0;
-X }
-X band = up-low+1;
-X if (band < 1) {
-X fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n",
-X M, N, low, up);
-X return -1;
-X }
-X
-X /* forward pass: find the best score and where it ends */
-X if (low > 0) leftd = 1;
-X else if (up < 0) leftd = band;
-X else leftd = 1-low;
-X rightd = band;
-X si = max(0,-up);
-X ei = min(M,N-low);
-X bssp[leftd].CC = 0;
-X for (j = leftd+1; j <= rightd; j++) {
-X bssp[j].CC = 0;
-X bssp[j].DD = -G;
-X }
-X bssp[rightd+1].CC = MININT;
-X bssp[rightd+1].DD = MININT;
-X best_score = 0;
-X endi = si;
-X endj = si+low;
-X bssp[leftd-1].CC = MININT;
-X bssp[leftd].DD = -G;
-X for (i = si+1; i <= ei; i++) {
-X if (i > N-up) rightd--;
-X if (leftd > 1) leftd--;
-X wa = W[A[i]];
-X if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c;
-X if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]];
-/*
-X if (ib > N) fprintf(stderr,"B[%d] out of range %d\n",ib,N);
-*/
-X if (d > c) c = d;
-X if (c < 0) c = 0; /* local alignment: never carry a negative score */
-X e = c-G;
-X bssp[leftd].DD = d;
-X bssp[leftd].CC = c;
-X if (c > best_score) {
-X best_score = c;
-X endi = i;
-X endj = ib;
-X }
-X for (curd=leftd+1; curd <= rightd; curd++) {
-X if ((c = c-m) > (e = e-H)) e = c;
-X if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c;
-/*
-X if ((ib=curd+low-1+i) <= 0 || ib > N)
-X fprintf(stderr,"B[%d]:%d\n",ib,B[ib]);
-*/
-X c = bssp[curd].CC + wa[B[curd+low-1+i]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X if (c < 0) c = 0;
-X bssp[curd].CC = c;
-X bssp[curd].DD = d;
-X if (c > best_score) {
-X best_score = c;
-X endi = i;
-X endj = curd+low-1+i;
-X }
-X }
-X }
-X
-X /* nothing scored above zero: there is no end point to trace back
-X from, and the caller treats a score <= 0 as "no alignment" */
-X if (best_score <= 0) {
-X return 0;
-X }
-X
-X /* backward pass from (endi,endj): stop at the first cell that
-X reproduces best_score */
-X starti = startj = -1; /* out of range until the pass finds a start */
-X leftd = max(1,-endi-low+1);
-X rightd = band-(up-(endj-endi));
-X bssp[rightd].CC = 0;
-X t = -G;
-X for (j = rightd-1; j >= leftd; j--) {
-X bssp[j].CC = t = t-H;
-X bssp[j].DD = t-G;
-X }
-X for (j = rightd+1; j <= band; ++j) bssp[j].CC = MININT;
-X bssp[leftd-1].CC = bssp[leftd-1].DD = MININT;
-X bssp[rightd].DD = -G;
-X flag = 0;
-X for (i = endi; i >= 1; i--) {
-X if (i+low <= 0) leftd++;
-X if (rightd < band) rightd++;
-X wa = W[A[i]];
-X if ((c = bssp[rightd-1].CC-m) > (d = bssp[rightd-1].DD-H)) d = c;
-X if ((ib = rightd+low-1+i) <= N) c = bssp[rightd].CC+wa[B[ib]];
-X
-/*
-X if (ib <= 0) fprintf(stderr,"rB[%d] <1\n",ib);
-*/
-X if (d > c) c = d;
-X e = c-G;
-X bssp[rightd].DD = d;
-X bssp[rightd].CC = c;
-X if (c == best_score) {
-X starti = i;
-X startj = ib;
-X flag = 1;
-X break;
-X }
-X for (curd=rightd-1; curd >= leftd; curd--) {
-X if ((c = c-m) > (e = e-H)) e = c;
-X if ((c = bssp[curd-1].CC-m) > (d = bssp[curd-1].DD-H)) d = c;
-X
-/*
-X if ((ib=curd+low-1+i) <= 0 || ib > N)
-X fprintf(stderr,"i: %d, B[%d]:%d\n",i,ib,B[ib]);
-*/
-X c = bssp[curd].CC + wa[B[curd+low-1+i]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X bssp[curd].CC = c;
-X bssp[curd].DD = d;
-X if (c == best_score) {
-X starti = i;
-X startj = curd+low-1+i;
-X flag = 1;
-X break;
-X }
-X }
-X if (flag == 1) break;
-X }
-X
-X /* also catches the case where the backward pass never matched
-X best_score, because starti/startj are then still -1 */
-X if (starti < 0 || starti > M || startj < 0 || startj > N) {
-X fprintf(stderr,"starti=%d, startj=%d\n",starti,startj);
-X exit(1);
-X }
-X *psi = starti;
-X *psj = startj;
-X *pei = endi;
-X *pej = endj;
-X return best_score;
-}
-X
-/* A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
-X
-X To invoke, call B_ALIGN(A,B,M,N,L,U,W,G,H,S,nS,MW,MX,bss).
-X The parameters are explained as follows:
-X A, B : two sequences to be aligned
-X M : the length of sequence A
-X N : the length of sequence B
-X L : lower bound of the band
-X U : upper bound of the band
-X W : scoring table for matches and mismatches
-X G : gap-opening penalty
-X H : gap-extension penalty
-X S : script for DISPLAY routine
-X nS : receives the number of alignment columns described by S
-X MW : maximum window size
-X MX : maximum length sequence M to be aligned
-X bss : band work array
-*/
-X
-/* Traceback state shared by B_ALIGN() and bg_align(). The MP/MT/FP/FT
-X arrays are allocated and freed by B_ALIGN(). */
-static int IP; /* crossing point that goes with the running row-gap score e */
-static int *MP[3]; /* save crossing points */
-static int *FP; /* forward dividing points */
-static char *MT[3]; /* 0: rep, 1: del, 2: ins */
-static char *FT; /* state stored with each FP link, same coding as MT */
-X
-/* bg_align(A,B,M,N,low,up,tb,te,w,g,h,bss,sapp,last) returns the cost of an
-X optimum conversion between A[1..M] and B[1..N] within the band [low,up]
-X and appends such a conversion to the current script (*sapp, *last).
-X tb(te)= 1 no gap-open penalty if the conversion begins(ends) with a delete.
-X tb(te)= 2 no gap-open penalty if the conversion begins(ends) with an insert.
-X
-X The three boundary cases at the top emit their script but return 0
-X rather than a cost.
-X bss is the band work array. MP, MT, FP and FT must already be
-X allocated; B_ALIGN() does this before the first call.
-*/
-static int
-bg_align(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int low, int up, int tb, int te,
-X int **w, int g, int h,
-X struct bdstr *bss, int **sapp, int *last)
-{
-X int rmid, k, l, r, v, kt;
-X int t1, t2, t3;
-X
-X {
-X int band, midd;
-X int leftd, rightd; /* for CC, DD, CP and DP */
-X register int curd; /* current index for CC, DD CP and DP */
-X register int i, j;
-X register int c, d, e;
-X int t, fr, *wa, ib, m;
-X
-X /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
-X if (N <= 0) {
-X if (M > 0) { DEL(M) }
-X return 0;
-X }
-X if (M <= 0) {
-X INS(N)
-X return 0;
-X }
-X if ((band = up-low+1) <= 1) {
-X for (i = 1; i <= M; i++) { REP }
-X return 0;
-X }
-X
-X /* Divide: Find all crossing points */
-X
-X /* Initialization */
-X m = g + h;
-X
-X midd = band/2 + 1; /* band index of the middle diagonal */
-X rmid = low + midd - 1; /* the same diagonal as an offset */
-X leftd = 1-low;
-X rightd = up-low+1;
-X /* seed CP/DP and the first MP/MT entries according to which side of
-X the middle diagonal the starting diagonal (leftd) lies on */
-X if (leftd < midd) {
-X fr = -1;
-X for (j = 0; j < midd; j++)
-X bss[j].CP = bss[j].DP = -1;
-X for (j = midd; j <= rightd; j++) {
-X bss[j].CP = bss[j].DP = 0;
-X }
-X MP[0][0] = -1;
-X MP[1][0] = -1;
-X MP[2][0] = -1;
-X MT[0][0] = MT[1][0] = MT[2][0] = 0;
-X } else if (leftd > midd) {
-X fr = leftd-midd;
-X for (j = 0; j <= midd; j++) {
-X bss[j].CP = bss[j].DP = fr;
-X }
-X for (j = midd+1; j <= rightd; j++)
-X bss[j].CP = bss[j].DP = -1;
-X MP[0][fr] = -1;
-X MP[1][fr] = -1;
-X MP[2][fr] = -1;
-X MT[0][fr] = MT[1][fr] = MT[2][fr] = 0;
-X } else {
-X fr = 0;
-X for (j = 0; j < midd; j++) {
-X bss[j].CP = bss[j].DP = 0;
-X }
-X for (j = midd; j <= rightd; j++) {
-X bss[j].CP = bss[j].DP = 0;
-X }
-X MP[0][0] = -1;
-X MP[1][0] = -1;
-X MP[2][0] = -1;
-X MT[0][0] = MT[1][0] = MT[2][0] = 0;
-X }
-X
-X bss[leftd].CC = 0;
-X if (tb == 2) t = 0; /* leading insert: no gap-open charge */
-X else t = -g;
-X for (j = leftd+1; j <= rightd; j++) {
-X bss[j].CC = t = t-h;
-X bss[j].DD = t-g;
-X }
-X bss[rightd+1].CC = MININT;
-X bss[rightd+1].DD = MININT;
-X if (tb == 1) bss[leftd].DD = 0; /* leading delete: no gap-open charge */
-X else bss[leftd].DD = -g;
-X bss[leftd-1].CC = MININT;
-X for (i = 1; i <= M; i++) {
-X if (i > N-up) rightd--;
-X if (leftd > 1) leftd--;
-X wa = w[A[i]];
-X if ((c = bss[leftd+1].CC-m) > (d = bss[leftd+1].DD-h)) {
-X d = c;
-X bss[leftd].DP = bss[leftd+1].CP;
-X } else bss[leftd].DP = bss[leftd+1].DP;
-X if ((ib = leftd+low-1+i) > 0) c = bss[leftd].CC+wa[B[ib]];
-X if (d > c || ib <= 0) {
-X c = d;
-X bss[leftd].CP = bss[leftd].DP;
-X }
-X e = c-g;
-X bss[leftd].DD = d;
-X bss[leftd].CC = c;
-X IP = bss[leftd].CP;
-X if (leftd == midd) bss[leftd].CP = bss[leftd].DP = IP = i;
-X for (curd=leftd+1; curd <= rightd; curd++) {
-X if (curd != midd) {
-X /* ordinary cell: just carry the crossing point along with the
-X score that wins */
-X if ((c = c-m) > (e = e-h)) {
-X e = c;
-X IP = bss[curd-1].CP;
-X } /* otherwise, IP is unchanged */
-X if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
-X d = c;
-X bss[curd].DP = bss[curd+1].CP;
-X } else {
-X bss[curd].DP = bss[curd+1].DP;
-X }
-X c = bss[curd].CC + wa[B[curd+low-1+i]];
-X if (c < d || c < e) {
-X if (e > d) {
-X c = e;
-X bss[curd].CP = IP;
-X } else {
-X c = d;
-X bss[curd].CP = bss[curd].DP;
-X }
-X } /* otherwise, CP is unchanged */
-X bss[curd].CC = c;
-X bss[curd].DD = d;
-X } else {
-X /* cell on the middle diagonal: record in MP/MT[.][i] the
-X previous crossing point and its state for each of the three
-X states, then make row i the new crossing point */
-X if ((c = c-m) > (e = e-h)) {
-X e = c;
-X MP[1][i] = bss[curd-1].CP;
-X MT[1][i] = 2;
-X } else {
-X MP[1][i] = IP;
-X MT[1][i] = 2;
-X }
-X if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) {
-X d = c;
-X MP[2][i] = bss[curd+1].CP;
-X MT[2][i] = 1;
-X } else {
-X MP[2][i] = bss[curd+1].DP;
-X MT[2][i] = 1;
-X }
-X c = bss[curd].CC + wa[B[curd+low-1+i]];
-X if (c < d || c < e) {
-X if (e > d) {
-X c = e;
-X MP[0][i] = MP[1][i];
-X MT[0][i] = 2;
-X } else {
-X c = d;
-X MP[0][i] = MP[2][i];
-X MT[0][i] = 1;
-X }
-X } else {
-X MP[0][i] = i-1;
-X MT[0][i] = 0;
-X }
-X if (c-g > e) {
-X MP[1][i] = MP[0][i];
-X MT[1][i] = MT[0][i];
-X }
-X if (c-g > d) {
-X MP[2][i] = MP[0][i];
-X MT[2][i] = MT[0][i];
-X }
-X bss[curd].CP = bss[curd].DP = IP = i;
-X bss[curd].CC = c;
-X bss[curd].DD = d;
-X }
-X }
-X }
-X
-X /* decide which path to be traced back */
-X if (te == 1 && d+g > c) {
-X k = bss[rightd].DP;
-X l = 2;
-X } else if (te == 2 && e+g > c) {
-X k = IP;
-X l = 1;
-X } else {
-X k = bss[rightd].CP;
-X l = 0;
-X }
-X /* the state chosen above is overridden whenever the end diagonal
-X (N-M) differs from the middle diagonal */
-X if (rmid > N-M) l = 2;
-X else if (rmid < N-M) l = 1;
-X v = c;
-X }
-X /* Conquer: Solve subproblems recursively */
-X
-X /* trace back */
-X /* follows the MP/MT links from the last crossing point and stores the
-X reversed links in FP/FT; r ends up as the first crossing point */
-X r = -1;
-X for (; k > -1; r=k, k=MP[l][r], l=MT[l][r]){
-X FP[k] = r;
-X FT[k] = l; /* l=0,1,2 */
-X }
-X /* forward dividing */
-X if (r == -1) { /* optimal alignment did not cross the middle diagonal */
-X if (rmid < 0) {
-X bg_align(A,B,M,N,rmid+1,up,tb,te,w,g,h,bss, sapp, last);
-X }
-X else {
-X bg_align(A,B,M,N,low,rmid-1,tb,te,w,g,h,bss, sapp, last);
-X }
-X } else {
-X k = r;
-X l = FP[k];
-X kt = FT[k];
-X
-X /* first block */
-X if (rmid < 0) {
-X bg_align(A,B,r-1,r+rmid,rmid+1,min(up,r+rmid),tb,1,w,g,h,bss,sapp,last);
-X DEL(1)
-X } else if (rmid > 0) {
-X bg_align(A,B,r,r+rmid-1,max(-r,low),rmid-1,tb,2,w,g,h,bss,sapp,last);
-X INS(1)
-X }
-X
-X /* intermediate blocks */
-X t2 = up-rmid-1;
-X t3 = low-rmid+1;
-X for (; l > -1; k = l, l = FP[k], kt = FT[k]) {
-X if (kt == 0) { REP }
-X else if (kt == 1) { /* right-hand side triangle */
-X INS(1)
-X t1 = l-k-1;
-X bg_align(A+k,B+k+rmid+1,t1,t1,0,min(t1,t2),2,1,w,g,h,bss,sapp,last);
-X DEL(1)
-X }
-X else { /* kt == 2, left-hand side triangle */
-X DEL(1)
-X t1 = l-k-1;
-X bg_align(A+k+1,B+k+rmid,t1,t1,max(-t1,t3),0,1,2,w,g,h,bss,sapp,last);
-X INS(1)
-X }
-X }
-X
-X /* last block */
-X if (N-M > rmid) {
-X INS(1)
-X t1 = k+rmid+1;
-X bg_align(A+k,B+t1,M-k,N-t1,0,min(N-t1,t2),2,te,w,g,h,bss,sapp,last);
-X } else if (N-M < rmid) {
-X DEL(1)
-X t1 = M-(k+1);
-X bg_align(A+k+1,B+k+rmid,t1,N-(k+rmid),max(-t1,t3),0,1,te,w,g,h,
-X bss,sapp,last);
-X }
-X }
-X return(v);
-}
-X
-/* B_ALIGN - globally align A[1..M] with B[1..N] inside the band [low,up].
-X
-X The band is first widened, if necessary, so that it contains both the
-X main diagonal (0) and the end diagonal (N-M). The script is written
-X to S and the number of alignment columns to *nS, on every return path.
-X MX+1 entries are allocated for each of the MP/MT/FP/FT traceback
-X arrays used by bg_align(); they are released before returning.
-X bss is the band work array handed to bg_align().
-X MW is not read here.
-X
-X Returns the alignment score. A disagreement between the score from
-X bg_align() and the score recomputed from the script is printed.
-*/
-int B_ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N,
-X int low, int up, int **W, int G, int H, int *S, int *nS,
-X int MW, int MX, struct bdstr *bss)
-{
-X int c, i, j;
-X int g, h; /* NOTE(review): presumably read by the gap() macro - confirm */
-X size_t mj;
-X int check_score;
-X int **sapp, *sapp_v, *last, last_v;
-X
-X g = G;
-X h = H;
-X sapp_v = S;
-X sapp = &sapp_v;
-X
-X last_v = 0;
-X last = &last_v;
-X
-X low = min(max(-M, low),min(N-M,0));
-X up = max(min(N, up),max(N-M,0));
-X
-X /* The trivial cases below never reach BCHECK_SCORE(), which is what
-X fills in *nS on the main path, so the length is stored here. */
-X if (N <= 0) {
-X if (M > 0) { DEL(M); }
-X *nS = (M > 0) ? M : 0;
-X return -gap(M);
-X }
-X if (M <= 0) {
-X INS(N);
-X *nS = N;
-X return -gap(N);
-X }
-X if (up-low+1 <= 1) {
-X c = 0;
-X for (i = 1; i <= M; i++) {
-X REP;
-X c += W[A[i]][B[i]];
-X }
-X *nS = M;
-X return c;
-X }
-X
-X if (MT[0]==NULL) {
-X mj = MX+1;
-X MT[0] = (char *) ckalloc(mj);
-X MT[1] = (char *) ckalloc(mj);
-X MT[2] = (char *) ckalloc(mj);
-X FT = (char *) ckalloc(mj);
-X
-X mj *= sizeof(int);
-X MP[0] = (int *) ckalloc(mj);
-X MP[1] = (int *) ckalloc(mj);
-X MP[2] = (int *) ckalloc(mj);
-X FP = (int *) ckalloc(mj);
-X }
-X
-X c = bg_align(A,B,M,N,low,up,0,0,W,G,H,bss, sapp, last);
-X
-X check_score = BCHECK_SCORE(A,B,M,N,S,W,G,H,nS);
-X
-X free(FP); free(MP[2]); free(MP[1]); free(MP[0]);
-X free(FT); free(MT[2]); free(MT[1]); free(MT[0]);
-X MT[0]=NULL; /* MT[0]==NULL is the "not allocated" flag tested above */
-X
-X if (check_score != c)
-X printf("\nBCheck_score=%d != %d\n", check_score,c);
-X return c;
-}
-X
-/* do_walign - produce the alignment of aa0[0..n0-1] with the library
-X sequence and return its score.
-X
-X Under TFASTA the library sequence is first translated with aatran()
-X into f_str->aa1x (length stored in f_str->n10) and that translation
-X is aligned; otherwise aa1/n1 are used directly and frame is unused.
-X a_res->res is pointed at f_str->res and *have_ares is set to 1.
-X
-X If ppst->sw_flag is set the work is delegated to sw_walign().
-X Otherwise do_fasta() is re-run with optflag=1, optcut=0 and a doubled
-X optwid to obtain hoff, and bd_walign() is called if the resulting
-X score[0] is positive; if not, a_res->nres is set to 0 and 0 is
-X returned. The three altered parameters are restored before returning.
-*/
-int do_walign (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares)
-{
-X int hoff, optflag_s, optcut_s, optwid_s, n10, score;
-X const unsigned char *aa1p;
-X struct rstruct rst;
-X
-#ifdef TFASTA
-X f_str->n10 = n10=aatran(aa1,f_str->aa1x,n1,frame);
-X /* rst and hoff from this call are overwritten by the second do_fasta()
-X call below when sw_flag is not set */
-X do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff);
-X aa1p = f_str->aa1x;
-X
-#else
-X n10 = n1;
-X aa1p = aa1;
-#endif
-X
-X a_res->res = f_str->res;
-X *have_ares = 1;
-X
-X if (ppst->sw_flag)
-X return sw_walign(aa0, n0, aa1p, n10, ppst, f_str, a_res);
-X else {
-X /* save the search parameters that are changed for the alignment pass */
-X optflag_s = ppst->param_u.fa.optflag;
-X optcut_s = ppst->param_u.fa.optcut;
-X optwid_s = ppst->param_u.fa.optwid;
-X ppst->param_u.fa.optflag = 1;
-X ppst->param_u.fa.optcut = 0;
-X ppst->param_u.fa.optwid *= 2;
-X
-X do_fasta(aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff);
-X
-X if (rst.score[0]>0) {
-X score=bd_walign(aa0, n0, aa1p, n10, ppst, f_str, hoff, a_res);
-X }
-X else {
-X a_res->nres = 0;
-X score=0;
-X }
-X
-X ppst->param_u.fa.optflag = optflag_s;
-X ppst->param_u.fa.optcut = optcut_s;
-X ppst->param_u.fa.optwid = optwid_s;
-X return score;
-X }
-}
-X
-/* pre_cons - under TFASTA, translate aa1[0..n1-1] in the given frame into
-X f_str->aa1x and record the translated length in f_str->n10.
-X Without TFASTA this function does nothing. */
-void
-pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str)
-{
-#ifdef TFASTA
-X int n_trans;
-X
-X n_trans = aatran(aa1,f_str->aa1x,n1,frame);
-X f_str->n10 = n_trans;
-#endif
-}
-X
-/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
-/* call from calcons, calc_id, calc_code */
-/* TFASTA: library factors are 3; frames above 2 are flagged as reversed
-X and stored as 3 - frame.
-X FASTA: all factors are 1, frame is 0, and a positive frame flags the
-X query as reversed. */
-void
-aln_func_vals(int frame, struct a_struct *aln)
-{
-#ifdef TFASTA
-X int is_rev = (frame > 2);
-X
-X aln->qlfact = 1;
-X aln->qlrev = 0;
-X aln->llfact = 3;
-X aln->llmult = 3;
-X aln->llrev = is_rev ? 1 : 0;
-X aln->frame = is_rev ? 3 - frame : frame;
-#else /* FASTA */
-X aln->qlfact = 1;
-X aln->llfact = 1;
-X aln->llmult = 1;
-X aln->llrev = 0;
-X aln->qlrev = (frame > 0) ? 1 : 0;
-X aln->frame = 0;
-#endif
-}
-X
-#include "a_mark.h"
-X
-/* calcons - build the three display strings for an alignment.
-X
-X seqc0 and seqc1 receive the query and library residues as characters
-X (via sq, which is pst.sqx when pst.ext_sq_set is set and pst.sq
-X otherwise), with '-' in gap columns; seqca receives one match code
-X per column (M_IDENT, M_POS, M_ZERO, M_NEG or M_DEL).
-X Each string is laid out as: mins columns of left context, lenc
-X aligned columns decoded from the script a_res.res, then nd columns of
-X right context. Under TFASTA the library side is f_str->aa1x with
-X length f_str->n10 rather than aa1/n1.
-X
-X Also fills aln->amin0/amax0/amin1/amax1, smin0/smin1 and the
-X nident/nsim/ngap_q/ngap_l counters (nfs is reset to 0).
-X *nc receives lenc; the return value is mins+lenc+nd.
-*/
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln, struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc, nd, ns, itmp;
-X const unsigned char *aa1p;
-X char *sp0, *sp1, *spa, *sq;
-X int *rp;
-X int smins, mins;
-X
-X if (pst.ext_sq_set) { sq = pst.sqx; }
-X else { sq = pst.sq; }
-X
-#ifndef TFASTA
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X /* will we show all the start ?*/
-X /* note: each inner if/else pair below belongs to the inner "if"; the
-X final "else" belongs to the outer test */
-X if (min(a_res.min0,a_res.min1) < aln->llen || aln->showall==1)
-X if (a_res.min0 >= a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins = a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X /* aa1 has fewer than mins residues before the alignment: pad left */
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else { /* aa1 extends more to left */
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0 - mins;
-X aln->smin1=a_res.min1 - mins;
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X /* set the alignment code to zero for context */
-X memset(seqca,0,mins);
-X
-X /* TFASTA
-X smins = mins = 0;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X */
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* script entries: 0 = aligned pair, k > 0 = k library residues
-X opposite a gap, k < 0 = -k query residues opposite a gap */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X op = *rp++;
-X lenc++;
-X if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1p[i1++]];
-X if (toupper(*sp0) == toupper(*sp1)) {
-X aln->nident++;
-X *spa = M_IDENT;
-X }
-X else if (pst.nt_align) {
-X /* T and U count as identical */
-X if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') ||
-X (toupper(*sp0)=='U' && toupper(*sp1)=='T')) {
-X aln->nident++;
-X *spa = M_IDENT;
-X }
-X /* an unmatched 'N' is added to the gap counters */
-X else if (toupper(*sp0) == 'N') aln->ngap_q++;
-X else if (toupper(*sp1) == 'N') aln->ngap_l++;
-X }
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op > 0) {
-X *sp0++ = '-';
-X *sp1++ = sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else {
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *spa = '\0';
-X
-/* now we have the middle, get the right end */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X /* never show more context than the longer remaining tail */
-X if (nd > max(n0-a_res.max0,nn1-a_res.max1))
-X nd = max(n0-a_res.max0,nn1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X
-X /* fprintf(stderr,"%d\n",mins+lenc+nd); */
-X
-X return mins+lenc+nd;
-}
-X
-/* calcons_a - calcons() with a query annotation line.
-X
-X Builds seqc0, seqc1 and seqca exactly as calcons() does: mins columns
-X of left context, lenc aligned columns decoded from a_res.res, nd
-X columns of right context. In addition seqc0a receives, for every
-X column that holds a query residue, ann_arr[aa0a[i]] for that residue,
-X and ' ' for left-context columns and for columns where the query has
-X a gap. seqc0a is NUL-terminated after the aligned part; the
-X right-context code below does not write to it.
-X
-X Also fills aln->amin0/amax0/amin1/amax1, smin0/smin1 and the
-X nident/nsim/ngap_q/ngap_l counters (nfs is reset to 0).
-X *nc receives lenc; the return value is mins+lenc+nd.
-*/
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc, nd, ns, itmp;
-X const unsigned char *aa1p;
-X char *sp0, *sp0a, *sp1, *spa, *sq;
-X int *rp;
-X int smins, mins;
-X
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFASTA
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X /* will we show all the start ?*/
-X /* note: each inner if/else pair below belongs to the inner "if"; the
-X final "else" belongs to the outer test */
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins = a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X /* aa1 has fewer than mins residues before the alignment: pad left */
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else { /* aa1 extends more to left */
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else {
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0 - smins;
-X aln->smin1=a_res.min1 - smins;
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
-X }
-X /* set the alignment code to zero for context */
-X memset(seqca,0,mins);
-X /* no annotation is shown for the left context */
-X memset(seqc0a,' ',mins);
-X
-X /* TFASTA
-X smins = mins = 0;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X */
-X
-/* now get the middle */
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp0a = seqc0a+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* script entries: 0 = aligned pair, k > 0 = k library residues
-X opposite a gap, k < 0 = -k query residues opposite a gap */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X op = *rp++;
-X lenc++;
-X if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X *sp0a++ = ann_arr[aa0a[i0]];
-X
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1p[i1++]];
-X
-X if (toupper(*sp0) == toupper(*sp1)) {
-X aln->nident++;
-X *spa = M_IDENT;
-X }
-X else if (pst.nt_align) {
-X /* T and U count as identical */
-X if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') ||
-X (toupper(*sp0)=='U' && toupper(*sp1)=='T')) {
-X aln->nident++;
-X *spa = M_IDENT;
-X }
-X /* an unmatched 'N' is added to the gap counters */
-X else if (toupper(*sp0) == 'N') aln->ngap_q++;
-X else if (toupper(*sp1) == 'N') aln->ngap_l++;
-X }
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X *sp0++ = '-';
-X *sp1++ = sq[aa1p[i1++]];
-X *spa++ = M_DEL;
-X *sp0a++ = ' '; /* no query residue in this column */
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else {
-X *sp0a++ = ann_arr[aa0a[i0]];
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *sp0a = *spa = '\0';
-X
-/* now we have the middle, get the right end */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X /* never show more context than the longer remaining tail */
-X if (nd > max(n0-a_res.max0,nn1-a_res.max1))
-X nd = max(n0-a_res.max0,nn1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(nn1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
-X }
-X
-X /* fprintf(stderr,"%d\n",mins+lenc+nd); */
-X
-X return mins+lenc+nd;
-}
-X
-/* update_code - append one run, written as an operation character
-X followed by a decimal count (for example "=12"), to al_str.
-X
-X op selects the character: 0 '=', 1 '-', 2 '+', 3 '*'.
-X al_str_max is the room left in al_str INCLUDING the terminating NUL
-X (callers pass buffer size minus strlen(al_str)). The run is truncated
-X if it does not fit; nothing is appended when al_str_max <= 1.
-*/
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt) {
-X
-X char op_char[5]={"=-+*"};
-X char tmp_cnt[20];
-X
-X /* no room for even one character plus the NUL; this also rejects a
-X negative room value, which strncat would see as a huge size_t */
-X if (al_str_max <= 1) return;
-X
-X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X /* strncat appends up to n characters and THEN a NUL, so n must leave
-X one byte of the remaining room for the terminator */
-X strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
-}
-X
-X
-/* build an array of match/ins/del - length strings */
-/* calc_code - walk the script a_res.res and append a run-length encoding
-X of the alignment to al_str through update_code(): '=' aligned pairs,
-X '-' library residues opposite a gap, '+' query residues opposite a
-X gap, '*' aligned pairs in which either character is '*'.
-X al_str_n is the size of al_str; al_str is appended to, so it must
-X already hold a NUL-terminated string.
-X
-X p_op is the operation of the run being counted and op_cnt its length.
-X Both start at 0, so a script that begins with a gap first emits "=0".
-X Under TFASTA the library side is f_str->aa1x rather than aa1.
-X
-X Also fills aln->amin0/amax0/amin1/amax1 and the nident/nsim/ngap_q/
-X ngap_l counters (nfs is reset to 0). Returns the number of
-X alignment columns.
-*/
-int calc_code(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln, struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc;
-X int p_op, op_cnt;
-X const unsigned char *aa1p;
-X char sp0, sp1, *sq;
-X int *rp;
-X
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-#ifndef TFASTA
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
-X op_cnt = 0;
-X
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
-X
-X sp0 = sq[aa0[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X
-X if (p_op == 0 || p_op==3) {
-X if (sp0 != '*' && sp1 != '*') {
-X if (p_op == 3) {
-X /* a '*' run ends: flush it and start a '=' run */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X else {op_cnt++;}
-X }
-X else {
-X /* a pair containing '*' always flushes the current run and
-X starts a new '*' run of length 1 */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 3;
-X }
-X }
-X else {
-X /* previous run was a gap: flush it and start a '=' run */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X
-X op = *rp++;
-X lenc++;
-X
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X else if (pst.nt_align) {
-X /* T and U count as identical; an unmatched 'N' is added to the
-X gap counters */
-X if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
-X (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
-X else if (toupper(sp0) == 'N') aln->ngap_q++;
-X else if (toupper(sp1) == 'N') aln->ngap_l++;
-X }
-X }
-X else {
-X if (op==0) op = *rp++;
-X if (op>0) {
-X /* library residue opposite a gap in the query */
-X if (p_op == 1) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 1;
-X }
-X op--; lenc++; i1++; aln->ngap_q++;
-X }
-X else {
-X /* query residue opposite a gap in the library */
-X if (p_op == 2) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 2;
-X }
-X op++; lenc++; i0++; aln->ngap_l++;
-X }
-X }
-X }
-X /* flush the run that was still being counted */
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X
-X return lenc;
-}
-X /* calc_id() - recount identities, similarities and gaps for an alignment.
-X
-X Replays the edit script in a_res.res from (a_res.min0,a_res.min1) up
-X to (a_res.max0,a_res.max1): 0 is one aligned pair, a positive value
-X that many aa1-only steps, a negative value that many aa0-only steps.
-X Copies the four alignment bounds into aln and rebuilds aln->nident,
-X nsim, ngap_q and ngap_l (nfs is zeroed); no alignment text is made.
-X
-X Returns the number of alignment columns visited.
-X */
-int calc_id(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc;
-X int sp0, sp1;
-X const unsigned char *aa1p;
-X int *rp;
-X char *sq;
-X /* sq maps residue codes to characters: pst.sqx if ext_sq_set, else pst.sq */
-X if (pst.ext_sq_set) { sq = pst.sqx; }
-X else { sq = pst.sq; }
-X
-#ifndef TFASTA
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x; /* with TFASTA the sequence kept in f_str replaces aa1 */
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) { /* no gap pending and next entry is a pair */
-X op = *rp++;
-X lenc++;
-X
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;} /* score >= 0 counts as similar */
-X
-X sp0 = sq[aa0[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X if (toupper(sp0) == toupper(sp1)) {aln->nident++;}
-X else if (pst.nt_align) {
-X if ((toupper(sp0)=='T' && toupper(sp1)== 'U')||
-X (toupper(sp0)=='U' && toupper(sp1)=='T')) {aln->nident++;}
-X else if (toupper(sp0) == 'N') aln->ngap_q++; /* an 'N' is tallied with the gaps */
-X else if (toupper(sp1) == 'N') aln->ngap_l++;
-X }
-X }
-X else {
-X if (op==0) op = *rp++; /* fetch the length of the next gap */
-X if (op>0) {op--; lenc++; i1++; aln->ngap_q++;}
-X else {op++; lenc++; i0++; aln->ngap_l++; }
-X }
-X }
-X return lenc;
-}
-X
-#ifdef PCOMPLIB
-X
-#include "w_mw.h"
-X
-/* update_params() - take n0 from the message qm_msg and store it in *ppst */
-void update_params(struct qmng_str *qm_msg, struct pstruct *ppst) {
-X (*ppst).n0 = (*qm_msg).n0;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropnfa.c ||
-echo 'restore of dropnfa.c failed'
-Wc_c="`wc -c < 'dropnfa.c'`"
-test 70110 -eq "$Wc_c" ||
- echo 'dropnfa.c: original size 70110, current size' "$Wc_c"
-fi
-# ============= dropnfa.h ==============
-if test -f 'dropnfa.h' -a X"$1" != X"-c"; then
- echo 'x - skipping dropnfa.h (File already exists)'
-else
-echo 'x - extracting dropnfa.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropnfa.h' &&
-X
-/* global definitions shared by dropnfa.c and altivec.c */
-X
-#ifndef MAXSAV
-#define MAXSAV 10
-#endif
-X
-X
-X /* state kept for one diagonal; f_struct.diag points at these */
-struct dstruct /* diagonal structure for saving current run */
-{
-X int score; /* hash score of current match */
-X int start; /* start of current match */
-X int stop; /* end of current match */
-X struct savestr *dmax; /* location in vmax[] where best score data saved */
-};
-X /* one saved region; f_struct.vmax[] holds MAXSAV of them */
-struct savestr
-{
-X int score; /* pam score with segment optimization */
-X int score0; /* pam score of best single segment */
-X int gscore; /* score from global match */
-X int dp; /* diagonal of match */
-X int start; /* start of match in lib seq */
-X int stop; /* end of match in lib seq */
-};
-X /* element type of f_struct.bss; NOTE(review): the roles of CC/DD/CP/DP are set in dropnfa.c - confirm there */
-struct bdstr { int CC, DD, CP, DP;};
-X /* work area handed to the functions of dropnfa.c (and altivec.c) through their f_str argument */
-struct f_struct {
-X struct dstruct *diag; /* per-diagonal run state, see struct dstruct */
-X struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
-X struct savestr *vptr[MAXSAV]; /* pointers to savestr entries - presumably into vmax[]; confirm */
-X struct savestr *lowmax; /* presumably the lowest-scoring saved entry - confirm */
-X int ndo;
-X int noff;
-X int hmask; /* hash constants */
-X int *pamh1; /* pam based array */
-X int *pamh2; /* pam based kfact array */
-X int *link, *harr; /* hash arrays */
-X int kshft; /* shift width */
-X int nsav, lowscor; /* number of saved runs, worst saved run */
-#ifdef TFASTA
-X unsigned char *aa1x; /* calc_code()/calc_id() read this in place of aa1 */
-X int n10; /* ... and this in place of n1 */
-#endif
-X struct bdstr *bss;
-X struct swstr *ss; /* struct swstr is not declared in this header */
-X struct swstr *f_ss, *r_ss;
-X int *waa0;
-X int *waa1;
-X int *res; /* presumably the alignment edit script - confirm */
-X int max_res; /* presumably the capacity of res - confirm */
-X double aa0_f[MAXSQ]; /* MAXSQ comes from another header */
-X double *kar_p;
-X /* the members below exist only in FA_ALTIVEC builds */
-#ifdef FA_ALTIVEC
-X int vec_len;
-X vecInt **vec_matrix;
-X vector signed ALTIVEC_SIZE *vec_HH;
-X vector signed ALTIVEC_SIZE *vec_EE;
-X
-X int vec_len2;
-X vecInt2 **vec_matrix2;
-X vector signed ALTIVEC_SIZE2 *vec_HH2;
-X vector signed ALTIVEC_SIZE2 *vec_EE2;
-#endif
-};
-X /* declared static: each file that includes this header and calls FLOCAL_ALIGN must define it itself */
-static int
-FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B,
-X int M, int N, int low, int up,
-X int **W, int G,int H, int MW,
-X struct f_struct *f_str);
-SHAR_EOF
-chmod 0644 dropnfa.h ||
-echo 'restore of dropnfa.h failed'
-Wc_c="`wc -c < 'dropnfa.h'`"
-test 1882 -eq "$Wc_c" ||
- echo 'dropnfa.h: original size 1882, current size' "$Wc_c"
-fi
-# ============= dropnsw.c ==============
-if test -f 'dropnsw.c' -a X"$1" != X"-c"; then
- echo 'x - skipping dropnsw.c (File already exists)'
-else
-echo 'x - extracting dropnsw.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'dropnsw.c' &&
-/* copyright (c) 1994, 1995, 1996 William R. Pearson */
-X
-/* $Name: fa_34_26_5 $ - $Id: dropnsw.c,v 1.35 2006/10/19 14:49:14 wrp Exp $ */
-X
-/*
-X this is a slower version of dropgsw.c that implements the Smith-Waterman
-X algorithm. It lacks the shortcuts in dropgsw.c that prevent scores less
-X than the penalty for the first residue in a gap from being generated.
-X
-X Thus, dropnsw.c should be used for tests with very large gap penalties,
-X and is more appropriate for programs like prss3, which are interested
-X in accurate low scores.
-*/
-X
-/* the do_walign() code in this file is not thread_safe */
-/* init_work(), do_work(), are thread safe */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-X
-static char *verstr="3.5 Sept 2006";
-X /* one dynamic-programming cell: H is the best score, E the score of the gapped state carried between rows */
-struct swstr { int H, E;};
-X /* per-query work area built by init_work() and released by close_work() */
-struct f_struct {
-X struct swstr *ss; /* row of cells for do_work()/do_walign(); stored one past the calloc'd base */
-X struct swstr *f_ss; /* forward row read by align(); ALIGN() replaces it with its own */
-X struct swstr *r_ss; /* reverse row read by align(); ALIGN() replaces it with its own */
-X int *waa_s, *waa_a; /* residue-major score profiles: waa_s for do_work(), waa_a for do_walign() */
-X int **pam2p[2]; /* [matrix][query position][residue] scores */
-X int *res; /* edit-script buffer, max(3*n0/2,MIN_RES) ints */
-X double aa0_f[MAXSQ]; /* handed to do_karlin() by do_work() */
-X double *kar_p; /* handed to do_karlin(); freed by close_work() when not NULL */
-};
-X
-#define DROP_INTERN
-#include "drop_func.h"
-X
-extern int do_karlin(const unsigned char *aa1, int n1,
-X int **pam2, struct pstruct *ppst,
-X double *aa0_f, double *kar_p, double *lambda, double *H);
-extern void aancpy(char *to, char *from, int count, struct pstruct pst);
-int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
-X int **W, int IW, int G, int H, int *S, int *NC,
-X struct f_struct *f_str);
-X
-/* initialize for Smith-Waterman optimal score
-X
-X init_work() builds the per-query work area and returns it in *f_arg:
-X ss, f_ss, r_ss - n0+2 cells each; the stored pointer is advanced by
-X one so that index -1 stays inside the allocation
-X pam2p[0], pam2p[1] - [position][residue] score tables
-X waa_a, waa_s - the same scores residue-major, n0 entries per residue,
-X which is how do_work() and do_walign() index them
-X res - edit-script buffer of max(3*n0/2,MIN_RES) ints
-X
-X pam2p[0] and waa_a are filled from matrix 0, pam2p[ip] and waa_s from
-X matrix ip, where ip is 1 when ppst->ext_sq_set and 0 otherwise. With
-X ppst->pam_pssm the values are ppst->pam2p[][position][residue],
-X otherwise ppst->pam2[][residue][aa0[position]].
-X
-X Every allocation is checked; a failure is reported on stderr and ends
-X the program with exit(1).
-*/
-X
-void init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst,
-X struct f_struct **f_arg)
-{
-X int maxn0;
-X int *pwaa_s, *pwaa_a;
-X int e, f, i;
-X int *res;
-X struct f_struct *f_str;
-X int **pam2p;
-X struct swstr *ss, *f_ss, *r_ss;
-X int nsq, ip;
-X
-X if (ppst->ext_sq_set) {
-X nsq = ppst->nsqx; ip = 1;
-X }
-X else {
-X nsq = ppst->nsq; ip = 0;
-X }
-X
-X if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate f_str\n");
-X exit (1);
-X }
-X
-X /* allocate space for the scoring arrays */
-X maxn0 = n0 + 2;
-X if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate ss array %3d\n", n0);
-X exit (1);
-X }
-X ss++;
-X f_str->ss = ss;
-X
-X if ((f_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate f_ss array %3d\n", n0);
-X exit (1);
-X }
-X f_ss++;
-X f_str->f_ss = f_ss;
-X
-X if ((r_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate r_ss array %3d\n", n0);
-X exit (1);
-X }
-X r_ss++;
-X f_str->r_ss = r_ss;
-X
-X /* initialize variable (-S) pam matrix */
-X if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
-X exit(1);
-X }
-X
-X pam2p = f_str->pam2p[1];
-X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X /* row i starts i*(nsq+1) ints into the single block hanging off row 0 */
-X for (i=1; i<n0; i++) {
-X pam2p[i]= pam2p[0] + (i*(nsq+1));
-X }
-X
-X /* initialize universal (alignment) matrix */
-X if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[0] array %3d\n",n0);
-X exit(1);
-X }
-X
-X pam2p = f_str->pam2p[0];
-X if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
-X fprintf(stderr,"cannot allocate pam2p[0][] array %3d\n",nsq*n0);
-X exit(1);
-X }
-X
-X for (i=1; i<n0; i++) {
-X pam2p[i]= pam2p[0] + (i*(nsq+1));
-X }
-X
-X /*
-X pwaa effectively has a sequence profile --
-X pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
-X pwaa[n0..2n0-1] has pam scores for residue 1 (A)
-X pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
-X
-X thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
-X *pwaa++ rapidly moves though the scores of the aa1p[] position
-X without further indexing
-X
-X For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
-X a different score in each position.
-X */
-X
-X if (ppst->pam_pssm) {
-X pwaa_s = f_str->waa_s;
-X pwaa_a = f_str->waa_a;
-X for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */
-X for (f = 0; f < n0; f++) { /* for each position in aa0 */
-X *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
-X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e];
-X }
-X }
-X }
-X else { /* initialize scanning matrix */
-X pwaa_s = f_str->waa_s;
-X pwaa_a = f_str->waa_a;
-X for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */
-X for (f = 0; f < n0; f++) { /* for each position in aa0 */
-X *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][e][aa0[f]];
-X *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][e][aa0[f]];
-X }
-X }
-X
-X maxn0 = max(3*n0/2,MIN_RES);
-X if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
-X fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
-X exit(1);
-X }
-X f_str->res = res;
-X
-X *f_arg = f_str;
-}
-X
-/* close_work() - release the work area made by init_work() and set *f_arg
-X to NULL. Nothing happens when *f_arg is already NULL.
-X
-X NOTE(review): f_ss and r_ss are not released here. init_work()
-X allocates them, but ALIGN() later stores its own rows in those fields
-X and frees them, so the fields may hold stale pointers by this point.
-*/
-void close_work (const unsigned char *aa0, int n0,
-X struct pstruct *ppst, struct f_struct **f_arg)
-{
-X struct f_struct *work = *f_arg;
-X int k;
-X
-X if (work == NULL) return;
-X
-X free(work->kar_p); /* free(NULL) does nothing */
-X free(work->ss - 1); /* init_work() stored base+1 */
-X free(work->res);
-X free(work->waa_a);
-X free(work->waa_s);
-X
-X /* each table is one data block hanging off row 0, plus the row pointers */
-X for (k = 0; k < 2; k++) {
-X free(work->pam2p[k][0]);
-X free(work->pam2p[k]);
-X }
-X
-X free(work);
-X *f_arg = NULL;
-}
-X
-X
-/* pstring1 is a message to the manager, currently 512
-X
-X get_param() writes a description of the scoring setup. pstring1
-X receives one line naming the program ("Smith-Waterman"), verstr, the
-X matrix file with a "-PSI" suffix when pstr->pam_pssm is set, pam_h and
-X pam_l, "xS" when ext_sq_set, and the two gap values. pstring2, when
-X not NULL, receives the same facts as "; key: value" lines.
-X
-X Both strings are written with sprintf(), so the callers must supply
-X buffers that are large enough.
-*/
-/*void get_param(struct pstruct *pstr,char *pstring1)*/
-void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
-{
-X char psi_str[120];
-X
-X char *pg_str="Smith-Waterman";
-X
-X if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
-X else { psi_str[0]='\0';}
-X /* OLD_FASTA_GAP only changes the label that precedes the two gap values */
-#ifdef OLD_FASTA_GAP
-X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
-#else
-X sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
-#endif
-X pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
-X (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
-X
-X if (pstring2 != NULL) {
-#ifdef OLD_FASTA_GAP
-X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
-#else
-X sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n",
-#endif
-X pg_str,verstr,psi_str,pstr->pam_h,pstr->pam_l, /* NOTE(review): pg_matrix gets psi_str, not pstr->pamfile as in pstring1 - confirm */
-X (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
-X }
-}
-X
-X
-/* do_work() - score aa0 against aa1 and store the best local score.
-X
-X One row of cells (f_str->ss) is swept once per aa1 residue, using the
-X residue-major profile f_str->waa_s, and the highest cell value seen
-X goes into rst->score[0]. The sweep stops at the first zero code in
-X aa1, so aa1 must end with one. rst->escore, segnum and seglen are set
-X to 1.
-X
-X When ppst->zsflag is 6 or 16, do_karlin() is called; if it returns a
-X positive value rst->comp is 1/lambda and rst->H is H. In every other
-X case both are -1.0. The test used to read "a || b && call", which
-X skipped the call for zsflag 6 and then divided by an unset lambda.
-*/
-void do_work (const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst, struct f_struct *f_str,
-X int qr_flg,
-X struct rstruct *rst)
-{
-X const unsigned char *aa1p;
-X register struct swstr *ssj;
-X struct swstr *ss;
-X register int *pwaa;
-X int *waa;
-X int e, f, h, p;
-X int q, r, m;
-X int score;
-X
-X double lambda, H;
-X
-X rst->escore = 1.0;
-X rst->segnum = rst->seglen = 1;
-X
-X waa = f_str->waa_s;
-X ss = f_str->ss;
-X
-#ifdef OLD_FASTA_GAP
-X q = -(ppst->gdelval - ppst->ggapval);
-#else
-X q = -ppst->gdelval;
-#endif
-X r = -ppst->ggapval;
-X m = q + r;
-X
-X /* initialize 0th row */
-X for (ssj=ss; ssj<&ss[n0]; ssj++) {
-X ssj->H = 0;
-X ssj->E = -q;
-X }
-X
-X score = 0;
-X aa1p = aa1;
-X while (*aa1p) {
-X h = p = 0;
-X f = -q;
-X pwaa = waa + (*aa1p++ * n0); /* the n0 scores of this aa1 residue */
-X for (ssj = ss; ssj < ss+n0; ssj++) {
-X if ((h = h - m) > (f = f - r)) f = h; /* open or extend a gap along the row */
-X if ((h = ssj->H - m) > (e = ssj->E - r)) e = h; /* open or extend a gap from the row above */
-X h = p + *pwaa++; /* diagonal step */
-X if (h < 0 ) h = 0;
-X if (h < f ) h = f;
-X if (h < e ) h = e;
-X p = ssj->H;
-X ssj->H = h;
-X ssj->E = e;
-X if (h > score) score = h;
-X }
-X } /* done with forward pass */
-X
-X rst->score[0] = score;
-X
-X if((ppst->zsflag == 6 || ppst->zsflag == 16) &&
-X (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
-X f_str->kar_p, &lambda, &H)>0)) {
-X rst->comp = 1.0/lambda;
-X rst->H = H;
-X }
-X else {rst->comp = rst->H = -1.0;}
-} /* here we should be all done */
-X
-/* do_opt() - empty in this file: every argument is ignored and nothing is changed */
-void do_opt (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1,
-X int frame, struct pstruct *pst, struct f_struct *f_str,
-X struct rstruct *rstr)
-{
-X /* nothing to do */
-}
-X /* do_walign() - find the best local alignment of aa0 and aa1 and build its edit script.
-X
-X 1. A forward pass over f_str->waa_a, the same recurrence as in
-X do_work(), records the best score and the cell (I,J) where it ends.
-X 2. A backward pass from (I,J) finds the cell (K,L) where an alignment
-X with that score starts.
-X 3. ALIGN() writes the script for that rectangle into f_str->res.
-X
-X On success a_res->res, nres, min0/max0 (aa0) and min1/max1 (aa1) are
-X set, *have_ares becomes 1 and the score is returned. When no positive
-X score exists 0 is returned and neither a_res nor *have_ares is touched.
-X */
-int do_walign (const unsigned char *aa0, const int n0,
-X const unsigned char *aa1, const int n1,
-X int frame,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X struct a_res_str *a_res,
-X int *have_ares )
-{
-X const unsigned char *aa0p, *aa1p;
-X register int *pwaa;
-X register int i, j;
-X register struct swstr *ssj;
-X struct swstr *f_ss, *r_ss, *ss;
-X int *res, *waa;
-X int e, f, h, p;
-X int q, r, m;
-X int score;
-X int cost, I, J, K, L;
-X
-X ss = f_str->ss;
-X
-X res = f_str->res;
-X waa = f_str->waa_a; /* this time use universal pam2[0] */
-X
-#ifdef OLD_FASTA_GAP
-X q = -(ppst->gdelval - ppst->ggapval);
-#else
-X q = -ppst->gdelval;
-#endif
-X
-X r = -ppst->ggapval;
-X m = q + r;
-X
-X /* initialize 0th row */
-X for (ssj=ss; ssj<ss+n0; ssj++) {
-X ssj->H = 0;
-X ssj->E = -q;
-X }
-X
-X score = 0;
-X aa1p = aa1;
-X i = 0;
-X while (*aa1p) { /* aa1 must end with a zero code */
-X h = p = 0;
-X f = -q;
-X pwaa = waa + (*aa1p++ * n0);
-X for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
-X if ((h = h - m) > /* gap open from left best */
-X /* gap extend from left gapped */
-X (f = f - r)) f = h; /* if better, use new gap opened */
-X if ((h = ssj->H - m) > /* gap open from up best */
-X /* gap extend from up gap */
-X (e = ssj->E - r)) e = h; /* if better, use new gap opened */
-X h = p + *pwaa++; /* diagonal match */
-X if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
-X if (h < f ) h = f; /* left gap better, reset */
-X if (h < e ) h = e; /* up gap better, reset */
-X p = ssj->H; /* save previous best score */
-X ssj->H = h; /* save (new) up diag-matched */
-X ssj->E = e; /* save upper gap opened */
-X if (h > score) { /* ? new best score */
-X score = h; /* save best */
-X I = i; /* row */
-X J = (int)(ssj-ss); /* column */
-X }
-X }
-X i++;
-X } /* done with forward pass */
-X if (score <= 0) return 0; /* I and J were never set in that case */
-X
-X /* to get the start point, go backwards */
-X
-X /* 18-June-2003 fix bug in backtracking code to identify start of
-X alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
-X pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
-X */
-X
-X cost = K = L = 0;
-X for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
-X
-X for (i=I; i>=0; i--) {
-X h = f = -1;
-X p = (i == I) ? 0 : -1; /* only the end cell may start the reverse path */
-X for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
-X f = max (f,h-q)-r;
-X ssj->E=max(ssj->E,ssj->H-q)-r;
-X h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
-X p = ssj->H;
-X ssj->H=h;
-X if (h > cost) {
-X cost = h;
-X K = i;
-X L = (int)(ssj-ss);
-X if (cost >= score) goto found; /* the forward score is reached again: (K,L) is the start */
-X }
-X }
-X }
-X
-found:
-X
-X /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
-X
-/* in the f_str version, the *res array is already allocated by init_work()
-X with max(3*n0/2,MIN_RES) entries */
-X
-X a_res->res = f_str->res;
-X *have_ares = 1;
-X a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
-X
-/* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
-X
-/* this code no longer refers to aa0[], it used pam2p[0][L] instead */
-X ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r, /* align() indexes A and B from 1, hence the -1 */
-X a_res->res,&a_res->nres,f_str);
-X
-/* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
-X
-X return score;
-}
-X
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N,
-X int *S, int **W, int IW, int G, int H, int *nres);
-X /* Edit-script macros. gap() expects g and h in scope; DEL/INS/REP expect int **sapp (next free script slot) and int *last (the last value written). */
-#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
-X
-/* Append "Delete k" op; merged into the previous entry when that is a delete too */
-#define DEL(k) \
-{ if (*last < 0) \
-X *last = (*sapp)[-1] -= (k); \
-X else { \
-X *last = (*sapp)[0] = -(k); \
-X (*sapp)++; \
-X } \
-}
-X
-/* Append "Insert k" op; merged into the previous entry when that is an insert too */
-#define INS(k) \
-{ if (*last > 0) \
-X *last = (*sapp)[-1] += (k); \
-X else { \
-X *last = (*sapp)[0] = (k); \
-X (*sapp)++; \
-X } \
-}
-X
-#define REP { *last = (*sapp)[0] = 0; (*sapp)++; } /* Append "Replace" op */
-X
-/*
-#define XTERNAL
-#include "upam.h"
-X
-void
-print_seq_prof(unsigned char *A, int M,
-X unsigned char *B, int N,
-X int **w, int iw, int dir) {
-X char c_max;
-X int i_max, j_max, i,j;
-X
-X char *c_dir="LRlr";
-X
-X for (i=1; i<=min(60,M); i++) {
-X fprintf(stderr,"%c",aa[A[i]]);
-X }
-X fprintf(stderr, - %d\n,M);
-X
-X for (i=0; i<min(60,M); i++) {
-X i_max = -1;
-X for (j=1; j<21; j++) {
-X if (w[iw+i][j]> i_max) {
-X i_max = w[iw+i][j];
-X j_max = j;
-X }
-X }
-X fprintf(stderr,"%c",aa[j_max]);
-X }
-X fputc(':',stderr);
-X for (i=1; i<=min(60,N); i++) {
-X fprintf(stderr,"%c",aa[B[i]]);
-X }
-X fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
-}
-*/
-X
-/* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
-X A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
-X and appends such a conversion to the current script.
-X
-X Scores come from the profile w: row iw+i-1 holds the scores for
-X position i of A, indexed by the residue code in B; A itself is never
-X read. g and h are the gap-open and per-residue gap costs used by
-X gap(). The script is written through *sapp with the DEL/INS/REP
-X macros, and *last carries the last value written between calls.
-X f_str->f_ss and f_str->r_ss are used as work rows and are indexed
-X 0..N. dir is only passed on to the recursive calls (it fed the
-X commented-out print_seq_prof()).
-X
-X Row M/2 is scored from the top (f_ss) and from the bottom (r_ss); the
-X column where the two halves add up best becomes the midpoint, and the
-X two halves are then solved recursively.
-*/
-X
-static int
-align(const unsigned char *A, const unsigned char *B, int M, int N,
-X int tb, int te, int **w, int iw, int g, int h,
-X struct f_struct *f_str, int dir,
-X int **sapp, int *last)
-{
-X int midi, midj, type; /* Midpoint, type, and cost */
-X int midc;
-X int c1, c2;
-X
-{ register int i, j;
-X register int c, e, d, s;
-X int m, t, *wa;
-X struct swstr *f_ss, *r_ss;
-X
-/* print_seq_prof(A,M,B,N,w,iw,dir); */
-X
-X m = g + h;
-X
-X f_ss = f_str->f_ss;
-X r_ss = f_str->r_ss;
-X
-/* Boundary cases: M <= 1 or N == 0 */
-X
-X if (N <= 0) {
-X if (M > 0) {
-X DEL(M)
-X }
-X return -gap(M);
-X }
-X
-X if (M <= 1) {
-X if (M <= 0){
-X INS(N)
-X return -gap(N); }
-X if (tb < te) tb = te;
-X midc = (tb-h) - gap(N); /* score if the single A position is deleted */
-X midj = 0;
-/* wa = w[A[1]]; */
-X wa = w[iw];
-X for (j = 1; j <= N; j++) { /* or pair it with B[j] and insert the rest of B */
-X c = -gap(j-1) + wa[B[j]] - gap(N-j);
-X if (c > midc) { midc = c; midj = j;}
-X }
-X if (midj == 0) {
-X DEL(1)
-X INS(N)
-X }
-X else {
-X if (midj > 1) { INS(midj-1)}
-X REP
-X if (midj < N) { INS(N-midj)}
-X }
-X return midc;
-X }
-X
-/* Divide: Find optimum midpoint (midi,midj) of cost midc */
-X
-X midi = M/2; /* Forward phase: */
-X f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
-X t = -g;
-X for (j = 1; j <= N; j++)
-X { f_ss[j].H = t = t-h;
-X f_ss[j].E = t-g;
-X }
-X t = tb;
-X for (i = 1; i <= midi; i++)
-X { s = f_ss[0].H;
-X f_ss[0].H = c = t = t-h;
-X e = t-g;
-/* wa = w[A[i]]; */
-X wa = w[iw+i-1];
-X for (j = 1; j <= N; j++)
-X { if ((c = c - m) > (e = e - h)) e = c;
-X if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
-X c = s + wa[B[j]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = f_ss[j].H;
-X f_ss[j].H = c;
-X f_ss[j].E = d;
-X }
-X }
-X f_ss[0].E = f_ss[0].H;
-X
-X r_ss[N].H = 0; /* Reverse phase: */
-X t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
-X for (j = N-1; j >= 0; j--)
-X { r_ss[j].H = t = t-h;
-X r_ss[j].E = t-g;
-X }
-X t = te;
-X for (i = M-1; i >= midi; i--)
-X { s = r_ss[N].H;
-X r_ss[N].H = c = t = t-h;
-X e = t-g;
-/* wa = w[A[i+1]]; */
-X wa = w[iw+i];
-X for (j = N-1; j >= 0; j--)
-X { if ((c = c - m) > (e = e - h)) e = c;
-X if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c;
-X c = s + wa[B[j+1]];
-X if (e > c) c = e;
-X if (d > c) c = d;
-X s = r_ss[j].H;
-X r_ss[j].H = c;
-X r_ss[j].E = d;
-X }
-X }
-X r_ss[N].E = r_ss[N].H;
-X
-X midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
-X midj = 0;
-X type = 1; /* type 1: the two halves simply meet at (midi,midj) */
-X for (j = 0; j <= N; j++)
-X if ((c = f_ss[j].H + r_ss[j].H) >= midc)
-X if (c > midc || f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)
-X { midc = c;
-X midj = j;
-X }
-X for (j = N; j >= 0; j--)
-X if ((c = f_ss[j].E + r_ss[j].E + g) > midc) /* +g: the gap open charged in both halves counts once */
-X { midc = c;
-X midj = j;
-X type = 2; /* type 2: a delete runs across the midpoint */
-X }
-X }
-X
-/* Conquer: recursively around midpoint */
-X
-X if (type == 1)
-X { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0, sapp, last); /* c1 and c2 are stored but not read */
-X c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
-X }
-X else
-X { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp, last);
-X DEL(2); /* the two A positions on either side of the midpoint */
-X align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
-X }
-X return midc;
-}
-X
-/* Interface and top level of comparator
-X
-X ALIGN() aligns A[1..M] with B[1..N] using the profile W (rows start at
-X IW) and the gap costs G (open) and H (per residue). The edit script
-X is written to S, its number of columns to *NC, and the alignment
-X score is returned. The script is scored again with CHECK_SCORE(); a
-X disagreement is reported on stdout.
-X
-X align() takes its two work rows from f_str->f_ss and f_str->r_ss.
-X Rows of N+2 cells are installed there for the duration of the call,
-X and the pointers f_str held on entry are put back before returning,
-X so f_str never keeps a pointer to freed memory and the rows made by
-X init_work() are not lost.
-*/
-X
-int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
-X int **W, int IW, int G, int H, int *S, int *NC,
-X struct f_struct *f_str)
-{
-X struct swstr *f_ss, *r_ss;
-X struct swstr *saved_f_ss, *saved_r_ss;
-X int *sapp, last;
-X int c, ck;
-X
-X sapp = S;
-X last = 0;
-X
-X saved_f_ss = f_str->f_ss;
-X saved_r_ss = f_str->r_ss;
-X
-X if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
-X exit (1);
-X }
-X f_ss++;
-X f_str->f_ss = f_ss;
-X
-X if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
-X == NULL) {
-X fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
-X exit (1);
-X }
-X r_ss++;
-X f_str->r_ss = r_ss;
-X
-X /* print_seq_prof(A,M,W,IW); */
-X c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp, &last); /* OK, do it */
-X
-X ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
-X if (c != ck) printf("Check_score error. %d != %d\n",c,ck);
-X
-X /* hand back what f_str held before, then drop the scratch rows */
-X f_str->f_ss = saved_f_ss;
-X f_str->r_ss = saved_r_ss;
-X
-X f_ss--; r_ss--;
-X free(r_ss); free(f_ss);
-X
-X return c;
-}
-X
-/* Alignment display routine
-X
-X DISPLAY() prints the alignment of A[1..M] and B[1..N] described by
-X the edit script S to stdout, at most 50 columns per chunk. Each chunk
-X has a ruler, the A line, a marker line ('|' same character, ' '
-X different, '-' gap) and the B line. AP and BP are the coordinates
-X printed in front of the first chunk; sq turns residue codes into
-X characters.
-X
-X The three line buffers are file-scope statics, so the routine is not
-X reentrant.
-*/
-X
-static char ALINE[51], BLINE[51], CLINE[51];
-X
-void DISPLAY(unsigned char *A, unsigned char *B, int M, int N,
-X int *S, int AP, int BP, char *sq)
-{ register char *a, *b, *c;
-X register int i, j, op;
-X int lines, ap, bp;
-X
-X i = j = op = lines = 0;
-X ap = AP;
-X bp = BP;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X while (i < M || j < N)
-X { if (op == 0 && *S == 0) /* aligned pair */
-X { op = *S++;
-X *a = sq[A[++i]];
-X *b = sq[B[++j]];
-X *c++ = (*a++ == *b++) ? '|' : ' ';
-X }
-X else
-X { if (op == 0)
-X op = *S++;
-X if (op > 0) /* residue of B opposite a blank */
-X { *a++ = ' ';
-X *b++ = sq[B[++j]];
-X op--;
-X }
-X else /* residue of A opposite a blank */
-X { *a++ = sq[A[++i]];
-X *b++ = ' ';
-X op++;
-X }
-X *c++ = '-';
-X }
-X if (a >= ALINE+50 || i >= M && j >= N) /* chunk full, or alignment finished */
-X { *a = *b = *c = '\0';
-X printf("\n%5d ",50*lines++);
-X for (b = ALINE+10; b <= a; b += 10)
-X printf(" . :");
-X if (b <= a+5)
-X printf(" .");
-X printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
-X ap = AP + i;
-X bp = BP + j;
-X a = ALINE;
-X b = BLINE;
-X c = CLINE;
-X }
-X }
-}
-X
-/* CHECK_SCORE - return the score of the alignment stored in S
-X
-X The script is read until M positions of A and N positions of B are
-X used up. A 0 pairs the next position of A with the next residue of B
-X and adds w[iw+row][B[col]] (B is indexed from 1); a value of k or -k
-X consumes k residues of B or k positions of A and costs g + k*h.
-X *NC receives the number of alignment columns. A is not read.
-*/
-X
-static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
-X int M, int N, int *S, int **w, int iw,
-X int g, int h, int *NC)
-{
-X int row = 0, col = 0;
-X int columns = 0, total = 0;
-X int step, len;
-X
-X /* print_seq_prof(A,M,w,iw); */
-X
-X while (row < M || col < N) {
-X step = *S++;
-X if (step == 0) {
-X col++;
-X total += w[iw+row][B[col]];
-X row++;
-X columns++;
-X continue;
-X }
-X len = (step > 0) ? step : -step;
-X total -= g + len*h;
-X if (step > 0) col += len;
-X else row += len;
-X columns += len;
-X }
-X
-X *NC = columns;
-X return total;
-}
-X
-/* pre_cons() - empty in this file: every argument is ignored */
-void pre_cons(const unsigned char *aa1, int n1, int frame,
-X struct f_struct *f_str)
-{
-}
-X
-/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
-/* call from calcons, calc_id, calc_code */
-/* here: the three factors become 1, both rev flags and frame become 0; */
-/* the frame argument is not used */
-void aln_func_vals(int frame, struct a_struct *aln)
-{
-X aln->qlfact = 1;
-X aln->llmult = 1;
-X aln->llfact = 1;
-X
-X aln->llrev = 0;
-X aln->qlrev = 0;
-X
-X aln->frame = 0;
-}
-X
-/* 29-June-2003 this version has been modified to use pst.pam2p
-X instead of pam2 to indicate similarity */
-X
-#include "a_mark.h"
-X /* calcons() - build the display strings for one alignment.
-X
-X seqc0 and seqc1 receive the residues of aa0 and aa1 with '-' in the
-X gaps, preceded by up to mins columns of sequence to the left of the
-X alignment and followed by nd columns to its right (blank-padded
-X where a sequence is shorter). seqca receives one marker per column:
-X M_BLANK to the left, then M_IDENT, M_POS, M_ZERO, M_NEG or M_DEL,
-X ended by a NUL after the aligned part. The markers come from
-X f_str->pam2p[0][position in aa0][residue of aa1].
-X
-X Also sets aln->amin0/amax0/amin1/amax1 and smin0/smin1, recounts
-X aln->nident, nsim, ngap_q and ngap_l, and stores the number of
-X aligned columns in *nc.
-X
-X Returns mins + aligned columns + nd. With LFASTA defined no
-X flanking sequence is copied (mins and nd are 0).
-X */
-int calcons(const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc1, char *seqca,
-X struct f_struct *f_str)
-{
-X int i0, i1;
-X int op, lenc, nd, ns, itmp;
-X char *sp0, *sp1, *spa, *sq;
-X int *rp;
-X int mins, smins;
-X
-X if (pst.ext_sq_set) { sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X /* #define LFASTA */
-#ifndef LFASTA
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) { /* aa1 has fewer than mins residues to the left: pad with blanks */
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else { /* aa1 extends more to left */
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X else { /* pairs with the outer if: both starts are at least llen in */
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X }
-#else
-X aln->smin0 = a_res.min0;
-X aln->smin1 = a_res.min1;
-X smins = mins = 0;
-#endif
-X
-/* now get the middle */
-X
-X memset(seqca,M_BLANK,mins);
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) { /* no gap pending and next entry is a pair */
-X op = *rp++;
-X lenc++;
-X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1[i1++]];
-X
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
-X (*sp0=='U' && *sp1=='T'))) {
-X aln->nident++; *spa=M_IDENT;
-X }
-X
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++; /* fetch the length of the next gap */
-X if (op>0) { /* residue of aa1 opposite a gap in aa0 */
-X *sp0++ = '-';
-X *sp1++ = sq[aa1[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else { /* residue of aa0 opposite a gap in aa1 */
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *spa = '\0';
-/* now we have the middle, get the right end */
-X
-#ifndef LFASTA
-X /* how much extra to show at end ? */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X if (nd > max(n0-a_res.max0,n1-a_res.max1)) /* never more than what is left of the longer tail */
-X nd = max(n0-a_res.max0,n1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else {
-X if ((nd-(n0-a_res.max0))>0) { /* aa0 ends before nd columns: copy what is left, pad the rest */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(n1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
-X }
-X
-#else /* LFASTA */
-X nd = 0;
-#endif
-X /* #undef LFASTA */
-X return mins+lenc+nd;
-}
-X
-/* calcons_a() - build the printable alignment strings, plus a query
-X annotation line, for one alignment.
-X
-X aa0/n0, aa1/n1 - the two encoded sequences and their lengths
-X aa0a - per-residue annotation codes for aa0 (looked up through ann_arr)
-X nc - out: number of aligned columns (lenc)
-X aln - in: llen, llcntx, llcntx_flg, showall;
-X out: amin0/amax0/amin1/amax1, smin0/smin1 and the
-X nident/nsim/ngap_q/ngap_l/nfs counters
-X a_res - alignment boundaries (min0,max0,min1,max1) and edit script (res)
-X pst - alphabet (sq, or sqx when ext_sq_set) and dnaseq flag
-X seqc0, seqc1 - out: aligned residue characters for aa0 and aa1
-X seqc0a - out: annotation characters parallel to seqc0
-X seqca - out: per-column match symbols; NUL-terminated after the
-X aligned region
-X ann_arr - maps an annotation code to its display character
-X f_str - supplies pam2p[0][i0][residue], the score used for the
-X match symbol at query position i0
-X
-X Edit script: an entry of 0 is one aligned pair; a positive entry n is n
-X columns where aa1 is shown against '-' in seqc0; a negative entry is
-X the same with aa0 shown against '-' in seqc1.
-X
-X Returns mins+lenc+nd (leading context + aligned columns + trailing
-X context). When LFASTA is defined no context is added (mins = nd = 0).
-X
-X NOTE: smins is assigned below but never read in this function.
-X */
-int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
-X const unsigned char *aa1, int n1,
-X int *nc,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X char *ann_arr, struct f_struct *f_str)
-{
-X int i0, i1;
-X int op, lenc, nd, ns, itmp;
-X char *sp0, *sp0a, *sp1, *spa, *sq;
-X int *rp;
-X int mins, smins;
-X
-X /* pick the residue-code -> character table */
-X if (pst.ext_sq_set) {sq = pst.sqx;}
-X else {sq = pst.sq;}
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X /* first fill in the ends */
-X
-X /* #define LFASTA */
-#ifndef LFASTA
-X if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
-X if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
-X smins=0;
-X if (aln->showall==1) mins=a_res.min0;
-X else mins = min(a_res.min0,aln->llcntx);
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X if ((mins-a_res.min1)>0) {
-X /* aa1 has fewer than mins leading residues: left-pad with blanks */
-X memset(seqc1,' ',mins-a_res.min1);
-X aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
-X aln->smin1 = 0;
-X }
-X else {
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X }
-X }
-X else {
-X /* aa1 extends more to the left: mirror image of the case above */
-X smins=0;
-X if (aln->showall == 1) mins=a_res.min1;
-X else mins = min(a_res.min1,aln->llcntx);
-X aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
-X aln->smin1 = a_res.min1-mins;
-X if ((mins-a_res.min0)>0) {
-X memset(seqc0,' ',mins-a_res.min0);
-X aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
-X aln->smin0 = 0;
-X }
-X else {
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aln->smin0 = a_res.min0-mins;
-X }
-X }
-X /* this else pairs with the outer "will we show all the start" test:
-X show at most llcntx residues of leading context from both sequences */
-X else {
-X mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
-X smins=mins;
-X aln->smin0=a_res.min0;
-X aln->smin1=a_res.min1;
-X aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
-X aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
-X }
-#else
-X aln->smin0 = a_res.min0;
-X aln->smin1 = a_res.min1;
-X smins = mins = 0;
-#endif
-X
-/* now get the middle */
-X
-X /* the leading context has no match symbols and no annotation */
-X memset(seqca,M_BLANK,mins);
-X memset(seqc0a,' ',mins);
-X
-X spa = seqca+mins;
-X sp0 = seqc0+mins;
-X sp0a = seqc0a+mins;
-X sp1 = seqc1+mins;
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* op holds the remainder of the gap run in progress (0 = none) */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair: the symbol is first chosen from the sign of the score */
-X op = *rp++;
-X lenc++;
-X if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
-X else if (itmp == 0) { *spa = M_ZERO;}
-X else {*spa = M_POS;}
-X if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
-X
-X *sp0a++ = ann_arr[aa0a[i0]];
-X *sp0 = sq[aa0[i0++]];
-X *sp1 = sq[aa1[i1++]];
-X
-X /* an identity (case-insensitive, or T/U for DNA) overrides the symbol */
-X if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
-X else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
-X (*sp0=='U' && *sp1=='T'))) {
-X aln->nident++; *spa=M_IDENT;
-X }
-X
-X sp0++; sp1++; spa++;
-X }
-X else {
-X if (op==0) op = *rp++; /* start a new gap run */
-X if (op>0) {
-X /* gap in aa0: emit '-' against the next aa1 residue */
-X *sp0++ = '-';
-X *sp0a++ = ' ';
-X *sp1++ = sq[aa1[i1++]];
-X *spa++ = M_DEL;
-X op--;
-X lenc++;
-X aln->ngap_q++;
-X }
-X else {
-X /* gap in aa1: emit the next aa0 residue against '-' */
-X *sp0a++ = ann_arr[aa0a[i0]];
-X *sp0++ = sq[aa0[i0++]];
-X *sp1++ = '-';
-X *spa++ = M_DEL;
-X op++;
-X lenc++;
-X aln->ngap_l++;
-X }
-X }
-X }
-X
-X *nc = lenc;
-X *spa = '\0';
-/* now we have the middle, get the right end */
-X
-#ifndef LFASTA
-X /* how much extra to show at end ? */
-X if (!aln->llcntx_flg) {
-X ns = mins + lenc + aln->llen; /* show an extra line? */
-X ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
-X if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
-X nd = ns - (mins+lenc); /* this much extra */
-X }
-X else nd = aln->llcntx;
-X
-X /* never show more than the longer of the two remaining tails */
-X if (nd > max(n0-a_res.max0,n1-a_res.max1))
-X nd = max(n0-a_res.max0,n1-a_res.max1);
-X
-X if (aln->showall==1) {
-X nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
-X /* get right end */
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X /* fill with blanks - this is required to use one 'nc' */
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else {
-X /* copy what each sequence has left, blank-padding the shorter tail to nd */
-X if ((nd-(n0-a_res.max0))>0) {
-X aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
-X memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
-X }
-X else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
-X
-X if ((nd-(n1-a_res.max1))>0) {
-X aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
-X memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
-X }
-X else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
-X }
-X
-#else /* LFASTA */
-X nd = 0;
-#endif
-X /* #undef LFASTA */
-X return mins+lenc+nd;
-}
-X
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt);
-X
-/* build an array of match/ins/del - length strings */
-/* calc_code() - walk the edit script in a_res.res and append a run-length
-X encoding of the alignment to al_str (through update_code()).
-X
-X Each token is one character from "=-+*" followed by a decimal count:
-X '=' (p_op 0) run of aligned pairs
-X '-' (p_op 1) run of positive script entries (aa1 advances, aa0 does not)
-X '+' (p_op 2) run of negative script entries (aa0 advances, aa1 does not)
-X '*' (p_op 3) an aligned pair in which either residue prints as '*';
-X every such pair is emitted as its own token
-X
-X al_str must already hold a NUL-terminated string on entry (strlen() is
-X applied to it); al_str_n is passed on as the space limit.
-X
-X Also fills aln->amin0/amax0/amin1/amax1 and the nident/nsim/ngap_q/
-X ngap_l counters (nfs is set to 0). Returns the number of alignment
-X columns (lenc).
-X
-X NOTE: p_op and op_cnt both start at 0, so when the first column is not
-X an ordinary aligned pair the first token written is "=0".
-X NOTE: nd, ns, itmp, mins and smins are never used here; nn1 and tmp_cnt
-X are only assigned.
-X */
-int calc_code(const unsigned char *aa0, const int n0,
-X const unsigned char *aa1, const int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X char *al_str, int al_str_n, struct f_struct *f_str)
-{
-X int i0, i1, nn1;
-X int op, lenc, nd, ns, itmp;
-X int p_op, op_cnt;
-X const unsigned char *aa1p;
-X char tmp_cnt[20];
-X char sp0, sp1, *sq;
-X int *rp;
-X int mins, smins;
-X
-X /* pick the residue-code -> character table */
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-X /* with TFASTA the second sequence is taken from f_str instead of aa1 */
-#ifndef TFASTA
-X aa1p = aa1;
-X nn1 = n1;
-#else
-X aa1p = f_str->aa1x;
-X nn1 = f_str->n10;
-#endif
-X
-X aln->amin0 = a_res.min0;
-X aln->amax0 = a_res.max0;
-X aln->amin1 = a_res.min1;
-X aln->amax1 = a_res.max1;
-X
-X rp = a_res.res;
-X lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
-X op_cnt = 0;
-X
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X tmp_cnt[0]='\0';
-X
-X /* op: remainder of the gap run in progress; p_op/op_cnt: type and
-X length of the token being accumulated */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
-X
-X sp0 = sq[aa0[i0++]];
-X sp1 = sq[aa1p[i1++]];
-X
-X if (p_op == 0 || p_op==3) {
-X if (sp0 != '*' && sp1 != '*') {
-X if (p_op == 3) {
-X /* ordinary pair after a '*' pair: flush the '*' token */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X else {op_cnt++;}
-X }
-X else {
-X /* a '*' pair always closes the current token and starts a new one */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 3;
-X }
-X }
-X else {
-X /* coming out of a gap run: flush it and start a match run */
-X update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 0;
-X }
-X
-X op = *rp++;
-X lenc++;
-X
-X if (toupper(sp0) == toupper(sp1)) aln->nident++;
-X else if (pst.dnaseq==1) {
-X if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
-X (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
-X /* NOTE: a non-identical pair involving 'N' is tallied in the gap counters */
-X else if (toupper(sp0) == 'N') aln->ngap_q++;
-X else if (toupper(sp1) == 'N') aln->ngap_l++;
-X }
-X }
-X else {
-X if (op==0) op = *rp++; /* start a new gap run */
-X if (op>0) {
-X if (p_op == 1) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 1;
-X }
-X op--; lenc++; i1++; aln->ngap_q++;
-X }
-X else {
-X if (p_op == 2) { op_cnt++;}
-X else {
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X op_cnt = 1; p_op = 2;
-X }
-X op++; lenc++; i0++; aln->ngap_l++;
-X }
-X }
-X }
-X /* flush the final token */
-X update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
-X
-X return lenc;
-}
-X
-/* update_code() - append one "<op-char><count>" token to al_str.
-X
-X al_str - NUL-terminated encoding built so far
-X al_str_max - bytes still free in al_str, counting the byte needed for
-X the terminating NUL (callers pass al_str_n - strlen(al_str);
-X this assumes al_str_n is the allocated size - TODO confirm)
-X op - token type, an index into "=-+*"
-X op_cnt - run length printed after the token character
-X
-X strncat(dst,src,n) may write n characters *plus* a NUL, so at most
-X al_str_max-1 characters may be appended. When there is no room for even
-X one character (including a zero or negative al_str_max, which would
-X otherwise turn into a huge size_t) nothing is written. A token that
-X does not fit completely is truncated, as before.
-X */
-static void
-update_code(char *al_str, int al_str_max, int op, int op_cnt) {
-X
-X char op_char[5]={"=-+*"};
-X char tmp_cnt[20];
-X
-X if (al_str_max <= 1) return;
-X
-X sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
-X strncat(al_str,tmp_cnt,al_str_max-1);
-}
-X
-/* calc_id() - count identities, similarities and gaps for one alignment
-X without building any display strings.
-X
-X Walks the edit script a_res.res from (a_res.min0, a_res.min1) to
-X (a_res.max0, a_res.max1): an entry of 0 is an aligned pair, a positive
-X entry n is n columns that advance only the second sequence (counted in
-X ngap_q), a negative entry advances only aa0 (counted in ngap_l).
-X
-X An aligned pair is similar (nsim) when pst.pam2[0] scores it >= 0 and
-X identical (nident) when the two characters match ignoring case or, for
-X DNA, are a T/U pair. The T/U test ignores case as well, so that it
-X agrees with the plain identity test and with calc_code().
-X
-X Sets aln->nident, nsim, ngap_q, ngap_l (and nfs to 0).
-X Returns the number of alignment columns.
-X */
-int calc_id(const unsigned char *aa0, const int n0,
-X const unsigned char *aa1, const int n1,
-X struct a_struct *aln,
-X struct a_res_str a_res,
-X struct pstruct pst,
-X struct f_struct *f_str)
-{
-X int i0, i1, n_id;
-X int op, lenc;
-X int sp0, sp1;
-X const unsigned char *aa1p;
-X int *rp;
-X char *sq;
-X
-X /* pick the residue-code -> character table */
-X if (pst.ext_sq_set) {
-X sq = pst.sqx;
-X }
-X else {
-X sq = pst.sq;
-X }
-X
-X /* with TFASTA the second sequence is taken from f_str instead of aa1 */
-#ifndef TFASTA
-X aa1p = aa1;
-#else
-X aa1p = f_str->aa1x;
-#endif
-X
-X rp = a_res.res;
-X lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
-X i0 = a_res.min0;
-X i1 = a_res.min1;
-X
-X /* op holds the remainder of the gap run in progress (0 = none) */
-X while (i0 < a_res.max0 || i1 < a_res.max1) {
-X if (op == 0 && *rp == 0) {
-X /* aligned pair */
-X op = *rp++;
-X lenc++;
-X if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
-X
-X sp0 = toupper(sq[aa0[i0++]]);
-X sp1 = toupper(sq[aa1p[i1++]]);
-X if (sp0 == sp1) n_id++;
-X else if (pst.dnaseq==1 &&
-X ((sp0=='T' && sp1== 'U')||(sp0=='U' && sp1=='T'))) n_id++;
-X }
-X else {
-X if (op==0) op = *rp++; /* start a new gap run */
-X if (op>0) {op--; lenc++; i1++; aln->ngap_q++; }
-X else {op++; lenc++; i0++; aln->ngap_l++; }
-X }
-X }
-X aln->nident = n_id;
-X return lenc;
-}
-X
-#ifdef PCOMPLIB
-#include "p_mw.h"
-/* update_params() - copy the n0 field of the query message qm_msg into
-X the parameter block ppst. Only compiled when PCOMPLIB is defined. */
-void
-update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
-{
-X ppst->n0 = qm_msg->n0;
-}
-#endif
-SHAR_EOF
-chmod 0644 dropnsw.c ||
-echo 'restore of dropnsw.c failed'
-Wc_c="`wc -c < 'dropnsw.c'`"
-test 34172 -eq "$Wc_c" ||
- echo 'dropnsw.c: original size 34172, current size' "$Wc_c"
-fi
-# ============= egmsmg.aa ==============
-if test -f 'egmsmg.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping egmsmg.aa (File already exists)'
-else
-echo 'x - extracting egmsmg.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'egmsmg.aa' &&
->EGMSMG Epidermal growth factor precursor - Mouse
-MPWGRRPTWLLLAFLLVFLKISILSVTAWQTGNCQPGPLERSERSGTCAGPAPFLVFSQGKSISRIDPDG
-TNHQQLVVDAGISADMDIHYKKERLYWVDVERQVLLRVFLNGTGLEKVCNVERKVSGLAIDWIDDEVLWV
-DQQNGVITVTDMTGKNSRVLLSSLKHPSNIAVDPIERLMFWSSEVTGSLHRAHLKGVDVKTLLETGGISV
-LTLDVLDKRLFWVQDSGEGSHAYIHSCDYEGGSVRLIRHQARHSLSSMAFFGDRIFYSVLKSKAIWIANK
-HTGKDTVRINLHPSFVTPGKLMVVHPRAQPRTEDAAKDPDPELLKQRGRPCRFGLCERDPKSHSSACAEG
-YTLSRDRKYCEDVNECATQNHGCTLGCENTPGSYHCTCPTGFVLLPDGKQCHELVS
-CPGNVSKCSHGCVLTSDGPRCICPAGSVLGRDGKTCTGCSSPDNGGCSQICLPLRPGSWECDCFPGYDLQ
-SDRKSCAASGPQPLLLFANSQDIRHMHFDGTDYKVLLSRQMGMVFALDYDPVESKIYFAQTALKWIERAN
-MDGSQRERLITEGVDTLEGLALDWIGRRIYWTDSGKSVVGGSDLSGKHHRIIIQERISRPRGIAVHPRAR
-RLFWTDVGMSPRIESASLQGSDRVLIASSNLLEPSGITIDYLTDTLYWCDTKRSVIEMANLDGSKRRRLI
-QNDVGHPFSLAVFEDHLWVSDWAIPSVIRVNKRTGQNRVRLQGSMLKPSSLVVVHPLAKPGADPCLYRNG
-GCEHICQESLGTARCLCREGFVKAWDGKMCLPQDYPILSGENADLSKEVTSLSNST
-QAEVPDDDGTESSTLVAEIMVSGMNYEDDCGPGGCGSHARCVSDGETAECQCLKGFARDGNLCSDIDECV
-LARSDCPSTSSRCINTEGGYVCRCSEGYEGDGISCFDIDECQRGAHNCAENAACTNTEGGYNCTCAGRPS
-SPGRSCPDSTAPSLLGEDGHHLDRNSYPGCPSSYDGYCLNGGVCMHIESLDSYTCNCVIGYSGDRCQTRD
-LRWWELRHAGYGQKHDIMVVAVCMVALVLLLLLGMWGTYYYRTRKQLSNPPKNPCDEPSGSVSSSGPDSS
-SGAAVASCPQPWFVVLEKHQDPKNGSLPADGTNGAVVDAGLSPSLQLGSVHLTSWRQKPHIDGMGTGQSC
-WIPPSSDRGPQEIEGNSHLPSYRPVGPEKLHSLQSANGSCHERAPDLPRQTEPVK
-SHAR_EOF
-chmod 0644 egmsmg.aa ||
-echo 'restore of egmsmg.aa failed'
-Wc_c="`wc -c < 'egmsmg.aa'`"
-test 1286 -eq "$Wc_c" ||
- echo 'egmsmg.aa: original size 1286, current size' "$Wc_c"
-fi
-# ============= faatran.c ==============
-if test -f 'faatran.c' -a X"$1" != X"-c"; then
- echo 'x - skipping faatran.c (File already exists)'
-else
-echo 'x - extracting faatran.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'faatran.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: faatran.c,v 1.6 2007/04/02 18:08:11 wrp Exp $ */
-X
-/* aatran.c translates from nt to aa, 1 char codes */
-/* modified July 2, 1987 for all 6 frames */
-/* 23 Jan 1991 fixed bug for short sequences */
-X
-/* this mapping is not alphabet independent */
-X
-#define XTERNAL
-#include <stdio.h>
-#include <stdlib.h>
-X
-#include "upam.h"
-#include "uascii.h"
-X
-/*
-1. The Standard Code (transl_table=1)
-X
-By default all transl_table in GenBank flatfiles are equal to id 1, and this
-is not shown. When transl_table is not equal to id 1, it is shown as a
-qualifier on the CDS feature.
-X
-*/
-static
-char *AA1="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = ---M---------------M---------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-2. The Vertebrate Mitochondrial Code (transl_table=2)
-*/
-static
-char *AA2 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
-/*
-X Starts = --------------------------------MMMM---------------M------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-3. The Yeast Mitochondrial Code (transl_table=3)
-*/
-static
-char *AA3 ="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
-Mycoplasma/Spiroplasma Code (transl_table=4)
-*/
-static
-char *AA4 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = --MM---------------M------------MMMM---------------M------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-5. The Invertebrate Mitochondrial Code (transl_table=5)
-*/
-static
-char *AA5 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
-/*
-X Starts = ---M----------------------------MMMM---------------M------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
-*/
-static
-char *AA6 ="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-9. The Echinoderm Mitochondrial Code (transl_table=9)
-*/
-static
-char *AA7 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-10. The Euplotid Nuclear Code (transl_table=10)
-*/
-static
-char *AA10="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-11. The Bacterial "Code" (transl_table=11)
-*/
-static
-char *AA11="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = ---M---------------M------------MMMM---------------M------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-12. The Alternative Yeast Nuclear Code (transl_table=12)
-*/
-static
-char *AA12 ="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = -------------------M---------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-13. The Ascidian Mitochondrial Code (transl_table=13)
-*/
-static
-char *AA13="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-14. The Flatworm Mitochondrial Code (transl_table=14)
-*/
-static
-char *AA14 ="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-X
-15. Blepharisma Nuclear Code (transl_table=15)
-*/
-static
-char *AA15="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X Starts = -----------------------------------M----------------------------
-X Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-X
-static
-char *AA16 ="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X id 16 ,
-X name "Chlorophycean Mitochondrial" ,
-X sncbieaa "-----------------------------------M----------------------------"
-X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-X
-static
-char *AA21 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
-X name "Trematode Mitochondrial" ,
-X id 21 ,
-X sncbieaa "-----------------------------------M---------------M------------"
-X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-X
-static
-char *AA22 ="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X name "Scenedesmus obliquus Mitochondrial" ,
-X id 22 ,
-X sncbieaa "-----------------------------------M----------------------------"
-X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-X
-static
-char *AA23 ="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
-X name "Thraustochytrium Mitochondrial" ,
-X id 23 ,
-X sncbieaa "--------------------------------M--M---------------M------------"
-X -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-X -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-X -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-X
-X
-static char aacmap[64]={
-X 'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
-X 'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
-X 'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
-X '*','Y','*','Y','S','S','S','S','*','C','W','C','L','F','L','F'
-};
-X
-static int aamap[64]; /* integer aa values */
-static int aamapr[64]; /* reverse sequence map */
-X
-/* tnt is used only by aatran.c. It must be consistent with lascii and
-the nt alphabet. It uses 3,3 because T and U are considered separately
-*/
-static int tnt[]={0,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0,
-X 0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0};
-X
-static int debug_set;
-X
-/* aatran() - translate encoded nucleotides into encoded amino acids.
-X
-X ntseq - encoded nucleotide sequence (each value indexes tnt[])
-X aaseq - out: translated sequence, terminated with EOSEQ
-X maxs - number of nucleotides in ntseq
-X frame - 0..2: read forward starting at ntseq[frame] using aamap[];
-X 3 and up: read backward starting just below
-X ntseq[maxs-(frame-3)] using aamapr[] (the table aainit()
-X builds for the complemented codon index)
-X
-X Returns the number of codons translated, or 0 (with aaseq[0]=EOSEQ)
-X when the frame holds 3 codons or fewer.
-X
-X The DEBUG blocks report a translated value above MAXUC, rebuild
-X aamap[]/aamapr[] from aacmap[] and store the re-read value.
-X */
-int
-aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame)
-{
-X int iaa, im, nna, i;
-X register int *nnp;
-X const unsigned char *nts0;
-X register int *aamp;
-X register unsigned char *aap;
-X
-X iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
-X if (nna <= 3 ) {
-X aaseq[0]=EOSEQ;
-X return 0;
-X }
-X
-X nnp = tnt;
-X
-X if (frame < 3) {
-X aamp = aamap;
-X nts0 = &ntseq[frame];
-X aap = aaseq;
-X while (nna--) {
-X /* pack three 2-bit base codes into a 6-bit codon index */
-X im = nnp[*nts0++]<<4;
-X im += nnp[*nts0++]<<2;
-X im += nnp[*nts0++];
-X *aap++ = aamp[im];
-X
-X /* this check is included because of a bug in tfasty
-X which occurs only during the alignment process */
-X
-#ifdef DEBUG
-X if (debug_set && aamp[im] > MAXUC) {
-X /* nts0 is now just past the codon, so its bases are nts0[-3..-1] */
-X fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
-X *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
-X
-X /* this allows recovery, but should not be done frequently */
-X for (i=0; i<64; i++) {
-X aamap[i]=aascii[aacmap[i]];
-X aamapr[i]=aascii[aacmap[(~i)&63]];
-X }
-X *(aap-1) = aamp[im];
-X }
-#endif
-X }
-X }
-X else {
-X aamp = aamapr;
-X nts0 = &ntseq[maxs-(frame-3)];
-X aap = aaseq;
-X while (nna--) {
-X im = nnp[*--nts0]<<4;
-X im += nnp[*--nts0]<<2;
-X im += nnp[*--nts0];
-X *aap++ = aamp[im];
-X /* this check is included because of a bug in tfasty
-X which occurs only during the alignment process */
-X
-#ifdef DEBUG
-X if (debug_set && aamp[im] > MAXUC) {
-X /* reading backward, nts0 points AT the last base read: the codon,
-X in read order, is nts0[2], nts0[1], nts0[0]. Indexing below
-X nts0 would print bases not yet consumed and can fall before
-X the start of ntseq on the final codon. */
-X fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
-X nts0[2],nts0[1],nts0[0], im, aamp[im],aamap[im]);
-X
-X /* this allows recovery, but should not be done frequently */
-X for (i=0; i<64; i++) {
-X aamap[i]=aascii[aacmap[i]];
-X aamapr[i]=aascii[aacmap[(~i)&63]];
-X }
-X *(aap-1) = aamp[im];
-X }
-#endif
-X }
-X }
-X aaseq[iaa]=EOSEQ;
-X return iaa;
-}
-X
-/* slower version that masks out NNN,XXX */
-X
-/* - A C G T U R Y M W S K D H V B N X */
-static int snt[]={0,0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4};
-X
-/* saatran() - translate encoded nucleotides into encoded amino acids,
-X writing aascii['X'] for any codon that contains a base whose snt[]
-X entry is 4 or more.
-X
-X frame 0..2 reads forward from ntseq[frame] with aamap[]; frame 3 and up
-X reads backward from just below ntseq[maxs-(frame-3)] with aamapr[].
-X aaseq is terminated with EOSEQ. Returns the number of codons written,
-X or 0 (with aaseq[0]=EOSEQ) when the frame holds 3 codons or fewer.
-X */
-int
-saatran(const unsigned char *ntseq,
-X unsigned char *aaseq, int maxs, int frame)
-{
-X int n_codon, k, shift, base, codon, masked;
-X const unsigned char *ntp;
-X const int *aa_tab;
-X
-X n_codon = (maxs-(frame<3?frame:frame-3))/3;
-X if (n_codon <= 3) {
-X aaseq[0]=EOSEQ;
-X return 0;
-X }
-X
-X if (frame < 3) {
-X /* forward strand */
-X aa_tab = aamap;
-X ntp = &ntseq[frame];
-X for (k = 0; k < n_codon; k++) {
-X codon = 0;
-X masked = 0;
-X /* bases land in bits 5-4, 3-2 and 1-0 of the codon index */
-X for (shift = 4; shift >= 0; shift -= 2) {
-X base = snt[*ntp++];
-X if (base < 4) codon += base<<shift;
-X else masked = 1;
-X }
-X aaseq[k] = masked ? aascii['X'] : aa_tab[codon];
-X }
-X }
-X else {
-X /* reverse strand: walk backward through ntseq */
-X aa_tab = aamapr;
-X ntp = &ntseq[maxs-(frame-3)];
-X for (k = 0; k < n_codon; k++) {
-X codon = 0;
-X masked = 0;
-X for (shift = 4; shift >= 0; shift -= 2) {
-X base = snt[*--ntp];
-X if (base < 4) codon += base<<shift;
-X else masked = 1;
-X }
-X aaseq[k] = masked ? aascii['X'] : aa_tab[codon];
-X }
-X }
-X aaseq[n_codon]=EOSEQ;
-X return n_codon;
-}
-X
-/* aainit() - select a genetic code and (re)build the translation tables.
-X
-X tr_type - translation table number; values <= 0 leave the current
-X aacmap[] untouched, unknown positive values fall back to AA1
-X debug - stored in debug_set; when non-zero, every codon whose amino
-X acid changes is reported on stderr
-X
-X For tr_type > 0 the chosen 64-character table, which is written in
-X T,C,A,G order (see the Base1/Base2/Base3 comments on the tables), is
-X copied into aacmap[], whose index packs three 2-bit base codes with
-X A=0, C=1, G=2, T=3; imap[] converts between the two orders.
-X
-X In every case aamap[] and aamapr[] are then rebuilt from aacmap[]
-X through aascii[]; aamapr[i] uses index (~i)&63, i.e. each 2-bit base
-X code complemented (0<->3, 1<->2).
-X
-X NOTE(review): tr_type 7 selects AA7, which its comment labels
-X transl_table=9; there is no case 9 (or 8), so those values give AA1.
-X NOTE: the local j is unused.
-X */
-void
-aainit(int tr_type, int debug)
-{
-X int i,j;
-X char *aasmap;
-X int imap[4]={3,1,0,2}, i0, i1, i2, ii;
-X
-X debug_set = debug;
-X
-X aasmap = AA1;
-X if (tr_type > 0) {
-X /* need to put in a new translation table */
-X switch (tr_type) {
-X case 1: aasmap = AA1; break;
-X case 2: aasmap = AA2; break;
-X case 3: aasmap = AA3; break;
-X case 4: aasmap = AA4; break;
-X case 5: aasmap = AA5; break;
-X case 6: aasmap = AA6; break;
-X case 7: aasmap = AA7; break;
-X case 10: aasmap = AA10; break;
-X case 11: aasmap = AA11; break;
-X case 12: aasmap = AA12; break;
-X case 13: aasmap = AA13; break;
-X case 14: aasmap = AA14; break;
-X case 15: aasmap = AA15; break;
-X case 16: aasmap = AA16; break;
-X case 21: aasmap = AA21; break;
-X case 22: aasmap = AA22; break;
-X case 23: aasmap = AA23; break;
-X
-X default: aasmap = AA1; break;
-X }
-X
-X if (debug) fprintf(stderr," codon table: %d\n new old\n",tr_type);
-X /* walk the table in its own T,C,A,G order; ii is the matching aacmap slot */
-X for (i0 = 0; i0 < 4; i0++)
-X for (i1 = 0; i1 < 4; i1++)
-X for (i2 = 0; i2 < 4; i2++) {
-X ii = (imap[i0]<<4) + (imap[i1]<<2) + imap[i2];
-X /* compare before overwriting so the old value can still be printed */
-X if (debug && aacmap[ii] != *aasmap)
-X fprintf(stderr," %c%c%c: %c - %c\n",
-X nt[imap[i0]+1],nt[imap[i1]+1],nt[imap[i2]+1],
-X *aasmap,aacmap[ii]);
-X aacmap[ii]= *aasmap++;
-X }
-X
-X /*
-X for (i=0; i<64; i++) {
-X fprintf(stderr,"'%c',",aacmap[i]);
-X if ((i%16)==15) fputc('\n',stderr);
-X }
-X fputc('\n',stderr);
-X */
-X }
-X /* character table -> integer residue codes, forward and reverse */
-X for (i=0; i<64; i++) {
-X aamap[i]=aascii[aacmap[i]];
-X aamapr[i]=aascii[aacmap[(~i)&63]];
-X }
-}
-X
-/* aagetmap() - copy the first n characters of the current codon ->
-X amino-acid table (aacmap[]) into to[]; n <= 0 copies nothing. */
-void
-aagetmap(char *to, int n)
-{
-X const char *src = aacmap;
-X
-X while (n-- > 0) *to++ = *src++;
-}
-SHAR_EOF
-chmod 0644 faatran.c ||
-echo 'restore of faatran.c failed'
-Wc_c="`wc -c < 'faatran.c'`"
-test 13742 -eq "$Wc_c" ||
- echo 'faatran.c: original size 13742, current size' "$Wc_c"
-fi
-# ============= fast_new ==============
-if test -f 'fast_new' -a X"$1" != X"-c"; then
- echo 'x - skipping fast_new (File already exists)'
-else
-echo 'x - extracting fast_new (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fast_new' &&
-NBRF PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/blast/pir1.lseg
-NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
-NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
-NCBI/Blast non-redundant proteins$0+nr+/slib2/blast/nr.lseg
-NCBI/Blast Swissprot$0+sp+/slib2/blast/swissprot.lseg
-GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/blast/genpept.fsa
-Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
-Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
-C. elegans blast server$0W/slib2/blast/C.elegans_blast.fa
-E. coli proteome$0E/slib0/genomes/ecoli.npep
-H. influenzae proteome$0I/slib0/genomes/hinf.npep
-H. pylori proteome$0L/slib0/genomes/hpyl.npep
-NCBI Entrez Human proteins$0H/slib2/blast/human.aa
-M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
-M. jannaschii proteome$0J/slib0/genomes/mjan.npep
-Synechosystis proteome$0C/slib0/genomes/synecho.npep
-GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
-GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
-GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
-GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
-GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
-GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
-GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
-GB108.0 High throughput genmomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
-GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
-GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
-GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
-GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
-GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
-GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
-GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
-Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
-E. coli genome$1D/slib0/genomes/ecoli.gbk 1
-Blast Human ESTs$1F/slib2/blast/est_human
-TIGR Human Gene Index$1K/slib2/blast/HGI.nr.031898
-Blast Mouse ESTs$1C/slib2/blast/est_mouse
-TIGR Mouse Gene Index$1J/slib2/blast/MGI.nr.022498
-NCBI/BLAST NR DNA$1n/slib2/blast/nt
-SHAR_EOF
-chmod 0644 fast_new ||
-echo 'restore of fast_new failed'
-Wc_c="`wc -c < 'fast_new'`"
-test 1959 -eq "$Wc_c" ||
- echo 'fast_new: original size 1959, current size' "$Wc_c"
-fi
-# ============= fasta.defaults ==============
-if test -f 'fasta.defaults' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta.defaults (File already exists)'
-else
-echo 'x - extracting fasta.defaults (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta.defaults' &&
-#pgm mol matrix g_open g_ext fr_shft e_cut ktup
-# -n/-p -s -e -f -h/-j -E argv[3]
-fasta prot bl50 -10 -2 - 10.0 2
-fasta dna +5/-4 -14 -4 - 2.0 6
-ssearch prot bl50 -10 -2 - 10.0 -
-ssearch dna +5/-4 -14 -4 - 2.0 -
-fastx prot BL50 -12 -2 -20 5.0 2
-fasty prot BL50 -12 -2 -20/-24 5.0 2
-tfastx dna BL50 -14 -2 -20 5.0 2
-tfasty dna BL50 -14 -2 -20/-24 5.0 2
-fasts prot MD20-MS - - - 5.0 -
-tfasts prot MD10-MS - - - 2.0 -
-fastf prot MD20 - - - 5.0 -
-tfastf prot MD10 - - - 2.0 -
-fastm prot MD20 - - - 5.0 -
-tfastm prot MD10 - - - 2.0 -
-SHAR_EOF
-chmod 0644 fasta.defaults ||
-echo 'restore of fasta.defaults failed'
-Wc_c="`wc -c < 'fasta.defaults'`"
-test 529 -eq "$Wc_c" ||
- echo 'fasta.defaults: original size 529, current size' "$Wc_c"
-fi
-# ============= fasta.options ==============
-if test -f 'fasta.options' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta.options (File already exists)'
-else
-echo 'x - extracting fasta.options (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta.options' &&
-doinit.c
-X case 'a': m_msg->aln.showall = 1;
-X case 'B': m_msg->z_bits = 0;
-X case 'b': m_msg->mshow
-X case 'C': m_msg->nmlen
-X case 'd': m_msg->ashow);
-X case 'D': ppst->debug_lib = 1;
-X case 'E': m_msg->e_cut
-X case 'F': m_msg->e_low
-X case 'f': ppst->gdelval
-X case 'g': ppst->ggapval
-X case 'H': m_msg->nohist = 1; break;
-X case 'i': m_msg->revcomp = 1; break;
-X case 'I': m_msg->self = 1; break;
-X case 'J': m_msg->ql_start, ql_stop
-X case 'K': max_buf_cnt (PCOMPLIB)
-X case 'l': m_msg->flstr
-X case 'L': m_msg->long_info = 1
-X case 'M': m_msg->n1_low,&m_msg->n1_high
-X case 'm': m_msg->markx
-X case 'n': m_msg->qdnaseq = 1
-X case 'N': m_msg->maxn
-X case 'p': m_msg->qdnaseq = 0;
-X case 'O': m_msg->outfile
-X case 'q':
-X case 'Q': m_msg->quiet = 1;
-X case 'r': ppst->p_d_mat,&ppst->p_d_mis
-X case 'R': m_msg->dfile
-X case 's': standard_pam(smstr); ppst->pamoff=atoi(bp+1);
-X case 'S': ppst->ext_sq_set = 1;
-X case 't': ppst->tr_type
-X case 'T': PCOMPLIB: worker_1,worker_n
-X _t: max_workers
-X case 'v': ppst->zs_win
-X case 'w': m_msg->aln.llen
-X case 'W': m_msg->aln.llcntx);
-X case 'X': m_msg->sq0off,&m_msg->sq1off
-X case 'x': ppst->pam_x
-X case 'z': ppst->zsflag
-X case 'Z': ppst->zdb_size
-X
-initfa.c
-X case '1': ppst->param_u.fa.iniflag=1;
-X case '3': m_msg->nframe = 3; /* TFASTA */
-X m_msg->nframe = 1; /* for TFASTXY */
-X m_msg->qframe = 1; /* for FASTA, FASTX */
-X case 'A': ppst->sw_flag= 1;
-X case 'c': ppst->param_u.fa.optcut
-X case 'h': ppst->gshift
-X case 'j': ppst->gsubs
-X case 'o': ppst->param_u.fa.optflag = 0;
-X case 'y': ppst->param_u.fa.optwid
-X
-initsw.c
-X case '3': m_msg->qframe = m_msg->nframe = 1;
-SHAR_EOF
-chmod 0644 fasta.options ||
-echo 'restore of fasta.options failed'
-Wc_c="`wc -c < 'fasta.options'`"
-test 1670 -eq "$Wc_c" ||
- echo 'fasta.options: original size 1670, current size' "$Wc_c"
-fi
-# ============= fasta20.doc ==============
-if test -f 'fasta20.doc' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta20.doc (File already exists)'
-else
-echo 'x - extracting fasta20.doc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta20.doc' &&
-X
-X COPYRIGHT NOTICE
-X
-Copyright 1988, 1991, 1992, 1994, 1995, 1996 by William R.
-Pearson and the University of Virginia. All rights reserved. The
-FASTA program and documentation may not be sold or incorporated
-into a commercial product, in whole or in part, without written
-consent of William R. Pearson and the University of Virginia.
-For further information regarding permission for use or
-reproduction, please contact: David Hudson, Assistant Provost for
-Research, University of Virginia, P.O. Box 9025, Charlottesville,
-VA 22906-9025, (434) 924-6853
-X
-X
-The FASTA program package
-X
-Introduction
-X
-X This documentation describes the version 2.0x of the FASTA
-program package (see W. R. Pearson and D. J. Lipman (1988),
-"Improved Tools for Biological Sequence Analysis", PNAS 85:2444-
-2448, and W. R. Pearson (1990) "Rapid and Sensitive Sequence
-Comparison with FASTP and FASTA" Methods in Enzymology 183:63-
-98). Version 2.0 modifies version 1.8 to include explicit
-statistical estimates for similarity scores based on the extreme
-value distribution. In addition, FASTA protein alignments now
-use the Smith-Waterman algorithm with no limitation on gap size.
-FASTA and SSEARCH now use the BLOSUM50 matrix by default, with
-options to change gap penalties on the command line. Version 1.7
-replaces rdf2 and rss with prdf and prss, which use the extreme-
-value distribution to calculate accurate probability estimates.
-X
-X
-Although there are a large number of programs in this package,
-they belong to four groups:
-X
-X
-X Library search programs: FASTA, FASTX, TFASTA, TFASTX, SSEARCH
-X
-X Local homology programs: LFASTA, PLFASTA, LALIGN, PLALIGN, FLALIGN
-X
-X Statistical significance: PRDF, RELATE, PRSS, RANDSEQ
-X
-X Global alignment: ALIGN
-X
-X
-X
-In addition, I have included several programs for protein
-sequence analysis, including a Kyte-Doolittle hydropathicity
-plotting program (GREASE, TGREASE), and a secondary structure
-prediction package (GARNIER).
-X
-X The FASTA sequence comparison programs on this disk are
-improved versions of the FASTP program, originally described in
-Science (Lipman and Pearson, (1985) Science 227:1435-1441). We
-have made several improvements. First, the library search
-programs use a more sensitive method for the initial comparison
-of two sequences which allows the scores of several similar
-regions to be combined. As a result, the results of a library
-search are now given with three scores, initn (the new initial
-score which may include several similar regions), init1 (the old
-fastp initial score from the best initial region), and opt (the
-old fastp optimized score allowing gaps in a 32 residue wide
-band).
-X
-X These programs have also been modified to become "universal"
-(hence FAST-A, for FASTA-All, as opposed to FAST-P (protein) or
-FAST-N (nucleotides)); by changing the environment variable
-SMATRIX, the programs can be used to search protein sequences,
-DNA sequences, or whatever you like. By default, FASTA, LFASTA,
-and the PRDF programs automatically recognize protein and DNA
-sequences. Sequences are first read as amino acids, and then
-converted to nucleotides if the sequence is greater than 85%
-A,C,G,T (the '-n' option can be used to indicate DNA sequences).
-TFASTA compares protein sequences to a translated DNA sequence.
-Alternative scoring matrices can also be used. In addition to
-the BLOSUM50 matrix for proteins, the PAM250 matrix or matrices
-based on simple identities or the genetic code can also be used
-for sequence comparisons or evaluation of significance. Several
-different protein sequence matrices have been included;
-instructions for constructing your own scoring matrix are
-included in the file FORMAT.DOC.
-X
-X
-The remainder of this document is divided into three sections:
-(1) a brief history of the changes to the FASTA package; (2) A
-guide to installing the programs and databases; (3) A guide to
-using the FASTA programs. The programs are very easy to use, so
-if you are using them on a machine that is administered by
-someone else, you may want to skip to section (3) to learn how to
-use the programs, and then read section (1) to look at some of
-the more recent changes. If you are installing the programs on
-your own machine, you will need to read section (2) carefully.
-X
-X
-1. Revision History
-X
-1.1. Changes with version 2.0u
-X
-X Version 2.0u provides several major improvements over
-previous versions of FASTA (and SSEARCH). The most important is
-the incorporation of explicit statistical estimates and
-appropriate normalization of similarity scores. This improvement
-is discussed in more detail below in the section entitled
-Statistical Significance. In addition, all of the protein
-comparison programs now use the BLOSUM50 matrix, with gap
-penalties of -12, -2, by default. BLOSUM50 performs
-significantly better than the older PAM250 matrix. PAM250 can
-still be used with the command line option: -s 250. (DNA
-sequence comparisons use a more stringent gap penalty of -16, -4,
-which produces excellent statistical estimates when optimized
-scores are used. TFASTA uses -16, -4 as well.)
-X
-X The quality of the fit of the extreme value distribution to
-the actual distribution of similarity scores is summarized with
-the Kolmogorov-Smirnov statistic. The acceptance limits for this
-statistic can be found in many statistics books. In general,
-values <0.10 (N=30) indicate excellent agreement between the
-actual and theoretical distributions. If this statistic is >
-0.2, consider using a higher (more stringent) gap penalty, e.g.
--16, -4 rather than -12, -2. The default scoring matrix for DNA
-has been changed to score +5 for an identity and -4 for a
-mismatch. These are the same scores used by BLASTN.
-X
-X With explicit expectation calculations, the program now
-shows all scores and alignments with expectations less than 10.0
-(with optimized scores, 2.0 without optimization) when the "-Q"
-(quiet) mode is used. The expectation threshold can be changed
-with the "-E" option.
-X
-X Finally, the algorithm used to produce the final alignments
-of protein sequences is now a full Smith-Waterman, with unlimited
-gaps. (The older band-limited alignments are used for DNA
-sequences and TFASTA by default, because Smith-Waterman
-alignments are very slow for long sequences.) Both the optimized
-and Smith-Waterman scores are reported; if the Smith-Waterman
-score is higher, then additional gaps allowed a better alignment
-and similarity score to be calculated.
-X
-X FASTA searches now optimize similarity scores by default
-(this slows searches about 2-fold (worst case) for ktup=2). Thus,
-the meaning of the "-o" option has been reversed; "-o" now turns
-off optimization and reports results sorted by "initn" scores.
-Optimization significantly improves the sensitivity of FASTA, so
-that it almost matches Smith-Waterman. With version 2.0, the
-default band width used for optimized calculations can be varied
-with the "-y" option. For proteins with ktup=2, a width of 16
-(-y 16) is used; 16 is also used for DNA sequences. For proteins
-and ktup=1, a width of 32 is used. Searches that disable
-optimization with the "-o" option will work fine for sequences
-that share 25% or more identity in general, but to detect
-evolutionary relationships with 20% - 25% identity, the more
-sensitive default optimization is often required. Optimization
-is required for accurate statistical estimates with either
-protein or DNA sequences.
-X
-X The FASTA package now includes FASTX, a program that
-compares a DNA sequence to a protein sequence database by
-translating the DNA sequence in three frames (the reverse frames
-are selected with the -i option) and aligning the three-frame
-translation with the sequences in the protein database.
-Alignment scores allow frameshifts so that a cDNA or EST sequence
-with insertion/deletion errors can be aligned with its homologues
-from beginning to end.
-X
-X With release 20u6, there is also a TFASTX program, which is
-a replacement for TFASTA. TFASTA treats each of the six reading
-frames of a DNA library sequence as a different sequence; TFASTX
-compares a protein sequence against only two sequences from each
-DNA sequence - the forward and reverse orientation. For a given
-orientation, TFASTX calculates a similarity score for alignments
-that allow frameshifts, thus considering all possible reading
-frames.
-X
-X Another new program is included - randseq - which will
-produce a randomly shuffled sequence (uniform or local shuffle) from an
-input sequence. This randomly shuffled sequence can be used to
-evaluate the statistical estimates produced by FASTA, SSEARCH, or
-BLAST.
-X
-1.2. Changes with version 1.7
-Version 1.7 has been released to provide the PRDF and PRSS
-programs for shuffling sequences and estimating accurately the
-probabilities of the unshuffled-sequence scores.
-X
-PRDF a version of RDF2 that calculates the probability
-X of a similarity score more accurately by using a fit to
-X an extreme value distribution. Code to fit the extreme
-X value distribution parameters and the impetus to update
-X RDF2 was provided by Phil Green, U. of Washington.
-X
-PRSS a version of PRDF that uses a rigorous Smith-Waterman
-X calculation to score similarities
-X
-1.3. Changes with version 1.6
-X
-X FASTA version 1.6 uses a new method for calculating optimal
-scores in a band (the optimization or last step in the FASTA
-algorithm). In addition, it uses a linear-space method for
-calculating the actual alignments. FASTA v1.6 package includes
-several new programs:
-X
-SSEARCH a program to search a sequence database using the
-X rigorous Smith-Waterman algorithm (this program is
-X about 100-fold slower than FASTA with ktup=2 (for
-X proteins)).
-X
-LALIGN A rigorous local sequence alignment program that will
-X display the N-best local alignments (N=10 by default).
-X
-PLALIGN a version of lalign that plots the local alignments to
-X a tektronix display.
-X
-FLALIGN a version of lalign that plots the local alignments to
-X a GCG Figure file.
-X
-X The LALIGN/PLALIGN/FLALIGN programs incorporate the "sim"
-algorithm described by Huang and Miller (1991) Adv. Appl. Math.
-12:337-357. The SSEARCH and PRSS programs incorporate algorithms
-described by Huang, Hardison, and Miller (1990) CABIOS 6:373-381.
-X
-X LFASTA and PLFASTA now calculate a different number of local
-similarities; they now behave more like LALIGN/PLALIGN. Since
-local alignments of identical sequences produce "mirror-image"
-alignments, lalign and lfasta consider only one-half of the
-potential alignments between sequences from identical file names.
-Thus
-X
-X lfasta mchu.aa mchu.aa
-X
-Displays only two alignments; with earlier versions of the
-program, it would have displayed five, including the identity
-alignment. PLFASTA does display five alignments; when two
-identical filenames are given, it draws the identity alignment,
-calculates the two unique local alignments, draws them, and draws
-their mirror images. LFASTA/PLFASTA and LALIGN/PLALIGN use the
-filenames, rather than the actual sequences, to determine whether
-sequences are identical; you can "trick" the programs into
-behaving the old way by putting the same sequence in two
-different files.
-X
-1.4. Changes with version 1.5
-X
-X FASTA version 1.5 includes a number of substantial revisions
-to improve the performance and sensitivity of the program. It is
-now possible to tell the program to optimize all of the initn
-scores greater than a threshold. The threshold is set at the
-same value as the old FASTA cutoff score. Alternatively, you can
-tell FASTA to sort the results by the init1, rather than the
-initn, score by using the -1 option. FASTA -1 ... will report
-the results the way the older FASTP program did.
-X
-X A new method has been provided for selecting libraries. In
-the past, one could enter the name of a sequence file to be
-searched or a single letter that would specify a library from the
-list included in the $FASTLIBS file. Now, you can specify a set
-of library files with a string of letters preceded by a '%'.
-Thus, if the FASTLIBS file has the lines:
-X
-X Genbank 70 primates$1P/seqlib/gbpri.seq 1
-X Genbank 70 rodents$1R/seqlib/gbrod.seq 1
-X Genbank 70 other mammals$1M/seqlib/gbmam.seq 1
-X Genbank 70 vertebrates $1B/seqlib/gbvrt.seq 1
-X
-Then the string: "%PRMB" would tell FASTA to search the four
-libraries listed above. The %PRMB string can be entered either
-on the command line or when the program asks for a filename or
-library letter.
-X
-X FASTA1.5 also provides additional flexibility for specifying
-the number of results and alignments to be displayed with the -Q
-(quiet) option. The -b number option allows you to specify the
-number of sequence scores to show when the search is finished.
-Thus
-X
-X
-X FASTA -b 100 ...
-X
-X
-tells the program to display the top 100 sequence scores. In the
-past, if you displayed 100 scores (in -Q mode), you would also
-have to store 100 alignments. The -d option allows you to limit the
-number of alignments shown. FASTA -b 100 -d 20 would show 100
-scores and 20 alignments.
-X
-X Finally, FASTA can provide a complete list of all of the
-sequences and scores calculated to a file with the -r (results)
-option. FASTA -r results.out ... creates a file with a list of
-scores for every sequence in the library. The list is not
-sorted, and only includes those scores calculated during the
-initial scan of the library.
-X
-2. Installing the FASTA package
-X
-2.1. Installing the programs
-X
-2.1.1. Unix version
-X
-X The FASTA distribution comes with several makefiles that
-can be used to compile the FASTA programs. Over the years, as
-ATT Unix System 5 and BSD unix have converged, these files have
-become very similar. To begin with, I recommend using the
-standard Makefile. There are two values in the makefile that
-should be checked against the values used on your system: the HZ
-value, which is the frequency in ticks per second used by the
-times() system call, this value can usually be found by running:
-X
-X grep HZ /usr/include/sys/*
-X
-and the functions available to return random numbers. If you
-have a rand48() function that returns a 32-bit random number, use
-it and use the lines:
-X
-X NRAND=nrand48
-X RANFLG= -DRAND32
-X
-If not, you will need to use the rand() function call and
-determine whether it returns a 16-bit or a 32-bit value. These
-functions are used by PRDF and PRSS. If you have problems
-compiling the programs, you may want to examine the makefile.unx
-and makefile.sun files, to look for differences. I have tried to
-use very standard unix functions in these programs, and they have
-been successfully compiled, with very small changes to the
-Makefile, on Sun's (Sun OS 4.1), IBM RS/6000's (AIX), and MIPS
-machines (under the BSD environment).
-X
-2.1.2. IBM-PC/DOS version
-X
-X For the IBM-PC/DOS version, the FASTA source code disk
-contains the complete source code to all of the programs on the
-other disks. The programs were compiled with Borland's Turbo
-'C++', using Borland's MAKE utility. The graphics programs
-(PLFASTA, TGREASE) use the graphics device drivers supplied with
-the Turbo 'C' V2.0 package. Also included are the documentation
-files PROGRAMS.DOC and FORMAT.DOC. You do not need any of the
-files on the source code disk to run the programs. The files on
-this disk are identical to the UNIX and VMS versions that run on
-larger machines. Also included is the code to compile
-ALIGN0.EXE. ALIGN0 is the same as ALIGN, but does not penalize
-for end-gaps.
-X
-X If you have the DOS or Macintosh version of the FASTA
-package, to install the programs you should:
-X
-X (1) Make a new directory (folder) for the FASTA programs.
-X This need not be the same as the directory for your
-X sequence databases.
-X
-X (2) Copy the files from the FASTA source disk to the new
-X directory.
-X
-X (3) (DOS only) Edit your AUTOEXEC.BAT file to (a) modify your
-X PATH command to include the FASTA directory and (b) add
-X the line:
-X
-X set FASTLIBS=c:\yourfastadirectory\fastgbs
-X
-X On the Macintosh, you may need to edit the "environment"
-X file and change the line that reads:
-X
-X FASTLIBS=fastgbs
-X
-X to indicate the full directory path for the fastgbs file,
-X for example:
-X
-X FASTLIBS=Q105:FASTA:fastgbs
-X
-X
-X (4) Finally, you will need to edit the fastgbs file. This is
-X usually the most confusing part of the installation. An
-X example of this file is shown below; to customize this
-X file for your machine, you will need to change the file
-X names from those provided in the fastgbs file to ones that
-X reflect the directory names and file names you use on your
-X machine. This is explained in more detail below. In
-X addition, some entries in the fastgbs file refer to other
-X files of file names. These files of file names (as
-X opposed to actual database files) may also need to be
-X edited.
-X
-2.2. Installing the libraries
-X
-2.2.1. The NBRF protein sequence library
-X
-X The FASTA program package does not include any protein or
-DNA sequence libraries. You can obtain the PIR protein sequence
-database from:
-X
-X National Biomedical Research Foundation
-X Georgetown University Medical Center
-X 3900 Reservoir Rd, N.W.
-X Washington, D.C. 20007
-X
-In addition, this database is available via anonymous ftp from
-the host "ftp.bchs.uh.edu". It is available in two formats, VMS
-and CODATA format. The "VMS" format (library type 5 below) can
-be searched much faster, can be easily reformatted for use by the
-"BLAST" rapid searching program, and is compatible with the
-Genetics Computer Group package of programs. The CODATA format
-is used by the EUGENE/MBIR computing package from Baylor (library
-type 2).
-X
-2.2.2. The GENBANK DNA sequence library
-X
-X FASTA, and TFASTA search sequences from the GENBANK
-"flatfile" (not ASN.1) DNA sequence library in the flat-file
-format distributed by the National Center for Biotechnology
-Information and the PIR format used by EBI/EMBL. CD-ROMs can be
-obtained from:
-X
-X Genbank
-X National Center for Biotechnology Information
-X National Library of Medicine
-X National Institutes of Health
-X 8600 Rockville Pike
-X Bethesda, MD 20894
-X
-X
-X The GenBank DNA sequence library is also available via
-anonymous FTP from ncbi.nlm.nih.gov.
-X
-2.2.3. The EBI/EMBL CD-ROM libraries
-X
-X The European Bioinformatics Institute (EBI) is now
-distributing the EMBL CD-ROM that contains both the complete EMBL
-DNA sequence database (which should be essentially identical to
-the GenBank DNA sequence database) and the SWISS-PROT protein
-sequence database. SWISS-PROT is derived from the NBRF Protein
-sequence database with additions from the EBI/EMBL DNA sequence
-database. This CD-ROM is a "best-buy," since it provides both
-DNA and protein sequence libraries. It is available from:
-X
-X
-X European Bioinformatics Institute
-X Hinxton Genome Campus, Hinxton Hall
-X Hinxton, Cambridge CB10 1RQ,
-X United Kingdom
-X Tel: +44 1223 4944
-X Fax: +44 1223 494468
-X Email: DATALIB@ebi.ac.uk
-X
-X
-X
-X In addition, the SWISS-PROT protein sequence database is
-available via anonymous FTP from ncbi.nlm.nih.gov.
-X
-2.3. Finding the libraries: FASTLIBS
-X
-X FASTA and TFASTA use the environment variable FASTLIBS to
-find the protein and DNA sequence libraries. The FASTLIBS
-variable contains the name of a file that has the actual
-filenames of the libraries. The FASTGBS file is an example of
-a file that can be referred to by FASTLIBS. To use the FASTGBS
-file, type:
-X
-X setenv FASTLIBS /usr/lib/fasta/fastgbs (BSD UNIX/csh)
-X or
-X export FASTLIBS=/usr/lib/fasta/fastgbs (SysV UNIX/ksh)
-X
-Then edit the FASTGBS file to indicate where the protein and DNA
-sequence libraries can be found. If you have a hard disk and
-your protein sequence library is kept in the file
-/usr/lib/aabank.lib and your Genbank DNA sequence library is kept
-in the directory: /usr/lib/genbank, then fastgbs might contain:
-X
-X NBRF Protein$0P/usr/lib/seq/aabank.lib 0
-X SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
-X GB Primate$1P@/usr/lib/genbank/gpri.nam
-X GB Rodent$1R@/usr/lib/genbank/grod.nam
-X GB Mammal$1M@/usr/lib/genbank/gmammal.nam
-X ^ 1 ^^^^ 4 ^ ^
-X 23 (5)
-X
-The first line of this file says that there is a copy of the NBRF
-protein sequence database (which is a protein database) that can
-be selected by typing "P" on the command line or when the
-database menu is presented in the file /usr/lib/seq/aabank.lib.
-X
-X Note that there are 4 or 5 fields in the lines in fastgbs.
-The first field is the description of the library which will be
-displayed by FASTA; it ends with a '$'. The second field (1
-character), is a 0 if the library is a protein library and 1 if
-it is a DNA library. The third field (1 character) is the
-character to be typed to select the library.
-X
-X The fourth field is the name of the library file. In the
-example above, the /usr/lib/seq/aabank.lib file contains the
-entire protein sequence library. However the DNA library file
-names are preceded by a '@', because these files (gpri.nam,
-grod.nam, gmammal.nam) do not contain the sequences; instead they
-contain the names of the files which contain the sequences. This
-is done because the GENBANK DNA database is broken down into a
-large number of smaller files. In order to search the entire
-primate database, you must search more than a dozen files.
-X
-X In addition, an optional fifth field can be used to specify
-the format of the library file. Alternatively, you can specify
-the library format in a file of file names (a file preceded by an
-'@'). This field must be separated from the file name by a space
-character (' '). In the example above, the
-aabank.lib file is in Pearson/FASTA format, while the swiss.seq
-file is in PIR/VMS format (from the EMBL CD-ROM). Currently,
-FASTA can read the following formats:
-X
-X 0 Pearson/FASTA (>SEQID - comment/sequence)
-X 1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
-X 2 NBRF CODATA (ENTRY/SEQUENCE)
-X 3 EMBL/SWISS-PROT (ID/DE/SQ)
-X 4 Intelligenetics (;comment/SEQID/sequence)
-X 5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
-X 6 GCG (version 8.0) Unix Protein and DNA (compressed)
-X 11 NCBI Blast1.3.2 format (unix only)
-X
-In particular, this version will work with the EMBL and PIR VMS
-formats that are distributed on the EMBL CD-ROM. The latter
-format (PIR VMS) is much faster to search than EMBL format. This
-release also works with the protein and DNA database formats
-created for the BLASTP and BLASTN programs by SETDB and PRESSDB
-and with the new NCBI search format. If a library format is not
-specified, for example, because you are just comparing two
-sequences, Pearson/FASTA (format 0) is used by default. To
-change this default, you may set the LIBTYPE environment variable
-to a number. For example,
-X
-X setenv LIBTYPE 1
-X
-would cause the program to use the GenBank LOCUS format by
-default for libraries (or the second sequence file), but the
-Pearson/FASTA format would still be used for the query sequence.
-X
-X You can specify a group of library files by putting a '@'
-symbol before a file that contains a list of file names to be
-searched. For example, if @gmam.nam is in the fastgbs file, the
-file "gmam.nam" might contain the lines:
-X
-X </usr/lib/genbank
-X gbpri.seq 1
-X gbrod.seq 1
-X gbmam.seq 1
-X
-In this case, the line beginning with a '<' indicates the
-directory the files will be found in. The remaining lines name
-the actual sequence files. So the first sequence file to be
-searched would be:
-X
-X /usr/lib/genbank/gbpri.seq
-X
-The notation "<PIRNAQ:" might be used under the VAX/VMS operating
-system. Under UNIX, the trailing '/' is left off, so the library
-directory might be written as "</usr/seqlib".
-X
-X With version 1.4 of the FASTA package, the FASTA and TFASTA
-programs can search a library composed of different files in
-different sequence formats. For example, you may wish to search
-the Genbank files (in GenBank flat file format) and the EMBL DNA
-sequence database on CD-ROM. To do this, you simply list the
-names and filetypes of the files to be searched in a file of
-filenames. For example, to search the mammalian portion of
-Genbank, the unannotated portion of Genbank, and the unannotated
-portion of the EMBL library, you could use the file:
-X
-X </usr/lib/DNA
-X gbpri.seq 1
-X # (this '#' causes the program to display the size of the library)
-X gbrod.seq 1
-X gbmam.seq 1
-X gbuna.seq 1
-X unanno.seq 5
-X #
-X
-X You do not need to include library format numbers if you
-X only use the Pearson/FASTA version of the PIR protein se-
-X quence library. If no library type is specified, the
-X program assumes that type 0 is being used (unless you
-X have set LIBTYPE).
-X
-Support for the old compressed GenBank files, which have not been
-distributed for more than four years, has been removed from
-programs in the FASTA package.
-X
-X
-X Test the setup by running FASTA. Enter the sequence file
-'MUSPLFM.AA' when the program requests it (this file is included
-with the programs). The program should then ask you to select a
-protein sequence library. Alternatively, if you run the TFASTA
-program and use the MUSPLFM.AA query sequence, the program should
-show you a selection of DNA sequence libraries. Once the fastgbs
-file has been set up correctly, you can set FASTLIBS=fastgbs in
-your AUTOEXEC.BAT file, and you will not need to remember where
-the libraries are kept or how they are named.
-X
-X FASTA and TFASTA must open a large number of files when
-searching and reporting the results of a GENBANK floppy disk
-format library search. You may have problems with the large
-number of files under DOS on IBM-PC's (Unix and VMS users will
-not have these problems). If you are going to search the GENBANK
-floppy disk format DNA sequence library under DOS, you should add
-the line:
-X
-X FILES=16
-X
-to your CONFIG.SYS file. (Typically this is already done for
-programs like Windows or WordPerfect.)
-X
-3. Using the FASTA Package
-X
-3.1. Overview
-X
-X The FASTA sequence comparison programs all require similar
-information, the name of a query sequence file, a library file,
-and the ktup parameter. All of the programs can accept arguments
-on the command line, or they will prompt for the file names and
-ktup value.
-X
-To use FASTA, simply type:
-X
-X FASTA
-X and you will be prompted for :
-X the name of the test sequence file
-X the name of the library file
-X and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
-X
-X ktup of 2 is about 5 times faster than ktup = 1.
-X For a 200 aa sequence against a 10,000,000 aa
-X library, the program takes about 30 min with
-X ktup = 2, 150 min with ktup = 1, on a 12 Mhz 286
-X IBM-PC.
-X
-X
-The program can also be run by typing
-X
-X FASTA test.aa /lib/bigfile.lib ktup (1 or 2)
-X
-X
-Included with the package are the test files, MUSPLFM.AA,
-LCBO.AA, MCHU.AA and BOVPRL.SEQ. To check to make certain that
-everything is working, you can try:
-X
-X fasta musplfm.aa lcbo.aa
-X and
-X tfasta musplfm.aa bovprl.seq
-X
-To test the local similarity programs LFASTA and PLFASTA, try:
-X
-X lfasta mchu.aa mchu.aa
-X and
-X plfasta mchu.aa mchu.aa (use this only on an IBM-PC with graphics
-X or on a Tektronix terminal under UNIX or VMS)
-X
-MCHU (calmodulin) has four duplicated calcium binding sites that
-are clearly detected by LFASTA. For a more complicated example,
-try MWRTC1.aa, myosin heavy chain.
-X
-3.2. Sequence files
-X
-X The FASTA programs know about three kinds of sequence files
-(four under VMS): (1) plain sequence files that can only be used
-as query sequences or for LFASTA, PRDF, and ALIGN. (2) Standard
-library files. These are the same as plain sequence files, each
-sequence is preceded by a comment line with a '>' in the first
-column. (3) distributed sequence libraries (this is a broad class
-that includes the NBRF/PIR VMS and blocked ascii formats, Genbank
-flat-file format, EMBL flat-file format, and Intelligenetics
-format). All of the files that you create should be of type (1)
-or (2). Type (2) files (ones with a '>' comment line) can be used
-as query or library sequence files by all of the programs.
-X
-X I have included several sample test files, *.AA. The first
-line may begin with a '>' or ';' followed by a comment. The
-text after ';' in other lines will be ignored. Spaces and
-tabs (and anything else that is not an amino-acid code) are
-ignored.
-X
-X Library files should have the form:
-X
-X >Sequence name and identifier
-X A F A S Y T .... actual sequence.
-X F S S .... second line of sequence.
-X >Next sequence name and identifier
-X
-This is often referred to as "FASTA" or "Pearson" format. You
-can build your own library by concatenating several sequence
-files. Just be sure that each sequence is preceded by a line
-beginning with a '>' with a sequence name.
-X
-X The test file should not have lines longer than 120
-characters, and sequences entered with word processors should use
-a document mode, with normal carriage returns at the end of
-lines.
-X
-Program Summary
-X
-3.3. Sequence search programs
-X
-FASTA universal sequence comparison. Defaults to comparing
-X protein sequences; if the sequences are > 85% A+C+G+T
-X or the -n option is used, a DNA sequence is assumed.
-X
-FASTX Search a protein sequence library using amino acid
-X sequence comparison to the forward three frames of a
-X translated DNA query sequence. (The reverse frames are
-X specified with the -i option.) Alignment scores allow
-X frameshifts; the final alignment uses a Smith-Waterman
-X type alignment routine (no limit on gaps) that allows
-X frameshifts.
-X
-TFASTA Search DNA library for a protein sequence by
-X translating the DNA sequence to protein in all six
-X frames (three forward frames with the -3 command line
-X option). TFASTA with ktup=2 is about as fast as a DNA
-X FASTA with ktup=4, and is substantially more sensitive.
-X (also reads the GENBANK library)
-X
-TFASTX Search DNA library for a protein sequence by
-X translating the DNA sequence to protein in all six
-X frames (three forward frames with the -3 command line
-X option) calculating similarity scores that allow
-X frameshifts. TFASTX produces an optimal Smith-Waterman
-X alignment of the query and translated-library sequence.
-X
-SSEARCH Universal sequence comparison using the Smith-Waterman
-X algorithm ( T. F. Smith and M. S. Waterman (1981) J.
-X Mol. Biol. 147:195-197). This program uses code
-X developed by Huang and Miller (X. Huang, R. C.
-X Hardison, W. Miller (1990) CABIOS 6:373-381) for
-X calculating the local similarity score and code from
-X the ALIGN program (see below) for calculating the local
-X alignment. SSEARCH is about 50-times slower than FASTA
-X with ktup=2 (for proteins).
-X
-ALIGN optimal global alignment of two sequences with no
-X short-cuts. This program is a slightly modified
-X version of one taken from E. Myers and W. Miller. The
-X algorithm is described in E. Myers and W. Miller,
-X "Optimal Alignments in Linear Space" (CABIOS (1988)
-X 4:11-17).
-X
-3.4. Local similarity programs
-X
-LFASTA local similarity searches showing local alignments.
-X The algorithm used to calculate the local alignment in
-X a band has been improved (Chao, Pearson, and Miller,
-X submitted).
-X
-PLFASTA local similarity searches with plot output (on the IBM,
-X this program requires that the environment variable
-X BGIDIR be set).
-X
-PCLFASTA (unix only) local similarity searches with plot output
-X using pic commands.
-X
-LALIGN Calculates the N-best local alignments using a rigorous
-X algorithm. (N=10 by default.) The algorithm was
-X developed by Huang and Miller (X. Huang and W. Miller
-X (1991) Adv. Appl. Math. 12:337-357), which is a
-X linear-space version of an algorithm described by M. S.
-X Waterman and M. Eggert (J. Mol. Biol. 197:723-728).
-X Like SSEARCH, LALIGN is rigorous, but also very slow.
-X
-PLALIGN A version of LALIGN that plots its output to a screen
-X or to a Tektronix terminal emulator.
-X
-3.5. Statistical Significance
-X
-X With version 2.0 of the FASTA program distribution, FASTA,
-TFASTA, and SSEARCH now provide estimates of statistical
-significance for library searches. Work by Altschul, Arratia,
-Karlin, Mott, Waterman, and others (see Altschul et al. (1994)
-Nature Genetics 6:119 for an excellent review) suggests that
-local sequence similarity scores follow the extreme value
-distribution, so that P(s > x) = 1 - exp(-exp(-lambda(x-u))) where
-u = ln(Kmn)/lambda and m,n are the lengths of the query and
-library sequence. This formula can be rewritten as: 1 - exp(-Kmn
-exp(-lambda x)), which shows that the average score for an
-unrelated library sequence increases with the logarithm of the
-length of the library sequence. FASTA and SSEARCH use simple
-linear regression against the log of the library sequence
-length to calculate a normalized "z-score" with mean 50,
-regardless of library sequence length, and variance 10. These
-z-scores can then be used with the extreme value distribution and
-the poisson distribution (to account for the fact that each
-library sequence comparison is an independent test) to calculate
-the number of library sequences expected to obtain a score greater than or
-equal to the score obtained in the search. The original idea and
-routines to do the linear regression on library sequence length
-were provided by Phil Green, U. Washington. This version of FASTA
-and SSEARCH uses a slightly different strategy for fitting the
-data than those originally provided by Dr. Green.
-X
-X The expected number of sequences is plotted in the histogram
-using an "*". Since the parameters for the extreme value
-distribution are not calculated directly from the distribution of
-similarity scores, the pattern of "*'s" in the histogram gives a
-qualitative view of how well the statistical theory fits the
-similarity scores calculated by FASTA and SSEARCH. For FASTA, if
-optimized scores are calculated for each sequence in the database
-(the default), the agreement between the actual distribution of
-"z-scores" and the expected distribution based on the length
-dependence of the score and the extreme value distribution is
-usually very good. Likewise, the distribution of SSEARCH Smith-
-Waterman scores typically agrees closely with the actual
-distribution of "z-scores." The agreement with unoptimized
-scores, ktup=2, is often not very good, with too many high
-scoring sequences and too few low scoring sequences compared with
-the predicted relationship between sequence length and similarity
-score. In those cases, the expectation values may be
-overestimates.
-X
-X The statistical routines assume that the library contains a
-large sample of unrelated sequences. If this is not the case,
-then the expectation values are meaningless. Likewise, if there
-are fewer than 20 sequences in the library, the statistical
-calculations are not done.
-X
-X For protein searches, library sequences with E() values <
-0.01 for searches of a 10,000 entry protein database are almost
-always homologous. Frequently sequences with E()-values from 1 -
-10 are related as well. Remember, however, that these E() values
-also reflect differences between the amino acid composition of
-the query sequence and that of the "average" library sequence.
-Thus, when searches are done with query sequences with "biased"
-amino-acid composition, unrelated sequences may have
-"significant" scores because of sequence bias. The programs
-below, PRDF and PRSS, can address this problem by calculating
-similarity scores for random sequences with the same length and
-amino acid composition.
-X
-X If optimization is not used ("-o"), E-values for DNA
-sequences overestimate the significance of the scores that are
-obtained and unrelated sequences frequently have E()-values <
-0.0005. With optimization, the agreement between E()-value
-compares favorably with protein sequence comparison. This is in
-part due to the use of more stringent gap penalties for DNA
-sequence comparison, -16, -4 rather than -12, -2. With the
-latter penalties, many unrelated sequences appear to have
-significant similarity. Nevertheless, since protein sequence
-comparison is much more sensitive, DNA sequence comparison should
-not be used to identify sequences that encode protein. Even with
-ktup=6, optimization rarely increases run-times more than 50%
-with mRNA-size query sequences. Optimization should be used
-whenever possible.
-X
-X Similar comments apply to TFASTA, where higher gap
-penalties (-16,-4) are required for accurate statistical
-estimates. Because TFASTA produces so many artificial "coding"
-sequences with atypical amino acid compositions, the statistical
-estimates with TFASTA are often over estimates. With optimized
-scores, ktup=1, and gap penalties of -16, -4, unrelated sequences
-will sometimes have E() values of 0.1. If initn scores are used,
-unrelated sequences may have E() values < 0.01.
-X
-PRDF improved version of RDF program that includes accurate
-X probability estimates for all three scoring methods
-X (includes local or window shuffle routine)
-X
-PRSS A version of PRDF that uses the rigorous Smith-Waterman
-X calculation used by SSEARCH.
-X
-RANDSEQ produces a randomly shuffled sequence from a query
-X sequence.
-X
-RELATE significance program described by Dayhoff (Atlas of
-X Protein Sequence and Structure, Vol. 5, Supplement 3).
-X Each chunk of 25 residues in one sequence is compared
-X to every 25 residue fragment of the second sequence.
-X Sequences which are genuinely related will have a large
-X number of scores greater than 3 standard deviations
-X above the mean score of all of the comparisons.
-X
-3.6. Other analysis programs
-X
-AACOMP calculate the amino acid composition and molecular
-X weight of a sequence.
-X
-BESTSCOR calculate the best self-comparison score.
-X
-GREASE Kyte-Doolittle hydropathicity profile
-X
-TGREASE graphic plot of Kyte-Doolittle profile
-X
-FROMGB convert from GenBank LOCUS format (also used by the
-X IBI-Pustell programs) to Pearson/FASTA format.
-X
-GARNIER A secondary structure prediction program using the
-X method of Garnier, Osgusthorpe, and Robson, J. Mol.
-X Biol., (1978) 120:97-120.
-X
-3.7. Options
-X
-X These programs have a number of output options, which are
-invoked by the environment variables LINLEN, SHOWALL, and MARKX.
-Alternatively, these values can be controlled by command line
-options. The number of sequence residues per output line is now
-adjustable by setting the environment variable LINLEN, or the
-command line option -w. LINLEN is normally 60, to change it set
-LINLEN=80 before running the program or add -w 80 to the command
-line. LINLEN can be set up to 200. SHOWALL (-a) determines
-whether all, or just a portion, of the aligned sequences are
-displayed. Previously, FASTP would show the entire length of
-both sequences in an alignment while FASTN would only show the
-portions of the two sequences that overlapped. Now the default is
-to show only the overlap between the two sequences, to show
-complete sequences, set SHOWALL=1, or use the -a option on the
-command line.
-X
-X The differences between the two aligned sequences can be
-highlighted in three different ways by changing the environment
-variable MARKX or the -m option. Normally (MARKX=0) the program
-uses ':' to denote identities and '.' to denote conservative
-replacements. If MARKX=1, the program will not mark identities;
-instead conservative replacements are denoted by a 'x' and non-
-conservative substitutions by a 'X'. If MARKX=2, the residues in
-the second sequence are only shown if they are different from the
-first. MARKX=3 displays the aligned library sequences without the
-query sequence; these can be used to build a primitive multiple
-alignment. MARKX=4 provides a graphical display of the
-boundaries of the alignments. Thus the five options are:
-X
-X
-X MARKX=0 MARKX=1 MARKX=2 MARKX=3 MARKX=4
-X
-X MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT
-X ::..:: ::: xx X ..KS..Y... MWKSCGYPYT ----------
-X MWKSCGYPYT MWKSCGYPYT
-X
-X
-(fasta20u4, Feb. 1996) In addition MARKX=10 is a new, parseable
-format for use with other programs. See the file "readme.v20u4"
-for a more complete description.
-X
-3.8. Command line options
-X
-X It is now possible to specify several options on the
-command line, instead of using environment variables. The
-command line options are preceded by a dash; the following
-options are available:
-X
--a same as showall=1
-X
--A force Smith-Waterman alignments for DNA sequences and
-X TFASTA. By default, only FASTA protein sequence
-X comparisons use Smith-Waterman alignments.
-X
--b # Number of sequence scores to be shown on output. In
-X the absence of this option, fasta (and tfasta and
-X ssearch) display all library sequences obtaining
-X similarity scores with expectations less than 10.0 if
-X optimized scores are used, or 2.0 if they are not. The
-X -b option can limit the display further, but it will
-X not cause additional sequences to be displayed.
-X
--c # Threshold score for optimization (OPTCUT). Set "-c 1"
-X to optimize every sequence in a database. (This slows
-X the program down about 5-fold).
-X
--E # Limit the number of scores and alignments shown based
-X on the expected number of scores. Used to override the
-X expectation value of 10.0 used by default. When used
-X with -Q, -E 2.0 will show all library sequences with
-X scores with an expectation value <= 2.0.
-X
--d # Number of alignments to be reported by default. (Used
-X in conjunction with -Q). No longer necessary, see "-b"
-X above.
-X
--f Penalty for the first residue in a gap (-12 by default
-X for proteins, -16 for DNA or for TFASTA).
-X
--g Penalty for additional residues in a gap (-2 by default
-X for proteins, -4 for DNA and TFASTA ).
-X
--h Penalty for frameshift (FASTX, TFASTX only).
-X
--H Omit histogram.
-X
--i Invert (reverse complement) the query sequence if it is
-X DNA. For TFASTX, search the reverse complement of the
-X library sequence only.
-X
--k # Threshold for joining init1 segments to build an initn
-X score (GAPCUT).
-X
--l file Location of library menu file (FASTLIBS).
-X
--L Display more information about the library sequence in
-X the alignment.
-X
--m # MARKX = # (0, 1, 2, 3, 4, 10)
-X
--n Force the query sequence to be treated as a DNA
-X sequence. This is particularly useful for query
-X sequences that contain a large number of ambiguous
-X residues, e.g. transcription factor binding sites.
-X
--O Send copy of results to "filename." Helpful for
-X environments without STDOUT.
-X
--o Turn off default optimization of all scores greater
-X than OPTCUT. Sort results by "initn" scores.
-X
--Q,-q Quiet - does not prompt for any input. Writes scores
-X and alignments to the terminal or standard output file.
-X
--r file Save a results summary line for every sequence in the
-X sequence library. The summary line includes the
-X sequence identifier, superfamily number (if available)
-X position in the library, and the similarity scores
-X calculated. This option can be used to evaluate the
-X sensitivity and selectivity of different search
-X strategies (see W. R. Pearson (1991) Genomics 11:635-
-X 650.)
-X
--s file SMATRIX is read from file. Several SMATRIX files are
-X provided with the standard distribution. For protein
-X sequences: codaa.mat - based on minimum mutation
-X matrix; idnaa.mat - identity matrix; pam250.mat - the
-X PAM250 matrix developed by Dayhoff et al (Atlas of
-X Protein Sequence and Structure, vol. 5, suppl. 3,
-X 1978); pam120.mat - a PAM120 matrix. The default
-X scoring matrix is BLOSUM50, PAM250 is available with
-X "-s 250", BLOSUM62 ("-s BL62") is also available.
-X
--v (LINEVAL) values used for line styles in plfasta
-X
--w # Line length (width) = number (<200)
-X
--x Specifies offsets for the beginning of the query and
-X library sequence. For example, if you are comparing
-X upstream regions for two genes, and the first sequence
-X contains 500 nt of upstream sequence while the second
-X contains 300 nt of upstream sequence, you might try:
-X
-X fasta -x "-500 -300" seq1.nt seq2.nt
-X
-X If the -x option is not used, FASTA assumes numbering
-X starts with 1. This option will not work properly with
-X the translated library sequence with tfasta. (You
-X should double check to be certain the negative
-X numbering works properly.)
-X
--y Set the width of the band used for calculating
-X "optimized" scores. For proteins and ktup=2, the width
-X is 16. For proteins with ktup=1, the width is 32 by
-X default. For DNA the width is 16.
-X
--z Turn off statistical calculations.
-X
--1 sort output by init1 score (as FASTP used to do).
-X
--3 (TFASTA, TFASTX only) translate only three forward
-X frames
-X
-X
-For example:
-X
-X fasta -w 80 -a seq1.aa seq2.aa
-X
-would compare the sequence in seq1.aa to that in seq2.aa and
-display the results with 80 residues on an output line, showing
-all of the residues in both sequences. Be sure to enter the
-options before entering the file names, or just enter the options
-on the command line, and the program will prompt for the file
-names.
-X
-X Not all of these options are appropriate for all of the
-programs. The options above are used by FASTA and TFASTA. RELATE
-uses the -s option, ALIGN uses the -w, -m, and -s options, and
-the PRDF program uses -c, -f, -k, and -s.
-X
-4. Environment variable summary
-X
-X Environment variables allow you to set search parameters
-that will be used frequently when you run a program; for example,
-if you prefer to use the PAM250 scoring matrix, you might "set
-SMATRIX=250." Command line parameters, if used, always override
-environment variable settings. The following environment
-variables are used by this program:
-X
-AABANK the file name of the default sequence library.
-X
-FASTLIBS the location of the file which contains the list of
-X library files to be searched.
-X
-GAPCUT threshold used for joining init1 regions in the second
-X step of FASTA. Normally set based on sequence length
-X and ktup.
-X
-LIBTYPE used to specify the format of the library sequence for
-X FASTA and TFASTA.
-X
-LINLEN output line length - can go up to 200
-X
-LINEVAL used by plfasta to determine the relationship between
-X line style and similarity score (-v). This should be a
-X string of three numbers, e.g. "200 100 50"
-X
-MARKX symbol for denoting matches, mismatches. Note that this
-X symbol is only used across the optimized local region;
-X sequences that are outside this region are not marked.
-X
-OPTCUT Set the threshold to be used for optimization in a band
-X around the best initial region. Normally the OPTCUT
-X value is calculated from the length of the sequence and
-X the ktup value (for a 200 residue sequence, it is about
-X 28). If OPTCUT=1, every sequence in the database will
-X be optimized. This is the most sensitive option.
-X
-PAMFACT This version of fasta uses a more sensitive method for
-X identifying initial regions. Instead of using a
-X constant factor (fact) for each match in a ktup, it
-X uses the scoring matrix (PAM) scores. While this works
-X well for protein sequences, it has not been as
-X carefully tested for DNA sequences, so by default, this
-X modification is used for proteins but not for DNA.
-X Setting the PAMFACT environment variable to 1 forces
-X the option on; PAMFACT=0 turns it off.
-X
-SHOWALL on output, show the complete sequence instead of just
-X the overlap of the two aligned sequences.
-X
-SMATRIX alternative scoring matrix file.
-X
-TEKPLOT (IBM-PC only, Unix and VMS versions generate Tektronix
-X graphics by default) Generate Tektronix output.
-X Normally, PLFASTA and TGREASE plot graphs using the
-X Turbo C graphics library. Unfortunately, often these
-X plots cannot be printed out without special programs.
-X However, if you set TEKPLOT=1, tektronix graphics
-X commands will be used. Tektronix commands can be used
-X together with the PLOTDEV program, available from
-X Microplot Systems. They no longer sell this program,
-X but it can be downloaded from
-X http://iquest.com/~microplt/index1.html. PLOTDEV also
-X allows you to print out graphics on the screen.
-X
-As always, please inform me of bugs as soon as possible.
-X
-William R. Pearson
-Department of Biochemistry
-Box 440, Jordan Hall
-U. of Virginia
-Charlottesville, VA
-X
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 fasta20.doc ||
-echo 'restore of fasta20.doc failed'
-Wc_c="`wc -c < 'fasta20.doc'`"
-test 49762 -eq "$Wc_c" ||
- echo 'fasta20.doc: original size 49762, current size' "$Wc_c"
-fi
-# ============= fasta3.1 ==============
-if test -f 'fasta3.1' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta3.1 (File already exists)'
-else
-echo 'x - extracting fasta3.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta3.1' &&
-.TH FASTA/TFASTA/FASTX/TFASTXv3 1 local
-.SH NAME
-fasta3, fasta3_t \- scan a protein or DNA sequence library for similar
-sequences
-X
-tfasta3, tfasta3_t \- compare a protein sequence to a DNA sequence
-library, translating the DNA sequence library `on-the-fly'.
-X
-fastx3, fastx3_t \- compare a DNA sequence to a protein sequence
-database, comparing the translated DNA sequence in forward and
-reverse frames.
-X
-tfastx3, tfastx3_t \- compare a protein sequence to a DNA sequence
-database, calculating similarities with frameshifts to the forward and
-reverse orientations.
-X
-fasty3, fasty3_t \- compare a DNA sequence to a protein sequence
-database, comparing the translated DNA sequence in forward and reverse
-frames.
-X
-tfasty3, tfasty3_t \- compare a protein sequence to a DNA sequence
-database, calculating similarities with frameshifts to the forward and
-reverse orientations.
-X
-fasts3, fasts3_t \- compare unordered peptides to a protein sequence database
-X
-tfasts3, tfasts3_t \- compare unordered peptides to a translated DNA
-sequence database
-X
-fastf3, fastf3_t \- compare mixed peptides to a protein sequence database
-X
-tfastf3, tfastf3_t \- compare mixed peptides to a translated DNA
-sequence database
-X
-ssearch3, ssearch3_t \- compare a protein or DNA sequence to a
-sequence database using the Smith-Waterman algorithm.
-X
-prss3, prfx3 \- estimate statistical significance of an alignment by
-comparing the score to the distribution of similarity scores generated
-by shuffling the second sequence. prss3 uses Smith-Waterman. prfx3
-uses the fastx algorithm.
-X
-.SH DESCRIPTION
-X
-Release 3.x of the FASTA package provides a modular set of sequence
-comparison programs that can run on conventional single processor
-computers or in parallel on multiprocessor computers. Seven different
-programs \- fasta3, fastx3, fasty3, tfastx3, tfasty3, tfasta3, and
-ssearch3 \- are currently available.
-X
-All of the comparison programs share a set of basic command line
-options; additional options are available for individual comparison
-functions.
-X
-The fasta3_t, fastx3_t, fasty3_t, tfasta3_t, tfastx3_t, tfasty3_t and
-ssearch3_t programs are threaded versions that will run in parallel on
-Digital Equipment, Sun, and SGI multiprocessor computers.
-X
-.SH Options for comparison functions
-.LP
-These versions of the fasta programs have been modified to accept a
-query sequence from the unix "stdin" data stream. This makes it much
-easier to use fasta3 and its relatives as part of a WWW page. To
-indicate that stdin is to be used, use "@" as the query
-sequence file name. "@" can also be used to specify a
-subset of the query sequence to be used, e.g:
-.sp
-.ti 0.5i
-cat query.aa | fasta3 -q @:50-150 s
-.sp
-would search the 's' database with residues 50-150 of query.aa. FASTA
-cannot automatically detect the sequence type (protein vs DNA) when
-"stdin" is used, so the '-n' option is required for DNA.
-.TP
-\-1
-Sort by "init1" score.
-.TP
-\-3
-(TFASTA3, TFASTX/Y3 only) use only forward frame translations
-.TP
-\-a #
-"SHOWALL" option attempts to align all of both sequences in FASTA and SSEARCH.
-.TP
-\-A
-force Smith-Waterman alignment for output. Smith-Waterman is the
-default for protein sequences and FASTX3, but not for TFASTA3 or DNA
-comparisons with FASTA3.
-.TP
-\-b #
-number of best scores to show (must be < -E cutoff if -E is given)
-.TP
-\-B
-show z-scores rather than bit scores
-.TP
-\-c #
-threshold for band optimization (FASTA, FASTX)
-.TP
-\-C #
-(fasta34t11d4) length of name abbreviation in alignments, default = 6.
-.TP
-\-d #
-number of best alignments to show (must be < -E cutoff)
-.TP
-\-D
-turn on debugging mode. Enables checks on sequence alphabet that
-cause problems with tfastx3, tfasty3, tfasta3.
-.TP
-\-E #
-expectation value upper limit for score and alignment display.
-Defaults are 10.0 for FASTA3 and SSEARCH3 protein searches, 5.0 for
-translated DNA/protein comparisons, and 2.0 for DNA/DNA searches.
-.TP
-\-f #
-penalty for opening a gap (or first residue for older versions)
-.TP
-\-F #
-expectation value lower limit for score and alignment display.
--F 1e-6 prevents library sequences with E()-values lower than 1e-6
-from being displayed. This allows the user to focus on more distant
-relationships.
-.TP
-\-g #
-penalty for additional residues in a gap
-.TP
-\-h #
-(FASTX3, TFASTX3, FASTY3, TFASTY3 only) penalty for a frameshift between
-two codons.
-.TP
-\-j #
-(FASTY3, TFASTY3 only) penalty for a frameshift within a codon.
-.TP
-\-H
-turn off histogram display
-.TP
-\-i
-(DNA only) reverse complement the query sequence. (TFASTX) compare against
-only the reverse complement of the library sequence.
-.TP
-\-l str
-specify FASTLIBS file
-.TP
-\-L
-report long sequence description in alignments
-.TP
-\-m 0,1,2,3,4,5,6,9,10 alignment display options. \fC-m 0, 1, 2, 3\fP
-display different types of alignments. \fC-m 4\fP provides an
-alignment "map" on the query. \fC-m 5\fP combines the alignment map
-and a \fC-m 0\fP alignment. \fC-m 6\fP provides an HTML output.
-\fC-m 9\fP does not change the alignment output, but provides
-alignment coordinate and percent identity information with the best
-scores report. \fC-m 9c\fP adds encoded alignment information to the
-\fC-m 9\fP; \fC-m 9i\fP provides only percent identity and alignment
-length information with the best scores. With current versions of the
-FASTA programs, independent \fC-m\fP options can be combined;
-e.g. \fC-m 1 -m 9c -m 6\fP.
-.TP
-\-M #-#
-molecular weight (residue) cutoffs. -M "101-200" examines only sequences that are 101-200 residues long.
-.TP
-\-n
-force query to nucleotide sequence
-.TP
-\-N #
-break long library sequences into blocks of # residues. Useful for
-bacterial genomes, which have only one sequence entry. -N 2000 works
-well for bacterial genomes.
-.TP
-\-o
-(FASTA) turn fasta band optimization off during initial phase. This was
-the behavior of fasta1.x versions.
-.TP
-\-O file
-send output to file
-.TP
-\-q/-Q
-quiet option; do not prompt for input
-.TP
-\-r "+n/-m"
-values for match/mismatch for DNA comparisons. \fC+n\fP is
-used for the maximum positive value and \fC-m\fP is used for the
-maximum negative value. Values between max and min, are rescaled, but
-residue pairs having the value -1 continue to be -1.
-.TP
-\-R file
-save all scores to statistics file (previously -r file)
-.TP
-\-s name
-specify substitution matrix. BLOSUM50 is used by default;
-PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
-P250, or BL62. With this version, many more scoring matrices are
-available, including BLOSUM80 (BL80), and MDM10, MDM20, MDM40 (Jones,
-Taylor, and Thornton, 1992 CABIOS 8:275-282; specified as -s M10, -s
-M20, -s M40). Alternatively, BLASTP1.4 format scoring matrix files can
-be specified. BL80, BL62, and P120 are scaled in 1/2 bit units; all
-the other matrices use 1/3 bit units. DNA scoring matrices can also
-be specified with the "-r" option.
-.TP
-\-S
-treat lower case letters in the query or database as low complexity
-regions that are equivalent to 'X' during the initial database scan,
-but are treated as normal residues for the final alignment display.
-Statistical estimates are based on the 'X'ed out sequence used during
-the initial search. Protein databases (and query sequences) can be
-generated in the appropriate format using John Wootton's "pseg"
-program, available from ftp://ncbi.nlm.nih.gov/pub/seg/pseg. Once you
-have compiled the "pseg" program, use the command:
-.IP
-\fCpseg database.fasta -z 1 -q > database.lc_seg\fP
-.TP
-\-t #
-Translation table - tfasta3, fastx3, tfastx3, fasty3, and
-tfasty3 now support the BLAST translation tables. See
-\fChttp://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi\fP.
-.IP
-In addition, "\-t t" or "\-t t#" turns on the addition of an implicit termination
-codon to a protein:translated DNA match. That is, each protein
-sequence implicitly ends with "*", which matches the termination codes
-for the appropriate genetic code. "\-t t#" sets implicit termination
-and a different genetic code.
-.TP
-\-T #
-(threaded, parallel only) number of threads or workers to use (set by
-default to 4 at compile time).
-.TP
-\-U
-Do RNA sequence comparisons: treat 'T' as 'U', allow G:U base pairs (by
-scoring "G-A" and "T-C" as "G-G" -1). Search only one strand.
-.TP
-\-V "?$%*"
-Allow special annotation characters in query sequence. These characters
-will be displayed in the alignments on the coordinate number line.
-.TP
-\-w # line width for similarity score, sequence alignment, output.
-.TP
-\-W # context length (default is 1/2 of line width -w) for alignment
-programs, like fasta and ssearch, that provide additional sequence context.
-.TP
-\-x #match,#mismatch
-scores used for matches to 'X:X','N:N', '*:*' matches, and the corresponding
-'X:not-X', etc, mismatches, overriding the values
-specified in the scoring matrix. If only one value is given, it is
-used for both values.
-.TP
-\-X "#,#"
-offsets query, library sequence for numbering alignments
-.TP
-\-y #
-Width for band optimization; by default 16 for DNA and protein ktup=2;
-32 for protein ktup=1;
-.TP
-\-z #
-Specify statistical calculation. Default is -z 1, which uses
-regression against the length of the library sequence. -z 0 disables
-statistics. -z 2 provides maximum likelihood estimates for lambda and K,
-censoring the 250 lowest and 250 highest scores. -z 3 uses Altschul
-and Gish's statistical estimates for specific protein BLOSUM scoring
-matrices and gap penalties. -z 4,5: an alternate regression method.
-\-z 6 uses a composition based maximum likelihood estimate based
-on the method of Mott (1992) Bull. Math. Biol. 54:59-75.
--z 11,12,14,15,16: compute the regression against scores of randomly
-shuffled copies of the library sequences. Twice as many comparisons
-are performed, but accurate estimates can be generated from databases
-of related sequences. -z 11 uses the -z 1 regression strategy, etc.
-.TP
-\-Z db_size
-Set the apparent database size used for expectation value calculations
-(used for protein/protein FASTA and SSEARCH, and for FASTX, FASTY, TFASTX,
-and TFASTY).
-.SH Environment variables:
-.TP
-FASTLIBS
-location of library choice file (-l FASTLIBS)
-.TP
-SMATRIX
-default scoring matrix (-s SMATRIX)
-.TP
-SRCH_URL
-the format string used to define the option to re-search the
-database.
-.TP
-REF_URL
-the format string used to define the option to lookup the library
-sequence in entrez, or some other database.
-X
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 fasta3.1 ||
-echo 'restore of fasta3.1 failed'
-Wc_c="`wc -c < 'fasta3.1'`"
-test 10345 -eq "$Wc_c" ||
- echo 'fasta3.1: original size 10345, current size' "$Wc_c"
-fi
-# ============= fasta3.rsp ==============
-if test -f 'fasta3.rsp' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta3.rsp (File already exists)'
-else
-echo 'x - extracting fasta3.rsp (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta3.rsp' &&
-compacc.obj doinit.obj showbest.obj htime.obj apam.obj karlin.obj scaleswn.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
-SHAR_EOF
-chmod 0644 fasta3.rsp ||
-echo 'restore of fasta3.rsp failed'
-Wc_c="`wc -c < 'fasta3.rsp'`"
-test 177 -eq "$Wc_c" ||
- echo 'fasta3.rsp: original size 177, current size' "$Wc_c"
-fi
-# ============= fasta3x.doc ==============
-if test -f 'fasta3x.doc' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta3x.doc (File already exists)'
-else
-echo 'x - extracting fasta3x.doc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta3x.doc' &&
-(Updated December, 2003)
-X
-X
-X COPYRIGHT NOTICE
-X
-Copyright 1988, 1991, 1992, 1994, 1995, 1996, 1999 by William R.
-Pearson and the University of Virginia. All rights reserved. The
-FASTA program and documentation may not be sold or incorporated
-into a commercial product, in whole or in part, without written
-consent of William R. Pearson and the University of Virginia.
-For further information regarding permission for use or
-reproduction, please contact: David Hudson, Assistant Provost for
-Research, University of Virginia, P.O. Box 9025, Charlottesville,
-VA 22906-9025, (434) 924-6853
-X
-The FASTA program package
-X
-Introduction
-X
-X This documentation describes the version 3 of the FASTA
-program package (see W. R. Pearson and D. J. Lipman (1988),
-"Improved Tools for Biological Sequence Analysis", PNAS
-85:2444-2448 (Pearson and Lipman, 1988); W. R. Pearson (1996)
-"Effective protein sequence comparison" Meth. Enzymol.
-266:227-258 (Pearson, 1996); Pearson et al. (1997) Genomics
-46:24-36 (Zhang et al., 1997); Pearson (1999) Meth. in
-Molecular Biology 132:185-219 (Pearson, 2000)). Version 3 of the
-FASTA packages contains many programs for searching DNA and
-protein databases and one program (prss3) for evaluating
-statistical significance from randomly shuffled sequences.
-Several additional analysis programs, including programs that
-produce local alignments, are available as part of version 2 of
-the FASTA package, which is still available.
-X
-X This document is divided into three sections: (1) A summary
-overview of the programs in the FASTA3 package; (2) A guide to
-installing the programs and databases; (3) A guide to using the
-FASTA programs. The revision history of the programs can be found
-in the readme.v30..v34 files. The programs are easy to use, so
-if you are using them on a machine that is administered by
-someone else, you can skip section (2) and focus on (1) and (3)
-to learn how to use the programs. If you are installing the
-programs on your own machine, you will need to read section (2)
-carefully.
-X
-1. An overview of the FASTA programs
-X
-X Although there are a large number of programs in this
-package, they belong to three groups: (1) "Conventional" Library
-search programs: FASTA3, FASTX3, FASTY3, TFASTA3, TFASTX3,
-TFASTY3, SSEARCH3; (2) Programs for searching with short
-fragments: FASTS3, FASTF3, TFASTS3, TFASTF3; (3) Statistical
-significance: PRSS3. Programs that start with fast search
-protein databases, while tfast programs search translated DNA
-databases. Table I gives a brief description of the programs.
-X
-X
-X Table I. Comparison programs in the FASTA3 package
-X
----------------------------------------------------------------------------
-fasta3 Compare a protein sequence to a protein sequence
-X database or a DNA sequence to a DNA sequence database
-X using the FASTA algorithm (Pearson and Lipman, 1988,
-X Pearson, 1996). Search speed and selectivity are con-
-X trolled with the ktup(wordsize) parameter. For protein
-X comparisons, ktup = 2 by default; ktup =1 is more sen-
-X sitive but slower. For DNA comparisons, ktup=6 by de-
-X fault; ktup=3 or ktup=4 provides higher sensitivity;
-X ktup=1 should be used for oligonucleotides (DNA query
-X lengths < 20).
-X
-ssearch3 Compare a protein sequence to a protein sequence
-X database or a DNA sequence to a DNA sequence database
-X using the Smith-Waterman algorithm (Smith and Water-
-X man, 1981). ssearch3 is about 10-times slower than
-X FASTA3, but is more sensitive for full-length protein
-X sequence comparison.
-X
-fastx3/ fasty3 Compare a DNA sequence to a protein sequence database,
-X by comparing the translated DNA sequence in three
-X frames and allowing gaps and frameshifts. fastx3 uses
-X a simpler, faster algorithm for alignments that allows
-X frameshifts only between codons; fasty3 is slower but
-X produces better alignments with poor quality sequences
-X because frameshifts are allowed within codons.
-X
-tfastx3/ tfasty3 Compare a protein sequence to a DNA sequence database,
-X calculating similarities with frameshifts to the for-
-X ward and reverse orientations.
-X
-tfasta3 Compare a protein sequence to a DNA sequence database,
-X calculating similarities (without frameshifts) to the 3
-X forward and three reverse reading frames. tfastx3 and
-X tfasty3 are preferred because they calculate similarity
-X over frameshifts.
-X
-fastf3/tfastf3 Compares an ordered peptide mixture, as would be ob-
-X tained by Edman degradation of a CNBr cleavage of a
-X protein, against a protein (fastf) or DNA (tfastf)
-X database.
-X
-fasts3/tfasts3 Compares set of short peptide fragments, as would be
-X obtained from mass-spec. analysis of a protein, against
-X a protein (fasts) or DNA (tfasts) database.
----------------------------------------------------------------------------
-X
-2. Installing FASTA and the sequence databases
-X
-2.1. Obtaining the libraries
-X
-X The FASTA program package does not include any protein or
-DNA sequence libraries. Protein databases are available on CD-
-ROM from the PIR and EMBL (see below), or via anonymous FTP from
-many different sources. As this document is updated in the fall
-of 1999, no DNA databases are available on CD-ROM from the major
-sequence databases: Genbank at the National Center for Biotechnology
-Information (www.ncbi.nlm.nih.gov and ftp://ncbi.nlm.nih.gov) and
-EMBL at the European Bioinformatics Institute (www.ebi.ac.uk).
-However, the databases are available via anonymous FTP from both
-sites.
-X
-2.1.1. The GENBANK DNA sequence library
-X
-X Because of the large size of DNA databases, you will
-probably want to keep DNA databases in only one, or possibly two,
-formats. The FASTA3 programs that search DNA databases - fasta3,
-tfastx/y3, and tfasta3 can read DNA databases in Genbank flatfile
-(not ASN.1), FASTA, GCG/compressed-binary, BLAST1.4 (pressdb),
-and BLAST2.0 (formatdb) formats, as well as EMBL format. If you
-are also running the GCG suite of sequence analysis programs, you
-should use GCG/compressed-binary format or BLAST2.0 format for
-your fasta3 searches. If not, BLAST2.0 is a good choice. These
-files are considerably more compact than Genbank flat files, and
-are preferred. The NCBI does not provide software for converting
-from Genbank flat files to Blast2.0 DNA databases, but you can
-use the Blast formatdb program to convert ASN.1 formatted Genbank
-files, which are available from the NCBI ftp site.
-X
-X The NCBI also provides the nr, swissprot, and several EST
-databases that are used by BLAST in FASTA format from:
-ftp://ncbi.nlm.nih.gov/blast/db. These databases are updated
-nightly.
-X
-2.1.2. The NBRF protein sequence library
-X
-X You can obtain the PIR protein sequence database (Barker et
-al., 1998) from:
-X
-X National Biomedical Research Foundation
-X Georgetown University Medical Center
-X 3900 Reservoir Rd, N.W.
-X Washington, D.C. 20007
-X
-or via ftp from nbrf.georgetown.edu or from the NCBI
-(ncbi.nlm.nih.gov/repository/PIR). The data in the ascii
-directory is in PIR Codata format, which is not widely used. I
-recommend the PIR/VMS format data (libtype=5) in the vms
-directory.
-X
-2.1.3. The EBI/EMBL CD-ROM libraries
-X
-X The European Bioinformatics Institute (EBI) distributes both
-the EMBL DNA database and the SwissProt database on CD-ROM
-(Bairoch and Apweiler, 1996), and they are available from:
-X
-X EMBL-Outstation European Bioinformatics Institute
-X Wellcome Trust Genome Campus,
-X Hinxton Hall
-X Hinxton,
-X Cambridge CB10 1SD
-X United Kingdom
-X Tel: +44 (0)1223 494444
-X Fax: +44 (0)1223 494468
-X Email: DATALIB@ebi.ac.uk
-X
-In addition, the SWISS-PROT protein sequence database is
-available via anonymous FTP from
-ftp://ftp.expasy.ch/databases/swiss-prot/ (also see
-www.expasy.ch).
-X
-2.2. Finding the libraries: FASTLIBS
-X
-X The major problem that most new users of the FASTA package
-have is in setting up the program to find the databases and their
-library type. In general, if you cannot get fasta3 to read a
-sequence database, it is likely that something is wrong with the
-FASTLIBS file. A common problem is that the database file is
-found, but either no sequences are read, or an incorrect number
-of entries is read. This is almost always because the library
-format (libtype) is incorrect. Note that a type 5 file (PIR/VMS
-format) can be read as a type 0 (default FASTA) format file, and
-the number of entries will be correct, but the sequence lengths
-will not.
-X
-X All the search programs in the FASTA3 package use the
-environment variable FASTLIBS to find the protein and DNA
-sequence libraries. The FASTLIBS variable contains the name of a
-file that has the actual filenames of the libraries. The
-fastlibs file included with the distribution is an example of
-a file that can be referred to by FASTLIBS. To use the fastlibs
-file, type:
-X
-X setenv FASTLIBS /usr/lib/fasta/fastgbs (BSD UNIX/csh)
-X or
-X export FASTLIBS=/usr/lib/fasta/fastgbs (SysV UNIX/ksh)
-X
-Then edit the fastlibs file to indicate where the protein and DNA
-sequence libraries can be found. If you have a hard disk and
-your protein sequence library is kept in the file
-/usr/lib/seq/aabank.lib and your Genbank DNA sequence library is kept
-in the directory: /usr/lib/genbank, then fastgbs might contain:
-X
-X NBRF Protein$0P/usr/lib/seq/aabank.lib 0
-X SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
-X GB Primate$1P@/usr/lib/genbank/gpri.nam
-X GB Rodent$1R@/usr/lib/genbank/grod.nam
-X GB Mammal$1M@/usr/lib/genbank/gmammal.nam
-X ^ 1 ^^^^ 4 ^ ^
-X 23 (5)
-X
-The first line of this file says that there is a copy of the NBRF
-protein sequence database (which is a protein database) in the
-file /usr/lib/seq/aabank.lib, and that it can be selected by
-typing "P" on the command line or when the database menu is
-presented.
-X
-X Note that there are 4 or 5 fields in the lines in fastgbs.
-The first field is the description of the library which will be
-displayed by FASTA; it ends with a '$'. The second field (1
-character), is a 0 if the library is a protein library and 1 if
-it is a DNA library. The third field (1 character) is the
-character to be typed to select the library.
-X
-X The fourth field is the name of the library file. In the
-example above, the /usr/lib/seq/aabank.lib file contains the
-entire protein sequence library. However the DNA library file
-names are preceded by a '@', because these files (gpri.nam,
-grod.nam, gmammal.nam) do not contain the sequences; instead they
-contain the names of the files which contain the sequences. This
-is done because the GENBANK DNA database is broken down into a
-large number of smaller files. In order to search the entire
-primate database, you must search more than a dozen files.
-X
-X In addition, an optional fifth field can be used to specify
-the format of the library file. Alternatively, you can specify
-the library format in a file of file names (a file preceded by an
-'@'). This field must be separated from the file name by a space
-character (' '). In the example above, the
-aabank.lib file is in Pearson/FASTA format, while the swiss.seq
-file is in PIR/VMS format (from the EMBL CD-ROM). Currently,
-FASTA can read the following formats:
-X
-X 0 Pearson/FASTA (>SEQID - comment/sequence)
-X 1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
-X 2 NBRF CODATA (ENTRY/SEQUENCE)
-X 3 EMBL/SWISS-PROT (ID/DE/SQ)
-X 4 Intelligenetics (;comment/SEQID/sequence)
-X 5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
-X 6 GCG (version 8.0) Unix Protein and DNA (compressed)
-X 11 NCBI Blast1.3.2 format (unix only)
-X 12 NCBI Blast2.0 format (unix only, fasta32t08 or later)
-X
-In particular, this version will work with the EMBL and PIR VMS
-formats that are distributed on the EMBL CD-ROM. The latter
-format (PIR VMS) is much faster to search than EMBL format. This
-release also works with the protein and DNA database formats
-created for the BLASTP and BLASTN programs by SETDB and PRESSDB
-and with the new NCBI search format. If a library format is not
-specified, for example, because you are just comparing two
-sequences, Pearson/FASTA (format 0) is used by default. To
-specify a library type on the command line, add it to the library
-filename and surround the filename and library type in quotes:
-X
-X fasta3 query.file "/seqdb/genbank/gbpri1.seq 1"
-X
-X You can specify a group of library files by putting a '@'
-symbol before a file that contains a list of file names to be
-searched. For example, if @gmam.nam is in the fastgbs file, the
-file "gmam.nam" might contain the lines:
-X
-X </seqdb/genbank
-X gbpri1.seq 1
-X gbpri2.seq 1
-X gbpri3.seq 1
-X gbpri4.seq 1
-X gbrod.seq 1
-X gbmam.seq 1
-X
-In this case, the line beginning with a '<' indicates the
-directory the files will be found in. The remaining lines name
-the actual sequence files. So the first sequence file to be
-searched would be:
-X
-X /seqdb/genbank/gbpri1.seq
-X
-The notation "<PIRNAQ:" might be used under the VAX/VMS operating
-system. Under UNIX, the trailing '/' is left off, so the library
-directory might be written as "</usr/seqlib".
-X
-X The FASTA programs can search a database composed of
-different files in different sequence formats. For example, you
-may wish to search the Genbank files (in GenBank flat file
-format) and the EMBL DNA sequence database on CD-ROM. To do
-this, you simply list the names and filetypes of the files to be
-searched in a file of filenames. For example, to search the
-mammalian portion of Genbank, the unannotated portion of Genbank,
-and the unannotated portion of the EMBL library, you could use
-the file:
-X
-X </usr/lib/DNA
-X gbpri.seq 1
-X # (this '#' causes the program to display the size of the library)
-X gbrod.seq 1
-X ...
-X gbmam.seq 1
-X ...
-X gbuna.seq 1
-X ...
-X unanno.seq 5
-X #
-X
-X You do not need to include library format numbers if you
-X only use the Pearson/FASTA version of the PIR protein se-
-X quence library. If no library type is specified, the
-X program assumes that type 0 is being used.
-X
-X Test the setup by running FASTA. Enter the sequence file
-'mgstm1.aa' when the program requests it (this file is included
-with the programs). The program should then ask you to select a
-protein sequence library. Alternatively, if you run the TFASTA
-program and use the mgstm1.aa query sequence, the program should
-show you a selection of DNA sequence libraries. Once the fastgbs
-file has been set up correctly, you can set FASTLIBS=fastgbs in
-your AUTOEXEC.BAT file, and you will not need to remember where
-the libraries are kept or how they are named.
-X
-3. Using the FASTA Package
-X
-3.1. Overview
-X
-X The FASTA sequence comparison programs all require similar
-information, the name of a query sequence file, a library file,
-and the ktup parameter. All of the programs can accept arguments
-on the command line, or they will prompt for the file names and
-ktup value.
-X
-To use FASTA, simply type:
-X
-X FASTA
-X and you will be prompted for :
-X the name of the test sequence file
-X the name of the library file
-X and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
-X (ktup of 2 is about 5 times faster than ktup = 1)
-X
-The program can also be run by typing
-X
-X FASTA test.aa /lib/bigfile.lib ktup (1 or 2)
-X
-Included with the package are several test files. To check to
-make certain that everything is working, you can try:
-X
-X fasta musplfm.aa prot_test.lib
-X and
-X tfastx mgstm1.aa gst.nlib
-X
-3.2. Sequence files
-X
-X The fasta3 programs know about three kinds of sequence
-files: (1) plain sequence files - files that contain nothing but
-sequence residues - can only be used as query sequences. (2)
-FASTA format files. These are the same as plain sequence files,
-except that each sequence is preceded by a comment line with a '>' in the
-first column. (3) distributed sequence libraries (this is a broad
-class that includes the NBRF/PIR VMS and blocked ascii formats,
-Genbank flat-file format, EMBL flat-file format, and
-Intelligenetics format). All of the files that you create should
-be of type (1) or (2). FASTA format files (ones with a '>' and
-comment before the sequence) are preferred, because they can be
-used as query or library sequence files by all of the programs.
-X
-X I have included several sample test files, *.aa and *.seq as
-well as two small sequence libraries, prot_test.lib and gst.nlib.
-The first line may begin with a '>' followed by a comment. Spaces and
-tabs (and anything else that is not an amino-acid code) are
-ignored.
-X
-X Library files should have the form:
-X
-X >Sequence name and identifier
-X A F A S Y T .... actual sequence.
-X F S S .... second line of sequence.
-X >Next sequence name and identifier
-X
-This is often referred to as "FASTA" or "Pearson" format. You can build
-your own library by concatenating several sequence files. Just
-be sure that each sequence is preceded by a line beginning with a
-'>' with a sequence name.
-X
-X The test file should not have lines longer than 120
-characters, and sequences entered with word processors should use
-a document mode, with normal carriage returns at the end of
-lines.
-X
-X A different format is required to specify the ordered
-peptide mixture for fastf3/tfastf3. For example:
-X
-X >mgstm1
-X MGCEN,
-X MIDYP,
-X MLLAY,
-X MLLGY
-X
-indicates M in the first position of all four peptides (as from
-CNBr), G, I, L (twice) in the second position (first cycle),
-C,D,L (twice) in the third position, etc. The commas (,) are
-required to indicate the number of fragments in the mixture, but
-there should be no comma after the last residue.
-X
-X For the fasts3/tfasts3 program, the format is the same,
-except that there is no requirement for the peptides to be the
-same length.
-X
-4. Statistical Significance
-X
-X All the programs in the FASTA3 package attempt to calculate
-accurate estimates of the statistical significance of a match.
-For fasta3, ssearch3, and fastx3/y3, these estimates are very
-accurate (Pearson, 1998, Zhang et al., 1997). Altschul et al.
-(Altschul et al., 1994) provides an excellent review of the
-statistics of local similarity scores. Local sequence similarity
-scores follow the extreme value distribution, so that P(s > x) =
-1 - exp(-exp(-lambda(x-u))) where u = ln(Kmn)/lambda and m,n are
-the lengths of the query and library sequence. This formula can
-be rewritten as: 1 - exp(-Kmn exp(-lambda x)), which shows that
-the average score for an unrelated library sequence increases
-with the logarithm of the length of the library sequence. The
-fasta3 programs use simple linear regression against the log
-of the library sequence length to calculate a normalized "z-
-score" with mean 50, regardless of library sequence length, and
-variance 10. (Several other estimation methods are available with
-the -z option.) These z-scores can then be used with the extreme
-value distribution and the poisson distribution (to account for
-the fact that each library sequence comparison is an independent
-test) to calculate the number of library sequences expected to obtain a
-score greater than or equal to the score obtained in the search.
-The original idea and routines to do the linear regression on
-library sequence length were provided by Phil Green, U. Washington.
-This version uses a slightly different strategy for fitting the
-data than those originally provided by Dr. Green.
-X
-X The expected number of sequences is plotted in the histogram
-using an "*". Since the parameters for the extreme value
-distribution are not calculated directly from the distribution of
-similarity scores, the pattern of "*'s" in the histogram gives a
-qualitative view of how well the statistical theory fits the
-similarity scores calculated by the programs. For fasta3, if
-optimized scores are calculated for each sequence in the database
-(the default), the agreement between the actual distribution of
-"z-scores" and the expected distribution based on the length
-dependence of the score and the extreme value distribution is
-usually very good. Likewise, the distribution of ssearch3 Smith-
-Waterman scores typically agrees closely with the actual
-distribution of "z-scores." The agreement with unoptimized
-scores, ktup=2, is often not very good, with too many high
-scoring sequences and too few low scoring sequences compared with
-the predicted relationship between sequence length and similarity
-score. In those cases, the expectation values may be
-overestimates.
-X
-X With version 33t01, all the FASTA programs also report a
-"bit" score, which is equivalent to the bit score reported by
-BLAST2. The FASTA33/BLAST2 bit score is calculated as: (lambda*S
-- ln K)/ln 2, where S is the raw similarity score, lambda and K
-are statistical parameters estimated from the distribution of
-unrelated sequence similarity scores. The statistical
-significance of a given bit score depends on the lengths of the
-query and library sequences and the size of the library, but a 1
-bit increase in score corresponds to a 2-fold reduction in
-expectation; a 10-bit increase implies 1000-fold lower
-expectation, etc.
-X
-X The statistical routines assume that the library contains a
-large sample of unrelated sequences. If this is not true, then
-statistical parameters can be estimated by using the -z 11-15
-options. -z options greater than 10 calculate a shuffled
-similarity score for each library sequence, in addition to the
-unshuffled score, and estimate the statistical parameters from
-the scores of the shuffled sequences. If there are fewer than 20
-sequences in the library, the statistical calculations are not
-done.
-X
-X For protein searches, library sequences with E() values <
-0.01 for searches of a 10,000 entry protein database are almost
-always homologous. Frequently sequences with E()-values from 1 -
-10 are related as well, but unrelated sequences ( 1 - 10 per
-search) will have scores in this range as well. Remember,
-however, that these E() values also reflect differences between
-the amino acid composition of the query sequence and that of the
-"average" library sequence. Thus, when searches are done with
-query sequences with "biased" amino-acid composition, unrelated
-sequences may have "significant" scores because of sequence bias.
-PRSS3 can address this problem by calculating similarity scores
-for random sequences with the same length and amino acid
-composition.
-X
-5. Options
-X
-X Command line options are available to change the scoring
-parameters and output display. Command line options must precede
-other program arguments, such as the query and library file
-names.
-X
-5.1. Command line options
-X
--a (fasta3, ssearch3 only) show both sequences in their
-X entirety.
-X
--A force Smith-Waterman alignments for fasta3 DNA sequences.
-X By default, only fasta3 protein sequence comparisons use
-X Smith-Waterman alignments.
-X
--B Show normalized score as a z-score, rather than a bit-score
-X in the list of best scores.
-X
--b # Number of sequence scores to be shown on output. In the
-X absence of this option, fasta (and tfasta and ssearch)
-X display all library sequences obtaining similarity scores
-X with expectations less than 10.0 if optimized scores are
-X used, or 2.0 if they are not. The -b option can limit the
-X display further, but it will not cause additional sequences
-X to be displayed.
-X
--c # Threshold score for optimization (OPTCUT). Set "-c 1" to
-X optimize every sequence in a database.
-X
--E # Limit the number of scores and alignments shown based on the
-X expected number of scores. Used to override the expectation
-X value of 10.0 used by default. When used with -Q, -E 2.0
-X will show all library sequences with scores with an
-X expectation value <= 2.0.
-X
--d # Maximum number of alignments to be displayed. Ignored if
-X "-Q" is not used.
-X
--f Penalty for the first residue in a gap (-12 by default for
-X proteins, -16 for DNA, -15 for FAST[XY]/TFAST[XY]).
-X
--F # Limit the number of scores and alignments shown based on the
-X expected number of scores. "-E #" sets the highest E()-value
-X shown; "-F #" sets the lowest E()-value. Thus, "-F 0.0001"
-X will not show any matches or alignments with E() < 0.0001.
-X This allows one to skip over close relationships in searches
-X for more distant relationships.
-X
--g Penalty for additional residues in a gap (-2 by default for
-X proteins, -4 for DNA, -3 for FAST[XY]/TFAST[XY]).
-X
--h Penalty for frameshift (fastx3/y3, tfastx3/y3 only).
-X
--H Omit histogram.
-X
--i Invert (reverse complement) the query sequence if it is DNA.
-X For tfasta3/x3/y3, search the reverse complement of the
-X library sequence only.
-X
--j # Penalty for frameshift within a codon (fasty3/tfasty3 only).
-X
--l file
-X Location of library menu file (FASTLIBS).
-X
--L Display more information about the library sequence in the
-X alignment.
-X
--M low-high
-X Range of amino acid sequence lengths to be included in the
-X search.
-X
--m # Specify alignment type: 0, 1, 2, 3, 4, 5, 6, 9, 10
-X
-X -m 0 -m 1 -m 2 -m 3 -m 4
-X MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT
-X ::..:: ::: xx X ..KS..Y... MWKSCGYPYT ----------
-X MWKSCGYPYT MWKSCGYPYT
-X
-X -m 5 provides a combination of -m 4 and -m 0. -m 6 provides
-X -m 5 plus HTML formatting.
-X
--m 9 provides coordinates and scores with the best score
-X information. A simple "-m 9" extends the normal best score
-X information:
-X
-X The best scores are: opt bits E(14548)
-X XURTG4 glutathione transferase (EC 2.5.1.18) 4 - ( 219) 1248 291.7 1.1e-79
-X
-X to include the additional information (on the same line,
-X separated by a <tab>):
-X
-X %_id %_gid sw alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs
-X 0.771 0.771 1248 218 1 218 1 218 1 218 1 219 0 0 0
-X
-X -m 9c provides additional information: an encoded alignment
-X string. Thus:
-X
-X 10 20 30 40 50 60 70
-X GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
-X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
-X XURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
-X 20 30 40 50 60
-X
-X would be encoded:
-X
-X =23+9=13-2=10-1=3+1=5
-X
-X The alignment encoding is with respect to the alignment, not
-X the sequences. The coordinate of the alignment is given
-X earlier in the " -m 9c" line.
-X
--m 10
-X -m 10 is a new, parseable format for use with other
-X programs. See the file "readme.v20u4" for a more complete
-X description.
-X
-X As of version "fa34t23b2", it has become possible to combine
-X independent "-m" options. Thus, one can use "-m 1 -m 6 -m
-X 9".
-X
--M low-high
-X Include library sequences (proteins only) with lengths
-X between low and high.
-X
--n Force the query sequence to be treated as a DNA sequence.
-X This is particularly useful for query sequences that contain
-X a large number of ambiguous residues, e.g. transcription
-X factor binding sites.
-X
--O Send copy of results to "filename." Helpful for
-X environments without STDOUT (mostly for the Macintosh).
-X
--o Turn off default optimization of all scores greater than
-X OPTCUT. Sort results by "initn" scores (reduces the accuracy
-X of statistical estimates).
-X
--p Force query to be treated as protein sequence.
-X
--Q,-q
-X Quiet - does not prompt for any input. Writes scores and
-X alignments to the terminal or standard output file.
-X
--r Specify match/mismatch scores for DNA comparisons. The
-X default is "+5/-4". "+3/-2" can perform better in some
-X cases.
-X
--R file
-X Save a results summary line for every sequence in the
-X sequence library. The summary line includes the sequence
-X identifier, superfamily number (if available) position in
-X the library, and the similarity scores calculated. This
-X option can be used to evaluate the sensitivity and
-X selectivity of different search strategies (Pearson, 1995,
-X Pearson, 1998).
-X
--s file
-X Specify the scoring matrix file. fasta3 uses the same
-X scoring matrices as Blast1.4/2.0. Several scoring matrix
-X files are included in the standard distribution. For
-X protein sequences: codaa.mat - based on minimum mutation
-X matrix; idnaa.mat - identity matrix; pam250.mat - the PAM250
-X matrix developed by Dayhoff et al. (Dayhoff et al., 1978);
-X pam120.mat - a PAM120 matrix. The default scoring matrix is
-X BLOSUM50 ("-s BL50"). Other matrices available from within
-X the program are: PAM250/"-s P250", PAM120/"-s P120",
-X PAM40/"-s P40", PAM20/"-s P20", MDM10 - MDM40/"-s M10 - M40"
-X (MDM are modern PAM matrices from Jones et al. (Jones et
-X al., 1992)), BLOSUM50, 62, and 80/"-s BL50", "-s BL62", "-s
-X BL80".
-X
--S Treat lower-case characters in the query or library
-X sequences as "low-complexity" ("seg"-ed) residues.
-X Traditionally, the "seg" program (Wootton and
-X Federhen, 1993) is used to remove low complexity regions in
-X DNA sequences by replacing the residues with an "X". When
-X the "-S" option is used, the FASTA33 programs provide a
-X potentially more informative approach. With "-S", lower
-X case characters in the query or database sequences are
-X treated as "X"'s during the initial scan, but are treated as
-X normal residues during the final alignment display. Since
-X statistical significance is calculated from the similarity
-X score calculated during the library search, when the lower
-X case residues are "X"'s, low complexity regions will not
-X produce statistically significant matches. However, if a
-X significant alignment contains low complexity regions, their
-X alignment is shown. With "-S", lower case characters may be
-X included in the alignment to indicate low complexity
-X regions, and the final alignment score may be higher than
-X the score obtained during the search.
-X
-X The pseg program can be used to produce databases (or query
-X sequences) with lower case residues indicating low
-X complexity regions using the command:
-X
-X pseg database.fasta -z 1 -q > database.lc_seg
-X
-X (seg can also be used with some post processing, see
-X readme.v33tx.)
-X
--U Treat the query sequence as an RNA sequence. In addition to
-X selecting a DNA/RNA alphabet, this option causes changes to
-X the scoring matrix so that 'G:A' , 'T:C' or 'U:C' are scored
-X as 'G:G'.
-X
--V str
-X It is now possible to specify some annotation characters
-X that can be included (and will be ignored), in the query
-X sequence file. Thus, one might have a file with:
-X "ACVS*ITRLFT?", where "*" and "?" are used to indicate
-X phosphorylation. By giving the option -V '*?', those
-X characters in the query will be moved to an "annotation
-X string", and alignments that include the annotated residues
-X will be highlighted with the appropriate character above the
-X sequence (on the number line).
-X
--w # Line length (width) = number (<200)
-X
--W # context length (default is 1/2 of line width -w) for
-X alignment programs, like fasta and ssearch, that provide additional
-X sequence context.
-X
--x # Specify the penalty for a match to an 'X', independently of
-X the PAM matrix. Particularly useful for fastx3/fasty3,
-X where termination codons are encoded as 'X'.
-X
--X Specifies offsets for the beginning of the query and library
-X sequence. For example, if you are comparing upstream
-X regions for two genes, and the first sequence contains 500
-X nt of upstream sequence while the second contains 300 nt of
-X upstream sequence, you might try:
-X
-X fasta -X "-500 -300" seq1.nt seq2.nt
-X
-X If the -X option is not used, FASTA assumes numbering starts
-X with 1. (You should double check to be certain the negative
-X numbering works properly.)
-X
--y Set the width of the band used for calculating "optimized"
-X scores. For proteins and ktup=2, the width is 16. For
-X proteins with ktup=1, the width is 32 by default. For DNA
-X the width is 16.
-X
--z -1,0,1,2,3,4,5
-X -z -1 turns off statistical calculations. -z 0 estimates the
-X significance of the match from the mean and standard
-X deviation of the library scores, without correcting for
-X library sequence length. -z 1 (the default) uses a weighted
-X regression of average score vs library sequence length; -z 2
-X uses maximum likelihood estimates of Lambda and K; -z 3 uses
-X Altschul-Gish parameters (Altschul and Gish, 1996); -z 4 - 5
-X uses two variations on the -z 1 strategy. -z 1 and -z 2 are
-X the best methods, in general.
-X
--z 11,12,14,15
-X estimate the statistical parameters from shuffled copies of
-X each library sequence. This doubles the time required for a
-X search, but allows accurate statistics to be estimated for
-X libraries comprised of a single protein family.
-X
--Z db_size
-X set the apparent size of the database to be used when
-X calculating expectation E() values. If you searched a
-X database with 1,000 sequences, but would like to have the
-X E()-values calculated in the context of a 100,000 sequence
-X database, use '-Z 100000'.
-X
--1 sort output by init1 score (for compatibility with FASTP -
-X do not use).
-X
--3 translate only three forward frames
-X
-For example:
-X
-X fasta -w 80 -a seq1.aa seq2.aa
-X
-would compare the sequence in seq1.aa to that in seq2.aa and
-display the results with 80 residues on an output line, showing
-all of the residues in both sequences. Be sure to enter the
-options before entering the file names, or just enter the options
-on the command line, and the program will prompt for the file
-names.
-X
-X (November, 1997) In addition, it is now possible to provide
-the fasta programs with the query sequence (fasta, fasty,
-ssearch, tfastx), or two sequences (prss, lalign, plalign) from
-the unix "stdin" stream. This makes it much easier to set up
-FASTA or PRSS WWW pages. To specify that stdin be used, rather
-than a file, the file name should be specified as '-' or '@' (the
-latter file name makes it possible to specify a subset of the
-sequence). Thus:
-X
-X cat query.aa | fasta -q @:25-75 s
-X
-would take residues 25-75 from query.aa and search the 's'
-library (see the discussion of FASTLIBS).
-X
-5.2. Environment variables
-X
-X Because the current version of the program allows the user
-to set virtually every option on the command line (except the
-ktup, which must be set as the third command line argument), only
-the FASTLIBS environment variable is routinely used.
-X
-FASTLIBS
-X specifies the location of the file which contains the list
-X of library descriptions, locations, and library types (see
-X section on finding library files).
-X
-6. Frequently Asked Questions
-X
-X (1) Which program should I use? See Table I.
-X
-X (2) How do I search with both DNA strands with fasta3 and
-X fastx3? With version 32 of the FASTA program package, all
-X searches that use DNA queries (e.g. fasta3, fastx3/y3)
-X examine both strands. To revert to earlier FASTA behavior
-X - only looking at the forward or reverse strand - use -3
-X to search only the forward strand and -i -3 to search only
-X the reverse strand.
-X
-X (3) When I search Genbank - the program reports: 0 residues in
-X 0 sequences. This typically happens because the program
-X does not know that you are searching a Genbank flatfile
-X database and is looking for a FASTA format database. Be
-X certain to specify the library type ("1" for Genbank
-X flatfile) with the database name.
-X
-X (4) What is the difference between fastx3 and fasty3 (or
-X tfastx3 and tfasty3)? [t]fastx3 uses a simpler codon
-X based model for alignments that does not allow frameshifts
-X in some codon positions (see ref. (Zhang et al., 1997)).
-X tfastx3 is about 30% faster, but tfasty3 can produce
-X higher quality alignments in some cases.
-X
-X (5) When I run fasta3 -q, I don't see any (or very little)
-X output, but I get lots of scores when I run interactively.
-X With the -Q option, the number of high scores displayed is
-X limited by the -E # cutoff, which is 10.0 for protein
-X comparisons, 2.0 for DNA comparisons, and 5.0 for
-X translated DNA:protein comparisons. In interactive mode
-X (without -Q), by default you see 20 high scores,
-X regardless of E() value.
-X
-X (6) What is ktup - All of the programs with fast in their name
-X use a computer science method called a lookup table to
-X speed the search. For proteins with ktup=2, this means
-X that the program does not look at any sequence alignment
-X that does not involve matching two identical residues in
-X both sequences. Likewise with DNA and ktup = 6, the
-X initial alignment of the sequences looks for 6 identical
-X adjacent nucleotides in both sequences. Because it is
-X less likely that two identical amino-acids will line up by
-X chance in two unrelated proteins, this speeds up the
-X comparison. But very distantly related sequences may
-X never have two identical residues in a row but will have
-X single aligned identities. In this case, ktup = 1 may
-X find alignments that ktup=2 misses.
-X
-X (7) Sometimes, in the list of best scores, the same sequence
-X is shown twice with exactly the same score. Sometimes,
-X the sequence is there twice, but the scores are slightly
-X different. When any of the fasta3 programs searches a long
-X sequence, it breaks the sequence up into overlapping
-X pieces. The length of the piece depends on the length of
-X the query and the particular program being used (it can
-X also be controlled with the -N #### option). Since the
-X pieces overlap by the length of the query sequence (or
-X 3*query_length for fastx/y3 and tfasta/x/y3), if the
-X highest scoring alignment is at the end of one piece, it
-X will be scored again at the beginning of the next piece.
-X If the alignment is not completely included in the
-X overlap region, one of the pieces will give a higher score
-X than the other. These duplications can be detected by
-X looking at the coordinates of the alignment. If either
-X the beginning or end coordinate is identical in two
-X alignments, the alignments are at least partially
-X duplicates.
-X
-As always, please inform me of bugs as soon as possible.
-X
-William R. Pearson
-Department of Biochemistry
-Jordan Hall Box 800733
-U. of Virginia
-Charlottesville, VA
-X
-wrp@virginia.EDU
-X
-7. References
-X
-Altschul, S. F., Boguski, M. S., Gish, W., and Wootton, J. C.
-(1994). Issues in searching molecular sequence databases. Nature
-Genet. 6,119-129.
-X
-Altschul, S. F. and Gish, W. (1996). Local alignment statistics.
-Methods Enzymol. 266,460-480.
-X
-Bairoch, A. and Apweiler, R. (1996). The Swiss-Prot protein
-sequence data bank and its new supplement TrEMBL. Nucleic Acids.
-Res. 24,21-25.
-X
-Barker, W. C., Garavelli, J. S., Haft, D. H., Hunt, L. T.,
-Marzec, C. R., Orcutt, B. C., Srinivasarao, G. Y., Yeh, L. S. L.,
-Ledley, R. S., Mewes, H. W., Pfeiffer, F., and Tsugita, A.
-(1998). The PIR-International Protein Sequence Database. Nucleic
-Acids Res 26,27-32.
-X
-Dayhoff, M., Schwartz, R. M., and Orcutt, B. C. (1978). A model
-of evolutionary change in proteins. In Atlas of Protein Sequence
-and Structure, vol. 5, supplement 3. M. Dayhoff, ed. (Silver
-Spring, MD: National Biomedical Research Foundation), pp.
-345-352.
-X
-Jones, D. T., Taylor, W. R., and Thornton, J. M. (1992). The
-rapid generation of mutation data matrices from protein
-sequences. Comp. Appl. Biosci. 8,275-282.
-X
-Pearson, W. R. (2000). Flexible similarity searching with the
-FASTA3 program package. In Bioinformatics Methods and Protocols,
-S. Misener and S. A. Krawetz, ed. (Totowa, NJ: Humana Press), pp.
-185-219.
-X
-Pearson, W. R. and Lipman, D. J. (1988). Improved tools for
-biological sequence comparison. Proc. Natl. Acad. Sci. USA
-85,2444-2448.
-X
-Pearson, W. R. (1995). Comparison of methods for searching
-protein sequence databases. Prot. Sci. 4,1145-1160.
-X
-Pearson, W. R. (1996). Effective protein sequence comparison.
-Methods Enzymol. 266,227-258.
-X
-Pearson, W. R. (1998). Empirical statistical estimates for
-sequence similarity searches. J. Mol. Biol. 276,71-84.
-X
-Smith, T. F. and Waterman, M. S. (1981). Identification of common
-molecular subsequences. J. Mol. Biol. 147,195-197.
-X
-Wootton, J. C. and Federhen, S. (1993). Statistics of local
-complexity in amino acid sequences and sequence databases.
-Comput. Chem. 17,149-163.
-X
-Zhang, Z., Pearson, W. R., and Miller, W. (1997). Aligning a DNA
-sequence with a protein sequence. J. Computational Biology
-4,339-349.
-X
-SHAR_EOF
-chmod 0644 fasta3x.doc ||
-echo 'restore of fasta3x.doc failed'
-Wc_c="`wc -c < 'fasta3x.doc'`"
-test 41617 -eq "$Wc_c" ||
- echo 'fasta3x.doc: original size 41617, current size' "$Wc_c"
-fi
-# ============= fasta3x.me ==============
-if test -f 'fasta3x.me' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta3x.me (File already exists)'
-else
-echo 'x - extracting fasta3x.me (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta3x.me' &&
-.nr pp 11
-.nr sp 11
-.nr tp 11
-.nr fp 10
-.nr fi 0n
-.sz 11
-.if t \{
-.po 1i
-.he 'FASTA3.DOC''Release 3.4, Fall, 2003'
-.fo ''- % -''
-\}
-.if n \{
-.po 0
-.na
-.nh
-\}
-.ll 6.5i
-.ce
-\fBCOPYRIGHT NOTICE\fP
-.lp
-Copyright 1988, 1991, 1992, 1994, 1995, 1996, 1999 by William
-R. Pearson and the University of Virginia. All rights reserved. The
-FASTA program and documentation may not be sold or incorporated into a
-commercial product, in whole or in part, without written consent of
-William R. Pearson and the University of Virginia. For further
-information regarding permission for use or reproduction, please
-contact: David Hudson, Assistant Provost for Research, University of
-Virginia, P.O. Box 9025, Charlottesville, VA 22906-9025, (434)
-924-6853
-.uh "\s+2The FASTA program package\s0"
-.uh "Introduction"
-.pp
-This documentation describes version 3 of the FASTA program
-package (see W. R. Pearson and D. J. Lipman (1988), "Improved Tools
-for Biological Sequence Analysis", PNAS 85:2444-2448 [.wrp881.]; W. R.
-Pearson (1996) "Effective protein sequence comparison"
-Meth. Enzymol. 266:227-258;[.wrp960.] Pearson et al. (1997) Genomics
-46:24-36;[.wrp971.] Pearson, (1999) Meth. in Molecular Biology
-132:185-219.[.wrp000.]) Version 3 of the FASTA package contains many
-programs for searching DNA and protein databases and one program
-(prss3) for evaluating statistical significance from randomly shuffled
-sequences. Several additional analysis programs, including programs
-that produce local alignments, are available as part of version 2 of
-the FASTA package, which is still available.
-.pp
-This document is divided into three sections: (1) A summary overview of
-the programs in the FASTA3 package; (2) A guide to installing the
-programs and databases; (3) A guide to using the FASTA programs. The
-revision history of the programs can be found in the
-\fCreadme.v30..v34\fP files. The programs are easy to use, so if
-you are using them on a machine that is administered by someone else,
-you can skip section (2) and focus on (1) and (3) to learn how to use
-the programs. If you are installing the programs on your own
-machine, you will need to read section (2) carefully.
-.sh 1 "An overview of the \f(CBFASTA\fP programs"
-.pp
-Although there are a large number of programs in this package, they
-belong to three groups: (1)
-"Conventional" Library search programs:
-FASTA3, FASTX3, FASTY3, TFASTA3, TFASTX3, TFASTY3, SSEARCH3;
-(2)
-Programs for searching with short fragments:
-FASTS3, FASTF3, TFASTS3, TFASTF3;
-(3)
-Statistical significance: PRSS3.
-Programs that start with \f(CBfast\fP search protein
-databases, while \f(CBtfast\fP programs search translated DNA databases.
-Table I gives a brief description of the programs.
-.lp
-.(z
-.TS
-center;
-c s
-c s
-= =
-l lw(5.5i).
-\d\fBTable I. Comparison programs in the FASTA3 package\fP\u
-X
-\fCfasta3\fP T{
-Compare a protein sequence to a protein sequence
-database or a DNA sequence to a DNA sequence database using the FASTA
-algorithm.[.wrp881,wrp960.] Search speed and selectivity are
-controlled with the \fIktup\fP(wordsize) parameter. For protein
-comparisons, \fIktup\fP = 2 by default; \fIktup\fP =1 is more sensitive
-but slower. For DNA comparisons, \fIktup\fP=6 by default; \fIktup\fP=3 or
-\fIktup\fP=4 provides higher sensitivity; \fIktup\fP=1 should be used for
-oligonucleotides (DNA query lengths < 20).
-T}
-X
-\fCssearch3\fP T{
-Compare a protein sequence to a protein sequence
-database or a DNA sequence to a DNA sequence database using the
-Smith-Waterman algorithm.[.wat815.] \fCssearch3\fP is about 10-times
-slower than FASTA3, but is more sensitive for full-length protein
-sequence comparison.
-T}
-X
-\fCfastx3\fP/ \fCfasty3\fP T{
-Compare a DNA sequence to a protein
-sequence database, by comparing the translated DNA sequence in three
-frames and allowing gaps and frameshifts. \fCfastx3\fP uses a
-simpler, faster algorithm for alignments that allows frameshifts only
-between codons; \fCfasty3\fP is slower but produces better alignments
-with poor quality sequences because frameshifts are allowed within
-codons.
-T}
-X
-\fCtfastx3\fP/ \fCtfasty3\fP T{
-Compare a protein sequence to a DNA sequence
-database, calculating similarities with frameshifts to the forward and
-reverse orientations.
-T}
-X
-\fCtfasta3\fP T{
-Compare a protein sequence to a DNA sequence database, calculating
-similarities (without frameshifts) to the 3 forward and three reverse
-reading frames. \fCtfastx3\fP and \fCtfasty3\fP are preferred because
-they calculate similarity over frameshifts.
-T}
-X
-\fCfastf3/tfastf3\fP T{
-Compares an ordered peptide mixture, as would be obtained by
-Edman degradation of a CNBr cleavage of a protein, against a protein
-(\fCfastf\fP) or DNA (\fCtfastf\fP) database.
-T}
-X
-\fCfasts3/tfasts3\fP T{
-Compares a set of short peptide fragments, as would be obtained
-from mass-spec. analysis of a protein, against a
-protein (\fCfasts\fP) or DNA (\fCtfasts\fP) database.
-T}
-= =
-.TE
-.)z
-.sh 1 "Installing FASTA and the sequence databases"
-.sh 2 "Obtaining the libraries"
-.pp
-The FASTA program package does not include any protein or DNA sequence
-libraries. Protein databases are available on CD-ROM from the PIR and
-EMBL (see below), or via anonymous FTP from many different sources.
-As this document is updated in the fall of 1999, no DNA databases are
-available on CD-ROM from the major sequence databases: Genbank at the
-National Center for Biotechnology Information (\fCwww.ncbi.nlm.nih.gov\fP and
-\fCftp://ncbi.nlm.nih.gov\fP) and EMBL at the European Bioinformatics
-Institute (\fCwww.ebi.ac.uk\fP). However, the databases are available
-via anonymous FTP from both sites.
-.sh 3 "The GENBANK DNA sequence library"
-.pp
-Because of the large size of DNA databases, you will probably want to
-keep DNA databases in only one, or possibly two, formats. The FASTA3
-programs that search DNA databases - \fCfasta3\fP, \fCtfastx/y3\fP,
-and \fCtfasta3\fP can read DNA databases in Genbank flatfile (not
-ASN.1), FASTA, GCG/compressed-binary, BLAST1.4 (\fCpressdb\fP), and
-BLAST2.0 (\fCformatdb\fP) formats, as well as EMBL format. If you are
-also running the GCG suite of sequence analysis programs, you should
-use GCG/compressed-binary format or BLAST2.0 format for your
-\fCfasta3\fP searches. If not, BLAST2.0 is a good choice. These
-files are considerably more compact than Genbank flat files, and are
-preferred. The NCBI does not provide software for converting from
-Genbank flat files to Blast2.0 DNA databases, but you can use the
-Blast \fCformatdb\fP program to convert ASN.1 formatted Genbank files,
-which are available from the NCBI \fCftp\fP site.
-.pp
-The NCBI also provides the \fCnr\fP, \fCswissprot\fP, and several EST
-databases that are used by BLAST in FASTA format from:
-\fCftp://ncbi.nlm.nih.gov/blast/db\fP. These databases are updated
-nightly.
-.sh 3 "The NBRF protein sequence library"
-.pp
-You can obtain the PIR protein sequence database
-[.pir980.] from:
-.(l
-National Biomedical Research Foundation
-Georgetown University Medical Center
-3900 Reservoir Rd, N.W.
-Washington, D.C. 20007
-.)l
-or via ftp from \fCnbrf.georgetown.edu\fP or from the NCBI
-(\fCncbi.nlm.nih.gov/repository/PIR\fP). The data in the \fCascii\fP
-directory is in PIR Codata format, which is not widely used. I
-recommend the PIR/VMS format data (libtype=5) in the \fCvms\fP
-directory.
-.sh 3 "The EBI/EMBL CD-ROM libraries"
-.pp
-The European Bioinformatics Institute (EBI) distributes both the EMBL
-DNA database and the SwissProt database on CD-ROM,[.apw961.] and they
-are available from:
-.(l
-EMBL-Outstation European Bioinformatics Institute
-Wellcome Trust Genome Campus,
-Hinxton Hall
-Hinxton,
-Cambridge CB10 1SD
-United Kingdom
-Tel: +44 (0)1223 494444
-Fax: +44 (0)1223 494468
-Email: DATALIB@ebi.ac.uk
-.)l
-In addition, the SWISS-PROT protein sequence database is available via
-anonymous FTP from \fCftp://ftp.expasy.ch/databases/swiss-prot/\fP
-(also see \fCwww.expasy.ch\fP).
-.sh 2 "Finding the libraries: FASTLIBS"
-.pp
-The major problem that most new users of the FASTA package have is in
-setting up the program to find the databases and their library type.
-In general, if you cannot get \fCfasta3\fP to read a sequence
-database, it is likely that something is wrong with the \fCFASTLIBS\fP
-file. A common problem is that the database file is found, but either
-no sequences are read, or an incorrect number of entries is read.
-This is almost always because the library format (\fClibtype\fP) is
-incorrect. Note that a type 5 file (PIR/VMS format) can be read
-as a type 0 (default FASTA) format file, and the number of entries
-will be correct, but the sequence lengths will not.
-.pp
-All the search programs in the FASTA3 package use the environment
-variable \fCFASTLIBS\fP to find the protein and DNA sequence libraries. The
-\fCFASTLIBS\fP variable contains the name of a file that has the actual
-filenames of the libraries. The \fCfastlibs\fP file included with the
-distribution is an example of a file that can be referred to by
-FASTLIBS. To use the \fCfastlibs\fP file, type:
-.(l
-\fCsetenv FASTLIBS /usr/lib/fasta/fastgbs\fP (BSD UNIX/csh)
-or
-\fCexport FASTLIBS=/usr/lib/fasta/fastgbs\fP (SysV UNIX/ksh)
-.)l
-Then edit the \fCfastlibs\fP file to indicate where the protein and DNA
-sequence libraries can be found. If you have a hard disk and your
-protein sequence library is kept in the file \fC/usr/lib/aabank.lib\fP and
-your Genbank DNA sequence library is kept in the directory:
-\fC/usr/lib/genbank\fP, then \fCfastgbs\fP might contain:
-.ne 8
-.(l
-.ft C
-NBRF Protein$0P/usr/lib/seq/aabank.lib 0
-SWISS PROT 10$0S/usr/lib/vmspir/swiss.seq 5
-GB Primate$1P@/usr/lib/genbank/gpri.nam
-GB Rodent$1R@/usr/lib/genbank/grod.nam
-GB Mammal$1M@/usr/lib/genbank/gmammal.nam
-^ 1 ^^^^ 4 ^ ^
-X 23 (5)
-.ft R
-.)l
-The first line of this file says that there is a copy of the NBRF
-protein sequence database (which is a protein database) that can be
-selected by typing "P" on the command line or when the database menu
-is presented in the file \fC/usr/lib/seq/aabank.lib\fP.
-.pp
-Note that there are 4 or 5 fields in the lines in \fCfastgbs\fP. The first
-field is the description of the library which will be displayed by
-FASTA; it ends with a '$'. The second field (1 character), is a 0 if
-the library is a protein library and 1 if it is a DNA library. The
-third field (1 character) is the character to be typed to select the
-library.
-.pp
-The fourth field is the name of the library file. In the example
-above, the \fC/usr/lib/seq/aabank.lib\fP file contains the entire
-protein sequence library. However the DNA library file names are
-preceded by a '@', because these files (\fCgpri.nam, grod.nam,
-gmammal.nam\fP) do not contain the sequences; instead they contain the names
-of the files which contain the sequences. This is done because the
-GENBANK DNA database is broken down in to a large number of smaller
-files. In order to search the entire primate database, you must
-search more than a dozen files.
-.pp
-In addition, an optional fifth field can be used to specify the format
-of the library file. Alternatively, you can specify the library
-format in a file of file names (a file preceded by an '@'). This
-field must be separated from the file name by a space character ('\ ').
-In the example above, the \fCaabank.lib\fP file is
-in Pearson/FASTA format, while the \fCswiss.seq\fP file is in PIR/VMS format
-(from the EMBL CD-ROM). Currently, FASTA can read the following formats:
-.(l I
-.ft C
-0 Pearson/FASTA (>SEQID - comment/sequence)
-1 Uncompressed Genbank (LOCUS/DEFINITION/ORIGIN)
-2 NBRF CODATA (ENTRY/SEQUENCE)
-3 EMBL/SWISS-PROT (ID/DE/SQ)
-4 Intelligenetics (;comment/SEQID/sequence)
-5 NBRF/PIR VMS (>P1;SEQID/comment/sequence)
-6 GCG (version 8.0) Unix Protein and DNA (compressed)
-11 NCBI Blast1.3.2 format (unix only)
-12 NCBI Blast2.0 format (unix only, fasta32t08 or later)
-.ft R
-.)l
-In particular, this version will work with the EMBL and PIR VMS
-formats that are distributed on the EMBL CD-ROM. The latter format
-(PIR VMS) is much faster to search than EMBL format. This release
-also works with the protein and DNA database formats created for the
-BLASTP and BLASTN programs by SETDB and PRESSDB and with the new NCBI
-search format. If a library format is not specified, for example,
-because you are just comparing two sequences, Pearson/FASTA (format 0)
-is used by default. To specify a library type on the command line,
-add it to the library filename and surround the filename and library
-type in quotes:
-.(l
-.ft C
-fasta3 query.file "/seqdb/genbank/gbpri1.seq 1"
-.ft P
-.)l
-.pp
-You can specify a group of library files by putting a '@' symbol
-before a file that contains a list of file names to be searched. For
-example, if @gmam.nam is in the fastgbs file, the file "gmam.nam"
-might contain the lines:
-.(l
-.ft C
-</seqdb/genbank
-gbpri1.seq 1
-gbpri2.seq 1
-gbpri3.seq 1
-gbpri4.seq 1
-gbrod.seq 1
-gbmam.seq 1
-.ft R
-.)l
-In this case, the line beginning with a '<' indicates the directory
-the files will be found in. The remaining lines name the actual
-sequence files. So the first sequence file to be searched would be:
-.(l
-.ft C
-/seqdb/genbank/gbpri1.seq
-.ft R
-.)l
-The notation "\fC<PIRNAQ:\fP" might be used under the VAX/VMS operating
-system. Under UNIX, the trailing '/' is left off, so the library
-directory might be written as "\fC</usr/seqlib\fP".
-.pp
-The FASTA programs can search a database composed of different files
-in different sequence formats. For example, you may wish to search
-the Genbank files (in GenBank flat file format) and the EMBL DNA
-sequence database on CD-ROM. To do this, you simply list the names
-and filetypes of the files to be searched in a file of filenames. For
-example, to search the mammalian portion of Genbank, the unannotated
-portion of Genbank, and the unannotated portion of the EMBL library,
-you could use the file:
-.(l I
-.ft C
-</usr/lib/DNA
-gbpri.seq 1
-\&# (this '#' causes the program to display the size of the library)
-gbrod.seq 1
-\&...
-gbmam.seq 1
-\&...
-gbuna.seq 1
-\&...
-unanno.seq 5
-\&#
-.ft R
-.)l
-.(l I F
-You do not need to include library format numbers if you only use the
-Pearson/FASTA version of the PIR protein sequence library. If no
-library type is specified, the program assumes that type 0 is being
-used.
-.)l
-.pp
-Test the setup by running FASTA. Enter the sequence
-file '\fCmgstm1.aa\fP' when the program requests it (this file is
-included with the programs). The program should then ask you to
-select a protein sequence library. Alternatively, if you run the
-TFASTA program and use the mgstm1.aa query sequence, the program
-should show you a selection of DNA sequence libraries.
-Once the fastgbs file has been set up correctly, you can
-set FASTLIBS=fastgbs in your AUTOEXEC.BAT file, and you will not need to
-remember where the libraries are kept or how they are named.
-.ne 8
-.sh 1 "Using the FASTA Package"
-.sh 2 "Overview"
-.pp
-The FASTA sequence comparison programs all require similar
-information, the name of a query sequence file, a library file, and
-the \fIktup\fP parameter. All of the programs can accept arguments
-on the command line, or they will prompt for the file names and
-\fIktup\fP value.
-.lp
-To use FASTA, simply type:
-.(l
-.ft C
-\f(CBFASTA\fP
-and you will be prompted for :
-.in +0.5i
-the name of the test sequence file
-the name of the library file
-and whether you want ktup = 1 or 2. (or 1 to 6 for DNA sequences)
-(ktup of 2 is about 5 times faster than ktup = 1)
-.ft R
-.)l
-The program can also be run by typing
-.(l
-.ft C
-FASTA test.aa /lib/bigfile.lib \fIktup\fP (1 or 2)
-.ft R
-.)l
-.lp
-Included with the package are several test files.
-To check to make certain that everything is working, you can try:
-.(l
-.ft C
-fasta musplfm.aa prot_test.lib
-and
-tfastx mgstm1.aa gst.nlib
-.ft R
-.)l
-.sh 2 "Sequence files"
-.pp
-The \fCfasta3\fP programs know about three kinds of sequence files:
-(1) plain sequence files - files that contain nothing but
-sequence residues - can only be used as query sequences. (2) FASTA
-format files. These are the same as plain sequence files, except that each
-sequence is preceded by a comment line with a '>' in the first
-column. (3) distributed sequence libraries (this is a broad class that
-includes the NBRF/PIR VMS and blocked ascii formats, Genbank flat-file
-format, EMBL flat-file format, and Intelligenetics format). All of the
-files that you create should be of type (1) or (2). FASTA format
-files (ones with a '>' and comment before the sequence) are preferred,
-because they can be used as query or library sequence files by all of
-the programs.
-.pp
-I have included several sample test files, \fC*.aa\fP and \fC*.seq\fP
-as well as two small sequence libraries, \fCprot_test.lib\fP and
-\fCgst.nlib\fP. The first line may begin with a '>' followed by a comment.
-Spaces and tabs (and anything else that is not an amino-acid code) are
-ignored.
-.pp
-Library files should have the form:
-.(l
-.ft C
->Sequence name and identifier
-A F A S Y T .... actual sequence.
-F S S .... second line of sequence.
->Next sequence name and identifier
-.ft R
-.)l
-This is often referred to as "FASTA" or "Pearson" format. You can
-build your own library by concatenating several sequence files. Just
-be sure that each sequence is preceded by a line beginning with a '>'
-with a sequence name.
-.pp
-The test file should not have lines longer than 120 characters, and
-sequences entered with word processors should use a document
-mode, with normal carriage returns at the end of lines.
-.pp
-A different format is required to specify the ordered peptide mixture for \fCfastf3/tfastf3\fP. For example:
-.(l I
-.ft C
->mgstm1
-MGCEN,
-MIDYP,
-MLLAY,
-MLLGY
-.ft P
-.)l
-indicates \fCm\fP in the first position of all four peptides (as
-from CNBr), \fCG, I, L\fP (twice) in the second position (first cycle),
-\fCC,D,L\fP (twice) in the third position, etc. The commas (\fC,\fP)
-are required to indicate the number of fragments in the mixture, but
-there should be no comma after the last residue.
-.pp
-For the \fCfasts3/tfasts3\fP program, the format is the same, except that there
-is no requirement for the peptides to be the same length.
-.sh 1 "Statistical Significance"
-.pp
-All the programs in the FASTA3 package attempt to calculate accurate
-estimates of the statistical significance of a match. For
-\fCfasta3\fP, \fCssearch3\fP, and \fCfastx3/y3\fP, these estimates are
-very accurate.[.wrp971,wrp981.] Altschul et al. [.alt940.] provide
-an excellent review of the statistics of local similarity scores.
-Local sequence similarity scores follow the extreme value
-distribution, so that P(s > x) = 1 - exp(-exp(-lambda(x-u))) where u =
-ln(Kmn)/lambda and m,n are the lengths of the query and library
-sequence. This formula can be rewritten as: 1 - exp(-Kmn exp(-lambda
-x)), which shows that the average score for an unrelated library
-sequence increases with the logarithm of the length of the library
-sequence. The \fCfasta3\fP programs use simple linear regression
-against the log of the library sequence length to calculate a
-normalized "z-score" with mean 50, regardless of library sequence
-length, and variance 10. (Several other estimation methods are
-available with the \fC\-z\fP option.) These z-scores can then be used
-with the extreme value distribution and the poisson distribution (to
-account for the fact that each library sequence comparison is an
-independent test) to calculate the number of library sequences expected to
-obtain a score greater than or equal to the score obtained in the
-search. The original idea and routines to do the linear regression on
-library sequence length were provided by Phil Green, U. Washington. This
-version uses a slightly different strategy for fitting the data than
-those originally provided by Dr. Green.
-.pp
-The expected number of sequences is plotted in the histogram using an
-"*". Since the parameters for the extreme value distribution are not
-calculated directly from the distribution of similarity scores, the
-pattern of "*'s" in the histogram gives a qualitative view of how well
-the statistical theory fits the similarity scores calculated by the
-programs. For \fCfasta3\fP, if optimized scores are calculated for
-each sequence in the database (the default), the agreement between the
-actual distribution of "z-scores" and the expected distribution based
-on the length dependence of the score and the extreme value
-distribution is usually very good. Likewise, the distribution of
-\fCssearch3\fP Smith-Waterman scores typically agrees closely with the
-actual distribution of "z-scores." The agreement with unoptimized
-scores, \fIktup=2\fP, is often not very good, with too many high
-scoring sequences and too few low scoring sequences compared with the
-predicted relationship between sequence length and similarity score.
-In those cases, the expectation values may be overestimates.
-.pp
-With version 33t01, all the FASTA programs also report a "bit" score,
-which is equivalent to the bit score reported by BLAST2. The
-FASTA33/BLAST2 bit score is calculated as: (lambda*S - ln K)/ln 2,
-where S is the raw similarity score, lambda and K are statistical
-parameters estimated from the distribution of unrelated sequence
-similarity scores. The statistical significance of a given bit score
-depends on the lengths of the query and library sequences and the size
-of the library, but a 1 bit increase in score corresponds to a 2-fold
-reduction in expectation; a 10-bit increase implies 1000-fold lower
-expectation, etc.
-.pp
-The statistical routines assume that the library contains a large
-sample of unrelated sequences. If this is not true, then statistical
-parameters can be estimated by using the \fC\-z 11\-15\fP options.
-\fC\-z\fP options greater than 10 calculate a shuffled similarity score
-for each library sequence, in addition to the unshuffled score, and
-estimate the statistical parameters from the scores of the shuffled
-sequences. If there are fewer than 20 sequences in the library, the
-statistical calculations are not done.
-.pp
-For protein searches, library sequences with E() values < 0.01 for
-searches of a 10,000 entry protein database are almost always
-homologous. Frequently sequences with E()-values from 1 - 10 are
-related as well, but unrelated sequences ( 1 \- 10 per search) will
-have scores in this range as well. Remember, however, that these E()
-values also reflect differences between the amino acid composition of
-the query sequence and that of the "average" library sequence. Thus,
-when searches are done with query sequences with "biased" amino-acid
-composition, unrelated sequences may have "significant" scores because
-of sequence bias. \fCPRSS3\fP can address this problem by calculating
-similarity scores for random sequences with the same length and amino
-acid composition.
-.sh 1 "Options"
-.pp
-Command line options are available to change the scoring parameters
-and output display. \fBCommand line options must precede other program
-arguments, such as the query and library file names.\fP
-.sh 2 "Command line options"
-.ip "-a"
-(fasta3, ssearch3 only) show both sequences in their entirety.
-.ip "-A"
-force Smith-Waterman alignments for fasta3 DNA sequences. By default,
-only fasta3 protein sequence comparisons use Smith-Waterman alignments.
-.ip "-B"
-Show normalized score as a z-score, rather than a bit-score in the list
-of best scores.
-.ip "-b #"
-Number of sequence scores to be shown on output. In the absence of
-this option, fasta (and tfasta and ssearch) display all library
-sequences obtaining similarity scores with expectations less than
-10.0 if optimized scores are used, or 2.0 if they are not. The -b
-option can limit the display further, but it will not cause additional
-sequences to be displayed.
-.ip "-c #"
-Threshold score for optimization (OPTCUT). Set "-c 1" to
-optimize every sequence in a database.
-.ip "-E #"
-Limit the number of scores and alignments shown based on the
-expected number of scores. Used to override the expectation value of 10.0
-used by default. When used with -Q, -E 2.0 will show all library sequences
-with scores with an expectation value <= 2.0.
-.ip "-d #"
-Maximum number of alignments to be displayed. Ignored if "-Q" is not
-used.
-.ip "-f"
-Penalty for the first residue in a gap (-12 by default for proteins,
--16 for DNA, -15 for FAST[XY]/TFAST[XY]).
-.ip "-F #"
-Limit the number of scores and alignments shown based on the expected
-number of scores. "-E #" sets the highest E()-value shown; "-F #" sets
-the lowest E()-value. Thus, "-F 0.0001" will not show any matches or
-alignments with E() < 0.0001. This allows one to skip over close
-relationships in searches for more distant relationships.
-.ip "-g"
-Penalty for additional residues in a gap (-2 by default for proteins,
--4 for DNA, -3 for FAST[XY]/TFAST[XY]).
-.ip "-h"
-Penalty for frameshift (fastx3/y3, tfastx3/y3 only).
-.ip "-H"
-Omit histogram.
-.ip "-i"
-Invert (reverse complement) the query sequence if it is DNA. For
-tfasta3/x3/y3, search the reverse complement of the library sequence
-only.
-.ip "-j #"
-Penalty for frameshift within a codon (fasty3/tfasty3 only).
-.ip "-l file"
-Location of library menu file (FASTLIBS).
-.ip "-L"
-Display more information about the library sequence in the alignment.
-.ip "-M low-high"
-Range of amino acid sequence lengths to be included in the search.
-.ip "-m #"
-Specify alignment type: 0, 1, 2, 3, 4, 5, 6, 9, 10
-.(l I
-.ft C
-X \-m 0 \-m 1 \-m 2 \-m 3 \-m 4
-.ft C
-MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT MWRTCGPPYT
-::..:: ::: xx X ..KS..Y... MWKSCGYPYT ----------
-MWKSCGYPYT MWKSCGYPYT
-.ft P
-.)l
-.ip
-\fC\-m 5\fP provides a combination of \fC\-m 4\fP and
-\fC\-m 0\fP. \fC\-m 6\fP provides \fC\-m 5\fP plus HTML formatting.
-.ip "-m 9"
-provides coordinates and scores with the best score information.
-A simple "\fC -m 9\fP" extends the normal best score information:
-.(l
-.ft C
-The best scores are: opt bits E(14548)
-XXURTG4 glutathione transferase (EC 2.5.1.18) 4 - ( 219) 1248 291.7 1.1e-79
-.ft P
-.)l
-to include the additional information (on the same line, separated by
-a <tab>):
-.(l
-.ft C
-%_id %_gid sw alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs
-0.771 0.771 1248 218 1 218 1 218 1 218 1 219 0 0 0
-.ft P
-.)l
-\fC -m 9c\fP provides additional information: an encoded alignment string. Thus:
-.(l I
-.ft C
-X 10 20 30 40 50 60 70
-GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
-X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
-XXURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
-X 20 30 40 50 60
-.ft P
-.)l
-would be encoded:
-.(l
-.ft C
-=23+9=13-2=10-1=3+1=5
-.ft P
-.)l
-The alignment encoding is with respect to the alignment, not the
-sequences. The coordinate of the alignment is given earlier in the
-"\fC -m 9c\fP" line.
-.ip "-m 10"
-\fC\-m 10\fP is a new, parseable format for use
-with other programs. See the file "readme.v20u4" for a more complete
-description.
-.ip
-As of version "fa34t23b2", it has become possible to combine independent
-"\fC\-m\fP" options. Thus, one can use "\fC\-m 1 -m 6 -m 9\fP".
-.ip "-M low\-high"
-Include library sequences (proteins only) with lengths between low and
-high.
-.ip "-n"
-Force the query sequence to be treated as a DNA sequence. This is
-particularly useful for query sequences that contain a large number of
-ambiguous residues, e.g. transcription factor binding sites.
-.ip "-O"
-Send copy of results to "filename." Helpful for environments without
-STDOUT (mostly for the Macintosh).
-.ip "-o "
-Turn off default optimization of all scores greater than OPTCUT. Sort
-results by "initn" scores (reduces the accuracy of statistical
-estimates).
-.ip "-p"
-Force query to be treated as protein sequence.
-.ip "-Q,-q"
-Quiet - does not prompt for any input. Writes scores and alignments
-to the terminal or standard output file.
-.ip "-r"
-Specify match/mismatch scores for DNA comparisons. The default is
-"+5/-4". "+3/-2" can perform better in some cases.
-.ip "-R file"
-Save a results summary line for every sequence in the sequence
-library. The summary line includes the sequence identifier,
-superfamily number (if available) position
-in the library, and the similarity scores calculated. This option can
-be used to evaluate the sensitivity and selectivity of different
-search strategies.[.wrp951,wrp981.]
-.ip "-s file"
-Specify the scoring matrix file. \fCfasta3\fP uses the same scoring
-matrices as Blast1.4/2.0. Several scoring matrix files are included
-in the standard distribution. For protein sequences: \fCcodaa.mat\fP
-- based on minimum mutation matrix; \fCidnaa.mat\fP - identity matrix;
-\fCpam250.mat\fP - the PAM250 matrix developed by Dayhoff et
-al.;[.day787.] \fCpam120.mat\fP - a PAM120 matrix. The default
-scoring matrix is BLOSUM50 ("-s BL50"). Other matrices available from
-within the program are: PAM250/"-s P250", PAM120/"-s P120", PAM40/"-s
-P40", PAM20/"-s P20", MDM10 - MDM40/"-s M10 \- M40" (MDM are modern
-PAM matrices from Jones et al.,[.tay925.]), BLOSUM50, 62, and 80/"-s
-BL50", "-s BL62", "-s BL80".
-.ip "-S"
-Treat lower-case characters in the query or library sequences as
-"low-complexity" ("seg"-ed) residues. Traditionally, the "seg"
-program [.woo935.] is used to remove low complexity regions in DNA
-sequences by replacing the residues with an "X". When the "-S" option
-is used, the FASTA33 (and later) programs provide a potentially more
-informative approach. With "-S", lower case characters in the query
-or database sequences are treated as "X"'s during the initial scan,
-but are treated as normal residues during the final alignment display.
-Since statistical significance is calculated from the similarity score
-calculated during the library search, when the lower case residues are
-"X"'s, low complexity regions will not produce statistically
-significant matches. However, if a significant alignment contains low
-complexity regions, their alignment is shown. With "-S", lower case
-characters may be included in the alignment to indicate low complexity
-regions, and the final alignment score may be higher than the score
-obtained during the search.
-.ip
-The \fCpseg\fP program can be used to produce databases (or query
-sequences) with lower case residues indicating low complexity regions
-using the command:
-.(l I
-\fCpseg database.fasta -z 1 -q > database.lc_seg\fP
-.)l
-(\fCseg\fP can also be used with some post processing, see readme.v33tx.)
-.ip
-The \fC-S\fP option should always be used with \fCFASTX/Y\fP and
-\fCTFASTX/Y\fP because out of frame translations often generate
-low-complexity protein sequences. However, only lower case characters
-in the protein sequence (or protein database) are masked; lower case
-DNA sequences are translated into upper case protein sequences, and
-not treated as low complexity by the translated alignment programs.
-.ip "-t #"
-Translation table - tfasta3, fastx3, tfastx3, fasty3, and
-tfasty3 now support the BLAST translation tables. See
-\fChttp://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi\fP.
-.ip
-In addition, "\-t t" or "\-t t#" turns on the addition of an implicit termination
-codon to a protein:translated DNA match. That is, each protein
-sequence implicitly ends with "*", which matches the termination codes
-for the appropriate genetic code. "\-t t#" sets implicit termination
-and a different genetic code.
-.ip "-U"
-Treat the query sequence as an RNA sequence. In addition to selecting a
-DNA/RNA alphabet, this option causes changes to the scoring matrix so
-that 'G:A' , 'T:C' or 'U:C' are scored as 'G:G'.
-.ip "-V str"
-It is now possible to specify some annotation characters that can be
-included (and will be ignored), in the query sequence file. Thus, one
-might have a file with: \fC"ACVS*ITRLFT?"\fP, where "*" and "?" are
-used to indicate phosphorylation. By giving the option \fC\-V '*?'\fP,
-those characters in the query will be moved to an "annotation string",
-and alignments that include the annotated residues will be highlighted
-with the appropriate character above the sequence (on the number line).
-.ip "-w #"
-Line length (width) = number (<200)
-.ip "-W #"
-X context length (default is 1/2 of line width -w) for alignment,
-like fasta and ssearch, that provide additional sequence context.
-.ip "-x #match,#mismatch"
-Specify the penalty for a match to an 'X', and mismatch to 'X',
-independently of the PAM matrix. Particularly useful for
-\fCfastx3/fasty3\fP, where termination codons are encoded as 'X'.
-.ip "-X \"off1 off2\""
-Specifies offsets for the beginning of the query and library sequence.
-For example, if you are comparing upstream regions for two genes, and
-the first sequence contains 500 nt of upstream sequence while the
-second contains 300 nt of upstream sequence, you might try:
-.(l I
-\fCfasta -X "-500 -300" seq1.nt seq2.nt\fP
-.)l
-If the -X option is not used, FASTA assumes numbering starts with 1.
-(You should double check to be certain the negative numbering works
-properly.)
-.ip "-y"
-Set the width of the band used for calculating "optimized" scores.
-For proteins and ktup=2, the width is 16. For proteins with ktup=1,
-the width is 32 by default. For DNA the width is 16.
-.ip "-z -1,0,1,2,3,4,5"
-\fC\-z -1\fP turns off statistical calculations. \fC\-z 0\fP estimates
-the significance of the match from the mean and standard deviation of
-the library scores, without correcting for library sequence length.
-\fC\-z 1\fP (the default) uses a weighted regression of average score
-vs library sequence length; \fC\-z 2\fP uses maximum likelihood
-estimates of
-.if t \(*l
-.if n Lambda
-and \fIK\fP; \fC\-z 3\fP uses Altschul-Gish
-parameters;[.alt960.] \fC\-z 4 \- 5\fP uses two variations on the
-\fC\-z 1\fP strategy. \fC\-z 1\fP and \fC\-z 2\fP are the best methods,
-in general.
-.ip "-z 11,12,14,15"
-estimate the statistical parameters from shuffled copies of each
-library sequence. This doubles the time required for a search, but
-allows accurate statistics to be estimated for libraries comprised of
-a single protein family.
-.ip "-Z db_size"
-set the apparent size of the database to be used when calculating
-expectation E() values. If you searched a database with 1,000
-sequences, but would like to have the E()-values calculated in the
-context of a 100,000 sequence database, use '-Z 100000'.
-.ip "-1"
-sort output by init1 score (for compatibility with FASTP - do not
-use).
-.ip "-3"
-translate only three forward frames
-.sp
-.lp
-For example:
-.(l
-\fCfasta -w 80 -a seq1.aa seq2.aa\fP
-.)l
-would compare the sequence in seq1.aa to that in seq2.aa and display the
-results with 80 residues on an output line, showing all of the residues
-in both sequences. Be sure to enter the options before entering the file
-names, or just enter the options on the command line, and the program will
-prompt for the file names.
-.sp
-.pp
-(November, 1997) In addition, it is now possible to provide the fasta
-programs with the query sequence (fasta, fasty, ssearch, tfastx), or
-two sequences (prss, lalign, plalign) from the unix "stdin" stream. This
-makes it much easier to set up FASTA or PRSS WWW pages. To specify
-that stdin be used, rather than a file, the file name should be
-specified as '-' or '@' (the latter file name makes it possible to
-specify a subset of the sequence).
-Thus:
-.(l
-cat query.aa | fasta -q @:25-75 s
-.)l
-would take residues 25-75 from query.aa and search the 's' library
-(see the discussion of FASTLIBS).
-.sh 2 "Environment variables"
-.pp
-Because the current version of the program allows the user to set
-virtually every option on the command line (except the \fIktup\fP,
-which must be set as the third command line argument), only the
-\fCFASTLIBS\fP environment variable is routinely used.
-.ip "FASTLIBS"
-specifies the location of the file which contains the list of library
-descriptions, locations, and library types (see section on finding
-library files).
-.sh 1 "Frequently Asked Questions (FAQs)"
-.np
-\fIWhich program should I use?\fP See Table I.
-.np
-\fIHow do I search with both DNA strands with\fP \fCfasta3\fP \fIand\fP
-\fCfastx3\fP? With version 32 of the FASTA program package, all
-searches that use DNA queries (e.g. \fCfasta3\fP, \fCfastx3/y3\fP)
-examine both strands. To revert to earlier FASTA behavior - only
-looking at the forward or reverse strand - use \fC\-3\fP to search only
-the forward strand and \fC\-i -3\fP to search only the reverse strand.
-.np
-\fIWhen I search Genbank - the program reports:\fP \fC0 residues in 0
-sequences\fP. This typically happens because the program does not
-know that you are searching a Genbank flatfile database and is looking
-for a FASTA format database. Be certain to specify the library type
-("1" for Genbank flatfile) with the database name.
-.np
-What is the difference between \fCfastx3\fP and \fCfasty3\fP (or
-\fCtfastx3\fP and \fCtfasty3\fP)? \fC[t]fastx3\fP uses a simpler
-codon based model for alignments that does not allow frameshifts in
-some codon positions (see ref. [.wrp971.]). \fCtfastx3\fP is about
-30% faster, but \fCtfasty3\fP can produce higher quality alignments in
-some cases.
-.np
-\fIWhen I run\fP \fCfasta3 -q\fP, I don't see any (or very little)
-output, but I get lots of scores when I run interactively. With the
-\fC\-Q\fP option, the number of high scores displayed is limited by the
-\fC\-E #\fP cutoff, which is 10.0 for protein comparisons, 2.0 for DNA
-comparisons, and 5.0 for translated DNA:protein comparisons. In
-interactive mode (without \fC\-Q\fP), by default you see 20 high
-scores, regardless of \fCE()\fP value.
-.np
-\fIWhat is ktup\fP \- All of the programs with \fCfast\fP in their
-name use a computer science method called a lookup table to speed the
-search. For proteins with \fIktup\fP=2, this means that the program
-does not look at any sequence alignment that does not involve matching
-two identical residues in both sequences. Likewise with DNA and
-\fIktup\fP = 6, the initial alignment of the sequences looks for 6
-identical adjacent nucleotides in both sequences. Because it is less
-likely that two identical amino-acids will line up by chance in two
-unrelated proteins, this speeds up the comparison. But very distantly
-related sequences may never have two identical residues in a row but
-will have single aligned identities. In this case, \fIktup\fP = 1 may
-find alignments that \fIktup\fP=2 misses.
-.np
-\fISometimes, in the list of best scores, the same sequence is shown
-twice with exactly the same score. Sometimes, the sequence is there
-twice, but the scores are slightly different.\fP When any of the
-\fCfasta3\fP programs searches a long sequence, it breaks the sequence
-up into \fIoverlapping\fP pieces. The length of the piece depends on
-the length of the query and the particular program being used (it can
-also be controlled with the -N #### option). Since the pieces overlap
-by the length of the query sequence (or 3*query_length for fastx/y3
-and tfasta/x/y3), if the highest scoring alignment is at the end of
-one piece, it will be scored again at the beginning of the next piece.
-If the alignment is not completely included in the overlap region,
-one of the pieces will give a higher score than the other. These
-duplications can be detected by looking at the coordinates of the
-alignment. If either the beginning or end coordinate is identical in
-two alignments, the alignments are at least partially duplicates.
-.lp
-As always, please inform me of bugs as soon as possible.
-.sp
-.nf
-William R. Pearson
-Department of Biochemistry
-Jordan Hall Box 800733
-U. of Virginia
-Charlottesville, VA
-X
-wrp@virginia.EDU
-X
-.sh 1 "References"
-.[]
-SHAR_EOF
-chmod 0644 fasta3x.me ||
-echo 'restore of fasta3x.me failed'
-Wc_c="`wc -c < 'fasta3x.me'`"
-test 39642 -eq "$Wc_c" ||
- echo 'fasta3x.me: original size 39642, current size' "$Wc_c"
-fi
-# ============= fasta_func.doc ==============
-if test -f 'fasta_func.doc' -a X"$1" != X"-c"; then
- echo 'x - skipping fasta_func.doc (File already exists)'
-else
-echo 'x - extracting fasta_func.doc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasta_func.doc' &&
-Overall structure of the fasta3 program. (Some functions
-are different for translated comparisons FASTX, FASTY, TFASTX, TFASTY.)
-X
-main() { /* complib.c structure */
-X
-X /* get command line arguments, set up initial parameter values */
-X initenv (argc, argv, &m_msg, &pst,&aa0[0],outtty);
-X
-X /* allocate space for sequence arrays */
-X /* get the query file name if not on command line */
-X /* get query */
-X m_msg.n0 = getseq (m_msg.tname,aa0[0], MAXTOT, m_msg.libstr,&pst.dnaseq,
-X &m_msg.sq0off);
-X
-X /* reset some parameters if DNA */
-X resetp (aa0[0], m_msg.n0, &m_msg, &pst);
-X
-X /* get a library name if not on command line */
-X libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
-X /* use library name to build list of library files */
-X libselect(m_msg.lname, &m_msg);
-X
-X /* get additional options (ktup, prss-window) if not specified */
-X query_parm (&m_msg, &pst);
-X
-X /* do final parameter initializations */
-X last_init(&m_msg, &pst);
-X
-X /* set up structures for saved scores[20000], statistics[50000] */
-X nbest = 0;
-X
-X /* initialize the comparison function */
-X init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
-X
-X /* open the library */
-X for (iln = 0; iln < m_msg.nln; iln++) {
-X if (openlib(m_msg.lbnames[iln],m_msg)!=1) {continue;}
-X }
-X
-X /* get the library sequence and do the comparison */
-X while ((n1=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) {
-X do_work (aa0[itt], m_msg.n0, aa1, n1, itt, &pst, f_str[itt], &rst);
-X
-X /* save the scores */
-X /* save the scores for statistics */
-X }
-X
-X /* all done with all libraries */
-X process_hist(stats,nstats,pst);
-X
-X /* sort the scores by z-value */
-X sortbestz (bptr, nbest);
-X
-X /* sort the scores by E-value */
-X sortbeste (bptr, nbest);
-X
-X /* print the histogram */
-X prhist (stdout,m_msg,pst,gstring2);
-X
-X /* show the high scoring sequences */
-X showbest (stdout, aa0, aa1, maxn, bptr, nbest, qlib, &m_msg, pst,
-X f_str, gstring2);
-X
-X /* show the high-scoring alignments */
-X showalign(outfd, aa0, aa1, maxn, bptr, nbest, qlib, m_msg, pst,
-X f_str, gstring2);
-X
-X /* thats all folks !!! */
-}
-X\f
-================
-complib.c /* version set as mp_verstr */
-X
-main()
-printsum() /* prints summary of run (residues, entries, time) */
-void fsigint() /* sets up interrupt handler for HUP not used */
-X
-================
-compacc.c
-X
-void selectbest() /* select best 15000/20000 based on raw score */
-void selectbestz() /* select best 15000/20000 based on z-score */
-void sortbest() /* sort based on raw score */
-void sortbestz() /* sort based on z-score */
-void sortbeste() /* sort based on E() score - different from z-score for DNA */
-X
-prhist() /* print histogram */
-X
-shuffle() /* shuffle sequence (prss) */
-wshuffle() /* window shuffle */
-X
-================
-showbest.c
-X
-void showbest() /* present list of high scoring sequences */
-X
-================
-showalign.c
-X
-void showalign() /* show list of high-scoring alignments */
-void do_show() /* show an individual alignment */
-void initseq() /* setup seqc0/seqc1 which contain alignment characters */
-void freeseq() /* free them up */
-X
-================
-htime.c
-X
-time_t s_time() /* get the time in usecs */
-void ptime() /* print elapsed time */
-X
-================
-apam.c
-X
-initpam () /* read in PAM matrix or change default array */
-void mk_n_pam() /* make DNA pam from +5/-3 values */
-================
-doinit.c
-X
-void initenv() /* read environment variables, general options */
-================
-initfa.c /* version set as "verstr" */
-X
-alloc_pam() /* allocate 2D pam array */
-initpam2() /* fill it up from 1D pam triangle */
-f_initenv() /* function-specific environment variables */
-f_getopt() /* function-specific options */
-f_getarg() /* function specific argument - ktup */
-resetp() /* reset scoring matrix, optional parameters for DNA-DNA */
-reseta() /* reset scoring matrix, optional parameters for prot-DNA */
-query_parm() /* ask for additional program arguments (ktup) */
-last_init() /* last chance to set up parameters based on query,lib,parms */
-f_initpam() /* not used - could set parameters from pam matrix */
-X
-================
-scaleswn.c
-X
-process_hist() /* do statistics calculations */
-X
-X proc_hist_r() /* regression fit z=1, also used by z=5 */
-X float find_z() /* gives z-score for score, length, mu, rho, var */
-X float find_zr() /* gives z-score for score, length, mu, rho, var */
-X fit_llen() /* first estimate of mu, rho, var */
-X fit_llens() /* second estimate of mu, rho, var, mu2, rho2 */
-X
-X proc_hist_r2() /* regression_i fit z=4 */
-X float find_zr2() /* gives z-score for score, length, mu, rho, mu2, rho2 */
-X fit_llen2() /* iterative estimate of mu, rho, var */
-X
-X proc_hist_ln() /* ln()-scaled z=2 */ /* no longer used */
-X float find_zl() /* gives z-score from ln()-scaled scores */
-X
-X proc_hist_ml() /* estimate lambda, K using Maximum Likelihood */
-X float find_ze() /* z-score from lambda, K */
-X
-X proc_hist_n() /* no length-scaling z=0 */
-X float find_zn() /* gives z-score from mu, var (no scaling) */
-X
-X proc_hist_a() /* Altschul-Gish params z= 3 */
-X ag_parm() /* match pst.pamfile name, look_p() */
-X look_p() /* lookup Lambda, K, H given param struct */
-X float find_za()
-X
-eq_s() /* returns (double)score (available for length correction) */
-ln_s() /* returns (double)score * ln(200)/ln(length) */
-X
-proc_hist_r() /* regression fit z=1, also used by z=5 */
-alloc_hist() /* set up arrays for score vs length */
-free_hist() /* free them */
-inithist() /* calls alloc_hist(), sets some other globals */
-addhist() /* update score vs length hist */
-inithistz() /* initialize displayed (z-score) histogram hist[]*/
-addhistz() /* add to hist[], increment num_db_entries */
-addhistzp() /* add to hist[], don't change num_db_entries */
-prune_hist() /* remove scores from score vs length */
-update_db_size() /* num_db_entries = nlib - ntrimmed */
-set_db_size() /* -Z db_size; set nlib */
-X
-double z_to_E() /* z-value to E() (extreme value distribution) */
-double zs_to_E() /* z-score (mu=50, sigma=10) to E() */
-double zs_to_bit() /* z-score to BLAST2 bit score */
-X
-float E_to_zs() /* E() to z-score */
-double zs_to_Ec() /* z-score to num_db_entries*(1 - P(zs)) */
-X
-summ_stats() /* put stat summary in string */
-vsort() /* not used, does shell sort */
-calc_ks() /* does Kolmogorov-Smirnoff calculation for histogram */
-================
-dropnfa.c /* contains worker comparison functions */
-X
-init_work() /* set up struct f_struct fstr - hash query */
-get_param() /* actually prints parameters to string */
-close_work() /* clean up fstr */
-do_work() /* do a comparison */
-X do_fasta() /* use the fasta() function */
-X savemax() /* save the best region during scan */
-X spam() /* rescan the best regions */
-X sconn() /* try to connect the best regions for initn */
-X kssort() /* sort by score */
-X kpsort() /* sort by left end pos */
-X shscore() /* best self-score */
-X dmatch() /* do band alignment for opt score */
-X FLOCAL_ALIGN() /* fast band score-only */
-X
-do_opt() /* do an "optimized" comparison */
-X
-do_walign() /* put an alignment into res[] for calcons() */
-X sw_walign() /* SW alignment driver - find boundaries */
-X ALIGN() /* actual alignment driver */
-X nw_align() /* recursive global alignment */
-X CHECK_SCORE() /* double check */
-X DISPLAY() /* Miller's display routine */
-X
-X bd_walign() /* band alignment driver for DNA */
-X LOCAL_ALIGN() /* find boundaries in band */
-X B_ALIGN() /* produce band alignment */
-X bg_align() /* recursively produce band alignment */
-X BCHECK_SCORE() /* double check */
-X
-calcons() /* calculate ascii alignment seqc0,seqc1 from res[]*/
-calc_id() /* calculate % identity with no alignment */
-================
-nxgetaa.c
-X
-getseq() /* get a query (prot or DNA) */
-getntseq() /* get a nt query (for fastx, fasty) */
-gettitle() /* get a description */
-X
-int openlib() /* open a library */
-closelib() /* close it */
-GETLIB() /* get a fasta-format next library entry */
-RANLIB() /* jump back in, get description, position for getlib() */
-X
-lgetlib() /* get a Genbank flat-file format next library entry */
-lranlib() /* jump back in, get description, position for lgetlib() */
-X
-pgetlib() /* get CODATA format next library entry */
-pranlib() /* jump back in, get description, position for pgetlib() */
-X
-egetlib() /* get EMBL format next library entry */
-eranlib() /* jump back in, get description, position for egetlib() */
-X
-igetlib() /* get Intelligenetics format next library entry */
-iranlib() /* jump back in, get description, position for igetlib() */
-X
-vgetlib() /* get PIR/VMS/GCG format next library entry */
-vranlib() /* jump back in, get description, position for vgetlib() */
-X
-gcg_getlib() /* get GCG binary format next library entry */
-gcg_ranlib() /* jump back in, get description, position for gcg_getlib() */
-X
-int scanseq() /* find %ACGT */
-X
-revcomp() /* do reverse complement */
-sf_sort() /* sort superfamily numbers */
-================
-c_dispn.c
-X
-discons() /* display alignment from seqc0, seqc1 */
-disgraph() /* display graphical representation, -m 4,5 */
-aancpy() /* copy a binary sequence to ascii */
-r_memcpy()
-l_memcpy()
-iidex() /* lookup ascii-encoding of residue */
-cal_coord() /* calculate coordinates of alignment ends */
-X
-================
-ncbl_lib.c
-X
-ncbl_openlib()
-ncbl_closelib()
-ncbl_getliba()
-ncbl_getlibn()
-ncbl_ranlib()
-src_ulong_read()
-src_long_read()
-src_char_read()
-src_fstr_read()
-newname()
-X
-================
-lib_sel.c
-X
-getlnames()
-libchoice()
-libselect()
-addfile()
-ulindex()
-X
-================
-nrand48.c
-X
-irand(time) /* initialize random number generator */
-nrand(n) /* get a number 0 - n */
-X
-================
-url_subs.c
-X
-void do_url1() /* setup search links */
-X
-SHAR_EOF
-chmod 0644 fasta_func.doc ||
-echo 'restore of fasta_func.doc failed'
-Wc_c="`wc -c < 'fasta_func.doc'`"
-test 9645 -eq "$Wc_c" ||
- echo 'fasta_func.doc: original size 9645, current size' "$Wc_c"
-fi
-# ============= fastf3.1 ==============
-if test -f 'fastf3.1' -a X"$1" != X"-c"; then
- echo 'x - skipping fastf3.1 (File already exists)'
-else
-echo 'x - extracting fastf3.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fastf3.1' &&
-.TH FASTF/TFASTFv3 1 local
-.SH NAME
-fastf3, fastf3_t \- compare a mixed peptide sequence against a protein
-database using a modified fasta algorithm.
-X
-tfastf3, tfastf3_t \- compare a mixed peptide sequence against a
-translated DNA database.
-X
-.SH DESCRIPTION
-X
-.B fastf3
-and
-.B tfastf3
-are designed to compare a sequence of mixed peptides to a protein
-(fastf3) or translated DNA (tfastf3) database. Unlike the traditional
-.B fasta3
-search, which uses a protein or DNA sequence,
-.B fastf3
-and
-.B tfastf3
-work with a query sequence of the form:
-.in +5
-.nf
->testf from mgstm1
-MGCEN,
-MIDYP,
-MLLAY,
-MLLGY
-.fi
-.in 0
-This sequence indicates that a mixture of four peptides has been
-found, with 'M' in the first position of each one (as from a CNBr
-cleavage), in the second position 'G', 'I', or 'L' (twice), at the
-third position 'C', 'D', or 'L' (twice), at the fourth position 'E',
-'Y', 'A', or 'G', etc. When this sequence is compared against mgstm1.aa
-(included with the distribution), the mixture is deconvolved to form:
-.nf
-.ft C
-.in +5
-testf MILGY-----------MLLEY-----------MGDAP-----------
-X ::::: ::::: :::::
-GT8.7 MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
-X 10 20 30 40 50
-X
-testf --------------------------------------------------
-X
-GT8.7 FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
-X 60 70 80 90 100
-X
-X 20
-testf ------------MLCYN
-X :::::
-GT8.7 ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
-X 110 120 130 140 150
-.in 0
-.ft P
-.fi
-.SH Options
-.LP
-.B fastf3
-and
-.B tfastf3
-can accept a query sequence from the unix "stdin" data stream. This makes it much
-easier to use fasta3 and its relatives as part of a WWW page. To
-indicate that stdin is to be used, use "-" or "@" as the query
-sequence file name.
-.TP
-\-b #
-number of best scores to show (must be < -E cutoff)
-.TP
-\-d #
-number of best alignments to show ( must be < -E cutoff)
-.TP
-\-D
-turn on debugging mode. Enables checks on sequence alphabet that
-cause problems with tfastx3, tfasty3, tfasta3.
-.TP
-\-E #
-Expectation value limit for displaying scores and
-alignments. Expectation values for
-.B fastf3
-and
-.B tfastf3
-are not as accurate as those for the other
-.B fasta3
-programs.
-.TP
-\-H
-turn off histogram display
-.TP
-\-i
-compare against only the reverse complement of the library sequence.
-.TP
-\-L
-report long sequence description in alignments
-.TP
-\-m 0,1,2,3,4,5,6,10
-alignment display options
-.TP
-\-n
-force query to nucleotide sequence
-.TP
-\-N #
-break long library sequences into blocks of # residues. Useful for
-bacterial genomes, which have only one sequence entry. -N 2000 works
-well for bacterial genomes.
-.TP
-\-O file
-send output to file
-.TP
-\-q/-Q
-quiet option; do not prompt for input
-.TP
-\-R file
-save all scores to statistics file
-.TP
-\-S #
-offset substitution matrix values by a constant #
-.TP
-\-s name
-specify substitution matrix. BLOSUM50 is used by default;
-PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
-P250, or BL62. With this version, many more scoring matrices are
-available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
-M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
-specified.
-.TP
-\-T #
-(threaded, parallel only) number of threads or workers to use (set by
-default to 4 at compile time).
-.TP
-\-t #
-Translation table - tfastf3 can use the BLAST translation tables. See
-\fChttp://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
-.TP
-\-w #
-line width for similarity score, sequence alignment, output.
-.TP
-\-x "#,#"
-offsets query, library sequence for numbering alignments
-.TP
-\-z #
-Specify statistical calculation. Default is -z 1, which uses
-regression against the length of the library sequence. -z 0 disables
-statistics. -z 2 uses the ln() length correction. -z 3 uses Altschul
-and Gish's statistical estimates for specific protein BLOSUM scoring
-matrices and gap penalties. -z 4: an alternate regression method.
-.TP
-\-Z db_size
-Set the apparent database size used for expectation value calculations.
-.TP
-\-1
-Sort by "init1" score.
-.TP
-\-3
-(TFASTF3 only) use only forward frame translations
-.SH Environment variables:
-.TP
-FASTLIBS
-location of library choice file (-l FASTLIBS)
-.TP
-SMATRIX
-default scoring matrix (-s SMATRIX)
-.TP
-SRCH_URL
-the format string used to define the option to re-search the
-database.
-.TP
-REF_URL
-the format string used to define the option to lookup the library
-sequence in entrez, or some other database.
-X
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 fastf3.1 ||
-echo 'restore of fastf3.1 failed'
-Wc_c="`wc -c < 'fastf3.1'`"
-test 4824 -eq "$Wc_c" ||
- echo 'fastf3.1: original size 4824, current size' "$Wc_c"
-fi
-# ============= fastlibs ==============
-if test -f 'fastlibs' -a X"$1" != X"-c"; then
- echo 'x - skipping fastlibs (File already exists)'
-else
-echo 'x - extracting fastlibs (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fastlibs' &&
-NBRF PIR1 Annotated Protein Database (rel 56)$0A/seqlib/lib/pir1.seq 5
-NBRF PIR1 Annotated (seg) (rel 56)$0B/slib2/blast/pir1.seg
-NBRF Protein database (complete)$0P@/seqlib/lib/NBRF.nam
-NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
-NCBI/Blast non-redundant proteins$0N/slib2/blast/nr
-NCBI/Blast non-redundant proteins (seg)$0K/slib2/blast/nr.seg
-NCBI/Blast Swissprot$0Q/slib2/blast/swissprot
-NCBI/Blast Swissprot (seg)$0R/slib2/blast/swissprot.seg
-OWL 30.1 non-redundant protein database$0O/slib2/OWL/owl.seq 5
-GENPEPT Translated Protein Database (rel 106.0)$0G/slib2/blast/genpept.fsa
-Swiss-Prot Release 34$0S/slib0/lib/swiss.seq 5
-Yeast proteins$0Y/slib0/genomes/yeast_nr.pep
-C. elegans blast server$0W/slib2/blast/C.elegans_blast.fa
-E. coli proteome$0E/slib0/genomes/ecoli.npep
-H. influenzae proteome$0I/slib0/genomes/hinf.npep
-H. pylori proteome$0L/slib0/genomes/hpyl.npep
-NCBI Entrez Human proteins$0H/slib2/blast/human.aa
-M. pneumococcus proteome$0M/slib0/genomes/mpneu.npep
-M. jannaschii proteome$0J/slib0/genomes/mjan.npep
-Synechosystis proteome$0C/slib0/genomes/synecho.npep
-GB108.0 Invertebrates$1I/seqlib2/gcggenbank/gb_in.seq 6
-GB108.0 Bacteria$1T@/slib0/lib/gb_ba.nam 6
-GB108.0 Primate$1P@/slib0/lib/gb_pri.nam
-GB108.0 Rodent$1R/seqlib2/gcggenbank/gb_ro.seq 6
-GB108.0 other Mammal$1M/seqlib2/gcggenbank/gb_om.seq 6
-GB108.0 verteBrates$1B/seqlib2/gcggenbank/gb_ov.seq 6
-GB108.0 Expressed Seq. Tags$1E@/slib0/lib/gb_est.nam
-GB108.0 High throughput genmomic$1h/seqlib2/gcggenbank/gb_htg.seq 6
-GB108.0 pLants$1L@/slib0/lib/gb_pl.nam 6
-GB108.0 genome Survey sequences$1S@/slib0/lib/gb_gss.nam 6
-GB108.0 Viral$1V/seqlib2/gcggenbank/gb_vi.seq 6
-GB108.0 Phage$1G/seqlib2/gcggenbank/gb_ph.seq 6
-GB108.0 Unannotated$1D/seqlib2/gcggenbank/gb_un.seq 6
-GB108.0 New$1u/seqlib2/gcggenbank/gb_new.seq 6
-GB108.0 All sequences (long)$1A@/slib0/lib/genbank.nam
-Yeast genome$1Y@/seqlib/yeast/yeast_chr.nam
-E. coli genome$1D/slib0/genomes/ecoli.gbk 1
-Blast Human ESTs$1F/slib2/blast/est_human
-TIGR Human Gene Index$1K/slib2/blast/HGI.nr.031898
-Blast Mouse ESTs$1C/slib2/blast/est_mouse
-TIGR Mouse Gene Index$1J/slib2/blast/MGI.nr.022498
-NCBI/BLAST NR DNA$1n/slib2/blast/nt
-SHAR_EOF
-chmod 0644 fastlibs ||
-echo 'restore of fastlibs failed'
-Wc_c="`wc -c < 'fastlibs'`"
-test 2173 -eq "$Wc_c" ||
- echo 'fastlibs: original size 2173, current size' "$Wc_c"
-fi
-# ============= fasts3.1 ==============
-if test -f 'fasts3.1' -a X"$1" != X"-c"; then
- echo 'x - skipping fasts3.1 (File already exists)'
-else
-echo 'x - extracting fasts3.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasts3.1' &&
-.TH FASTS/TFASTSv3 1 local
-.SH NAME
-fasts3, fasts3_t \- compare several short peptide sequences against a protein
-database using a modified fasta algorithm.
-X
-tfasts3, tfasts3_t \- compare short peptides against a
-translated DNA database.
-X
-.SH DESCRIPTION
-X
-.B fasts3
-and
-.B tfasts3
-are designed to compare a set of (presumably non-contiguous) peptides to
-a protein (fasts3) or translated DNA (tfasts3) database.
-fasts3/tfasts3 are designed particularly for short peptide data from
-mass-spec analysis of protein digests. Unlike the traditional
-.B fasta3
-search, which uses a protein or DNA sequence,
-.B fasts3
-and
-.B tfasts3
-work with a query sequence of the form:
-.in +5
-.nf
->tests from mgstm1
-MLLE,
-MILGYW,
-MGADP,
-MLCYNP
-.fi
-.in 0
-This sequence indicates that four peptides are to be used. When this
-sequence is compared against mgstm1.aa (included with the
-distribution), the result is:
-.nf
-.ft C
-.in +5
-testf MILGYW----------MLLE------------MGDAP-----------
-X :::::: :::: :::::
-GT8.7 MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEK
-X 10 20 30 40 50
-X
-testf --------------------------------------------------
-X
-GT8.7 FKLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIV
-X 60 70 80 90 100
-X
-X 20
-testf ------------MLCYNP
-X ::::::
-GT8.7 ENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAG
-X 110 120 130 140 150
-.in 0
-.ft P
-.fi
-.SH Options
-.LP
-.B fasts3
-and
-.B tfasts3
-can accept a query sequence from the unix "stdin" data stream. This makes it much
-easier to use fasta3 and its relatives as part of a WWW page. To
-indicate that stdin is to be used, use "-" or "@" as the query
-sequence file name.
-.TP
-\-b #
-number of best scores to show (must be < -E cutoff)
-.TP
-\-d #
-number of best alignments to show ( must be < -E cutoff)
-.TP
-\-D
-turn on debugging mode. Enables checks on sequence alphabet that
-cause problems with tfastx3, tfasty3, tfasta3.
-.TP
-\-E #
-Expectation value limit for displaying scores and
-alignments. Expectation values for
-.B fasts3
-and
-.B tfasts3
-are not as accurate as those for the other
-.B fasta3
-programs.
-.TP
-\-H
-turn off histogram display
-.TP
-\-i
-compare against only the reverse complement of the library sequence.
-.TP
-\-L
-report long sequence description in alignments
-.TP
-\-m 0,1,2,3,4,5,6,9,10
-alignment display options
-.TP
-\-N #
-break long library sequences into blocks of # residues. Useful for
-bacterial genomes, which have only one sequence entry. -N 2000 works
-well for bacterial genomes.
-.TP
-\-O file
-send output to file
-.TP
-\-q/-Q
-quiet option; do not prompt for input
-.TP
-\-R file
-save all scores to statistics file
-.TP
-\-S #
-offset substitution matrix values by a constant #
-.TP
-\-s name
-specify substitution matrix. BLOSUM50 is used by default;
-PAM250, PAM120, and BLOSUM62 can be specified by setting -s P120,
-P250, or BL62. With this version, many more scoring matrices are
-available, including BLOSUM80 (BL80), and MDM_10, MDM_20, MDM_40 (M10,
-M20, M40). Alternatively, BLASTP1.4 format scoring matrix files can be
-specified.
-.TP
-\-T #
-(threaded, parallel only) number of threads or workers to use (set by
-default to 4 at compile time).
-.TP
-\-t #
-Translation table - tfasts3 can use the BLAST tranlation tables. See
-\fChttp://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c/\fP.
-.TP
-\-w #
-line width for similarity score, sequence alignment, output.
-.TP
-\-x "#,#"
-offsets query, library sequence for numbering alignments
-.TP
-\-z #
-Specify statistical calculation. Default is -z 1, which uses
-regression against the length of the library sequence. -z 0 disables
-statistics. -z 2 uses the ln() length correction. -z 3 uses Altschul
-and Gish's statistical estimates for specific protein BLOSUM scoring
-matrices and gap penalties. -z 4: an alternate regression method.
-.TP
-\-Z db_size
-Set the apparent database size used for expectation value calculations.
-.TP
-\-3
-(TFASTS3 only) use only forward frame translations
-.SH Environment variables:
-.TP
-FASTLIBS
-location of library choice file (-l FASTLIBS)
-.TP
-SMATRIX
-default scoring matrix (-s SMATRIX)
-.TP
-SRCH_URL
-the format string used to define the option to re-search the
-database.
-.TP
-REF_URL
-the format string used to define the option to lookup the library
-sequence in entrez, or some other database.
-X
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 fasts3.1 ||
-echo 'restore of fasts3.1 failed'
-Wc_c="`wc -c < 'fasts3.1'`"
-test 4556 -eq "$Wc_c" ||
- echo 'fasts3.1: original size 4556, current size' "$Wc_c"
-fi
-# ============= fasts3.rsp ==============
-if test -f 'fasts3.rsp' -a X"$1" != X"-c"; then
- echo 'x - skipping fasts3.rsp (File already exists)'
-else
-echo 'x - extracting fasts3.rsp (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'fasts3.rsp' &&
-compacc.obj doinit.obj showbest.obj htime.obj apam.obj scaleswt.obj karlin.obj last_tat.obj tatsta_s.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
-SHAR_EOF
-chmod 0644 fasts3.rsp ||
-echo 'restore of fasts3.rsp failed'
-Wc_c="`wc -c < 'fasts3.rsp'`"
-test 203 -eq "$Wc_c" ||
- echo 'fasts3.rsp: original size 203, current size' "$Wc_c"
-fi
-# ============= getenv.c ==============
-if test -f 'getenv.c' -a X"$1" != X"-c"; then
- echo 'x - skipping getenv.c (File already exists)'
-else
-echo 'x - extracting getenv.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'getenv.c' &&
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-X
-#define MAXENV 1024
-char *envstr;
-X
-char *mgetenv(str)
-char *str;
-{
-X static int EnvInit=0;
-X
-X char *eptr, *esptr, *bp;
-X int i,esize;
-X FILE *fenv;
-X
-X if (EnvInit==0) {
-X EnvInit=1;
-X if ((fenv=fopen("environment","r"))!=NULL) {
-X if ((envstr=malloc((size_t)(esize=MAXENV)))==NULL) {
-X fclose(fenv); goto noenv;}
-X esptr=envstr; esize -= 10;
-X while (fgets(esptr,esize,fenv)!=NULL) {
-X if ((bp=strchr(esptr,'\n'))!=NULL) *bp='\0';
-X esize -= (i=strlen(esptr)+1);
-X esptr += i;
-X }
-X fclose(fenv);
-X esptr='\0';
-X }
-X else envstr=NULL;
-X }
-X
-X if (envstr==NULL) return NULL;
-X else {
-X for (eptr=envstr; *eptr; eptr += strlen(eptr)+1) {
-X if (strncmp(str,eptr,(long)strlen(str))==0) {
-X return strchr(eptr,'=')+1;
-X }
-X }
-X return NULL;
-X }
-noenv: envstr=NULL; return NULL;
-X }
-X
-strnpcpy(to,from,max)
-X char *to; Str255 from; size_t max;
-{
-X size_t i, n;
-X
-X n = (*from<max) ? *from : max;
-X from++;
-X
-X for (i=0; i<n; i++) *to++ = *from++;
-X if (n<max) *to='\0';
-X }
-SHAR_EOF
-chmod 0644 getenv.c ||
-echo 'restore of getenv.c failed'
-Wc_c="`wc -c < 'getenv.c'`"
-test 1036 -eq "$Wc_c" ||
- echo 'getenv.c: original size 1036, current size' "$Wc_c"
-fi
-# ============= getopt.c ==============
-if test -f 'getopt.c' -a X"$1" != X"-c"; then
- echo 'x - skipping getopt.c (File already exists)'
-else
-echo 'x - extracting getopt.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'getopt.c' &&
-/*LINTLIBRARY*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#define ERR(s, c) if(opterr){\
-X char errbuf[3];\
-X errbuf[0] = c; errbuf[1] = '\n'; errbuf[2]='\0';\
-X (void) fputs(argv[0],stderr);\
-X (void) fputs(s,stderr);\
-X (void) fputs(errbuf,stderr);}
-X
-X
-int opterr = 1;
-int optind = 1;
-int optopt;
-char *optarg;
-X
-int
-getopt(argc, argv, opts)
-int argc;
-char **argv, *opts;
-{
-X static int sp = 1;
-X register int c;
-X register char *cp;
-X
-X if(sp == 1)
-X if(optind >= argc ||
-X argv[optind][0] != '-' || argv[optind][1] == '\0')
-X return(EOF);
-X else if(strcmp(argv[optind], "--") == 0) {
-X optind++;
-X return(EOF);
-X }
-X optopt = c = argv[optind][sp];
-X if(c == ':' || (cp=strchr(opts, c)) == NULL) {
-X ERR(": illegal option -- ", c);
-X if(argv[optind][++sp] == '\0') {
-X optind++;
-X sp = 1;
-X }
-X return('?');
-X }
-X if(*++cp == ':') {
-X if(argv[optind][sp+1] != '\0')
-X optarg = &argv[optind++][sp+1];
-X else if(++optind >= argc) {
-X ERR(": option requires an argument -- ", c);
-X sp = 1;
-X return('?');
-X } else
-X optarg = argv[optind++];
-X sp = 1;
-X } else {
-X if(argv[optind][++sp] == '\0') {
-X sp = 1;
-X optind++;
-X }
-X optarg = NULL;
-X }
-X return(c);
-}
-SHAR_EOF
-chmod 0644 getopt.c ||
-echo 'restore of getopt.c failed'
-Wc_c="`wc -c < 'getopt.c'`"
-test 1174 -eq "$Wc_c" ||
- echo 'getopt.c: original size 1174, current size' "$Wc_c"
-fi
-# ============= getseq.c ==============
-if test -f 'getseq.c' -a X"$1" != X"-c"; then
- echo 'x - skipping getseq.c (File already exists)'
-else
-echo 'x - extracting getseq.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'getseq.c' &&
-/* May, June 1987 - modified for rapid read of database
-X
-X copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson
-X
-X This is one of three alternative files that can be used to
-X read a database. The three files are nxgetaa.c, nmgetaa.c, and
-X mmgetaa.c.
-X
-X nxgetaa.c contains the original code for reading databases, and
-X is still used for Mac and PC versions of fasta33 (which do not
-X use mmap).
-X
-X nmgetaa.c and mmgetaa.c are used together. nmgetaa.c provides
-X the same functions as nxgetaa.c if memory mapping is not used,
-X mmgetaa.c provides the database reading functions if memory
-X mapping is used. The decision to use memory mapping is made on
-X a file-by-file basis.
-X
-X June 2, 1987 - added TFASTA
-X March 30, 1988 - combined ffgetaa, fgetgb;
-X April 8, 1988 - added PIRLIB format for unix
-X Feb 4, 1989 - added universal subroutines for libraries
-X December, 1995 - added range option file.name:1-1000
-X Feb 22, 2002 - fix to allow "plain" text file queries
-X
-X getnt.c associated subroutines for matching sequences */
-X
-/* $Name: fa_34_26_5 $ - $Id: getseq.c,v 1.13 2006/10/05 18:22:07 wrp Exp $ */
-X
-/*
-X 8-April-88
-X The compile time #define PIRLIB allows this routine to be used
-X to read protein and DNA sequence libraries in the NBRF/PIR
-X VAX/VMS library format. That is:
-X
-X >P1;LCBO
-X This is a line of description
-X GTYH ... the sequence starts on this line
-X
-X This may ease conversion from UWGCG format libraries. It
-X has not been extensively tested.
-X
-X In addition, sequence libraries with a '>' in the 4th position
-X are recognized as NBRF format libraries for consistency with
-X UWGCG
-*/
-X
-/* Nov 12, 1987 - this version checks to see if the sequence
-X is DNA or protein by asking whether > 85% is A, C, G, T
-X
-X May 5, 1988 - modify the DNA/PROTEIN checker by re-reading
-X DNA sequences in order to check for 'U'.
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "structs.h"
-X
-#ifndef SFCHAR
-#define SFCHAR ':'
-#endif
-X
-#ifdef VMS
-#define PIRLIB
-#endif
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#undef XTERNAL
-X
-#define YES 1
-#define NO 0
-#define MAXLINE 512
-X
-#ifndef min
-#define min(x,y) ((x) > (y) ? (y) : (x))
-#endif
-X
-#ifdef SUPERFAMNUM
-extern int nsfnum; /* number of superfamily numbers */
-extern int sfnum[]; /* superfamily number from types 0 and 5 */
-extern int nsfnum_n;
-extern int sfnum_n[];
-#endif
-X
-#define NO_FORMAT 0
-#define FASTA_FORMAT 1
-#define GCG_FORMAT 2
-X
-static int seq_format=NO_FORMAT;
-static char seq_title[200];
-X
-int scanseq(unsigned char *, int, char *);
-void sf_sort(int *, int);
-extern void init_ascii(int is_ext, int *sascii, int is_dna);
-X
-/* getseq - get a query sequence, possibly re-reading to set type
-X returns - length of query sequence or error = 0
-X
-X char *filen - name of file to be opened
-X char *seq - destination for query sequence
-X int maxs - maximum length of query
-X char libstr[20] - short description (locus or acc)
-X int *dnaseq - -1 => use scanseq to determine sequence type
-X 0 => must be protein
-X 1 => must be DNA
-X long *sq0off - offset into query specified by query_file:1001-2000
-*/
-X
-int
-getseq(char *filen, int *qascii, unsigned char *seq, int maxs, char *libstr, long *sq0off)
-{
-X FILE *fptr;
-X char line[512],*bp, *bp1, *bpn, *tp;
-X int i, rn, n;
-X int ic;
-X int sstart, sstop, sset=0;
-X int llen, l_offset;
-#ifdef SUPERFAMNUM
-X static char tline[MAXLINE];
-#endif
-X
-X seq_title[0]='\0';
-X libstr[0]='\0';
-X
-X sstart = sstop = -1;
-#ifndef DOS
-X if ((bp=strchr(filen,':'))!=NULL && *(bp+1)!='\0') {
-#else
-X if ((bp=strchr(filen+3,':'))!=NULL && *(bp+1)!='\0') {
-#endif
-X *bp='\0';
-X if (*(bp+1)=='-') {
-X sstart = 0;
-X sscanf(bp+2,"%d",&sstop);
-X }
-X else {
-X sscanf(bp+1,"%d-%d",&sstart,&sstop);
-X sstart--;
-X if (sstop <= 0 ) sstop = BIGNUM;
-X }
-X sset=1;
-X }
-X else {
-X sstart = 0;
-X sstop = BIGNUM;
-X }
-X
-X /* check for input from stdin */
-X if (strcmp(filen,"-") && strcmp(filen,"@")) {
-X if ((fptr=fopen(filen,"r"))==NULL) {
-X fprintf(stderr," could not open %s\n",filen);
-X return 0;
-X }
-X }
-X else {
-X fptr = stdin;
-X }
-X rn = n=0;
-X
-X while(fgets(line,sizeof(line),fptr)!=NULL) {
-#ifdef PIRLIB
-X if (line[0]=='>'&& (line[3]==';'||line[3]=='>'))
-X fgets(line,sizeof(line),fptr);
-X else
-#endif
-X l_offset = 0;
-X if (line[0]=='>') {
-X seq_format = FASTA_FORMAT;
-#ifdef SUPERFAMNUM
-X sfnum[nsfnum=0]= sfnum_n[nsfnum_n=0]=0;
-X strncpy(tline,line+1,sizeof(tline));
-X tline[sizeof(tline)-1]='\0';
-X
-X if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
-X if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
-X fprintf(stderr," second %c missing: %s\n",SFCHAR,tline);
-X }
-X else {
-X if ((bpn=strchr(bp+1,NSFCHAR))!=NULL) *bpn = '\0';
-X *bp1 = '\0';
-X i = 0;
-X if ((tp = strtok(bp+1," \t"))!=NULL) {
-X sfnum[i++] = atoi(tp);
-X while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
-X if (isdigit(*tp)) sfnum[i++] = atoi(tp);
-X if (i>=9) break;
-X }
-X }
-X sfnum[nsfnum=i]= 0;
-X if (nsfnum>1) sf_sort(sfnum,nsfnum);
-X else {
-X if (nsfnum < 1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
-X }
-X if (bpn != NULL) {
-X tp = strtok(bpn+1," \t");
-X sfnum_n[0]=atoi(tp);
-X i = 1;
-X while ((tp=strtok(NULL," \t"))!=NULL) {
-X sfnum_n[i++] = atoi(tp);
-X if (i >= 10) {
-X fprintf(stderr,
-X " error - too many negative superfamilies: %d\n %s\n",
-X i,tline);
-X break;
-X }
-X }
-X sfnum_n[nsfnum_n=i]=0;
-X sf_sort(sfnum_n,nsfnum_n);
-X }
-X }
-X }
-X else {
-X sfnum[nsfnum = 0] = 0;
-X sfnum_n[nsfnum_n = 0] = 0;
-X }
-#endif
-X if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
-X strncpy(seq_title,line+1,sizeof(seq_title));
-X seq_title[sizeof(seq_title)-1]='\0';
-X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
-X strncpy(libstr,line+1,12);
-X libstr[12]='\0';
-X }
-X else if (seq_format==NO_FORMAT && strcmp(line,"..")==0) {
-X seq_format = GCG_FORMAT;
-/*
-X if (*dnaseq != 1) qascii['*'] = qascii['X'];
-*/
-X l_offset = 10;
-X llen = strlen(line);
-X while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
-X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
-X llen = strlen(line);
-X }
-X bp = strtok(line," \t");
-/*
-X if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
-X else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
-*/
-X if (bp!=NULL) strncpy(libstr,bp,12);
-X else strncpy(libstr,filen,12);
-X libstr[12]='\0';
-X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
-X }
-X else {
-X if (libstr[0]=='\0') strncpy(libstr,filen,12);
-X libstr[12]='\0';
-X }
-X
-X if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
-X
-X if (line[0]!='>'&& line[0]!=';') {
-X for (i=l_offset; (n<maxs && rn < sstop)&&
-X ((ic=qascii[line[i]&AAMASK])<EL); i++)
-X if (ic<NA && ++rn > sstart) seq[n++]= ic;
-X if (ic == ES || rn > sstop) break;
-X }
-X }
-X
-X if (n==maxs) {
-X fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
-X fflush(stderr);
-X }
-X if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
-X if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
-X seq[n]= EOSEQ;
-X
-X
-X if (seq_format !=GCG_FORMAT)
-X while(fgets(line,sizeof(line),fptr)!=NULL) {
-#ifdef PIRLIB
-X if (line[0]=='>'&& (line[3]==';'||line[3]=='>'))
-X fgets(line,sizeof(line),fptr);
-X else
-#endif
-X if (line[0]!='>'&& line[0]!=';') {
-X for (i=0; (n<maxs && rn < sstop)&&
-X ((ic=qascii[line[i]&AAMASK])<EL); i++)
-X if (ic<NA && ++rn > sstart ) seq[n++]= ic;
-X if (ic == ES || rn > sstop) break;
-X }
-X }
-X else {
-X llen = strlen(line);
-X while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
-X if (fgets(line,sizeof(line),fptr)==NULL) return 0;
-X llen = strlen(line);
-X }
-X while (fgets(line,sizeof(line),fptr)!=NULL) {
-X if (strlen(line)<l_offset) continue;
-X for (i=l_offset; (n<maxs && rn < sstop) &&
-X ((ic=qascii[line[i]&AAMASK])<EL); i++)
-X if (ic<NA && ++rn > sstart ) seq[n++]= ic;
-X if (ic == ES || rn > sstop ) break;
-X }
-X }
-X
-X if (n==maxs) {
-X fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
-X fflush(stderr);
-X }
-X seq[n]= EOSEQ;
-X
-X if (fptr!=stdin) fclose(fptr);
-X
-X if (sset==1) {
-X sstart++;
-X filen[strlen(filen)]=':';
-X if (*sq0off==1 || sstart>=1) *sq0off = sstart;
-X }
-X
-X return n;
-}
-X
-int
-gettitle(char *filen, char *title, int len) {
-X FILE *fptr;
-X char line[512];
-X char *bp;
-X int sset;
-#ifdef WIN32
-X char *strpbrk();
-#endif
-X
-X sset = 0;
-X
-X if (strncmp(filen,"-",1)==0 || strncmp(filen,"@",1)==0) {
-X strncpy(title,seq_title,len);
-X title[len-1]='\0';
-X return (int)strlen(title);
-X }
-X
-X if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
-X
-X
-X if ((fptr=fopen(filen,"r"))==NULL) {
-X fprintf(stderr," file %s was not found\n",filen);
-X fflush(stderr);
-X return 0;
-X }
-X
-X if (sset==1) filen[strlen(filen)]=':';
-X
-X while(fgets(line,sizeof(line),fptr)!=NULL) {
-X if (line[0]=='>'|| line[0]==';') goto found;
-X }
-X fclose(fptr);
-X title[0]='\0';
-X return 0;
-X
-X found:
-X
-#ifdef PIRLIB
-X if (line[0]=='>'&&(line[3]==';'||line[3]=='>')) {
-X if ((bp = strchr(line,'\n'))!=NULL) *bp='\0';
-X ll=strlen(line); line[ll++]=' '; line[ll]='\0';
-X fgets(&line[ll],sizeof(line)-ll,fptr);
-X }
-#endif
-X
-#ifdef WIN32
-X bp = strpbrk(line,"\n\r");
-#else
-X bp = strchr(line,'\n');
-#endif
-X if (bp!=NULL) *bp = 0;
-X strncpy(title,line,len);
-X title[len-1]='\0';
-X fclose(fptr);
-X return strlen(title);
-}
-X
-SHAR_EOF
-chmod 0644 getseq.c ||
-echo 'restore of getseq.c failed'
-Wc_c="`wc -c < 'getseq.c'`"
-test 9431 -eq "$Wc_c" ||
- echo 'getseq.c: original size 9431, current size' "$Wc_c"
-fi
-# ============= grou_drome.pseg ==============
-if test -f 'grou_drome.pseg' -a X"$1" != X"-c"; then
- echo 'x - skipping grou_drome.pseg (File already exists)'
-else
-echo 'x - extracting grou_drome.pseg (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'grou_drome.pseg' &&
->gi|121620|sp|P16371|GROU_DROME GROUCHO PROTEIN (ENHANCER OF SPLIT M9/10)
-MYPSPVRHpaaggpppqgpIKFTIADTLERIKEEFNFLQAHYHSIKLECEKLSNEKTEMQ
-RHYVMYYEMSYGLNVEMHKQTEIAKRLNTLINQLLPFLQADHQQQVLQAVERAKQVTMQE
-LNLIIGQQIHAqqvpggppqpmgALNPFGALGATMGLPHGPQGLLNKPPEHHRPDIKPTG
-LEGPAAAEERLRNSVSPADREKYRTRSPLDIENDSKRRKDEKLQEDEGEKSDQDLVVDVA
-NEMESHSPRPNGEHVSMEVRDRESLNGERLEKPSSSGIKQErppsrsgssssrstpsLKT
-KDMEKPGTPGakartptpnaaapapgvnpkqmmpqgpppagypgapyqrpaDPYQRPPSD
-PAYGRPPPMPYDPHAHVRTNGIPHPSALTGGKPAYSFHMNGEGSLQPVPFPPDALVGVGI
-PRHARQINTLSHGEVVCAVTISNPTKYVYTGGKGCVKVWDISQPGNKNPVSQLDCLQRDN
-YIRSVKLLPDGRTLIVGGEASNLSIWDLASPTPRIKAELTSAAPACYALAISPDSKVCFS
-CCSDGNIAVWDLHNEILVRQFQGHTDGASCIDISPDGSRLWTGGLDNTVRSWDLREGRQL
-QQHDFSSQIFSLGYCPTGDWLAVGMENSHVEVLHASKPDKYQLHLHESCVLSLRFAACGK
-WFVSTGKDNLLNAWRTPYGASIFQSKETSSVLSCDISTDDKYIVTGSGDKKATVYEVIY
-X
-SHAR_EOF
-chmod 0644 grou_drome.pseg ||
-echo 'restore of grou_drome.pseg failed'
-Wc_c="`wc -c < 'grou_drome.pseg'`"
-test 806 -eq "$Wc_c" ||
- echo 'grou_drome.pseg: original size 806, current size' "$Wc_c"
-fi
-# ============= gst.nlib ==============
-if test -f 'gst.nlib' -a X"$1" != X"-c"; then
- echo 'x - skipping gst.nlib (File already exists)'
-else
-echo 'x - extracting gst.nlib (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'gst.nlib' &&
->pGT875 | 266
-GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
-GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
-TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
-TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
-CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
-AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
-catgcagctcatcatgctctgttacaaccctgactttgagaagcagaagccagagttctt
-gaagaccatccctgagaaaatgaagctctactctgagttcctgggcaagaggccatggtt
-tgcaggggacaaggtcacctatgtggatttccttgcttatgacattcttgaccagtaccg
-tatgtttgagcccaagtgcctggacgccttcccaaacctgagggacttcctggcccgctt
-cgagggcctcaagaagatctctgcctacatgaagagtagccgctacatcgcaacacctat
-ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
-GAGGACCTGTCCACACTGGGGATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
-TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
-TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
-ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
-TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
-CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
-CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
->RABGLTR Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
-X CGGCAGCTCC TGTGGACTCA GAGGAGCTGC ACCATGCCCA TGACGCTGGG TTACTGGGAC
-X GTCCGTGGGC TGGCTCTGCC AATCCGCATG CTCCTGGAAT ACACGGACAC CAGCTATGAG
-X GAAAAGAAAT ACACCATGGG GGATGCTCCC AACTATGACC AAAGCAAGTG GCTGAGTGAG
-X AAGTTCACCC TGGGCCTGGA CTTTCCCAAT CTGCCCTACC TAATTGATGG GACTCACAAG
-X CTCACGCAGA GCAACGCCAT CCTGCGCTAC CTGGCCCGCA AGCACGGCCT GTGTGGGGAG
-X ACGGAAGAGG AGAGGATTCG CGTGGACATT CTGGAGAATC AGCTGATGGA CAACCGCTTC
-X CAACTTGTAA ACGTCTGCTA CAGTCCCGAC TTTGAGAAGC TCAAGCCCGA GTACCTGAAG
-X GGGCTCCCTG AGAAGCTGCA GCTGTACTCG CAGTTCCTGG GAAGCCTCCC CTGGTTCGCA
-X GGGGACAAGA TCACCTTCGC CGATTTCCTT GTCTACGACG TTCTTGACCA GAACCGGATA
-X TTTGTGCCTG GGTGCCTGGA CGCGTTCCCA AACCTGAAGG ACTTTCATGT CCGCTTTGAG
-X GGCCTGCCGA AGATCTCTGC CTACATGAAG TCCAGCCGCT TTATCCGAGT CCCTGTGTTT
-X TTAAAGAAGG CCACGTGGAC GGGAATATAG GGCCCTGGAA GGAGGTGGGC CATCCCCTGG
-X GAGCTCAGGT CTCCCAGCCT CTTGCTCATC TTCCTCAACC TTCCCAAAAA CAAAAGCCTA
-X CTGCCTGCTT GTGTTCTGAG CCAGCCCCTC CCATGCAGGC TCTGGCCAGC TCAGAAACCC
-X ACCCTTCTAG CCATGGGCTC TCTAAGGCTG CTCTTCCCGG ACTAAGCAGA CCCCACGGGC
-X CACATCTCTC TTCGTGGGCT CCGTTTGATC TCCCCGACTG CCAGAATCAT GGTTGTACCT
-X GCTGCGGCCC TATTCCCAGG CGGGACTCCC CAGTGCTGTT TGGTCCCCAG GAGGGCCTGA
-X CCTCAGCCAG GGCCCTTCTT ACCCCTCCCT GTGTTGCACT GGAGTGGGCG CTGACTGTGC
-X AGACCTTGGG GGGGTTTCTT TGTTCTGCTG CCCACAGCAT GGCTGGGTGG GGCAGGATTA
-X GTGTGGGGGG AGTTGGGTGC TCAGGCAGGG CTATGAGGGA TCTTGTTCAT TTCCGGGCCC
-X TATCCATGTG CTCTGCTCCT CGCCCTGGGT TTTCTCCTCT GCCCGGGTTC CTCGTTCCTT
-X CACCCTGGAG GGAGGCCAGG GCCACGTGCA GCCGTGCCGG GTTCTGAGAG CGCTGGGCTG
-X ATGGGGACGG GGCTGAGCAG GCTTGAGCAG ACCCCTCTGT CACCATCTCC CGGAAGCTTT
-X CAGCTGATAC AGATGCTCCT CGTCTATAGT TTCAGGATGT TTCTCAATAA AACATCCCAC
-X TGT
->BTGST Bovine GST mRNA for gluthathione S-transferase, class-pi.
-X CGGCTCAGGC CGCCGCCGAG CGCGCTGGAA CTTTGCTGCC GCCGCCACCT TTACCGACTT
-X CCCCGACTCC AGGATGCCTC CCTACACCAT CGTCTACTTC CCGGTTCAAG GGCGCTGCGA
-X GGCCATGCGC ATGCTGCTGG CCGACCAGGG CCAGAGCTGG AAGGAGGAGG TCGTAGCCAT
-X GCAGAGCTGG CTGCAGGGCC CACTCAAGGC CTCCTGCCTG TACGGGCAGC TCCCCAAGTT
-X CCAGGACGGA GACCTCACGC TGTACCAGTC CAATGCCATC CTGCGGCACC TGGGCCGCAC
-X CCTCGGGCTG TATGGGAAGG ACCAGCAGGA GGCGGCCCTG GTGGACATGG TGAATGACGG
-X TGTAGAGGAC CTTCGCTGCA AATACGTCTC CCTCATTTAC ACCAACTACG AGGCGGGCAA
-X GGAGGACTAT GTGAAGGCGC TGCCCCAGCA CCTGAAGCCT TTCGAGACCC TGCTGTCCCA
-X GAACAAGGGT GGCCAGGCCT TCATCGTGGG CGACCAGATC TCCTTTGCGG ACTACAACCT
-X GCTGGACCTG CTTCGGATTC ACCAGGTCCT GGCCCCCAGC TGTCTGGACT CCTTCCCCCT
-X GCTCTCAGCC TACGTGGCCC GTCTCAACTC CCGGCCCAAG CTCAAGGCCT TCCTGGCCTC
-X CCCCGAGCAC ATGAACCGGC CCATCAACGG CAATGGGAAA CAGTGAGGGC TTGCAGCACT
-X CTCTGCTCGA GGCAGGGGGC TGCCTGCTCT TCCCTTTCCC CAGGACCAAT AAAACTTCCA
-X AGAGAGAAAA AAAAAAAAAA AAAAAAAAA
->OCDHPR Rabbit mRNA for dihydropyridine (DHP) receptor (from skeletal
-X TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
-X GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
-X GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
-X CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
-X CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
-X AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
-X AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
-X GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
-X CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
-X ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
-X GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
-X CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
-X GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
-X CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
-X ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
-X TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
-X GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
-X CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
-X TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
-X GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
-X CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
-X TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
-X TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
-X CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
-X AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
-X CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
-X ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
-X ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
-X CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
-X AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
-X GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
-X AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
-X CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
-X GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
-X AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
-X GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
-X GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
-X GAGGCGGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGCAGGAAG
-X ATGTCCAGGG GTCTCCCTGA CAAGACAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
-X GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
-X GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
-X GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
-X CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
-X AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
-X CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
-X TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
-X GAGATTGTCC TCAAGATGAC GACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
-X AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCTGTGT CTCTCATCTC CATGGGTCTC
-X GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
-X CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
-X CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATT
-X GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAACG ACCTATCCAA GATGACAGAA
-X GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
-X CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
-X TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
-X TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
-X ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
-X GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
-X CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
-X CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
-X GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
-X ATGAACCACA TCTCGGACAT CCTCAACGTG GCCTTCACCA TCATCTTCAC ACTGGAGATG
-X ATCCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
-X GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
-X CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
-X GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTCATGAG GCTGATCAAG
-X CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
-X GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
-X ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
-X TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
-X CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
-X CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
-X ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
-X TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCTATC
-X TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
-X CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
-X AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
-X ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTTGAGCAG
-X GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTG
-X CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
-X ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
-X TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
-X GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
-X AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACGGG AGGCCTGTTT
-X GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTGC CCCCGGTGAT GGCCAACCAA
-X AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
-X GACTTCCCTC AAGATGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
-X GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
-X TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
-X CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
-X CTGGTCCAGC CGGGGATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
-X ACGGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
-X GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
-X CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
-X TTCGTCACGG CAACAAGCCA GGCCCTGGCA GACGCCTGTC AGATGGAACC GGAGGAAGTA
-X GAGGTCGCAG CCACAGAGCT ACTGAAAGCG CGAGAGTCTG TCCAGGGCAT GGCCAGTGTC
-X CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
-X GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGGTGTC CACATGACCA AGGCGAGAGG
-X GACAGTGCGT GCAGAAGCTC AGCCCTGCAT GGCAGCCTCC CTCTGTCTCA GCCCTCCTGC
-X TGAGCTGGGG CGGTCTGGAA CCGCACCAGG AAGCCAGGAG CCTCCCCTGG CCAGCAAGAG
-X GCATGATTCT AAAGCCATCC AGAAAGGCCT GGTCAGTGCC ACTCCCCAGC AGGACATTAA
-X AGTCTCTAGG TCTGTGGCAC TGG
->RABALP1A Rabbit dihydropyridine-sensitive calcium channel alpha-1 subunit
-X TTCCACCTAC ATGTTGGCCT GGACAGCAGG GAGCCGAGGG GAGGCTAATT TTACTGCTGG
-X GAGCAGCTAG CATAATCCTC CCGCCCCCAC CCCGCTGGCT CAGCAGGGCA GGCTTCGCCC
-X GGCAAGCTCA GCGGCCCAGT CCCCAAGGCG GGGAACACTG GGGACGCAGG GAAGAGAGGG
-X CCGCGGGGTG GGGGAGCAGC AGGAAGCGCC GTGGCCAGGG AAGCCATGGA GCCATCCTCA
-X CCCCAGGATG AGGGCCTGAG GAAGAAACAG CCCAAGAAGC CCCTGCCCGA GGTCCTGCCC
-X AGGCCGCCGC GGGCTCTGTT CTGCCTGACC CTGCAGAACC CGCTGAGGAA GGCGTGCATC
-X AGCATCGTGG AATGGAAACC CTTCGAGACC ATCATCCTGC TCACCATCTT TGCCAACTGT
-X GTGGCCCTGG CCGTGTACCT GCCCATGCCC GAGGATGACA ACAACTCCCT GAACCTGGGC
-X CTGGAGAAGC TGGAGTACTT CTTCCTCACC GTCTTCTCCA TCGAAGCCGC CATGAAGATC
-X ATCGCCTACG GCTTCCTGTT CCACCAGGAC GCCTACCTGC GCAGCGGCTG GAACGTGCTG
-X GACTTCATCA TCGTCTTCCT GGGGGTCTTC ACGGCGATTC TGGAACAGGT CAACGTCATC
-X CAGAGCAACA CGGCCCCGAT GAGCAGCAAA GGAGCCGGCC TGGACGTCAA GGCCCTGAGG
-X GCCTTCCGTG TGCTCAGACC CCTCCGGCTG GTGTCGGGGG TGCCTAGTTT GCAGGTGGTC
-X CTCAACTCCA TCTTCAAGGC CATGCTCCCC CTGTTCCACA TCGCCCTGCT CGTCCTCTTC
-X ATGGTCATCA TCTACGCCAT CATCGGGCTG GAGCTCTTCA AGGGCAAGAT GCACAAGACC
-X TGCTACTACA TCGGGACAGA CATCGTGGCC ACAGTGGAGA ATGAGAAGCC CTCGCCCTGC
-X GCTAGGACGG GCTCGGGGCG CCCCTGCACC ATCAACGGCA GCGAGTGCCG GGGCGGCTGG
-X CCGGGGCCCA ACCACGGCAT CACGCACTTC GACAACTTCG GCTTCTCCAT GCTCACCGTG
-X TACCAGTGCA TCACCATGGA GGGCTGGACA GATGTCCTCT ACTGGGTCAA CGATGCCATC
-X GGGAACGAGT GGCCCTGGAT CTACTTTGTC ACTCTCATCC TGCTGGGGTC CTTCTTCATC
-X CTCAACCTGG TGCTGGGCGT CCTGAGTGGG GAATTCACCA AGGAGCGGGA GAAGGCCAAG
-X TCCAGGGGAA CCTTCCAGAA GCTGCGGGAG AAGCAGCAGC TGGAGGAGGA CCTTCGGGGC
-X TACATGAGCT GGATCACGCA GGGCGAGGTC ATGGACGTGG AGGACCTGAG AGAAGGAAAG
-X CTGTCCTTGG AAGAGGGAGG CTCCGACACG GAAAGCCTGT ACGAAATCGA GGGCTTGAAC
-X AAAATCATCC AGTTCATCCG ACACTGGAGG CAGTGGAACC GTGTCTTTCG CTGGAAGTGC
-X CATGACCTGG TGAAGTCGAG AGTCTTCTAC TGGCTGGTCA TCCTGATCGT GGCCCTCAAC
-X ACCCTGTCCA TCGCCTCGGA GCACCACAAC CAGCCGCTCT GGCTGACCCA CTTGCAAGAC
-X ATCGCCAATC GAGTGCTGCT GTCACTCTTC ACCATCGAGA TGCTGCTGAA GATGTACGGG
-X CTGGGCCTGC GCCAGTACTT CATGTCCATC TTCAACCGCT TCGACTGCTT CGTGGTGTGC
-X AGCGGCATCC TGGAGCTGCT GCTGGTGGAG TCGGGCGCCA TGACGCCGCT GGGCATCTCC
-X GTGTTGCGCT GCATCCGCCT CCTGAGGCTC TTCAAGATCA CCAAGTACTG GACGTCGCTC
-X AGCAACCTGG TGGCCTCCCT GCTCAACTCC ATCCGCTCCA TCGCCTCGCT GCTGCTGCTG
-X CTCTTCCTCT TCATCATCAT CTTCGCCCTG CTGGGCATGC AGCTCTTCGG GGGGCGGTAC
-X GACTTCGAGG ACACGGAAGT GCGACGCAGC AACTTCGACA ACTTCCCCCA GGCCCTCATC
-X AGCGTCTTCC AGGTGCTGAC GGGTGAGGAC TGGAACTCCG TGATGTACAA CGGGATCATG
-X GCCTACGGAG GCCCGTCCTA CCCGGGCGTT CTCGTGTGCA TCTATTTCAT CATCCTTTTT
-X GTCTGCGGCA ACTATATCCT GCTGAATGTC TTCCTGGCCA TCGCCGTGGA CAACCTGGCC
-X GAGGCCGAGA GCCTGACTTC CGCGCAAAAG GCCAAGGCCG AGGAGAGGAA ACGTAGGAAG
-X ATGTCCAGGG GTCTCCCTGA CAAGAGAGAG GAGGAGAAGT CTGTGATGGC CAAGAAGCTG
-X GAGCAGAAGC CCAAGGGGGA GGGCATCCCC ACCACTGCCA AGCTCAAGGT CGATGAGTTC
-X GAATCTAACG TCAACGAGGT GAAGGACCCC TACCCTTCAG CTGACTTCCC AGGGGATGAT
-X GAGGAGGACG AGCCTGAGAT CCCAGTGAGC CCCCGACCGC GCCCGCTGGC CGAGCTGCAG
-X CTCAAAGAGA AGGCAGTGCC CATCCCGGAA GCCAGCTCCT TCTTCATCTT CAGTCCCACC
-X AATAAGGTCC GTGTCCTGTG TCACCGCATC GTCAACGCCA CCTGGTTCAC CAACTTCATC
-X CTGCTCTTCA TCCTGCTCAG CAGTGCTGCG CTGGCCGCCG AGGACCCCAT CCGGGCGGAG
-X TCCGTGAGGA ATCAGATCCT TGGATATTTT GATATTGCCT TCACCTCTGT CTTCACTGTG
-X GAGATTGTCC TCAAGATGAC AACCTACGGC GCCTTCCTGC ACAAGGGCTC CTTCTGCCGC
-X AACTACTTCA ACATCCTGGA CCTGCTGGTG GTGGCCGTGT CTCTCATCTC CATGGGTCTC
-X GAGTCCAGCA CCATCTCCGT GGTAAAGATC CTGAGAGTGC TAAGGGTGCT CCGGCCCCTG
-X CGAGCCATCA ACAGAGCCAA AGGGTTGAAG CACGTGGTCC AGTGCGTGTT CGTGGCCATC
-X CGCACCATCG GGAACATCGT CCTGGTCACC ACGCTCCTGC AGTTCATGTT CGCCTGCATC
-X GGTGTCCAGC TCTTCAAGGG CAAGTTCTTC AGCTGCAATG ACCTATCCAA GATGACAGAA
-X GAGGAGTGCA GGGGCTACTA CTATGTGTAC AAGGACGGGG ACCCCACGCA GATGGAGCTG
-X CGCCCCCGCC AGTGGATACA CAATGACTTC CACTTTGACA ACGTGCTGTC GGCCATGATG
-X TCGCTCTTCA CGGTGTCCAC CTTCGAGGGA TGGCCCCAGC TGCTGTACAG GGCCATAGAC
-X TCCAACGAGG AGGACATGGG CCCCGTTTAC AACAACCGAG TGGAGATGGC CATCTTCTTC
-X ATCATCTACA TCATCCTCAT TGCCTTCTTC ATGATGAACA TCTTTGTGGG CTTTGTCATC
-X GTCACCTTCC AGGAGCAGGG GGAGACAGAG TACAAGAACT GCGAGCTGGA CAAGAACCAG
-X CGCCAGTGTG TGCAGTATGC CCTGAAGGCC CGCCCACTTC GGTGCTACAT CCCCAAGAAC
-X CCATACCAGT ACCAGGTGTG GTACGTCGTC ACCTCCTCCT ACTTTGAATA CCTGATGTTC
-X GCCCTCATCA TGCTCAACAC CATCTGCCTG GGCATGCAGC ACTACCACCA GTCGGAGGAG
-X ATGAACCACA TCTCAGACAT CCTCAATGTG GCCTTCACCA TCATCTTCAC GCTGGAGATG
-X ATTCTCAAGC TCTTGGCGTT CAAGGCCAGG GGCTATTTCG GAGACCCCTG GAATGTGTTC
-X GACTTCCTGA TCGTCATCGG CAGCATCATT GACGTCATCC TCAGCGAGAT CGACACTTTC
-X CTGGCCTCCA GCGGGGGACT GTATTGCCTG GGTGGCGGCT GCGGGAACGT TGACCCAGAC
-X GAGAGCGCCC GCATCTCCAG TGCCTTCTTC CGCCTGTTCC GGGTTATGAG GCTGATCAAG
-X CTGCTGAGTC GGGCCGAGGG CGTGCGCACG CTGCTGTGGA CGTTCATCAA GTCCTTCCAG
-X GCCCTGCCCT ACGTGGCCCT GCTCATCGTC ATGCTGTTCT TCATCTACGC CGTCATCGGC
-X ATGCAGATGT TTGGAAAGAT CGCCCTGGTG GACGGGACCC AGATCAACCG CAACAACAAC
-X TTCCAGACCT TCCCGCAGGC CGTGCTGCTG CTCTTCAGGT GTGCGACAGG GGAGGCGTGG
-X CAAGAGATCC TGCTGGCCTG CAGCTACGGG AAGTTGTGCG ACCCAGAGTC AGACTACGCC
-X CCGGGCGAGG AGTACACGTG TGGCACCAAC TTCGCCTACT ACTACTTCAT CAGCTTCTAC
-X ATGCTCTGCG CCTTCCTGAT CATCAACCTC TTCGTGGCTG TCATCATGGA CAACTTTGAC
-X TACCTGACAC GCGACTGGTC CATCCTGGGC CCTCACCACC TGGACGAGTT CAAGGCCATC
-X TGGGCAGAGT ATGACCCAGA GGCCAAGGGG CGAATCAAGC ACCTGGACGT GGTGACCCTG
-X CTGAGAAGGA TCCAGCCCCC TCTGGGCTTC GGGAAGTTCT GTCCACACCG GGTGGCCTGT
-X AAGCGCCTGG TGGGCATGAA CATGCCCCTG AACAGTGACG GCACGGTCAC CTTCAATGCC
-X ACGCTCTTTG CCCTGGTGCG CACGGCCCTC AAGATCAAGA CAGAAGGTAA CTTCGAGCAG
-X GCCAACGAGG AGCTGAGGGC CATCATCAAG AAGATCTGGA AGAGAACCAG CATGAAGCTA
-X CTGGACCAGG TCATCCCTCC CATAGGAGAT GACGAGGTGA CCGTGGGGAA GTTCTACGCC
-X ACATTCCTCA TCCAGGAGCA CTTCCGGAAG TTCATGAAGC GCCAGGAGGA ATATTATGGG
-X TATCGGCCCA AGAAGGACAC CGTGCAGATC CAGGCTGGGC TGCGGACCAT AGAGGAGGAG
-X GCGGCCCCTG AGATCCGCCG CACCATCTCA GGAGACCTGA CCGCCGAGGA GGAGCTGGAG
-X AGAGCCATGG TGGAGGCTGC GATGGAGGAG AGGATCTTCC GGAGGACCGG AGGCCTGTTT
-X GGCCAGGTGG ACACCTTCCT GGAAAGGACC AACTCCCTAC CCCCGGTGAT GGCCAACCAA
-X AGACCGCTCC AGTTTGCTGA GATAGAAATG GAAGAGCTTG AGTCGCCTGT CTTCTTGGAG
-X GACTTCCCTC AAGACGCAAG AACCAACCCT CTCGCTCGTG CCAATACCAA CAACGCCAAT
-X GCCAATGTTG CCTATGGCAA CAGCAACCAT AGCAACAACC AGATGTTTTC CAGCGTCCAC
-X TGTGAAAGGG AGTTCCCGGG AGAGGCGGAG ACACCGGCTG CCGGACGAGG AGCCCTCAGC
-X CACTCCCACA GGGCCCTGGG ACCTCACAGC AAGCCCTGTG CTGGAAAACT GAATGGGCAG
-X CTGGTCCAGC CGGGAATGCC CATCAACCAG GCACCTCCTG CCCCCTGCCA GCAGCCTAGC
-X ACAGATCCCC CAGAGCGCGG GCAGAGGAGG ACCTCCCTGA CAGGGTCTCT GCAAGACGAA
-X GCACCCCAGA GGAGGAGCTC CGAGGGGAGC ACCCCCAGGC GCCCGGCTCC TGCTACAGCT
-X CTGCTGATCC AAGAGGCTCT GGTTCGAGGG GGCCTGGACA CCTTGGCAGC TGATGCTGGC
-X TTCGTCATGG CAACAAGCCA GGCCCTGGTA GACGCCTGTC AGATGGAACC GGAGGAAGTA
-X GAGGTCGCAG CCACAGAGCT ACTGAAAGAG CGAGAGTCCG TCCAGGGCAT GGCCAGTGTC
-X CCGGGAAGCC TGAGCCGCAG GTCCTCCCTG GGCAGCCTTG ACCAGGTCCA GGGCTCCCAG
-X GAAACCCTTA TTCCTCCCAG GCCGTGATGG CTGTGCAGTG TCCACATGAC CAAGGCGAGA
-X GGGACAGTGC GTGCAGAAGC TCAGCCCTGC ATGGCAGCCT CCCTCTGTCT CAGCCCTCCT
-X GCTGAGCTGG GGCGGTCTGG AACCGACCAG GAAGCCAGGA GCCTCCCCTG GCCAGCAAGA
-X GGCATGATTC TAAAGCCATC CAGAAAGGCC TGGTCAGTGC CACTCCCCAG CAGGACATTA
-X AAGTCTCTAG GTCTGTGGCA
->RABGSTB Oryctolagus cuniculus glutathione S-transferase mRNA, complete cds.
-X CAGAAACCAC CACTATGGCA GGGAAGCCCA AGCTTCACTA CTTCAATGCA CGGGGCAGAA
-X TGGAGTCTAT CCGGTGGCTC CTGACTGCAG CTGGGGTAGA GTTTGAAGAG AAATGTATGA
-X AAACTCGAGA AGACCTGGAA AAGTTAAGAA AAGATGGGGT ATTGATGTTC CAGCAAGTGC
-X CCATGGTTGA GATTGATGGG ATGAAGCTGG TGCAGACCAG AGCCATTTTC AACTACATTG
-X CAGACAAGCA CAACCTGTAT GGGAAAGACA TAAAGGAGAG AGCCCTGATT GATATGTATA
-X CAGAAGGCAT AGTAGATTTG AATGAATTGA TTCTTACTCG TCCATTCCTT CCACCGGAGG
-X AACAAGAGGC AAAACTTGCT CAGATCAAAG ATAAAGCAAA AAACCGTTAT TTTCCTGCCT
-X TTGAAAAGGT GTTGAAGAGC CACGGACAAG ACTACCTTGT TGGCAACAAG CTGAGCAAGG
-X CTGACATTCT CCTGGTTGAA CTTCTCTACA ACGTGGAAGA GCTCAACCCC GGCGCGACTG
-X CCAGCTTCCC TCTGCTGCAG GCCCTGAAAA CCAGGATCAG CAATCTCCCC ACCGTGAAGA
-X AGTTTCTGCA GCCTGGCAGC CAGAGGAATC CGCCTGATGA TGAGAAATGC AGAGAAGAAG
-X CAAAAATCAT TTTCCATTAA GAAGGCAAAG ATACCAAGCA CAGGCAAGAC CAGCCTCTGA
-X CCCCCTGCAG CGATGAAGTA CTTTAAATAA ATAGTGATCC TGATTGTCAT AAGGCATATT
-X ACGTTTTCTA AGTATTGTGT AAATTTAATT AAAAACCACC CATGTAGATT TAGTTGCAAT
-X ACATGGTACT TGGTTTTGAT CAAATACAAA ATTATGAGCA CCTCCTAGGA TGTCCCTTTG
-X AA
-SHAR_EOF
-chmod 0644 gst.nlib ||
-echo 'restore of gst.nlib failed'
-Wc_c="`wc -c < 'gst.nlib'`"
-test 18633 -eq "$Wc_c" ||
- echo 'gst.nlib: original size 18633, current size' "$Wc_c"
-fi
-# ============= gst.seq ==============
-if test -f 'gst.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping gst.seq (File already exists)'
-else
-echo 'x - extracting gst.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'gst.seq' &&
->gi|193547|gb|J04632|MUSGLUTA Mouse glutathione S-transferase class mu (GST1-1) mRNA, complete cds
-CCTGCCTTCCGCTTTAGGGTCTGCTGCTCTGGTTACAGACCTAGGAAGGGGAGTGCCTAATTGGGATTGG
-TGCAGGGTTGGGAGGGACCCGCTGTTTTGTCCTGCCCACGTTTCTCTAGTAGTCTGTATAAAGTCACAAC
-TCCAAACACACAGGTCAGTCCTGCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGAT
-ACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGC
-TATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAATGAGAAGT
-TCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAA
-TGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCA
-GACATTGTGGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTTG
-AGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAA
-GAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTAC
-CGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCC
-TCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCA
-CTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGC
-AGGCCCTGGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCC
-TTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCC
-AGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGA
-GGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAGCCCTCCCT
-AGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTC
-GTCTACAATAAAGTCTGAAACACACTT
-SHAR_EOF
-chmod 0644 gst.seq ||
-echo 'restore of gst.seq failed'
-Wc_c="`wc -c < 'gst.seq'`"
-test 1405 -eq "$Wc_c" ||
- echo 'gst.seq: original size 1405, current size' "$Wc_c"
-fi
-# ============= gtm1_human.aa ==============
-if test -f 'gtm1_human.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping gtm1_human.aa (File already exists)'
-else
-echo 'x - extracting gtm1_human.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'gtm1_human.aa' &&
->gtm1_human GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
-MPMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKITQSNAILCY
-IARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRPWFAGNKITFVD
-FLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
-SHAR_EOF
-chmod 0644 gtm1_human.aa ||
-echo 'restore of gtm1_human.aa failed'
-Wc_c="`wc -c < 'gtm1_human.aa'`"
-test 300 -eq "$Wc_c" ||
- echo 'gtm1_human.aa: original size 300, current size' "$Wc_c"
-fi
-# ============= gtt1_drome.aa ==============
-if test -f 'gtt1_drome.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping gtt1_drome.aa (File already exists)'
-else
-echo 'x - extracting gtt1_drome.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'gtt1_drome.aa' &&
->GTT1_DROME GLUTATHIONE S-TRANSFERASE 1-1 (EC 2.5.1.18) (CLASS-THETA). - DROS
-MVDFYYLPGSSPCRSVIMTAKAVGVELNKKLLNLQAGEHLKPEFLKINPQHTIPTLVDNGFALWESRAIQVYLVEKYG
-KTDSLYPKCPKKRAVINQRLYFDMGTLYQSFANYYYPQVFAKAPADPEAFKKIEAAFEFLNTFLEGQDYAAGDSLTVA
-DIALVATVSTFEVAKFEISKYANVNRWYENAKKVTPGWEENWAGCLEFKKYFE
-SHAR_EOF
-chmod 0644 gtt1_drome.aa ||
-echo 'restore of gtt1_drome.aa failed'
-Wc_c="`wc -c < 'gtt1_drome.aa'`"
-test 291 -eq "$Wc_c" ||
- echo 'gtt1_drome.aa: original size 291, current size' "$Wc_c"
-fi
-# ============= h10_human.aa ==============
-if test -f 'h10_human.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping h10_human.aa (File already exists)'
-else
-echo 'x - extracting h10_human.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'h10_human.aa' &&
->H10_HUMAN | 90538 | HISTONE H1' (H1.0) (H1(0)).
-TENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKVGENADSQIKLSIKRLV
-TTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKAASKAPTKKPKATPVKKAKKKLA
-ATPKKAKKPKTVKAKPVKASKPKKAKPVKPKAKSSAKRAGKKK
-SHAR_EOF
-chmod 0644 h10_human.aa ||
-echo 'restore of h10_human.aa failed'
-Wc_c="`wc -c < 'h10_human.aa'`"
-test 247 -eq "$Wc_c" ||
- echo 'h10_human.aa: original size 247, current size' "$Wc_c"
-fi
-# ============= h_altlib.h ==============
-if test -f 'h_altlib.h' -a X"$1" != X"-c"; then
- echo 'x - skipping h_altlib.h (File already exists)'
-else
-echo 'x - extracting h_altlib.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'h_altlib.h' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: h_altlib.h,v 1.2 1999/12/30 01:26:57 wrp Exp $ */
-X
-X
-#define LASTENTRY 10
-#define LASTLIB 10
-#define BINARYGB 9
-#define DEFAULT 0
-#define FULLGB 1
-#define UNIXPIR 2
-#define EMBLSWISS 3
-#define INTELLIG 4
-#define VMSPIR 5
-X
-int agetlib_h(); /* pearson fasta format */
-int agetntlib_h(); /* pearson fasta format nucleotides */
-int vgetlib_h(); /* PIR VMS format */
-X
-int (*h_getliba[LASTLIB])()={
-X agetlib_h,agetlib_h,agetlib_h,agetlib_h,
-X agetlib_h,vgetlib_h,agetlib_h,agetlib_h,
-X agetlib_h,agetlib_h};
-X
-int (*h_getntliba[LASTLIB])()={
-X agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
-X agetntlib_h,agetntlib_h,agetntlib_h,agetntlib_h,
-X agetntlib_h,agetntlib_h};
-X
-SHAR_EOF
-chmod 0644 h_altlib.h ||
-echo 'restore of h_altlib.h failed'
-Wc_c="`wc -c < 'h_altlib.h'`"
-test 691 -eq "$Wc_c" ||
- echo 'h_altlib.h: original size 691, current size' "$Wc_c"
-fi
-# ============= hahu.aa ==============
-if test -f 'hahu.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping hahu.aa (File already exists)'
-else
-echo 'x - extracting hahu.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'hahu.aa' &&
->HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
-VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
-AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
-R
-SHAR_EOF
-chmod 0644 hahu.aa ||
-echo 'restore of hahu.aa failed'
-Wc_c="`wc -c < 'hahu.aa'`"
-test 225 -eq "$Wc_c" ||
- echo 'hahu.aa: original size 225, current size' "$Wc_c"
-fi
-# ============= hostacc.c ==============
-if test -f 'hostacc.c' -a X"$1" != X"-c"; then
- echo 'x - skipping hostacc.c (File already exists)'
-else
-echo 'x - extracting hostacc.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'hostacc.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: hostacc.c,v 1.7 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* Concurrent read version */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-X
-#ifdef PVM_SRC
-#include "pvm3.h"
-#endif
-#ifdef MPI_SRC
-#include "mpi.h"
-#endif
-X
-#include "msg.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#undef XTERNAL
-X
-extern char prog_name[];
-X
-extern int nnodes;
-#ifdef PVM_SRC
-extern int pinums[];
-#endif
-X
-X
-#ifdef PVM_SRC
-int tidtonode(tid)
-X int tid;
-{
-X int i;
-X for (i=FIRSTNODE; i< nnodes; i++) if (tid==pinums[i]) return i;
-X return -1;
-}
-#endif
-X
-/* rand_nodes selects nnodes at random from max_nodes */
-X
-void
-rand_nodes(int *node_map, int nnodes, int max_nodes)
-{
-X int node_used[MAXNOD];
-X int i, j;
-X struct timeval tv;
-X
-X gettimeofday(&tv,NULL);
-X SRAND(tv.tv_usec);
-X
-X for (i=0; i<max_nodes; i++) node_used[i]=0;
-X
-X if (nnodes < (max_nodes+1)/2) {
-X for (i=0; i<nnodes; ) {
-X j = RAND()%max_nodes;
-X if (node_used[j]) continue;
-X else {
-X node_map[i++]=j;
-X node_used[j]=1;
-X }
-X }
-X }
-X else {
-X for (i=0; i<(max_nodes-nnodes); ) {
-X j = RAND()%max_nodes;
-X if (node_used[j]) continue;
-X else {
-X node_used[j]=1;
-X i++;
-X }
-X }
-X for (i=j=0; i<nnodes; j++)
-X if (node_used[j]) continue;
-X else node_map[i++]=j;
-X }
-/* for (i=0; i<nnodes; i++) fprintf(stderr,"%2d %2d\n",i,node_map[i]); */
-}
-SHAR_EOF
-chmod 0644 hostacc.c ||
-echo 'restore of hostacc.c failed'
-Wc_c="`wc -c < 'hostacc.c'`"
-test 1466 -eq "$Wc_c" ||
- echo 'hostacc.c: original size 1466, current size' "$Wc_c"
-fi
-# ============= hsgstm1b.gcg ==============
-if test -f 'hsgstm1b.gcg' -a X"$1" != X"-c"; then
- echo 'x - skipping hsgstm1b.gcg (File already exists)'
-else
-echo 'x - extracting hsgstm1b.gcg (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'hsgstm1b.gcg' &&
-X FROMSTADEN of: hsgstm1b.g check: 1769 from: 1 to: 5183
-X
-X <---No Contig Comments--->
-X
-hsgstm1b.gcg Length: 5183 October 12, 1994 10:58 Type: N Check: 1769 ..
-X
-X 1 GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGTGG
-X
-X 51 GGTAAGCGAG GGTCCTCTGG TGGGTGGGAC AGGGGGCGGA GGCGGGGATG
-X
-X 101 TGTGGAGTAG CTGCAGGACT GGCTCTAGGG ACCGTTCCTC TTCAGGGCTG
-X
-X 151 CCCGCCTCAG AAGGGCCTGT GCATGACGCT GTGTGTGTGT TTGGGGGTGG
-X
-X 201 GGGCGGGTAG AGGAGGCGAC GGGTACGTGC AGTATAGACT AGGGCTGGCC
-X
-X 251 TGGTGCAGAG AAAGTCACCA AGTCAGGGAC CCTCCATCTC TGACCCGAGC
-X
-X 301 CGCGGCCATC TCTCCCAGCT GGCCCACGCC ATCCGCCTGC TCCTGGAATA
-X
-X 351 CACAGACTCA AGCTACGAGG AAAAGAAGTA TACGATGGGG GACGGTAATG
-X
-X 401 ACACCCTTGT GTCCGGGCTC TGCACTCACG CTGAGTTGGC ACCAAGCAAC
-X
-X 451 CCATGGTGGC CACCTGTCGT ACCTCTGCAG GCCTCCCCTG CTGGAGCTGC
-X
-X 501 AGGCTGTCCC TTCCCTGAGC CCCGGTGAGG AGTCCTGTGG CCTTGCAAGG
-X
-X 551 CAGAATGCTG GGGCGGGATA GTGGGTCCCT GTTTAATTGG GTTGGGTGTC
-X
-X 601 CTCAGAGCTT CCCAAACCCT GGAAGCCTTA GCCGTGTGGG GTCCAGAGCC
-X
-X 651 TCAGCGGGAT TATTTGTCCC TGAACCCTGG GATGTGGGAC TGAGTGGTCA
-X
-X 701 GATTCTAGAT CCACCTGTCT CAGGGATCTT GCCACTGGTT CCTTGGGAGG
-X
-X 751 GTCCCCGGAA GGAGGGCTGG GCTCTGGGGA GGTTTGTTTT CACTTCTTCT
-X
-X 801 TCCCCACGGC AGCTCCTGAC TATGACAGAA CGCAGTGGCT GAATGAAAAA
-X
-X 851 TTCAAGCTGG GCCTGGACTT TCCCAATGTA GGTGCAGGGG GAAGGGGCGG
-X
-X 901 TTTTGGGGGA AAGTGCGACG TGTCTCTGAC TGCATCTCCT CTCCCCAGAT
-X
-X 951 TAGAGGTGTT CGGATCAGGA GTCTTCTGCC CAATTCCTGG TTGTCTACAC
-X
-X 1001 AGCCCCTGCA TGATGTTCTG TGTCCCAGCT CATTTGTTCA TGTGACAGTA
-X
-X 1051 TTTCTATGTC AGGCCTGCAT GAGCGGGCAC AGTGAGTCTG GTCTCCCCTT
-X
-X 1101 GCATATAGGA AGGGGATGCT GGGGAGCCTG CTGGCCCCAA CTGAGCTTCC
-X
-X 1151 CCGGTTTCCC ATCTATCCAG CTGCCCTACT TGATTGATGG GGCTCACAAG
-X
-X 1201 ATCACCCAGA GCAACGCCAT CTTGTGCTAC ATTGCCCGCA AGCACAACCT
-X
-X 1251 GTGTGAGTGT GGGTGGCTGC AATGTGTGGG GGGAAGGTGG CCTCCTCCTT
-X
-X 1301 GGCTGGGCTG TGATGCTGAG ATTGAGTCTG TGTTTTGTGG GTGGCAGGTG
-X
-X 1351 GGGAGACAGA AGAGGAGAAG ATTCGTGTGG ACATTTTGGA GAACCAGACC
-X
-X 1401 ATGGACAACC ATATGCAGCT GGGCATGATC TGCTACAATC CAGAATTTGT
-X
-X 1451 GAGTGTCCCC AGTGAGCTGC ATCTGACAGA GTTTGGATTT GGGGCCAGGA
-X
-X 1501 CTCTTGCATC CTGCACACAT TGGTCTTAAG TCCCTGGTAC CATTCATCCT
-X
-X 1551 CCAAGTGCTT TCCCATCATC TAGCAGTATC TCTACGACTC CAATGTCATG
-X
-X 1601 TCAACAAAAG CAGAGGCAAT TCCCAACCAA CCTTAGGACA CGATTCCAGG
-X
-X 1651 CATTCCCAGG GTAGAAATTT CAGTTCCTGT ATGGTAAAGT TTGTGTTCAG
-X
-X 1701 AATCTCCTTC ATCAGCTCTG GCCTCTGACT TCTGTCCTGG GTCATTTCTG
-X
-X 1751 TCAGCCAGTT CACATCACCT GCCTGCTCCT AGAATATGCA GACTCAAGTA
-X
-X 1801 GAAGACTCAG GAATGTAATG GCACCCTCGA ATTGCATCTT CTCCTCAACA
-X
-X 1851 GTTTTCTGAG TGCTGTCATT GACATGCACA GGGATCTGCG CATCTTCATA
-X
-X 1901 ACAGACAGCT CAGAGGCAGT CAGAGGGCCT TTATTCCTCT CCCTCCTTCC
-X
-X 1951 TTTCAACTTG AACTTCTCAT CTCCCTGGAA ACTAGTCAAC GTTCATTGTT
-X
-X 2001 TTCTTCTGCC ACCCCATTAG AAGGAACTTT CTACTTTCCC TGAGCTCCCT
-X
-X 2051 TAGTTCTTTG CATCCTTGAT TCTGCTGGTC TGGATCCAGA GGCTGCCAGG
-X
-X 2101 TGCTTGGGCG CTCCTGGGGC TGACCCAGAG GCTATTGGGA GGTCAGTGAG
-X
-X 2151 GACAGATTCA GGGACAGCAT CTCATTCCTC TCTGCCTTCT GATCAGTTTA
-X
-X 2201 GATAGGGTCT GACACTCAGT CAGAGTCTAA AATGCTGAGT ATCCAATTGA
-X
-X 2251 AGCCTGCACT GCCCCAGTTC CAGACTTGGG GAAGATGGCT GCTTGCCCGT
-X
-X 2301 GCCAGCCTGG CCGTCCACAG CCCCGGGGAG GCCACGTCTG TGCAGGGAGC
-X
-X 2351 TTTTGTCCGA GGGTGGTGAC AGCTGTTTTC TGCCTCAGGA GAAACTGAAG
-X
-X 2401 CCAAAGTACT TGGAGGAACT CCCTGAAAAG CTAAAGCTCT ACTCAGAGTT
-X
-X 2451 TCTGGGGAAG CGGCCATGGT TTGCAGGAAA CAAGGTAAAG GAGGAGTGAT
-X
-X 2501 ATGGGGAATG AGATCTGTTT TGCTTCACGT GTTATGGAGG TTCCAGCCCA
-X
-X 2551 CACATTCTTG GCCTTCTGCA GATCACTTTT GTAGATTTTC TCGTCTATGA
-X
-X 2601 TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG GACGCCTTCC
-X
-X 2651 CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGTGATGCC CCCAATCCTC
-X
-X 2701 CCTTCTCTTT GATGCCCCTT GTTCCGTTAC CTCCTTTCAG ATGCTTTCCC
-X
-X 2751 ATGCCTGGAG CTACACACAG AATAACTCGC ATGTATTGAG TACTGGTTTC
-X
-X 2801 ATGCCACGAA CCGTACCCCA GCACATTATA CCTATTGTGT GAAATTTGAA
-X
-X 2851 TTTTATAACA TTCCAGTAAG GTAACAGAAT TATCTCGCCC ATTTTAGAGA
-X
-X 2901 TAAGGAAACT AAGAATGAGA GGGTCGGTCC TCTGCTCAGG GTCCCAGAGC
-X
-X 2951 TAGTGGAGGC AGTGCTGGGC CCCTGTGAGC CTCTGGATCT ATGGGTGGCA
-X
-X 3001 GTCAGGCTCT CCCATTCGAC AGAGAAAAAG CCTTAGCGTT CACCTAGCCT
-X
-X 3051 GGGTTTCACA GCCCAGGACA CTTTGGAAGA GGCAGAGAAC TTCATGACCA
-X
-X 3101 TAGATGGAGC TGGCAATAGT AGGACTGACA CAACGGTGAC ATTGATGTCT
-X
-X 3151 AGTACTGAAC CCACAGGCAA TCTCATAGCT ACCTCCAGAA GCTTTGCATG
-X
-X 3201 ATTGGACCCC AGTGTGGGAA TCCTGAGAGC CAGGGCTGTG GCTGTAGCTG
-X
-X 3251 GATTAAGGTA CATATGTGGG TGTCCCTGTT GAAGGAGTAT ATGTTGAAAT
-X
-X 3301 GCCCGGTGCT GGGGCACTTA CTTACTCCAC CACTATCTTT TTTTTTTTTT
-X
-X 3351 TTTTTTTTTT TTTGTGCTGG AGTCTTGCTC TGTTGCCCAG GCTGGAGTTC
-X
-X 3401 AATGGAGTGA TCTTGGCTCA CTGCAACCTC CGCCTCCTGG GTTCAAGCGA
-X
-X 3451 TTCTACTGCC TCAGCTGCAC GATTAGTTGG GATTACAGGT GTGCACCACC
-X
-X 3501 ACGTCTGGCT AATTTTTGTA TTTTTAGTAG AGATGGGGTT TTGCCATGTT
-X
-X 3551 GGTCAGGCTG GTCTTCGAAC TCCTGACCTC AGGTGATCTA CCCACATCAG
-X
-X 3601 CCTCCCTCAG ATCGTGTCTT GCTGTTGCCC AGGCTGGAGC AGCAGTTGCG
-X
-X 3651 TGACCTCGGA CTTACTGCAA CCTCTGCTCC CGGGTTCAAA CAATTCTCTG
-X
-X 3701 CCTCAGCCTC CCGAGTAGCT GGGAATTACA AGTGTCTATC ACCACGCCCA
-X
-X 3751 GCTAATTTTT CTATTTTTAG TAGAGATGGG CTTTTCACCA TGTTGGCCAG
-X
-X 3801 GTGGTCTTGA ACTCCTGACC TCGGTGATCC ACCCACCTCG GCTTCCCACA
-X
-X 3851 TCTGAGTGTC ATGTAGCCTG ATCTGCAGCA GGGCTGTAGA TGCCATGGGT
-X
-X 3901 TAGGGCACAG TGAGATTTTG CTCAGGTATT AGATGGAGAA CTTTGGACTT
-X
-X 3951 TCTGCTTTAA GGGGAATGTT TAGAGCCTAG TCTCgTTTGA TTTTCTTGTG
-X
-X 4001 CACTGCCACC CCCCATTCCA CTTTCATCCA GGTTTACTGA GACATTGGGG
-X
-X 4051 TGAGTGTGTT CAGAGCCCCT TTGTTCTGCT GCAGGTCCCT TCTGTGTCTC
-X
-X 4101 TATACCCAGA CAAGCCAAGA GCCTCCCTGT GGAAAAGGAG ACTGTTTGTG
-X
-X 4151 CAGTCAAGGA GTGACAGGGC CTGGTGTGAG GGGTGGTGGG GCAGAAGAAG
-X
-X 4201 AAGAGAATTT GTCAGGAAGA GGCCAGAACT GGAGAGAGAC AGAACCAGGC
-X
-X 4251 TACACYGCAA GTTCTATTCC CCTTACAAGG TATCTAAACG TAAGGAAGTT
-X
-X 4301 GCTGAACTTC TGTTCCACAT GAGAATGGTG ATAATAGATT CAGCCTTGCA
-X
-X 4351 GAGCAGTCGA GTGGTTTTCT AAGCTTACGT TGTAATTTGT GTTGGTACAG
-X
-X 4401 AGCACCCAGC ACCGTGTAGA ATCTTCGTAA GTGTTAGCTG TTACTGTGGT
-X
-X 4451 ACAACATTAC CTAAAGGAAG TTGGAAGAGT TAACTCAGCA AATCTGGGGA
-X
-X 4501 CCCTAAGAAG CTGTGTGATG CCTCAGCACT TGAGCCCACA TGGAAAGGCT
-X
-X 4551 GTGCCAGGGC CCTGACCTGC TGTGTCTGCA GTGGGGTTGT CCCACCGCTC
-X
-X 4601 ATGGGCAGCT GACCTTGAGT TCTGGCCTTA TTTTCCCCCC TCTCAGGGCT
-X
-X 4651 TGGAGAAGAT CTCTGCCTAC ATGAAGTCCA GCCGCTTCCT CCCAAGACCT
-X
-X 4701 GTGTTCTCAA AGATGGCTGT CTGGGGCAAC AAGTAGGGCC TTGAAGGCAG
-X
-X 4751 GAGGTGGGAG TGAGGAGCCC ATACTCAGCC TGCTGCCCAG GCTGTGCAGC
-X
-X 4801 GCAGCTGGAC TCTGCATCCC AGCACCTGCC TCCTCGTTCC TTTCTCCTGT
-X
-X 4851 TTATTCCCAT CTTTACTCCC AAGACTTCAT TGTCCCTCTT CACTCCCCCT
-X
-X 4901 AAACCCCTGT CCCATGCAGG CCCTTTGAAG CCTCAGCTAC CCACTATCCT
-X
-X 4951 TCGTGAACAT CCCCTCCCAT CATTACCCTT CCCTGCACTA AAGCCAGCCT
-X
-X 5001 GACCTTCCTT CCTGTTAGTG GTTGTGTCTG CTTTAAAGCC TGCCTGGCCC
-X
-X 5051 CTCGCCTGTG GAGCTCAGCC CCGAGCTGTC CCCGTGTTGC ATGAAGGAGC
-X
-X 5101 AGCATTGACT GGTTTACAGG CCCTGCTCCT GCAGCATGGT CCCTGCCTAG
-X
-X 5151 GCCTACCTGA TGGAAGTAAA GCCTCAACCA CAc
-X
-SHAR_EOF
-chmod 0644 hsgstm1b.gcg ||
-echo 'restore of hsgstm1b.gcg failed'
-Wc_c="`wc -c < 'hsgstm1b.gcg'`"
-test 7118 -eq "$Wc_c" ||
- echo 'hsgstm1b.gcg: original size 7118, current size' "$Wc_c"
-fi
-# ============= hsgstm1b.seq ==============
-if test -f 'hsgstm1b.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping hsgstm1b.seq (File already exists)'
-else
-echo 'x - extracting hsgstm1b.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'hsgstm1b.seq' &&
->gi|31932|emb|X68676|HSGSTM1B H.sapiens GSTM1b gene for glutathione S-transferase
-ATGCCCATGATACTGGGGTACTGGGACATCCGTGGGGTAAGCGAGGGTCCTCTGGTGGGTGGGACAGGGG
-GCGGAGGCGGGGATGTGTGGAGTAGCTGCAGGACTGGCTCTAGGGACCGTTCCTCTTCAGGGCTGCCCGC
-CTCAGAAGGGCCTGTGCATGACGCTGTGTGTGTGTTTGGGGGTGGGGGCGGGTAGAGGAGGCGACGGGTA
-CGTGCAGTATAGACTAGGGCTGGCCTGGTGCAGAGAAAGTCACCAAGTCAGGGACCCTCCATCTCTGACC
-CGAGCCGCGGCCATCTCTCCCAGCTGGCCCACGCCATCCGCCTGCTCCTGGAATACACAGACTCAAGCTA
-CGAGGAAAAGAAGTATACGATGGGGGACGGTAATGACACCCTTGTGTCCGGGCTCTGCACTCACGCTGAG
-TTGGCACCAAGCAACCCATGGTGGCCACCTGTCGTACCTCTGCAGGCCTCCCCTGCTGGAGCTGCAGGCT
-GTCCCTTCCCTGAGCCCCGGTGAGGAGTCCTGTGGCCTTGCAAGGCAGAATGCTGGGGCGGGATAGTGGG
-TCCCTGTTTAATTGGGTTGGGTGTCCTCAGAGCTTCCCAAACCCTGGAAGCCTTAGCCGTGTGGGGTCCA
-GAGCCTCAGCGGGATTATTTGTCCCTGAACCCTGGGATGTGGGACTGAGTGGTCAGATTCTAGATCCACC
-TGTCTCAGGGATCTTGCCACTGGTTCCTTGGGAGGGTCCCCGGAAGGAGGGCTGGGCTCTGGGGAGGTTT
-GTTTTCACTTCTTCTTCCCCACGGCAGCTCCTGACTATGACAGAACGCAGTGGCTGAATGAAAAATTCAA
-GCTGGGCCTGGACTTTCCCAATGTAGGTGCAGGGGGAAGGGGCGGTTTTGGGGGAAAGTGCGACGTGTCT
-CTGACTGCATCTCCTCTCCCCAGATTAGAGGTGTTCGGATCAGGAGTCTTCTGCCCAATTCCTGGTTGTC
-TACACAGCCCCTGCATGATGTTCTGTGTCCCAGCTCATTTGTTCATGTGACAGTATTTCTATGTCAGGCC
-TGCATGAGCGGGCACAGTGAGTCTGGTCTCCCCTTGCATATAGGAAGGGGATGCTGGGGAGCCTGCTGGC
-CCCAACTGAGCTTCCCCGGTTTCCCATCTATCCAGCTGCCCTACTTGATTGATGGGGCTCACAAGATCAC
-CCAGAGCAACGCCATCTTGTGCTACATTGCCCGCAAGCACAACCTGTGTGAGTGTGGGTGGCTGCAATGT
-GTGGGGGGAAGGTGGCCTCCTCCTTGGCTGGGCTGTGATGCTGAGATTGAGTCTGTGTTTTGTGGGTGGC
-AGGTGGGGAGACAGAAGAGGAGAAGATTCGTGTGGACATTTTGGAGAACCAGACCATGGACAACCATATG
-CAGCTGGGCATGATCTGCTACAATCCAGAATTTGTGAGTGTCCCCAGTGAGCTGCATCTGACAGAGTTTG
-GATTTGGGGCCAGGACTCTTGCATCCTGCACACATTGGTCTTAAGTCCCTGGTACCATTCATCCTCCAAG
-TGCTTTCCCATCATCTAGCAGTATCTCTACGACTCCAATGTCATGTCAACAAAAGCAGAGGCAATTCCCA
-ACCAACCTTAGGACACGATTCCAGGCATTCCCAGGGTAGAAATTTCAGTTCCTGTATGGTAAAGTTTGTG
-TTCAGAATCTCCTTCATCAGCTCTGGCCTCTGACTTCTGTCCTGGGTCATTTCTGTCAGCCAGTTCACAT
-CACCTGCCTGCTCCTAGAATATGCAGACTCAAGTAGAAGACTCAGGAATGTAATGGCACCCTCGAATTGC
-ATCTTCTCCTCAACAGTTTTCTGAGTGCTGTCATTGACATGCACAGGGATCTGCGCATCTTCATAACAGA
-CAGCTCAGAGGCAGTCAGAGGGCCTTTATTCCTCTCCCTCCTTCCTTTCAACTTGAACTTCTCATCTCCC
-TGGAAACTAGTCAACGTTCATTGTTTTCTTCTGCCACCCCATTAGAAGGAACTTTCTACTTTCCCTGAGC
-TCCCTTAGTTCTTTGCATCCTTGATTCTGCTGGTCTGGATCCAGAGGCTGCCAGGTGCTTGGGCGCTCCT
-GGGGCTGACCCAGAGGCTATTGGGAGGTCAGTGAGGACAGATTCAGGGACAGCATCTCATTCCTCTCTGC
-CTTCTGATCAGTTTAGATAGGGTCTGACACTCAGTCAGAGTCTAAAATGCTGAGTATCCAATTGAAGCCT
-GCACTGCCCCAGTTCCAGACTTGGGGAAGATGGCTGCTTGCCCGTGCCAGCCTGGCCGTCCACAGCCCCG
-GGGAGGCCACGTCTGTGCAGGGAGCTTTTGTCCGAGGGTGGTGACAGCTGTTTTCTGCCTCAGGAGAAAC
-TGAAGCCAAAGTACTTGGAGGAACTCCCTGAAAAGCTAAAGCTCTACTCAGAGTTTCTGGGGAAGCGGCC
-ATGGTTTGCAGGAAACAAGGTAAAGGAGGAGTGATATGGGGAATGAGATCTGTTTTGCTTCACGTGTTAT
-GGAGGTTCCAGCCCACACATTCTTGGCCTTCTGCAGATCACTTTTGTAGATTTTCTCGTCTATGATGTCC
-TTGACCTCCACCGTATATTTGAGCCCAACTGCTTGGACGCCTTCCCAAATCTGAAGGACTTCATCTCCCG
-CTTTGAG
-SHAR_EOF
-chmod 0644 hsgstm1b.seq ||
-echo 'restore of hsgstm1b.seq failed'
-Wc_c="`wc -c < 'hsgstm1b.seq'`"
-test 2788 -eq "$Wc_c" ||
- echo 'hsgstm1b.seq: original size 2788, current size' "$Wc_c"
-fi
-# ============= htime.c ==============
-if test -f 'htime.c' -a X"$1" != X"-c"; then
- echo 'x - skipping htime.c (File already exists)'
-else
-echo 'x - extracting htime.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'htime.c' &&
-/* Concurrent read version */
-X
-/* $Name: fa_34_26_5 $ - $Id: htime.c,v 1.3 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdio.h>
-#include <time.h>
-X
-#ifdef UNIX
-#include <sys/types.h>
-#include <sys/time.h>
-#ifdef TIMES
-#include <sys/times.h>
-#else
-#undef TIMES
-#endif
-#endif
-X
-#ifndef HZ
-#define HZ 100
-#endif
-X
-time_t s_time () /* returns time in milliseconds */
-{
-#ifndef TIMES
-X time_t time(), tt;
-X return time(&tt)*1000;
-#else
-X struct tms tt;
-X times(&tt);
-#ifdef CLK_TCK
-X return tt.tms_utime*1000/CLK_TCK;
-#else
-X return tt.tms_utime*1000/HZ;
-#endif
-#endif
-}
-X
-void ptime (FILE *fp, time_t time) /* prints the time */
-{
-X fprintf (fp, "%6.3f",(double)(time)/1000.0);
-}
-X
-SHAR_EOF
-chmod 0644 htime.c ||
-echo 'restore of htime.c failed'
-Wc_c="`wc -c < 'htime.c'`"
-test 674 -eq "$Wc_c" ||
- echo 'htime.c: original size 674, current size' "$Wc_c"
-fi
-# ============= humgstd.seq ==============
-if test -f 'humgstd.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping humgstd.seq (File already exists)'
-else
-echo 'x - extracting humgstd.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'humgstd.seq' &&
->HUMGSTD Human glutathione transferase class mu (GST1) mRNA, complete cds.
-X GCACCAACCA GCACCATGCC CATGATACTG GGGTACTGGG ACATCCGCGG GCTGGCCCAC
-X GCCATCCGCC TGCTCCTGGA ATACACAGAC TCAAGCTATG AGGAAAAGAA GTACACGATG
-X GGGGACGCTC CTGATTATGA CAGAAGCCAG TGGCTGAATG AAAAATTCAA GCTGGGCCTG
-X GACTTTCCCA ATCTGCCCTA CTTGATTGAT GGGGCTCACA AGATCACCCA GAGCAACGCC
-X ATCTTGTGCT ACATTGCCCG CAAGCACAAC CTGTGTGGGG AGACAGAAGA GGAGAAGATT
-X CGTGTGGACA TTTTGGAGAA CCAGACCATG GACAACCATA TGCAGCTGGG CATGATCTGC
-X TACAATCCAG AATTTGAGAA ACTGAAGCCA AAGTACTTGG AGGAACTCCC TGAAAAGCTA
-X AAGCTCTACT CAGAGTTTCT GGGGAAGCGG CCATGGTTTG CAGGAAACAA GATCACTTTT
-X GTAGATTTTC TCGTCTATGA TGTCCTTGAC CTCCACCGTA TATTTGAGCC CAACTGCTTG
-X GACGCCTTCC CAAATCTGAA GGACTTCATC TCCCGCTTTG AGGGCTTGGA GAAGATCTCT
-X GCCTACATGA AGTCCAGCCG CTTCCTCCCA AGACCTGTGT TCTCAAAGAT GGCTGTCTGG
-X GGCAACAAGT AGGGCCTTGA AGGCAGGAGG TGGGAGTGAG GAGCCCATAC TCAGCCTGCT
-X GCCCAGGCTG TGCAGCGCAG CTGGACTCTG CATCCCAGCA CCTGCCTCCT CGTTCCTTTC
-X TCCTGTTTAT TCCCATCTTT ACTCCCAAGA CTTCATTGTC CCTCTTCACT CCCCCTAAAC
-X CCCTGTCCCA TGCAGGCCCT TTGAAGCCTC AGCTACCCAC TATCCTTCGT GAACATCCCC
-X TCCCATCATT ACCCTTCCCT GCACTAAAGC CAGCCTGACC TTCCTTCCTG TTAGTGGTTG
-X TGTCTGCTTT AAAGCCTGCC TGGCCCCTCG CCTGTGGAGC TCAGCCCCGA GCTGTCCCCG
-X TGTTGCATGA AGGAGCAGCA TTGACTGGTT TACAGGCCCT GCTCCTGCAG CATGGTCCCT
-X GCCTAGGCCT ACCTGATGGA AGTAAAGCCT CAACCAC
-SHAR_EOF
-chmod 0644 humgstd.seq ||
-echo 'restore of humgstd.seq failed'
-Wc_c="`wc -c < 'humgstd.seq'`"
-test 1323 -eq "$Wc_c" ||
- echo 'humgstd.seq: original size 1323, current size' "$Wc_c"
-fi
-# ============= idn_aa.mat ==============
-if test -f 'idn_aa.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping idn_aa.mat (File already exists)'
-else
-echo 'x - extracting idn_aa.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'idn_aa.mat' &&
-X A R N B D C Q Z E G H I L K M F P S T W Y V X
-A 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-R -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-N -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-B -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-D -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-C -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-Q -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-Z -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-E -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-G -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-H -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-I -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-L -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10
-K -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10 -10
-M -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10 -10
-F -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10 -10
-P -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10 -10
-S -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10 -10
-T -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10 -10
-W -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10 -10
-Y -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10 -10
-V -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 4 -10
-XX -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 0
-SHAR_EOF
-chmod 0644 idn_aa.mat ||
-echo 'restore of idn_aa.mat failed'
-Wc_c="`wc -c < 'idn_aa.mat'`"
-test 2210 -eq "$Wc_c" ||
- echo 'idn_aa.mat: original size 2210, current size' "$Wc_c"
-fi
-# ============= initfa.c ==============
-if test -f 'initfa.c' -a X"$1" != X"-c"; then
- echo 'x - skipping initfa.c (File already exists)'
-else
-echo 'x - extracting initfa.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'initfa.c' &&
-/* initfa.c */
-X
-/* $Name: fa_34_26_5 $ - $Id: initfa.c,v 1.148 2007/04/26 18:40:58 wrp Exp $ */
-X
-/* copyright (c) 1996, 1997, 1998 William R. Pearson and the U. of Virginia */
-X
-/* init??.c files provide function specific initializations */
-X
-/* h_init() - called from comp_lib.c, comp_thr.c to initialize pstruct ppst
-X which includes the alphabet, and pam matrix
-X
-X alloc_pam() - allocate pam matrix space
-X init_pam2() - convert from 1D to 2D pam
-X
-X init_pamx() - convert from 1D to 2D pam
-X
-X f_initenv() - set up mngmsg and pstruct defaults
-X f_getopt() - read fasta specific command line options
-X f_getarg() - read ktup
-X
-X resetp() - reset the parameters, scoring matrix for DNA-DNA/DNA-prot
-X
-X query_parm() - ask for ktup
-X last_init() - some things must be done last
-X
-X f_initpam() - set some parameters based on the pam matrix
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <math.h>
-X
-#ifdef UNIX
-#include <sys/types.h>
-#include <sys/stat.h>
-#endif
-X
-#include "defs.h"
-#include "structs.h"
-#include "param.h"
-X
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#define XTERNAL
-#include "upam.h"
-#include "uascii.h"
-#undef XTERNAL
-X
-#define MAXWINDOW 32
-X
-int initpam(char *, struct pstruct *);
-void init_pam2 (struct pstruct *ppst);
-void extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst);
-void build_xascii(int *qascii, char *save_str);
-void ann_ascii(int *qascii, char *ann_arr);
-void re_ascii(int *qascii, int *pascii);
-extern int nrand(int);
-X
-/* at some point, all the defaults should be driven from this table */
-/*
-#pgm q_seq l_seq p_seq matrix g_open g_ext fr_shft e_cut ktup
-# -n/-p -s -e -f -h/-j -E argv[3]
-fasta prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 2
-fasta dna(1) dna(1) dna(1) +5/-4 -14 -4 - 2.0 6
-ssearch prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 -
-ssearch dna(1) dna(1) dna(1) +5/-4 -14 -4 - 2.0 -
-fastx dna(1) prot(0) prot(0) BL50 -12 -2 -20 5.0 2
-fasty dna(1) prot(0) prot(0) BL50 -12 -2 -20/-24 5.0 2
-tfastx dna(1) prot(0) prot(0) BL50 -14 -2 -20 5.0 2
-tfasty dna(1) prot(0) prot(0) BL50 -14 -2 -20/-24 5.0 2
-fasts prot(0) prot(0) prot(0) MD20-MS - - - 5.0 -
-fasts dna(1) dna(1) dna(1) +2/-4 - - - 5.0 1
-tfasts prot(0) dna(1) prot(0) MD10-MS - - - 2.0 1
-fastf prot(0) prot(0) prot(0) MD20 - - - 2.0 1
-tfastf prot(0) dna(1) prot(0) MD10 - - - 1.0 1
-fastm prot(0) prot(0) prot(0) MD20 - - - 5.0 1
-fastm dna(1) dna(1) dna(1) +2/-4 - - - 2.0 1
-tfastm prot(0) dna(1) prot(0) MD10 - - - 2.0 1
-*/
-X
-struct pgm_def_str {
-X int pgm_id;
-X char *prog_func;
-X char *pgm_abbr;
-X char *iprompt0;
-X char *ref_str;
-X int PgmDID;
-X char *smstr;
-X int g_open_mod;
-X int gshift;
-X int hshift;
-X int e_cut;
-X int ktup;
-};
-X
-char *ref_str_a[]={
-X "\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n",
-X "\nPlease cite:\n T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; \n W.R. Pearson (1991) Genomics 11:635-650\n",
-X "\nPlease cite:\n Pearson et al, Genomics (1997) 46:24-36\n",
-X "\nPlease cite:\n Mackey et al. Mol. Cell. Proteomics (2002) 1:139-147\n",
-X "\nPlease cite:\n W.R. Pearson (1996) Meth. Enzymol. 266:227-258\n"
-};
-X
-/* Program identifiers. Each value is also the index of the program's
-X entry in pgm_def_arr[] and msg_def_arr[]; h_init() verifies this.
-X The translated (TFAST) variants are the base id plus 10; their
-X expansions are parenthesized so that they stay a single operand
-X wherever the macro is used (e.g. next to '*' or as a macro argument). */
-#define FA_PID 1
-#define SS_PID 2
-#define FX_PID 3
-#define FY_PID 4
-#define FS_PID 5
-#define FF_PID 6
-#define FM_PID 7
-#define RSS_PID 8
-#define RFX_PID 9
-#define SSS_PID 10 /* old (slow) non-PG Smith-Waterman */
-#define TFA_PID (FA_PID+10)
-#define TFX_PID (FX_PID+10)
-#define TFY_PID (FY_PID+10)
-#define TFS_PID (FS_PID+10)
-#define TFF_PID (FF_PID+10)
-#define TFM_PID (FM_PID+10)
-X
-struct pgm_def_str
-pgm_def_arr[20] = { /* indexed by program id; entries 18 and 19 have no initializer */
-X {0, "", "", "", NULL, 400, "", 0, 0, 0, 1.0, 0 }, /* 0 */
-X {FA_PID, "FASTA", "fa",
-X "FASTA searches a protein or DNA sequence data bank",
-X NULL, 401, "BL50", 0, 0, 0, 10.0, 2}, /* 1 - FASTA */
-X {SS_PID, "SSEARCH","gsw","SSEARCH searches a sequence data bank",
-X NULL, 404, "BL50", 0, 0, 0, 10.0, 0}, /* 2 - SSEARCH */
-X {FX_PID, "FASTX","fx",
-X "FASTX compares a DNA sequence to a protein sequence data bank",
-X NULL, 405, "BL50", -2, -20, 0, 5.0, 2}, /* 3 - FASTX */
-X {FY_PID, "FASTY", "fy",
-X "FASTY compares a DNA sequence to a protein sequence data bank",
-X NULL, 405, "BL50", -2, -20, -24, 5.0, 2}, /* 4 - FASTY */
-X {FS_PID, "FASTS", "fs",
-X "FASTS compares linked peptides to a protein data bank",
-X NULL, 400, "MD20-MS", 0, 0, 0, 5.0, 1}, /* 5 - FASTS */
-X {FF_PID, "FASTF", "ff",
-X "FASTF compares mixed peptides to a protein databank",
-X NULL, 400, "MD20", 0, 0, 0, 2.0, 1 }, /* 6 - FASTF */
-X {FM_PID, "FASTM", "fm",
-X "FASTM compares ordered peptides to a protein data bank",
-X NULL, 400, "MD20", 0, 0, 0, 5.0, 1 }, /* 7 - FASTM */
-X {RSS_PID, "PRSS", "rss",
-X "PRSS evaluates statistical signficance using Smith-Waterman",
-X NULL, 401, "BL50", 0, 0, 0, 1000.0, 0 }, /* 8 - PRSS */
-X {RFX_PID,"PRFX", "rfx",
-X "PRFX evaluates statistical signficance using FASTX",
-X NULL, 401, "BL50", -2, -20, -24, 1000.0, 2 }, /* 9 - PRFX */
-X {SSS_PID, "OSEARCH","ssw","OSEARCH searches a sequence data bank",
-X NULL, 404, "BL50", 0, 0, 0, 10.0, 0}, /* 10 - OSEARCH */
-X {TFA_PID, "TFASTA", "tfa",
-X "TFASTA compares a protein to a translated DNA data bank",
-X NULL, 402, "BL50", -2, 0, 0, 5.0, 2 }, /* 11 - TFASTA */
-X {0, "", "", "", NULL, 400, "", 0, 0, 0, 1.0, 0 }, /* 12 - unused placeholder */
-X {TFX_PID, "TFASTX", "tfx",
-X "TFASTX compares a protein to a translated DNA data bank",
-X NULL, 406, "BL50", -2, -20, 0, 2.0, 2}, /* 13 - TFASTX */
-X {TFY_PID, "TFASTY", "tfy",
-X "TFASTY compares a protein to a translated DNA data bank",
-X NULL, 406, "BL50", -2, -20, -24, 2.0, 2}, /* 14 - TFASTY */
-X {TFS_PID, "TFASTS", "tfs",
-X "TFASTS compares linked peptides to a translated DNA data bank",
-X NULL, 400, "MD10-MS", 0, 0, 0, 2.0, 2 }, /* 15 - TFASTS; NOTE(review): the
-X defaults comment earlier in this file lists ktup 1 for tfasts */
-X {TFF_PID, "TFASTF", "tff",
-X "TFASTF compares mixed peptides to a protein databank",
-X NULL, 400, "MD10", 0, 0, 0, 1.0, 1 }, /* 16 - TFASTF */
-X {TFM_PID, "TFASTM", "tfm",
-X "TFASTM compares ordered peptides to a translated DNA databank",
-X NULL, 400, "MD10", 0, 0, 0, 1.0, 1 } /* 17 - TFASTM */
-};
-X
-struct msg_def_str { /* per-program message/sequence-type defaults; see msg_def_arr[] */
-X int pgm_id; /* program id; h_init() checks that it equals the array index */
-X int q_seqt; /* query sequence type (SEQT_*); SEQT_UNK is resolved in resetp() */
-X int l_seqt; /* library sequence type; copied to m_msg->ldnaseq */
-X int p_seqt; /* comparison (parameter) sequence type; copied to ppst->dnaseq */
-X int sw_flag; /* copied to ppst->sw_flag */
-X int stages; /* copied to m_msp->stages */
-X int qframe; /* copied to m_msp->qframe */
-X int nframe; /* copied to m_msp->nframe */
-X int nrelv, srelv, arelv; /* copied to the m_msp fields of the same names */
-X char *f_id0, *f_id1, *label; /* copied to the m_msp fields of the same names */
-};
-X
-/* pgm_id q_seqt l_seqt p_seqt sw_f st qf nf nrv srv arv s_ix */
-struct msg_def_str msg_def_arr[20] = { /* indexed by program id, parallel to pgm_def_arr[] */
-X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=0 */
-X {FA_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 1, 3,
-X "fa","sw", "opt"}, /* ID=1 */
-X {SS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
-X "sw","sw", "s-w"}, /* ID=2 */
-X {FX_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
-X "fx","sx", "opt"}, /* ID=3 */
-X {FY_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,
-X "fy","sy", "opt"}, /* ID=4 */
-X {FS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
-X "fs","fs", "initn init1"}, /* ID=5 */
-X {FF_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
-X "ff","ff", "initn init1"}, /* ID=6 */
-X {FM_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,
-X "fm","fm","initn init1"}, /* ID=7 */
-X {RSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 0, 1, 1, -1, 1, 1, 1,
-X "rss","sw","s-w"}, /* ID=8 */
-X {RFX_PID, SEQT_DNA,SEQT_PROT, SEQT_PROT, 0, 1, 2, -1, 3, 1, 3,
-X "rfx","sx","opt"}, /* ID=9 */
-X {SSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,
-X "sw","sw", "s-w"}, /* ID=10 */
-X {TFA_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 0, 1, 1, 6, 3, 1, 3,
-X "tfa","fa","initn init1"}, /* ID=11 */
-X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=12 */
-X {TFX_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
-X "tfx","sx","initn opt"}, /* ID=13 */
-X {TFY_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,
-X "tfy","sy","initn opt"}, /* ID=14 */
-X {TFS_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
-X "tfs","fs","initn init1"}, /* ID=15 */
-X {TFF_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
-X "tff","ff","initn init1"}, /* ID=16 */
-X {TFM_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,
-X "tfm","fm","initn init1"} /* ID=17 */
-};
-X
-int
-get_pgm_id() { /* Returns the program id chosen by the compile-time macros
-X (FASTA, FASTX, FASTY, FASTS, FASTF, FASTM, SSEARCH, OSEARCH, PRSS,
-X optionally combined with TFAST) and stores the matching citation
-X from ref_str_a[] in that program's pgm_def_arr[] entry.
-X Returns 0 when none of these macros is defined; when several are
-X defined, the last matching section below wins. */
-X
-X int rval=0;
-X
-#ifdef FASTA
-#ifndef TFAST
-X pgm_def_arr[FA_PID].ref_str = ref_str_a[0];
-X rval=FA_PID;
-#else
-X pgm_def_arr[TFA_PID].ref_str = ref_str_a[0];
-X rval=TFA_PID;
-#endif
-#endif
-X
-#ifdef FASTX
-#ifndef TFAST
-#ifndef PRSS
-X pgm_def_arr[FX_PID].ref_str = ref_str_a[2];
-X rval=FX_PID;
-#else
-X pgm_def_arr[RFX_PID].ref_str = ref_str_a[2]; /* FASTX together with PRSS selects PRFX */
-X rval=RFX_PID;
-#endif
-#else
-X pgm_def_arr[TFX_PID].ref_str = ref_str_a[2];
-X rval=TFX_PID;
-#endif
-#endif
-X
-#ifdef FASTY
-#ifndef TFAST
-X pgm_def_arr[FY_PID].ref_str = ref_str_a[2];
-X rval=FY_PID;
-#else
-X pgm_def_arr[TFY_PID].ref_str = ref_str_a[2];
-X rval=TFY_PID;
-#endif
-#endif
-X
-#ifdef FASTS
-#ifndef TFAST
-X pgm_def_arr[FS_PID].ref_str = ref_str_a[3];
-X rval=FS_PID;
-#else
-X pgm_def_arr[TFS_PID].ref_str = ref_str_a[3];
-X rval=TFS_PID;
-#endif
-#endif
-X
-#ifdef FASTF
-#ifndef TFAST
-X pgm_def_arr[FF_PID].ref_str = ref_str_a[3];
-X rval=FF_PID;
-#else
-X pgm_def_arr[TFF_PID].ref_str = ref_str_a[3];
-X rval=TFF_PID;
-#endif
-#endif
-X
-#ifdef FASTM
-#ifndef TFAST
-X pgm_def_arr[FM_PID].ref_str = ref_str_a[3];
-X rval=FM_PID;
-#else
-X pgm_def_arr[TFM_PID].ref_str = ref_str_a[3];
-X rval=TFM_PID;
-#endif
-#endif
-X
-#ifdef SSEARCH
-X pgm_def_arr[SS_PID].ref_str = ref_str_a[1];
-X rval=SS_PID;
-#endif
-X
-#ifdef OSEARCH
-X pgm_def_arr[SSS_PID].ref_str = ref_str_a[1];
-X rval=SSS_PID;
-#endif
-X
-#ifdef PRSS
-#ifndef FASTX
-X pgm_def_arr[RSS_PID].ref_str = ref_str_a[4]; /* PRSS without FASTX; the FASTX case is handled above */
-X rval=RSS_PID;
-#endif
-#endif
-X
-X return rval;
-}
-X
-char *iprompt1=" test sequence file name: ";
-char *iprompt2=" database file name: ";
-X
-char *verstr="version 34.26.5 April 26, 2007";
-X
-char *s_optstr = "13Ac:f:g:h:j:k:nopP:r:s:St:Ux:y:"; /* option letters handled by f_getopt() */
-X
-static int mktup=2; /* largest accepted ktup; resetp() raises it to 6 for DNA */
-static int ktup_set = 0; /* set when ktup comes from argv (f_getarg) or the prompt (query_parm) */
-static int gap_set=0; /* set by -g */
-static int del_set=0; /* set by -f */
-static int mshuff_set = 0; /* set by -k */
-static int prot2dna = 0; /* set by -n and -U */
-X
-extern int max_workers;
-X
-extern void s_abort(char *, char *);
-extern void init_ascii(int ext_sq, int *sascii, int dnaseq);
-extern int standard_pam(char *smstr, struct pstruct *ppst,
-X int del_set, int gap_set);
-extern void mk_n_pam(int *arr,int siz, int mat, int mis);
-extern int karlin(int , int, double *, double *, double *);
-extern void init_karlin_a(struct pstruct *, double *, double **);
-extern int do_karlin_a(int **, struct pstruct *, double *,
-X double *, double *, double *, double *);
-X
-#if defined(TFAST) || defined(FASTX) || defined(FASTY)
-extern void aainit(int tr_type, int debug);
-#endif
-X
-char *iprompt0, *prog_func, *refstr;
-X
-X
-/* Sets defaults assuming a protein sequence.
-X Looks up the program id with get_pgm_id(), verifies that pgm_def_arr[]
-X and msg_def_arr[] hold an entry with that id at that index (exits with
-X status 1 otherwise), copies the abbreviation into pgm_abbr, sets the
-X globals iprompt0, refstr and prog_func, and then initializes the query
-X ascii map, the default scoring matrix, the protein alphabet in *ppst
-X and the c_nt[] mapping. */
-void h_init (struct pstruct *ppst, struct mngmsg *m_msp, char *pgm_abbr)
-{
-X struct pgm_def_str pgm_def;
-X int i, pgm_id;
-X
-X ppst->pgm_id = pgm_id = get_pgm_id();
-X pgm_def = pgm_def_arr[pgm_id]; /* struct copy of the table entry */
-X
-X /* check that pgm_def_arr[] is valid */
-X if (pgm_def.pgm_id != pgm_id) {
-X fprintf(stderr,
-X "**pgm_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
-X pgm_def.pgm_id, pgm_id);
-X exit(1);
-X }
-X
-X /* check that msg_def_arr[] is valid */
-X if (msg_def_arr[pgm_id].pgm_id != pgm_id) {
-X fprintf(stderr,
-X "**msg_def integrity failure: def.pgm_id %d != pgm_id %d**\n",
-X msg_def_arr[pgm_id].pgm_id, pgm_id);
-X exit(1);
-X }
-X
-X strncpy(pgm_abbr,pgm_def.pgm_abbr,MAX_SSTR); /* assumes the caller's buffer holds MAX_SSTR bytes -- TODO confirm */
-X iprompt0 = pgm_def.iprompt0;
-X refstr = pgm_def.ref_str;
-X prog_func = pgm_def.prog_func;
-X
-X /* MAXTOT = MAXTST + MAXLIB for everything except TFAST,
-X where it is MAXTST + MAXTRN */
-X m_msp->max_tot = MAXTOT;
-X
-X /* set up DNA query sequence if required*/
-X if (msg_def_arr[pgm_id].q_seqt == SEQT_DNA) {
-X memcpy(qascii,nascii,sizeof(qascii));
-X m_msp->qdnaseq = SEQT_DNA;
-X }
-X else { /* when SEQT_UNK, start with protein */
-X memcpy(qascii,aascii,sizeof(qascii));
-X m_msp->qdnaseq = msg_def_arr[pgm_id].q_seqt;
-X }
-X
-#if defined(FASTF) || defined(FASTS) || defined(FASTM)
-X qascii[','] = ESS;
-X /* also initialize aascii, nascii for databases */
-X qascii['*'] = NA;
-#endif
-X
-X /* initialize a pam matrix */
-X strncpy(ppst->pamfile,pgm_def.smstr,MAX_FN);
-X standard_pam(ppst->pamfile,ppst,del_set,gap_set);
-X ppst->have_pam2 = 0; /* last_init() allocates the 2-D matrix while this is 0 */
-X
-X /* this is always protein by default */
-X ppst->nsq = naa;
-X ppst->nsqx = naax;
-X for (i=0; i<=ppst->nsqx; i++) {
-X ppst->sq[i] = aa[i];
-X ppst->hsq[i] = haa[i];
-X ppst->sqx[i]=aax[i]; /* sq = aa */
-X ppst->hsqx[i]=haax[i]; /* hsq = haa */
-X }
-X ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
-X
-X /* set up the c_nt[] mapping */
-X
-#if defined(FASTS) || defined(FASTF) || defined(FASTM)
-X ppst->c_nt[ESS] = ESS;
-#endif
-X ppst->c_nt[0]=0;
-X for (i=1; i<=nnt; i++) {
-X ppst->c_nt[i]=gc_nt[i];
-X ppst->c_nt[i+nnt]=gc_nt[i]+nnt; /* second half: the same mapping offset by nnt */
-X }
-}
-X
-/*
-X * alloc_pam(): allocates memory for the 2D pam matrix as well
-X * as for the integer array used to transmit the pam matrix
-X *
-X * Allocates d1 row pointers for each of ppst->pam2[0] and ppst->pam2[1]
-X * and two d1*d2 int arrays, stored in pam12 (zero-filled by calloc) and
-X * pam12x (left uninitialized by malloc), both declared outside this
-X * function. Row i of each matrix points d2 ints further into its array.
-X * Calls s_abort() when an allocation fails and sets ppst->have_pam2 = 1
-X * at the end.
-X */
-void
-alloc_pam (int d1, int d2, struct pstruct *ppst)
-{
-X int i, *d2p;
-X char err_str[128];
-X
-X if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X s_abort (err_str,"");
-X }
-X
-X if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X s_abort (err_str,"");
-X }
-X
-X if ((d2p = pam12 = (int *) calloc (d1 * d2, sizeof (int))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X s_abort (err_str,"");
-X }
-X
-X for (i = 0; i < d1; i++, d2p += d2)
-X ppst->pam2[0][i] = d2p;
-X
-X if ((d2p=pam12x= (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2); /* NOTE(review): reports d2 where the messages above report d1 */
-X s_abort (err_str,"");
-X }
-X
-X for (i = 0; i < d1; i++, d2p += d2)
-X ppst->pam2[1][i] = d2p;
-X
-X ppst->have_pam2 = 1;
-}
-X
-/*
-X * init_pam2(struct pstruct pst): Converts 1-D pam matrix to 2-D
-X *
-X * Reads the 1-D array pam[] in lower-triangular order (i = 1..nsq,
-X * j = 1..i), subtracts ppst->pamoff and stores each value symmetrically
-X * in ppst->pam2[0][i][j] and [j][i]. Row 0 and column 0 are set to
-X * -BIGNUM. The smallest and largest stored values are recorded in
-X * ppst->pam_l and ppst->pam_h (starting from 1 and -1 respectively).
-X */
-void
-init_pam2 (struct pstruct *ppst) {
-X int i, j, k, nsq;
-X
-X nsq = ppst->nsq;
-X
-X ppst->pam2[0][0][0] = -BIGNUM;
-X ppst->pam_h = -1; ppst->pam_l = 1;
-X
-X k = 0; /* running index into pam[] */
-X for (i = 1; i <= nsq; i++) {
-X ppst->pam2[0][0][i] = ppst->pam2[0][i][0] = -BIGNUM;
-X for (j = 1; j <= i; j++) {
-X ppst->pam2[0][j][i] = ppst->pam2[0][i][j] = pam[k++] - ppst->pamoff;
-X if (ppst->pam_l > ppst->pam2[0][i][j]) ppst->pam_l =ppst->pam2[0][i][j];
-X if (ppst->pam_h < ppst->pam2[0][i][j]) ppst->pam_h =ppst->pam2[0][i][j];
-X }
-X }
-}
-X
-void
-init_pamx (struct pstruct *ppst) { /* Post-processes the 2-D matrix:
-X sets ppst->nt_align, adjusts three RNA pair scores, applies or
-X records the 'X'/'N' scores (pam_xx, pam_xm), and, when ext_sq_set
-X is on, fills the extended-alphabet parts of pam2[0] and pam2[1]. */
-X int i, j, k, nsq, pam_xx, pam_xm; /* k is not used; pam_xx/pam_xm are assigned below but not read */
-X int sa_x, sa_t, tmp;
-X
-X nsq = ppst->nsq;
-X
-X ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
-X
-X if (ppst->nt_align) {
-X sa_x = pascii['N'];
-X sa_t = sa_x;
-X }
-X else {
-X sa_x = pascii['X'];
-X sa_t = pascii['*'];
-X }
-X
-X if (ppst->dnaseq == SEQT_RNA) {
-X /* A:G, C:T and C:U are scored one less than the G:G score */
-X tmp = ppst->pam2[0][nascii['G']][nascii['G']] - 1;
-X ppst->pam2[0][nascii['A']][nascii['G']] =
-X ppst->pam2[0][nascii['C']][nascii['T']] =
-X ppst->pam2[0][nascii['C']][nascii['U']] = tmp;
-X }
-X
-X if (ppst->pam_x_set) { /* -x given: overwrite the sa_x and sa_t rows/columns */
-X for (i=1; i<=nsq; i++) {
-X ppst->pam2[0][sa_x][i] = ppst->pam2[0][i][sa_x]=ppst->pam_xm;
-X ppst->pam2[0][sa_t][i] = ppst->pam2[0][i][sa_t]=ppst->pam_xm;
-X }
-X ppst->pam2[0][sa_x][sa_x]=ppst->pam_xx;
-X ppst->pam2[0][sa_t][sa_t]=ppst->pam_xx;
-X }
-X else { /* otherwise record the values already present in the matrix */
-X ppst->pam_xx = ppst->pam2[0][sa_x][sa_x];
-X ppst->pam_xm = ppst->pam2[0][1][sa_x];
-X }
-X
-X pam_xx = ppst->pam_xx;
-X pam_xm = ppst->pam_xm;
-X
-X if (ppst->ext_sq_set) { /* using extended alphabet */
-X /* fill in pam2[1] matrix */
-X ppst->pam2[1][0][0] = -BIGNUM;
-X /* fill in additional parts of the matrix */
-X for (i = 1; i <= nsq; i++) {
-X
-X /* -BIGNUM to all matches vs 0 */
-X ppst->pam2[0][0][i+nsq] = ppst->pam2[0][i+nsq][0] =
-X ppst->pam2[1][0][i+nsq] = ppst->pam2[1][i+nsq][0] =
-X ppst->pam2[1][0][i] = ppst->pam2[1][i][0] = -BIGNUM;
-X
-X for (j = 1; j <= nsq; j++) {
-X
-X /* replicate pam2[0] to i+nsq, j+nsq */
-X ppst->pam2[0][i+nsq][j] = ppst->pam2[0][i][j+nsq] =
-X ppst->pam2[0][i+nsq][j+nsq] = ppst->pam2[1][i][j] =
-X ppst->pam2[0][i][j];
-X
-X /* set the high portion of pam2[1] to the corresponding value
-X of pam2[1][sa_x][j]
-X NOTE(review): the assignment below reads pam2[0][sa_x][j],
-X not pam2[1][sa_x][j] */
-X
-X ppst->pam2[1][i+nsq][j] = ppst->pam2[1][i][j+nsq]=
-X ppst->pam2[1][i+nsq][j+nsq]=ppst->pam2[0][sa_x][j];
-X }
-X }
-X }
-}
-X
-/* function specific initializations
-X Copies the program's msg_def_arr[] entry (ids, label, frames, relevant
-X score counts, stages) into *m_msp, applies g_open_mod and e_cut from
-X pgm_def_arr[], resets the pam-related flags in *ppst and, for programs
-X with ktup > 0, sets the param_u.fa defaults. aa0 is not referenced. */
-void
-f_initenv (struct mngmsg *m_msp, struct pstruct *ppst, unsigned char **aa0) {
-X struct msg_def_str m_msg_def;
-X int pgm_id;
-X
-X pgm_id = ppst->pgm_id;
-X m_msg_def = msg_def_arr[pgm_id];
-X
-X m_msp->last_calc_flg=0;
-X
-X strncpy(m_msp->f_id0,m_msg_def.f_id0,sizeof(m_msp->f_id0));
-X strncpy(m_msp->f_id1,m_msg_def.f_id1,sizeof(m_msp->f_id1));
-X strncpy (m_msp->label, m_msg_def.label, sizeof(m_msp->label));
-X
-#ifndef SSEARCH
-X strncpy (m_msp->alab[0],"initn",20);
-X strncpy (m_msp->alab[1],"init1",20);
-X strncpy (m_msp->alab[2],"opt",20);
-#else
-X strncpy (m_msp->alab[0],"s-w opt",20);
-#endif
-X
-X ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod; /* NOTE(review): resetp() adds g_open_mod again for protein comparisons when -f is not given */
-X ppst->sw_flag = m_msg_def.sw_flag;
-X m_msp->e_cut=pgm_def_arr[pgm_id].e_cut;
-X
-X ppst->score_ix = 0;
-X ppst->histint = 2;
-X m_msp->qframe = m_msg_def.qframe;
-X ppst->sw_flag = m_msg_def.sw_flag; /* repeats the assignment made above */
-X m_msp->nframe = m_msg_def.nframe;
-X m_msp->nrelv = m_msg_def.nrelv;
-X m_msp->srelv = m_msg_def.srelv;
-X m_msp->arelv = m_msg_def.arelv;
-X m_msp->stages = m_msg_def.stages;
-#if defined(PRSS)
-X m_msp->shuff_wid = 0;
-X m_msp->shuff_max = 200;
-#endif
-X
-X /* see param.h for the definition of all these */
-X
-X m_msp->qshuffle = 0;
-X m_msp->nm0 = 1;
-X m_msp->escore_flg = 0;
-X
-X /* pam information */
-X ppst->pam_pssm = 0;
-#if defined(FASTS) || defined(FASTF) || defined(FASTM)
-X ppst->pam_xx = ppst->pam_xm = 0;
-#else
-X ppst->pam_xx = 1; /* set >0 to use pam['X']['X'] value */
-X ppst->pam_xm = -1; /* set >0 to use pam['X']['A-Z'] value */
-#endif
-X ppst->pam_x_set = 0;
-X ppst->pam_set = 0;
-X ppst->pam_pssm = 0; /* repeats the assignment made above */
-X ppst->p_d_set = 0;
-X ppst->pamoff = 0;
-X ppst->ext_sq_set = 0;
-X
-X if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
-X mktup = 2;
-X ppst->param_u.fa.bestscale = 300;
-X ppst->param_u.fa.bestoff = 36;
-X ppst->param_u.fa.bkfact = 6;
-X ppst->param_u.fa.scfact = 3;
-X ppst->param_u.fa.bktup = 2;
-X ppst->param_u.fa.ktup = 0;
-X ppst->param_u.fa.bestmax = 50;
-X ppst->param_u.fa.pamfact = 1;
-X ppst->param_u.fa.altflag = 0;
-X ppst->param_u.fa.optflag = 1;
-X ppst->param_u.fa.iniflag = 0;
-X ppst->param_u.fa.optcut = 0;
-X ppst->param_u.fa.optcut_set = 0;
-X ppst->param_u.fa.cgap = 0;
-X ppst->param_u.fa.optwid = MAXWINDOW;
-X }
-X
-}
-X
-/* switches for fasta only; each flag records that f_getopt() saw the option */
-X
-static int shift_set=0; /* -h */
-static int subs_set=0; /* -j */
-static int sw_flag_set=0; /* -A */
-static int nframe_set=0; /* -3 */
-static int wid_set=0; /* -y, for programs with ktup > 0 */
-X
-/* f_getopt(): handles one program-specific command line option.
-X copt is the option letter (one of those in s_optstr) and optarg its
-X argument, if any. Results are stored in *m_msg, *ppst and the
-X file-level *_set flags; letters without a case below are ignored.
-X The -P and -r cases NUL-terminate the copied string before it is
-X used, because strncpy() does not terminate the destination when the
-X source fills it. */
-void
-f_getopt (char copt, char *optarg,
-X struct mngmsg *m_msg, struct pstruct *ppst)
-{
-X int pgm_id;
-X char *bp;
-X
-X pgm_id = ppst->pgm_id;
-X
-X switch (copt) {
-X case '1':
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X ppst->param_u.fa.iniflag=1;
-X }
-X break;
-X case '3':
-X nframe_set = 1;
-X if (pgm_id == TFA_PID) {
-X m_msg->nframe = 3; break;
-X }
-X else {
-X m_msg->nframe = 1; /* for TFASTXY */
-X m_msg->qframe = 1; /* for FASTA, FASTX */
-X }
-X break;
-X case 'A':
-X ppst->sw_flag= 1;
-X sw_flag_set = 1;
-X break;
-X case 'c':
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X sscanf (optarg, "%d", &ppst->param_u.fa.optcut);
-X ppst->param_u.fa.optcut_set = 1;
-X }
-X break;
-X case 'f':
-X sscanf (optarg, "%d", &ppst->gdelval);
-X if (ppst->gdelval > 0) ppst->gdelval = -ppst->gdelval; /* penalties are stored as negative values */
-X del_set = 1;
-X break;
-X case 'g':
-X sscanf (optarg, "%d", &ppst->ggapval);
-X if (ppst->ggapval > 0) ppst->ggapval = -ppst->ggapval;
-X gap_set = 1;
-X break;
-X case 'h':
-X sscanf (optarg, "%d", &ppst->gshift);
-X if (ppst->gshift > 0) ppst->gshift = -ppst->gshift;
-X shift_set = 1;
-X break;
-X case 'j':
-X sscanf (optarg, "%d", &ppst->gsubs);
-X subs_set = 1;
-X break;
-X case 'k':
-X sscanf (optarg, "%d", &m_msg->shuff_max);
-X mshuff_set = 1;
-X break;
-X case 'n':
-X m_msg->qdnaseq = SEQT_DNA;
-X re_ascii(qascii,nascii);
-X strncpy(m_msg->sqnam,"nt",4);
-X prot2dna = 1;
-X break;
-X case 'o':
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X ppst->param_u.fa.optflag = 0;
-X msg_def_arr[pgm_id].nrelv = m_msg->nrelv = 2;
-X }
-X break;
-X case 'p':
-X m_msg->qdnaseq = SEQT_PROT;
-X ppst->dnaseq = SEQT_PROT;
-X strncpy(m_msg->sqnam,"aa",4);
-X break;
-X case 'P':
-X strncpy(ppst->pgpfile,optarg,MAX_FN);
-X /* terminate before searching: an argument of MAX_FN or more
-X characters would otherwise leave pgpfile unterminated for
-X strchr() and let atoi(bp+1) read past the buffer */
-X ppst->pgpfile[MAX_FN-1]='\0';
-X if ((bp=strchr(ppst->pgpfile,' '))!=NULL) {
-X *bp='\0'; /* "file type": split the name from the numeric type */
-X ppst->pgpfile_type = atoi(bp+1);
-X }
-X else ppst->pgpfile_type = 0;
-X ppst->pam_pssm = 1;
-X break;
-X case 'r':
-X sscanf(optarg,"%d/%d",&ppst->p_d_mat,&ppst->p_d_mis);
-X if (ppst->p_d_mat > 0 && ppst->p_d_mis < 0) {
-X ppst->p_d_set = 1;
-X strncpy(ppst->pamfile,optarg,40);
-X ppst->pamfile[40-1]='\0'; /* strncpy() does not terminate a 40+ character argument */
-X }
-X break;
-X case 's':
-X strncpy (ppst->pamfile, optarg, 120);
-X ppst->pamfile[120-1]='\0';
-X if (!standard_pam(ppst->pamfile,ppst,del_set, gap_set)) {
-X initpam (ppst->pamfile, ppst); /* not a built-in matrix name: load it with initpam() */
-X }
-X ppst->pam_set=1;
-X break;
-X case 'S': /* turn on extended alphabet for seg */
-X ppst->ext_sq_set = 1;
-X break;
-X case 't':
-X if (tolower(optarg[0])=='t') {
-X m_msg->term_code = aascii['*']; optarg++;
-X }
-X if (*optarg) {sscanf (optarg, "%d", &ppst->tr_type);}
-X break;
-X case 'U':
-X m_msg->qdnaseq = SEQT_RNA;
-X memcpy(qascii,nascii,sizeof(qascii));
-X strncpy(m_msg->sqnam,"nt",4);
-X nt[nascii['T']]='U';
-X prot2dna=1;
-X break;
-X case 'x':
-X if (strchr(optarg,',')!=NULL) {
-X sscanf (optarg,"%d,%d",&ppst->pam_xx, &ppst->pam_xm);
-X }
-X else {
-X sscanf (optarg,"%d",&ppst->pam_xx);
-X ppst->pam_xm = ppst->pam_xx;
-X }
-X ppst->pam_x_set=1;
-X break;
-X case 'y':
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X sscanf (optarg, "%d", &ppst->param_u.fa.optwid);
-X wid_set = 1;
-X }
-X break;
-X }
-}
-X
-void
-f_lastenv (struct mngmsg *m_msg, struct pstruct *ppst)
-{ /* Final query-map setup: when the query type is still SEQT_UNK, calls
-X build_xascii() with "*" (or "," for FASTM/FASTS/FASTF builds) and,
-X if m_msg->ann_flg is set, ann_ascii(). Otherwise, in FASTA builds
-X only, calls init_ascii() on qascii. */
-X char save_str[MAX_SSTR];
-X
-#if !defined(FASTM) && !defined(FASTS) && !defined(FASTF)
-X strncpy(save_str,"*",sizeof(save_str));
-#else
-X strncpy(save_str,",",sizeof(save_str));
-#endif
-X
-X if (m_msg->qdnaseq == SEQT_UNK) {
-X build_xascii(qascii,save_str);
-X if (m_msg->ann_flg) ann_ascii(qascii,m_msg->ann_arr);
-X }
-X
-/* this check allows lc DNA sequence queries with FASTX */
-#if defined(FASTA) && !defined(FASTS) && !defined(FASTM) && !defined(FASTF)
-X else /* pairs with the if above; compiled only under this #if */
-X init_ascii(ppst->ext_sq_set,qascii,m_msg->qdnaseq);
-#endif
-}
-X
-void
-f_getarg (int argc, char **argv, int optind,
-X struct mngmsg *m_msg, struct pstruct *ppst)
-{ /* Reads the optional trailing positional arguments: ktup for programs
-X with ktup > 0, and the shuffle count for PRSS and PRFX. */
-X
-X if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
-X if (argc - optind >= 4) {
-X sscanf (argv[optind + 3], "%d", &ppst->param_u.fa.ktup);
-X ktup_set = 1;
-X }
-X else
-X ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup; /* negative default; query_parm() and last_init() flip the sign */
-X }
-X
-X if (ppst->pgm_id == RSS_PID && argc - optind > 3) {
-X sscanf (argv[optind + 3], "%d", &m_msg->shuff_max);
-X }
-X
-X if (ppst->pgm_id == RFX_PID && argc - optind > 4) {
-X sscanf (argv[optind + 4], "%d", &m_msg->shuff_max);
-X }
-}
-X
-/* fills in the query ascii mapping from the parameter
-X ascii mapping.
-X For each of the first 128 entries, qascii[i] is replaced by pascii[i]
-X when its current value is greater than '@' or less than ESS; all
-X other entries are left unchanged.
-*/
-X
-void
-re_ascii(int *qascii, int *pascii) {
-X int i;
-X
-X for (i=0; i < 128; i++) {
-X if (qascii[i] > '@' || qascii[i] < ESS) {
-X qascii[i] = pascii[i];
-X }
-X }
-}
-X
-X
-/* recode has become function specific to accommodate FASTS/M */
-/* modified 28-Dec-2004 to ensure that all mapped characters
-X are valid */
-/* recode(): converts the n characters of seq in place to residue codes
-X using the map qascii, writes EOSEQ at seq[n] and returns n.
-X Bytes greater than '@' are looked up in qascii; other bytes are kept
-X as they are. A result greater than nsqx that is not ESS is reported
-X on stderr and replaced by qascii['X'].
-X Bytes above 127 are reported and replaced in the same way without
-X being looked up: re_ascii() in this file treats the map as having
-X 128 entries, so indexing it with such a byte would read out of range. */
-int
-recode(unsigned char *seq, int n, int *qascii, int nsqx) {
-X int i, unknown;
-X char save_c;
-X
-#if defined(FASTS) || defined(FASTM)
-X qascii[',']=ESS;
-#endif
-X
-X for (i=0; i < n; i++) {
-X save_c = seq[i]; /* original character, kept for the warning */
-X unknown = 0;
-X if (seq[i] > 127) unknown = 1; /* outside the 128-entry map */
-X else if (seq[i] > '@') seq[i] = qascii[seq[i]];
-X if (unknown || (seq[i] > nsqx && seq[i]!=ESS)) {
-X fprintf(stderr, "*** Warning - unrecognized residue at %d:%c - %2d\n",
-X i,save_c,save_c);
-X seq[i] = qascii['X'];
-X }
-X }
-X seq[i]=EOSEQ;
-X return i;
-}
-X
-/* here we have the query sequence, all the command line options,
-X but we need to set various parameter options based on the type
-X of the query sequence (m_msg->qdnaseq = 0:protein/1:DNA) and
-X the function (FASTA/FASTX/TFASTA)
-*/
-X
-/* this resetp is for a conventional FASTA/TFASTXYZ search.
-X Rejects query types the program cannot handle (exit status 1),
-X resolves a SEQT_UNK query type in msg_def_arr[] to DNA/RNA or
-X protein, selects the pascii and lascii mappings, sets qframe/nframe,
-X and then applies either the DNA defaults (gap penalties, param_u.fa
-X scaling, nucleotide alphabet, default matrix) or the protein
-X defaults (optwid, g_open_mod, gshift, gsubs). Values given on the
-X command line (the *_set flags) are not overridden. */
-void
-resetp (struct mngmsg *m_msg, struct pstruct *ppst) {
-X int i, pgm_id;
-X
-X pgm_id = ppst->pgm_id;
-X
-#if defined(TFAST)
-X if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
-X fprintf(stderr," %s compares a protein to a translated\n\
-DNA sequence library. Do not use a DNA query/scoring matrix.\n",prog_func);
-X exit(1);
-X }
-#else
-#if (defined(FASTX) || defined(FASTY))
-X if (!(m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA)) {
-X fprintf(stderr," FASTX/Y compares a DNA sequence to a protein database\n");
-X fprintf(stderr," Use a DNA query\n");
-X exit(1);
-X }
-#endif
-#endif
-X
-/* this code changes parameters for programs (FA_PID, SS_PID, FS_PID,
-X RSS_PID) that can examine either protein (initial state) or DNA
-X Modified May, 2006 to reset e_cut for DNA comparisons.
-*/
-X
-X if (msg_def_arr[pgm_id].q_seqt == SEQT_UNK) {
-X if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
-X msg_def_arr[pgm_id].q_seqt = m_msg->qdnaseq;
-X msg_def_arr[pgm_id].p_seqt = SEQT_DNA;
-X msg_def_arr[pgm_id].l_seqt = SEQT_DNA;
-X if (m_msg->qdnaseq == SEQT_DNA) msg_def_arr[pgm_id].qframe = 2;
-X pgm_def_arr[pgm_id].e_cut /= 5.0; /* modifies the shared table; q_seqt is no longer SEQT_UNK afterwards, so this runs once */
-X }
-X else {
-X msg_def_arr[pgm_id].q_seqt = SEQT_PROT;
-X }
-X }
-X
-X ppst->dnaseq = msg_def_arr[pgm_id].p_seqt;
-X if (!sw_flag_set) ppst->sw_flag = msg_def_arr[pgm_id].sw_flag;
-X if (!m_msg->e_cut_set) m_msg->e_cut=pgm_def_arr[pgm_id].e_cut;
-X
-X if (ppst->dnaseq == SEQT_DNA && m_msg->qdnaseq==SEQT_RNA) {
-X ppst->dnaseq = SEQT_RNA;
-X ppst->nt_align = 1;
-X }
-X if (ppst->dnaseq==SEQT_DNA) pascii = &nascii[0];
-X else if (ppst->dnaseq==SEQT_RNA) {
-X pascii = &nascii[0];
-X ppst->sq[nascii['T']] = 'U';
-X }
-X else pascii = &aascii[0];
-X m_msg->ldnaseq = msg_def_arr[pgm_id].l_seqt;
-X if (m_msg->ldnaseq & SEQT_DNA) {
-X memcpy(lascii,nascii,sizeof(lascii));
-#ifndef TFAST
-#ifdef DNALIB_LC
-X init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);
-#endif
-#else
-X /* no init_ascii() because we translate lower case library sequences */
-#endif
-X }
-X else {
-X memcpy(lascii,aascii,sizeof(lascii)); /* initialize lib mapping */
-X
-#if defined(FASTF) || defined(FASTS) || defined(FASTM)
-X lascii['*'] = NA;
-#endif
-X init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);
-X }
-X
-X if (!nframe_set) {
-X m_msg->qframe = msg_def_arr[pgm_id].qframe;
-X m_msg->nframe = msg_def_arr[pgm_id].nframe;
-X }
-X
-X /* the possibilities:
-X -i -3 qframe revcomp
-X FA_D/FX - - 2 0
-X FA_D/FX + - 2 1
-X FA_D/FX - + 1 0
-X FA_D/FX + + 2 1
-X */
-X
-X if (m_msg->qdnaseq == SEQT_DNA) {
-X m_msg->nframe = 1;
-X if (m_msg->qframe == 1 && m_msg->revcomp==1) {
-X m_msg->qframe = m_msg->revcomp+1;
-X }
-X }
-X else if (m_msg->qdnaseq == SEQT_RNA) {
-X m_msg->qframe = m_msg->revcomp+1;
-X m_msg->nframe = 1;
-X }
-X
-X /* change settings for DNA search */
-X if (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA) {
-X ppst->histint = 4;
-X
-X if (!del_set) {
-#ifdef OLD_FASTA_GAP
-X ppst->gdelval = -16; /* def. del penalty */
-#else
-X ppst->gdelval = -12; /* def. open penalty */
-#endif
-X }
-X if (!gap_set) ppst->ggapval = -4; /* def. gap penalty */
-X
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X /* these parameters are used to scale optcut, they should be replaced
-X by statistically based parameters */
-X if (!wid_set) ppst->param_u.fa.optwid = 16;
-X ppst->param_u.fa.bestscale = 80;
-X ppst->param_u.fa.bkfact = 5;
-X ppst->param_u.fa.scfact = 1;
-X ppst->param_u.fa.bktup = 6;
-X ppst->param_u.fa.bestmax = 80;
-X ppst->param_u.fa.bestoff = 45;
-X
-X if (!sw_flag_set) {
-X ppst->sw_flag = 0;
-X strncpy(m_msg->f_id1,"bs",sizeof(m_msg->f_id1));
-X }
-X
-X /* largest ktup */
-X mktup = 6;
-X
-X if (ppst->param_u.fa.pamfact >= 0) ppst->param_u.fa.pamfact = 0;
-X if (ppst->param_u.fa.ktup < 0)
-X ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup; /* still unset: switch to the DNA default */
-X }
-X
-X ppst->nsq = nnt;
-X ppst->nsqx = nntx;
-X for (i=0; i<=ppst->nsqx; i++) {
-X ppst->hsq[i] = hnt[i];
-X ppst->sq[i] = nt[i];
-X ppst->hsqx[i] = hntx[i];
-X ppst->sqx[i] = ntx[i];
-X }
-X ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
-X
-X if (!ppst->pam_set) { /* no -s matrix given */
-X if (ppst->p_d_set)
-X mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
-#if !defined(FASTS) && !defined(FASTM)
-X else if (ppst->pamfile[0]=='\0' || strncmp(ppst->pamfile,"BL50",4)==0) {
-X strncpy (ppst->pamfile, "+5/-4", sizeof(ppst->pamfile));
-X }
-#else
-X else if (strncmp(ppst->pamfile,"MD20",4)==0) {
-X strncpy (ppst->pamfile, "+2/-2", sizeof(ppst->pamfile));
-X ppst->p_d_mat = +2;
-X ppst->p_d_mis = -2;
-X mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);
-X }
-#endif
-X pam = npam;
-X }
-X
-X strncpy (m_msg->sqnam, "nt",sizeof(m_msg->sqnam));
-X strncpy (m_msg->sqtype, "DNA",sizeof(m_msg->sqtype));
-X } /* end DNA reset */
-X
-X else { /* other parameters for protein comparison */
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X if (!wid_set) {
-X if (ppst->param_u.fa.ktup==1) ppst->param_u.fa.optwid = 32;
-X else ppst->param_u.fa.optwid = 16;
-X }
-X }
-X if (!del_set) {ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod;}
-X if (!shift_set) {ppst->gshift = pgm_def_arr[pgm_id].gshift;}
-X if (!subs_set) {ppst->gsubs = pgm_def_arr[pgm_id].hshift;}
-X }
-X
-}
-X
-/* query_parm() this function asks for any additional parameters
-X that have not been provided. Could be null.
-X For programs with ktup > 0 it prompts for ktup when none was given;
-X in PRSS builds it also prompts for the number of shuffles and the
-X shuffle type. Replies are read from stdin; end of input exits with
-X status 0. */
-void
-query_parm (struct mngmsg *m_msp, struct pstruct *ppst)
-{
-X char qline[40];
-X
-X if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
-X if (ppst->param_u.fa.ktup < 0)
-X ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup; /* a negative value is the default stored by f_getarg() */
-X
-X if (ppst->param_u.fa.ktup == 0) {
-X printf (" ktup? (1 to %d) [%d] ", mktup, ppst->param_u.fa.bktup);
-X if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
-X else sscanf(qline,"%d",&ppst->param_u.fa.ktup);
-X }
-X if (ppst->param_u.fa.ktup == 0)
-X ppst->param_u.fa.ktup = ppst->param_u.fa.bktup; /* empty or zero reply: use bktup */
-X else ktup_set = 1;
-X }
-X
-#if defined(PRSS)
-X if (m_msp->shuff_max < 10) m_msp->shuff_max = 200;
-X
-X if (!mshuff_set) {
-X printf(" number of shuffles [%d]? ",m_msp->shuff_max);
-X fflush(stdout);
-X if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
-X else sscanf(qline,"%d",&m_msp->shuff_max);
-X }
-X
-X if (ppst->zs_win == 0) {
-X printf (" local (window) (w) or uniform (u) shuffle [u]? ");
-X if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
-X else if (qline[0]=='w' || qline[0]=='W') {
-X m_msp->shuff_wid = 20; /* default offered in the prompt below */
-X printf(" local shuffle window size [%d]? ",m_msp->shuff_wid);
-X if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);
-X else sscanf(qline,"%d",&m_msp->shuff_wid);
-X }
-X }
-#endif
-}
-X
-/* last_init() cannot look at aa0, n0, because it is only run once,
-X it is not run before each new aa0 search.
-X Finishes parameter setup: validates ktup against [1..mktup], derives
-X nframe/nitt1 and thr_fact, selects score_ix, allocates and fills the
-X 2-D pam matrix if needed, applies the pam_ms residue merging and
-X computes Lambda, K and H through init_karlin_a()/do_karlin_a().
-X With PCOMPLIB defined there is an extra nnodes parameter. */
-void
-last_init (struct mngmsg *m_msg, struct pstruct *ppst
-#ifdef PCOMPLIB
-X ,int nnodes
-#endif
-X )
-{
-X int ix_l, ix_i, i, pgm_id;
-X double *kar_p;
-X double aa0_f[MAXSQ];
-X
-X pgm_id = ppst->pgm_id;
-X
-#if defined(FASTF) || defined(FASTS) || defined(FASTM)
-X m_msg->nohist = 1;
-X m_msg->shuff_max = 2000;
-#ifndef PCOMPLIB
-X ppst->shuff_node = m_msg->shuff_max/max_workers; /* assumes max_workers is non-zero -- TODO confirm */
-#else
-X ppst->shuff_node = m_msg->shuff_max/nnodes;
-#endif
-#endif
-X
-X if (m_msg->aln.llen < 1) {
-X m_msg->aln.llen = 60;
-X }
-X
-#ifndef PCOMPLIB
-#if defined(FASTX) || defined(FASTY) || defined(TFAST)
-X /* set up translation tables: faatran.c */
-X aainit(ppst->tr_type,ppst->debug_lib);
-#endif
-#endif
-X
-/* a sanity check */
-#if !defined(TFAST)
-X if (m_msg->revcomp && m_msg->qdnaseq!=SEQT_DNA && m_msg->qdnaseq!=SEQT_RNA) {
-X fprintf(stderr," cannot reverse complement protein\n");
-X m_msg->revcomp = 0;
-X }
-#endif
-X
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X
-X if (ppst->param_u.fa.ktup < 0)
-X ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;
-X
-X if (ppst->param_u.fa.ktup < 1 || ppst->param_u.fa.ktup > mktup) {
-X fprintf(stderr," warning ktup = %d out of range [1..%d], reset to %d\n",
-X ppst->param_u.fa.ktup, mktup, ppst->param_u.fa.bktup);
-X ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;
-X }
-X }
-X
-X if (pgm_id == TFA_PID) {
-X m_msg->revcomp *= 3;
-X if (m_msg->nframe == 3) m_msg->nframe += m_msg->revcomp;
-X }
-X else if (pgm_id == TFX_PID || pgm_id == TFY_PID) {
-X if (m_msg->nframe == 1) m_msg->nframe += m_msg->revcomp;
-X }
-X
-#if !defined(TFAST)
-X /* for fasta/fastx searches, itt iterates the query strand */
-X m_msg->nitt1 = m_msg->qframe-1;
-#else
-X /* for tfasta/tfastxy searches, itt iterates the library frames */
-X m_msg->nitt1 = m_msg->nframe-1;
-#endif
-X
-X if (pgm_def_arr[pgm_id].ktup > 0) {
-X if (ppst->param_u.fa.ktup>=2 && !wid_set) {
-X ppst->param_u.fa.optwid=16;
-X switch (pgm_id) {
-X case FA_PID:
-X m_msg->thr_fact = 32;
-X break;
-X case FX_PID:
-X case FY_PID:
-X m_msg->thr_fact = 16;
-X break;
-X case TFA_PID:
-X case TFX_PID:
-X case TFY_PID:
-X m_msg->thr_fact = 8;
-X break;
-X default:
-X m_msg->thr_fact = 4;
-X }
-X }
-X else { m_msg->thr_fact = 4;}
-X }
-X else m_msg->thr_fact = 4;
-X
-#if defined(PRSS)
-X if (m_msg->shuff_max < 10) m_msg->shuff_max = 200;
-X if (ppst->zsflag < 10) ppst->zsflag += 10;
-X if (ppst->zs_win > 0) {
-X m_msg->shuff_wid = ppst->zs_win;
-X }
-#endif
-X
-X if (pgm_def_arr[ppst->pgm_id].ktup > 0) {
-X if (ppst->param_u.fa.iniflag) {
-X ppst->score_ix = 1;
-X strncpy (m_msg->label, "initn init1", sizeof(m_msg->label));
-X }
-X else if (ppst->param_u.fa.optflag) {
-X ppst->score_ix = 2;
-X m_msg->stages = 1;
-X }
-X }
-X
-X if (!ppst->have_pam2) {
-X alloc_pam (MAXSQ, MAXSQ, ppst);
-X init_pam2(ppst);
-X }
-X init_pamx(ppst);
-X
-X if (ppst->pam_ms) {
-X if (m_msg->qdnaseq == SEQT_PROT) {
-X /* code to make 'L'/'I' identical scores */
-X ix_l = pascii['L'];
-X ix_i = pascii['I'];
-X ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
-X ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
-X (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;
-X for (i=1; i<=ppst->nsq; i++) {
-X ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
-X (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;
-X ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
-X (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;
-X }
-X
-X /* code to make 'Q'/'K' identical scores */
-X if (!shift_set) { /* skipped when -h was given */
-X ix_l = pascii['Q'];
-X ix_i = pascii['K'];
-X ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =
-X ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =
-X (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;
-X for (i=1; i<=ppst->nsq; i++) {
-X ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =
-X (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;
-X ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =
-X (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;
-X }
-X }
-X }
-X }
-X
-X /*
-X print_pam(ppst);
-X */
-X
-X /* once we have a complete pam matrix, we can calculate Lambda and K
-X for "average" sequences */
-X kar_p = NULL;
-X init_karlin_a(ppst, aa0_f, &kar_p);
-X do_karlin_a(ppst->pam2[0], ppst, aa0_f,
-X kar_p, &m_msg->Lambda, &m_msg->K, &m_msg->H);
-X free(kar_p);
-X
-#if defined(FASTF) || defined(FASTS) || defined(FASTM)
-X if (ppst->ext_sq_set) {
-X fprintf(stderr," -S not available on [t]fast[fs]\n");
-X ppst->ext_sq_set = 0;
-X
-X /* reset sascii to ignore -S, map lc */
-X init_ascii(0,lascii,0);
-X }
-#endif
-}
-X
-/* this function is left over from the older FASTA format scoring
-X matrices that allowed additional parameters (bktup, bkfact) to be
-X set in the scoring matrix. It is no longer used. A modern version
-X would set parameters based on lambda and K.
-*/
-/*
-void
-f_initpam (line, ppst)
-char *line;
-struct pstruct *ppst;
-{
-X if (sscanf (line, " %d %d %d %d %d %d %d", &ppst->param_u.fa.scfact,
-X &ppst->param_u.fa.bestoff, &ppst->param_u.fa.bestscale,
-X &ppst->param_u.fa.bkfact, &ppst->param_u.fa.bktup,
-X &ppst->param_u.fa.bestmax, &ppst->histint) != 7)
-X {
-X printf (" bestcut parameters - bad format\n");
-X exit (1);
-X }
-}
-*/
-X
-/* alloc_pam2p allocates a zero-filled profile table of len rows by
-X   (nsq+1) ints.  All rows share one calloc'd slab anchored at
-X   pam2p[0]; pam2p[i] points i*(nsq+1) ints into that slab, so the
-X   table is released by freeing pam2p[0] and then pam2p itself.
-X
-X   Returns the row-pointer array, or NULL after a message on stderr
-X   when len/nsq are unusable or memory cannot be obtained.
-*/
-int **
-alloc_pam2p(int len, int nsq) {
-X  int i;
-X  int **pam2p;
-X  size_t row_len;
-X
-X  /* with len == 0 there is no pam2p[0] slot to hold the slab pointer;
-X     storing into it would write past a zero-byte allocation */
-X  if (len <= 0 || nsq < 0) {
-X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
-X    return NULL;
-X  }
-X  row_len = (size_t)nsq + 1;
-X
-X  if ((pam2p = (int **)calloc((size_t)len,sizeof(int *)))==NULL) {
-X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
-X    return NULL;
-X  }
-X
-X  /* row count and row size are passed separately so calloc() can
-X     reject a product that does not fit, rather than the product
-X     wrapping in int arithmetic first */
-X  if((pam2p[0] = (int *)calloc((size_t)len,row_len*sizeof(int)))==NULL) {
-X    fprintf(stderr, "Cannot allocate pam2p[0]: %d x %d\n", len, nsq+1);
-X    free(pam2p);
-X    return NULL;
-X  }
-X
-X  /* row i starts i*(nsq+1) ints into the shared slab */
-X  for (i=1; i<len; i++) {
-X    pam2p[i] = pam2p[0] + ((size_t)i*row_len);
-X  }
-X
-X  return pam2p;
-}
-X
-/* free_pam2p releases a two-level table: the block at pam2p[0] is freed
-X   first, then the pam2p array itself.  A NULL pam2p is ignored. */
-void free_pam2p(int **pam2p) {
-X if (pam2p) {
-X free(pam2p[0]);
-X free(pam2p);
-X }
-}
-X
-/* sortbest has now become comparison function specific so that we can use
-X a different comparison for fasts/f
-*/
-#if !defined(FASTS) && !defined (FASTF) && !defined(FASTM)
-#ifndef PCOMPLIB
-/* empty placeholder used when none of FASTS/FASTF/FASTM is defined and
-X   PCOMPLIB is not defined */
-void
-qshuffle() {}
-#endif
-X
-/* last_calc for builds without FASTS/FASTF/FASTM: nothing is
-X   recalculated; every argument except nbest is ignored and nbest is
-X   returned unchanged. */
-int
-last_calc(unsigned char *aa0, unsigned char *aa1, int maxn,
-X struct beststr **bestp_arr, int nbest,
-X struct mngmsg *m_msg, struct pstruct *pst,
-X void **f_str, void *rs_str)
-{
-X return nbest;
-}
-X
-/* sortbest: Shell sort of the pointer array bptr[0..nbest-1] into
-X   descending order of score[irelv].  Only the pointers are moved;
-X   entries with equal scores are never exchanged. */
-void sortbest (bptr, nbest, irelv)
-struct beststr **bptr;
-int nbest, irelv;
-{
-X int gap, i, j;
-X struct beststr *tmp;
-X
-X /* halve the gap each pass; the final pass (gap 1) is an insertion sort */
-X for (gap = nbest/2; gap > 0; gap /= 2)
-X for (i = gap; i < nbest; i++)
-X for (j = i - gap; j >= 0; j-= gap) {
-X /* pair already in descending order: stop sifting this element */
-X if (bptr[j]->score[irelv] >= bptr[j + gap]->score[irelv]) break;
-X tmp = bptr[j];
-X bptr[j] = bptr[j + gap];
-X bptr[j + gap] = tmp;
-X }
-}
-X
-/* builds without FASTS/FASTF/FASTM print no auxiliary columns: both
-X   hooks have empty bodies */
-void show_aux(FILE *fp, struct beststr *bptr) {}
-void header_aux(FILE *fp) {}
-X
-#else
-/* sortbest (FASTS/FASTF/FASTM builds): Shell sort of bptr[0..nbest-1]
-X   into ascending order of escore.  irelv is not used.  Unlike the
-X   score-based variant, a pair with equal escore IS exchanged, because
-X   the loop only stops on a strict '<'. */
-void sortbest (bptr, nbest, irelv)
-struct beststr **bptr;
-int nbest, irelv;
-{
-X int gap, i, j;
-X struct beststr *tmp;
-X
-X for (gap = nbest/2; gap > 0; gap /= 2)
-X for (i = gap; i < nbest; i++)
-X for (j = i - gap; j >= 0; j-= gap) {
-X /* smaller escore already in front: stop sifting this element */
-X if (bptr[j]->escore < bptr[j + gap]->escore) break;
-X tmp = bptr[j];
-X bptr[j] = bptr[j + gap];
-X bptr[j + gap] = tmp;
-X }
-}
-X
-#if defined(FASTS) || defined(FASTM)
-X
-#ifndef PCOMPLIB
-/* this shuffle is for FASTS */
-/* convert ',' -> '\0', shuffle each of the substrings */
-/* qshuffle: shuffle, in place, each of the nm0 fragments packed in
-X   aa0[0..n0-1].  Every separator byte (EOSEQ or ESS) is temporarily
-X   overwritten with '\0' so that strlen() can measure a fragment, and
-X   is written back as ESS afterwards (so an EOSEQ separator comes back
-X   as ESS).  Exits with status 1 if the scratch array of fragment
-X   starts cannot be allocated.
-X   NOTE(review): fragment lengths come from strlen(), so this assumes
-X   aa0 is 0-terminated and that no residue code is 0 -- confirm
-X   against the caller. */
-void
-qshuffle(unsigned char *aa0, int n0, int nm0) {
-X
-X unsigned char **aa0start, *aap, tmp;
-X int i,j,k, ns;
-X
-X if ((aa0start=(unsigned char **)calloc(nm0+1,
-X sizeof(unsigned char *)))==NULL) {
-X fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);
-X exit(1);
-X }
-X
-X /* record where each fragment begins while blanking the separators;
-X assumes at most nm0 separators, since aa0start has nm0+1 slots */
-X aa0start[0]=aa0;
-X for (k=1,i=0; i<n0; i++) {
-X if (aa0[i]==EOSEQ || aa0[i]==ESS) {
-X aa0[i]='\0';
-X aa0start[k++] = &aa0[i+1];
-X }
-X }
-X
-X /* aa0start has the beginning of each substring */
-X for (k=0; k<nm0; k++) {
-X aap=aa0start[k];
-X ns = strlen((char *)aap);
-X /* swap the last unshuffled slot (i-1) with a randomly chosen slot j;
-X presumably nrand(i) returns 0 <= j < i -- TODO confirm */
-X for (i=ns; i>1; i--) {
-X j = nrand(i);
-X tmp = aap[j];
-X aap[j] = aap[i-1];
-X aap[i-1] = tmp;
-X }
-X aap[ns] = 0;
-X }
-X
-X /* put a separator back in front of every fragment except the first */
-X for (k=1; k<nm0; k++) {
-/* aap = aa0start[k];
-X while (*aap) fputc(pst.sq[*aap++],stderr);
-X fputc('\n',stderr);
-*/
-X aa0start[k][-1]=ESS;
-X }
-X
-X free(aa0start);
-}
-#endif
-#endif
-X
-#ifdef FASTF
-#ifndef PCOMPLIB
-/* qshuffle (FASTF): reverse the column order of the query in place.
-X   aa0 is treated as nm0 rows that start nmoff+1 bytes apart, with
-X   nmoff = (n0 - nm0 - 1)/nm0 + 1 columns per row; column i is
-X   exchanged with column (nmoff-1)-i in every row.  The random column
-X   shuffle is commented out, so the result is deterministic. */
-void qshuffle(unsigned char *aa0, int n0, int nm0) {
-X
-X int i, j, k, nmpos;
-X unsigned char tmp;
-X int nmoff;
-X
-X nmoff = (n0 - nm0 - 1)/nm0 + 1;
-X
-X for (i = nmoff-1 ; i > 0 ; i--) {
-X
-X /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */
-X j = (nmoff -1 ) - i;
-X /* i walks down from the right, j up from the left; stop when they meet */
-X if (i <= j) break; /* reverse columns */
-X
-X /* swap all i'th column residues for all j'th column residues */
-X for(nmpos = 0, k = 0 ; k < nm0 ; k++, nmpos += nmoff+1 ) {
-X tmp = aa0[nmpos + i];
-X aa0[nmpos + i] = aa0[nmpos + j];
-X aa0[nmpos + j] = tmp;
-X }
-X }
-}
-#endif
-#endif
-X
-X
-/* show additional best_str values */
-/* show_aux writes the segnum and seglen fields of bptr to fp as two
-X   right-aligned columns (widths 2 and 3) */
-void show_aux(FILE *fp, struct beststr *bptr) {
-X fprintf(fp," %2d %3d",bptr->segnum,bptr->seglen);
-}
-X
-/* header_aux writes the " sn sl" heading that labels the two columns
-X   printed by show_aux */
-void header_aux(FILE *fp) {
-X fprintf(fp, " sn sl");
-}
-#endif
-X
-/* fill_pam: fill columns 1..20 of the integer profile pam2p[0..n0-1]
-X   with scale * freq2d[i][0..19], rounded to the nearest integer
-X   (halves round away from zero).
-X   nsq is accepted but not used: the column count is hard-coded to 20. */
-void
-fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {
-X int i, j;
-X double freq;
-X
-X /* fprintf(stderr, "scale: %g\n", scale); */
-X
-X /* now fill in the pam matrix: */
-X for (i = 0 ; i < n0 ; i++) {
-X for (j = 1 ; j <=20 ; j++) {
-X /* profile column j corresponds to frequency column j-1 */
-X freq = scale * freq2d[i][j-1];
-X /* bias by +/-0.5 so the (int) truncation below rounds to nearest */
-X if ( freq < 0.0) freq -= 0.5;
-X else freq += 0.5;
-X pam2p[i][j] = (int)(freq);
-X }
-X }
-}
-X
-/* get_lambda: estimate the Karlin-Altschul lambda of a profile.
-X
-X   pam2p  - score table; rows are query positions, columns 1..nsq
-X   n0     - number of rows to scan
-X   nsq    - number of residue columns
-X   query  - residue characters; query[i] is mapped through aascii[]
-X            to select the rrcounts[] weight of row i
-X
-X   If pam2p[0][1] is -BIGNUM, row 0 is skipped and rows 1..n0 are
-X   scanned instead.  A histogram of score probabilities over
-X   [min,max] is built, normalised to sum to 1 and passed to karlin().
-X
-X   Returns lambda, or -1.0 when karlin() reports failure (a message is
-X   written to stderr in that case).  Exits with status 1 if the
-X   histogram cannot be allocated.
-*/
-double
-get_lambda(int **pam2p, int n0, int nsq, unsigned char *query) {
-X  /* karlin() stores nothing through its output pointers when it
-X     fails, so start from a sentinel instead of returning an
-X     indeterminate value */
-X  double lambda = -1.0, H = 0.0;
-X  double *pr, tot, sum;
-X  int i, ioff, j, min, max;
-X
-X  /* get min and max scores */
-X  min = BIGNUM;
-X  max = -BIGNUM;
-X  if(pam2p[0][1] == -BIGNUM) {
-X    ioff = 1;
-X    n0++;
-X  } else {
-X    ioff = 0;
-X  }
-X
-X  for (i = ioff ; i < n0 ; i++) {
-X    for (j = 1; j <= nsq ; j++) {
-X      if (min > pam2p[i][j])
-X        min = pam2p[i][j];
-X      if (max < pam2p[i][j])
-X        max = pam2p[i][j];
-X    }
-X  }
-X
-X  /* fprintf(stderr, "min: %d\tmax:%d\n", min, max); */
-X
-X  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
-X    exit(1);
-X  }
-X
-X  /* accumulate, per score value, the weight of every (row, residue)
-X     pair that produces it; pr[] is indexed by score - min */
-X  tot = (double) rrtotal * (double) rrtotal * (double) n0;
-X  for (i = ioff ; i < n0 ; i++) {
-X    for (j = 1; j <= nsq ; j++) {
-X      pr[pam2p[i][j] - min] +=
-X        (double) ((double) rrcounts[aascii[query[i]]] * (double) rrcounts[j]) / tot;
-X    }
-X  }
-X
-X  sum = 0.0;
-X  for(i = 0 ; i <= max-min ; i++) {
-X    sum += pr[i];
-X    /* fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
-X  }
-X  /* fprintf(stderr, "sum: %g\n", sum); */
-X
-X  /* normalise so the histogram is a probability distribution */
-X  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
-X
-X  if (!karlin(min, max, pr, &lambda, &H)) {
-X    fprintf(stderr, "Karlin lambda estimation failed\n");
-X  }
-X
-X  /* fprintf(stderr, "lambda: %g\n", lambda); */
-X  free(pr);
-X
-X  return lambda;
-}
-X
-/*
-X *aa0 - query sequence
-X n0 - length
-X pamscale - scaling for pam matrix - provided by apam.c, either
-X 0.346574 = ln(2)/2 (P120, BL62) or
-X 0.231049 = ln(2)/3 (P250, BL50)
-*/
-X
-void
-scale_pssm(int **pssm2p, double **freq2d,
-X unsigned char *query, int n0,
-X int **pam2, double pamscale);
-X
-static unsigned char ustandard_aa[] ="\0ARNDCQEGHILKMFPSTWYV";
-X
-/* read_pssm: load a checkpoint profile from fp and convert it into the
-X   integer profile ppst->pam2p[0] (via scale_pssm()).
-X
-X   aa0, nsq  - not used here
-X   n0        - query length; must equal the length stored in the file
-X   pamscale  - passed through to scale_pssm()
-X   fp        - open checkpoint file; it is not closed here
-X   pgpf_type - 0: binary file (an int length, that many query bytes,
-X                  then n0*20 doubles)
-X               1: text file (a length line, a query line, then per
-X                  position one line holding a character and up to 20
-X                  numbers, which are divided by 10 after reading)
-X   ppst      - supplies pam2p[0] (output) and pam2[0] (scoring matrix)
-X
-X   Any read, format or allocation problem is reported on stderr and the
-X   program exits with status 1.
-*/
-void
-read_pssm(unsigned char *aa0, int n0, int nsq,
-X          double pamscale,
-X          FILE *fp, int pgpf_type, struct pstruct *ppst) {
-X  /* len and k appear in error messages on paths where the read that
-X     should have set them failed, so both need defined values */
-X  int len = 0, k = 0;
-X  int qi, rj;	/* qi - index query; rj - index residues (1-20) */
-X  unsigned char *query, ctmp;
-X  char dline[512];
-X  double freq, **freq2d;
-X
-X  if (pgpf_type == 0) {
-X
-X    if(1 != fread(&len, sizeof(int), 1, fp)) {
-X      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
-X      exit(1);
-X    }
-X
-X    if(len != n0) {
-X      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
-X              len,n0);
-X      exit(1);
-X    }
-X
-X    /* read over query sequence stored in BLAST profile */
-X    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
-X      fprintf(stderr, "Couldn't allocate memory for query!\n");
-X      exit(1);
-X    }
-X
-X    if(len != fread(query, sizeof(char), len, fp)) {
-X      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
-X      exit(1);
-X    }
-X  }
-X  else if (pgpf_type == 1) {
-X
-X    if ((fgets(dline,sizeof(dline),fp) == NULL) ||
-X        (1 != sscanf(dline, "%d",&len))) {
-X      fprintf(stderr, "error reading from checkpoint file: %d\n", len);
-X      exit(1);
-X    }
-X
-X    if(len != n0) {
-X      fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
-X              len,n0);
-X      exit(1);
-X    }
-X
-X    /* read over query sequence stored in BLAST profile */
-X    if(NULL == (query = (unsigned char *) calloc(len+2, sizeof(char)))) {
-X      fprintf(stderr, "Couldn't allocate memory for query!\n");
-X      exit(1);
-X    }
-X
-X    if (fgets((char *)query,len+2,fp)==NULL) {
-X      fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
-X      exit(1);
-X    }
-X  }
-X  else {
-X    fprintf(stderr," Unrecognized PSSM file type: %d\n",pgpf_type);
-X    exit(1);
-X  }
-X
-X  /* currently we don't do anything with query; ideally, we should
-X     check to see that it actually matches aa0 ... */
-X
-X  /* quick 2d array alloc: */
-X  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
-X    exit(1);
-X  }
-X
-X  if((freq2d[0] = (double *) calloc(n0 * 20, sizeof(double))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
-X    exit(1);
-X  }
-X
-X  /* a little pointer arithmetic to fill out 2d array: */
-X  for (qi = 1 ; qi < n0 ; qi++) {
-X    freq2d[qi] = freq2d[qi-1] + 20;
-X  }
-X
-X  if (pgpf_type == 0) {
-X    for (qi = 0 ; qi < n0 ; qi++) {
-X      for (rj = 0 ; rj < 20 ; rj++) {
-X        if(1 != fread(&freq, sizeof(double), 1, fp)) {
-X          fprintf(stderr, "Error while reading frequencies!\n");
-X          exit(1);
-X        }
-X        freq2d[qi][rj] = freq;
-X      }
-X    }
-X  }
-X  else {
-X    for (qi = 0 ; qi < n0 ; qi++) {
-X      /* NOTE(review): a row is accepted as soon as one field converts;
-X         fields that do not convert keep the 0.0 left by calloc() */
-X      if ((fgets(dline,sizeof(dline),fp) ==NULL) ||
-X          (k = sscanf(dline,"%c %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg\n",
-X                      &ctmp, &freq2d[qi][0], &freq2d[qi][1], &freq2d[qi][2], &freq2d[qi][3], &freq2d[qi][4],
-X                      &freq2d[qi][5], &freq2d[qi][6], &freq2d[qi][7], &freq2d[qi][8], &freq2d[qi][9],
-X                      &freq2d[qi][10], &freq2d[qi][11], &freq2d[qi][12], &freq2d[qi][13], &freq2d[qi][14],
-X                      &freq2d[qi][15], &freq2d[qi][16], &freq2d[qi][17], &freq2d[qi][18], &freq2d[qi][19]))<1) {
-X        fprintf(stderr, "Error while reading frequencies: %d read!\n",k);
-X        exit(1);
-X      }
-X      for (rj=0; rj<20; rj++) { freq2d[qi][rj] /= 10.0; }	/* reverse scaling */
-X    }
-X  }
-X
-X  scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0],pamscale);
-X
-X  free(freq2d[0]);
-X  free(freq2d);
-X
-X  free(query);
-}
-X
-/* scale_pssm: turn the per-position frequencies in freq2d into the
-X   integer profile pssm2p, choosing a scale factor so that the
-X   profile's lambda (from get_lambda) approaches that of the scoring
-X   matrix pam2.
-X
-X   pssm2p   - output profile; columns 1..20 of rows 0..n0-1 are written
-X   freq2d   - n0 x 20 frequencies on entry; overwritten in place with
-X              unrounded scores
-X   query    - residue characters for the n0 positions
-X   pam2     - scoring matrix, used for lambda and as fallback scores
-X   pamscale - divisor applied to the natural-log odds
-X
-X   Exits with status 1 if a bracketing step yields lambda <= 0.
-*/
-void
-scale_pssm(int **pssm2p, double **freq2d, unsigned char *query, int n0, int **pam2, double pamscale) {
-X int i, qi, rj;
-X double freq, new_lambda, lambda;
-X int first, too_high;
-X double scale, scale_high, scale_low;
-X
-X /* step 1: frequency -> log(freq / background)/pamscale, where the
-X background is rrcounts[rj+1]/rrtotal */
-X for (qi = 0 ; qi < n0 ; qi++) {
-X for (rj = 0 ; rj < 20 ; rj++) {
-X if (freq2d[qi][rj] > 1e-20) {
-X freq = log(freq2d[qi][rj] /((double) (rrcounts[rj+1])/(double) rrtotal));
-X freq /= pamscale; /* this gets us close to original pam scores */
-X freq2d[qi][rj] = freq;
-X }
-X else {
-X /* when blastpgp decides to leave something out, it puts 0's in all the frequencies
-X in the binary checkpoint file. In the ascii version, however, it uses BLOSUM62
-X values. I will put in scoring matrix values as well */
-X
-X freq2d[qi][rj] = pam2[aascii[query[qi]]][rj+1];
-X }
-X }
-X }
-X
-X /* now figure out the right scale */
-X scale = 1.0;
-X /* target lambda: the scoring matrix itself, 20 rows labelled by
-X ustandard_aa */
-X lambda = get_lambda(pam2, 20, 20, ustandard_aa);
-X
-X /* should be near 1.0 because of our initial scaling by ppst->pamscale */
-X /* fprintf(stderr, "real_lambda: %g\n", lambda); */
-X
-X /* get initial high/low scale values: */
-X /* step 2: the first pass (scale 1.0) fixes the search direction in
-X too_high; the moving bound then doubles its distance from 1.0 each
-X pass until new_lambda crosses the target lambda */
-X first = 1;
-X while (1) {
-X fill_pam(pssm2p, n0, 20, freq2d, scale);
-X new_lambda = get_lambda(pssm2p, n0, 20, query);
-X
-X if (new_lambda > lambda) {
-X if (first) {
-X first = 0;
-X scale = scale_high = 1.0 + 0.05;
-X scale_low = 1.0;
-X too_high = 1;
-X } else {
-X if (!too_high) break;
-X scale = (scale_high += scale_high - 1.0);
-X }
-X } else if (new_lambda > 0) {
-X if (first) {
-X first = 0;
-X scale_high = 1.0;
-X scale = scale_low = 1.0 - 0.05;
-X too_high = 0;
-X } else {
-X if (too_high) break;
-X /* NOTE(review): repeated doubling moves scale_low through 0.9, 0.8,
-X 0.6, 0.2 and then below zero -- confirm the bracket is always
-X found before that */
-X scale = (scale_low += scale_low - 1.0);
-X }
-X } else {
-X fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
-X exit(1);
-X }
-X }
-X
-X /* now do binary search between low and high */
-X /* step 3: ten bisection steps; a larger scale gives a smaller lambda,
-X so new_lambda > lambda means the scale is still too small */
-X for (i = 0 ; i < 10 ; i++) {
-X scale = 0.5 * (scale_high + scale_low);
-X fill_pam(pssm2p, n0, 20, freq2d, scale);
-X new_lambda = get_lambda(pssm2p, n0, 20, query);
-X
-X if (new_lambda > lambda) scale_low = scale;
-X else scale_high = scale;
-X }
-X
-X /* final profile: midpoint of the remaining interval */
-X scale = 0.5 * (scale_high + scale_low);
-X fill_pam(pssm2p, n0, 20, freq2d, scale);
-X
-X /*
-X fprintf(stderr, "final scale: %g\n", scale);
-X
-X for (qi = 0 ; qi < n0 ; qi++) {
-X fprintf(stderr, "%4d %c: ", qi+1, query[qi]);
-X for (rj = 1 ; rj <= 20 ; rj++) {
-X fprintf(stderr, "%4d", pssm2p[qi][rj]);
-X }
-X fprintf(stderr, "\n");
-X }
-X */
-}
-X
-#if defined(SSEARCH) || (defined(PRSS) && !defined(FASTX))
-int
-parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
-X unsigned char **query, double ***freqs,
-X char *matrix, int *gap_open, int *gap_extend,
-X double *lambda);
-X
-/* the ASN.1 pssm includes information about the scoring matrix used
-X (though not the gap penalty in the current version PSSM:2) The PSSM
-X scoring matrix and gap penalties should become the default if they
-X have not been set explicitly.
-*/
-X
-/* read_asn_pssm: parse an ASN.1 PSSM from fp with parse_pssm_asn_fa()
-X   and build the integer profile ppst->pam2p[0] with scale_pssm().
-X
-X   aa0, nsq - not used here
-X   n0       - query length
-X   pamscale - passed through to scale_pssm()
-X   fp       - open PSSM file; it is not closed here
-X   ppst     - receives gap penalties / matrix defaults and the profile
-X
-X   Side effects: when the globals gap_set / del_set are clear, the
-X   open/extend penalties are taken from the file (forced negative), or
-X   set to -11 / -1 when the matrix name starts with "BLOSUM62", and
-X   the flags are then set.  When the matrix is BLOSUM62 and no matrix
-X   was chosen (ppst->pam_set clear), "BL62" is installed.
-X
-X   Returns 1 on success, -1 if the file cannot be parsed; exits with
-X   status 1 if the profile has fewer columns than the query.
-*/
-int
-read_asn_pssm(unsigned char *aa0, int n0, int nsq,
-X double pamscale, FILE *fp, struct pstruct *ppst) {
-X
-X int i, j, len, k;
-X int qi, rj; /* qi - index query; rj - index residues (1-20) */
-X int **pam2p;
-X int first, too_high;
-X unsigned char *query, ctmp;
-X char dline[512];
-X char matrix[MAX_SSTR];
-X double psi2_lambda;
-X double freq, **freq2d, lambda, new_lambda;
-X double scale, scale_high, scale_low;
-X int gap_open, gap_extend;
-X int n_rows, n_cols;
-X
-X /* NOTE(review): pam2p and most locals above are never read in this
-X function */
-X pam2p = ppst->pam2p[0];
-X
-X if (parse_pssm_asn_fa(fp, &n_rows, &n_cols, &query, &freq2d,
-X matrix, &gap_open, &gap_extend, &psi2_lambda)<=0) {
-X return -1;
-X }
-X
-X /* gap-open penalty: explicit user setting wins, then the file's
-X value, then the BLOSUM62 default */
-X if (!gap_set) {
-X if (gap_open) {
-X if (gap_open > 0) {gap_open = -gap_open;}
-X ppst->gdelval = gap_open;
-X }
-X else if (strncmp(matrix,"BLOSUM62",8)==0) {
-X ppst->gdelval = -11;
-X }
-X gap_set = 1;
-X }
-X /* gap-extend penalty: same precedence as above */
-X if (!del_set) {
-X if (gap_extend) {
-X if (gap_extend > 0) {gap_extend = -gap_extend;}
-X ppst->ggapval = gap_extend;
-X }
-X else if (strncmp(matrix,"BLOSUM62",8)==0) {
-X ppst->ggapval = -1;
-X }
-X del_set = 1;
-X }
-X
-X if (strncmp(matrix, "BLOSUM62", 8)== 0 && !ppst->pam_set) {
-X strncpy(ppst->pamfile, "BL62", 120);
-X standard_pam(ppst->pamfile,ppst,del_set, gap_set);
-X if (!ppst->have_pam2) {
-X alloc_pam (MAXSQ, MAXSQ, ppst);
-X }
-X init_pam2(ppst);
-X ppst->pam_set = 1;
-X }
-X
-X /* NOTE(review): the message says "!=" but only n_cols < n0 is
-X rejected; a profile longer than the query is accepted */
-X if (n_cols < n0) {
-X fprintf(stderr, " query length: %d != n_cols: %d\n",n0, n_cols);
-X exit(1);
-X }
-X
-X scale_pssm(ppst->pam2p[0], freq2d, query, n0, ppst->pam2[0],pamscale);
-X
-X /* presumably freq2d and query were allocated by parse_pssm_asn_fa
-X with freq2d[0] holding all rows -- TODO confirm */
-X free(freq2d[0]);
-X free(freq2d);
-X
-X free(query);
-X return 1;
-}
-#endif
-X
-/* last_params: per-query parameter setup, run once the query (aa0, n0)
-X   is known.  Returns immediately when n0 < 0.
-X
-X   - copies m_msg->n0 into ppst->n0
-X   - SSEARCH/PRSS: allocates ppst->pam2p[0] and pam2p[1] and, when
-X     ppst->pam_pssm is set, loads the profile named by ppst->pgpfile
-X     according to ppst->pgpfile_type (0 binary, 1 text, 2 ASN.1).  If
-X     the file cannot be opened or parsed, pam_pssm is cleared and the
-X     function returns early, skipping the steps below.
-X   - FASTF/FASTS/FASTM: counts query fragments into m_msg->nm0 and
-X     chooses between precomputed probabilistic scores and the
-X     shuffled-query path (escore_flg, last_calc_flg, qshuffle);
-X     other programs clear those flags and set nm0 to 1
-X   - adjusts ktup from the query length unless ktup_set is set
-X   - PCOMPLIB: mirrors the flags into qm_msg
-X
-X   NOTE(review): pam2p[0]/pam2p[1] are overwritten without freeing any
-X   earlier tables -- confirm the caller releases them between queries.
-*/
-void
-last_params(unsigned char *aa0, int n0,
-X            struct mngmsg *m_msg,
-X            struct pstruct *ppst
-#ifdef PCOMPLIB
-X            , struct qmng_str *qm_msg
-#endif
-X            ) {
-X  int i, nsq;
-X  FILE *fp;
-X
-X  if (n0 < 0) { return;}
-X
-X  ppst->n0 = m_msg->n0;
-X
-X  if (ppst->ext_sq_set) { nsq = ppst->nsqx; }
-X  else {nsq = ppst->nsq;}
-X
-/* currently, profiles are only available for SSEARCH, PRSS */
-#if defined(SSEARCH) || defined(PRSS)
-X
-X  ppst->pam2p[0] = alloc_pam2p(n0,nsq);
-X  ppst->pam2p[1] = alloc_pam2p(n0,nsq);
-X
-X  /* the profile file is only needed while it is being read, so it is
-X     closed as soon as the reader returns, on every path */
-X  if (ppst->pam_pssm) {
-X    if ((ppst->pgpfile_type == 0) && (fp=fopen(ppst->pgpfile,"rb"))) {
-X      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 0, ppst);
-X      fclose(fp);
-X      extend_pssm(aa0, n0, ppst);
-X    }
-X    else if ((ppst->pgpfile_type == 1) && (fp=fopen(ppst->pgpfile,"r"))) {
-X      read_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, 1, ppst);
-X      fclose(fp);
-X      extend_pssm(aa0, n0, ppst);
-X    }
-#if defined(SSEARCH) || (defined(PRSS) && !defined(FASTX))
-X    else if ((ppst->pgpfile_type == 2) && (fp=fopen(ppst->pgpfile,"rb"))) {
-X      int asn_status;
-X
-X      asn_status = read_asn_pssm(aa0, n0, ppst->nsq, ppst->pamscale, fp, ppst);
-X      fclose(fp);
-X      if (asn_status > 0) {
-X        extend_pssm(aa0, n0, ppst);
-X      }
-X      else {
-X        fprintf(stderr," Could not parse PSSM file: %s\n",ppst->pgpfile);
-X        ppst->pam_pssm = 0;
-X        return;
-X      }
-X    }
-#endif
-X    else {
-X      fprintf(stderr," Could not open PSSM file: %s\n",ppst->pgpfile);
-X      ppst->pam_pssm = 0;
-X      return;
-X    }
-X  }
-#endif
-X
-#if defined(FASTF) || defined(FASTS) || defined(FASTM)
-X  /* one fragment, plus one more for every separator in the query */
-X  m_msg->nm0 = 1;
-X  for (i=0; i<n0; i++)
-X    if (aa0[i]==EOSEQ || aa0[i]==ESS) m_msg->nm0++;
-X
-/*
-X  for FASTS, we can do statistics in one of two different ways
-X  if there are <= 10 query fragments, then we calculate probabilistic
-X  scores for every library sequence.  If there are > 10 fragments, this
-X  takes much too long and too much memory, so we use the old fashioned
-X  raw score only z-score normalized method initially, and then calculate
-X  the probabilistic scores for the best hits.  To scale those scores, we
-X  also need a set of random probabilistic scores.  So we do the qshuffle
-X  to get them.
-X
-X  For FASTF, precalculating probabilities is prohibitively expensive,
-X  so we never do it; FASTF always acts like FASTS with nfrags>10.
-X
-*/
-X
-#if defined(FASTS) || defined(FASTM)
-X  if (m_msg->nm0 > 10) m_msg->escore_flg = 0;
-X  else m_msg->escore_flg = 1;
-#endif
-X
-X  if (m_msg->escore_flg && (ppst->zsflag&1)) {
-X    m_msg->last_calc_flg = 0;
-X    m_msg->qshuffle = 0;
-X  }
-X  else {	/* need random query, second set of 2000 scores */
-X    m_msg->last_calc_flg = 1;
-X    m_msg->qshuffle = 1;
-X  }
-#else
-X  m_msg->last_calc_flg = 0;
-X  m_msg->qshuffle = 0;
-X  m_msg->escore_flg = 0;
-X  m_msg->nm0 = 1;
-#endif
-X
-/* adjust the ktup if appropriate */
-X
-X  if (!ktup_set && pgm_def_arr[ppst->pgm_id].ktup > 0) {
-X    if (m_msg->qdnaseq == SEQT_PROT) {
-X      ppst->param_u.fa.ktup = pgm_def_arr[ppst->pgm_id].ktup;
-#if defined(FASTS) || defined(FASTM)
-X      if (n0 > 100) ppst->param_u.fa.ktup = 2;
-#endif
-X      if (n0 < 40) ppst->param_u.fa.ktup = 1;
-X    }
-X    else if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {
-X      if (n0 < 20) ppst->param_u.fa.ktup = 1;
-#if defined(FASTS) || defined(FASTM)
-X      /* with the current (April 12 2005) dropfs2.c - ktup cannot be > 2 */
-X      else ppst->param_u.fa.ktup = 2;
-#else
-X      else if (n0 < 50) ppst->param_u.fa.ktup = 2;
-X      else if (n0 < 100) ppst->param_u.fa.ktup = 3;
-#endif
-X    }
-X  }
-X
-#ifdef PCOMPLIB
-X  qm_msg->nm0 = m_msg->nm0;
-X  qm_msg->escore_flg = m_msg->escore_flg;
-X  qm_msg->qshuffle = m_msg->qshuffle;
-X  qm_msg->pam_pssm = 0;
-#endif
-}
-X
-/* given a good profile in ppst->pam2p[0], make an extended profile
-X in ppst->pam2p[1]
-X
-X For each of the n0 rows: column 0 becomes -BIGNUM, the 'B' column the
-X rounded mean of the 'N' and 'D' columns, the 'Z' column the rounded
-X mean of 'Q' and 'E', and the 'X' and '*' columns ppst->pam_xm.
-X Columns 1..nsq are then copied into pam2p[1].  When ext_sq_set is
-X set, columns nsq+1..2*nsq are filled as well: pam2p[0] repeats its
-X own columns 1..nsq, pam2p[1] gets pam_xm.  aa0 is not used.
-*/
-void
-extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst) {
-X
-X int i, j, nsq;
-X int sa_x, sa_t, sa_b, sa_z;
-X int **pam2p0, **pam2p1;
-X
-X nsq = ppst->nsq;
-X
-X pam2p0 = ppst->pam2p[0];
-X pam2p1 = ppst->pam2p[1];
-X
-X /* column indices of the ambiguity / terminator symbols */
-X sa_x = pascii['X'];
-X sa_t = pascii['*'];
-X sa_b = pascii['B'];
-X sa_z = pascii['Z'];
-X
-X /* fill in boundaries, B, Z, *, X */
-X for (i=0; i<n0; i++) {
-X pam2p0[i][0] = -BIGNUM;
-X /* (a + b + 0.5)/2 truncated to int */
-X pam2p0[i][sa_b] = (int)
-X (((float)pam2p0[i][pascii['N']]+(float)pam2p0[i][pascii['D']]+0.5)/2.0);
-X pam2p0[i][sa_z] = (int)
-X (((float)pam2p0[i][pascii['Q']]+(float)pam2p0[i][pascii['E']]+0.5)/2.0);
-X pam2p0[i][sa_x] = ppst->pam_xm;
-X pam2p0[i][sa_t] = ppst->pam_xm;
-X }
-X
-X /* copy pam2p0 into pam2p1 */
-X for (i=0; i<n0; i++) {
-X pam2p1[i][0] = -BIGNUM;
-X for (j=1; j<=ppst->nsq; j++) {
-X pam2p1[i][j] = pam2p0[i][j];
-X }
-X }
-X
-X /* then fill in extended characters, if necessary */
-X /* assumes each row has at least 2*nsq+1 columns when ext_sq_set is
-X set -- TODO confirm against the allocation in the caller */
-X if (ppst->ext_sq_set) {
-X for (i=0; i<n0; i++) {
-X for (j=1; j<=ppst->nsq; j++) {
-X pam2p0[i][nsq+j] = pam2p0[i][j];
-X pam2p1[i][nsq+j] = ppst->pam_xm;
-X }
-X }
-X }
-}
-SHAR_EOF
-chmod 0644 initfa.c ||
-echo 'restore of initfa.c failed'
-Wc_c="`wc -c < 'initfa.c'`"
-test 54882 -eq "$Wc_c" ||
- echo 'initfa.c: original size 54882, current size' "$Wc_c"
-fi
-# ============= karlin.c ==============
-if test -f 'karlin.c' -a X"$1" != X"-c"; then
- echo 'x - skipping karlin.c (File already exists)'
-else
-echo 'x - extracting karlin.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'karlin.c' &&
-/**************** Statistical Significance Parameter Subroutine ****************
-X
-X $Name: fa_34_26_5 $ - $Id: karlin.c,v 1.18 2006/06/01 16:05:30 wrp Exp $
-X
-X Version 1.0 February 2, 1990
-X Version 2.0 March 18, 1993
-X
-X Program by: Stephen Altschul
-X
-X Address: National Center for Biotechnology Information
-X National Library of Medicine
-X National Institutes of Health
-X Bethesda, MD 20894
-X
-X Internet: altschul@ncbi.nlm.nih.gov
-X
-X See: Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
-X Significance of Molecular Sequence Features by Using General Scoring
-X Schemes," Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.
-X
-X Computes the parameters lambda and K for use in calculating the
-X statistical significance of high-scoring segments or subalignments.
-X
-X The scoring scheme must be integer valued. A positive score must be
-X possible, but the expected (mean) score must be negative.
-X
-X A program that calls this routine must provide the value of the lowest
-X possible score, the value of the greatest possible score, and a pointer
-X to an array of probabilities for the occurrence of all scores between
-X these two extreme scores. For example, if score -2 occurs with
-X probability 0.7, score 0 occurs with probability 0.1, and score 3
-X occurs with probability 0.2, then the subroutine must be called with
-X low = -2, high = 3, and pr pointing to the array of values
-X { 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }. The calling program must also provide
-X pointers to lambda and K; the subroutine will then calculate the values
-X of these two parameters. In this example, lambda=0.330 and K=0.154.
-X
-X The parameters lambda and K can be used as follows. Suppose we are
-X given a length N random sequence of independent letters. Associated
-X with each letter is a score, and the probabilities of the letters
-X determine the probability for each score. Let S be the aggregate score
-X of the highest scoring contiguous segment of this sequence. Then if N
-X is sufficiently large (greater than 100), the following bound on the
-X probability that S is greater than or equal to x applies:
-X
-X P( S >= x ) <= 1 - exp [ - KN exp ( - lambda * x ) ].
-X
-X In other words, the p-value for this segment can be written as
-X 1-exp[-KN*exp(-lambda*S)].
-X
-X This formula can be applied to pairwise sequence comparison by assigning
-X scores to pairs of letters (e.g. amino acids), and by replacing N in the
-X formula with N*M, where N and M are the lengths of the two sequences
-X being compared.
-X
-X In addition, letting y = KN*exp(-lambda*S), the p-value for finding m
-X distinct segments all with score >= S is given by:
-X
-X 1 - [ 1 + y + y^2/2! + ... + y^(m-1)/(m-1)! ] * exp(-y)
-X
-X Notice that for m=1 this formula reduces to 1-exp(-y), which is the same
-X as the previous formula.
-X
-*******************************************************************************/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-X
-#define MAXIT 25 /* Maximum number of iterations used in calculating lambda */
-#define NMAP_X 23
-#define NMAP 33
-X
-#define TINY 1e-6
-X
-/* first build a residue map to automatically put residues in score bins */
-X
-#include "defs.h"
-#include "param.h"
-X
-/* initialize the Karlin frequency, probability arrays using
-X a specific query sequence */
-X
-int karlin(int , int, double *, double *, double *);
-static int karlin_k(int , int , double *, double *, double *, double *);
-X
-/* init_karlin: derive residue frequencies from a specific query for
-X   the Karlin-Altschul calculation.
-X
-X   aa0   - 0-terminated query residue codes, mapped through ppst->hsqx
-X   n0    - query length
-X   ppst  - supplies nsq (alphabet size), pam_l/pam_h (score range), hsqx
-X   aa0_f - output: aa0_f[1..nsq], each 100*count/(n0+nsq), where every
-X           residue starts with a count of 1
-X   kp    - if *kp is NULL, a zeroed array of (pam_h-pam_l+2) doubles is
-X           allocated and stored there; exits with status 1 on failure
-*/
-void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
-X                 double *aa0_f, double **kp)
-{
-X  int kar_nsq, kar_range;
-X
-X  const unsigned char *aa0p;
-X  int i;
-X  int r_cnt[NMAP+1];
-X  double fn0, *kar_p;
-X
-X  kar_range = ppst->pam_h - ppst->pam_l + 1;
-X  if (*kp == NULL) {
-X    if ((kar_p=(double *)calloc(kar_range+1,sizeof(double)))==NULL) {
-X      fprintf(stderr," cannot allocate kar_p array: %d\n",kar_range+1);
-X      exit(1);
-X    }
-X    *kp = kar_p;
-X  }
-X  kar_nsq = ppst->nsq;	/* alphabet size */
-X
-X  /* clear every bin first: the query loop may increment bins outside
-X     1..nsq, and r_cnt[NMAP_X] is read below even when nsq < NMAP_X */
-X  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
-X
-X  /* must have at least 1 residue of each type */
-X  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1;
-X
-X  fn0 = 100.0/(double)(n0+kar_nsq);	/* weight of each residue */
-X
-X  aa0p = aa0;
-X  /* increment residue count for each residue in query sequence */
-X  while (*aa0p) r_cnt[ppst->hsqx[*aa0p++]]++;
-X
-X  /* map all unmapped residues to 'X' */
-X  r_cnt[NMAP_X] += r_cnt[NMAP];
-X
-X  for (i=1; i<=kar_nsq; i++) aa0_f[i] = fn0*(double)r_cnt[i];
-}
-X
-double nt_f[] = {0.0, 0.25, 0.25, 0.25, 0.25 };
-X
-/* Robinson and Robinson frequencies */
-double aa_f[] = {
-/* NULL */ 0.00,
-/* A */ 0.0780474700897585,
-/* R */ 0.0512953149316987,
-/* N */ 0.0448725775979007,
-/* D */ 0.0536397361638076,
-/* C */ 0.0192460110427568,
-/* Q */ 0.0426436013507063,
-/* E */ 0.0629485981204668,
-/* G */ 0.0737715654561964,
-/* H */ 0.0219922696262025,
-/* I */ 0.0514196403000682,
-/* L */ 0.090191394464413,
-/* K */ 0.0574383201866657,
-/* M */ 0.0224251883196316,
-/* F */ 0.0385564048655621,
-/* P */ 0.0520279465667327,
-/* S */ 0.0711984743501224,
-/* T */ 0.0584129422708473,
-/* W */ 0.013298374223799,
-/* Y */ 0.0321647488738564,
-/* V */ 0.0644094211988074};
-X
-/* initialize the Karlin frequency, probability arrays using
-X an "average" composition (average length if n0 <=0)
-X
-X ppst  - supplies pam_l/pam_h, nt_align, dnaseq and nsq
-X aa0_f - output frequencies, indexed from 1:
-X nt_align set            -> 4 entries copied from nt_f[]
-X dnaseq is PROT or UNK   -> 20 entries copied from aa_f[]
-X otherwise               -> entries 1..nsq-1 set to 1/(nsq-1), entry
-X nsq set to 0
-X kp    - if *kp is NULL, a zeroed array of (pam_h-pam_l+2) doubles is
-X allocated and stored there; exits with status 1 on failure
-*/
-X
-void
-init_karlin_a(struct pstruct *ppst, double *aa0_f, double **kp)
-{
-X int kar_nsq, kar_range;
-X
-X int i;
-X double fn0, *kar_p;
-X
-X kar_range = ppst->pam_h - ppst->pam_l + 1;
-X if (*kp == NULL) {
-X if ((kar_p=(double *)calloc(kar_range+1,sizeof(double)))==NULL) {
-X fprintf(stderr," cannot allocate kar_p array: %d\n",kar_range+1);
-X exit(1);
-X }
-X *kp = kar_p;
-X }
-X
-X if (ppst->nt_align) {
-X kar_nsq = 4;
-X for (i=1; i<=kar_nsq; i++) aa0_f[i] = nt_f[i];
-X }
-X else if (ppst->dnaseq==SEQT_PROT || ppst->dnaseq == SEQT_UNK) {
-X kar_nsq = 20;
-X for (i=1; i<=kar_nsq; i++) aa0_f[i] = aa_f[i];
-X }
-X else {
-X /* uniform over all symbols but the last, which gets zero weight */
-X kar_nsq = ppst->nsq;
-X fn0 = 1.0/(double)(kar_nsq-1);
-X for (i=1; i< kar_nsq; i++) aa0_f[i] = fn0;
-X aa0_f[kar_nsq]=0.0;
-X }
-X
-}
-X
-/* do_karlin: compute Lambda and H for one library sequence.
-X
-X   aa1    - 0-terminated library residue codes, mapped through ppst->hsqx
-X   n1     - library sequence length
-X   pam2   - scoring matrix, indexed [1..nsq][1..nsq]
-X   ppst   - supplies nsq, pam_l, pam_h and hsqx
-X   aa0_f  - query residue frequencies, indexed from 1
-X   kar_p  - scratch histogram with at least pam_h-pam_l+2 entries;
-X            overwritten with normalised score probabilities
-X   lambda, H - outputs, written by karlin() on success only
-X
-X   Returns karlin()'s result (1 on success), or 0 when the total
-X   probability mass is <= 0.00001.
-*/
-int
-do_karlin(const unsigned char *aa1, int n1,
-X          int **pam2, struct pstruct *ppst,
-X          double *aa0_f, double *kar_p, double *lambda, double *H)
-{
-X  register unsigned const char *aap;
-X  int kar_range, kar_min, kar_max, kar_nsq;
-X  int r_cnt[NMAP+1];
-X  double aa1_f[NMAP];
-X  double fn1, kar_tot;
-X  int i, j;
-X
-X  kar_nsq = ppst->nsq;
-X  kar_min = ppst->pam_l;
-X  kar_max = ppst->pam_h;
-X  kar_range = kar_max - kar_min + 1;
-X
-X  /* clear every bin first: the counting loop may increment bins
-X     outside 1..nsq, and r_cnt[NMAP_X] is read below even when
-X     nsq < NMAP_X */
-X  for (i=0; i<=NMAP; i++) r_cnt[i]=0;
-X
-X  /* start every residue at 1 so no frequency is zero */
-X  for (i=1; i<=kar_nsq; i++) r_cnt[i]=1;
-X
-X  /* residue counts */
-X
-X  aap=aa1;
-X  while (*aap) r_cnt[ppst->hsqx[*aap++]]++;
-X
-X  /* fold the counts collected in bin NMAP into the NMAP_X bin */
-X  r_cnt[NMAP_X] += r_cnt[NMAP];
-X
-X  /* residue frequencies */
-X  fn1 = 100.0/(double)(n1+kar_nsq);
-X  for (i=1; i<=kar_nsq; i++) aa1_f[i]= fn1*(double)r_cnt[i];
-X
-X  for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
-X
-X  /* probability of each score = sum over residue pairs that give it;
-X     kar_p[] is indexed by score - kar_min */
-X  for (i=1; i<=kar_nsq; i++) {
-X    for (j=1; j<=kar_nsq; j++)
-X      kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
-X  }
-X
-X  kar_tot = 0.0;
-X  for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
-X  if (kar_tot <= 0.00001) return 0;
-X
-X  for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
-X
-X  return karlin(kar_min, kar_max, kar_p, lambda, H);
-}
-X
-/* do_karlin_a: compute Lambda, K and H against an "average" library
-X composition (nt_f[] for nucleotide alignments, aa_f[] otherwise).
-X
-X pam2   - scoring matrix, indexed from 1
-X ppst   - supplies pam_l, pam_h, nt_align and nsq
-X aa0_f  - query-side frequencies, indexed from 1
-X kar_p  - scratch histogram with at least pam_h-pam_l+2 entries;
-X overwritten with normalised score probabilities
-X lambda, K, H - outputs, written by karlin_k()
-X
-X Returns karlin_k()'s result, or 0 when the total probability mass is
-X <= 0.00001.
-*/
-int
-do_karlin_a(int **pam2, struct pstruct *ppst,
-X double *aa0_f, double *kar_p, double *lambda, double *K, double *H)
-{
-X double *aa1fp;
-X int kar_range, kar_min, kar_max, kar_nsq;
-X double aa1_f[NMAP];
-X double fn1, kar_tot;
-X int i, j;
-X
-X kar_min = ppst->pam_l;
-X kar_max = ppst->pam_h;
-X kar_range = kar_max - kar_min + 1;
-X
-X kar_tot = 0.0;
-X if (ppst->nt_align ) {
-X kar_nsq = 4;
-X aa1fp = nt_f;
-X /* renormalise the table so the frequencies sum to 1 */
-X for (i=1; i<=kar_nsq; i++) {kar_tot += aa1fp[i];}
-X for (i=1; i<=kar_nsq; i++) {aa1_f[i]= aa1fp[i]/kar_tot;}
-X }
-X else if (!ppst->nt_align) {
-X kar_nsq = 20;
-X aa1fp = aa_f;
-X for (i=1; i<=kar_nsq; i++) {kar_tot += aa1fp[i];}
-X for (i=1; i<=kar_nsq; i++) {aa1_f[i]= aa1fp[i]/kar_tot;}
-X }
-X /* NOTE(review): the two tests above are exact complements, so this
-X branch can never run; init_karlin_a tests ppst->dnaseq at the
-X corresponding place -- confirm which was intended */
-X else {
-X kar_nsq = ppst->nsq;
-X fn1 = 1.0/(double)(kar_nsq-1);
-X for (i=1; i< kar_nsq; i++) aa1_f[i] = fn1;
-X aa1_f[kar_nsq]=0.0;
-X }
-X
-X for (i=0; i<=kar_range; i++) kar_p[i] = 0.0;
-X
-X /* NOTE(review): j stops at kar_nsq-1, so the last library residue
-X (index 4 or 20 in the reachable branches) contributes nothing;
-X do_karlin uses j<=kar_nsq -- confirm this is intentional */
-X for (i=1; i<=kar_nsq; i++) {
-X for (j=1; j<kar_nsq; j++)
-X kar_p[pam2[i][j]-kar_min] += aa0_f[i]*aa1_f[j];
-X }
-X
-X kar_tot = 0.0;
-X for (i=0; i<=kar_range; i++) kar_tot += kar_p[i];
-X if (kar_tot <= 0.00001) return 0;
-X
-X for (i=0; i<=kar_range; i++) kar_p[i] /= kar_tot;
-X
-X return karlin_k(kar_min, kar_max, kar_p, lambda, K, H);
-}
-X
-/* take a array of letters and pam information and get *lambda, *H */
-/* karlin: find lambda > 0 such that sum_i pr[i]*exp(lambda*(low+i)) is
-X 1 (to within TINY) by bisection, then compute H.
-X
-X Returns 1 on success.  Returns 0, leaving *lambda_p and *H_p
-X untouched, when the expected score is not negative or when the
-X bisection ends with lambda <= 1e-10.
-*/
-int
-karlin(int low, /* Lowest score (must be negative) */
-X int high, /* Highest score (must be positive) */
-X double *pr, /* Probabilities for various scores */
-X double *lambda_p, /* Pointer to parameter lambda */
-X double *H_p) /* Pointer to parameter H */
-{
-X int i,range, nit;
-X double up,new,sum,av,beta,ftemp;
-X double lambda;
-X double *p,*ptr1;
-X
-X /* Calculate the parameter lambda */
-X
-X p = pr;
-X range = high-low;
-X
-X /* check for E() < 0.0 */
-X sum = 0;
-X ptr1 = pr;
-X for (i=low; i <= high ; i++) sum += i* (*ptr1++);
-X if (sum >= 0.0) {
-#ifdef DEBUG
-X fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
-X sum);
-#endif
-X return 0;
-X }
-X
-X /* up is upper bound on lambda */
-X /* double up (1, 2, 4, ...) until the weighted sum reaches 1; ftemp
-X starts at exp(up*(low-1)) and is multiplied by beta=exp(up) once
-X per score, so term i carries the factor exp(up*(low+i)) */
-X up=0.5;
-X do {
-X up *= 2.0;
-X ptr1=p;
-X
-X beta=exp(up);
-X
-X ftemp=exp(up*(low-1));
-X sum = 0.0;
-X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
-X }
-X while (sum<1.0);
-X
-X /* avoid overflow from very large lambda*S */
-/*
-X do {
-X up /= 2.0;
-X ptr1=p;
-X beta=exp(up);
-X
-X ftemp=exp(up*(low-1));
-X sum = 0.0;
-X for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
-X } while (sum > 2.0);
-X
-X up *= 2.0;
-*/ /* we moved past, now back up */
-X
-X /* for (lambda=j=0;j<25;++j) { */
-X /* bisection on [lambda, up], at most MAXIT steps */
-X lambda = 0.0;
-X nit = 0;
-X while ( nit++ < MAXIT ) {
-X new = (lambda+up)/2.0;
-X beta = exp(new);
-X ftemp = exp(new*(low-1));
-X ptr1=p;
-X sum = 0.0;
-X /* multiply by exp(new) for each score */
-X for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
-X
-X if (sum > 1.0 + TINY) up=new;
-X else {
-X if ( fabs(lambda - new) < TINY ) goto done;
-X lambda = new;
-X }
-X }
-X
-X if (lambda <= 1e-10) {
-X lambda = -1.0;
-X return 0;
-X }
-X
-X done:
-X *lambda_p = lambda;
-X
-X /* Calculate the parameter H */
-X /* NOTE(review): beta still holds exp(new) from the last bisection
-X step, which can differ slightly from exp(lambda) */
-X
-X ptr1=p;
-X ftemp=exp(lambda*(low-1));
-X for (av=0.0, i=low; i<=high; ++i)
-X av+= *ptr1++ *i*(ftemp*=beta);
-X *H_p= lambda*av;
-X
-X return 1; /* Parameters calculated successfully */
-}
-X
-static int a_gcd (int, int);
-X
-/* take a array of letters and pam information and get *lambda, *K, *H */
-/* karlin_k: same lambda/H calculation as karlin(), followed by the
-X   series calculation of K.
-X
-X   Returns 1 on success.  Returns 0, leaving the outputs untouched,
-X   when the expected score is not negative or when the bisection ends
-X   with lambda <= 1e-10.  Exits with status 1 if the work array for K
-X   cannot be allocated.
-*/
-static int
-karlin_k(int low,		/* Lowest score (must be negative)    */
-X	 int high,		/* Highest score (must be positive)   */
-X	 double *pr,		/* Probabilities for various scores   */
-X	 double *lambda_p,	/* Pointer to parameter lambda        */
-X	 double *K_p,
-X	 double *H_p)		/* Pointer to parameter H              */
-{
-X  int i,j,range,lo,hi,first,last, nit;
-X  double up,new,sum,Sum,av,beta,oldsum,ratio,ftemp;
-X  double lambda;
-X  double *p,*P,*ptrP,*ptr1,*ptr2;
-X
-X  /* Calculate the parameter lambda */
-X
-X  p = pr;
-X  range = high-low;
-X
-X  /* check for E() < 0.0 */
-X  sum = 0;
-X  ptr1 = pr;
-X  for (i=low; i <= high ; i++) sum += i* (*ptr1++);
-X  if (sum >= 0.0) {
-#ifdef DEBUG
-X    fprintf(stderr," (karlin lambda) non-negative expected score: %.4lg\n",
-X	    sum);
-#endif
-X    return 0;
-X  }
-X
-X  /* up is upper bound on lambda */
-X  up=0.5;
-X  do {
-X    up *= 2.0;
-X    ptr1=p;
-X
-X    beta=exp(up);
-X
-X    ftemp=exp(up*(low-1));
-X    sum = 0.0;
-X    for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
-X  }
-X  while (sum<1.0);
-X
-X  /* avoid overflow from very large lambda*S */
-X  /*
-X  do {
-X    up /= 2.0;
-X    ptr1=p;
-X    beta=exp(up);
-X
-X    ftemp=exp(up*(low-1));
-X    sum = 0.0;
-X    for (i=0; i<=range; ++i) sum+= *ptr1++ * (ftemp*=beta);
-X  } while (sum > 2.0);
-X
-X  up *= 2.0;
-X  */
-X  /* we moved past, now back up */
-X
-X  /* for (lambda=j=0;j<25;++j) { */
-X  lambda = 0.0;
-X  nit = 0;
-X  while ( nit++ < MAXIT ) {
-X    new = (lambda+up)/2.0;
-X    beta = exp(new);
-X    ftemp = exp(new*(low-1));
-X    ptr1=p;
-X    sum = 0.0;
-X    /* multiply by exp(new) for each score */
-X    for (i=0;i<=range;++i) sum+= *ptr1++ * (ftemp*=beta);
-X
-X    if (sum > 1.0 + TINY) up=new;
-X    else {
-X      if ( fabs(lambda - new) < TINY ) goto done;
-X      lambda = new;
-X    }
-X  }
-X
-X  if (lambda <= 1e-10) {
-X    lambda = -1.0;
-X    return 0;
-X  }
-X
-X done:
-X  *lambda_p = lambda;
-X
-X  /* Calculate the parameter H */
-X
-X  ptr1=p;
-X  ftemp=exp(lambda*(low-1));
-X  for (av=0.0, i=low; i<=high; ++i) av+= *ptr1++ *i*(ftemp*=beta);
-X  *H_p= lambda*av;
-X
-X  /* Calculate the parameter K */
-X  Sum=lo=hi=0;
-X  P= (double *) calloc(MAXIT*range+1,sizeof(double));
-X  /* P is written immediately below; stop with a message, as the other
-X     allocations in this file do, rather than dereference NULL */
-X  if (P == NULL) {
-X    fprintf(stderr," cannot allocate karlin_k P array: %d\n",MAXIT*range+1);
-X    exit(1);
-X  }
-X  /* P holds the score distribution of a j-step sum; each pass
-X     convolves it with p in place, working from the top index down */
-X  for (*P=sum=oldsum=j=1;j<=MAXIT && sum>0.001;Sum+=sum/=j++) {
-X    first=last=range;
-X    for (ptrP=P+(hi+=high)-(lo+=low); ptrP>=P; *ptrP-- =sum) {
-X      ptr1=ptrP-first;
-X      ptr2=p+first;
-X      for (sum=0,i=first; i<=last; ++i) sum += *ptr1-- * *ptr2++;
-X      if (first) --first;
-X      if (ptrP-P<=range) --last;
-X    }
-X    ftemp=exp(lambda*(lo-1));
-X    for (sum=0,i=lo;i;++i) sum+= *++ptrP * (ftemp*=beta);
-X    for (;i<=hi;++i) sum+= *++ptrP;
-X    ratio=sum/oldsum;
-X    oldsum=sum;
-X  }
-X  /* extend the series to 200 terms using the last observed ratio */
-X  for (;j<=200;Sum+=oldsum/j++) oldsum*=ratio;
-X  /* j becomes the gcd of the scores that have non-zero probability */
-X  for (i=low;!p[i-low];++i);
-X  for (j= -i;i<high && j>1;) if (p[++i-low]) j=a_gcd(j,i);
-X  *K_p = (j*exp(-2*Sum))/(av*(1.0-exp(- lambda*j)));
-X  free(P);
-X
-X  return 1;		/* Parameters calculated successfully */
-}
-X
-/* a_gcd() - greatest common divisor of a and |b| by Euclid's algorithm.
-X   The sign of b is dropped and the operands are swapped when b > a
-X   before the remainder loop starts. */
-int
-a_gcd(int a, int b)
-{
-X  int rem;
-X
-X  if (b < 0) {
-X    b = -b;
-X  }
-X  if (b > a) {		/* keep the larger operand in a */
-X    rem = a;
-X    a = b;
-X    b = rem;
-X  }
-X  while (b != 0) {
-X    rem = a % b;
-X    a = b;
-X    b = rem;
-X  }
-X  return a;
-}
-X
-SHAR_EOF
-chmod 0644 karlin.c ||
-echo 'restore of karlin.c failed'
-Wc_c="`wc -c < 'karlin.c'`"
-test 13727 -eq "$Wc_c" ||
- echo 'karlin.c: original size 13727, current size' "$Wc_c"
-fi
-# ============= last_tat.c ==============
-if test -f 'last_tat.c' -a X"$1" != X"-c"; then
- echo 'x - skipping last_tat.c (File already exists)'
-else
-echo 'x - extracting last_tat.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'last_tat.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: last_tat.c,v 1.8 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#include "structs.h"
-#include "param.h"
-X
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "msg.h"
-#include "p_mw.h"
-X
-void do_stage2(struct beststr **bptr, int nbest, struct mngmsg m_msg0,
-X int s_func, struct qmng_str *qm_msp);
-#endif
-X
-X
-extern int (*ranlib) (char *str, int cnt,
-X fseek_t libpos, char *libstr,
-X struct lmf_str *lm_fd);
-X
-#define RANLIB (m_fptr->ranlib)
-X
-#define MAX_BLINE 200
-X
-int
-re_getlib(unsigned char *, int, int, int, int, int, long *, long *,
-X struct lmf_str *m_fptr);
-X
-void
-do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame,
-X struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
-X
-extern void
-do_opt (unsigned char *aa0, int n0, unsigned char *aa1, int n1,
-X int frame, struct pstruct *pst, void *f_str,
-X struct rstruct *rst);
-X
-struct lmf_str *re_openlib(struct lmf_str *, int outtty);
-X
-void sortbestz (struct beststr **bptr, int nbest);
-X
-double zs_to_E(double zs,int n1, int isdna, long entries, struct db_str db);
-X
-double scale_one_score(int ipos, double escore, struct db_str db, void *rs_str);
-X
-/* sortbests() - Shell sort of bptr[0..nbest-1] into descending order of
-X   score[0]; only the pointers are moved. */
-void sortbests (struct beststr **bptr, int nbest)
-{
-X  int step, hi, lo;
-X  struct beststr *held;
-X
-X  step = nbest/2;
-X  while (step > 0) {
-X    for (hi = step; hi < nbest; hi++) {
-X      /* sink the entry at hi towards the front of its stride */
-X      lo = hi - step;
-X      while (lo >= 0 && bptr[lo]->score[0] < bptr[lo + step]->score[0]) {
-X	held = bptr[lo];
-X	bptr[lo] = bptr[lo + step];
-X	bptr[lo + step] = held;
-X	lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-X
-/* last_calc() - re-score the best hits in batches.
-X
-X   bptr[0..nbest-1] is first ordered with sortbestz().  Then, for at most
-X   5 rounds, the next batch (the sample limit grows by tat_inc=500 per
-X   round, capped at nbest) is re-scored and the smallest value returned
-X   by scale_one_score() for the batch is tracked.  The function returns
-X   as soon as that minimum exceeds m_msg.e_cut, or after the last round.
-X
-X   Without PCOMPLIB each entry is re-read from its library and scored
-X   with do_opt(); with PCOMPLIB the batch is handed to do_stage2().
-X
-X   Returns ib, the index one past the last entry processed.
-*/
-int
-last_calc(
-#ifndef PCOMPLIB
-X unsigned char **aa0, unsigned char *aa1, int maxn,
-#endif
-X struct beststr **bptr, int nbest,
-X struct mngmsg m_msg, struct pstruct *ppst
-#ifdef PCOMPLIB
-X , struct qmng_str *qm_msp
-#else
-X , void **f_str
-#endif
-X , void *rstat_str)
-{
-X int nopt, ib;
-X struct beststr *bbp;
-X long loffset, l_off;
-X int n0, n1;
-X struct rstruct rst;
-X struct lmf_str *m_fptr;
-X char bline[60];
-X int tat_samp, tat_inc, loop_cnt, i;
-X double min_escore, ess;
-X
-X n0 = m_msg.n0;
-X
-X sortbestz(bptr,nbest);
-X
-X /* number of additional entries examined in each round */
-X tat_inc = 500;
-/*
-X if (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg.db)/
-X zs_to_E(bptr[nbest-1]->zscore,bptr[nbest-1]->n1,0,ppst->zdb_size,m_msg.db)
-X < 1e-20) { tat_inc /= 4 ;}
-*/
-X
-/* || (zs_to_E(bptr[0]->zscore,bptr[0]->n1,0,ppst->zdb_size,m_msg.db)< 1e-5); */
-X
-X /* ib is never reset, so every round continues where the last one stopped */
-X ib = tat_samp = 0;
-X for (loop_cnt = 0; loop_cnt < 5; loop_cnt++) {
-X tat_samp += tat_inc;
-X nopt = min(nbest,tat_samp);
-X min_escore = 1000000.0;
-#ifndef PCOMPLIB
-X for ( ; ib<nopt; ib++) {
-X bbp = bptr[ib];
-X
-X /* leaves only this inner loop; the round still finishes below */
-X if (bbp->score[0] < 0) break;
-X
-X if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg.quiet))==NULL) {
-X fprintf(stderr,"*** cannot re-open %s\n",bbp->m_file_p->lb_name);
-X exit(1);
-X }
-X /* RANLIB expands to m_fptr->ranlib */
-X RANLIB(bline,sizeof(bline),bbp->lseek,bbp->libstr,m_fptr);
-X
-X /* note: re_getlib() is given bbp->m_file_p, not m_fptr */
-X n1 = re_getlib(aa1,maxn,m_msg.maxt3,m_msg.loff,bbp->cont,m_msg.term_code,
-X &loffset,&l_off,bbp->m_file_p);
-X
-X do_opt(aa0[bbp->frame],m_msg.n0,aa1,n1,bbp->frame,ppst,
-X f_str[bbp->frame],&rst);
-X /* replace the stored scores with the freshly computed ones */
-X bbp->score[0]=rst.score[0];
-X bbp->score[1]=rst.score[1];
-X bbp->score[2]=rst.score[2];
-X bbp->escore=rst.escore;
-X bbp->segnum = rst.segnum;
-X bbp->seglen = rst.seglen;
-X
-X if ((ess=scale_one_score(ib, bbp->escore, m_msg.db, rstat_str)) <
-X min_escore) { min_escore = ess;}
-X /*
-X fprintf(stderr,"%d: %4d %2d %3d %.4g %.4g\n",
-X ib, bbp->score[0], bbp->segnum,bbp->seglen,bbp->escore, ess);
-X */
-X }
-#else
-X do_stage2(&bptr[ib], nopt-ib, m_msg, DO_CALC_FLG, qm_msp);
-X
-X for ( ; ib < nopt; ib++) {
-X if ((ess=scale_one_score(ib, bptr[ib]->escore, m_msg.db, rstat_str)) <
-X min_escore) { min_escore = ess;}
-X /*
-X fprintf(stderr, "%d: %4d %2d %3d %.4g %.4g\n",
-X ib,bptr[ib]->score[0],bptr[ib]->segnum,bptr[ib]->seglen,bptr[ib]->escore,ess);
-X */
-X }
-#endif
-X
-X
-X /* stop once the best scaled score of this batch is above the cutoff */
-X if (min_escore > m_msg.e_cut) return ib;
-X }
-X return ib;
-}
-SHAR_EOF
-chmod 0644 last_tat.c ||
-echo 'restore of last_tat.c failed'
-Wc_c="`wc -c < 'last_tat.c'`"
-test 4128 -eq "$Wc_c" ||
- echo 'last_tat.c: original size 4128, current size' "$Wc_c"
-fi
-# ============= lcbo.aa ==============
-if test -f 'lcbo.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping lcbo.aa (File already exists)'
-else
-echo 'x - extracting lcbo.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'lcbo.aa' &&
->LCBO - Prolactin precursor - Bovine
-MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQVSLRDLFDRAVMVSHYIHDLSS
-EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHHEVLMSLILGLLRSWNDPLYHL
-VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGAKETEPYPVWSGLPSLQTKDED
-ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC*
-SHAR_EOF
-chmod 0644 lcbo.aa ||
-echo 'restore of lcbo.aa failed'
-Wc_c="`wc -c < 'lcbo.aa'`"
-test 271 -eq "$Wc_c" ||
- echo 'lcbo.aa: original size 271, current size' "$Wc_c"
-fi
-# ============= lib_sel.c ==============
-if test -f 'lib_sel.c' -a X"$1" != X"-c"; then
- echo 'x - skipping lib_sel.c (File already exists)'
-else
-echo 'x - extracting lib_sel.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'lib_sel.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: lib_sel.c,v 1.16 2006/12/06 17:30:52 wrp Exp $ */
-X
-/* modified Dec 13, 1989 requires different FASTLIBS */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctype.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "structs.h"
-X
-#ifdef NCBIBL13
-#define LASTLIB NCBIBL13+1
-#else
-#define LASTLIB 11
-#endif
-X
-X
-int getlnames(char *tname, struct mngmsg *m_msg);
-void addfile(char *, char *, struct mngmsg *);
-void libchoice(char *lname, int nl, struct mngmsg *m_msg);
-void libselect(char *lname, struct mngmsg *m_msg);
-void subs_env(char *dest, char *src, int dest_size);
-char *ulindex(char *str, char *chr);
-X
-static char ldname[MAX_FN];
-static char *libenv;
-X
-/* getlnames() - read in the library names.
-X
-X   iname is expanded with subs_env().  A name that does not begin with
-X   '@' is added directly as one library file.  "@file" names a list
-X   file: lines starting with ';' are skipped, a line starting with '<'
-X   sets the default directory used for the entries that follow, and
-X   every other line is passed to addfile().
-X
-X   Returns 1 on success, 0 when the list file cannot be opened.
-*/
-int
-getlnames(char *iname, struct mngmsg *m_msg)
-{
-X  char expanded[MAX_STR], raw[MAX_FN];
-X  char *list_name, *cut;
-X  FILE *list_fp;
-X
-X  /* expand environment variables */
-X  subs_env(expanded, iname, sizeof(expanded));
-X
-X  if (expanded[0] != '@') {
-X    addfile(expanded,"\0",m_msg);
-X    return 1;
-X  }
-X  list_name = expanded + 1;
-X
-X  /* remove ' ' before deftype if present */
-X  cut = strchr(list_name,' ');
-X  if (cut != NULL) *cut = '\0';
-X
-X  list_fp = fopen(list_name,"r");
-X  if (list_fp == NULL) {
-X    fprintf(stderr," could not open file of names: %s\n",list_name);
-X    return 0;
-X  }
-X
-X  /* expanded[] is reused from here on; list_name is no longer needed */
-X  while (fgets(raw,sizeof(raw),list_fp) != NULL) {
-X    if (raw[0] == ';') continue;
-X
-X    cut = strchr(raw,'\n');
-X    if (cut != NULL) *cut = '\0';
-X
-X    subs_env(expanded, raw, sizeof(expanded));
-X    if (expanded[0] != '<') {
-X      addfile(expanded,libenv,m_msg);
-X      continue;
-X    }
-X
-X    /* "<dir": remember the directory for the following entries */
-X    strncpy(ldname,&expanded[1],sizeof(ldname));
-X    ldname[sizeof(ldname)-1]='\0';
-X    libenv=ldname;
-X  }
-X  fclose(list_fp);
-X  return 1;
-}
-X
-/* libchoice displays a list of potential library files
-X   in the new &lib& version, only traditional 1-letter files will be
-X   shown initially
-X
-X   lname receives the user's answer (at most nl-1 characters, always
-X   NUL-terminated).  When m_msg->flstr names a choice file, its entries
-X   of the current library type are listed first; when it is empty, or
-X   the file cannot be opened or buffered, only a file name is asked for.
-X   Exits the program on end-of-file from stdin.
-*/
-X
-void
-libchoice(char *lname, int nl, struct mngmsg *m_msg)
-{
-X  FILE *fch = NULL;	/* stays NULL unless the choice file was opened */
-X  char line[MAX_STR], *bp;
-X  char *chstr[MAX_CH],*chfile[MAX_CH];
-X  char *chtmp, *charr;
-X  int i,j,k,chlen;
-X
-X  charr = NULL;
-X  if (strlen(m_msg->flstr)> (size_t)0) {
-X    chlen = MAX_CH*MAX_FN;
-X    if ((chtmp=charr=calloc((size_t)chlen,sizeof(char)))==NULL) {
-X      fprintf(stderr,"cannot allocate choice file array\n");
-X      goto l1;
-X    }
-X    chlen--;
-X    if ((fch=fopen(m_msg->flstr,"r"))==NULL) {
-X      fprintf(stderr," cannot open choice file: %s\n",m_msg->flstr);
-X      goto l1;
-X    }
-X    fprintf(stderr,"\n Choose sequence library:\n\n");
-X
-X    for (i=j=0; j<MAX_CH; i++) {
-X      if (fgets(line,sizeof(line),fch)==NULL) break;
-X      if (line[0]==';') continue;	/* check for comment */
-X      if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; /* remove \n */
-X      if ((bp=strchr(line,'$'))==NULL) continue;  /* if no '$', continue */
-X      *bp++='\0';	/* replace $ with \0, bp points to libtype */
-X
-X      /* if libtypes don't match, continue */
-X      if ((*bp++ -'0')!=m_msg->ldnaseq) continue;
-X
-X      /* if the library file name is too long, quit */
-X      if ((k=strlen(line))>chlen) break;
-X
-X      /* save the library file name */
-X      strncpy(chstr[j]=chtmp,line,chlen);
-X      chtmp += k+1; chlen -= k+1;
-X
-X      if ((k=strlen(bp))>chlen) break;
-X      strncpy(chfile[j]=chtmp,bp,chlen);
-X      chtmp += k+1; chlen -= k+1;
-X      fprintf(stderr,"    %c: %s\n",*chfile[j++],line);
-X    }
-X  l2: fprintf(stderr,"\n Enter library filename (e.g. %s), letter (e.g. P)\n",
-X	      (m_msg->ldnaseq==0)? "prot.lib" : "dna.lib");
-X    fprintf(stderr," or a %% followed by a list of letters (e.g. %%PN): ");
-X    fflush(stderr);
-X    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
-X    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
-X    if (strlen(line)==0) goto l2;
-X    strncpy(lname,line,nl);
-X    if (nl > 0) lname[nl-1]='\0';	/* strncpy does not terminate on truncation */
-X  }
-X  else {
-X  l1: fprintf(stderr," library file name: ");
-X    fflush(stderr);
-X    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
-X    if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
-X    if (strlen(line)> (size_t)0) {
-X      strncpy(lname,line,nl);
-X      if (nl > 0) lname[nl-1]='\0';
-X    }
-X    else goto l1;
-X  }
-X
-X  /* the two resources are released independently: after a failed
-X     fopen() the buffer exists but there is no stream to close */
-X  if (fch!=NULL) fclose(fch);
-X  if (charr!=NULL) free(charr);
-}
-X
-/* libselect parses the choices in char *lname and builds the list
-X   of library files
-X
-X   lname is one of:
-X     a name longer than one character that does not start with '%' or
-X       '+'  - handed to getlnames() as a file name;
-X     "+abbr+abbr+..."  - multi-letter abbreviations, matched against
-X       the "+abbr+" entries of the FASTLIBS file (m_msg->flstr);
-X     "%letters" or a single letter - matched with ulindex() against
-X       the one-letter entries of the FASTLIBS file.
-X   lname is modified in place when it holds a '+' list.
-*/
-void
-libselect(char *lname, struct mngmsg *m_msg)
-{
-X  char line[MAX_FN*2], *bp, *bp1;
-X  char *llnames[MAX_LF]; /* pointers into new list of names */
-X  int new_abbr,ich, nch;	/* use new multi-letter abbr */
-X  FILE *fch;
-X
-X  new_abbr = 0;
-X  nch = 0;
-X  m_msg->nln = 0;
-X  if (strlen(lname) > (size_t)1 && *lname != '%' && *lname != '+') {
-X    getlnames(lname,m_msg); /* file name */
-X    return;
-X  }
-X  else {
-X    if (*m_msg->flstr=='\0') {
-X      fprintf(stderr," abbrv. list request but FASTLIBS undefined, cannot use %s\n",lname);
-X      exit(1);
-X    }
-X
-X    if (strchr(lname,'+')) {
-X      /* indicates list of database abbrevs (not files) */
-X      new_abbr=1;
-X      bp = lname+1; if (*bp == '+') bp++;
-X      /* split at each '+'; never store more than llnames[] can hold */
-X      while (bp != NULL) {
-X	if (nch >= MAX_LF) {
-X	  fprintf(stderr," too many library abbreviations; ignoring: %s\n",bp);
-X	  break;
-X	}
-X	if ((bp1=strchr(bp,'+'))!=NULL) *bp1='\0';
-X	llnames[nch++] = bp;
-X	bp = (bp1 != NULL) ? bp1+1 : NULL;
-X      }
-X    }
-X    else if (*lname=='%') {     /* list of single letter abbreviations */
-X      lname++;	/* bump over '%' to get letters */
-X    }
-X
-X    /* else just use a single character abbreviation */
-X
-X    if (strlen(m_msg->flstr) > (size_t)0) {
-X      if ((fch=fopen(m_msg->flstr,"r"))==NULL) {
-X	fprintf(stderr," cannot open choice file: %s\n",m_msg->flstr);
-X	return;
-X      }
-X    }
-X    else {
-X      fprintf(stderr," FASTLIBS undefined\n");
-X      addfile(lname,"\0",m_msg);
-X      return;
-X    }
-X
-X    /* read each line of FASTLIBS */
-X    while (fgets(line,sizeof(line),fch)!=NULL) {
-X      if (line[0]==';') continue;	/* skip comments */
-X      if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';	/* remove '\n' */
-X      if ((bp=strchr(line,'$'))==NULL) continue;	/* no delim, continue */
-X      *bp++='\0';	/* point to library type */
-X      if ((*bp++ -'0')!=m_msg->ldnaseq) continue; /* doesn't match, continue */
-X
-X      /* if !new_abbr, match on one letter with ulindex() */
-X      if (!new_abbr) {
-X	if (*bp=='+') continue;	/* not a &lib& */
-X	else if (ulindex(lname,bp)!=NULL) {
-X	  strncpy(m_msg->ltitle,line,MAX_FN);
-X	  getlnames(bp+1,m_msg);
-X	}
-X      }
-X      else {
-X	if (*bp!='+') continue;
-X	else {
-X	  bp++;
-X	  if ((bp1 = strchr(bp,'+'))!=NULL) {
-X	    *bp1='\0';
-X	    for (ich = 0; ich<nch; ich++) {
-X	      if (strcmp(llnames[ich],bp)==0) {
-X		strncpy(m_msg->ltitle,line,MAX_FN);
-X		getlnames(bp1+1,m_msg);
-X		break;
-X	      }
-X	    }
-X	    *bp1='+';
-X	  }
-X	  else fprintf(stderr,"%s missing final '+'\n",bp);
-X	}
-X      }
-X    }
-X    fclose(fch);
-X  }
-}
-X
-/* addfile() - append one library file name to m_msg->lbnames[].
-X
-X   fname  file name; a leading '#' suppresses the default directory
-X   env    default directory, or NULL / "" for none
-X
-X   The stored string is a heap copy of "env/fname" (the '/' only when
-X   UNIX is defined), truncated to fit MAX_STR.  When the copy cannot
-X   be allocated or all MAX_LF slots are taken, a message is printed and
-X   the name is dropped.
-*/
-void
-addfile(char *fname, char *env, struct mngmsg *m_msg)
-{
-X  char tname[MAX_STR];
-X  char *lbptr;
-X  size_t len;
-X
-X  tname[0]='\0';
-X
-X  /* check for default directory for files */
-X  if (env != NULL && *env != '\0' && *fname != '#') {
-X    /* add default directory to file name */
-X    strncpy(tname,env,sizeof(tname)-1);
-X    tname[sizeof(tname)-1]='\0';	/* strncpy may leave it open */
-#ifdef UNIX
-X    strncat(tname,"/",sizeof(tname)-strlen(tname)-1);
-#endif
-X  }
-X
-X  /* add fname to tname, allocate space, and move to space */
-X  strncat(tname,fname,sizeof(tname)-strlen(tname)-1);
-X  len=strlen(tname)+1;
-X  if ((lbptr=calloc(len,sizeof(char)))==NULL) {
-X    fprintf(stderr,"no more space for filenames: %s ignored\n",fname);
-X    return;
-X  }
-X  memcpy(lbptr,tname,len);	/* len includes the terminating NUL */
-X
-X  if (m_msg->nln< MAX_LF) {
-X    m_msg->lbnames[m_msg->nln++]=lbptr;
-X  }
-X  else {
-X    fprintf(stderr," no more file name slots: %s ignored\n",lbptr);
-X    free(lbptr);	/* nobody else holds the copy */
-X  }
-}
-X
-/* ulindex() - case-insensitive search for a single character.
-X
-X   Returns a pointer to the first character of str that equals *chr
-X   when both are folded with tolower(), or NULL when there is none
-X   (including when *chr is '\0').  Only the first character of chr is
-X   examined.
-*/
-char *
-ulindex(char *str, char *chr)
-{
-X  int target;
-X
-X  /* tolower() is only defined for EOF and unsigned char values, so
-X     plain char must not be passed directly where char is signed */
-X  target = tolower((unsigned char)*chr);
-X
-X  while (*str != '\0' && tolower((unsigned char)*str) != target) str++;
-X  if (*str=='\0') return NULL;
-X  else return str;
-}
-SHAR_EOF
-chmod 0644 lib_sel.c ||
-echo 'restore of lib_sel.c failed'
-Wc_c="`wc -c < 'lib_sel.c'`"
-test 7638 -eq "$Wc_c" ||
- echo 'lib_sel.c: original size 7638, current size' "$Wc_c"
-fi
-# ============= list_db.c ==============
-if test -f 'list_db.c' -a X"$1" != X"-c"; then
- echo 'x - skipping list_db.c (File already exists)'
-else
-echo 'x - extracting list_db.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'list_db.c' &&
-/* list_db.c - report values from map_db.c */
-X
-/* copyright (c) 1999 William R. Pearson */
-X
-/* format of the index file:
-X
-1) map_db version number ["MP"+2 bytes]
-2) number of sequences in database [4 bytes]
-3) total length of database [8 bytes]
-4) longest sequence in database [8 bytes]
-5) list of offsets to definitions [num_seq+1] int*8
-6) list of offsets to sequences [num_seq+1] int*8
-7) list of flag characters for sequences [num_seq+1] bytes
-X (used for GCG binary to encode 2bit or 4 bit representation)
-X
-X sequence files will be as defined by their format
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "uascii.h"
-#include "ncbl2_head.h"
-X
-void src_int4_write(FILE *, int);
-void src_int4_read(FILE *, int *);
-void src_long4_read(FILE *, long *);
-void src_long8_write(FILE *, long);
-void src_long8_read(FILE *, long *);
-X
-void newname(char *nname, char *oname, char *suff, int maxn);
-X
-/* list_db: print the header values and the two offset tables of a
-X   map_db index file.
-X
-X   The library name is argv[1] or, when absent, a line read from stdin.
-X   Text after the first blank is parsed as the library type.  The index
-X   file name is the library name with ".xin" appended.
-X   Exits with status 1 when the index cannot be opened, is too short,
-X   reports a negative entry count, or memory runs out.
-*/
-int
-main(int argc, char **argv)
-{
-X  FILE *libi;
-X  char lname[256];
-X  char iname[256];
-X  char format[4];
-X  char *bp;
-X
-X  int i;
-X  int lib_aa;	/* 0 => DNA, 1 => protein */
-X  int nlib;	/* number of entries */
-X  long f_size;
-X  long max_len;	/* longest sequence */
-X  long tot_len;	/* total sequence length */
-X
-X  int lib_type;	/* 1 for protein, 0 for DNA */
-X  long *d_pos_arr;	/* array of description pointers */
-X  long *s_pos_arr;	/* array of sequence pointers */
-X  char *attr_arr;	/* array of attribute chars */
-X
-X  int mm64_flag;
-X
-X  lib_type = 0;
-X
-X  /* open the database */
-X  if (argc > 1) {
-X    strncpy(lname, argv[1],sizeof(lname)-1);
-X    lname[sizeof(lname)-1]='\0';
-X  }
-X  else {
-X    fprintf(stderr," Entry library name: ");
-X    if (fgets(lname,sizeof(lname),stdin)==NULL) {
-X      fprintf(stderr," no library name given\n");
-X      exit(1);
-X    }
-X    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
-X  }
-X
-X  if ((bp=strchr(lname,' '))!=NULL) {
-X    lib_type = atoi(bp+1);
-X    *bp='\0';
-X  }
-X  else lib_type = 0;
-X
-X  newname(iname,lname,"xin",sizeof(iname));
-X
-X  if ((libi=fopen(iname,"r"))==NULL) {
-X    fprintf(stderr," cannot open %s\n",iname);
-X    exit(1);
-X  }
-X
-X  if (fread(format,1,sizeof(format),libi) != sizeof(format)) {
-X    fprintf(stderr," %s is too short to be an index file\n",iname);
-X    exit(1);
-X  }
-X  printf("%c%c%d %d\n",format[0],format[1],format[2],format[3]);
-X  /* third header byte == 1 selects 8-byte sizes and offsets */
-X  mm64_flag = (format[2]==1);
-X
-X  src_int4_read(libi,&lib_aa);
-X
-X  if (mm64_flag) src_long8_read(libi,&f_size);
-X  else src_long4_read(libi,&f_size);
-X
-X  src_int4_read(libi,&nlib);
-X
-X  if (mm64_flag) {
-X    src_long8_read(libi,&tot_len);
-X    src_long8_read(libi,&max_len);
-X  }
-X  else {
-X    src_long4_read(libi,&tot_len);
-X    src_long4_read(libi,&max_len);
-X  }
-X
-X  printf(" %d entries; tot: %ld; max: %ld\n",nlib,tot_len,max_len);
-X
-X  /* a negative count would turn into a huge size_t below */
-X  if (nlib < 0) {
-X    fprintf(stderr," invalid entry count %d in %s\n",nlib,iname);
-X    exit(1);
-X  }
-X
-X  /* allocate array of description pointers */
-X  if ((d_pos_arr=(long *)calloc(nlib+1, sizeof(long)))==NULL) {
-X    fprintf(stderr," cannot allocate %d for desc. array\n",nlib+1);
-X    exit(1);
-X  }
-X  /* allocate array of sequence pointers */
-X  if ((s_pos_arr=(long *)calloc(nlib+1, sizeof(long)))==NULL) {
-X    fprintf(stderr," cannot allocate %d for seq. array\n",nlib+1);
-X    exit(1);
-X  }
-X  if ((attr_arr=(char *)calloc(nlib+1, sizeof(char)))==NULL) {
-X    fprintf(stderr," cannot allocate %d for attr. array\n",nlib+1);
-X    exit(1);
-X  }
-X
-X  if (mm64_flag) {
-X    for (i=0; i<=nlib; i++) src_long8_read(libi,&d_pos_arr[i]);
-X    for (i=0; i<=nlib; i++) src_long8_read(libi,&s_pos_arr[i]);
-X  }
-X  else {
-X    for (i=0; i<=nlib; i++) src_long4_read(libi,&d_pos_arr[i]);
-X    for (i=0; i<=nlib; i++) src_long4_read(libi,&s_pos_arr[i]);
-X  }
-X
-X  /* the attributes are read but not printed; a short read is only
-X     reported */
-X  if (fread(attr_arr,nlib+1,sizeof(char),libi) != 1) {
-X    fprintf(stderr," could not read attribute array from %s\n",iname);
-X  }
-X  fclose(libi);
-X
-X  printf("header\tseq\n");
-X
-X  for (i=0; i<nlib; i++) printf("%ld\t%ld\n",d_pos_arr[i],s_pos_arr[i]);
-X
-X  free(attr_arr);
-X  free(s_pos_arr);
-X  free(d_pos_arr);
-X  return 0;
-}
-X
-/* src_int4_read() - read a 4-byte integer from fd into *val.
-X   With IS_BIG_ENDIAN the four bytes are read straight into an int;
-X   otherwise they are combined most-significant byte first.
-X   The result of fread() is not checked. */
-void src_int4_read(FILE *fd, int *val)
-{
-#ifdef IS_BIG_ENDIAN
-X  int raw;
-X
-X  fread(&raw,(size_t)4,(size_t)1,fd);
-X  *val = raw;
-#else
-X  unsigned char octet[4];
-X  int acc, k;
-X
-X  fread((char *)octet,(size_t)1,(size_t)4,fd);
-X  acc = 0;
-X  for (k = 0; k < 4; k++) {
-X    acc = (acc << 8) + (int)octet[k];
-X  }
-X  *val = acc;
-#endif
-}
-X
-/* src_long4_read() - read a 4-byte integer from fd and store it in the
-X   long *val.  With IS_BIG_ENDIAN the four bytes are read straight into
-X   an int; otherwise they are combined most-significant byte first in
-X   int arithmetic before being widened.
-X   The result of fread() is not checked. */
-void src_long4_read(FILE *fd, long *val)
-{
-#ifdef IS_BIG_ENDIAN
-X  int raw;
-X
-X  fread(&raw,(size_t)4,(size_t)1,fd);
-X  *val = raw;
-#else
-X  unsigned char octet[4];
-X  int acc, k;
-X
-X  fread((char *)octet,(size_t)1,(size_t)4,fd);
-X  acc = 0;
-X  for (k = 0; k < 4; k++) {
-X    acc = (acc << 8) + (int)octet[k];
-X  }
-X  *val = acc;
-#endif
-}
-X
-/* src_long8_read() - read an 8-byte integer from fd into *val.
-X
-X   With IS_BIG_ENDIAN the eight bytes are read straight into *val.
-X   Otherwise they are combined most-significant byte first in unsigned
-X   long arithmetic, so all 64 bits survive where long is 64 bits wide
-X   (the earlier int arithmetic kept only the low 32 bits).  Where long
-X   is narrower the high-order bytes are necessarily lost.
-X   *val is set to 0 when fewer than eight bytes could be read.
-*/
-void src_long8_read(FILE *fd, long *val)
-{
-#ifdef IS_BIG_ENDIAN
-X  /* NOTE(review): writes 8 bytes through val; assumes sizeof(long)==8 */
-X  fread((char *)val,(size_t)8,(size_t)1,fd);
-#else
-X  unsigned char b[8];
-X  unsigned long acc;
-X  int k;
-X
-X  *val = 0;
-X  if (fread((char *)&b[0],(size_t)1,(size_t)8,fd) != (size_t)8) return;
-X
-X  acc = 0;
-X  for (k = 0; k < 8; k++) acc = (acc << 8) | (unsigned long)b[k];
-X  *val = (long)acc;
-#endif
-}
-X
-/* src_int4_write() - write val to fd as 4 bytes.
-X   With IS_BIG_ENDIAN the int is written as it sits in memory; otherwise
-X   the bytes are emitted most-significant first.
-X   The result of fwrite() is not checked. */
-void src_int4_write(FILE *fd, int val)
-{
-#ifdef IS_BIG_ENDIAN
-X  fwrite(&val,(size_t)4,(size_t)1,fd);
-#else
-X  unsigned char octet[4];
-X  int k;
-X
-X  /* peel off the low byte each time, filling the buffer back to front */
-X  for (k = 3; k >= 0; k--) {
-X    octet[k] = val & 255;
-X    val = val >> 8;
-X  }
-X
-X  fwrite(octet,(size_t)1,(size_t)4,fd);
-#endif
-}
-X
-/* newname() - build "<oname>.<suff>" in nname, a buffer of maxn bytes.
-X   The result is always NUL-terminated and never longer than maxn-1
-X   characters; whatever does not fit is dropped.  Nothing is written
-X   when maxn <= 0. */
-void
-newname(char *nname, char *oname, char *suff, int maxn)
-{
-X  if (maxn <= 0) return;
-X
-X  strncpy(nname,oname,maxn-1);
-X  nname[maxn-1]='\0';	/* strncpy leaves a truncated copy open */
-X  /* strncat appends its own NUL, so one byte must stay in reserve */
-X  strncat(nname,".",maxn-strlen(nname)-1);
-X  strncat(nname,suff,maxn-strlen(nname)-1);
-}
-SHAR_EOF
-chmod 0644 list_db.c ||
-echo 'restore of list_db.c failed'
-Wc_c="`wc -c < 'list_db.c'`"
-test 5150 -eq "$Wc_c" ||
- echo 'list_db.c: original size 5150, current size' "$Wc_c"
-fi
-# ============= llgetaa.c ==============
-if test -f 'llgetaa.c' -a X"$1" != X"-c"; then
- echo 'x - skipping llgetaa.c (File already exists)'
-else
-echo 'x - extracting llgetaa.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'llgetaa.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: llgetaa.c,v 1.25 2007/01/08 15:38:46 wrp Exp $ */
-X
-/*
-X Feb, 1998 - version for prss
-X
-X March, 2001 - modifications to support comp_thr.c: use libpos to indicate
-X whether the score is shuffled==1 or unshuffled==0. This simplifies
-X complib.c and makes comp_thr.c possible
-X
-X modified version of nxgetaa.c that generates random sequences
-X for a library
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#include "uascii.h"
-#include "structs.h"
-X
-#define XTERNAL
-#include "upam.h"
-#undef XTERNAL
-X
-#define YES 1
-#define NO 0
-#define MAXLINE 512
-X
-#ifndef min
-#define min(x,y) ((x) > (y) ? (y) : (x))
-#endif
-X
-int nsfnum; /* number of superfamily numbers */
-int sfnum[10]; /* superfamily number from types 0 and 5 */
-int nsfnum_n;
-int sfnum_n[10];
-X
-static int use_stdin=0;
-static char llibstr0[256];
-static char llibstr1[256];
-static char o_line[256];
-X
-#define NO_FORMAT 0
-#define FASTA_FORMAT 1
-#define GCG_FORMAT 2
-static int seq_format=NO_FORMAT;
-static char seq_title[200];
-X
-extern int irand(int);
-extern void shuffle(unsigned char *from, unsigned char *to, int n);
-extern void wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven);
-X
-/* getseq() - read one sequence into seq[] and its description into
-X   libstr[].
-X
-X   filen     file name, or "-" / "@" for stdin.  A ":start-stop" or
-X             ":-stop" suffix selects a subrange; the ':' is removed
-X             while the file is opened and put back afterwards.
-X   qascii    maps input characters to residue codes
-X   seq,maxs  output buffer; at most maxs residues are stored, followed
-X             by EOSEQ
-X   libstr,n_libstr  receive the description (cut at the first blank
-X             when n_libstr <= 20)
-X   sq0off    set to start when a subrange was given and either
-X             *sq0off was 1 or start > 1
-X
-X   When stdin is read for the second time, the header line saved in
-X   o_line by the previous call supplies the description.
-X
-X   Returns the number of residues, or 0 when the file cannot be opened
-X   or ends inside a GCG-style header.
-*/
-int
-getseq(char *filen, int *qascii,
-X       unsigned char *seq, int maxs, char *libstr,
-X       int n_libstr, long *sq0off)
-{
-X  FILE *fptr;
-X  char line[512],*bp;
-X  int i, j, n;
-X  int ic;
-X  int sstart, sstop, sset=0;
-X  int have_desc = 0;
-X  int desc_complete = 0;
-X  int llen;
-X  int l_offset = 0;	/* column at which residues start on a line */
-X
-X  seq_title[0]='\0';
-X
-X  sstart = sstop = -1;
-#ifndef DOS
-X  if ((bp=strchr(filen,':'))!=NULL) {
-#else
-X  if ((bp=strchr(filen+3,':'))!=NULL) {
-#endif
-X    *bp='\0';
-X    if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
-X    else sscanf(bp+1,"%d-%d",&sstart,&sstop);
-X    sset=1;
-X  }
-X
-X  if (strcmp(filen,"-") && strcmp(filen,"@")) {
-X    if ((fptr=fopen(filen,"r"))==NULL) {
-X      fprintf(stderr," could not open %s\n",filen);
-X      /* give the caller its "name:range" string back intact */
-X      if (sset==1) filen[strlen(filen)]=':';
-X      return 0;
-X    }
-X  }
-X  else {
-X    fptr = stdin;
-X    use_stdin++;
-X  }
-X
-X  if (use_stdin > 1) {
-X    have_desc = 1;
-X    if ((bp=strchr(o_line,'\001'))!=NULL) *bp='\0';
-X    strncpy(llibstr1,o_line,sizeof(llibstr1)-1);
-X    llibstr1[sizeof(llibstr1)-1]='\0';
-X    strncpy(libstr,o_line,n_libstr);
-X    libstr[n_libstr-1]='\0';
-X    l_offset = 0;
-X  }
-X
-X  if (sset==1) {
-X    filen[strlen(filen)]=':';
-X    if (*sq0off==1 || sstart>1) *sq0off = sstart;
-X  }
-X
-X  desc_complete = 0;
-X  n=0;
-X  while(fgets(line,sizeof(line),fptr)!=NULL) {
-X    if (line[0]=='>') {
-X      if (have_desc) {
-X	/* header of the next entry: keep it for the following call */
-X	strncpy(o_line,line,sizeof(o_line)-1);
-X	o_line[sizeof(o_line)-1]='\0';
-X	goto last;
-X      }
-X      l_offset = 0;
-X      seq_format = FASTA_FORMAT;
-#ifdef STAR_X
-X      qascii['*'] = qascii['X'];
-#endif
-X      sfnum[0] = nsfnum = 0;
-X
-X      if ((bp=(char *)strchr(line,'\n'))!=NULL) {
-X	*bp='\0';				/* have newline */
-X	desc_complete = 1;
-X      }
-X
-X      if ((bp=strchr(line+1,'\001'))!=NULL) *bp='\0';
-X      strncpy(seq_title,line+1,sizeof(seq_title)-1);
-X      seq_title[sizeof(seq_title)-1]='\0';
-X      strncpy(llibstr0,line+1,sizeof(llibstr0)-1);
-X      llibstr0[sizeof(llibstr0)-1]='\0';
-X      if (n_libstr <= 20) {
-X	if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
-X      }
-X      strncpy(libstr,line+1,n_libstr);
-X      libstr[n_libstr-1]='\0';
-X
-X      if (!desc_complete) {
-X	/* skip the rest of an over-long description line */
-X	while (fgets(line, sizeof(line), fptr) != NULL) {
-X	  if (strchr(line,'\n') != NULL) {
-X	    line[0]='>';
-X	    break;
-X	  }
-X	}
-X	desc_complete = 1;
-X      }
-X    }
-X    else if (seq_format==NO_FORMAT) {
-X      seq_format = GCG_FORMAT;
-X      qascii['*'] = qascii['X'];
-X      l_offset = 10;
-X      llen = strlen(line);
-X      /* the header ends with a line terminated by ".."; lines shorter
-X	 than three characters cannot match and must not be indexed
-X	 before their start */
-X      while (llen < 3 || strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
-X	if (fgets(line,sizeof(line),fptr)==NULL) goto fail;
-X	llen = strlen(line);
-X      }
-X      if (n_libstr <= 20) {
-X	if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
-X	else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
-X      }
-X      strncpy(libstr,line,n_libstr);
-X      libstr[n_libstr-1]='\0';
-X      if (fgets(line,sizeof(line),fptr)==NULL) goto fail;
-X    }
-X
-X    if (seq_format==GCG_FORMAT && (int)strlen(line)<l_offset) continue;
-X
-X    if (line[0]!='>'&& line[0]!=';') {
-X      for (i=l_offset; (n<maxs)&&
-X	     ((ic=qascii[line[i]&AAMASK])<EL); i++)
-X	if (ic<NA) seq[n++]= ic;
-X      if (ic == ES) break;
-X    }
-X    else {
-X      if (have_desc) {
-X	strncpy(o_line,line,sizeof(o_line)-1);
-X	o_line[sizeof(o_line)-1]='\0';
-X	goto last;
-X      }
-X      else {
-X	have_desc = 1;
-X      }
-X    }
-X  }
-X
-X last:
-X  if (n==maxs) {
-X    fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
-X    fflush(stderr);
-X  }
-X  if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
-X  if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
-X  seq[n]= EOSEQ;
-X
-X  if (fptr!=stdin) fclose(fptr);
-X
-X  if (sset) {
-X    if (sstart <= 0) sstart = 1;
-X    /* a stop beyond the end would copy bytes that were never read */
-X    if (sstop <= 0 || sstop > n) sstop = n;
-X    sstart--;
-X    sstop--;
-X    for (i=0, j=sstart; j<=sstop; i++,j++)
-X      seq[i] = seq[j];
-X    n = sstop - sstart +1;
-X    if (n < 0) n = 0;	/* start lies beyond stop: empty result */
-X    seq[n]=EOSEQ;
-X  }
-X
-X  return n;
-X
-X fail:
-X  /* premature end of file inside a GCG header */
-X  if (fptr!=stdin) fclose(fptr);
-X  return 0;
-}
-X
-/* gettitle() - copy the description of a sequence into title[].
-X
-X   When a sequence has already been read from stdin (use_stdin != 0)
-X   the description saved by getseq() is returned.  Otherwise filen is
-X   opened (a ":range" suffix is hidden while opening and then put
-X   back) and the first line starting with '>' or ';' is used, cut at
-X   the first '\001' or end of line.
-X
-X   title is always NUL-terminated within len bytes.
-X   Returns strlen(title), or 0 when the file cannot be opened or has
-X   no such line.
-*/
-int
-gettitle(char *filen, char *title, int len)
-{
-X  FILE *fptr;
-X  char line[512];
-X  char *bp;
-X  int sset;
-X
-X  sset = 0;
-X
-X  if (use_stdin) {
-X    if (use_stdin == 1) {
-X      /*      use_stdin++; */
-X      strncpy(title,llibstr0,len);
-X    }
-X    else {
-X      strncpy(title,llibstr1,len);
-X    }
-X    title[len-1]='\0';	/* strncpy does not terminate on truncation */
-X    if ((bp=strchr(title,'\001'))!=NULL) *bp='\0';
-X    return strlen(title);
-X  }
-X
-X  if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
-X
-X  if ((fptr=fopen(filen,"r"))==NULL) {
-X    fprintf(stderr," file %s was not found\n",filen);
-X    fflush(stderr);
-X    /* give the caller its "name:range" string back intact */
-X    if (sset==1) filen[strlen(filen)]=':';
-X    return 0;
-X  }
-X
-X  if (sset==1) filen[strlen(filen)]=':';
-X
-X  while(fgets(line,sizeof(line),fptr)!=0) {
-X    if (line[0]=='>'|| line[0]==';') goto found;
-X  }
-X  fclose(fptr);
-X  title[0]='\0';
-X  return 0;
-X
-X found:
-X  if ((bp=strchr(line,'\001'))!=NULL) *bp = 0;
-#ifdef WIN32
-X  bp = strpbrk(line,"\n\r");
-#else
-X  bp = strchr(line,'\n');
-#endif
-X  if (bp!=NULL) *bp = 0;
-X  strncpy(title,line,len);
-X  title[len-1]='\0';
-X  fclose(fptr);
-X  return strlen(title);
-}
-X
-FILE *libf=NULL;
-X
-long lpos;
-char lline[MAXLINE];
-int lfflag=0; /* flag for CRLF in EMBL CDROM files */
-#define LFCHAR '\015' /* for MWC 5.5 */
-X
-int agetlib(); void aranlib(); /* pearson fasta format */
-X
-/* the following is from fgetgb.c */
-X
-/* a file name for openlib may now include a library type suffix */
-/* only opens fasta format files */
-X
-static char libn_save[MAX_FN];
-static int ldna_save=0;
-static int do_shuffle;
-static int shuff_cnt=10;
-static int w_flag = 0;
-#ifdef DEBUG
-static FILE *dfile=NULL;
-#endif
-static unsigned char *aa_save;
-static int n1_save;
-static int i_even;
-X
-/* lmf_str * is used here for compatibility with the "normal" openlib,
-X but is largely unnecessary */
-X
-/* set_shuffle() - take the shuffle settings from m_msg.
-X   A positive shuff_wid becomes the window size (w_flag); shuff_max
-X   replaces the shuffle count only when it is larger than the current
-X   one.  With DEBUG, a non-empty m_msg.dfile opens "<dfile>_rlib" for
-X   writing. */
-void
-set_shuffle(struct mngmsg m_msg) {
-#ifdef DEBUG
-X  char dfname[MAX_FN];
-#endif
-X
-X  if (m_msg.shuff_wid > 0) w_flag = m_msg.shuff_wid;
-X  if (m_msg.shuff_max > shuff_cnt) shuff_cnt = m_msg.shuff_max;
-X
-#ifdef DEBUG
-X  if (m_msg.dfile[0]!='\0') {
-X    strncpy(dfname,m_msg.dfile,sizeof(dfname)-1);
-X    dfname[sizeof(dfname)-1]='\0';
-X    /* the bound for strncat is the space left, not the buffer size */
-X    strncat(dfname,"_rlib",sizeof(dfname)-strlen(dfname)-1);
-X    dfile = fopen(dfname,"w");
-X  }
-#endif
-}
-X
-/* openlib() - prepare the "library" used for shuffled searches.
-X
-X   No file is opened here: the name is saved in libn_save for the first
-X   agetlib() call, a zeroed lmf_str is allocated and given the name,
-X   the sascii table and the agetlib/aranlib handlers, the shuffle state
-X   is reset and irand(0) is called.
-X   ldnaseq, quiet and m_fd are not used.
-X
-X   Returns the new lmf_str, or NULL when it cannot be allocated.
-*/
-struct lmf_str *
-openlib(char *lname, int ldnaseq, int *sascii, int quiet, struct lmf_str *m_fd)
-{
-X  struct lmf_str *m_fptr;
-X
-X  strncpy(libn_save,lname,sizeof(libn_save)-1);
-X  libn_save[sizeof(libn_save)-1]='\0';
-X
-X  /* now allocate a buffer for the opened text file */
-X  if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
-X    /* sizeof yields size_t; cast to match the %ld conversion */
-X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
-X	    (long)sizeof(struct lmf_str),lname);
-X    return NULL;
-X  }
-X
-X  strncpy(m_fptr->lb_name,lname,MAX_FN);
-X  m_fptr->lb_name[MAX_FN-1]='\0';
-X
-X  m_fptr->sascii = sascii;
-X  m_fptr->getlib = agetlib;
-X  m_fptr->ranlib = aranlib;
-X  m_fptr->mm_flg = 0;
-X
-X  do_shuffle = 0;
-X  irand(0);	/* initialize the random number generator */
-X
-X  return m_fptr;
-}
-X
-/* closelib() - close libf and, with DEBUG, the shuffle dump file.
-X   Both pointers are reset to NULL, so calling it again is harmless. */
-void
-closelib()
-{
-X  if (libf!=NULL) {
-X    fclose(libf);
-X    libf = NULL;
-X  }
-#ifdef DEBUG
-X  if (dfile) {
-X    fclose(dfile);
-X    dfile = NULL;	/* a stale pointer would be closed twice */
-X  }
-#endif
-}
-X
-static int ieven=0;
-static char *desc_save;
-X
-/* agetlib() - library read function for shuffled searches.
-X
-X   First call: reads the sequence named by libn_save with getseq(),
-X   returns it in seq/libstr, keeps private copies of the residues and
-X   of the description, and sets *libpos to 0.
-X   Later calls: return a shuffled copy of the saved residues
-X   (wshuffle() when a window size was set, shuffle() otherwise) with
-X   *libpos set to 1, until the shuffle count is used up.
-X
-X   *l_off is always set to 1; lcont is not used.
-X   Returns the sequence length, getseq()'s value (< 1) when the first
-X   read fails, and -1 when the shuffles are exhausted or the residues
-X   could not be saved.
-X   NOTE(review): -1 after a failed allocation reuses the "no more
-X   sequences" value - confirm that callers treat it as end of library.
-*/
-int
-agetlib(unsigned char *seq,
-X	int maxs,
-X	char *libstr,
-X	int n_libstr,
-X	fseek_t *libpos,
-X	int *lcont,
-X	struct lmf_str *lf_fd,
-X	long *l_off)
-{
-X  long sq1_off = 1;	/* getseq() reads this before it may set it */
-X  char lib_desc[120];
-#ifdef DEBUG
-X  int i;
-#endif
-X
-X  *l_off = 1;
-X
-X  if (!do_shuffle) {
-X    do_shuffle = 1;
-X
-X    if ((n1_save = getseq(libn_save,lf_fd->sascii,
-X			  seq,maxs,lib_desc,sizeof(lib_desc),&sq1_off)) < 1)
-X      return n1_save;
-X
-X    strncpy(libstr,lib_desc,n_libstr);
-X    libstr[n_libstr-1]='\0';
-X
-X    if ((aa_save = (unsigned char *)calloc(n1_save+1,sizeof(unsigned char)))==
-X	NULL) {
-X      fprintf(stderr," cannot allocate %d for saved sequence\n",
-X	      n1_save);
-X      return -1;	/* nothing to copy into, nothing to shuffle later */
-X    }
-X    memcpy((void *)aa_save,(void *)seq,n1_save);
-X
-X    if ((desc_save =
-X	 (char *)calloc(strlen(lib_desc)+1,sizeof(char)))== NULL) {
-X      fprintf(stderr," cannot allocate saved desciption [%ld]\n",
-X	      (long)(strlen(lib_desc)+1));
-X    }
-X    else {
-X      strncpy (desc_save,lib_desc,strlen(lib_desc));
-X      desc_save[strlen(lib_desc)]='\0';	/* assignment, not comparison */
-X    }
-X
-X    *libpos = 0;
-X    return n1_save;
-X  }
-X  else {	/* return a shuffled sequence - here we need a window size; */
-X    if (desc_save != NULL) strncpy(libstr,desc_save,n_libstr);
-X    else libstr[0]='\0';
-X    libstr[n_libstr-1]='\0';
-X
-X    if (aa_save == NULL) return -1;	/* the first call could not save it */
-X    if (shuff_cnt-- <= 0 ) return -1;
-X    if (w_flag > 0) wshuffle(aa_save,seq,n1_save,w_flag,&ieven);
-X    else shuffle(aa_save,seq,n1_save);
-X    seq[n1_save] = EOSEQ;
-#ifdef DEBUG
-X    if (dfile!=NULL) {
-X      fprintf(dfile,">%d\n",shuff_cnt);
-X      for (i=0; i<n1_save; i++) {
-X	if (aa[seq[i]]>0) fputc(aa[seq[i]],dfile);
-X	else {fprintf(stderr,"error aa0[%d]: %d %d\n",
-X		      i,seq[i],aa[seq[i]]);}
-X	if (i%60 == 59) fputc('\n',dfile);
-X      }
-X      fputc('\n',dfile);
-X    }
-#endif
-X    *libpos = 1;
-X    return n1_save;
-X  }
-}
-X
-/* aranlib() - copy the description of the current sequence into str.
-X
-X   When the second stdin sequence is in use (use_stdin == 2) the text
-X   comes from llibstr1, without a leading '>' or ';'; otherwise from
-X   the description saved by agetlib().  The result holds at most cnt-1
-X   characters and is cut at the first '\001' or, failing that, at the
-X   first newline.
-X   seek, libstr and lm_fd are not used.  Nothing is written when
-X   cnt <= 0; a missing saved description yields an empty string.
-*/
-void
-aranlib(char *str,
-X	int cnt,
-X	fseek_t seek,
-X	char *libstr,
-X	struct lmf_str *lm_fd)
-{
-X  char *bp;
-X  char *src;
-X
-X  if (cnt <= 0) return;
-X
-X  if (use_stdin == 2) {
-X    if (llibstr1[0]=='>' || llibstr1[0]==';') src = llibstr1+1;
-X    else src = llibstr1;
-X  }
-X  else {
-X    /* desc_save stays NULL when agetlib() could not allocate it */
-X    src = (desc_save != NULL) ? desc_save : "";
-X  }
-X
-X  strncpy(str,src,cnt);
-X  str[cnt-1]='\0';
-X  if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';
-X  else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-}
-X
-/*
-void
-revcomp(unsigned char *seq, int n, int *c_nt)
-{
-X unsigned char tmp;
-X int i, ni;
-X
-X
-X for (i=0, ni = n-1; i< n/2; i++,ni--) {
-X tmp = c_nt[seq[i]];
-X seq[i] = c_nt[seq[ni]];
-X seq[ni] = tmp;
-X }
-X if ((n%2)==1) {
-X i = n/2;
-X seq[i] = c_nt[seq[i]];
-X }
-}
-*/
-X
-/* re_openlib() - nothing has to be reopened in this version of the
-X   library code: the handle passed in is handed straight back. */
-struct lmf_str *
-re_openlib(struct lmf_str *om_fptr, int outtty)
-{
-X  (void)outtty;		/* not used */
-X
-X  return om_fptr;
-}
-X
-/* re_getlib - stub: no sequence is re-read.  Stores 0 in *loffset and
-X   1 in *l_off (in that order) and returns n1 unchanged; every other
-X   argument is ignored. */
-int re_getlib(unsigned char *aa1, int n1, int maxt3, int loff, int cont,
-X	      int term_code, long *loffset, long *l_off,
-X	      struct lmf_str *m_file_p)
-{
-X  const int seq_len = n1;
-X
-X  loffset[0] = 0L;
-X  l_off[0] = 1L;
-X
-X  return seq_len;
-}
-X
-SHAR_EOF
-chmod 0644 llgetaa.c ||
-echo 'restore of llgetaa.c failed'
-Wc_c="`wc -c < 'llgetaa.c'`"
-test 10617 -eq "$Wc_c" ||
- echo 'llgetaa.c: original size 10617, current size' "$Wc_c"
-fi
-# ============= m1r.aa ==============
-if test -f 'm1r.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping m1r.aa (File already exists)'
-else
-echo 'x - extracting m1r.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'm1r.aa' &&
->test | 40001 90043 | mgstm1
-MGCEN,
-MIDYP,
-MLLAY,
-MLLGY
-SHAR_EOF
-chmod 0644 m1r.aa ||
-echo 'restore of m1r.aa failed'
-Wc_c="`wc -c < 'm1r.aa'`"
-test 56 -eq "$Wc_c" ||
- echo 'm1r.aa: original size 56, current size' "$Wc_c"
-fi
-# ============= m2.aa ==============
-if test -f 'm2.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping m2.aa (File already exists)'
-else
-echo 'x - extracting m2.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'm2.aa' &&
->tests from mgstm1
-MILGYW,
-MLLEYT,
-MGDAPD,
-MLCYNP
-SHAR_EOF
-chmod 0644 m2.aa ||
-echo 'restore of m2.aa failed'
-Wc_c="`wc -c < 'm2.aa'`"
-test 50 -eq "$Wc_c" ||
- echo 'm2.aa: original size 50, current size' "$Wc_c"
-fi
-# ============= make_osx_univ.sh ==============
-if test -f 'make_osx_univ.sh' -a X"$1" != X"-c"; then
- echo 'x - skipping make_osx_univ.sh (File already exists)'
-else
-echo 'x - extracting make_osx_univ.sh (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'make_osx_univ.sh' &&
-#!/bin/csh
-X
-# Run the all / install / clean-up targets of each OS X makefile in turn,
-# Makefile.os_x first and Makefile.os_x86 second.
-foreach mf ( Makefile.os_x Makefile.os_x86 )
-X  foreach goal ( all install clean-up )
-X    make -f $mf $goal
-X  end
-end
-X
-# For every file found in ppc/, merge it with the file of the same name
-# in i386/ and write the combined binary to bin/.
-foreach built ( ppc/* )
-X  set f=$built:t
-X  lipo -create ppc/$f i386/$f -output bin/$f
-X  echo "Universal $f built"
-end
-echo "Done!"
-X
-SHAR_EOF
-chmod 0755 make_osx_univ.sh ||
-echo 'restore of make_osx_univ.sh failed'
-Wc_c="`wc -c < 'make_osx_univ.sh'`"
-test 312 -eq "$Wc_c" ||
- echo 'make_osx_univ.sh: original size 312, current size' "$Wc_c"
-fi
-# ============= map_db.1 ==============
-if test -f 'map_db.1' -a X"$1" != X"-c"; then
- echo 'x - skipping map_db.1 (File already exists)'
-else
-echo 'x - extracting map_db.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'map_db.1' &&
-.TH MAP_DB 1 "September, 1999"
-.SH NAME
-.B map_db
-\- read a FASTA (0), GENBANK flat file (1), PIR/VMS (5), or GCG binary
-(6) sequence database and produce the offsets necessary for efficient
-memory mapping.
-.SH SYNOPSIS
-.B map_db
-[-n] filename | "filename libtype"
-.SH DESCRIPTION
-.B map_db
-.I filename
-reads the sequence database in
-.I filename
-and produces a new file
-.I filename.xin
-with the offset information necessary for efficient memory mapping.
-.LP
-The programs in fasta version 32t08 can use memory mapped i/o to load
-sequence database files and read them efficiently. Memory mapping is
-used only if a "\c
-.I .xin\c
-\&" file is available. The "\c
-.I .xin\c
-\&" file is created by
-.B map_db\c
-\&.
-.LP
-In addition to
-.B map_db\c
-\&,
-.B list_db
-is available to display the database size, etc., and the set of offsets calculated
-by
-.B map_db\c
-\&.
-.SH OPTIONS
-.TP
-\-n
-Read file as DNA database.
-.SH BUGS
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 map_db.1 ||
-echo 'restore of map_db.1 failed'
-Wc_c="`wc -c < 'map_db.1'`"
-test 948 -eq "$Wc_c" ||
- echo 'map_db.1: original size 948, current size' "$Wc_c"
-fi
-# ============= map_db.c ==============
-if test -f 'map_db.c' -a X"$1" != X"-c"; then
- echo 'x - skipping map_db.c (File already exists)'
-else
-echo 'x - extracting map_db.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'map_db.c' &&
-/* map_db.c - read a FASTA or GCG format database and generate a list
-X of indices for rapid memory mapping */
-X
-/* copyright (c) 1999 William R. Pearson */
-X
-/* $Name: fa_34_26_5 $ - $Id: map_db.c,v 1.9 2005/09/27 15:32:58 wrp Exp $ */
-X
-/* input is a libtype 0, 1, 5, or 6 sequence database */
-/* output is a BLAST2 formatdb type index file */
-X
-/* format of the index file:
-X
-1) map_db version number ["MP"+2 bytes]
-2) number of sequences in database [4 bytes]
-3) total length of database [8 bytes] (MP1, 4 bytes for MP0)
-4) longest sequence in database [8 bytes] (MP1, 4 bytes for MP0)
-5) list of offsets to definitions [num_seq+1] int*8 (MP1, 4 bytes for MP0)
-6) list of offsets to sequences [num_seq+1] int*8 (MP1, 4 bytes for MP0)
-7) list of flag characters for sequences [num_seq+1]bytes
-X (used for GCG binary to encode 2bit or 4 bit representation)
-X
-X sequence files will be as defined by their format
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include <sys/types.h>
-#include <sys/stat.h>
-X
-#include "uascii.h"
-#include "ncbl2_head.h"
-X
-#define GCGBIN 6
-#define LASTLIB 6
-X
-int (*get_entry) ();
-X
-int a_get_ent(long *, long *);
-int v_get_ent(long *, long *);
-int gcg_get_ent(long *, long *);
-int gbf_get_ent(long *, long *);
-X
-void src_int4_write(FILE *, int);
-void src_int4_read(FILE *, int *);
-void src_long4_write(FILE *, long);
-void src_long4_read(FILE *, long *);
-void src_long8_write(FILE *, long);
-void src_long8_read(FILE *, long *);
-X
-void newname(char *nname, char *oname, char *suff, int maxn);
-X
-int (*get_ent_arr[LASTLIB+1])()={a_get_ent, gbf_get_ent, NULL, NULL, NULL,
-X v_get_ent, gcg_get_ent};
-X
-long openlib(char *, int);
-X
-static int *sascii;
-X
-/* main - scan a sequence library and write its ".xin" offset index.
-X
-X   usage: map_db [-n] filename | "filename libtype"
-X
-X   The library name is taken from the first non-option argument, or read
-X   from stdin when there is none.  An integer following a space in the
-X   name selects the library format, which is used as an index into
-X   get_ent_arr[].  get_entry() is then called until it returns <= 0,
-X   collecting the description and sequence offset of every entry, and
-X   the header plus both offset tables are written to "<name>.xin".
-X
-X   Returns 0 on success; exits with status 1 on any error. */
-int
-main(int argc, char **argv)
-{
-X  FILE *libi;
-X  char lname[256];
-X  char iname[256];
-X  char format[4];
-X  char *bp;
-X
-X  int i;
-X  int nlib;		/* number of entries */
-X
-X  long max_len;		/* longest sequence */
-X  long tot_len;		/* total sequence length */
-X
-X  int n1;
-X
-X  long f_size;		/* library size in bytes, as returned by openlib() */
-X  int lib_size;		/* current space available - may be realloc'ed */
-X  int lib_inc;
-X  int lib_type;		/* library format; index into get_ent_arr[] */
-X  int lib_aa;		/* 1 = protein (aascii), 0 = DNA (nascii) */
-X
-X  /* file offsets */
-X  long d_pos;		/* start of description */
-X  long s_pos;		/* start of sequence */
-X  long *d_pos_arr;	/* array of description offsets */
-X  long *s_pos_arr;	/* array of sequence offsets */
-X
-X  lib_type = 0;
-X  lib_size = 200000;
-X  lib_inc = 100000;
-X
-X  lib_aa = 1;
-X
-X  /* some get_entry() routines return without storing an offset, and
-X     d_pos is written to the index after the loop; never leave it
-X     indeterminate */
-X  d_pos = 0;
-X  s_pos = 0;
-X
-X  while (argc > 1 && *argv[1]=='-') {
-X    if (strcmp(argv[1],"-n")==0) lib_aa = 0;
-X    argv++;
-X    argc--;
-X  }
-X
-X  /* open the database */
-X  if (argc > 1) {
-X    /* strncpy() does not terminate the copy when the source is too long */
-X    strncpy(lname, argv[1], sizeof(lname)-1);
-X    lname[sizeof(lname)-1]='\0';
-X  }
-X  else {
-X    fprintf(stderr," Entry library name: ");
-X    if (fgets(lname,sizeof(lname),stdin)==NULL) {
-X      fprintf(stderr," no library name given\n");
-X      exit(1);
-X    }
-X    if ((bp=strchr(lname,'\n'))!=NULL) *bp='\0';
-X  }
-X
-X  if ((bp=strchr(lname,' '))!=NULL) {
-X    lib_type = atoi(bp+1);
-X    *bp='\0';
-X  }
-X  else lib_type = 0;
-X
-X  /* lib_type comes from user input: range-check it before using it as
-X     an array index */
-X  if (lib_type < 0 || lib_type > LASTLIB || get_ent_arr[lib_type] == NULL) {
-X    fprintf(stderr," cannot index file %s type %d\n",lname,lib_type);
-X    exit(1);
-X  }
-X
-X  /* formats 1 and GCGBIN are always read with the DNA table */
-X  if (lib_type == GCGBIN || lib_type == 1) lib_aa = 0;
-X
-X  if (lib_aa == 1) sascii = aascii;
-X  else sascii = nascii;
-X
-X  if ((f_size=openlib(lname,lib_type))==0) {
-X    fprintf(stderr," cannot open %s (type: %d)\n",lname,lib_type);
-X    exit(1);
-X  }
-X
-X  /* allocate array of description pointers */
-X  if ((d_pos_arr=(long *)calloc(lib_size, sizeof(long)))==NULL) {
-X    fprintf(stderr," cannot allocate %d for desc. array\n",lib_size);
-X    exit(1);
-X  }
-X  /* allocate array of sequence pointers */
-X  if ((s_pos_arr=(long *)calloc(lib_size, sizeof(long)))==NULL) {
-X    fprintf(stderr," cannot allocate %d for seq. array\n",lib_size);
-X    exit(1);
-X  }
-X
-X  nlib = 0; tot_len=0; max_len=-1;
-X  while ((n1=get_entry(&d_pos, &s_pos)) > 0) {
-X    d_pos_arr[nlib] = d_pos;
-X    s_pos_arr[nlib] = s_pos;
-X    nlib++;
-X    tot_len += n1;
-X    if (n1 > max_len) max_len = n1;
-X    /* grow as soon as the arrays are full, so that slot [nlib] is
-X       always available for the terminating entry written below */
-X    if (nlib >= lib_size) {
-X      lib_size += lib_inc;
-X      if ((d_pos_arr=(long *)realloc(d_pos_arr,lib_size*sizeof(long)))==NULL) {
-X	fprintf(stderr," cannot realloc allocate %d for desc.. array\n",
-X		lib_size);
-X	exit(1);
-X      }
-X      if ((s_pos_arr=(long *)realloc(s_pos_arr,lib_size*sizeof(long)))==NULL) {
-X	fprintf(stderr," cannot realloc allocate %d for seq. array\n",
-X		lib_size);
-X	exit(1);
-X      }
-X    }
-X  }
-X
-X  d_pos_arr[nlib]= d_pos;	/* put in the end of the file */
-X  s_pos_arr[nlib]=0;
-X
-X  /* all the information is in, write it out */
-X
-X  newname(iname,lname,"xin",sizeof(iname));
-X
-X  /* the index is binary data: "wb" keeps platforms that distinguish
-X     text and binary streams from translating 0x0A bytes */
-X  if ((libi=fopen(iname,"wb"))==NULL) {
-X    fprintf(stderr," cannot open %s for writing\n",iname);
-X    exit(1);
-X  }
-X
-X  /* write out format version */
-X  format[0]='M';
-X  format[1]='P';
-#ifdef BIG_LIB64
-X  format[2]= 1;		/* format 1 for 8-byte offsets */
-#else
-X  format[2]='\0';	/* format '\0' for original 4-byte */
-#endif
-X
-X  format[3]=lib_type;
-X  fwrite(format,sizeof(char),4,libi);
-X
-X  /* write out sequence type */
-X  src_int4_write(libi, lib_aa);
-X
-X  /* write out the library file size as an integrity check */
-#ifdef BIG_LIB64
-X  src_long8_write(libi, f_size);
-#else
-X  src_int4_write(libi, f_size);
-#endif
-X
-X  /* write out num_seq */
-X  src_int4_write(libi, nlib);
-X
-X  /* write out tot_len, max_len */
-#ifdef BIG_LIB64
-X  src_long8_write(libi, tot_len);
-#else
-X  src_int4_write(libi, tot_len);
-#endif
-X  /* NOTE(review): max_len is written as 4 bytes in both formats, while
-X     the file-format comment at the top of this file says 8 bytes for
-X     MP1 - confirm against the code that reads the index */
-X  src_int4_write(libi, max_len);
-X
-#ifdef BIG_LIB64
-X  for (i=0; i<=nlib; i++) src_long8_write(libi,d_pos_arr[i]);
-X  for (i=0; i<=nlib; i++) src_long8_write(libi,s_pos_arr[i]);
-#else
-X  for (i=0; i<=nlib; i++) src_int4_write(libi,d_pos_arr[i]);
-X  for (i=0; i<=nlib; i++) src_int4_write(libi,s_pos_arr[i]);
-#endif
-X
-X  /* buffered output is only known to be on disk once fclose() succeeds */
-X  if (fclose(libi)!=0) {
-X    fprintf(stderr," error writing %s\n",iname);
-X    exit(1);
-X  }
-X
-X  fprintf(stderr," wrote %d sequences (tot=%ld, max=%ld) to %s\n",
-X	  nlib,tot_len,max_len,iname);
-X
-X  free(d_pos_arr);
-X  free(s_pos_arr);
-X  return 0;
-}
-X
-X
-FILE *libf=NULL;
-long lpos;
-X
-#define MAXLINE 4096
-char lline[MAXLINE+1];
-X
-/* openlib - open library file lname and prime the line buffer.
-X
-X   lname     path of the library file
-X   lib_type  library format; selects the reader from get_ent_arr[]
-X
-X   On success libf is the open stream, get_entry points at the reader
-X   for lib_type, lline holds the first line of the file, lpos is the
-X   offset of that line, and the size reported by stat() is returned.
-X   Returns 0 when the format has no reader, the file cannot be
-X   stat'ed or opened, or no first line can be read; libf is not left
-X   open in those cases. */
-long
-openlib(char *lname, int lib_type)
-{
-X  long f_size;
-X  struct stat stat_buf;
-X
-X  /* lib_type is used as an array index below */
-X  if (lib_type < 0 || lib_type > LASTLIB || get_ent_arr[lib_type]==NULL) {
-X    fprintf(stderr," cannot open library: %s (type: %d)\n",
-X	    lname, lib_type);
-X    return 0;
-X  }
-X
-X  if (stat(lname,&stat_buf)<0) {
-X    fprintf(stderr," cannot stat library: %s\n",lname);
-X    return 0;
-X  }
-X
-X  if ((libf=fopen(lname,"r"))==NULL) {
-X    fprintf(stderr," cannot open library: %s (type: %d)\n",
-X	    lname, lib_type);
-X    return 0;
-X  }
-X
-X  f_size = stat_buf.st_size;
-X
-X  get_entry = get_ent_arr[lib_type];
-X
-X  lpos = ftell(libf);
-X  if (fgets(lline,MAXLINE,libf)==NULL) {
-X    /* nothing to index: release the stream instead of leaking it */
-X    fclose(libf);
-X    libf = NULL;
-X    return 0;
-X  }
-X  return f_size;
-}
-X
-/* a_get_ent - locate the next entry whose header line starts with '>'
-X   or ';' and count its residues.
-X
-X   *d_pos  receives the offset of the header line (or of end of file
-X           when no further header is found)
-X   *s_pos  receives the offset of the first line after the header
-X
-X   Returns the number of characters that sascii[] maps below NA,
-X   0 when no further entry exists, or -1 when a line starting with
-X   ';' does not fit in lline.  On return lline holds the line that
-X   ended the scan, ready for the next call. */
-int
-a_get_ent(long *d_pos, long *s_pos)
-{
-X  register char *cp;
-X  register int *ap, n1;
-X  int c;
-X
-X  ap = sascii;
-X
-X  while (lline[0]!='>' && lline[0]!=';') {
-X    lpos = ftell(libf);
-X    if (fgets(lline,sizeof(lline),libf)==NULL) {
-X      *d_pos = lpos;
-X      return 0;
-X    }
-X  }
-X
-X  *d_pos = lpos;
-X
-X  /* make certain we have the end of the line */
-X  while (strchr((char *)lline,'\n')==NULL) {
-X    if (fgets(lline,sizeof(lline),libf)==NULL) break;
-X  }
-X
-X  *s_pos = ftell(libf);
-X  lline[0]='\0';
-X  n1 = 0;
-X  while (fgets(lline,sizeof(lline),libf)!=NULL) {
-X    if (lline[0]=='>') break;
-X    if (lline[0]==';') {
-X      if (strchr(lline,'\n')==NULL) {
-X	fprintf(stderr," excessive continuation\n%s",lline);
-X	return -1;
-X      }
-X    }
-X
-X    for (cp=lline; *cp; cp++) {
-X      /* plain char may be signed: a byte >= 0x80 would otherwise index
-X	 the table with a negative value.  Such bytes are skipped.
-X	 NOTE(review): assumes sascii[] has at least 128 entries -
-X	 confirm against uascii.h */
-X      c = (unsigned char)*cp;
-X      if (c < 128 && ap[c]<NA) n1++;
-X    }
-X    lpos = ftell(libf);
-X  }
-X  return n1;
-}
-X
-/* v_get_ent - locate the next entry that has a seq_id line starting
-X   with '>' or ';' followed by one description line, and count its
-X   residues.
-X
-X   *d_pos  receives the offset of the seq_id line (or of end of file
-X           when no further seq_id line is found)
-X   *s_pos  receives the offset of the first line after the description
-X
-X   Returns the number of characters that sascii[] maps below NA, or 0
-X   when no further entry (or no description line) can be read.  On
-X   return lline holds the line that ended the scan. */
-int
-v_get_ent(long *d_pos, long *s_pos)
-{
-X  register char *cp;
-X  register int *ap;
-X  int n1;
-X  int c;
-X
-X  ap = sascii;
-X
-X  /* check for seq_id line */
-X  while (lline[0]!='>' && lline[0]!=';') {
-X    lpos = ftell(libf);
-X    if (fgets(lline,sizeof(lline),libf)==NULL) {
-X      *d_pos = lpos;
-X      return 0;
-X    }
-X  }
-X  *d_pos = lpos;
-X
-X  /* get the description line */
-X  if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
-X  /* make certain we have the end of the line */
-X  while (strchr((char *)lline,'\n')==NULL) {
-X    if (fgets(lline,sizeof(lline),libf)==NULL) break;
-X  }
-X
-X  *s_pos = ftell(libf);
-X  lline[0]='\0';
-X  n1 = 0;
-X  while (fgets(lline,sizeof(lline),libf)!=NULL) {
-X    if (lline[0]=='>') break;
-X
-X    for (cp=lline; *cp; cp++) {
-X      /* plain char may be signed: a byte >= 0x80 would otherwise index
-X	 the table with a negative value.  Such bytes are skipped.
-X	 NOTE(review): assumes sascii[] has at least 128 entries -
-X	 confirm against uascii.h */
-X      c = (unsigned char)*cp;
-X      if (c < 128 && ap[c]<NA) n1++;
-X    }
-X    lpos = ftell(libf);
-X  }
-X  return n1;
-}
-X
-static char gcg_type[10];
-static long gcg_len;
-static int gcg_bton[4]={2,4,1,3};
-X
-/* gcg_get_ent - locate the next entry whose header line starts with
-X   '>' and skip over its sequence data.
-X
-X   The header is parsed from its 5th character on as five blank
-X   separated fields; the third is kept in gcg_type and the fifth (a
-X   length) in gcg_len.  One description line follows, then the
-X   sequence, which is skipped with fseek(): (gcg_len+3)/4 bytes when
-X   gcg_type starts with '2', gcg_len bytes otherwise, plus one byte
-X   for the newline.
-X
-X   *d_pos  receives the offset of the header line (or of end of file
-X           when no further header is found)
-X   *s_pos  receives the offset of the first byte after the description
-X
-X   Returns gcg_len, 0 when no further entry or description line can
-X   be read, or -1 when the header cannot be parsed. */
-int
-gcg_get_ent(long *d_pos, long *s_pos)
-{
-X  /* as large as lline, so no token scanned out of lline can overflow */
-X  char type_tok[sizeof(lline)];
-X  long r_block;
-X
-X  /* check for seq_id line */
-X  while (lline[0]!='>') {
-X    lpos = ftell(libf);
-X    if (fgets(lline,sizeof(lline),libf)==NULL) {
-X      *d_pos = lpos;
-X      return 0;
-X    }
-X  }
-X  *d_pos = lpos;
-X
-X  /* get the encoding/sequence length info; fields that are not needed
-X     are skipped with %*s rather than copied into fixed-size buffers */
-X  if (strlen(lline) < 4 ||
-X      sscanf(&lline[4],"%*s %*s %s %*s %ld",type_tok,&gcg_len)!=2 ||
-X      gcg_len < 0) {
-X    fprintf(stderr," cannot parse GCG header\n%s",lline);
-X    return -1;
-X  }
-X  strncpy(gcg_type,type_tok,sizeof(gcg_type)-1);
-X  gcg_type[sizeof(gcg_type)-1]='\0';
-X
-X  /* get the description line */
-X  if (fgets(lline,MAXLINE,libf)==NULL) return 0;
-X
-X  *s_pos = ftell(libf);
-X  /* seek to the end of the sequence; +1 to jump over newline */
-X  if (gcg_type[0]=='2') {
-X    r_block = (gcg_len+3)/4;
-X    fseek(libf,r_block+1,SEEK_CUR);
-X  }
-X  else fseek(libf,gcg_len+1,SEEK_CUR);
-X
-X  lpos = ftell(libf);
-X  /* at end of file leave an empty line buffer so that the next call
-X     does not re-examine the description line */
-X  if (fgets(lline,MAXLINE,libf)==NULL) lline[0]='\0';
-X
-X  return (int)gcg_len;
-}
-X
-/* gbf_get_ent - locate the next entry delimited by a "LOCUS" line, an
-X   "ORIGIN" line and a line starting with '/', and count its residues.
-X
-X   *d_pos  receives the offset of the LOCUS line (or of end of file
-X           when no further LOCUS line is found)
-X   *s_pos  receives the offset of the first line after ORIGIN
-X
-X   Returns the number of characters that the lookup table maps below
-X   NA, or -1 when no LOCUS or ORIGIN line can be found.  The table is
-X   sascii[] unless TFAST is defined, in which case it is nascii[]. */
-int
-gbf_get_ent(long *d_pos, long *s_pos)
-{
-X  int n1;
-X  int c;
-X  char *cp;
-X  register int *ap;
-X
-#if !defined(TFAST)
-X  ap = sascii;
-#else
-X  ap = nascii;
-#endif
-X
-X  while (lline[0]!='L' || lline[1]!='O' ||
-X	 strncmp(lline,"LOCUS",5)) { /* find LOCUS */
-X    lpos = ftell(libf);
-X    if (fgets(lline,MAXLINE,libf)==NULL) {
-X      /* report where the data ended, as the other readers do; the
-X	 caller stores this offset after the last entry */
-X      *d_pos = lpos;
-X      return (-1);
-X    }
-X  }
-X  *d_pos=lpos;
-X
-X  while (lline[0]!='O' || lline[1]!='R' ||
-X	 strncmp(lline,"ORIGIN",6)) { /* find ORIGIN */
-X    if (fgets(lline,MAXLINE,libf)==NULL) return (-1);
-X  }
-X  *s_pos = ftell(libf);
-X
-X  lline[0]='\0';
-X  n1=0;
-X  while (fgets(lline,MAXLINE,libf)!=NULL) {
-X    if (lline[0]=='/') break;
-X    for (cp=lline; *cp; cp++) {
-X      /* plain char may be signed: a byte >= 0x80 would otherwise index
-X	 the table with a negative value.  Such bytes are skipped.
-X	 NOTE(review): assumes the table has at least 128 entries -
-X	 confirm against uascii.h */
-X      c = (unsigned char)*cp;
-X      if (c < 128 && ap[c]<NA) n1++;
-X    }
-X  }
-X  lpos = ftell(libf);
-X  /* read ahead for the next call; leave an empty buffer at end of file */
-X  if (fgets(lline,MAXLINE,libf)==NULL) lline[0]='\0';
-X
-X  return n1;
-}
-X
-/* src_int4_read - read a 4-byte integer, most significant byte first,
-X   from fd into *val.  When IS_BIG_ENDIAN is defined the four bytes are
-X   read directly into *val.  *val is set to 0 when fewer than four
-X   bytes can be read. */
-void src_int4_read(FILE *fd, int *val)
-{
-#ifdef IS_BIG_ENDIAN
-X  if (fread((char *)val,(size_t)4,(size_t)1,fd)!=1) *val = 0;
-#else
-X  unsigned char b[4];
-X  unsigned long u;
-X
-X  *val = 0;
-X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd)!=4) return;
-X
-X  /* assemble in an unsigned type: shifting a signed int left into its
-X     sign bit (b[0] >= 0x80) is not well defined */
-X  u = ((unsigned long)b[0]<<24) | ((unsigned long)b[1]<<16) |
-X      ((unsigned long)b[2]<<8) | (unsigned long)b[3];
-X  *val = (int)(unsigned int)u;
-#endif
-}
-X
-/* src_int4_write - write val to fd as four bytes, most significant
-X   byte first.  When IS_BIG_ENDIAN is defined the first four bytes of
-X   val are written as they are stored. */
-void src_int4_write(FILE *fd, int val)
-{
-#ifdef IS_BIG_ENDIAN
-X  fwrite(&val,(size_t)4,(size_t)1,fd);
-#else
-X  unsigned char b[4];
-X  int rest;
-X  int k;
-X
-X  /* peel off the low byte each round, filling b[] from the back */
-X  rest = val;
-X  for (k = 3; k >= 0; k--) {
-X    b[k] = rest & 255;
-X    rest = rest >> 8;
-X  }
-X
-X  fwrite(b,(size_t)1,(size_t)4,fd);
-#endif
-}
-X
-/* src_long8_write - write val to fd as eight bytes, most significant
-X   byte first.
-X
-X   The bytes are always produced by shifting, whatever the host byte
-X   order: writing the object directly would read past val wherever
-X   long is narrower than eight bytes, and shifting yields the same
-X   byte sequence as a direct write on a big-endian host with an
-X   8-byte long. */
-void src_long8_write(FILE *fd, long val)
-{
-X  unsigned char b[8];
-X  int k;
-X
-X  /* peel off the low byte each round, filling b[] from the back */
-X  for (k = 7; k >= 0; k--) {
-X    b[k] = (unsigned char)(val & 255);
-X    val = val >> 8;
-X  }
-X
-X  fwrite(b,(size_t)1,(size_t)8,fd);
-}
-X
-/* newname - build "<oname>.<suff>" in nname.
-X
-X   nname  destination buffer
-X   oname  base name
-X   suff   suffix, without the leading '.'
-X   maxn   size of nname in bytes
-X
-X   The result is truncated to at most maxn-1 characters and is always
-X   NUL-terminated.  Nothing is written when maxn <= 0. */
-void
-newname(char *nname, char *oname, char *suff, int maxn)
-{
-X  size_t room;		/* characters still available, excluding the NUL */
-X  size_t len;
-X  size_t pos;
-X
-X  if (nname == NULL || maxn <= 0) return;
-X  room = (size_t)maxn - 1;
-X
-X  len = strlen(oname);
-X  if (len > room) len = room;
-X  memcpy(nname,oname,len);
-X  pos = len;
-X  room -= len;
-X
-X  if (room > 0) {
-X    nname[pos++] = '.';
-X    room--;
-X  }
-X
-X  len = strlen(suff);
-X  if (len > room) len = room;
-X  memcpy(nname+pos,suff,len);
-X  pos += len;
-X
-X  nname[pos] = '\0';
-}
-SHAR_EOF
-chmod 0644 map_db.c ||
-echo 'restore of map_db.c failed'
-Wc_c="`wc -c < 'map_db.c'`"
-test 10852 -eq "$Wc_c" ||
- echo 'map_db.c: original size 10852, current size' "$Wc_c"
-fi
-# ============= mchu.aa ==============
-if test -f 'mchu.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping mchu.aa (File already exists)'
-else
-echo 'x - extracting mchu.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mchu.aa' &&
->MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
-ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
-FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
-DIDGDGQVNYEEFVQMMTAK
-SHAR_EOF
-chmod 0644 mchu.aa ||
-echo 'restore of mchu.aa failed'
-Wc_c="`wc -c < 'mchu.aa'`"
-test 212 -eq "$Wc_c" ||
- echo 'mchu.aa: original size 212, current size' "$Wc_c"
-fi
-# ============= md_10.mat ==============
-if test -f 'md_10.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping md_10.mat (File already exists)'
-else
-echo 'x - extracting md_10.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'md_10.mat' &&
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 11 -13 -12 -11 -13 -13 -10 -8 -15 -13 -15 -14 -13 -18 -7 -5 -4 -20 -19 -6 -12 -11 -1
-R -12 12 -13 -18 -10 -5 -15 -9 -5 -17 -14 -2 -14 -22 -11 -10 -12 -9 -17 -17 -15 -10 -1
-N -12 -13 13 -3 -14 -11 -12 -11 -5 -13 -19 -6 -15 -20 -17 -4 -7 -21 -12 -17 5 -11 -1
-D -11 -18 -3 12 -20 -13 -2 -9 -10 -19 -21 -15 -18 -23 -18 -12 -14 -24 -13 -15 5 -7 -1
-C -13 -10 -14 -20 17 -19 -22 -12 -12 -18 -16 -21 -15 -11 -18 -7 -14 -9 -7 -12 -17 -21 -1
-Q -13 -5 -11 -13 -19 13 -5 -15 -3 -19 -12 -6 -14 -22 -8 -13 -13 -17 -16 -17 -12 4 -1
-E -10 -15 -12 -2 -22 -5 12 -9 -15 -19 -20 -8 -17 -23 -17 -15 -15 -20 -21 -14 -7 3 -1
-G -8 -9 -11 -9 -12 -16 -9 11 -16 -21 -21 -15 -18 -22 -16 -7 -14 -13 -21 -13 -10 -13 -1
-H -16 -5 -5 -10 -12 -3 -15 -16 16 -17 -13 -13 -15 -14 -10 -11 -13 -20 -3 -19 -7 -9 -1
-I -13 -17 -14 -19 -17 -20 -19 -21 -18 12 -7 -17 -4 -11 -19 -14 -7 -20 -15 -1 -16 -19 -1
-L -15 -14 -19 -21 -16 -12 -20 -21 -13 -7 10 -18 -4 -6 -10 -13 -15 -13 -16 -8 -20 -16 -1
-K -14 -2 -6 -15 -21 -6 -8 -15 -13 -17 -18 12 -12 -24 -17 -13 -10 -19 -20 -18 -11 -7 -1
-M -13 -14 -15 -18 -15 -14 -18 -19 -15 -4 -4 -12 16 -14 -17 -15 -7 -16 -18 -5 -16 -16 -1
-F -18 -22 -19 -22 -11 -22 -23 -22 -14 -11 -6 -23 -14 14 -17 -11 -18 -13 -3 -12 -21 -22 -1
-P -7 -12 -17 -18 -18 -8 -17 -16 -10 -19 -10 -16 -17 -17 13 -6 -9 -22 -20 -16 -17 -13 -1
-S -5 -10 -4 -12 -7 -13 -15 -7 -11 -14 -13 -13 -15 -11 -6 11 -4 -15 -12 -14 -8 -14 -1
-T -4 -12 -7 -14 -14 -13 -15 -14 -13 -7 -16 -10 -7 -19 -9 -4 12 -19 -17 -10 -10 -14 -1
-W -21 -9 -21 -21 -10 -17 -21 -13 -21 -21 -13 -21 -17 -13 -21 -15 -18 18 -12 -16 -21 -19 -1
-Y -20 -17 -12 -13 -7 -16 -21 -20 -3 -15 -16 -20 -17 -3 -20 -12 -17 -12 15 -18 -13 -19 -1
-V -6 -17 -17 -15 -12 -17 -14 -13 -19 -1 -8 -18 -5 -12 -16 -14 -10 -16 -18 11 -16 -15 -1
-B -12 -15 5 5 -17 -12 -7 -10 -7 -16 -20 -11 -17 -21 -17 -8 -10 -22 -13 -16 13 -9 -1
-Z -16 -18 -17 -8 -32 1 9 -17 -17 -29 -26 -11 -24 -34 -21 -21 -21 -29 -29 -22 -9 13 -1
-XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 md_10.mat ||
-echo 'restore of md_10.mat failed'
-Wc_c="`wc -c < 'md_10.mat'`"
-test 2255 -eq "$Wc_c" ||
- echo 'md_10.mat: original size 2255, current size' "$Wc_c"
-fi
-# ============= md_20.mat ==============
-if test -f 'md_20.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping md_20.mat (File already exists)'
-else
-echo 'x - extracting md_20.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'md_20.mat' &&
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 10 -10 -9 -8 -10 -10 -7 -5 -12 -10 -12 -11 -9 -15 -5 -2 -1 -17 -16 -3 -9 -8 -1
-R -10 12 -10 -14 -7 -3 -11 -6 -3 -14 -12 0 -11 -18 -9 -7 -9 -6 -14 -14 -12 -7 -1
-N -9 -10 13 -1 -11 -8 -9 -8 -2 -11 -15 -4 -12 -16 -13 -1 -4 -18 -9 -14 6 -8 -1
-D -8 -14 -1 12 -16 -9 1 -6 -7 -16 -18 -11 -15 -20 -15 -9 -11 -20 -11 -12 6 -4 -1
-C -10 -7 -11 -16 17 -16 -19 -9 -9 -14 -13 -17 -12 -8 -14 -4 -11 -7 -4 -10 -14 -17 -1
-Q -10 -3 -8 -9 -16 13 -3 -12 0 -16 -9 -3 -11 -18 -5 -10 -10 -14 -12 -14 -9 5 -1
-E -7 -11 -9 1 -19 -3 11 -7 -12 -16 -17 -5 -14 -20 -14 -12 -12 -17 -18 -11 -4 4 -1
-G -5 -6 -8 -6 -9 -12 -7 11 -13 -17 -18 -12 -15 -19 -12 -5 -11 -10 -17 -11 -7 -9 -1
-H -12 -3 -2 -7 -9 0 -12 -13 15 -14 -10 -9 -12 -11 -7 -8 -10 -16 0 -15 -4 -6 -1
-I -10 -14 -11 -16 -14 -16 -16 -17 -14 12 -4 -14 -1 -8 -15 -11 -4 -16 -12 2 -13 -16 -1
-L -12 -11 -15 -18 -13 -9 -17 -18 -10 -4 10 -15 -2 -4 -7 -10 -12 -10 -13 -5 -17 -13 -1
-K -11 0 -4 -12 -17 -3 -5 -12 -9 -14 -15 12 -9 -21 -13 -10 -7 -16 -17 -15 -8 -4 -1
-M -9 -11 -12 -15 -12 -11 -15 -16 -12 -1 -2 -9 15 -10 -14 -12 -4 -13 -14 -3 -13 -13 -1
-F -15 -19 -16 -19 -8 -18 -20 -19 -11 -8 -4 -19 -10 13 -14 -8 -15 -10 0 -9 -17 -19 -1
-P -5 -9 -13 -15 -14 -5 -14 -12 -7 -15 -7 -13 -14 -14 12 -3 -7 -18 -16 -13 -14 -10 -1
-S -2 -8 -1 -9 -4 -10 -12 -5 -8 -11 -10 -10 -12 -8 -3 10 -1 -12 -9 -11 -5 -11 -1
-T -1 -9 -4 -11 -10 -10 -12 -11 -10 -4 -12 -7 -4 -15 -7 -1 11 -16 -14 -7 -7 -11 -1
-W -17 -6 -18 -18 -7 -14 -18 -10 -17 -17 -10 -17 -14 -10 -18 -12 -15 18 -9 -13 -18 -16 -1
-Y -16 -14 -9 -11 -4 -12 -18 -17 0 -12 -12 -17 -14 0 -16 -9 -13 -9 14 -15 -10 -15 -1
-V -3 -14 -14 -12 -9 -14 -11 -11 -15 2 -5 -15 -2 -9 -13 -11 -7 -13 -14 11 -13 -12 -1
-B -9 -12 6 6 -14 -9 -4 -7 -4 -13 -17 -8 -13 -18 -14 -5 -7 -19 -10 -13 12 -6 -1
-Z -12 -13 -13 -4 -27 4 10 -13 -12 -24 -21 -6 -20 -29 -17 -17 -17 -24 -24 -18 -6 12 -1
-XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 md_20.mat ||
-echo 'restore of md_20.mat failed'
-Wc_c="`wc -c < 'md_20.mat'`"
-test 2256 -eq "$Wc_c" ||
- echo 'md_20.mat: original size 2256, current size' "$Wc_c"
-fi
-# ============= md_40.mat ==============
-if test -f 'md_40.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping md_40.mat (File already exists)'
-else
-echo 'x - extracting md_40.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'md_40.mat' &&
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 9 -7 -6 -6 -7 -7 -5 -3 -10 -6 -9 -8 -7 -11 -2 0 1 -13 -12 -1 -6 -6 -1
-R -7 11 -6 -10 -5 0 -8 -4 0 -10 -9 3 -8 -14 -6 -5 -6 -4 -10 -11 -8 -4 -1
-N -6 -6 12 2 -8 -5 -5 -5 0 -8 -12 -1 -9 -13 -9 1 -2 -16 -6 -10 7 -5 -1
-D -6 -10 2 11 -13 -6 3 -4 -5 -12 -15 -8 -11 -16 -11 -6 -7 -15 -8 -9 6 -1 -1
-C -6 -5 -8 -13 16 -12 -15 -7 -6 -11 -11 -13 -9 -6 -11 -2 -7 -4 -2 -7 -11 -13 -1
-Q -7 0 -5 -6 -12 12 0 -9 2 -13 -6 0 -8 -14 -3 -7 -7 -11 -9 -11 -6 6 -1
-E -5 -8 -5 3 -15 0 10 -4 -8 -12 -13 -3 -11 -16 -10 -8 -8 -13 -14 -8 -1 5 -1
-G -3 -4 -5 -4 -7 -9 -4 10 -10 -13 -14 -9 -12 -15 -9 -2 -8 -7 -15 -8 -5 -7 -1
-H -10 0 0 -5 -6 2 -8 -10 14 -11 -7 -6 -9 -7 -4 -6 -7 -12 2 -12 -2 -3 -1
-I -6 -10 -8 -12 -11 -13 -12 -13 -11 11 -1 -11 1 -6 -11 -8 -2 -12 -9 4 -10 -12 -1
-L -9 -9 -12 -14 -11 -6 -13 -14 -7 -1 9 -12 1 -1 -5 -7 -9 -7 -9 -2 -13 -10 -1
-K -8 3 -1 -8 -13 0 -3 -9 -6 -11 -12 11 -7 -18 -10 -7 -5 -12 -13 -12 -5 -2 -1
-M -7 -8 -9 -11 -8 -8 -11 -12 -9 1 1 -7 14 -7 -10 -8 -2 -11 -11 0 -10 -10 -1
-F -11 -14 -12 -16 -6 -14 -16 -15 -7 -6 -1 -17 -7 13 -11 -5 -11 -7 2 -6 -14 -15 -1
-P -2 -6 -9 -12 -11 -3 -10 -9 -4 -11 -5 -10 -10 -11 12 -1 -4 -14 -12 -9 -11 -7 -1
-S 0 -5 1 -6 -2 -7 -8 -2 -6 -8 -7 -7 -8 -5 -1 9 1 -10 -7 -7 -3 -8 -1
-T 1 -6 -2 -7 -7 -7 -8 -8 -7 -2 -9 -5 -2 -11 -4 1 10 -14 -10 -4 -5 -8 -1
-W -14 -4 -17 -15 -4 -12 -13 -7 -11 -12 -7 -13 -11 -7 -14 -10 -14 18 -6 -11 -16 -12 -1
-Y -12 -9 -6 -8 -2 -9 -14 -14 2 -9 -9 -13 -11 2 -12 -7 -11 -6 14 -11 -7 -11 -1
-V -1 -11 -10 -9 -7 -11 -8 -8 -12 4 -2 -12 0 -6 -10 -7 -4 -10 -11 10 -10 -9 -1
-B -6 -8 7 6 -11 -6 -1 -5 -2 -10 -13 -5 -10 -14 -10 -3 -5 -16 -7 -10 11 -3 -1
-Z -8 -8 -8 0 -21 6 10 -9 -7 -18 -16 -3 -15 -23 -12 -12 -12 -19 -18 -14 -3 11 -1
-XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 md_40.mat ||
-echo 'restore of md_40.mat failed'
-Wc_c="`wc -c < 'md_40.mat'`"
-test 2255 -eq "$Wc_c" ||
- echo 'md_40.mat: original size 2255, current size' "$Wc_c"
-fi
-# ============= mgstm1.aa ==============
-if test -f 'mgstm1.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.aa (File already exists)'
-else
-echo 'x - extracting mgstm1.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.aa' &&
->GT8.7 | 266 40001 90043 | transl. of pa875.con, 19 to 675
-MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
-YTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLI
-DGSHKITQSNAILRYLARKHHLDGETEEERIR
-ADIVENQVMDTRMQLIMLCYNPDFEKQKPEFL
-KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
-YDILDQYRMFEPKCLDAFPNLRDFLARFEGLK
-KISAYMKSSRYIATPIFSKMAHWSNK
-SHAR_EOF
-chmod 0644 mgstm1.aa ||
-echo 'restore of mgstm1.aa failed'
-Wc_c="`wc -c < 'mgstm1.aa'`"
-test 284 -eq "$Wc_c" ||
- echo 'mgstm1.aa: original size 284, current size' "$Wc_c"
-fi
-# ============= mgstm1.aaa ==============
-if test -f 'mgstm1.aaa' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.aaa (File already exists)'
-else
-echo 'x - extracting mgstm1.aaa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.aaa' &&
->GT8.7 | 266 40001 90043 | transl. of pa875.con, 19 to 675
-MPMILGY@WNVRGLT#HPIRMLLEY@T#DS*S*Y@DEKR
-Y@T#MGDAPDFDRS*QWLNEKFKLGLDFPNLPY@LI
-DGS*HKIT#QSNAILRY@LARKHHLDGET#EEERIR
-ADIVENQVMDT#RMQLIMLCY@NPDFEKQKPEFL
-KT#IPEKMKLY@SEFLGKRPWFAGDKVT#Y@VDFLA
-Y@DILDQY@RMFEPKCLDAFPNLRDFLARFEGLK
-KISAY@MKSSRY@IAT#PIFSKMAHWSNK
-SHAR_EOF
-chmod 0644 mgstm1.aaa ||
-echo 'restore of mgstm1.aaa failed'
-Wc_c="`wc -c < 'mgstm1.aaa'`"
-test 310 -eq "$Wc_c" ||
- echo 'mgstm1.aaa: original size 310, current size' "$Wc_c"
-fi
-# ============= mgstm1.e05 ==============
-if test -f 'mgstm1.e05' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.e05 (File already exists)'
-else
-echo 'x - extracting mgstm1.e05 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.e05' &&
->pGT875 | 266 with an average of 5% of residues modified by mutr.
-GCTGAAGCCTAGTTTGAGAAGACCACCAGCACCACCACCATGCCTATGATATGGGATACTG
-GAAAGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACCCAAGTTA
-TGATGAGAAGAGATACACTATGGGTGACGGCTCCCGACTTTGACAGACAGTGGCTGA
-ATGAGAAGNTTCAAGCTGGGCCTGGAATTTCCCTAATCTGCCTTACTTGATCGATGGATCA
-CACAAGATCACCCAGAGAATGCCATCCTGCGCTACCTGGCCACAAAGCCCACCTGGAGGA
-GATGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGATAGCATGGAAACC
-CGCTGCAGCNNNNCATGCTCTCGTTACAACCTTGACTTTGAGAAGCAGAAGCCAGAGTTC
-TTGAAGACCATCCCTGAGAAAATGAGCTCTACTCTGAGTTCCTGGGATGCAAGAGGCCATGGT
-TTGCATGGGACAAGTGTCACCTATGTGGATTTCTTTGCTTATGACATTCTTGACCAGTAC
-CGTATGTTTGAGCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGTGACTTCCTGGCCCGC
-TTCGAGGGCCTCAAGAAGATCTCTGCTCTACATGAAGAGTAGCCGGTACATCGGCACAGC
-TCATATTTACAAAGATGGCCCACTGGAGTAACAAGCAGGCCCTTGCTACACGGCACTCAC
-TAGGAGGACCTGTCCNNACTGGTGGCTCCTGCAGTCCCTGTGTGGGGACAAGCACCCTGG
-CCTTCTGCACTGTGGCTCCTGGTTCCTCTCCTCCCGCTCCCTTCTGCAGTTGGTCAGCCC
-CATCTCCTCACCCTCTTCCCAGTCAAGGCCACACGCCTTCATTCGTCCCCGTCTTCTTTC
-ACATGGCCTCCTTCTTCGATTGGCTCCCTGACCCACACCTCACAGCCCGTTTCTGCGAAC
-TGAGGTCTGTCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACCACTATCTTAGT
-GCTAGCCCTGCCCTAGAGTTACCCGAAGTCAATACTTGAAGTGCCAGCCTGCTTCCTGGT
-GGTAGTAGCCTCCCCAGGTCGGTCTCGTCTACAATAAAGTCATGAAACACACT
-SHAR_EOF
-chmod 0644 mgstm1.e05 ||
-echo 'restore of mgstm1.e05 failed'
-Wc_c="`wc -c < 'mgstm1.e05'`"
-test 1220 -eq "$Wc_c" ||
- echo 'mgstm1.e05: original size 1220, current size' "$Wc_c"
-fi
-# ============= mgstm1.eeq ==============
-if test -f 'mgstm1.eeq' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.eeq (File already exists)'
-else
-echo 'x - extracting mgstm1.eeq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.eeq' &&
->mgstm1 | 266
-ATGCCTATGATACTGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
-CTGGAATACACAGACTCAAGCTATAGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
-TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCCTGGGCCTGGACTTTCCCAATCT
-GCCTTACTTATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
-TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGACCGTGCAGACATTGT
-GGAGAAGGCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
-TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
-GTTCCTGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
-CTTATGACATTCTTGACCAGTACCGTTGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
-ACCTGAGGGACTTCCTTTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
-GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATCCCACTGGAGTAACAAGTAGG
-CCCTTGCTACACGGGCACACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
-GGGTGGGGACAGCACCCTGGCCTTCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
-CCCTTCTGCAGCTTGTTTGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
-CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCTTGGCTCCTGACCCAACCT
-CACAGCCCGTTTCTGCGAATGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
-ATGGTCAACACTATCTTAGTGCTAGCACCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
-TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGC
-AAACACACTT
-SHAR_EOF
-chmod 0644 mgstm1.eeq ||
-echo 'restore of mgstm1.eeq failed'
-Wc_c="`wc -c < 'mgstm1.eeq'`"
-test 1122 -eq "$Wc_c" ||
- echo 'mgstm1.eeq: original size 1122, current size' "$Wc_c"
-fi
-# ============= mgstm1.esq ==============
-if test -f 'mgstm1.esq' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.esq (File already exists)'
-else
-echo 'x - extracting mgstm1.esq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.esq' &&
->mgstm1e
-ATGCCTATGATACTGGGATACTGGGTCCGCGGACTGACACACCCGATCCGCATGCTC
-CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
-TTTGACAGAAGCCAGTGGCTGAAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCT
-GCCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCT
-TGCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGT
-GGAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTT
-TGAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGA
-GTTCCTGGGCAAGAGGCCATGGTTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTG
-CTTATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAA
-ACCTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGA
-GTAGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGG
-CCCTTGCTACACGGGCACTCACTAGGAGGACCTGTCCACACTGGGGATCCTGCAGGCCCT
-GGGTGGGGACAGCACCCTGGCCTTCTGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCT
-CCCTTCTGCAGCTTGGTCAGCCCCATCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGC
-CTTCATTCTCCCCAGTTTCTTTCACATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCT
-CACAGCCCGTTTCTGCGAACTGAGGTCTGTCCTGAACTCACGCTTCCTAGAATTACCCCG
-ATGGTCAACACTATCTTAGTGCTAGCCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAG
-TGCCAGCCTGTTCCTGGTGGAGTAGCCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTG
-AAACACACTT
-SHAR_EOF
-chmod 0644 mgstm1.esq ||
-echo 'restore of mgstm1.esq failed'
-Wc_c="`wc -c < 'mgstm1.esq'`"
-test 1116 -eq "$Wc_c" ||
- echo 'mgstm1.esq: original size 1116, current size' "$Wc_c"
-fi
-# ============= mgstm1.gcg ==============
-if test -f 'mgstm1.gcg' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.gcg (File already exists)'
-else
-echo 'x - extracting mgstm1.gcg (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.gcg' &&
-GT8.7 transl. of pa875.con, 19 to 675
-X gt87 Length: 217 July 31, 1996 19:51 Type: P Check: 9358 ..
-X
-X 1 PMILGYWNVR GLTHPIRMLL EYTDSSYDEK RYTMGDAPDF DRSQWLNEKF
-X
-X 51 KLGLDFPNLP YLIDGSHKIT QSNAILRYLA RKHHLDGETE EERIRADIVE
-X
-X 101 NQVMDTRMQL IMLCYNPDFE KQKPEFLKTI PEKMKLYSEF LGKRPWFAGD
-X
-X 151 KVTYVDFLAY DILDQYRMFE PKCLDAFPNL RDFLARFEGL KKISAYMKSS
-X
-X 201 RYIATPIFSK MAHWSNK
-X
-SHAR_EOF
-chmod 0644 mgstm1.gcg ||
-echo 'restore of mgstm1.gcg failed'
-Wc_c="`wc -c < 'mgstm1.gcg'`"
-test 406 -eq "$Wc_c" ||
- echo 'mgstm1.gcg: original size 406, current size' "$Wc_c"
-fi
-# ============= mgstm1.lc ==============
-if test -f 'mgstm1.lc' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.lc (File already exists)'
-else
-echo 'x - extracting mgstm1.lc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.lc' &&
->GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
-MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
-ytmgdapdfdrsqwlnekfklgldfpnlpyli
-DGSHKITQSNAILRYLARKHHLDGETEEERIR
-adivenqvmdtrmqlimlcynpdfekqkpefl
-KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
-ydildqyrmfepkcldafpnlrdflarfeglk
-KISAYMKSSRYIATPIFSKMAHWSNK
-SHAR_EOF
-chmod 0644 mgstm1.lc ||
-echo 'restore of mgstm1.lc failed'
-Wc_c="`wc -c < 'mgstm1.lc'`"
-test 282 -eq "$Wc_c" ||
- echo 'mgstm1.lc: original size 282, current size' "$Wc_c"
-fi
-# ============= mgstm1.nt ==============
-if test -f 'mgstm1.nt' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.nt (File already exists)'
-else
-echo 'x - extracting mgstm1.nt (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.nt' &&
->pGT875
-ATGCCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTC
-CTGGAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGAC
-TTTGACAGAAGCCAGTGGCTGAATGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTG
-CCTTACTTGATCGATGGATCACACAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTT
-GCCCGAAAGCACCACCTGGATGGAGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTG
-GAGAACCAGGTCATGGACACCCGCATGCAGCTCATCATGCTCTGTTACAACCCTGACTTT
-GAGAAGCAGAAGCCAGAGTTCTTGAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAG
-TTCCTGGGCAAGAGGCCATGGTTTGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCT
-TATGACATTCTTGACCAGTACCGTATGTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAAC
-CTGAGGGACTTCCTGGCCCGCTTCGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGT
-AGCCGCTACATCGCAACACCTATATTTTCAAAGATGGCCCACTGGAGTAACAAGTAG
-SHAR_EOF
-chmod 0644 mgstm1.nt ||
-echo 'restore of mgstm1.nt failed'
-Wc_c="`wc -c < 'mgstm1.nt'`"
-test 677 -eq "$Wc_c" ||
- echo 'mgstm1.nt: original size 677, current size' "$Wc_c"
-fi
-# ============= mgstm1.nts ==============
-if test -f 'mgstm1.nts' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.nts (File already exists)'
-else
-echo 'x - extracting mgstm1.nts (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.nts' &&
->mgstm1
-GCACCATGCCTATGAT,
-GATACACCA,
-CCATCCTGCGCTACCTTGCC,
-aaggtcacctatgtggatttccttgcttat,
-CCTGTCCACACTGGG,
-TCAAGTCCACACAGCC,
-TCACGCTTCCTA,
-CAATACTTGAGTGCCAGCC
-SHAR_EOF
-chmod 0644 mgstm1.nts ||
-echo 'restore of mgstm1.nts failed'
-Wc_c="`wc -c < 'mgstm1.nts'`"
-test 160 -eq "$Wc_c" ||
- echo 'mgstm1.nts: original size 160, current size' "$Wc_c"
-fi
-# ============= mgstm1.raa ==============
-if test -f 'mgstm1.raa' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.raa (File already exists)'
-else
-echo 'x - extracting mgstm1.raa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.raa' &&
->mgstm1.aa shuffled
-LEGLPLKPCK RPQDRFSEDR VILFESFTYG FILAAWNMGY NEAEDMDRSH YLLTKELPKS
-YGGRRYYAPD FTYLFLILRN PPVKRAAPDR GNTMLQIFMA FLDDQYVMQD AFLPIGDGLK
-DKPMRSNMKY ITHNVYIDED IVRCKWIFAD EMSTPLLLWL MHKQKPGHRF LEKSWSHTRR
-EEEYNSIIDL KKSYKYLKNM AELKITSQTI FFDKDAE
-SHAR_EOF
-chmod 0644 mgstm1.raa ||
-echo 'restore of mgstm1.raa failed'
-Wc_c="`wc -c < 'mgstm1.raa'`"
-test 259 -eq "$Wc_c" ||
- echo 'mgstm1.raa: original size 259, current size' "$Wc_c"
-fi
-# ============= mgstm1.rev ==============
-if test -f 'mgstm1.rev' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.rev (File already exists)'
-else
-echo 'x - extracting mgstm1.rev (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.rev' &&
->mgstm1 reverse complement
-AAGTGTGTTTCAGACTTTATTGTAGACGAGACAGACCTGGGGAGGCTACTCCACCAGGAACAGGCTGGCACTCAA
-GTATTGACTTCGGGGTAACTCTAGGGAGGGCTAGCACTAAGATAGTGTTGACCATCGGGGTAATTCTAGGAAGCG
-TGAGTTCAGGACAGACCTCAGTTCGCAGAAACGGGCTGTGAGGTTGGGTCAGGGAGCCAATGAAGAAGGGGCCAT
-gtgaaagaaactggggagaatgaaggctgtgtggacttgactgggaagagggtgaggagatggggctgaccaagc
-tgcagaagggagcgggaaggagagagaaccaggagccacagtgcagaaggccagggtgctgtccccacccagggc
-CTGCAGGATCCCCAGTGTGGACAGGTCCTCCTAGTGAGTGCCCGTGTAGCAAGGGCCTACTTGTTACTCCAGTGG
-GCCATCTTTGAAAATATAGGTGTTGCGATGTAGCGGCTACTCTTCATGTAGGCAGAGATCTTCTTGAGGCCCTCG
-AAGCGGGCCAGGAAGTCCCTCAGGTTTGGGAAGGCGTCCAGGCACTTGGGCTCAAACATACGGTACTGGTCAAGA
-ATGTCATAAGCAAGGAAATCCACATAGGTGACCTTGTCCCCTGCAAACCATGGCCTCTTGCCCAGGAACTCAGAG
-tagagcttcattttctcagggatggtcttcaagaactctggcttctgcttctcaaagtcagggttgtaacagagc
-atgatgagctgcatgcgggtgtccatgacctggttctccacaatgtctgcacggatcctctcctcctctgtctct
-ccatccaggtggtgctttcgggcaaggtagcgcaggatggcattgctctgggtgatcttgtgtgatccatcgatc
-AAGTAAGGCAGATTGGGAAAGTCCAGGCCCAGCTTGAACTTCTCATTCAGCCACTGGCTTCTGTCAAAGTCGGGA
-GCGTCACCCATGGTGTATCTCTTCTCATCATAGCTTGAGTCTGTGTATTCCAGGAGCATGCGGATCGGGTGTGTC
-AGTCCGCGGACGTTCCAGTATCCCAGTATCATAGGCATGGTGCTGGTGCTGTGGTCTTCTCAAACTGGCTTCAGC
-SHAR_EOF
-chmod 0644 mgstm1.rev ||
-echo 'restore of mgstm1.rev failed'
-Wc_c="`wc -c < 'mgstm1.rev'`"
-test 1167 -eq "$Wc_c" ||
- echo 'mgstm1.rev: original size 1167, current size' "$Wc_c"
-fi
-# ============= mgstm1.seq ==============
-if test -f 'mgstm1.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping mgstm1.seq (File already exists)'
-else
-echo 'x - extracting mgstm1.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgstm1.seq' &&
->pGT875 | 266
-GCTGAAGCCAGTTTGAGAAGACCACAGCACCAGCACCATGCCTATGATACTGGGATACTG
-GAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTGGAATACACAGACTCAAGCTA
-TGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTTGACAGAAGCCAGTGGCTGAA
-TGAGAAGTTCAAGCTGGGCCTGGACTTTCCCAATCTGCCTTACTTGATCGATGGATCACA
-CAAGATCACCCAGAGCAATGCCATCCTGCGCTACCTTGCCCGAAAGCACCACCTGGATGG
-AGAGACAGAGGAGGAGAGGATCCGTGCAGACATTGTGGAGAACCAGGTCATGGACACCCG
-CATGCAGCtCATCATGCTCTGTTACAACCCTGACTTTGAGAAGCAGAAGCCAGAGTTCTT
-GAAGACCATCCCTGAGAAAATGAAGCTCTACTCTGAGTTCCTGGGCAAGAGGCCATGGTT
-TGCAGGGGACAAGGTCACCTATGTGGATTTCCTTGCTTATGACATTCTTGACCAGTACCG
-TATgTTTGAGCCCAAGTGCCTGGACGCCTTCCCAAACCTGAGGGACTTCCTGGCCCGCTT
-CGAGGGCCTCAAGAAGATCTCTGCCTACATGAAGAGTAGCCGCTACATCGCAACACCTAT
-ATTTTCAAAGATGGCCCACTGGAGTAACAAGTAGGCCCTTGCTACACGGGCACTCACTAG
-GAGGACCTGTCCACACTGGGgATCCTGCAGGCCCTGGGTGGGGACAGCACCCTGGCCTTC
-TGCACTGTGGCTCCTGGTTCTCTCTCCTTCCCGCTCCCTTCTGCAGCTTGGTCAGCCCCA
-TCTCCTCACCCTCTTCCCAGTCAAGTCCACACAGCCTTCATTCTCCCCAGTTTCTTTCAC
-ATGGCCCCTTCTTCATTGGCTCCCTGACCCAACCTCACAGCCCGTTTCTGCGAACTGAGG
-TCTGTCCTGAACTCACGCTTCCTAGAATTACCCCGATGGTCAACACTATCTTAGTGCTAG
-CCCTCCCTAGAGTTACCCCGAAGTCAATACTTGAGTGCCAGCCTGTTCCTGGTGGAGTAG
-CCTCCCCAGGTCTGTCTCGTCTACAATAAAGTCTGAAACACACTT
-SHAR_EOF
-chmod 0644 mgstm1.seq ||
-echo 'restore of mgstm1.seq failed'
-Wc_c="`wc -c < 'mgstm1.seq'`"
-test 1158 -eq "$Wc_c" ||
- echo 'mgstm1.seq: original size 1158, current size' "$Wc_c"
-fi
-# ============= mgtt2_x.seq ==============
-if test -f 'mgtt2_x.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping mgtt2_x.seq (File already exists)'
-else
-echo 'x - extracting mgtt2_x.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mgtt2_x.seq' &&
->>mgtt2_x Length: 1089 January 26, 2000 04:00 Type: N Check: 1394 ..
-CTGAGTTGGG TCCACGAAAG CCCAGCTAGG CCATTACCGC GTCCGGGTGA GACTAAGGTC CTGGGCTGGA TTCCTGGCTC CACGGTCCGC TGGAGCAAAT
-CGCATAAGTC AGTCTGAGTG CGCGCGCCCT CAGCCCTGCT TTTGGTATAA AGTCCTCCAA AGCGTCTCCC TCCCCAANNN NGATCagCAg GtGTCAGCTA
-TCCAGAGGAG GAAATCGTTT GGCTTGGcCA ACTGAGGcTG TGCTGGACCC CAGCTTGCTG TTATCGAACG CAGTCGGCAC ACCATCTTGT GTCGCTACCG
-GCAATGGGCT TGGAGCTCTA CCTGGACCTG CTGTCACAAC CCAGCCGCGC TGTCTACATC tTCNGCCAaG AAGAATGGCA TCCCCTTCCA GACGCGTACC
-GTGGATATAC TCAAAGGGCA GCACATGAGC GAGCAATTCT CCCAGGTGAA CTGCTTAAAC AAAGTTCCTG TACTCAAAGA CGGAAGCTTC GTGTTGACCG
-AAAGCACAGC CATCTtGATT TACCTGAGTT CCAAGTACCA GGTGGCAGAC CACTGGTACC CGGCCGACCT ACAGGCCCGT GCCCAAGTCC ACGAATACCT
-GGGCTGGCAT GcCGACAACA TCCgtGGTAC TTtcgGAGTG CTCCTATGGA CCNAAgGTGT TgGGGCCACT CATTGgGGTc CAgGTTCCCC agGAGAAGGT
-GGAACgGAAC agAGATAGAA TGGTCCTGGt TCTGCaACAG CTGGAgGACA AGTTCTCAGG GACAGGsCTC CTGTTGGCAG CAGTGAGCTA GCGATCTCAT
-TCTCTGGAGA GTGATGCAGC GTGCTCTTGC TATACCTGTT GAGGACGGCT CAGCTGACAG CATGCGAGAA AGGTGGAGGC GTCTTGGTGC TGAGCTGTGT
-AGAGCTCATA GACATCTGGC ATCTGGACAA GCAGCAGAAA TGTACAGTAC CCCTTCGAGT CATGCACATG CACTCAATTG TAGATCCTGA TGGTTGACCA
-CATAAGACTA TTTGTGTTAA AAAAGGGGGC CGTCCCATTC CCTTATGATC GATACATACT GGCTCCTTTA CACATNGATG GAAAACTGC
-SHAR_EOF
-chmod 0644 mgtt2_x.seq ||
-echo 'restore of mgtt2_x.seq failed'
-Wc_c="`wc -c < 'mgtt2_x.seq'`"
-test 1286 -eq "$Wc_c" ||
- echo 'mgtt2_x.seq: original size 1286, current size' "$Wc_c"
-fi
-# ============= mm_file.h ==============
-if test -f 'mm_file.h' -a X"$1" != X"-c"; then
- echo 'x - skipping mm_file.h (File already exists)'
-else
-echo 'x - extracting mm_file.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mm_file.h' &&
-/*
-X copyright (c) 1999 William R. Pearson
-*/
-X
-/* $Name: fa_34_26_5 $ - $Id: mm_file.h,v 1.26 2006/10/05 18:20:40 wrp Exp $ */
-X
-/*
-X mm_file.h - defines m_file_str for mmap()ed files
-*/
-X
-#include <sys/types.h>
-X
-#ifndef USE_FSEEKO
-#define FSEEK fseek
-#define FTELL ftell
-typedef long fseek_t;
-#else
-#define FSEEK fseeko
-#define FTELL ftello
-typedef off_t fseek_t;
-#endif
-#define FSEEK_T_DEF
-X
-#ifdef HAS_INTTYPES
-#include <inttypes.h>
-#else
-#ifdef WIN32
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-#else
-typedef long int64_t;
-typedef unsigned long uint64_t;
-#endif
-#endif
-#ifdef BIG_LIB64
-typedef int64_t MM_OFF;
-#else
-typedef long MM_OFF;
-#endif
-X
-#ifdef MYSQL_DB
-#include <mysql.h>
-#endif
-#ifdef PGSQL_DB
-#include <libpq-fe.h>
-#endif
-X
-struct lmf_str {
-X FILE *libf; /* sequence file being read */
-X FILE *hfile; /* BLAST2.0 description file */
-X unsigned int *oid_list; /* oid list for subsets */
-X int oid_seqs; /* start offset for mask array */
-X int pref_db; /* preferred database */
-X unsigned int max_oid; /* start offset for mask array */
-X
-X char lb_name[120]; /* file name */
-X int lb_type; /* library type */
-X int *sascii; /* ascii -> sq mapping */
-X
-X /* used by flat files */
-X char *lline; /* last line read */
-X unsigned char *cpsave; /* position in line for lgetlib() */
-X fseek_t lpos; /* position in file */
-X
-X /* Genbank Flat files */
-X int lfflag; /* flag for CRLF in EMBL CDROM files */
-X
-X /* stuff for GCG format files (5,6) */
-X int gcg_binary; /* flag for binary gcg format */
-X long gcg_len; /* length of GCG sequence */
-X
-X int bl_lib_pos; /* for ncbl2 */
-X int bl_format_ver; /* blast formatdb version */
-X char opt_text[MAX_FN]; /* text after filename */
-X
-X /* used when memory mapping */
-X int mm_flg; /* mmap worked */
-X int mmap_fd; /* mmap_fd */
-X char *mmap_base; /* base */
-X char *mmap_addr; /* current pos */
-X long st_size; /* file size */
-X
-X MM_OFF *d_pos_arr; /* pointer to desc. offsets */
-X MM_OFF *s_pos_arr; /* pointer to seq. offsets */
-X MM_OFF *a_pos_arr; /* pointer to aux offsets */
-X
-X /* currently available only for memory mapped files */
-X int max_cnt; /* # database entries */
-X int64_t tot_len; /* total residue length */
-X long max_len; /* maximum sequence lengh */
-X int lib_aa; /* 0 = DNA, 1 = prot */
-X char *tmp_buf; /* temporary buffer */
-X int tmp_buf_max; /* max size */
-X
-X /* used for SQL database queries */
-X char *sql_db, *sql_query, *sql_getdesc, *sql_getseq;
-X int sql_reopen;
-X char **sql_uid_arr; /* indexed by lpos */
-X /* used to get sequence data */
-X char *sql_seqp;
-X
-#ifdef MYSQL_DB
-X /* used to open the database */
-X MYSQL *mysql_conn;
-X MYSQL_RES *mysql_res;
-X MYSQL_ROW mysql_row;
-#endif
-X
-#ifdef PGSQL_DB
-X /* used to open the database */
-X PGconn *pgsql_conn;
-X PGresult *pgsql_res;
-#endif
-X
-X int (*getlib)(unsigned char *seq, int maxs,
-X char *libstr, int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *lm_fd,
-X long *l_off);
-X
-X void (*ranlib)(char *str, int cnt,
-X fseek_t libpos, char *libstr,
-X struct lmf_str *lm_fd);
-};
-X
-SHAR_EOF
-chmod 0644 mm_file.h ||
-echo 'restore of mm_file.h failed'
-Wc_c="`wc -c < 'mm_file.h'`"
-test 3057 -eq "$Wc_c" ||
- echo 'mm_file.h: original size 3057, current size' "$Wc_c"
-fi
-# ============= mmgetaa.c ==============
-if test -f 'mmgetaa.c' -a X"$1" != X"-c"; then
- echo 'x - skipping mmgetaa.c (File already exists)'
-else
-echo 'x - extracting mmgetaa.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mmgetaa.c' &&
-/* mmgetaa.c - functions for mmap()ed access to libraries */
-X
-/* copyright (c) 1999,2000 William R. Pearson */
-X
-/* version 0 September, 1999 */
-X
-/*
-X This is one of two alternative files that can be used to
-X read a database. The two files are nmgetaa.c, and mmgetaa.c
-X (nxgetaa.c has been retired).
-X
-X nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
-X the same functions as nxgetaa.c if memory mapping is not used,
-X mmgetaa.c provides the database reading functions if memory
-X mapping is used. The decision to use memory mapping is made on
-X a file-by-file basis.
-*/
-X
-/* $Name: fa_34_26_5 $ - $Id: mmgetaa.c,v 1.41 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-X
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-X
-#define MAXLINE 512
-#define EOSEQ 0
-X
-#define XTERNAL
-#include "uascii.h"
-/* #include "upam.h" */
-#undef XTERNAL
-X
-#ifdef SUPERFAMNUM
-extern int nsfnum; /* number of superfamily numbers */
-extern int sfnum[10]; /* superfamily number from types 0 and 5 */
-extern int nsfnum_n;
-extern int sfnum_n[10];
-static char tline[MAXLINE];
-#endif
-X
-#define GCGBIN 6
-X
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-extern MM_OFF bl2_long8_cvt(int64_t);
-extern int bl2_uint4_cvt(int);
-X
-X
-long crck(char *, int);
-extern void src_int4_read(FILE *fd, int *val);
-extern void src_long4_read(FILE *fd, long *valp);
-extern void src_long8_read(FILE *fd, int64_t *val);
-X
-/* load_mmap() loads the d_pos[] and s_pos[] arrays for rapid access */
-X
-struct lmf_str *
-load_mmap(FILE *libi, /* fd for already open ".xin" file */
-X char *sname, /* name of sequence database file */
-X int lib_type, /* 0-Fasta, 5-vms_pir, 6-gcg_binary */
-X int ldnaseq, /* 1 for DNA, 0 for protein */
-X struct lmf_str *m_fd)
-{
-X char format[4];
-X int i, lib_aa;
-X MM_OFF f_size;
-X long lf_size;
-X struct stat statbuf;
-X int max_cnt;
-X MM_OFF *d_pos_arr, *s_pos_arr;
-X int mm_flag, mm64_flag;
-X int *tmp_pos_arr;
-X
-X /* first check that the necessary indices are up-to-date */
-X /* read the offsets in ".xin" file */
-X if (fread(format,1,4,libi)==0) {
-X fprintf(stderr," cannot read .xin format\n");
-X return NULL;
-X }
-X
-X mm64_flag = (format[2]==1); /* 4 bytes or 8 bytes for long? */
-X
-#ifndef BIG_LIB64
-X if (mm64_flag) {return NULL;}
-#endif
-X
-X if (format[3]!=lib_type) {
-X fprintf(stderr," cannot read format %d != lib_type %d\n",
-X format[3],lib_type);
-X return NULL;
-X }
-X
-X src_int4_read(libi,&lib_aa);
-X if (lib_aa == ldnaseq) { /* database residue mismatch */
-X fprintf(stderr," residue type mismatch %s != %s (.xin) in %s\n",
-X (lib_aa ? "DNA" : "prot."),(ldnaseq ? "prot." : "DNA"),
-X sname);
-X return NULL;
-X }
-X
-X /* everything looks good, allocate an lmf_str */
-X
-X m_fd->lib_aa = lib_aa;
-X
-X /* get get file size from index */
-X if (mm64_flag) src_long8_read(libi,&f_size);
-X else {
-X src_long4_read(libi,&lf_size);
-X f_size = lf_size;
-X }
-X
-X /* now, start to open mmap()ed file */
-X mm_flag=((m_fd->mmap_fd=open(sname,O_RDONLY))>=0);
-X if (!mm_flag) {
-X fprintf(stderr," cannot open %s for mmap()", sname);
-X perror("...");
-X return NULL; /* file did not open */
-X }
-X
-X /* fstat the library file and get size */
-X if(fstat(m_fd->mmap_fd, &statbuf) < 0) {
-X fprintf(stderr," cannot stat %s for mmap()", sname);
-X perror("...");
-X m_fd->mm_flg = 0;
-X goto finish;
-X }
-X
-X /* check for identical sizes - if different, do not mmap */
-X if (f_size != statbuf.st_size) {
-X fprintf(stderr," %s file size (%lld) and expected size (%ld) don't match\n",
-X sname,statbuf.st_size,f_size);
-X mm_flag = 0;
-X goto finish;
-X }
-X
-X /* the index file and library file are open and the sizes match */
-X /* allocate the m_file struct and map the file */
-X
-X m_fd->st_size = statbuf.st_size;
-X if((m_fd->mmap_base =
-X mmap(NULL, m_fd->st_size, PROT_READ,
-X MAP_FILE | MAP_SHARED, m_fd->mmap_fd, 0)) == (char *) -1) {
-X mm_flag = 0;
-#ifdef DEBUG
-X fprintf(stderr," cannot mmap %s", sname);
-X perror("...");
-#endif
-X }
-X finish:
-X close(m_fd->mmap_fd);
-X if (!mm_flag) { return NULL; }
-X
-X /* now finish reading the index file */
-X src_int4_read(libi,&max_cnt);
-X
-X if (mm64_flag) {
-X src_long8_read(libi,&m_fd->tot_len);
-X }
-X else {
-X src_long4_read(libi,&lf_size);
-X m_fd->tot_len = lf_size;
-X }
-X src_long4_read(libi,&lf_size);
-X m_fd->max_len = lf_size;
-X
-#ifdef DEBUG
-X fprintf(stderr,
-X "%s\tformat: %c%c%d %d; max_cnt: %d; tot_len: %lld max_len: %ld\n",
-X sname,format[0],format[1],format[2],format[3],
-X max_cnt,m_fd->tot_len,m_fd->max_len);
-#endif
-X
-X /* allocate array of description pointers */
-X if (!mm64_flag) {
-X if ((tmp_pos_arr=(int *)calloc(max_cnt+1,sizeof(int)))==NULL) {
-X fprintf(stderr," cannot allocate %d for tmp_pos array\n",
-X max_cnt+1);
-X }
-X }
-X
-X if ((d_pos_arr=(MM_OFF *)calloc(max_cnt+1, sizeof(MM_OFF)))==NULL) {
-X fprintf(stderr," cannot allocate %d for desc. array\n",max_cnt+1);
-X exit(1);
-X }
-X
-X /* read m_fd->d_pos[max_cnt+1] */
-X if (mm64_flag) {
-X if (fread(d_pos_arr,sizeof(MM_OFF),max_cnt+1,libi)!=
-X max_cnt+1) {
-X fprintf(stderr," error reading desc. offsets: %s\n",sname);
-X return NULL;
-X }
-X }
-X else {
-X if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
-X max_cnt+1) {
-X fprintf(stderr," error reading desc. offsets: %s\n",sname);
-X return NULL;
-X }
-#ifdef DEBUG
-X fprintf(stderr,"d_pos_crc: %ld\n",
-X crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
-#endif
-X }
-X
-X
-#ifndef IS_BIG_ENDIAN
-X if (mm64_flag)
-X for (i=0; i<=max_cnt; i++) {
-X d_pos_arr[i] = bl2_long8_cvt(d_pos_arr[i]);
-X }
-X else
-X for (i=0; i<=max_cnt; i++) {
-X d_pos_arr[i] = bl2_uint4_cvt(tmp_pos_arr[i]);
-X }
-#else
-X if (!mm64_flag) {
-X for (i=0; i<=max_cnt; i++) {
-X d_pos_arr[i] = tmp_pos_arr[i];
-X }
-X }
-#endif
-X
-#ifdef DEBUG
-X for (i=0; i<max_cnt-1; i++) {
-X if (d_pos_arr[i+1] <= d_pos_arr[i] )
-X fprintf(stderr," ** dpos_error [%d]\t%ld\t%ld\n",
-X i,d_pos_arr[i],d_pos_arr[i+1]);
-X }
-#endif
-X
-X /* allocate array of sequence pointers */
-X if ((s_pos_arr=(MM_OFF *)calloc(max_cnt+1,sizeof(MM_OFF)))==NULL) {
-X fprintf(stderr," cannot allocate %d for seq. array\n",max_cnt+1);
-X exit(1);
-X }
-X
-X /* read m_fd->s_pos[max_cnt+1] */
-X if (mm64_flag) {
-X if (fread(s_pos_arr,sizeof(long),max_cnt+1,libi)!=
-X max_cnt+1) {
-X fprintf(stderr," error reading seq offsets: %s\n",sname);
-X return NULL;
-X }
-X }
-X else {
-X if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
-X max_cnt+1) {
-X fprintf(stderr," error reading seq offsets: %s\n",sname);
-X return NULL;
-X }
-#ifdef DEBUG
-X fprintf(stderr,"s_pos_crc: %ld\n",
-X crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
-#endif
-X }
-X
-#ifndef IS_BIG_ENDIAN
-X if (mm64_flag)
-X for (i=0; i<=max_cnt; i++)
-X s_pos_arr[i] = bl2_long8_cvt(s_pos_arr[i]);
-X else
-X for (i=0; i<=max_cnt; i++)
-X s_pos_arr[i] = (long)bl2_uint4_cvt(tmp_pos_arr[i]);
-#else
-X if (!mm64_flag)
-X for (i=0; i<=max_cnt; i++)
-X s_pos_arr[i] = (long)tmp_pos_arr[i];
-#endif
-X
-#ifdef DEBUG
-X for (i=1; i<max_cnt-1; i++) {
-X if (s_pos_arr[i+1]<s_pos_arr[i])
-X fprintf(stderr," ** spos_error [%d]\t%ld\t%ld\n",
-X i,s_pos_arr[i],s_pos_arr[i]);
-X }
-#endif
-X
-X if (!mm64_flag) free(tmp_pos_arr);
-X
-X m_fd->max_cnt = max_cnt;
-X m_fd->d_pos_arr = d_pos_arr;
-X m_fd->s_pos_arr = s_pos_arr;
-X m_fd->lpos = 0;
-X
-X /* check_mmap(m_fd,-2); */
-X
-X return m_fd;
-}
-X
-char *mgets (char *s, int n, struct lmf_str *m_fd)
-{
-X char *cs, *mfp;
-X
-X mfp = m_fd->mmap_addr;
-X cs = s;
-X
-X while (--n > 0 && (*mfp != (char)EOF))
-X if ((*cs++ = *mfp++) == '\n') break;
-X *cs = '\0';
-X
-X m_fd->mmap_addr = mfp;
-X return (*mfp == (char)EOF && cs == s) ? NULL : s;
-}
-X
-int
-agetlibm(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *m_fd,
-X long *l_off)
-{
-X register unsigned char *cp, *seqp;
-X register int *ap;
-X char *desc;
-X int lpos; /* entry number in library */
-X long l;
-X unsigned char *seqm, *seqm1;
-X char *bp;
-X static long seq_len;
-X static unsigned char *cp_max;
-#ifdef SUPERFAMNUM
-X char *bp1, *bpa, *tp;
-X int i;
-#endif
-X
-X *l_off = 1;
-X
-X lpos = m_fd->lpos;
-X
-X seqp = seq;
-X seqm = &seq[maxs-9];
-X seqm1 = seqm-1;
-X
-X ap = m_fd->sascii;
-X
-X if (*lcont==0) {
-X if (lpos >= m_fd->max_cnt) return (-1);
-X seq_len = m_fd->d_pos_arr[lpos+1] - m_fd->s_pos_arr[lpos];
-X if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
-X fprintf(stderr," ** sequence over-run: %ld at %d\n",seq_len,lpos);
-X return(-1);
-X }
-X *libpos = (fseek_t)lpos;
-X
-X desc = m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1;
-X strncpy(libstr,desc,n_libstr-1);
-X libstr[n_libstr-1]='\0';
-X if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
-X if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
-X if (n_libstr > MAX_UID) {
-X bp = libstr;
-X while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';
-X }
-X
-X for (bp = desc; *bp && (*bp != '\n'); *bp++ )
-X if (*bp == '@' && !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
-X
-#ifdef SUPERFAMNUM
-X sfnum[0]=nsfnum=0;
-X strncpy(tline,desc,sizeof(tline));
-X tline[MAXLINE-1]='\0';
-X if ((bp=strchr(tline,'\n'))!=NULL) *bp='\0';
-X if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
-X if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';
-X if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
-X fprintf(stderr," second %c missing: %s\n",SFCHAR,tline);
-X }
-X else {
-X *bp1 = '\0';
-X i = 0;
-X if ((tp = strtok(bp+1," \t"))!=NULL) {
-X sfnum[i++] = atoi(tp);
-X while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
-X sfnum[i++] = atoi(tp);
-X if (i>=9) break;
-X }
-X }
-X sfnum[nsfnum=i]= 0;
-X if (nsfnum>1) sf_sort(sfnum,nsfnum);
-X else {
-X if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
-X }
-X }
-X }
-X else {
-X sfnum[0] = nsfnum = 0;
-X }
-#endif
-X
-X m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
-X cp_max = (unsigned char *)(m_fd->mmap_addr+seq_len);
-X }
-X
-X for (cp=(unsigned char *)m_fd->mmap_addr; seqp<seqm1; ) {
-X if ((*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA) continue;
-X --seqp;
-X if (cp >= cp_max) break;
-X }
-X m_fd->mmap_addr = (char *)cp;
-X
-X if (seqp>=seqm1) (*lcont)++;
-X else {
-X *lcont=0;
-X lpos++;
-X m_fd->lpos = lpos;
-X }
-X *seqp = EOSEQ;
-X /* if ((int)(seqp-seq)==0) return 1; */
-X return (int)(seqp-seq);
-}
-X
-void
-aranlibm(char *str,
-X int cnt,
-X fseek_t libpos,
-X char *libstr,
-X struct lmf_str *m_fd)
-{
-X char *bp;
-X int llen;
-X int lpos;
-X
-X lpos = (int) libpos;
-X
-X llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
-X if (llen >= cnt) llen = cnt-1;
-X
-X strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1,llen);
-X str[llen]='\0';
-X if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
-X if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X bp = str;
-X while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';
-X m_fd->lpos = lpos;
-}
-X
-/* there is no vgetlibm() because vgetlibm() and agetlibm() are
-X identical - the difference in the two file formats relates to the
-X location of the sequence, which is already available in spos_arr[].
-X
-X however vranlibm must accomodate both type 5 and 6 files;
-X type 6 has extra stuff after the seq_id.
-*/
-X
-void
-vranlibm(char *str,
-X int cnt,
-X fseek_t libpos,
-X char *libstr,
-X struct lmf_str *m_fd)
-{
-X char *bp, *mp;
-X int llen;
-X int lpos;
-X
-X lpos = (int)libpos;
-X
-X llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
-X
-X mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos];
-X
-X strncpy(str,mp+4,20);
-X str[20]='\0';
-X if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0';
-X else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' ';
-X bp = strchr(mp,'\n');
-X
-X llen -= (bp-mp)-5;
-X if (llen > cnt-strlen(str)) llen = cnt-strlen(str)-1;
-X
-X strncat(str,bp+1,llen);
-X if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X str[cnt-1]='\0';
-X m_fd->lpos = lpos;
-}
-X
-void
-close_mmap(struct lmf_str *m_fd) {
-X free(m_fd->s_pos_arr);
-X free(m_fd->d_pos_arr);
-X if (m_fd->mm_flg) {
-X munmap(m_fd->mmap_base,m_fd->st_size);
-X free(m_fd);
-X }
-X m_fd->mm_flg=0;
-}
-X
-#ifndef min
-#define min(x,y) ((x) > (y) ? (y) : (x))
-#endif
-X
-static int gcg_bton[4]={2,4,1,3};
-X
-int
-gcg_getlibm(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *m_fd,
-X long *l_off)
-{
-X char dummy[20];
-X char gcg_date[6];
-X char gcg_type[10];
-X register unsigned char *cp, *seqp, stmp;
-X register int *ap, lpos;
-X unsigned char *seqm, *seqm1;
-X long r_block, b_block, r_fact, r16_block;
-X
-X *l_off = 1;
-X
-X seqp = seq;
-X seqm = &seq[maxs-9];
-X seqm1 = seqm-1;
-X
-X ap = m_fd->sascii;
-X lpos = m_fd->lpos;
-X
-X if (*lcont==0) {
-X if (lpos >= m_fd->max_cnt) return (-1);
-X sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%s %s %s %s %ld\n",
-X libstr,gcg_date,gcg_type,dummy,&(m_fd->gcg_len));
-X
-X m_fd->gcg_binary = (gcg_type[0]=='2');
-X
-X libstr[12]='\0';
-X *libpos = lpos;
-X m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
-X }
-X
-X r_block = b_block = min((size_t)(seqm-seqp),m_fd->gcg_len);
-X if (m_fd->gcg_binary) {
-X r_block = (r_block+3)/4;
-X }
-X
-X cp=(unsigned char *)m_fd->mmap_addr;
-X if (!m_fd->gcg_binary) {
-X r_fact = 1;
-X r16_block = r_block/16;
-X while (r16_block-- > 0) {
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X *seqp++ = ap[*cp++];
-X }
-X while (seqp<seq+r_block) *seqp++ = ap[*cp++];
-X }
-X else if (m_fd->gcg_binary) {
-X r_fact = 4;
-X r16_block = r_block/8;
-X while(r16_block-- > 0) {
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X }
-X
-X while (seqp < seq+4*r_block) {
-X stmp = *cp++;
-X *seqp++ = gcg_bton[(stmp>>6) &3];
-X *seqp++ = gcg_bton[(stmp>>4) &3];
-X *seqp++ = gcg_bton[(stmp>>2) &3];
-X *seqp++ = gcg_bton[(stmp) &3];
-X }
-X }
-X if (r_fact * r_block >= m_fd->gcg_len) {
-X *lcont = 0;
-X m_fd->lpos++;
-X }
-X else {
-X if (m_fd->gcg_binary) b_block = 4*r_block;
-X m_fd->gcg_len -= b_block;
-X (*lcont)++;
-X }
-X
-X seq[b_block] = EOSEQ;
-X /* if (b_block==0) return 1; else */
-X return b_block;
-}
-X
-void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);
-X
-int
-lgetlibm(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *m_fd,
-X long *l_off)
-{
-X register unsigned char *cp, *seqp;
-X register int *ap, lpos;
-X unsigned char *seqm, *seqm1;
-X
-X *l_off = 1;
-X
-X seqp = seq;
-X seqm = &seq[maxs-11];
-X seqm1 = seqm-1;
-X
-X lpos = m_fd->lpos;
-X ap = m_fd->sascii;
-X
-X if (*lcont==0) {
-X if (lpos >= m_fd->max_cnt) return (-1);
-X
-X if (n_libstr <= 21) {
-X strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12);
-X libstr[12]='\0';
-X }
-X else {
-X lget_ann_m(m_fd,libstr,n_libstr);
-X }
-X *libpos = lpos;
-X
-X m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
-X cp = (unsigned char *)m_fd->mmap_addr;
-X }
-X else cp = (unsigned char *)m_fd->mmap_addr;
-X
-X while (seqp<seqm1) {
-X if (*cp=='/' && *(cp-1)=='\n') break;
-X if ((*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA) continue;
-X --seqp;
-X if (*cp=='\n' && *(cp+1)==' ') cp += 11;
-X }
-X
-X if (seqp>=seqm1) {
-X (*lcont)++;
-X m_fd->mmap_addr = (char *)cp;
-X }
-X else {
-X *lcont=0;
-X m_fd->lpos++;
-X }
-X
-X *seqp = EOSEQ;
-X return (int)(seqp-seq);
-}
-X
-void
-lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
-X char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];
-X
-X /* copy in locus from lm_fd->lline */
-X strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus));
-X if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
-X
-X /* get description */
-X mgets(desc,sizeof(desc),lm_fd);
-X while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10))
-X mgets(desc,sizeof(desc),lm_fd);
-X if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0';
-X
-X /* get accession */
-X mgets(acc,sizeof(acc),lm_fd);
-X while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) {
-X mgets(acc,sizeof(acc),lm_fd);
-X if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0)
-X break;
-X }
-X if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';
-X if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';
-X
-X /* get version */
-X mgets(ver,sizeof(ver),lm_fd);
-X while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) {
-X mgets(ver,sizeof(ver),lm_fd);
-X if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0)
-X break;
-X }
-X if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';
-X
-X /* extract gi:123456 from version line */
-X bp_gid = strchr(&ver[12],':');
-X if (bp_gid != NULL) {
-X if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
-X bp_gid++;
-X }
-X if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';
-X
-X /* build up FASTA header line */
-X if (bp_gid != NULL) {
-X strncpy(libstr,"gi|",n_libstr-1);
-X strncat(libstr,bp_gid,n_libstr-4);
-X strncat(libstr,"|gb|",n_libstr-20);
-X }
-X else {libstr[0]='\0';}
-X
-X /* if we have a version number, use it, otherwise accession,
-X otherwise locus/description */
-X
-X if (ver[0]=='V') {
-X strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));
-X strncat(libstr,"|",n_libstr-1-strlen(libstr));
-X }
-X else if (acc[0]=='A') {
-X strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));
-X strncat(libstr," ",n_libstr-1-strlen(libstr));
-X }
-X
-X strncat(libstr,locus,n_libstr-1-strlen(libstr));
-X strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));
-X libstr[n_libstr-1]='\0';
-}
-X
-void
-lranlibm(char *str,
-X int cnt,
-X fseek_t seek,
-X char *libstr,
-X struct lmf_str *m_fd)
-{
-X char *bp, *llp;
-X char acc[MAXLINE], desc[MAXLINE];
-X
-X llp = m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[seek];
-X
-X lget_ann_m(m_fd,str,cnt);
-X
-X str[cnt-1]='\0';
-X
-X m_fd->lpos = seek;
-}
-X
-static int check_status=0;
-X
-void
-check_mmap(struct lmf_str *m_fd,long ntt) {
-X
-X int i, seq_len, ok_stat;
-X
-X ok_stat = 1;
-X if ( ++check_status > 5) return;
-X
-X fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt);
-X for (i=0; i<m_fd->max_cnt; i++) {
-X seq_len = m_fd->d_pos_arr[i+1] - m_fd->s_pos_arr[i];
-X if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
-X fprintf(stderr,"%d:\t%ld\t%ld\t%ld\n",
-X i,m_fd->d_pos_arr[i],m_fd->s_pos_arr[i],
-X m_fd->d_pos_arr[i+1]-m_fd->s_pos_arr[i]);
-X ok_stat=0;
-X }
-X }
-X if (ok_stat) {
-X if (check_status) fprintf(stderr," ** check_mmap OK %s %ld**\n",
-X m_fd->lb_name,ntt);
-X }
-}
-X
-#ifdef DEBUG
-/* C H K 3 -- Compute a type-3 Kermit block check. */
-/*
-X Calculate the 16-bit CRC of a null-terminated string using a byte-oriented
-X tableless algorithm invented by Andy Lowry (Columbia University). The
-X magic number 010201 is derived from the CRC-CCITT polynomial x^16+x^12+x^5+1.
-X Note - this function could be adapted for strings containing imbedded 0's
-X by including a length argument.
-*/
-long
-crck(s,n)
-X char *s; int n;
-{
-X unsigned int c, q;
-X long crc = 0;
-X
-X while (n-->0) {
-X c = *s++;
-X /* if (parity)*/
-X c &= 0177;
-X q = (crc ^ c) & 017; /* Low-order nibble */
-X crc = (crc >> 4) ^ (q * 010201);
-X q = (crc ^ (c >> 4)) & 017; /* High order nibble */
-X crc = (crc >> 4) ^ (q * 010201);
-X }
-X return(crc);
-}
-#endif
-SHAR_EOF
-chmod 0644 mmgetaa.c ||
-echo 'restore of mmgetaa.c failed'
-Wc_c="`wc -c < 'mmgetaa.c'`"
-test 21318 -eq "$Wc_c" ||
- echo 'mmgetaa.c: original size 21318, current size' "$Wc_c"
-fi
-# ============= ms1.aa ==============
-if test -f 'ms1.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping ms1.aa (File already exists)'
-else
-echo 'x - extracting ms1.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ms1.aa' &&
->test m1
-MPMIL,
-MLLEY,
-MGDAP,
-MDTRX,
-MLCYN
-SHAR_EOF
-chmod 0644 ms1.aa ||
-echo 'restore of ms1.aa failed'
-Wc_c="`wc -c < 'ms1.aa'`"
-test 43 -eq "$Wc_c" ||
- echo 'ms1.aa: original size 43, current size' "$Wc_c"
-fi
-# ============= msg.h ==============
-if test -f 'msg.h' -a X"$1" != X"-c"; then
- echo 'x - skipping msg.h (File already exists)'
-else
-echo 'x - extracting msg.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'msg.h' &&
-/* Concurrent read version */
-X
-/* $Name: fa_34_26_5 $ - $Id: msg.h,v 1.9 2006/03/17 18:34:59 wrp Exp $ */
-X
-/* Cube definitions
-X
-X   Node/process ids and integer message-type tags for the parallel
-X   programs.  mshowalign.c passes MSEQTYPE, LISTTYPE and
-X   ALN1TYPE..ALN3TYPE as PVM/MPI message tags and stores DO_ALIGN_FLG
-X   in qm_msg.s_func.
-X
-X   NOTE(review): both branches of the PVM_SRC conditional below define
-X   identical values, and several tags share a value (MSEQTYPE1 and
-X   MSEQTYPE2 are both 8; HSEQTYPE..TWOTYPE and MSEQTYPE0 repeat
-X   STARTTYPE3..STARTTYPE6 and PARAMTYPE) - confirm this is intentional
-X   before adding new tags.
-*/
-X
-#ifdef PVM_SRC
-#define FIRSTNODE 1
-#define FIRSTWORK 1
-#else
-#define FIRSTNODE 1
-#define FIRSTWORK 1
-#endif
-X
-#define MAXNOD 128
-#define ALLTYPES -1
-#ifdef IPSC2
-#define HOSTPID 99
-#define MANAGEPID 100
-#define WORKPID 101
-#else
-#define HOSTPID 0
-#define MANAGEPID 0
-#define WORKPID 0
-#endif
-#define MANAGER 0
-#define ALLNODES -1
-#define ALLPIDS -1
-#define STARTTYPE0 0
-#define STARTTYPE1 1
-#define STARTTYPE2 2
-#define STARTTYPE3 3
-#define STARTTYPE4 4
-#define STARTTYPE5 5
-#define STARTTYPE6 6
-#define PARAMTYPE 7
-#define HSEQTYPE 3
-#define MSEQTYPE 4
-#define ONETYPE 5
-#define TWOTYPE 6
-#define MSEQTYPE0 7
-#define MSEQTYPE1 8
-#define MSEQTYPE2 8
-#define LISTTYPE 10
-#define LISTRTYPE 11
-#define CODERTYPE 12
-#define ALN1TYPE 21
-#define ALN2TYPE 22
-#define ALN3TYPE 23
-#define FINISHED 16384 /* this must be larger than BFR */
-X
-#define DO_SEARCH_FLG 0
-#define DO_OPT_FLG 1
-#define DO_ALIGN_FLG 2
-#define DO_CALC_FLG 3
-chmod 0644 msg.h ||
-echo 'restore of msg.h failed'
-Wc_c="`wc -c < 'msg.h'`"
-test 1085 -eq "$Wc_c" ||
- echo 'msg.h: original size 1085, current size' "$Wc_c"
-fi
-# ============= mshowalign.c ==============
-if test -f 'mshowalign.c' -a X"$1" != X"-c"; then
- echo 'x - skipping mshowalign.c (File already exists)'
-else
-echo 'x - extracting mshowalign.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mshowalign.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: mshowalign.c,v 1.43 2007/01/08 15:38:46 wrp Exp $ */
-X
-/* mshowalign.c - show sequence alignments in pvcomplib */
-X
-/*
-X this is a merged version of showalign.c that works properly with
-X both the comp_lib (serial, threaded) and PCOMPLIB parallel versions
-X of the programs.
-X
-X In the serial and current threaded versions of the programs,
-X showalign gets a list of high scoring sequences and must
-X re_getlib() the sequence, do_walign(), and then calculate the
-X alignment.
-X
-X In the PCOMPLIB parallel versions, the worker programs do the
-X aligning, so showalign() must send them the appropriate messages to
-X have the alignment done, and then collect the alignment results
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "msg.h"
-#include "structs.h"
-#include "param.h"
-X
-#ifdef PCOMPLIB
-#ifdef PVM_SRC
-#include "pvm3.h"
-extern int pinums[];
-#endif
-#ifdef MPI_SRC
-#include "mpi.h"
-#endif
-#include "p_mw.h"
-#else
-#include "mm_file.h"
-#include "mw.h"
-#endif
-X
-#ifndef PCOMPLIB
-X
-/* used to position the library sequence for re_getlib - also gets
-X description */
-#define RANLIB (m_fptr->ranlib)
-X
-extern struct lmf_str *
-re_openlib(struct lmf_str *, int outtty);
-X
-int
-re_getlib(unsigned char *aa1, int maxn, int maxt,
-X int loff, int cont, int term_code,
-X long *loffset, long *l_off,
-X struct lmf_str *m_fptr);
-X
-#include "drop_func.h"
-X
-#endif
-X
-X
-extern void cal_coord(int n0, int n1, long sq0off, long loffset,
-X struct a_struct *aln);
-X
-void initseq(char **, char **, char **, char **, int);
-void freeseq(char **, char **, char **, char **);
-X
-void do_show(FILE *fp, int n0, int n1, int score,
-X char *name0, char *name1, int nml,
-X struct mngmsg m_msg, struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
-X float percent, float gpercent, int lc,
-X struct a_struct *aln, long loffset);
-X
-extern void discons(FILE *fd, struct mngmsg m_msg, struct pstruct pst,
-X char *seqc0, char *seqc0a, char *seqc1, char *seqca,
-X int nc,
-X int n0, int n1, char *name0, char *name1, int nml,
-X struct a_struct *aln,
-X long loffset);
-X
-extern void disgraph(FILE *fd, int n0, int n1,
-X float percent, int score,
-X int min0, int min1, int max0, int max1, long sq0off,
-X char *name0, char *name1, int nml, int llen, int markx);
-X
-extern double zs_to_bit(double, int, int);
-X
-extern void
-do_url1(FILE *, struct mngmsg, struct pstruct, char *, int,
-X struct a_struct , long);
-X
-#ifndef A_MARK
-#define A_MARK ">>"
-#endif
-X
-static char l_name[200]; /* link name */
-X
-#ifdef PCOMPLIB
-#define BBP_INFO(info) bbp->desptr->info
-#else
-#define BBP_INFO(info) bbp->info
-#endif
-X
-/* showalign() - print the alignment of the query against each of the
-X   best-scoring library sequences in bptr[].
-X
-X   This version does not check for m_msg.e_cut because nshow/nbest has
-X   already been set to limit on e_cut.
-X
-X   fp          - stream the report is written to
-X   aa0, aa1, maxn, f_str - (non-PCOMPLIB builds only) query sequence
-X                 per frame, library sequence buffer and its size, and
-X                 the per-frame state handed to do_walign() and calcons()
-X   bptr, nbest - the hits to show and how many there are; at most
-X                 min(nbest, m_msg.ashow, m_msg.nshow) are processed
-X                 (SHOWUN builds extend the limit past skipped hits)
-X   qlib, gstring2 - not referenced in this function
-X   m_msg, pst  - passed by value, so the changes made here to m_msg
-X                 (ashow, aln) are not seen by the caller
-X
-X   Without PCOMPLIB each library sequence is re-read with re_getlib()
-X   and, unless bbp->have_ares is set, re-aligned with do_walign().
-X   With PCOMPLIB a request is sent to worker bbp->wrkr, which returns
-X   the alignment strings and statistics.  The file-level l_name buffer
-X   is rewritten for every hit and is read by do_show().
-*/
-X
-void showalign (FILE *fp,
-#ifndef PCOMPLIB
-X                unsigned char **aa0, unsigned char *aa1, int maxn,
-#endif
-X                struct beststr **bptr, int nbest, int qlib,
-X                struct mngmsg m_msg, struct pstruct pst, char *gstring2
-#ifndef PCOMPLIB
-X                , void **f_str
-#endif
-)
-{
-X  char tmp_str[20];
-X  char info_str[200];
-X  char bline[2048], *bl_ptr, *bp, fmt[40];
-X  int tmp_len, l_llen;
-X  int t_have_ares;
-X  char name0[80], name0s[80], name1[200];
-X  int istart = 0, istop, i = 0, ib, nml;
-X  int n1tot;
-X  struct beststr *bbp;
-X  int nc, lc, maxc;
-X  float percent, gpercent;
-X  char *seqc0, *seqc0a, *seqc1, *seqca;
-X  long loffset, l_off;
-#ifdef PCOMPLIB
-X  struct stage2_str liblist;
-X  struct qmng_str qm_msg;
-#ifdef MPI_SRC
-X  int int_msg_b[10];
-X  MPI_Status mpi_status;
-#endif
-#else
-X  int n1;
-X  struct lmf_str *m_fptr;
-X  int ngap;
-#endif
-X
-#ifdef PCOMPLIB
-X  /* this function has its own copy of qm_msg, so we must fill it
-X     appropriately */
-X  qm_msg.n0 = m_msg.n0;
-X  strncpy(qm_msg.libstr,m_msg.qtitle,sizeof(qm_msg.libstr));
-X  /* strncpy() does not terminate when the title fills the buffer */
-X  qm_msg.libstr[sizeof(qm_msg.libstr)-1]='\0';
-#endif
-X
-X  /* set the name0,1 label length */
-X  if (m_msg.markx & MX_M10FORM) nml = 12;
-X  else nml = m_msg.nmlen;
-X  /* name0[] receives nml characters plus a terminator below, so keep
-X     nml inside the smaller of the two label buffers */
-X  if (nml > (int)sizeof(name0)-1) nml = (int)sizeof(name0)-1;
-X
-X  if (strlen(m_msg.qtitle) > 0) {
-X    if (m_msg.qtitle[0]=='>') strncpy(name0s,&m_msg.qtitle[1],sizeof(name0s));
-X    else strncpy(name0s,m_msg.qtitle,sizeof(name0s));
-X  }
-X  else {
-X    strncpy(name0s,m_msg.tname,sizeof(name0s));
-X  }
-X  name0s[sizeof(name0s)-1]='\0';
-X
-X  if ((bp=strchr(name0s,' '))!=NULL) *bp='\0';
-X
-X  /* name0 is rebuilt from name0s for every hit inside the loop, so
-X     nothing is stored into it here (an earlier store of '-' for
-X     m_msg.revcomp was always overwritten before being read) */
-X
-X  l_llen = m_msg.aln.llen;
-X  if ((m_msg.markx & MX_M9SUMM) && m_msg.show_code != SHOW_CODE_ID) {
-X    l_llen += 40;
-X    if (l_llen > 200) l_llen=200;
-X  }
-X
-X  sprintf(fmt,"%s%%-%ds (%%d %s)\n",A_MARK,l_llen-5,m_msg.sqnam);
-X
-X  if (!(m_msg.markx&MX_M10FORM)) fprintf(fp,"\n");
-X
-X  if (m_msg.ashow < 0) m_msg.ashow = m_msg.nshow;
-X  istart = 0; istop = min(min(nbest,m_msg.ashow),m_msg.nshow);
-X
-X  for (ib=istart; ib<istop; ib++) {
-X    bbp = bptr[ib];
-X
-#ifdef SHOWUN
-X    if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg.qsfnum,BBP_INFO(sfnum))) {
-X      istop = min(istop+1,nbest);
-X      continue;
-X    }
-#endif
-X    if (bbp->score[0] <= 0) break;
-X
-X    if (m_msg.quiet==1 && pst.zsflag>=0
-X        && bbp->escore < m_msg.e_low) continue;
-X
-#ifndef PCOMPLIB
-X    /* get the alignment and score by re-aligning */
-X
-X    if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg.quiet))==NULL)
-X      exit(1);
-X
-X    /* get the description - do not "edit" it yet */
-X
-X    if (!(m_msg.markx & MX_M10FORM)){
-X      if (m_msg.long_info) {tmp_len = sizeof(bline)-1;}
-X      else {tmp_len = l_llen-5;}
-X      RANLIB(bline,tmp_len,bbp->lseek,bbp->libstr,bbp->m_file_p);
-X      bline[tmp_len]='\0';
-X    }
-X    else {
-X      RANLIB(bline,sizeof(bline),bbp->lseek,bbp->libstr,bbp->m_file_p);
-X      bline[sizeof(bline)-1]='\0';
-X    }
-X
-X    n1 = re_getlib(aa1,maxn,m_msg.maxt3,m_msg.loff,bbp->cont,m_msg.term_code,
-X                   &loffset,&l_off,bbp->m_file_p);
-#ifdef DEBUG
-X    if (n1 != bbp->n1) {
-X      fprintf(stderr," library sequence: %s lengths differ: %d != %d\n",
-X              bline,bbp->n1, n1);
-X      fprintf(stderr, "offset is: %lld\n",bbp->lseek);
-X    }
-#endif
-X
-X    if (!bbp->have_ares) {
-X      bbp->sw_score =
-X        do_walign(aa0[bbp->frame],m_msg.n0, aa1, n1, bbp->frame, &pst,
-X                  f_str[bbp->frame], &bbp->a_res, &t_have_ares);
-X    }
-X    else {
-X      pre_cons(aa1,n1,bbp->frame,f_str[bbp->frame]);
-X    }
-X
-X    aln_func_vals(bbp->frame, &m_msg.aln);
-X
-#else /* PCOMPLIB - get the alignment information from a worker */
-X
-X    /* we have a sequence that we need an alignment for -
-X       send a message to the appropriate worker to produce an alignment
-X       qm_msg.slist == 1 -> one alignment
-X       qm_msg.s_func == DO_ALIGN_FLG -> use the alignment function
-X       send mngmsg (MSEQTYPE)
-X       then send number of sequence to be aligned
-X    */
-X
-X    qm_msg.slist = 1;
-X    qm_msg.s_func = DO_ALIGN_FLG;
-X
-X    liblist.seqnm = bbp->seqnm;
-X    liblist.frame = bbp->frame;
-#ifdef PVM_SRC
-X    pvm_initsend(PvmDataRaw);
-X    pvm_pkbyte((char *)&qm_msg,sizeof(struct qmng_str),1);
-X    pvm_send(pinums[bbp->wrkr],MSEQTYPE);
-X
-X    pvm_initsend(PvmDataRaw);
-X    pvm_pkbyte((char *)&liblist,sizeof(struct stage2_str),1);
-X    pvm_send(pinums[bbp->wrkr],LISTTYPE);
-#endif
-#ifdef MPI_SRC
-X    MPI_Send(&qm_msg,sizeof(struct qmng_str),MPI_BYTE,bbp->wrkr,
-X             MSEQTYPE,MPI_COMM_WORLD);
-X    MPI_Send(&liblist,sizeof(struct stage2_str),MPI_BYTE,bbp->wrkr,
-X             LISTTYPE,MPI_COMM_WORLD);
-#endif
-X    /* information should be sent */
-X    /* pick up description */
-X    strncpy(bline,bbp->desptr->bline,l_llen-5);
-X    bline[l_llen-5]='\0';
-#endif /* PCOMPLIB */
-X
-X    if (strlen(bline)==0) {
-X      bline[0]='>';
-X      strncpy(&bline[1],m_msg.lname,l_llen-5);
-X      bline[l_llen-5]='\0';
-X    }
-X    /* re-format bline */
-X    while ((bp=strchr(bline,'\n'))!=NULL) *bp=' ';
-X    if (m_msg.long_info) {
-X      tmp_len = strlen(bline);
-X      bl_ptr = bline;
-X      /* break the description at a blank roughly every l_llen columns */
-X      if (!(m_msg.markx & MX_M10FORM)) while (tmp_len > l_llen) {
-X        for (i=l_llen; i>10; i--)
-X          if (bl_ptr[i]==' ') {
-X            bl_ptr[i]='\n';
-X            break;
-X          }
-X        if (i <= 10) break;
-X        tmp_len -= i;
-X        bl_ptr += i;
-X      }
-X      bline[sizeof(bline)-1]='\0';
-X    }
-X
-X    n1tot = (BBP_INFO(n1tot_p)) ? *BBP_INFO(n1tot_p) : bbp->n1;
-X
-X    /* l_name is used to build an HTML link from the bestscore line to
-X       the alignment. It can also be used to discriminate multiple hits
-X       from the same long sequence. Text must match that in showbest.c
-X       NOTE(review): showbest() tests m_msg->nframe > 0 where this
-X       function tests m_msg.qframe >= 0 before appending "_r" - confirm
-X       the two really produce the same text. */
-X
-X    /* name1 = first blank-delimited word of the description */
-X    strncpy(name1,bline,sizeof(name1));
-X    name1[sizeof(name1)-1]='\0';
-X    if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
-X    strncpy(l_name,name1,sizeof(l_name));
-X    l_name[sizeof(l_name)-1]='\0';
-X    if ((bp=strchr(&l_name[3],'|'))!=NULL) *bp='\0';
-X
-X    /* append the frame and continuation suffixes; strncat()'s count
-X       is the room left in l_name excluding the terminator */
-X    tmp_str[0]='\0';
-X    if (m_msg.nframe > 2) sprintf(tmp_str,"_%d",bbp->frame+1);
-X    else if (m_msg.qframe >= 0 && bbp->frame == 1) strcpy(tmp_str,"_r");
-X    strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name)-1);
-X    if (bbp->cont-1 > 0) {
-X      sprintf(tmp_str,":%d",bbp->cont-1);
-X      strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name)-1);
-X    }
-X
-X    if (!(m_msg.markx & MX_M10FORM)) name1[nml]='\0';
-X
-X    /* print out score information; */
-X
-X    if (m_msg.markx & MX_HTML ) {
-X      fprintf (fp,"<A name=%s>\n<tt><pre>\n",l_name);
-X    }
-X    strncpy(name0,name0s,nml);
-X    name0[nml]='\0';
-X
-X    if (pst.zsflag%10 == 6) {
-X      sprintf(info_str," comp: %.5f H: %.5f",bbp->comp,bbp->H);
-X    }
-X    else info_str[0]='\0';
-X
-X    if ((m_msg.markx & MX_ATYPE)!=7 && !(m_msg.markx & MX_M10FORM)) {
-X      fprintf (fp, fmt,bp=bline,n1tot);
-X      if (m_msg.nframe > 2)
-X        fprintf (fp, "Frame: %d",bbp->frame+1);
-X      else if (m_msg.nframe > 1)
-X        fprintf (fp, "Frame: %c",(bbp->frame? 'r': 'f'));
-X      else if (m_msg.qframe >= 0 && bbp->frame > 0 ) {
-X        fputs("rev-comp",fp);
-X        name0[nml-1]='\0';
-X        strcat(name0,"-");
-X      }
-X
-X      if (m_msg.arelv > 0)
-X        fprintf (fp, " %s: %3d", m_msg.alab[0],bbp->score[0]);
-X      if (m_msg.arelv > 1)
-X        fprintf (fp, " %s: %3d", m_msg.alab[1],bbp->score[1]);
-X      if (m_msg.arelv > 2)
-X        fprintf (fp, " %s: %3d", m_msg.alab[2],bbp->score[2]);
-X      fprintf(fp,"%s",info_str);
-X      if (pst.zsflag>=0)
-X        fprintf (fp, " Z-score: %4.1f bits: %3.1f E(): %4.2g",
-X                 bbp->zscore,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),bbp->escore);
-X      fprintf (fp, "\n");
-X    }
-X    else if (m_msg.markx & MX_M10FORM) {
-X      fprintf(fp,">>%s\n",bline);
-X      if (m_msg.qframe > -1) {
-X        if (m_msg.nframe > 2) {
-X          fprintf(fp,"; %s_frame: %d\n",m_msg.f_id0,bbp->frame+1);
-X        }
-X        else {
-X          fprintf(fp,"; %s_frame: %c\n",m_msg.f_id0,(bbp->frame > 0? 'r':'f'));
-X        }
-X      }
-X      fprintf (fp, "; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[0],bbp->score[0]);
-X      if (m_msg.arelv > 1)
-X        fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[1],bbp->score[1]);
-X      if (m_msg.arelv > 2)
-X        fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[2],bbp->score[2]);
-X      if (info_str[0]) fprintf(fp,"; %s_info: %s\n",m_msg.f_id0,info_str);
-X      if (pst.zsflag>=0)
-X        fprintf (fp,"; %s_z-score: %4.1f\n; %s_bits: %3.1f\n; %s_expect: %6.2g\n",
-X                 m_msg.f_id0,bbp->zscore,
-X                 m_msg.f_id0,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),
-X                 m_msg.f_id0,bbp->escore);
-X    }
-X
-X
-#ifdef PCOMPLIB
-X    /* get the sw_score, alignment information, get seqc0, seqc1 */
-X
-#ifdef PVM_SRC
-X    /* get alignment lengths, percents */
-X    pvm_recv(pinums[bbp->wrkr],ALN1TYPE);
-X    pvm_upkint(&nc,1,1);
-X    pvm_upkint(&lc,1,1);
-X    pvm_upkint(&maxc,1,1);
-X
-X    pvm_upkfloat(&percent,1,1);
-X    pvm_upkfloat(&gpercent,1,1);
-X
-X    pvm_upkint(&bbp->sw_score,1,1);
-X    pvm_upkbyte((char *)&m_msg.aln,sizeof(struct a_struct),1);
-X
-X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
-X
-X    pvm_recv(pinums[bbp->wrkr],ALN2TYPE);
-X    pvm_upkbyte(seqc0,maxc,1);
-X    if (m_msg.ann_flg) pvm_upkbyte(seqc0a,maxc,1);
-X    pvm_upkbyte(seqc1,maxc,1);
-X    pvm_upkbyte(seqca,maxc,1);
-#endif
-#ifdef MPI_SRC
-X    MPI_Recv(int_msg_b,4,MPI_INT,bbp->wrkr,ALN1TYPE,MPI_COMM_WORLD,
-X             &mpi_status);
-X    nc = int_msg_b[0];
-X    lc = int_msg_b[1];
-X    maxc = int_msg_b[2];
-X    bbp->sw_score = int_msg_b[3];
-X    MPI_Recv(&percent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
-X             &mpi_status);
-X    MPI_Recv(&gpercent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
-X             &mpi_status);
-X    MPI_Recv(&m_msg.aln,sizeof(struct a_struct),MPI_BYTE,
-X             bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
-X
-X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
-X    MPI_Recv(seqc0,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
-X    if (m_msg.ann_flg)
-X      MPI_Recv(seqc0a,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
-X    MPI_Recv(seqc1,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
-X    MPI_Recv(seqca,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
-#endif
-X
-X    /* l_off is the coordinate of the first residue */
-X    l_off = 1;
-X    /* loffset is the offset of the aa1 in the full sequence */
-X    loffset = bbp->desptr->loffset-l_off;
-X
-#else /* not PCOMPLIB */
-X
-X    /* estimate space for alignment consensus */
-X    if (m_msg.aln.showall==1) {
-X      maxc = bbp->a_res.nres + max(bbp->a_res.min0,bbp->a_res.min1)+
-X        max((m_msg.n0-bbp->a_res.max0),(n1-bbp->a_res.max1))+4;
-X    }
-X    else {
-X      maxc = bbp->a_res.nres + 4*m_msg.aln.llen+4;
-X    }
-X
-X    /* get space to put the sequence alignment consensus */
-X    initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
-X
-X    /* build consensus from res, nres (done by workers if PCOMPLIB) */
-X    if (!m_msg.ann_flg) {
-X      nc=calcons(aa0[bbp->frame],m_msg.n0,aa1,n1,
-X                 &lc,&m_msg.aln, bbp->a_res, pst, seqc0, seqc1, seqca,
-X                 f_str[bbp->frame]);
-X      memset(seqc0a,' ',nc);
-X      seqc0a[nc]='\0';
-X    }
-X    else {
-X      nc=calcons_a(aa0[bbp->frame],m_msg.aa0a,m_msg.n0,aa1,n1,
-X                   &lc,&m_msg.aln,bbp->a_res,pst, seqc0, seqc0a,
-X                   seqc1, seqca, m_msg.ann_arr,f_str[bbp->frame]);
-X    }
-X
-X    /* PCOMPLIB workers return percent, gpercent, so calculate it here */
-X    if (lc > 0) percent = (100.0*(float)m_msg.aln.nident)/(float)lc;
-X    else percent = -1.00;
-X    ngap = m_msg.aln.ngap_q + m_msg.aln.ngap_l;
-#ifndef SHOWSIM
-X    if (lc-ngap> 0) gpercent =(100.0*(float)m_msg.aln.nident)/(float)(lc-ngap);
-#else
-X    if (lc > 0) gpercent =(100.0*(float)m_msg.aln.nsim)/(float)lc;
-#endif
-X    else gpercent = -1.00;
-#endif
-X
-X    if (max(strlen(seqc0),strlen(seqc1)) > nc) {
-X      /* strlen() returns size_t; cast to match the %u conversions */
-X      fprintf(stderr," mshowalign: nc/maxc: %d/%d seqc0/1: %u/%u\n",
-X              nc,maxc,(unsigned int)strlen(seqc0),(unsigned int)strlen(seqc1));
-X    }
-X
-X    /* here PCOMPLIB/comp_lib logic is the same */
-X
-#ifdef DEBUG
-X    if (bbp->sw_score < bbp->score[pst.score_ix]) {
-X      fprintf(stderr," *** warning - SW score=%d < opt score=%d ***\n",
-X              bbp->sw_score, bbp->score[pst.score_ix]);
-X    }
-#endif
-X
-X    cal_coord(m_msg.n0,bbp->n1,m_msg.sq0off,loffset+l_off-1,&m_msg.aln);
-X
-#ifndef PCOMPLIB
-X    if (bbp->a_res.nres > 0)
-#endif
-X      do_show(fp, m_msg.n0, bbp->n1, bbp->sw_score, name0, name1, nml,
-X              m_msg, pst, seqc0, seqc0a, seqc1, seqca,
-X              nc, percent, gpercent, lc, &m_msg.aln,
-X              loffset+l_off-1);
-X
-X    if (m_msg.markx & MX_HTML) fprintf(fp,"</pre></tt>\n<hr>\n");
-X    fflush(fp);
-X
-X    freeseq(&seqc0,&seqc0a,&seqc1, &seqca);
-X  }
-X  if (fp!=stdout) fprintf(fp,"\n");
-}
-X
-/* do_show() - write the summary and the alignment itself for one
-X   query/library pair to fp.
-X
-X   The output form is selected by m_msg.markx:
-X     MX_AMAP set and (markx & MX_ATYPE)==7 - only disgraph() is called
-X     MX_M10FORM - "; name: value" lines followed by discons()
-X     otherwise  - a score/identity line, an optional do_url1() block
-X                  (MX_HTML), an optional disgraph() map (MX_AMAP) and
-X                  then discons()
-X
-X   score, percent, gpercent, lc and the aln coordinates are printed as
-X   received; the sequence strings, nc and loffset are passed through to
-X   discons().  The file-level l_name buffer is passed to do_url1().
-*/
-void do_show(FILE *fp, int n0,int n1, int score,
-X             char *name0, char *name1, int nml,
-X             struct mngmsg m_msg, struct pstruct pst,
-X             char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
-X             float percent, float gpercent, int lc,
-X             struct a_struct *aln, long loffset)
-{
-X  int q_len;
-X
-X  /* map-only display */
-X  if ((m_msg.markx & MX_AMAP) && (m_msg.markx & MX_ATYPE)==7) {
-X    disgraph(fp, n0, n1, percent, score,
-X             aln->amin0, aln->amin1, aln->amax0, aln->amax1, m_msg.sq0off,
-X             name0, name1, nml, aln->llen, m_msg.markx);
-X    return;
-X  }
-X
-X  /* -m 10 style tagged output */
-X  if (m_msg.markx & MX_M10FORM) {
-X    if (pst.sw_flag && m_msg.arelv>0)
-X      fprintf(fp,"; %s_score: %d\n",m_msg.f_id1,score);
-X    fprintf(fp,"; %s_ident: %5.3f\n",m_msg.f_id1,percent/100.0);
-#ifndef SHOWSIM
-X    fprintf(fp,"; %s_gident: %5.3f\n",m_msg.f_id1,gpercent/100.0);
-#else
-X    fprintf(fp,"; %s_sim: %5.3f\n",m_msg.f_id1,gpercent/100.0);
-#endif
-X
-X    fprintf(fp,"; %s_overlap: %d\n",m_msg.f_id1,lc);
-X    discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
-X            n0, n1, name0, name1, nml, aln, loffset);
-X    return;
-X  }
-X
-X  /* default display: score line first */
-X  if (!pst.sw_flag) fprintf(fp,"banded Smith-Waterman score: %d; ",score);
-X  else fprintf(fp,"Smith-Waterman score: %d; ",score);
-#ifndef SHOWSIM
-X  fprintf(fp," %6.3f%% identity (%6.3f%% ungapped) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
-X          percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
-X          aln->d_start1,aln->d_stop1);
-#else
-X  fprintf(fp," %6.3f%% identity (%6.3f%% similar) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
-X          percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
-X          aln->d_start1,aln->d_stop1);
-#endif
-X
-X  if (m_msg.markx & MX_HTML) {
-X    do_url1(fp, m_msg, pst, l_name,n1,*aln,loffset);
-X  }
-X
-X  if ((m_msg.markx & MX_AMAP) && (m_msg.markx & MX_ATYPE)!=7) {
-X    fputc('\n',fp);
-X
-X    /* a DNA query against a protein library is drawn with n0/3 */
-X    if (m_msg.qdnaseq == SEQT_DNA && m_msg.ldnaseq== SEQT_PROT)
-X      q_len = n0 / 3;
-X    else
-X      q_len = n0;
-X
-X    disgraph(fp, q_len, n1, percent, score,
-X             aln->amin0, aln->amin1,
-X             aln->amax0, aln->amax1,
-X             m_msg.sq0off,
-X             name0, name1, nml, aln->llen,m_msg.markx);
-X  }
-X
-X  discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
-X          n0, n1, name0, name1, nml, aln, loffset);
-X
-X  fputc('\n',fp);
-}
-X
-X
-#ifndef MPI_SRC
-/* initseq() - allocate the four consensus arrays as one zero-filled
-X   buffer of 4*seqsiz bytes.  *seqc0 points at the start and *seqc0a,
-X   *seqc1 and *seqca at successive seqsiz-byte quarters, so only
-X   *seqc0 may be passed to free().  Prints a message and exits with
-X   status 1 when the allocation fails.
-*/
-void /* initialize consensus arrays */
-initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz)
-{
-X  char *base;
-X
-X  base = (char *)calloc((size_t)seqsiz*4,sizeof(char));
-X  *seqc0 = base;
-X  if (base == NULL) {
-X    fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
-X    exit(1);
-X  }
-X
-X  /* carve the remaining three arrays out of the same allocation */
-X  base += seqsiz;
-X  *seqc0a = base;
-X  base += seqsiz;
-X  *seqc1 = base;
-X  base += seqsiz;
-X  *seqca = base;
-}
-X
-/* freeseq() - release the consensus arrays set up by initseq().
-X
-X   All four arrays live in the single allocation that starts at
-X   *seqc0, so only that pointer is freed.  Every pointer is then reset
-X   to NULL so that none of them is left referring to freed memory and
-X   a repeated call is harmless.  NULL arguments are ignored.
-*/
-void freeseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca)
-{
-X  if (seqc0 != NULL) {
-X    free(*seqc0);
-X    *seqc0 = NULL;
-X  }
-X  if (seqc0a != NULL) *seqc0a = NULL;
-X  if (seqc1 != NULL) *seqc1 = NULL;
-X  if (seqca != NULL) *seqca = NULL;
-}
-#endif
-SHAR_EOF
-chmod 0644 mshowalign.c ||
-echo 'restore of mshowalign.c failed'
-Wc_c="`wc -c < 'mshowalign.c'`"
-test 17780 -eq "$Wc_c" ||
- echo 'mshowalign.c: original size 17780, current size' "$Wc_c"
-fi
-# ============= mshowbest.c ==============
-if test -f 'mshowbest.c' -a X"$1" != X"-c"; then
- echo 'x - skipping mshowbest.c (File already exists)'
-else
-echo 'x - extracting mshowbest.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mshowbest.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: mshowbest.c,v 1.44 2006/06/30 19:46:36 wrp Exp $ */
-X
-/* 29-Oct-2003 - changes so that bbp->cont < 0 => aa1 sequence is
-X already in aa1, no re_openlib or re_getlib required
-*/
-X
-/* 14-May-2003 Changes to use a more consistent coordinate numbering
-X system for displays. aln->d_start[01] is now consistently used
-X to report the start of the alignment in all functions, and
-X mshowbest.c has been modified to use d_start[01] instead of
-X d_start[01]-1. aln->min[01] now starts at 0 for all functions;
-X instead of 1 for some functions (dropnfa.c, dropgsw.c, dropfs2.c
-X earlier).
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "structs.h"
-#include "param.h"
-X
-#ifndef PCOMPLIB
-#include "mm_file.h"
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-X
-#define MAX_BLINE 256
-X
-#ifndef PCOMPLIB
-/* function calls necessary to re_getlib() the sequence and, do
-X alignments, if necessary
-*/
-X
-#define RANLIB (m_fptr->ranlib)
-X
-int
-re_getlib(unsigned char *, int, int, int, int, int, long *, long *,
-X struct lmf_str *m_fptr);
-X
-#include "drop_func.h"
-X
-struct lmf_str *re_openlib(struct lmf_str *, int outtty);
-#endif
-X
-extern void cal_coord(int n0, int n1, long sq0off, long loffset,
-X struct a_struct *aln);
-X
-void header_aux(FILE *);
-void show_aux(FILE *, struct beststr *);
-void w_abort (char *p, char *p1);
-X
-/* BBP_INFO get stuff directly from beststr or from beststr->desptr */
-#ifdef PCOMPLIB
-#define BBP_INFO(info) bbp->desptr->info
-#else
-#define BBP_INFO(info) bbp->info
-#endif
-X
-extern double zs_to_bit(double, int, int);
-X
-/* showbest() shows a list of high scoring sequence descriptions, and
-X their scores. If -m 9, then an additional complete set of
-X alignment information is provided.
-X
-X If PCOMPLIB or m_msg.quiet then the number of high scores to be
-X shown is pre-determined by m_msg.mshow before showbest is called.
-X
-X The comp_lib.c version re_getlib()'s the sequence for its
-X discription, and then does another alignment for -m 9 (Thus, it
-X needs an f_str. The PCOMPLIB version has everything available in
-X beststr before showbest() is called.
-*/
-X
-void showbest (FILE *fp,
-#ifndef PCOMPLIB
-X unsigned char **aa0, unsigned char *aa1, int maxn,
-#endif
-X struct beststr **bptr,int nbest, int qlib, struct mngmsg *m_msg,
-X struct pstruct pst, struct db_str db,
-X char *gstring2
-#ifndef PCOMPLIB
-X ,void **f_str
-#endif
-)
-{
-X int ntmp = 0;
-X char bline[MAX_BLINE], fmt[40], pad[MAX_BLINE], rline[40];
-X char l_name[128];
-X int istart = 0, istop, ib;
-X int nshow;
-X int quiet;
-X int r_margin;
-X struct beststr *bbp;
-X int n1tot;
-X char *bp;
-X char rel_label[12];
-X char tmp_str[20], *seqc;
-X int seqc_len;
-X long loffset, l_off;
-X int n0, n1;
-X struct rstruct rst;
-X int lc, maxc, nident, ngap;
-X float percent, gpercent;
-X struct a_struct *aln_p;
-X int *tres;
-X int gi_num;
-X
-#ifndef PCOMPLIB
-X struct lmf_str *m_fptr;
-#endif
-X
-X strncpy(rel_label,"\0",2);
-#ifdef SHOWREL
-X strncpy(rel_label," related",sizeof(rel_label));
-#endif
-#ifdef SHOWUN
-X strncpy(rel_label," unrelated",sizeof(rel_label));
-#endif
-X rel_label[sizeof(rel_label)-1]='\0';
-X
-#ifdef PCOMPLIB
-X quiet = 1;
-#else
-X quiet = m_msg->quiet;
-#endif
-X
-X n0 = m_msg->n0;
-X
-X if (m_msg->aln.llen > MAX_BLINE) m_msg->aln.llen = MAX_BLINE;
-X
-X if (pst.zsflag < 0) r_margin = 10;
-X else if (pst.zsflag>=0 && m_msg->srelv > 1 ) r_margin = 19;
-X else r_margin = 10;
-X
-X if (m_msg->markx & MX_M9SUMM && m_msg->show_code == SHOW_CODE_ID) {
-#ifdef SHOWSIM
-X r_margin += 15;
-#else
-X r_margin += 10;
-#endif
-X }
-X
-X if (m_msg->nframe < 0)
-#ifndef SUPERFAMNUM
-X sprintf(fmt,"%%-%ds (%%4d)",m_msg->aln.llen-r_margin);
-#else
-X sprintf(fmt,"%%-%ds [%%4d](%%4d)",m_msg->aln.llen-(r_margin+4));
-#endif
-X else
-X sprintf(fmt,"%%-%ds (%%4d)",m_msg->aln.llen-(r_margin+4));
-X
-X memset(pad,' ',m_msg->aln.llen-(r_margin+6));
-X pad[m_msg->aln.llen-(r_margin+12)]='\0';
-X
-X if (quiet != -1) { /* quiet is set to -1 in comp_mlib.c to force
-X all significant hits to be shown */
-X nshow = 20;
-X if (m_msg->mshow == -1) nshow = nbest; /* show all */
-X /* show specified number */
-X else if (m_msg->mshow_flg) {
-X nshow = min (m_msg->mshow, nshow);
-X }
-X }
-X else nshow = m_msg->nshow;
-X
-X if (quiet==0) istop = 20;
-X else istop = nshow;
-X
-X if (quiet==0) {
-X printf(" How many scores would you like to see? [%d] ",m_msg->nshow);
-X fflush(stdout);
-X if (fgets(rline,20,stdin)==NULL) exit(0);
-X nshow = m_msg->nshow;
-X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
-X if (nshow<=0) nshow = min(20,nbest);
-X }
-X
-X if ((bp = strchr (m_msg->qtitle, '\n')) != NULL) *bp = '\0';
-/* fprintf (fp, "%3d %s\n", qlib,m_msg->qtitle); */
-X
-X if (m_msg->markx & MX_HTML) fprintf(fp,"<p><tt><pre>\n");
-X
-X if (pst.zsflag >= 0) {
-X if (bptr[0]->escore < m_msg->e_cut) {
-X if (m_msg->z_bits==1) {/* show bit score */
-X fprintf(fp,"The best%s scores are:%s%s bits E(%ld)",
-X rel_label,pad,m_msg->label,pst.zdb_size);
-X }
-X else {/* show z-score */
-X fprintf(fp,"The best%s scores are:%s%s z-sc E(%ld)",
-X rel_label,pad,m_msg->label,pst.zdb_size);
-X }
-X header_aux(fp);
-X if (m_msg->markx & MX_M9SUMM) {
-X if (m_msg->show_code == SHOW_CODE_ID) {
-#ifdef SHOWSIM
-X fprintf(fp," %%_id %%_sim alen");
-#else
-X fprintf(fp," %%_id alen");
-#endif
-X }
-X else {
-X if (m_msg->markx & MX_HTML && m_msg->show_code !=1) { fprintf(fp,"<!-- ");}
-#ifndef SHOWSIM
-X fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
-#else
-X fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
-#endif
-X }
-X if (m_msg->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
-X if (m_msg->markx & MX_HTML && m_msg->show_code!=1) { fprintf(fp," -->");}
-X }
-X fprintf(fp,"\n");
-X }
-X else {
-X fprintf(fp,"!! No library sequences with E() < %.2g\n",m_msg->e_cut);
-X m_msg->nshow = 0;
-X if (m_msg->markx & MX_HTML) fprintf(fp,"<p></tt></pre>\n");
-X return;
-X }
-X }
-X else {
-X fprintf(fp,"The best%s scores are:%s%s",rel_label,pad,m_msg->label);
-X header_aux(fp);
-X if (m_msg->markx & MX_M9SUMM) {
-X if (m_msg->show_code == SHOW_CODE_ID) {
-#ifdef SHOWSIM
-X fprintf(fp," %%_id %%_sm alen");
-#else
-X fprintf(fp," %%_id alen");
-#endif
-X }
-X else {
-#ifndef SHOWSIM
-X fprintf(fp,"\t%%_id %%_gid %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
-#else
-X fprintf(fp,"\t%%_id %%_sim %4s alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs ",m_msg->f_id1);
-#endif
-X }
-X }
-X if (m_msg->show_code == SHOW_CODE_ALIGN) { fprintf(fp," aln_code"); }
-X fprintf(fp,"\n");
-X }
-X
-X istart = 0;
-l1:
-X istop = min(nbest,nshow);
-X for (ib=istart; ib<istop; ib++) {
-X bbp = bptr[ib];
-#ifdef SUPERFAMNUM
-X if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg->qsfnum_n,BBP_INFO(sfnum))) continue;
-#ifdef SHOWUN
-X if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg->qsfnum,BBP_INFO(sfnum))) {
-X istop = min(istop+1,nbest);
-X /*
-X fprintf(stderr,"skipping %d: %d==%d\n",ib,m_msg->qsfnum,BBP_INFO(sfnum));
-X */
-X continue;
-X }
-#endif
-#ifdef SHOWREL
-X if (BBP_INFO(nsfnum) == 0 || (BBP_INFO(nsfnum) > 0 && !sfn_cmp(m_msg->qsfnum,BBP_INFO(sfnum)))) {
-X istop = min(istop+1,nbest);
-X /*
-X fprintf(stderr,"skipping %d: %d==%d\n",ib,m_msg->qsfnum,BBP_INFO(sfnum));
-X */
-X continue;
-X }
-#endif
-#endif
-X if (quiet==1 && pst.zsflag>=0) {
-X if (bbp->escore > m_msg->e_cut) {
-X nshow = ib;
-X goto done;
-X }
-X else if (bbp->escore < m_msg->e_low) continue;
-X }
-X
-#ifndef PCOMPLIB
-X if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg->quiet))==NULL) {
-X fprintf(stderr,"*** cannot re-open %s\n",bbp->m_file_p->lb_name);
-X exit(1);
-X }
-X RANLIB(bline,m_msg->aln.llen,bbp->lseek,bbp->libstr,m_fptr);
-#else
-X strncpy(bline,BBP_INFO(bline),m_msg->aln.llen-r_margin);
-X bline[m_msg->aln.llen]='\0';
-#endif
-X
-X /* l_name is used to build an HTML link from the bestscore line to
-X the alignment. It can also be used to discriminate multiple hits
-X from the same long sequence. This requires that fast_pan use -m 6. */
-X
-X strncpy(l_name,bline,sizeof(l_name)); /* get rid of text after second "|" */
-X l_name[sizeof(l_name)-1]='\0';
-X if ((bp=strchr(l_name,' '))!=NULL) *bp=0;
-X if ((bp=strchr(&l_name[3],'|'))!=NULL) *bp='\0';
-X if (m_msg->nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
-X else if (m_msg->nframe > 0 && bbp->frame == 1)
-X strncat(l_name,"_r",sizeof(l_name));
-X if (bbp->cont-1 > 0) {
-X sprintf(tmp_str,":%d",bbp->cont-1);
-X strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name));
-X }
-X
-X
-#ifndef PCOMPLIB
-X if (m_msg->stages>1 || m_msg->markx & MX_M9SUMM) {
-X if (bbp->cont >= 0) {
-X n1 = re_getlib(aa1,maxn,m_msg->maxt3,m_msg->loff,bbp->cont,m_msg->term_code,
-X &loffset,&l_off,bbp->m_file_p);
-X }
-X else { n1 = maxn;}
-X if (! m_msg->markx & MX_M9SUMM) {
-X do_opt (aa0[bbp->frame], m_msg->n0, aa1, n1, bbp->frame, &pst, f_str[bbp->frame], &rst);
-X bbp->score[2]=rst.score[2];
-X }
-X else {
-X bbp->sw_score =
-X do_walign(aa0[bbp->frame],m_msg->n0, aa1, n1, bbp->frame,
-X &pst, f_str[bbp->frame], &bbp->a_res, &bbp->have_ares);
-X
-X
-X /* save the alignment encoding for future use */
-X if (bbp->have_ares && ((tres = calloc(bbp->a_res.nres+1,sizeof(int)))!=NULL)) {
-X memcpy(tres,bbp->a_res.res,sizeof(int)*bbp->a_res.nres);
-X bbp->a_res.res = tres;
-X }
-X
-X aln_func_vals(bbp->frame, &m_msg->aln);
-X
-X maxc = bbp->a_res.nres + 4*m_msg->aln.llen+4;
-X seqc = NULL;
-X seqc_len = 0;
-X if (m_msg->show_code == SHOW_CODE_ALIGN) {
-X if ((seqc=(char *)calloc(maxc,sizeof(char)))!=NULL) {
-X lc=calc_code(aa0[bbp->frame],m_msg->n0,
-X aa1,n1,
-X &m_msg->aln,bbp->a_res,
-X pst,seqc,maxc,f_str[bbp->frame]);
-X seqc_len = strlen(seqc);
-X }
-X }
-X else {
-X lc=calc_id(aa0[bbp->frame],m_msg->n0,aa1,n1,
-X &m_msg->aln, bbp->a_res,
-X pst,f_str[bbp->frame]);
-X }
-X m_msg->aln.a_len = lc;
-X
-X nident = m_msg->aln.nident;
-X if (lc > 0) percent = (100.0*(float)nident)/(float)lc;
-X else percent = -1.00;
-X
-X ngap = m_msg->aln.ngap_q + m_msg->aln.ngap_l;
-#ifndef SHOWSIM
-X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
-X else gpercent = -1.00;
-#else
-X if (lc-ngap > 0) gpercent = (100.0*(float)m_msg->aln.nsim)/(float)(lc);
-X else gpercent = -1.00;
-#endif
-X
-X }
-X }
-#endif
-X
-X n1tot = (BBP_INFO(n1tot_p)) ? *BBP_INFO(n1tot_p) : bbp->n1;
-X
-X bp = bline;
-X if ((m_msg->markx & MX_HTML) && !strncmp(bline,"gi|",3)) {
-X bp = strchr(bline+4,'|')+1;
-X *(bp-1) = 0;
-X gi_num = atoi(bline+3);
-X }
-X
-#ifndef SUPERFAMNUM
-X bp[m_msg->aln.llen-r_margin]='\0';
-#else
-X bp[m_msg->aln.llen-r_margin-5]='\0';
-#endif
-X
-X if (m_msg->nframe == -1) bp[m_msg->aln.llen-r_margin]='\0';
-X else bp[m_msg->aln.llen-(r_margin+4)]='\0';
-X
-#ifndef SUPERFAMNUM
-X fprintf (fp, fmt,bp,n1tot);
-#else
-X if (m_msg->nframe == -1) {
-X fprintf (fp, fmt,bp,BBP_INFO(sfnum[0]),n1tot);
-X }
-X else {fprintf (fp, fmt,bp,n1tot);}
-#endif
-X
-X if (m_msg->nframe > 2) fprintf (fp, " [%d]", bbp->frame+1);
-X else if (m_msg->nframe >= 0) fprintf(fp," [%c]",(bbp->frame > 0 ?'r':'f'));
-X
-X if (m_msg->srelv == 1) fprintf (fp, " %4d", bbp->score[pst.score_ix]);
-X else {
-X if (m_msg->srelv-1 > 0) fprintf (fp, " %4d", bbp->score[0]);
-X if (m_msg->srelv-1 > 1 || m_msg->stages>1)
-X fprintf (fp, " %4d", bbp->score[1]);
-X fprintf (fp, " %4d", bbp->score[pst.score_ix]);
-X }
-X
-X if (pst.zsflag>=0) {
-X if (m_msg->z_bits==1) {
-X fprintf (fp, " %.1f %7.2g",zs_to_bit(bbp->zscore,m_msg->n0,bbp->n1),bbp->escore);
-X }
-X else fprintf (fp, " %.1f %7.2g",bbp->zscore,bbp->escore);
-X }
-X show_aux(fp,bbp);
-X
-#ifdef PCOMPLIB
-X n1 = bbp->n1;
-X percent = bbp->percent;
-X gpercent = bbp->gpercent;
-X aln_p = bbp->aln_d;
-X seqc = bbp->aln_code;
-X seqc_len = bbp->aln_code_n;
-X loffset = bbp->desptr->loffset;
-X l_off = 0;
-#else
-X aln_p = &(m_msg->aln);
-#endif
-X
-X if (m_msg->markx & MX_M9SUMM) {
-X if (m_msg->show_code != SHOW_CODE_ID) {
-X if (m_msg->markx & MX_HTML) fprintf(fp,"<!-- ");
-X cal_coord(m_msg->n0,bbp->n1,m_msg->sq0off,loffset+l_off-1,aln_p);
-X
-X /* %_id %_sim s-w alen an0 ax0 pn0 px0 an1 ax1 pn1 px1 gapq gapl fs */
-X /* alignment min max min max */
-X /* sequence coordinate min max min max */
-X fprintf(fp,"\t%5.3f %5.3f %4d %4d %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %3d %3d %3d",
-X percent/100.0,gpercent/100.0, bbp->sw_score,aln_p->a_len,
-X aln_p->d_start0,aln_p->d_stop0,
-X m_msg->sq0off, m_msg->sq0off+m_msg->n0-1,
-X aln_p->d_start1,aln_p->d_stop1,
-X loffset+l_off, loffset+l_off+bbp->n1-1,
-X aln_p->ngap_q,aln_p->ngap_l,aln_p->nfs);
-X if (m_msg->show_code == SHOW_CODE_ALIGN
-X && seqc_len > 0 && seqc != NULL) {
-X fprintf(fp,"\t%s",seqc);
-X /* fprintf(fp," [%2d:%d]",bbp->wrkr,bbp->seqnm); */
-X free(seqc);
-X seqc = NULL;
-X }
-X if (m_msg->markx & MX_HTML) fprintf(fp," -->");
-X }
-X else {
-#ifdef SHOWSIM
-X fprintf(fp," %5.3f %5.3f %4d", percent/100.0,(float)aln_p->nsim/(float)aln_p->a_len,aln_p->a_len);
-#else
-X fprintf(fp," %5.3f %4d", percent/100.0,aln_p->a_len);
-#endif
-X }
-X }
-X if (m_msg->markx & MX_HTML) fprintf(fp," <A HREF=\"#%s\">align</A>",l_name);
-X fprintf (fp, "\n");
-X fflush(fp);
-X }
-X
-X if (quiet==0) {
-X printf(" More scores? [0] ");
-X fflush(stdout);
-X if (fgets(rline,20,stdin)==NULL) exit(0);
-X ntmp = 0;
-X if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
-X if (ntmp<=0) ntmp = 0;
-X if (ntmp>0) {
-X istart = istop;
-X nshow += ntmp;
-X goto l1;
-X }
-X }
-X else if (quiet == 1)
-X if (ib < nbest && (pst.zsflag>=0 && bbp->escore < m_msg->e_cut)) {
-X if (m_msg->mshow_flg && istop >= m_msg->mshow) goto done;
-X istart=istop;
-X nshow += 10;
-X goto l1;
-X }
-X
-X done:
-X m_msg->nshow = nshow;
-X if (m_msg->markx & MX_HTML) fprintf(fp,"</pre></tt><p><hr><p>\n");
-X if (fp!=stdout) fprintf(fp,"\n");
-}
-X
-/*
-X q[] has one set of sfnums, 0 terminated
-X s[] has second
-X return first match or 0
-*/
-SHAR_EOF
-chmod 0644 mshowbest.c ||
-echo 'restore of mshowbest.c failed'
-Wc_c="`wc -c < 'mshowbest.c'`"
-test 14393 -eq "$Wc_c" ||
- echo 'mshowbest.c: original size 14393, current size' "$Wc_c"
-fi
-# ============= mu.lib ==============
-if test -f 'mu.lib' -a X"$1" != X"-c"; then
- echo 'x - skipping mu.lib (File already exists)'
-else
-echo 'x - extracting mu.lib (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mu.lib' &&
->GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU
-PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
-QSNAILRY
-LARKHHLDGETEEERIRADIVENQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFA
-GDKVTYVD
-FLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
->GTM1_HUMAN GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GSTM1-1) (HB SUBUNI
-PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKIT
-QSNAI
-LCYIARKHNLCGETEEEKIRVDILENQTMDNHMQLGMICYNPEFEKLKPKYLEELPEKLKLYSEFLGKRP
-WFAGN
-KITFVDFLVYDVLDLHRIFEPKCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPRPVFSKMAVWGNK
->GTMU_CRILO GLUTATHIONE S-TRANSFERASE Y1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
-PMILGYWNVRGLTNPIRLLLEYTDSSYEEKKYTMGDAPDSDRSQWLNEKFKLGLDFPNLPYLIDGSHKIT
-QSNAI
-LRYIARKHNLCGETEEERIRVDIVENQAMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKMYSEFLGKRP
-WFAGD
-KVTLCGFLAYDVLDQYQMFEPKCLDPFPNLKDFLARFEGLKKISAYMKTSRFLRRPIFSKMAQWSNK
->GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
-PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGSRKIT
-QSNAI
-MRYLARKHHLCGETEEERIRADIVENQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRP
-WFAGD
-KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSSRYLSTPIFSKLAQWSNK
->GTMU_RABIT GLUTATHIONE S-TRANSFERASE MU 1 (EC 2.5.1.18) (GST MU I) (CLASS-MU
-PMTLGYWDVRGLALPIRMLLEYTDTSYEEKKYTMGDAPNYDQSKWLSEKFTLGLDFPNLPYLIDGTHKLT
-QSNAI
-LRYLARKHGLCGETEEERIRVDILENQLMDNRFQLVNVCYSPDFEKLKPEYLKGLPEKLQLYSQFLGSLP
-WFAGD
-KITFADFLVYDVLDQNRIFVPGCLDAFPNLKDFHVRFEGLPKISAYMKSSRFIRVPVFLKKATWTGI
->GTM4_HUMAN GLUTATHIONE S-TRANSFERASE MU 4 (EC 2.5.1.18) (GSTM4-4) (GTS-MU2)
-MSMTLGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLPYLIDGAHKI
-TQSNAILC
-YIARKHNLCGETEEEKIRVDILENQAMDVSNQLARVCYSPDFEKLKPEYLEELPTMMQHFSQFLGKRPWF
-VGDKITFV
-DFLAYDVLDLHRIFEPNCLDAFPNLKDFISRFEGLEKISAYMKSSRFLPKPLYTRVAVWGNK
->GLNA_ANASP GLUTAMINE SYNTHETASE (EC 6.3.1.2) (GLUTAMATE--AMMONIA LIGASE).
-TTPQEVLKRIQDEKIELIDLKFIDTVGTWQHLTLYQNQIDESSFSDGVPFDGSSIRGWKAINESDMTMVL
-DPNTA
-WIDPFMEVPTLSIVCSIKEPRTGEWYNRCPRVIAQKAIDYLVSTGIGDTAFFGPEAEFFIFDSARFAQNA
-NEGYY
-FLDSVEGAWNSGKEGTADKPNLAYKPRFKEGYFPVSPTDSFQDIRTEMLLTMAKLGVPIEKHHHEVATGG
-QCELG
-FRFGKLIEAADWLMIYKYVIKNVAKKYGKTVTFMPKPIFGDNGSGMHCHQSIWKDGKPLFAGDQYAGLSE
-MGLYY
-IGGLLKHAPALLAITNPSTNSYKRLVPGYEAPVNLAYSQGNRSASIRIPLSGTNPKAKRLEFRCPDATSN
-PYLAF
-AAMLCAGIDGIKNKIHPGEPLDKNIYELSPEELAKVPSTPGSLELALEALENDHAFLTDTGVFTEDFIQN
-WIDYK
-LANEVKQMQLRPHPYEFSIYYDV
-SHAR_EOF
-chmod 0644 mu.lib ||
-echo 'restore of mu.lib failed'
-Wc_c="`wc -c < 'mu.lib'`"
-test 2361 -eq "$Wc_c" ||
- echo 'mu.lib: original size 2361, current size' "$Wc_c"
-fi
-# ============= musplfm.aa ==============
-if test -f 'musplfm.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping musplfm.aa (File already exists)'
-else
-echo 'x - extracting musplfm.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'musplfm.aa' &&
->musplfm transl. of musplfm.seq, 2 to 676
-X M L P S L I Q P C S W I L L L
-X L L V N S S L L W K N V A S F P
-X M C A M R N G R C F M S F E D T
-X F E L A G S L S H N I S I E V S
-X E L F T E F E K H Y S N V S G L
-X R D K S P M R C N T S F L P T P
-X E N K E Q A R L T H Y S A L L K
-X S G A M I L D A W E S P L D D L
-X V S E L S T I K N V P D I I I S
-X K A T D I K K K I N A V R N G V
-X N A L M S T M L Q N G D E E K K
-X N P A W F L Q S D N E D A R I H
-X S L Y G M I S C L D N D F K K V
-X D I Y L N V L K C Y M L K I D N
-X C
-SHAR_EOF
-chmod 0644 musplfm.aa ||
-echo 'restore of musplfm.aa failed'
-Wc_c="`wc -c < 'musplfm.aa'`"
-test 953 -eq "$Wc_c" ||
- echo 'musplfm.aa: original size 953, current size' "$Wc_c"
-fi
-# ============= mw.h ==============
-if test -f 'mw.h' -a X"$1" != X"-c"; then
- echo 'x - skipping mw.h (File already exists)'
-else
-echo 'x - extracting mw.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mw.h' &&
-/* Concurrent read version */
-X
-/* $Name: fa_34_26_5 $ - $Id: mw.h,v 1.20 2006/03/20 17:38:15 wrp Exp $ */
-X
-#include <sys/types.h>
-X
-#include "aln_structs.h"
-X
-#ifndef FSEEK_T_DEF /* declare fseek_t only if FSEEK_T_DEF is not set; NOTE(review): this block never defines FSEEK_T_DEF - confirm which header does */
-#ifndef USE_FSEEKO
-typedef long fseek_t; /* default: library file positions are held in a long */
-#else
-typedef off_t fseek_t; /* USE_FSEEKO builds: positions use off_t (<sys/types.h> is included above) */
-#endif
-#endif
-X
-struct beststr { /* one scored library sequence, with the values needed to list and align it */
-X int n1; /* sequence length */
-X int *n1tot_p; /* pointer (or NULL) to long sequence length; mshowbest.c falls back to n1 when NULL */
-X int score[3]; /* scores; mshowbest.c prints score[0], score[1] and score[pst.score_ix] */
-X int sw_score; /* do_walign() score */
-X double comp; /* NOTE(review): meaning not visible here - presumably a composition statistic; confirm */
-X double H; /* NOTE(review): meaning not visible here - presumably an entropy-like statistic; confirm */
-X double zscore; /* z-score; mshowbest.c prints it directly or converted by zs_to_bit() */
-X double escore; /* expectation value; mshowbest.c prints it with %7.2g and compares it with e_cut */
-X int segnum; /* NOTE(review): use not visible in this chunk */
-X int seglen; /* NOTE(review): use not visible in this chunk */
-X struct lmf_str *m_file_p; /* library descriptor; presumably the library this sequence was read from - confirm */
-X fseek_t lseek; /* presumably the position of the sequence in its library - confirm */
-X char libstr[MAX_UID]; /* identifier string; presumably the UID returned by the getlib() functions - confirm */
-X int cont; /* NOTE(review): presumably a continuation counter for long sequences - confirm */
-X int frame; /* frame index; mshowbest.c prints frame+1, or 'r' when > 0 and 'f' otherwise */
-X int nsfnum; /* presumably the number of entries used in sfnum[] - confirm */
-X int sfnum[10]; /* mshowbest.c prints sfnum[0] in SUPERFAMNUM builds; presumably superfamily numbers - confirm */
-X long loffset; /* presumably the coordinate offset of the library sequence - confirm */
-X struct a_struct aln_d; /* these values are used by -m9 */
-X struct a_res_str a_res; /* need only a_res, not a_res[2], because different frames
-X for the same sequence are stored separately */
-X int have_ares; /* presumably non-zero once a_res has been filled in - confirm */
-X float percent, gpercent; /* identity percentages; mshowbest.c prints both divided by 100 in the -m9 summary */
-};
-X
-struct stat_str { /* reduced per-sequence record: carries a subset of the beststr fields; presumably kept for the statistical estimates - confirm against users */
-X int score; /* a single score (beststr keeps score[3]) */
-X int n1; /* sequence length, as in beststr */
-X double comp; /* same name and type as beststr.comp */
-X double H; /* same name and type as beststr.H */
-X double escore; /* same name and type as beststr.escore */
-X int segnum; /* same name and type as beststr.segnum */
-X int seglen; /* same name and type as beststr.seglen */
-};
-X
-X
-SHAR_EOF
-chmod 0644 mw.h ||
-echo 'restore of mw.h failed'
-Wc_c="`wc -c < 'mw.h'`"
-test 1042 -eq "$Wc_c" ||
- echo 'mw.h: original size 1042, current size' "$Wc_c"
-fi
-# ============= mwkw.aa ==============
-if test -f 'mwkw.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping mwkw.aa (File already exists)'
-else
-echo 'x - extracting mwkw.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mwkw.aa' &&
->MWKW Myosin heavy chain - Caenorhabditis elegans
-MEHEKDPGWQYLRRTREQVLEDQSKPYDSKKNVWIPDPEEGYLAGEITATKGDQVTIVTAREMSVIQVTL
-KKELVQEMNPPKFEKTEDMSNLSFLNDASVLHNLRSRYAAMLIYTYSGLFCVVINPYKRLPIYTDSCARM
-FMGKRKTEMPPHLFAVSDEAYRNMLQDHENQSMLITGESGAGKTENTKKVICYFAAVGASQQEGGAEVDP
-NKKKVTLEDQIVQTNPVLEAFGNAKTVRNNNSSRFGKFIRIHFNKHGRLASCDIEHYLLEKSRVIRQAPG
-ERCYHIFYQIYSDFRPELKKELLLDLPIKDYWFVAQAELIIDGIDDVEEFQLTDEAFDILNFSAVEKQDC
-YRLMSAHMHMGNMKFKQRPREEQAEPDGTVEAEKASNMYGIGCE
-EFLKALTKPRVKVGTEWVSKGQNCEQVNWAVGAMAKGLYSRVFNWLVKKCNLTLDQKGIDRDYFIGVLDI
-AGFEIFDFNSFEQLWINFVNEKLQQFFNHHMFVLEQEEYAREGIQWVFIDFGLDLQACIELIEKPLGIIS
-MLDEECIVPKATDLTLASKLVDQHLGKHPNFEKPKPPKGKQGEAHFAMRHYAGTVRYNCLNWLEKNKDPL
-NDTVVSAMKQSKGNDLLVEIWQDYTTQEEAAAKAKEGGGGGKKKGKSGSFMTVSMLYRESLNNLMTMLNK
-THPHFIRCIIPNEKKQSGMIDAALVLNQLTCNGVLEGIRICRKGFPNRTLHPDFVQRYAILAAKEAKSDD
-DKKKCAEAIMSKLVNDGSLSEEMFRIGLTKVFFKAGVLAHLEDI
-RDEKLATILTGFQSQIRWHLGLKDRKRRMEQRAGLLIVQRNVRSWCTLRTWEWFKLYGKVKPMLKAGKEA
-EELEKINDKVKALEDSLAKEEKLRKELEESSAKLVEEKTSLFTNLESTKTQLSDAEERLAKLEAQQKDAS
-KQLSELNDQLADNEDRTADVQRAKKKIEAEVEALKKQIQDLEMSLRKAESEKQSKDHQIRSLQDEMQQQD
-EAIAKLNKEKKHQEEINRKLMEDLQSEEDKGNHQNKVKAKLEQTLDDLEDSLEREKRARADLDKQKRKVE
-GELKIAQENIDESGRQRHDLENNLKKKESELHSVSSRLEDEQALVSKLQRQIKDGQSRISELEEELENER
-QSRSKADRAKSDLQRELEELGEKLDEQGGATAAQVEVNKKREAE
-LAKLRRDLEEANMNHENQLGGLRKKHTDAVAELTDQLDQLNKAKAKVEKDKAQAVRDAEDLAAQLDQETS
-GKLNNEKLAKQFELQLTELQSKADEQSRQLQDFTSLKGRLHSENGDLVRQLEDAESQVNQLTRLKSQLTS
-QLEEARRTADEEARERQTVAAQAKNYQHEAEQLQESLEEEIEGKNEILRQLSKANADIQQWKARFEGEGL
-LKADELEDAKRRQAQKINELQEALDAANSKNASLEKTKSRLVGDLDDAQVDVERANGVASALEKKQKGFD
-KIIDEWRKKTDDLAAELDGAQRDLRNTSTDLFKAKNAQEELAEVVEGLRRENKSLSQEIKDLTDQLGEGG
-RSVHEMQKIIRRLEIEKEELQHALDEAEAALEAEESKVLRAQVE
-VSQIRSEIEKRIQEKEEEFENTRKNHARALESMQASLETEAKGKAELLRIKKKLEGDINELEIALDHANK
-ANADAQKNLKRYQEQVRELQLQVEEEQRNGADTREQFFNAEKRATLLQSEKEELLVANEAAERARKQAEY
-EAADARDQANEANAQVSSLTSAKRKLEGEIQAIHADLDETLNEYKAAEERSKKAIADATRLAEELRQEQE
-HSQHVDRLRKGLEQQLKEIQVRLDEAEAAALKGGKKVIAKLEQRVRELESELDGEQRRFQDANKNLGRAD
-RRVRELQFQVDEDKKNFERLQDLIDKLQQKLKTQKKQVEEAEELANLNLQKYKQLTHQLEDAEERADQAE
-NSLSKMRSKSRASASVAPGLQSSASAAVIRSPSRARASDF
-SHAR_EOF
-chmod 0644 mwkw.aa ||
-echo 'restore of mwkw.aa failed'
-Wc_c="`wc -c < 'mwkw.aa'`"
-test 2047 -eq "$Wc_c" ||
- echo 'mwkw.aa: original size 2047, current size' "$Wc_c"
-fi
-# ============= mwrtc1.aa ==============
-if test -f 'mwrtc1.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping mwrtc1.aa (File already exists)'
-else
-echo 'x - extracting mwrtc1.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mwrtc1.aa' &&
->MWRTC1 - Myosin heavy chain 1, cardiac muscle - Rat (fragment)
-/DLTEQLGEGGKNVHELEKIRKQLEVEKLELQSALEEAEASLEHEEGKILRAQLEFNQIKAEIE
-SKLAEKDEEMEQAKRNHLRVVDSLQTSLDAETRSRNEALRVKKKMEGDLNEMEIQLSQANRIAS
-EAQKHLKNAQAHLKDTQLQLDDAVRANDDLKENIAIVERRNTLLQAELEELRAVVEQTERSRKL
-AEQELIETSERVQLLHSQNNSLINQKKKMDADLSQLQTEVEEAVQECRNAEEKAKKAITDAAMM
-AEELKKEQDTSAHLERMKKNMEQTIKDLQHRLDEAEQIALKGGKKQLQKLEARVRELENELEAE
-QKRNAESVKGMRKSERRIKELNYQTEEDKKNLVRLQDLVNKLQLKVKAYKRQAEEAEEQANTNL
-SKFRKVQHELDEAEERADIAESQVNKLRAKSRDIGAKQKIHDEE*
-SHAR_EOF
-chmod 0644 mwrtc1.aa ||
-echo 'restore of mwrtc1.aa failed'
-Wc_c="`wc -c < 'mwrtc1.aa'`"
-test 500 -eq "$Wc_c" ||
- echo 'mwrtc1.aa: original size 500, current size' "$Wc_c"
-fi
-# ============= myosin_bp.aa ==============
-if test -f 'myosin_bp.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping myosin_bp.aa (File already exists)'
-else
-echo 'x - extracting myosin_bp.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'myosin_bp.aa' &&
->gi|46049110|ref|NP_996557| myosin binding protein C, slow type isoform 4; myosin-binding protein C, slow-type; skeletal muscle C-protein [Homo sapiens]
-MPEPTkkeenevpapapppeepskekeaGTTPAKDWTLVETPPGEEQAKQNANSQLSILF
-IEKPQGGTVKVGEDITFIAKVKAEDLLRKPTIKWFKGKWMDLASKAGKHLQLKETFERHS
-RVYTFEMQIIKAKDNFAGNYRCEVTYKDKFDSCSFDLEVHESTGTTPNIDIRSAFKRSGE
-GQEDAGELDFSGLLKRREVKQQEEEPQVDVWELLKNAKPSEYEKIAFQYGITDLRGmlkr
-lkrmrreekkSAAFAKILDPAYQVDKGGRVRFVVELADPKLEVKWYKNGQEIRPSTKYIF
-EHKGCQRILFINNCQMTDDSEYYVTAGDEKCSTELFVREPPIMVTKQLEDTTAYCGERVE
-LECEVSEDDANVKWFKNGEEIIPGPKSRYRIRVEGKKHILIIEGATKADAAEYSVMTTGG
-QSSAKLSVDLKPLKILTPLTDQTVNLGKEICLKCEISENIPGKWTKNGLPVQESDRLKVV
-HKGRIHKLVIANALTEDEGDYVFAPDAYNVTLPAKVHVIDPPKIILDGLDADNTVTVIAG
-NKLRLEIPISGEPPPKAMWSRGDKAIMEGSGRIRTESYPDSSTLVIDIAERDDSGVYHIN
-LKNEAGEAHASIkvkvvdfpdppvaptvtEVGDDWCIMNWEPPAYDGGSPILGYFIERKK
-KQSSRWMRLNFDLCKETTFEPKKMIEGVAYEVRIFAVNAIGISKPSMPSRPFVPLAVTSP
-PtlltvdsvtdttvtMRWRPPDHIGAAGLDGYVLEYCFEGTEDWIVANKDLIDKTKFTIT
-GLPTDAKIFVRVKAVNAAGASEPKYYSQPILVkeiieppkiriprHLKQTYIRRVGEAVN
-LVIPFQGKPRPELTWKKDGAEIDKNQINIRNSETDTIIFIRKAERSHSGKYDLQVKVDKF
-VETASIDIQIIDRPGPPQIVKIEDVWGENVALTWTPPKDDGNAAITGYTIQKADKKSMEW
-FTVIEHYHRTSATITELVIGNEYYFRVFSENMCGLSEDATMTKESAVIARDGKIYKNPVY
-EDFDFSEAPMFTQPLVNTYAIAGYNATLNCSVRGNPKPKITWMKNKVAIVDDPRYRMFSN
-QGVCTLEIRKPSPYDGGTYCCKAVNDLGTVEIECKLEVKVIAQ
-SHAR_EOF
-chmod 0644 myosin_bp.aa ||
-echo 'restore of myosin_bp.aa failed'
-Wc_c="`wc -c < 'myosin_bp.aa'`"
-test 1294 -eq "$Wc_c" ||
- echo 'myosin_bp.aa: original size 1294, current size' "$Wc_c"
-fi
-# ============= mysql_demo1.sql ==============
-if test -f 'mysql_demo1.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping mysql_demo1.sql (File already exists)'
-else
-echo 'x - extracting mysql_demo1.sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mysql_demo1.sql' &&
-xdb.wrplab seqdb_demo wrplab gstmu;
-SELECT acc, protein.seq, sp_name
-X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
-SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
-SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
-X WHERE annot.acc='#' AND db='sp';
-SHAR_EOF
-chmod 0644 mysql_demo1.sql ||
-echo 'restore of mysql_demo1.sql failed'
-Wc_c="`wc -c < 'mysql_demo1.sql'`"
-test 340 -eq "$Wc_c" ||
- echo 'mysql_demo1.sql: original size 340, current size' "$Wc_c"
-fi
-# ============= mysql_demo_pv.sql ==============
-if test -f 'mysql_demo_pv.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping mysql_demo_pv.sql (File already exists)'
-else
-echo 'x - extracting mysql_demo_pv.sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mysql_demo_pv.sql' &&
-xdb.wrplab seqdb_demo wrplab gstmu;
-SELECT acc, protein.seq, sp_name, concat('sp|',acc,'|',sp_name,' ',descr)
-X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
-SELECT acc, concat('sp|',acc,'|',sp_name,' ',descr) FROM annot WHERE acc='#' AND db='sp';
-SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
-X WHERE annot.acc='#' AND db='sp';
-SHAR_EOF
-chmod 0644 mysql_demo_pv.sql ||
-echo 'restore of mysql_demo_pv.sql failed'
-Wc_c="`wc -c < 'mysql_demo_pv.sql'`"
-test 381 -eq "$Wc_c" ||
- echo 'mysql_demo_pv.sql: original size 381, current size' "$Wc_c"
-fi
-# ============= mysql_lib.c ==============
-if test -f 'mysql_lib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping mysql_lib.c (File already exists)'
-else
-echo 'x - extracting mysql_lib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'mysql_lib.c' &&
-X
-/* mysql_lib.c copyright (c) 2000 William R. Pearson */
-X
-/* $Name: fa_34_26_5 $ - $Id: mysql_lib.c,v 1.21 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* functions for opening, reading, seeking a mySQL database */
-X
-/*
-X For the moment, this interface assumes that the file to be searched will
-X be specified in a single, long, string with 4 parts:
-X
-X (1) a database open string. This string has four fields, separated by
-X whitespace (' \t'):
-X hostname:port dbname user password
-X
-X '--' dashes at the beginning of lines are ignored -
-X thus the first line could be:
-X -- hostname:port dbname user password
-X
-X (2) a database query string that will return a unique ID (not
-X necessarily numeric, but it must be < 12 characters as libstr[12]
-X is used) and a sequence string
-X
-X (2a) a series of mySQL commands that do not generate results
-X starting with 'DO', followed by a select() statement.
-X
-X (3) a database select string that will return a description
-X given a unique ID
-X
-X (4) a database select string that will return a sequence given a
-X unique ID
-X
-X Lines (3) and (4) are not required for pv34comp* libraries, but
-X line (2) must generate a complete description as well as a sequence.
-X
-X
-X 18-July-2001
-X Additional syntax has been added to support multiline SQL queries.
-X
-X If the host line begins with '+', then the SQL is opened on the same
-X connection as the previous SQL file.
-X
-X If the host line contains '-' just before the terminal ';', then
-X the file will not produce any output.
-X
-X This string can contain "\n". ";" are used to separate the four
-X functions, which must be specified in the order shown above.
-X The last (fourth) query must terminate with a ';' */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-X
-#include <mysql.h>
-#define MYSQL_LIB 16
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#define EOSEQ 0
-/* #include "upam.h" */
-X
-#ifdef SUPERFAMNUM
-int sfnum[10], nsfnum;
-#endif
-X
-int mysql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-void mysql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
-X
-#define MYSQL_BUF 4096
-X
-/*
-X * mysql_openlib() - read a mySQL library description, connect to the
-X * database and start the main query.
-X *
-X * sname   - name of a file containing the description, or, when it starts
-X *           with '%', the description text itself (following the '%')
-X * ldnaseq - not referenced in this function
-X * sascii  - residue lookup table, saved in the returned descriptor
-X *
-X * The description is split at ';' into: the connection fields
-X * "host[:port] dbname user password" (leading '-' are blanked), optional
-X * "DO ..." statements that are executed immediately, the main query, the
-X * get-description query and the get-sequence query.
-X *
-X * Returns a calloc()ed lmf_str whose main query has been started with
-X * mysql_use_result(), or NULL on failure.  On failure every buffer
-X * allocated here is freed and the connection, if any, is closed.
-X */
-struct lmf_str *
-mysql_openlib(char *sname, int ldnaseq, int *sascii) {
-X  FILE *sql_file;
-X  char *tmp_str, *ttmp_str, *new_str;
-X  int tmp_str_len;
-X  char *bp, *bps, *bdp, *tp, tchar;
-X  int qs_len, qqs_len;
-X  char *sql_db = NULL, *sql_host, *sql_dbname, *sql_user, *sql_pass;
-X  char *sql_do = NULL;
-X  int sql_do_cnt;
-X  int sql_port;
-X  struct lmf_str *m_fptr = NULL;
-X
-X  /* if (sql_reopen) return NULL; - should not be called for re-open */
-X
-X  tmp_str_len = MYSQL_BUF;
-X  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
-X    fprintf(stderr,"cannot allocate %d for mySQL buffer\n",tmp_str_len);
-X    return NULL;
-X  }
-X
-X  if (sname[0] == '%') {
-X    strncpy(tmp_str,sname+1,tmp_str_len);
-X    /* terminate at the end of the buffer; sizeof(tmp_str) is only the
-X       size of the pointer and would cut the description short */
-X    tmp_str[tmp_str_len-1]='\0';
-X  }
-X  else {
-X    if ((sql_file=fopen(sname,"r"))==NULL) {
-X      fprintf(stderr," cannot open mySQL file: %s\n",sname);
-X      goto error_r;
-X    }
-X
-X    if ((qs_len=fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
-X      fprintf(stderr," cannot read mySQL file: %s\n",sname);
-X      fclose(sql_file);
-X      goto error_r;
-X    }
-X
-X    tmp_str[qs_len]='\0';
-X    qqs_len = qs_len;
-X    /* the buffer is full - grow it by MYSQL_BUF and keep reading */
-X    while (qqs_len >= tmp_str_len-1) {
-X      tmp_str_len += MYSQL_BUF;
-X      /* keep the old pointer until realloc() succeeds so it can be freed */
-X      if ((new_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
-X        fprintf(stderr,
-X                " cannot reallocate %d for mySQL buffer\n",tmp_str_len);
-X        fclose(sql_file);
-X        goto error_r;
-X      }
-X      tmp_str = new_str;
-X      ttmp_str = &tmp_str[qqs_len];
-X      qs_len=fread(ttmp_str,sizeof(char),MYSQL_BUF,sql_file);
-X      /* fread() never returns a negative count; ask the stream instead */
-X      if (qs_len < 0 || ferror(sql_file)) {
-X        fprintf(stderr," cannot read mySQL file: %s\n",sname);
-X        fclose(sql_file);
-X        goto error_r;
-X      }
-X      ttmp_str[qs_len]='\0';
-X      qqs_len += qs_len;
-X    }
-X    fclose(sql_file);
-X  }
-X
-X  /* first ';'-terminated field: the connection string */
-X  bps = tmp_str;
-X  if ((bp=strchr(bps,';'))==NULL) {
-X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X
-X  *bp='\0';
-X  if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for database name [%lu], %s\n",
-X            (unsigned long)strlen(bps),bps);
-X    goto error_r;
-X  }
-X
-X  /* have database name, parse the fields */
-X  strcpy(sql_db,bps);	/* strcpy OK because allocated strlen(bps) */
-X  bps = bp+1;		/* points to next char after ';' */
-X  while (isspace((unsigned char)*bps)) bps++;
-X  *bp=';';		/* replace ; */
-X  bp = sql_db;
-X  while (*bp=='-') {*bp++ = ' ';}
-X  sql_host = strtok(bp," \t\n");
-X  sql_dbname = strtok(NULL," \t\n");
-X  sql_user = strtok(NULL," \t\n");
-X  sql_pass = strtok(NULL," \t\n");
-X  if (sql_host == NULL) {	/* empty connection string */
-X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X  if ((tp=strchr(sql_host,':'))!=NULL) {
-X    *tp='\0';
-X    sql_port=atoi(tp+1);
-X  }
-X  else sql_port = 0;
-X
-X  /* we have all the info we need to open a database, allocate lmf_str */
-X  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
-X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
-X            (long)sizeof(struct lmf_str),sname);
-X    goto error_r;
-X  }
-X
-X  /* have our struct, initialize it */
-X
-X  strncpy(m_fptr->lb_name,sname,MAX_FN);
-X  m_fptr->lb_name[MAX_FN-1]='\0';
-X
-X  m_fptr->sascii = sascii;
-X
-X  m_fptr->sql_db = sql_db;	/* sql_host etc. point into this buffer */
-X  m_fptr->getlib = mysql_getlib;
-X  m_fptr->ranlib = mysql_ranlib;
-X  m_fptr->mm_flg = 0;
-X  m_fptr->sql_reopen = 0;
-X  m_fptr->lb_type = MYSQL_LIB;
-X
-X  /* now open the database, if necessary */
-X  if ((m_fptr->mysql_conn=mysql_init(NULL))==NULL) {
-X    fprintf(stderr,"*** Error - mysql_init\n");
-X    goto error_r;
-X  }
-X
-X  if (mysql_real_connect(m_fptr->mysql_conn,
-X                         sql_host,sql_user,sql_pass,
-X                         sql_dbname,
-X                         sql_port,
-X                         NULL,
-X                         0)==NULL)
-X    {
-X      fprintf(stderr,"*** Error %u - could not open database:\n%s\n%s",
-X              mysql_errno(m_fptr->mysql_conn),tmp_str,
-X              mysql_error(m_fptr->mysql_conn));
-X      goto error_r;
-X    }
-X  else {
-X    fprintf(stderr," Database %s opened on %s\n",sql_dbname,sql_host);
-X  }
-X
-X  /* check for 'DO' command - copy to 'DO' string */
-X  while (*bps == '-') { *bps++=' ';}
-X  if (isspace((unsigned char)bps[-1]) && toupper((unsigned char)bps[0])=='D' &&
-X      toupper((unsigned char)bps[1])=='O' && isspace((unsigned char)bps[2])) {
-X    /* have some 'DO' commands */
-X    /* check where the end of the last DO statement is */
-X
-X    sql_do_cnt = 1;	/* count up the number of 'DO' statements for later */
-X    bdp=bps+3;
-X    while ((bp=strchr(bdp,';'))!=NULL) {
-X      /* skip ";\n", but never step past the terminator when ';' is last */
-X      tp = (bp[1] != '\0') ? bp+2 : bp+1;
-X      while (isspace((unsigned char)*tp) || *tp == '-') {*tp++ = ' ';}
-X      if (toupper((unsigned char)*tp)=='D' &&
-X          toupper((unsigned char)tp[1])=='O' &&
-X          isspace((unsigned char)tp[2])) {
-X        sql_do_cnt++;	/* count the DO statements */
-X        bdp = tp+3;	/* move to the next DO statement */
-X      }
-X      else break;
-X    }
-X    if (bp != NULL) {	/* end of the last DO, begin of select */
-X      tchar = *(bp+1);
-X      *(bp+1)='\0';	/* terminate DO strings */
-X      if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
-X        fprintf(stderr," cannot allocate %lu for sql_do\n",
-X                (unsigned long)strlen(bps));
-X        goto error_r;
-X      }
-X      strcpy(sql_do,bps);
-X      *(bp+1)=tchar;	/* restore the character after the ';' */
-X      bps = bp+1;
-X      while (isspace((unsigned char)*bps)) bps++;
-X    }
-X    else {
-X      fprintf(stderr," terminal ';' not found: %s\n",bps);
-X      goto error_r;
-X    }
-X    /* all the DO commands are in sql_do in the form:
-X       DO command1; DO command2; DO command3; */
-X    bdp = sql_do;
-X    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
-X      /* do the mysql statement on bdp+3 */
-X      /* check for error */
-X      *bp='\0';
-X      if (mysql_query(m_fptr->mysql_conn,bdp+3)) {
-X        fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
-X                mysql_errno(m_fptr->mysql_conn), bdp+3, mysql_error(m_fptr->mysql_conn));
-X        goto error_r;
-X      }
-X      *bp=';';
-X      bdp = bp+1;
-X      while (isspace((unsigned char)*bdp)) bdp++;
-X    }
-X  }
-X
-X  /* copy 1st query field */
-X  if ((bp=strchr(bps,';'))==NULL) {
-X    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X  *bp='\0';
-X  if ((m_fptr->sql_query=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for query string [%lu], %s\n",
-X            (unsigned long)strlen(bps),bps);
-X    goto error_r;
-X  }
-X  /* have query, copy it */
-X  strcpy(m_fptr->sql_query,bps);
-X  *bp=';';	/* replace ; */
-X  bps = bp+1;
-X  while(isspace((unsigned char)*bps)) bps++;
-X
-X  /* copy get_desc field */
-X  if ((bp=strchr(bps,';'))==NULL) {
-X    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X  *bp='\0';
-X  if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for database name [%lu], %s\n",
-X            (unsigned long)strlen(bps),bps);
-X    goto error_r;
-X  }
-X  /* have get_desc, copy it */
-X  strcpy(m_fptr->sql_getdesc,bps);
-X  *bp=';';	/* replace ; */
-X  bps = bp+1;
-X  while(isspace((unsigned char)*bps)) bps++;
-X
-X  /* copy get_seq field; its terminating ';' is optional here */
-X  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
-X
-X  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for database name [%lu], %s\n",
-X            (unsigned long)strlen(bps),bps);
-X    goto error_r;
-X  }
-X
-X  if (strlen(bps) > 0) {
-X    strcpy(m_fptr->sql_getseq,bps);
-X  }
-X  else {
-X    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
-X    goto error_r;	/* release everything instead of returning directly */
-X  }
-X  if (bp!=NULL) *bp=';';
-X
-X  /* now do the query */
-X
-X  if (mysql_query(m_fptr->mysql_conn,m_fptr->sql_query)) {
-X    fprintf(stderr,"*** Error %u - query failed:\n%s\n%s\n",
-X            mysql_errno(m_fptr->mysql_conn), m_fptr->sql_query, mysql_error(m_fptr->mysql_conn));
-X    goto error_r;
-X  }
-X
-X  if ((m_fptr->mysql_res = mysql_use_result(m_fptr->mysql_conn)) == NULL) {
-X    fprintf(stderr,"*** Error = use result failed\n%s\n",
-X            mysql_error(m_fptr->mysql_conn));
-X    goto error_r;
-X  }
-X
-X  /* the scratch buffers are only used while parsing */
-X  free(sql_do);
-X  free(tmp_str);
-X  return m_fptr;
-X
-X error_r:
-X  if (m_fptr != NULL) {
-X    /* a handle from mysql_init() must be closed even if connect failed */
-X    if (m_fptr->mysql_conn != NULL) mysql_close(m_fptr->mysql_conn);
-X    free(m_fptr->sql_getseq);
-X    free(m_fptr->sql_getdesc);
-X    free(m_fptr->sql_query);
-X    free(m_fptr);
-X  }
-X  free(sql_do);
-X  free(sql_db);
-X  free(tmp_str);
-X  return NULL;
-}
-X
-/*
-X * mysql_reopen() - mark an already-open mySQL library as re-opened.
-X * No new connection is made; the same descriptor is handed back.
-X */
-struct lmf_str *
-mysql_reopen(struct lmf_str *m_fptr) {
-X  struct lmf_str *lib = m_fptr;
-X
-X  /* mysql_getlib() frees the result set after each sequence once this is set */
-X  lib->sql_reopen = 1;
-X
-X  return lib;
-}
-X
-/*
-X * mysql_closelib() - free any pending result set and close the connection.
-X *
-X * m_fptr - library descriptor; NULL is ignored.
-X *
-X * The result and connection pointers are cleared after release, matching
-X * mysql_getlib()/mysql_ranlib(), which test mysql_res against NULL before
-X * freeing it; a repeated close or a later lookup therefore cannot free
-X * the same result twice.  The descriptor itself is not freed here.
-X */
-void
-mysql_closelib(struct lmf_str *m_fptr) {
-X
-X  if (m_fptr == NULL) return;
-X  if (m_fptr->mysql_res != NULL) {
-X    mysql_free_result(m_fptr->mysql_res);
-X    m_fptr->mysql_res = NULL;
-X  }
-X  if (m_fptr->mysql_conn != NULL) {
-X    mysql_close(m_fptr->mysql_conn);
-X    m_fptr->mysql_conn = NULL;
-X  }
-X  m_fptr->sql_reopen=0;
-}
-X
-/*
-static char *sql_seq = NULL, *sql_seqp;
-static int sql_seq_len;
-static MYSQL_ROW sql_row;
-*/
-X
-/*
-X * mysql_getlib() - return the next chunk of the next library sequence.
-X *
-X * seq      - output buffer for the encoded residues (maxs bytes)
-X * maxs     - size of seq; filling stops once seq+maxs-10 is reached
-X * libstr   - output: the row's UID (column 0), or, when n_libstr > MAX_UID
-X *            and column 2 is not NULL, the text of column 2
-X * n_libstr - size of libstr
-X * libpos   - output: atol() of column 0
-X * lcont    - in/out: 0 requests a new row; incremented on return when the
-X *            buffer filled before the sequence ended, reset to 0 otherwise
-X * lm_fd    - library descriptor holding the mySQL result set
-X * l_off    - output: 1, or the number following "@C:" in column 2
-X *
-X * Returns the number of residues stored in seq, or -1 when the result set
-X * is exhausted (the result set is then freed).
-X *
-X * NOTE(review): column 2 is read unconditionally, so the main query is
-X * assumed to return at least three columns - confirm.
-X */
-int
-mysql_getlib( unsigned char *seq,
-X              int maxs,
-X              char *libstr,
-X              int n_libstr,
-X              fseek_t *libpos,
-X              int *lcont,
-X              struct lmf_str *lm_fd,
-X              long *l_off)
-{
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  char *bp;
-X  char *id_src;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-9];
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-#ifdef SUPERFAMNUM
-X  sfnum[0]=nsfnum = 0;
-#endif
-X
-X  if (*lcont==0) {
-X    /* get a row, with UID, sequence */
-X    *l_off = 1;
-X    if ((lm_fd->mysql_row =mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
-X      *libpos=(fseek_t)atol(lm_fd->mysql_row[0]);
-X
-X      if (lm_fd->mysql_row[2] == NULL) {
-X        fprintf(stderr," NULL comment at: [%s] %ld\n",
-X                lm_fd->mysql_row[0],*libpos);
-X      }
-X      else if ((bp=strchr(lm_fd->mysql_row[2],'@'))!=NULL &&
-X               !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
-X      else *l_off = 1;
-X
-X      lm_fd->sql_seqp = lm_fd->mysql_row[1];
-X
-X      /* because of changes in mysql_ranlib(), it is essential that
-X         libstr return the unique identifier; thus we must use
-X         column 0, not column 2.  Using libstr as the UID allows
-X         one to use any UID, not just numeric ones.  *libpos is not
-X         used for mysql libraries.
-X
-X         Only when the caller supplies a buffer longer than MAX_UID
-X         (the PVM case, which does not go back into the db) is the
-X         long description from column 2 returned instead.
-X      */
-X      if (n_libstr > MAX_UID && lm_fd->mysql_row[2]!=NULL) {
-X        id_src = lm_fd->mysql_row[2];
-X      }
-X      else {
-X        id_src = lm_fd->mysql_row[0];
-X      }
-X      /* always bound the copy by the caller's buffer, never by MAX_UID,
-X         so a libstr shorter than MAX_UID is not overrun */
-X      if (n_libstr > 0) {
-X        strncpy(libstr,id_src,n_libstr-1);
-X        libstr[n_libstr-1]='\0';
-X      }
-X    }
-X    else {
-X      mysql_free_result(lm_fd->mysql_res);
-X      lm_fd->mysql_res=NULL;
-X      *lcont = 0;
-X      *seqp = EOSEQ;
-X      return -1;
-X    }
-X  }
-X
-X  /* translate residues through ap[], ten at a time; any value >= NA
-X     (including the one produced for the terminator) ends the run and
-X     is dropped again by --seqp */
-X  for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) {
-X    if ((*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA) continue;
-X    --seqp;
-X    if (*(cp-1)==0) {
-X      /* stay on the terminator: if the buffer also filled on this pass,
-X         the continuation call must not start reading past the string */
-X      cp--;
-X      break;
-X    }
-X  }
-X  lm_fd->sql_seqp = (char *)cp;
-X
-X  if (seqp>=seqm1) (*lcont)++;
-X  else {
-X    *lcont=0;
-X    if (lm_fd->sql_reopen) {
-X      mysql_free_result(lm_fd->mysql_res);
-X      lm_fd->mysql_res = NULL;
-X    }
-X  }
-X
-X  *seqp = EOSEQ;
-X  /* if ((int)(seqp-seq)==0) return 1; */
-X  return (int)(seqp-seq);
-}
-X
-/*
-X * mysql_subst_uid() - build a query from a template containing one '#'.
-X *
-X * Writes "<text before '#'><uid><text after '#'>" into dst (dst_len bytes,
-X * dst_len > 0), truncating if necessary and always NUL-terminating.  The
-X * template is not modified.  Returns 1, or 0 when the template has no '#'.
-X */
-static int
-mysql_subst_uid(char *dst, size_t dst_len, const char *tmpl, const char *uid)
-{
-X  const char *hash;
-X  size_t n_head;
-X
-X  if ((hash=strchr(tmpl,'#'))==NULL) return 0;
-X
-X  n_head = (size_t)(hash - tmpl);
-X  if (n_head > dst_len-1) n_head = dst_len-1;
-X  memcpy(dst,tmpl,n_head);
-X  dst[n_head]='\0';
-X
-X  /* strncat()'s limit is the number of characters to append, so pass
-X     the space that is left, not the size of the whole buffer */
-X  strncat(dst,uid,dst_len-1-strlen(dst));
-X  strncat(dst,hash+1,dst_len-1-strlen(dst));
-X  return 1;
-}
-X
-/*
-X * mysql_ranlib() - fetch the description for one UID and start the
-X * query that returns its sequence.
-X *
-X * str    - output buffer for the description (cnt bytes)
-X * cnt    - size of str
-X * libpos - stored in lm_fd->lpos and used in the "gi|..." error text
-X * libstr - the UID substituted for '#' in the getdesc/getseq templates
-X * lm_fd  - library descriptor
-X *
-X * The description is taken from column 1 of the getdesc result (column 0
-X * when column 1 is NULL); further rows are appended, separated by blanks,
-X * and the text is cut at the first '\r' or '\n'.  On return
-X * lm_fd->mysql_res holds the result of the getseq query, ready for
-X * mysql_getlib(), or NULL if that query could not be started.
-X */
-void
-mysql_ranlib(char *str,
-X             int cnt,
-X             fseek_t libpos,
-X             char *libstr,
-X             struct lmf_str *lm_fd
-X             )
-{
-X  char tmp_query[1024];
-X  char *bp;
-X
-X  str[0]='\0';
-X
-X  /* put the UID into the query string - sprintf() cannot be used
-X     because the template may itself contain '%' */
-X  if (!mysql_subst_uid(tmp_query,sizeof(tmp_query),lm_fd->sql_getdesc,libstr)) {
-X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getdesc);
-X    goto next1;
-X  }
-X  lm_fd->lpos = libpos;
-X
-X  /*  fprintf(stderr," requesting: %s\n",tmp_query); */
-X
-X  if (lm_fd->mysql_res !=NULL) {
-X    mysql_free_result(lm_fd->mysql_res);
-X    lm_fd->mysql_res = NULL;
-X  }
-X
-X  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
-X    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
-X            mysql_error(lm_fd->mysql_conn));
-X    sprintf(str,"gi|%ld ***Error - query failed***",(long)libpos);
-X    goto next1;
-X  }
-X
-X  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
-X    sprintf(str,"gi|%ld ***use result failed***",(long)libpos);
-X    goto next1;		/* nothing to free */
-X  }
-X
-X  /* have the description */
-X  if ((lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))==NULL) {
-X    /* fprintf(stderr," cannot fetch description: %s\n",tmp_query); */
-X    sprintf(str,"gi|%ld ***cannot fetch description***",(long)libpos);
-X    goto next0;
-X  }
-X
-X  if (lm_fd->mysql_row[1] != NULL) strncpy(str,lm_fd->mysql_row[1],cnt-1);
-X  else strncpy(str,lm_fd->mysql_row[0],cnt-1);
-X  str[cnt-1]='\0';
-X  /* append the descriptions from any additional rows while room remains */
-X  while (strlen(str) < cnt-1 &&
-X         (lm_fd->mysql_row = mysql_fetch_row(lm_fd->mysql_res))!=NULL) {
-X    strncat(str," ",cnt-2-strlen(str));
-X    if (lm_fd->mysql_row[1]!=NULL)
-X      strncat(str,lm_fd->mysql_row[1],cnt-2-strlen(str));
-X    else break;
-X  }
-X
-X  str[cnt-1]='\0';
-X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
-X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X
-X next0:
-X  mysql_free_result(lm_fd->mysql_res);
-X next1:
-X  lm_fd->mysql_res = NULL;
-X
-X  /* get the sequence, set up for mysql_getlib() */
-X  /* put the UID into the query string */
-X
-X  if (!mysql_subst_uid(tmp_query,sizeof(tmp_query),lm_fd->sql_getseq,libstr)) {
-X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
-X    return;
-X  }
-X
-X  if (mysql_query(lm_fd->mysql_conn,tmp_query)) {
-X    fprintf(stderr,"*** Error - query failed:\n%s\n%s\n",tmp_query,
-X            mysql_error(lm_fd->mysql_conn));
-X  }
-X
-X  if ((lm_fd->mysql_res = mysql_use_result(lm_fd->mysql_conn)) == NULL) {
-X    fprintf(stderr,"*** Error = use result failed\n%s\n",
-X            mysql_error(lm_fd->mysql_conn));
-X  }
-}
-SHAR_EOF
-chmod 0644 mysql_lib.c ||
-echo 'restore of mysql_lib.c failed'
-Wc_c="`wc -c < 'mysql_lib.c'`"
-test 16406 -eq "$Wc_c" ||
- echo 'mysql_lib.c: original size 16406, current size' "$Wc_c"
-fi
-# ============= n0.aa ==============
-if test -f 'n0.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping n0.aa (File already exists)'
-else
-echo 'x - extracting n0.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n0.aa' &&
->mgstm1
-MGDAPDFD,
-MLLEYTD
-SHAR_EOF
-chmod 0644 n0.aa ||
-echo 'restore of n0.aa failed'
-Wc_c="`wc -c < 'n0.aa'`"
-test 26 -eq "$Wc_c" ||
- echo 'n0.aa: original size 26, current size' "$Wc_c"
-fi
-# ============= n1.aa ==============
-if test -f 'n1.aa' -a X"$1" != X"-c"; then # keep an existing n1.aa unless the first argument is -c
- echo 'x - skipping n1.aa (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n1.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n1.aa' &&
->tests from mgstm1
-MILGYW,
-MLLE,
-MGDAP,
-MLCYNP
-SHAR_EOF
-chmod 0644 n1.aa ||
-echo 'restore of n1.aa failed'
-Wc_c="`wc -c < 'n1.aa'`" # byte count of the extracted file, compared with the recorded size below
-test 47 -eq "$Wc_c" ||
- echo 'n1.aa: original size 47, current size' "$Wc_c"
-fi
-# ============= n2.aa ==============
-if test -f 'n2.aa' -a X"$1" != X"-c"; then # keep an existing n2.aa unless the first argument is -c
- echo 'x - skipping n2.aa (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n2.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n2.aa' &&
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
-LIGVITENPVWIIMELCTLGELRSFLQVR,
-KPPRPGAPHLGSLASLNSPVDSYNEGVK,
-EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
-QVTVSWDSGGSDEAPPKPSRPGYPSPR,
-GANPTHLADFNQVQTIQYSNSEDKDR,
-LPMPPNCPPTLYSLMTKCWAYDPSR,
-PGAPHLGSLASLNSPVDSYNEGVK,
-GANPTHLADFNQVQTIQYSNSEDK,
-LSHLQSEEVHWLHLDMGVSNVR,
-QVTVSWDSGGSDEAPPKPSR,
-VFHYFENSSEPTTWASIIR,
-TLLATVDESLPVLPASTHR,
-RQVTVSWDSGGSDEAPPK,
-AQLSTILEEEKLQQEER,
-EKFELAHPPEEWKYELR,
-LAQQYVMTSLQQEYKK,
-FELAHPPEEWKYELR,
-LVNGATQSFIIRPQK,
-KQMLTAAHALAVDAK,
-SNDKVYENVTGLVK,
-QMLTAAHALAVDAK,
-GMGQVLPTHLMEER,
-PQEISPPPTANLDR,
-IQPAPPEEYVPMVK,
-GMGQVLPTHLMEER,
-QFANLNREESILK,
-SHAR_EOF
-chmod 0644 n2.aa ||
-echo 'restore of n2.aa failed'
-Wc_c="`wc -c < 'n2.aa'`" # byte count of the extracted file, compared with the recorded size below
-test 692 -eq "$Wc_c" ||
- echo 'n2.aa: original size 692, current size' "$Wc_c"
-fi
-# ============= n2_fs.lib ==============
-if test -f 'n2_fs.lib' -a X"$1" != X"-c"; then # keep an existing n2_fs.lib unless the first argument is -c
- echo 'x - skipping n2_fs.lib (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n2_fs.lib (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n2_fs.lib' &&
->GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
-ILGYWN,
-DQYRMFEP,
-SRYIATP,
-KCLDAFP,
-EYTDS,
-SYDEKR
->GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
-ILGYWN,
-DQYRMFEP,
-SRYIATP,
-KCLDAFP,
-EYTDS,
-SYDEKR,
-YTMGD,
-EKQKPEFL,
-VRGLTHP,
-TRMQLI,
-FKLGLDFP,
-NLPYLI,
-DGSHKIT,
-LRYLAR,
-KTIPEK,
-KRPWFA,
-ETEEERIR,
-GDKVTYVD,
-HWSNK
->tests from mgstm1
-MLLE,
-MILGYW,
-MGADP,
-MLCYNP
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GANPTHLADF,
-QVTVSWDSGG,
-EDGGLQGPA,
-TLLATVDE,
-LSHLQSEE,
-PGAPHLGS,
-GANPTHLA
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GSIEREDGGLQGPAGNQHIYQPVGKPDHAAPPK,
-LIGVITENPVWIIMELCTLGELRSFLQVR,
-KPPRPGAPHLGSLASLNSPVDSYNEGVK,
-EDGGLQGPAGNQHIYQPVGKPDHAAPPK,
-QVTVSWDSGGSDEAPPKPSRPGYPSPR,
-GANPTHLADFNQVQTIQYSNSEDKDR,
-LPMPPNCPPTLYSLMTKCWAYDPSR,
-PGAPHLGSLASLNSPVDSYNEGVK,
-GANPTHLADFNQVQTIQYSNSEDK,
-LSHLQSEEVHWLHLDMGVSNVR,
-QVTVSWDSGGSDEAPPKPSR,
-VFHYFENSSEPTTWASIIR,
-TLLATVDESLPVLPASTHR,
-RQVTVSWDSGGSDEAPPK,
-AQLSTILEEEKLQQEER,
-EKFELAHPPEEWKYELR,
-LAQQYVMTSLQQEYKK,
-FELAHPPEEWKYELR,
-LVNGATQSFIIRPQK,
-KQMLTAAHALAVDAK,
-SNDKVYENVTGLVK,
-QMLTAAHALAVDAK,
-GMGQVLPTHLMEER,
-PQEISPPPTANLDR,
-IQPAPPEEYVPMVK,
-GMGQVLPTHLMEER,
-QFANLNREESILK,
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GANPTHLADF,
-QVTVSWDSGG,
-EDGGLQGPA,
-TLLATVDE,
-LSHLQSEE,
-PGAPHLGS,
-GANPTHLA,
-AQLSTILE,
-KPPRPGA,
-GSIERED,
-VFHYFEN,
-LIGVIT,
-LPMPP,
-RQVTV,
-QVTV
-SHAR_EOF
-chmod 0644 n2_fs.lib ||
-echo 'restore of n2_fs.lib failed'
-Wc_c="`wc -c < 'n2_fs.lib'`" # byte count of the extracted file, compared with the recorded size below
-test 1482 -eq "$Wc_c" ||
- echo 'n2_fs.lib: original size 1482, current size' "$Wc_c"
-fi
-# ============= n2s.aa ==============
-if test -f 'n2s.aa' -a X"$1" != X"-c"; then # keep an existing n2s.aa unless the first argument is -c
- echo 'x - skipping n2s.aa (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n2s.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n2s.aa' &&
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GANPTHLADF,
-QVTVSWDSGG,
-EDGGLQGPA,
-TLLATVDE,
-LSHLQSEE,
-PGAPHLGS,
-GANPTHLA
-SHAR_EOF
-chmod 0644 n2s.aa ||
-echo 'restore of n2s.aa failed'
-Wc_c="`wc -c < 'n2s.aa'`" # byte count of the extracted file, compared with the recorded size below
-test 178 -eq "$Wc_c" ||
- echo 'n2s.aa: original size 178, current size' "$Wc_c"
-fi
-# ============= n2t.aa ==============
-if test -f 'n2t.aa' -a X"$1" != X"-c"; then # keep an existing n2t.aa unless the first argument is -c
- echo 'x - skipping n2t.aa (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n2t.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n2t.aa' &&
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GANPTHLADF,
-QVTVSWDSGG,
-EDGGLQGPA,
-TLLATVDE,
-LSHLQSEE,
-PGAPHLGS,
-GANPTHLA,
-AQLSTILE,
-KPPRPGA,
-GSIERED,
-VFHYFEN,
-LIGVIT,
-LPMPP,
-RQVTV,
-QVTV
-SHAR_EOF
-chmod 0644 n2t.aa ||
-echo 'restore of n2t.aa failed'
-Wc_c="`wc -c < 'n2t.aa'`" # byte count of the extracted file, compared with the recorded size below
-test 243 -eq "$Wc_c" ||
- echo 'n2t.aa: original size 243, current size' "$Wc_c"
-fi
-# ============= n_fs.lib ==============
-if test -f 'n_fs.lib' -a X"$1" != X"-c"; then # keep an existing n_fs.lib unless the first argument is -c
- echo 'x - skipping n_fs.lib (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting n_fs.lib (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'n_fs.lib' &&
->tests from mgstm1
-MLLE,
-MILGYW,
-MGADP,
-MLCYNP
->GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
-ILGYWN,
-DQYRMFEP,
-SRYIATP,
-KCLDAFP,
-EYTDS,
-SYDEKR
->gi|345664 gi|345664|pir||A45388 protein-tyrosine kinase (EC 2.7.1.112) - chicken [MASS=116670](65:883)
-GANPTHLADF,
-QVTVSWDSGG,
-EDGGLQGPA,
-TLLATVDE,
-LSHLQSEE,
-PGAPHLGS,
-GANPTHLA
-SHAR_EOF
-chmod 0644 n_fs.lib ||
-echo 'restore of n_fs.lib failed'
-Wc_c="`wc -c < 'n_fs.lib'`" # byte count of the extracted file, compared with the recorded size below
-test 330 -eq "$Wc_c" ||
- echo 'n_fs.lib: original size 330, current size' "$Wc_c"
-fi
-# ============= ncbl2_head.h ==============
-if test -f 'ncbl2_head.h' -a X"$1" != X"-c"; then # keep an existing ncbl2_head.h unless the first argument is -c
- echo 'x - skipping ncbl2_head.h (File already exists)'
-else # extract: sed removes one leading X from each here-document line
-echo 'x - extracting ncbl2_head.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ncbl2_head.h' &&
-/* ncbl_head.h header files for blast1.3 format */
-X
-/* $Name: fa_34_26_5 $ - $Id: ncbl2_head.h,v 1.3 2006/05/18 19:04:25 wrp Exp $ */
-X
-#define AMINO_ACID_SEQTYPE 1
-#define AA_SEQTYPE AMINO_ACID_SEQTYPE
-#define AAFORMAT AA_SEQTYPE
-X
-#define NUCLEIC_ACID_SEQTYPE 0
-#define NT_SEQTYPE NUCLEIC_ACID_SEQTYPE
-#define NTFORMAT NT_SEQTYPE
-X
-/* Filename extensions used by the two types of databases (a.a. and nt.) */
-#define AA_LIST_EXT "pal"
-#define AA_HEADER_EXT "phr"
-#define AA_INDEX_EXT "pin"
-#define AA_SEARCHSEQ_EXT "psq"
-X
-#define NT_LIST_EXT "nal"
-#define NT_HEADER_EXT "nhr"
-#define NT_INDEX_EXT "nin"
-#define NT_SEARCHSEQ_EXT "nsq"
-X
-#define FORMATDBV3 3 /* formatdb version */
-#define FORMATDBV4 4 /* formatdb version */
-X
-#define NULLB '\0' /* sentinel byte */
-X
-#ifndef CHAR_BIT
-#define CHAR_BIT 8 /* these values should match blast */
-#endif
-X
-#define NBPN 2
-#define NSENTINELS 2
-SHAR_EOF
-chmod 0644 ncbl2_head.h ||
-echo 'restore of ncbl2_head.h failed'
-Wc_c="`wc -c < 'ncbl2_head.h'`" # byte count of the extracted file, compared with the recorded size below
-test 882 -eq "$Wc_c" ||
- echo 'ncbl2_head.h: original size 882, current size' "$Wc_c"
-fi
-# ============= ncbl2_mlib.c ==============
-if test -f 'ncbl2_mlib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping ncbl2_mlib.c (File already exists)'
-else
-echo 'x - extracting ncbl2_mlib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ncbl2_mlib.c' &&
-/* ncbl2_mlib.c functions to read ncbi-blast format files from
-X formatdb (blast2.0 format files)
-X
-X copyright (c) 1999 William R. Pearson
-*/
-X
-/* $Name: fa_34_26_5 $ - $Id: ncbl2_mlib.c,v 1.56 2007/04/02 18:08:11 wrp Exp $ */
-X
-/* to turn on mmap()ing for Blast2 files, compile with USE_MMAP defined */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#ifdef UNIX
-#include <unistd.h>
-#endif
-#include <errno.h>
-X
-X
-/* ****************************************************************
-X
-17-May-2006
-X
-Modified to read NCBI .[np]al and .msk files. The .nal or .pal file
-provides a way to read sequences from a list of files. The .msk file
-provides a compact way of indicating the subset of sequences in a
-larger database (typically nr or nt) that comprise a smaller database
-(e.g. swissprot or pdbaa). A .pal file (e.g. swissprot.00.pal) that
-uses a .msk file has the form:
-X
-X # Alias file generated by genmask
-X # Date created: Mon Apr 10 11:24:05 2006
-X #
-X TITLE Non-redundant SwissProt sequences
-X DBLIST nr.00
-X OIDLIST swissprot.00.msk
-X LENGTH 74351250
-X NSEQ 198346
-X MAXOID 2617347
-X MEMB_BIT 1
-X # end of the file
-X
-To work with this file, we must first load the nr.00 file, and then
-read the swissprot.00.msk file, and then scan all the entries in the
-swissprot.00.msk file (which are packed 32 mask bits to an int) to
-determine whether a specific libpos index entry is present in the
-subset database.
-X
-**************************************************************** */
-X
-X
-/* ****************************************************************
-This code reads NCBI Blast2 format databases from formatdb version 3 and 4
-X
-(From NCBI) This section describes the format of the databases.
-X
-Formatdb creates three main files for proteins containing indices,
-sequences, and headers with the extensions, respectively, of pin, psq,
-and phr (for nucleotides these are nin, nsq, and nhr). A number of
-other ISAM indices are created, but these are described elsewhere.
-X
-FORMAT OF THE INDEX FILE
-------------------------
-X
-1.) formatdb version number [4 bytes].
-X
-2.) protein dump flag (1 for a protein database, 0 for a nucleotide
-X database) [4 bytes].
-X
-3.) length of the database title in bytes [4 bytes].
-4.) the database title [length given in 3.)].
-5.) length of the date/time string [4 bytes].
-6.) the date/time string [length given in 5.)].
-7.) the number of sequences in the database [4 bytes].
-8.) the total length of the database in residues/basepairs [4 bytes].
-9.) the length of the longest sequence in the database [4 bytes].
-X
-10.) a list of the offsets for definitions (one for each sequence) in
-the header file. There are num_of_seq+1 of these, where num_of_seq is
-the number of sequences given in 7.).
-X
-11.) a list of the offsets for sequences (one for each sequence) in
-the sequence file. There are num_of_seq+1 of these, where num_of_seq
-is the number of sequences given in 7.).
-X
-12.) a list of the offsets for the ambiguity characters (one for each
-sequence) in the sequence file. This list is only present for
-nucleotide databases and, since the database is compressed 4/1 for
-nucleotides, allows the ambiguity characters to be restored when the
-sequence is generated. There are num_of_seq+1 of these, where
-num_of_seq is the number of sequences given in 7.).
-X
-X
-FORMAT OF THE SEQUENCE FILE
----------------------------
-X
-There are different formats for the protein and nucleotide sequence files.
-X
-The protein sequence file is quite simple. The first byte in the
-file is a NULL byte, followed by the sequence in ncbistdaa format
-(described in the NCBI Software Development Toolkit documentation).
-Following the sequence is another NULL byte, followed by the next
-sequence. The file ends with a NULL byte, following the last
-sequence.
-X
-The nucleotide sequence file contains the nucleotide sequence, with
-four basepairs compressed into one byte. The format used is NCBI2na,
-documented in the NCBI Software Development Toolkit manual. Any
-ambiguity characters present in the original sequence are replaced at
-random by A, C, G or T. The true value of ambiguity characters are
-stored at the end of each sequence to allow true reproduction of the
-original sequence.
-X
-FORMAT OF THE HEADER FILE (formatdb version 3)
--------------------------
-X
-The format of the header file depends on whether or not the identifiers in the
-original file were parsed or not. For the case that they were not, then each
-entry has the format:
-X
-gnl|BL_ORD_ID|entry_number my favorite yeast sequence...
-X
-Here entry_number gives the ordinal number of the sequence in the
-database (with zero offset). The identifier
-gnl|BL_ORD_ID|entry_number is used by the BLAST software to identify
-the entry, if the user has not provided another identifier. If the
-identifier was parsed, then gnl|BL_ORD_ID|entry_number is replaced by
-the correct identifier, as described in
-ftp://ncbi.nlm.nih.gov/blast/db/README .
-X
-There are no separators between these deflines.
-X
-For formatdb version 4, the header file contains blast ASN.1 binary
-deflines, which can be parsed with parse_fastadl_asn().
-X
-FORMAT OF THE .MSK FILE
------------------------
-X
-The .msk file is simply a packed list of masks for formatdb "oids" for
-some other file (typically nr). The first value is the last oid
-available; the remainder are packed 32 oids/mask, so that the number
-of masks is 1/32 the number of sequences in the file.
-X
-**************************************************************** */
-X
-#ifdef USE_MMAP
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#ifdef IBM_AIX
-#include <fcntl.h>
-#else
-#include <sys/fcntl.h>
-#endif
-#endif
-X
-#ifdef USE_MMAP
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-#endif
-X
-#ifdef UNIX
-#define RBSTR "r"
-#else
-#define RBSTR "rb"
-#endif
-X
-#ifdef WIN32
-#define SLASH_CHAR '\\'
-#define SLASH_STR "\\"
-#else
-#define SLASH_CHAR '/'
-#define SLASH_STR "/"
-#endif
-X
-#define XTERNAL
-#include "uascii.h"
-X
-#define XTERNAL
-#include "upam.h"
-#include "ncbl2_head.h"
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-unsigned int bl2_uint4_cvt(unsigned int);
-unsigned int bl2_long4_cvt(long);
-int64_t bl2_long8_cvt(int64_t);
-void src_int4_read(FILE *fd, int *valp);
-void src_uint4_read(FILE *fd, unsigned int *valp);
-void src_long4_read(FILE *fd, long *valp);
-void ncbi_long8_read(FILE *fd, int64_t *valp);
-void src_char_read(FILE *fd, char *valp);
-unsigned char *parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
-X int *gi_p, int *db, char *acc, char *name,
-X char *title, int t_len, int *taxid);
-X
-/* nt_btoa maps from blast 2bit format to ascii characters */
-static char nt_btoa[5] = {"ACGT"};
-X
-static char aa_b2toa[27]= {"-ABCDEFGHIKLMNPQRSTVWXYZU*"};
-X
-static int aa_btof[32]; /* maps to fasta alphabet */
-X
-static int dbtype, dbformat, amb_cnt;
-X
-#define NCBIBL20 12
-X
-int ncbl2_getliba(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-int ncbl2_getlibn(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-X
-int ncbl2_getliba_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-int ncbl2_getlibn_o(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-X
-void newname(char *, char *, char *, int);
-void parse_pal(char *, char *, int *, int *, FILE *);
-X
-void ncbl2_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
-X
-/* ncbl2_openlib() opens a BLAST2.0 (formatdb version 3 or 4) database
-X   and returns a calloc()ed lmf_str describing it, or NULL on failure.
-X
-X   name    - database name; the individual file names are built from it
-X             with newname() and the pal/pin/phr/psq (protein) or
-X             nal/nin/nhr/nsq (DNA) extensions
-X   ldnaseq - SEQT_PROT selects the protein files; any other value is
-X             treated as a DNA library
-X
-X   If a list (.pal/.nal) file can be opened it is parsed with
-X   parse_pal(); when it reports oid_seqs > 0 the oid mask (.msk) file is
-X   loaded and the ncbl2_getlib[an]_o() readers are installed, otherwise
-X   an empty lmf_str is returned.  Without a list file the plain
-X   ncbl2_getlib[an]() readers are installed.
-X
-X   When USE_MMAP is defined the sequence file is mmap()ed; if that is
-X   not possible (or USE_MMAP is not defined) it is fopen()ed instead.
-X
-X   On failure every file, mapping and buffer acquired so far is released
-X   before NULL is returned. */
-X
-struct lmf_str *
-ncbl2_openlib(char *name, int ldnaseq)
-{
-X  char lname[256];		/* list (.pal/.nal) file name */
-X  char dname[256];		/* filled in by parse_pal() */
-X  char msk_name[256];		/* filled in by parse_pal() */
-X  char hname[256];		/* header file name */
-X  char sname[256];		/* sequence file name */
-X  char tname[256];		/* index file name, also path scratch space */
-X  char db_dir[256];		/* directory part of name, with trailing slash */
-X  int pref_db= -1;
-X  char *bp;
-X  int title_len;
-X  char *title_str=NULL;
-X  int date_len;
-X  char *date_str=NULL;
-X  long ltmp;
-X  int64_t l8tmp;
-X  int oid_seqs, max_oid;
-X  int oid_cnt, oid_len;
-X  unsigned int *oid_list, o_max;
-X  int tmp;
-X  int i;
-#ifdef USE_MMAP
-X  struct stat statbuf;
-#endif
-X  FILE *ifile;			/* index offsets, also DB info */
-X  unsigned int *f_pos_arr=NULL;
-X  struct lmf_str *m_fptr=NULL;
-X
-X  if (ldnaseq==SEQT_PROT) {	/* read a protein database */
-X    newname(lname,name,AA_LIST_EXT,(int)sizeof(lname));
-X    newname(tname,name,AA_INDEX_EXT,(int)sizeof(tname));
-X    newname(hname,name,AA_HEADER_EXT,(int)sizeof(hname));
-X    newname(sname,name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
-X
-X    /* initialize map of BLAST2 amino acids to FASTA amino acids */
-X    for (i=0; i<(int)sizeof(aa_b2toa); i++) {
-X      if ((tmp=aascii[aa_b2toa[i]])<NA) aa_btof[i]=tmp;
-X      else if (aa_b2toa[i]=='*') aa_btof[i]=aascii['X'];
-X      /* no FASTA equivalent: clear the mapped code, but leave the
-X         aa_b2toa[] alphabet itself intact for later lookups */
-X      else aa_btof[i]=0;
-X    }
-X  }
-X  else {	/* reading DNA library */
-X    newname(lname,name,NT_LIST_EXT,(int)sizeof(lname));
-X    newname(tname,name,NT_INDEX_EXT,(int)sizeof(tname));
-X    newname(hname,name,NT_HEADER_EXT,(int)sizeof(hname));
-X    newname(sname,name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
-X  }
-X
-X  /* check first for list name */
-X  max_oid = oid_seqs = 0;
-X  oid_list = NULL;
-X  if ((ifile = fopen(lname,"r"))!=NULL) {
-X
-X    /* remember the directory of the list file, including the slash */
-X    if ((bp = strrchr(name,SLASH_CHAR))!=NULL) {
-X      *bp = '\0';
-X      strncpy(db_dir,name,sizeof(db_dir)-1);
-X      db_dir[sizeof(db_dir)-1]='\0';
-X      strncat(db_dir,SLASH_STR,sizeof(db_dir)-strlen(db_dir)-1);
-X      *bp = SLASH_CHAR;
-X    }
-X    else {
-X      db_dir[0]='\0';
-X    }
-X
-X    /* we have a list file, we need to parse it */
-X    parse_pal(dname, msk_name, &oid_seqs, &max_oid, ifile);
-X    fclose(ifile);
-X    ifile = NULL;
-X
-X    pref_db = -1;
-X    if (oid_seqs > 0) {
-X
-X      /* get the pref_db before adding the directory */
-X      if (strncmp(msk_name,"swissprot",9)==0) {
-X	pref_db = 7;
-X      }
-X      else if (strncmp(msk_name,"pdbaa",5)==0) {
-X	pref_db = 14;
-X      }
-X
-X      /* need to add directory to both dname and msk_name; strncat()'s
-X         limit is the space remaining, not the size of the buffer */
-X      strncpy(tname,db_dir,sizeof(tname)-1);
-X      tname[sizeof(tname)-1]='\0';
-X      strncat(tname,msk_name,sizeof(tname)-strlen(tname)-1);
-X      strncpy(msk_name,tname,sizeof(msk_name)-1);
-X      msk_name[sizeof(msk_name)-1]='\0';
-X
-X      strncpy(tname,db_dir,sizeof(tname)-1);
-X      tname[sizeof(tname)-1]='\0';
-X      strncat(tname,dname,sizeof(tname)-strlen(tname)-1);
-X      strncpy(dname,tname,sizeof(dname)-1);
-X      dname[sizeof(dname)-1]='\0';
-X
-X      if (ldnaseq == SEQT_PROT) {
-X	newname(tname,dname,AA_INDEX_EXT,(int)sizeof(tname));
-X	newname(hname,dname,AA_HEADER_EXT,(int)sizeof(hname));
-X	newname(sname,dname,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
-X      }
-X      else {	/* reading DNA library */
-X	newname(tname,dname,NT_INDEX_EXT,(int)sizeof(tname));
-X	newname(hname,dname,NT_HEADER_EXT,(int)sizeof(hname));
-X	newname(sname,dname,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
-X      }
-X
-X      /* now load the oid file */
-X      if ((ifile = fopen(msk_name,RBSTR))==NULL) {
-X	fprintf(stderr,"error - cannot load %s file\n",msk_name);
-X	goto error_r;
-X      }
-X
-X      src_uint4_read(ifile,&o_max);
-X      if (o_max != max_oid) {
-X	fprintf(stderr," error - oid count mismatch %d != %d\n",max_oid, o_max);
-X      }
-X      oid_len = (max_oid/32+1);
-X      if ((oid_list=(unsigned int *)calloc(oid_len,sizeof(int)))==NULL) {
-X	fprintf(stderr," error - cannot allocate oid_list[%d]\n",oid_len);
-X	goto error_r;
-X      }
-X      if ((oid_cnt=fread(oid_list,sizeof(int),oid_len,ifile))==0) {
-X	fprintf(stderr," error - cannot read oid_list[%d]\n",oid_len);
-X	goto error_r;
-X      }
-X      fclose(ifile);
-X      ifile = NULL;
-X    }
-X    else {	/* we had a .msk file, but there are no oid's in it.
-X		   allocate an m_fptr and return it empty */
-X      if ((m_fptr=(struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
-X	fprintf(stderr," cannot allocate lmf_str\n");
-X	goto error_r;
-X      }
-X
-X      m_fptr->tmp_buf_max = 0;
-X
-X      /* load the oid info; the *_o readers index oid_list[0], so the
-X         one-word dummy list must really exist */
-X      m_fptr->max_oid = 0;
-X      m_fptr->oid_seqs = 0;
-X      if ((m_fptr->oid_list = (unsigned int *)calloc(1,sizeof(int)))==NULL) {
-X	fprintf(stderr," error - cannot allocate oid_list[%d]\n",1);
-X	goto error_r;
-X      }
-X      m_fptr->pref_db= -1;
-X
-X      if (ldnaseq==SEQT_DNA) {
-X	m_fptr->getlib = ncbl2_getlibn_o;
-X	m_fptr->sascii = nascii;
-X      }
-X      else {
-X	m_fptr->getlib = ncbl2_getliba_o;
-X	m_fptr->sascii = aascii;
-X      }
-X      strncpy(m_fptr->lb_name,sname,MAX_FN);
-X      return m_fptr;
-X    }
-X  }
-X
-X  /* open the index file */
-X  if ((ifile = fopen(tname,RBSTR))==NULL) {
-X    fprintf(stderr," cannot open %s (%s) INDEX file",tname,name);
-X    perror("...");
-X    goto error_r;
-X  }
-X  src_uint4_read(ifile,(unsigned *)&dbformat);	/* get format DB version number */
-X  src_uint4_read(ifile,(unsigned *)&dbtype);	/* get 1 for protein/0 DNA */
-X
-X  if (dbformat != FORMATDBV3 && dbformat!=FORMATDBV4) {
-X    fprintf(stderr,"error - %s wrong formatdb version (%d/%d)\n",
-X	    tname,dbformat,FORMATDBV3);
-X    goto error_r;
-X  }
-X
-X  if ((ldnaseq==SEQT_PROT && dbtype != AAFORMAT) ||
-X      (ldnaseq==SEQT_DNA && dbtype!=NTFORMAT)) {
-X    fprintf(stderr,"error - %s wrong format (%d/%d)\n",
-X	    tname,dbtype,(ldnaseq ? NTFORMAT: AAFORMAT));
-X    goto error_r;
-X  }
-X
-X  /* the files are there - allocate lmf_str */
-X
-X  if ((m_fptr=(struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
-X    fprintf(stderr," cannot allocate lmf_str\n");
-X    goto error_r;
-X  }
-X
-X  m_fptr->tmp_buf_max = 4096;
-X  if ((m_fptr->tmp_buf=
-X       (char *)calloc(m_fptr->tmp_buf_max,sizeof(char)))==NULL) {
-X    fprintf(stderr," cannot allocate lmf_str->tmp_buffer\n");
-X    goto error_r;
-X  }
-X
-X  /* load the oid info */
-X  m_fptr->max_oid = max_oid;
-X  m_fptr->oid_seqs = oid_seqs;
-X  m_fptr->oid_list = oid_list;
-X  m_fptr->pref_db= pref_db;
-X
-X  /* open the header file */
-X  if ((m_fptr->hfile = fopen(hname,RBSTR))==NULL) {
-X    fprintf(stderr," cannot open %s header file\n",hname);
-X    goto error_r;
-X  }
-X
-X  /* ncbl2_ranlib is used for all BLAST2.0 access */
-X  m_fptr->ranlib = ncbl2_ranlib;
-X  m_fptr->bl_format_ver = dbformat;
-X
-X  /* the *_o readers honor the oid mask; use them only when one was loaded */
-X  if (ldnaseq==SEQT_DNA) {
-X    if (oid_seqs > 0) {
-X      m_fptr->getlib = ncbl2_getlibn_o;
-X    }
-X    else {
-X      m_fptr->getlib = ncbl2_getlibn;
-X    }
-X    m_fptr->sascii = nascii;
-X  }
-X  else {
-X    if (oid_seqs > 0) {
-X      m_fptr->getlib = ncbl2_getliba_o;
-X    }
-X    else {
-X      m_fptr->getlib = ncbl2_getliba;
-X    }
-X    m_fptr->sascii = aascii;
-X  }
-X  strncpy(m_fptr->lb_name,sname,MAX_FN);
-X
-X  /* open the sequence file */
-X
-#if defined (USE_MMAP)
-X  m_fptr->mm_flg=((m_fptr->mmap_fd=open(sname,O_RDONLY))>=0);
-X  if (!m_fptr->mm_flg) {
-X    fprintf(stderr," cannot open %s",sname);
-X    perror("...");
-X  }
-X  else {
-X    if(fstat(m_fptr->mmap_fd, &statbuf) < 0) {
-X      fprintf(stderr," cannot fstat %s",sname);
-X      perror("...");
-X      m_fptr->mm_flg = 0;
-X    }
-X    else {
-X      m_fptr->st_size = statbuf.st_size;
-X      if((m_fptr->mmap_base =
-X	  mmap(NULL, m_fptr->st_size, PROT_READ,
-X	       MAP_FILE | MAP_SHARED, m_fptr->mmap_fd, 0)) == (char *) -1) {
-X	fprintf(stderr," cannot mmap %s",sname);
-X	perror("...");
-X	m_fptr->mm_flg = 0;
-X      }
-X      else {
-X	m_fptr->mmap_addr = m_fptr->mmap_base;
-X	m_fptr->mm_flg = 1;
-X      }
-X    }
-X    /* regardless, close the open()ed version */
-X    close(m_fptr->mmap_fd);
-X  }
-#else
-X  m_fptr->mm_flg = 0;
-#endif
-X
-X  /* no mapping available: fall back to stdio */
-X  if (!m_fptr->mm_flg) {
-X    if ((m_fptr->libf = fopen(sname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s sequence file",sname);
-X      perror("...");
-X      goto error_r;
-X    }
-X  }
-X
-/* all files should be open */
-X
-X  src_uint4_read(ifile,(unsigned *)&title_len);
-X
-X  if (title_len > 0) {
-X    if ((title_str = calloc((size_t)title_len+1,sizeof(char)))==NULL) {
-X      fprintf(stderr," cannot allocate title string (%d)\n",title_len);
-X      goto error_r;
-X    }
-X    fread(title_str,(size_t)1,(size_t)title_len,ifile);
-X  }
-X
-X  src_uint4_read(ifile,(unsigned *)&date_len);
-X
-X  if (date_len > 0) {
-X    if ((date_str = calloc((size_t)date_len+1,sizeof(char)))==NULL) {
-X      fprintf(stderr," cannot allocate date string (%d)\n",date_len);
-X      goto error_r;
-X    }
-X    fread(date_str,(size_t)1,(size_t)date_len,ifile);
-X  }
-X
-X  m_fptr->lpos = 0;
-X  src_uint4_read(ifile,(unsigned *)&m_fptr->max_cnt);
-X
-X  /* total length: read as 4 bytes for formatdb v3, 8 bytes for v4 */
-X  if (dbformat == FORMATDBV3) {
-X    src_long4_read(ifile,&ltmp);
-X    m_fptr->tot_len = ltmp;
-X  }
-X  else {
-X    ncbi_long8_read(ifile,&l8tmp);
-X    m_fptr->tot_len = l8tmp;
-X  }
-X
-X  src_long4_read(ifile,&ltmp);
-X  m_fptr->max_len = ltmp;
-X
-X  /* currently we are not using this information, but perhaps later;
-X     the pointers are cleared so the error path cannot free them twice */
-X  if (title_str!=NULL) {free(title_str); title_str = NULL;}
-X  if (date_str!=NULL) {free(date_str); date_str = NULL;}
-X
-#ifdef DEBUG
-X  fprintf(stderr,"%s format: BL2 (%s) max_cnt: %d, totlen: %lld, maxlen %ld\n",
-X	  name,m_fptr->mm_flg ? "mmap" : "fopen",
-X	  m_fptr->max_cnt,m_fptr->tot_len,m_fptr->max_len);
-#endif
-X
-X  /* allocate and read hdr indexes */
-X  if ((f_pos_arr=(unsigned int *)calloc((size_t)m_fptr->max_cnt+1,sizeof(int)))==NULL) {
-X    fprintf(stderr," cannot allocate tmp header pointers\n");
-X    goto error_r;
-X  }
-X
-X  if ((m_fptr->d_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
-X    fprintf(stderr," cannot allocate header pointers\n");
-X    goto error_r;
-X  }
-X
-X  /* allocate and read sequence offsets */
-X  if ((m_fptr->s_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
-X    fprintf(stderr," cannot allocate sequence pointers\n");
-X    goto error_r;
-X  }
-X
-X  /* each offset table holds max_cnt+1 4-byte entries; it is read in one
-X     fread() and converted with bl2_uint4_cvt() unless IS_BIG_ENDIAN */
-X  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
-X    fprintf(stderr," error reading hdr offsets: %s\n",tname);
-X    goto error_r;
-X  }
-X
-X  for (i=0; i<=m_fptr->max_cnt; i++) {
-#ifdef IS_BIG_ENDIAN
-X    m_fptr->d_pos_arr[i] = f_pos_arr[i];
-#else
-X    m_fptr->d_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
-#endif
-X  }
-X
-X  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
-X    fprintf(stderr," error reading seq offsets: %s\n",tname);
-X    goto error_r;
-X  }
-X  for (i=0; i<=m_fptr->max_cnt; i++) {
-#ifdef IS_BIG_ENDIAN
-X    m_fptr->s_pos_arr[i] = f_pos_arr[i];
-#else
-X    m_fptr->s_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
-#endif
-X  }
-X
-X  if (dbtype == NTFORMAT) {
-X    /* allocate and read ambiguity offsets */
-X    if ((m_fptr->a_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {
-X      fprintf(stderr," cannot allocate sequence pointers\n");
-X      goto error_r;
-X    }
-X
-X    if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {
-X      fprintf(stderr," error reading seq offsets: %s\n",tname);
-X      goto error_r;
-X    }
-X    for (i=0; i<=m_fptr->max_cnt; i++) {
-#ifdef IS_BIG_ENDIAN
-X      m_fptr->a_pos_arr[i] = f_pos_arr[i];
-#else
-X      m_fptr->a_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);
-#endif
-X    }
-X  }
-X
-X  /*
-X  for (i=0; i < min(m_fptr->max_cnt,10); i++) {
-X    fprintf(stderr,"%d: %d %d %d\n",i,m_fptr->s_pos_arr[i],m_fptr->a_pos_arr[i],m_fptr->d_pos_arr[i]);
-X  }
-X  */
-X
-X  /* all done with ifile, close it */
-X  fclose(ifile);
-X
-X  free(f_pos_arr);
-X
-X  /* a stdio sequence file must start with the NULLB sentinel */
-X  if (!m_fptr->mm_flg) {
-X    tmp = fgetc(m_fptr->libf);
-X    if (tmp!=NULLB)
-X      fprintf(stderr," phase error: %d:%d found\n",0,tmp);
-X  }
-X
-X  m_fptr->bl_lib_pos = 1;
-X  amb_cnt = 0;
-X  return m_fptr;
-X
-X error_r:
-X  /* common failure exit: release whatever has been acquired so far.
-X     m_fptr may still be NULL; its members are zero from calloc() until
-X     they are assigned.  m_fptr->oid_list aliases oid_list on this path
-X     (or is NULL), so only the local pointer is freed. */
-X  if (ifile != NULL) fclose(ifile);
-X  free(title_str);
-X  free(date_str);
-X  free(f_pos_arr);
-X  free(oid_list);
-X  if (m_fptr != NULL) {
-#ifdef USE_MMAP
-X    if (m_fptr->mm_flg) munmap(m_fptr->mmap_base,m_fptr->st_size);
-#endif
-X    if (m_fptr->libf != NULL) fclose(m_fptr->libf);
-X    if (m_fptr->hfile != NULL) fclose(m_fptr->hfile);
-X    free(m_fptr->a_pos_arr);
-X    free(m_fptr->s_pos_arr);
-X    free(m_fptr->d_pos_arr);
-X    free(m_fptr->tmp_buf);
-X    free(m_fptr);
-X  }
-X  return NULL;
-}
-X
-/* ncbl2_closelib() releases what ncbl2_openlib() attached to m_fptr: the
-X   scratch buffer, the sequence/ambiguity/header offset arrays, the oid
-X   mask list, the header file, and the sequence file or its mmap()ed
-X   image.  The lmf_str itself is not freed.  Released members are reset
-X   so that calling this twice on the same structure is harmless. */
-void ncbl2_closelib(struct lmf_str *m_fptr)
-{
-X  if (m_fptr == NULL) return;
-X
-X  if (m_fptr->tmp_buf != NULL) {
-X    free(m_fptr->tmp_buf);
-X    m_fptr->tmp_buf = NULL;
-X    m_fptr->tmp_buf_max = 0;
-X  }
-X
-X  if (m_fptr->s_pos_arr !=NULL) {
-X    free(m_fptr->s_pos_arr);
-X    m_fptr->s_pos_arr = NULL;
-X  }
-X  if (m_fptr->a_pos_arr!=NULL) {
-X    free(m_fptr->a_pos_arr);
-X    m_fptr->a_pos_arr = NULL;
-X  }
-X  /* freed on its own: it must not leak just because hfile is closed */
-X  if (m_fptr->d_pos_arr!=NULL) {
-X    free(m_fptr->d_pos_arr);
-X    m_fptr->d_pos_arr = NULL;
-X  }
-X
-X  if (m_fptr->hfile !=NULL ) {
-X    fclose(m_fptr->hfile);
-X    m_fptr->hfile=NULL;
-X  }
-X
-X  if (m_fptr->oid_list != NULL) {
-X    free(m_fptr->oid_list); m_fptr->oid_list = NULL;
-X    m_fptr->oid_seqs = m_fptr->max_oid = 0;
-X  }
-X
-X  /* the guard must match the USE_MMAP macro used when the file was
-X     mapped, otherwise the mapping is never released */
-#ifdef USE_MMAP
-X  if (m_fptr->mm_flg) {
-X    munmap(m_fptr->mmap_base,m_fptr->st_size);
-X    m_fptr->mm_flg = 0;
-X    m_fptr->mmap_fd = -1;
-X  }
-X  else
-#endif
-X  if (m_fptr->libf !=NULL ) {fclose(m_fptr->libf); m_fptr->libf=NULL;}
-}
-X
-/* ncbl2_getliba_o() is the oid-mask aware front end to ncbl2_getliba().
-X   Starting at m_fd->lpos it scans m_fd->oid_list[] (32 oids per word,
-X   tested after bl2_uint4_cvt() with the highest bit standing for the
-X   first oid of the word) for the next oid whose bit is set, positions
-X   the library there and lets ncbl2_getliba() read the sequence.  All
-X   arguments are passed through unchanged.
-X   Returns ncbl2_getliba()'s result, or -1 when no oid up to
-X   m_fd->max_oid is set. */
-int
-ncbl2_getliba_o(unsigned char *seq,
-X		int maxs,
-X		char *libstr,
-X		int n_libstr,
-X		fseek_t *libpos,
-X		int *lcont,
-X		struct lmf_str *m_fd,
-X		long *l_off)
-{
-X  int tpos;
-X  unsigned int t_mask, t_shift, oid_mask;
-X
-X  /* get to the next valid pointer */
-X
-X  for ( tpos = m_fd->lpos ;tpos <= m_fd->max_oid; tpos++) {
-X    t_mask = tpos / 32;
-X    t_shift = 31 - (tpos % 32);
-X    /* a zero word has no oids set; the byte order is irrelevant */
-X    if ((oid_mask = m_fd->oid_list[t_mask])==0) { continue; }
-X
-X    /* unsigned shift: t_shift can be 31, which overflows a signed int */
-X    if ((bl2_uint4_cvt(oid_mask) & (1u << t_shift))) {
-X      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
-X      m_fd->lpos = tpos;	/* already bumped up */
-X      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
-X      return ncbl2_getliba(seq, maxs, libstr, n_libstr,
-X			   libpos, lcont, m_fd, l_off);
-X    }
-X  }
-X  return -1;
-}
-X
-int /* ncbl2_getliba(): return the next chunk of the protein sequence at
-X   index m_fd->lpos, translated through aa_btof[] into seq[].
-X   seq/maxs        - destination buffer and the chunk limit
-X   libstr/n_libstr - receives the description; set to an empty string
-X                     unless DEBUG or PCOMPLIB is defined
-X   libpos          - set to the sequence index (m_fd->lpos on entry)
-X   lcont           - 0 on entry starts a new sequence; left non-zero when
-X                     more of the sequence remains for the next call
-X   l_off           - always set to 1
-X   Returns the number of residues stored (seq[] is terminated with EOSEQ),
-X   or -1 at the end of the library or on a read error. */
-ncbl2_getliba(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *m_fd,
-X long *l_off)
-{
-X unsigned char *sptr, *dptr;
-X int s_chunk, d_len, lib_cnt;
-X long seqcnt;
-X long tmp;
-X static long seq_len; /* static: what is left of the current sequence survives between continuation calls */
-#if defined(DEBUG) || defined(PCOMPLIB)
-X int gi, my_db, taxid;
-X char acc[20], title[21], name[20];
-#endif
-X
-X *l_off = 1;
-X
-X lib_cnt = m_fd->lpos;
-X *libpos = (fseek_t)m_fd->lpos;
-X
-X if (*lcont==0) { /* starting a new sequence: look up its length and, when mmap()ed, its address */
-X if (lib_cnt >= m_fd->max_cnt) return -1; /* no more sequences */
-X seq_len = m_fd->s_pos_arr[lib_cnt+1] - m_fd->s_pos_arr[lib_cnt]; /* value is +1 off to get the NULL */
-X if (m_fd->mm_flg) m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lib_cnt];
-#if !defined(DEBUG) && !defined(PCOMPLIB)
-X libstr[0]='\0';
-#else
-X /* get the name from the header file */
-X fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
-X
-X if (m_fd->bl_format_ver == FORMATDBV3) { /* v3: the header entry is copied as text, truncated to n_libstr-1 */
-X d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
-X fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
-X libstr[d_len]='\0';
-X }
-X else { /* otherwise the entry is read into tmp_buf and handed to parse_fastadl_asn() */
-X d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
-X fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile);
-X parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len,
-X &gi, &my_db, acc, name, title, 20, &taxid);
-X sprintf(libstr,"gi|%d",gi); /* NOTE(review): not bounded by n_libstr - confirm callers pass a large enough buffer */
-X }
-#endif
-X }
-X if (seq_len <= maxs) { /* sequence fits */
-X seqcnt = seq_len;
-X m_fd->lpos++;
-X *lcont = 0;
-X }
-X else { /* doesn't fit */
-X seqcnt = maxs-1;
-X (*lcont)++;
-X }
-X
-X if (m_fd->mm_flg) sptr = (unsigned char *)m_fd->mmap_addr;
-X else { /* NOTE(review): reads seq_len bytes even when seq_len > maxs; presumably seq[] holds only maxs bytes - confirm, and check where a continuation call then reads from */
-X if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,m_fd->libf))!=(size_t)seq_len) {
-X fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",
-X *libpos,tmp,seq_len);
-X goto error;
-X }
-X sptr = seq;
-X }
-X if (seq_len <= maxs) {seqcnt = --seq_len;} /* complete sequence: do not count the trailing NULL byte */
-X
-X /* everything is ready, set up dst. pointer, seq_len */
-X dptr = seq;
-X
-X if (aa_b2toa[sptr[seq_len-1]]=='*') seq_len--; /* NOTE(review): seqcnt was already set above, so this does not shorten the returned count - confirm intent */
-X s_chunk = seqcnt/16; /* translate 16 residues per iteration; the tail loop below handles the remainder */
-X while (s_chunk-- > 0) {
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X *dptr++ = aa_btof[*sptr++];
-X }
-X while (dptr < seq+seqcnt) *dptr++ = aa_btof[*sptr++];
-X
-X if (m_fd->mm_flg) m_fd->mmap_addr = (char *)sptr; /* remember where the next chunk starts in the mapping */
-X
-X /* we didn't get it all, so reset for more */
-X if (*lcont) seq_len -= seqcnt;
-X
-X seq[seqcnt]= EOSEQ;
-X return (seqcnt);
-X
-error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);
-X fflush(stderr);
-X return (-1);
-}
-X
-/* ncbl2_getlibn_o() is the oid-mask aware front end to ncbl2_getlibn().
-X   Starting at m_fd->lpos it scans m_fd->oid_list[] (32 oids per word,
-X   tested after bl2_uint4_cvt() with the highest bit standing for the
-X   first oid of the word) for the next oid whose bit is set, positions
-X   the library there and lets ncbl2_getlibn() read the sequence.  All
-X   arguments are passed through unchanged.
-X   Returns ncbl2_getlibn()'s result, or -1 when no oid up to
-X   m_fd->max_oid is set. */
-int
-ncbl2_getlibn_o(unsigned char *seq,
-X		int maxs,
-X		char *libstr,
-X		int n_libstr,
-X		fseek_t *libpos,
-X		int *lcont,
-X		struct lmf_str *m_fd,
-X		long *l_off)
-{
-X  int tpos;
-X  unsigned int t_mask, t_shift, oid_mask;
-X
-X  /* get to the next valid pointer */
-X
-X  for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) {
-X    t_mask = tpos / 32;
-X    t_shift = 31 - (tpos % 32);
-X    /* a zero word has no oids set; the byte order is irrelevant */
-X    if ((oid_mask = m_fd->oid_list[t_mask])==0) { continue; }
-X
-X    /* unsigned shift: t_shift can be 31, which overflows a signed int */
-X    if ((bl2_uint4_cvt(oid_mask) & (1u << t_shift))) {
-X      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);
-X      m_fd->lpos = tpos;	/* already bumped up */
-X      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];
-X      return ncbl2_getlibn(seq, maxs, libstr, n_libstr,
-X			   libpos, lcont, m_fd, l_off);
-X    }
-X  }
-X  return -1;
-}
-X
-static char tmp_amb[4096];
-X
-/* ncbl2_getlibn() - read one nucleotide entry, or the next chunk of a
-X long entry, from the 2-bit packed sequence data and expand it in place.
-X
-X seq/maxs   destination buffer and its size; packed bytes are read (or
-X            copied from the mmap'ed region) and expanded from the end
-X            of the buffer towards the front, four residues per byte,
-X            each stored as (2-bit code)+1
-X libstr     set to "" unless DEBUG or PCOMPLIB is defined, in which
-X            case up to n_libstr-1 bytes of the header are read into it
-X libpos     receives the entry index (m_fd->lpos on entry)
-X lcont      0 on entry: start a new entry; set to 0 on return when the
-X            entry is complete, incremented when more chunks remain
-X l_off      always set to 1
-X
-X Returns the number of residues placed in seq (terminated with EOSEQ),
-X or -1 at the end of the library or on a read error.
-X
-X seq_len, c_len and c_pad are function-static, so continuation state
-X is shared by every caller of this function.
-*/
-int
-ncbl2_getlibn(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *m_fd,
-X long *l_off)
-{
-X unsigned char *sptr, *tptr, stmp;
-X long seqcnt;
-X int s_chunk, lib_cnt;
-X size_t tmp;
-X char ch;
-X static long seq_len;
-X static int c_len,c_pad;
-X int c_len_set, d_len;
-X
-X *l_off = 1;
-X
-X lib_cnt = m_fd->lpos;
-X *libpos = (fseek_t)lib_cnt;
-X if (*lcont==0) { /* not a continuation of previous */
-X if (lib_cnt >= m_fd->max_cnt) return (-1);
-X /* packed length in bytes: from the start of the sequence to a_pos_arr[] */
-X c_len = m_fd->a_pos_arr[lib_cnt]- m_fd->s_pos_arr[lib_cnt];
-X if (!m_fd->mm_flg) {
-X if (m_fd->bl_lib_pos != m_fd->s_pos_arr[lib_cnt]) { /* are we positioned to read? */
-X amb_cnt++;
-X /* NOTE(review): the test subtracts in the opposite order from the
-X    fread() count below; when bl_lib_pos is behind s_pos_arr[] the
-X    difference is negative and (if compared as size_t) the fseek()
-X    branch is taken, and when it is ahead the fread() count would be
-X    negative - confirm the types and intended direction */
-X if ((m_fd->bl_lib_pos - m_fd->s_pos_arr[lib_cnt]) < sizeof(tmp_amb)) {
-X /* jump over amb_ray */
-X fread(tmp_amb,(size_t)1,(size_t)(m_fd->s_pos_arr[lib_cnt]-m_fd->bl_lib_pos),m_fd->libf);
-X }
-X else { /* fseek over amb_ray */
-X fseek(m_fd->libf,m_fd->s_pos_arr[lib_cnt],0);
-X }
-X m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
-X }
-X }
-X else m_fd->mmap_addr = m_fd->mmap_base + m_fd->s_pos_arr[lib_cnt];
-#if !defined(DEBUG) && !defined(PCOMPLIB)
-X libstr[0]='\0';
-#else
-X /* get the name from the header file */
-X fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);
-X
-X d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);
-X fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);
-X libstr[d_len]='\0';
-#endif
-X } /* end of *lcont==0 */
-X
-X /* To avoid the situation where c_len <= 1; we must anticipate what
-X c_len will be after this pass. If it will be <= 64, back off this
-X time so next time it will be > 64 */
-X
-X seq_len = c_len*4;
-X
-X if ((seq_len+4 > maxs) && (seq_len+4 - maxs <= 256)) {
-X /* we won't be done but we will have less than 256 to go */
-X c_len -= 64; seq_len -= 256; c_len_set = 1; maxs -= 256;}
-X else c_len_set = 0;
-X
-X /*
-X fprintf(stderr," lib_cnt: %d %d %d %d\n",lib_cnt,c_len,seq_len,maxs);
-X */
-X
-X /* does the rest of the sequence fit? */
-X if (seq_len <= maxs-4 && !c_len_set) {
-X seqcnt = c_len;
-X if (!m_fd->mm_flg) {
-X if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,m_fd->libf))!=(size_t)seqcnt) {
-X /* NOTE(review): tmp is size_t but printed with %ld, and *libpos is
-X    printed with %lld here but %ld at the error label - confirm fseek_t */
-X fprintf(stderr,
-X " could not read sequence record: %s %lld %ld != %ld: %d\n",
-X libstr,*libpos,tmp,seqcnt,*seq);
-X goto error;
-X }
-X m_fd->bl_lib_pos += tmp;
-X sptr = seq + seqcnt;
-X }
-X else sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
-X
-X *lcont = 0; /* this is the last chunk */
-X lib_cnt++; /* increment to the next sequence */
-X /* the last byte is either '0' (no remainder) or the last 1-3 chars and the remainder */
-X c_pad = *(sptr-1);
-X c_pad &= 0x3; /* get the last (low) 2 bits */
-X seq_len -= (4 - c_pad); /* if the last 2 bits are 0, its a NULL byte */
-X }
-X else { /* get the next chunk, but more to come */
-X seqcnt = ((maxs+3)/4)-1;
-X if (!m_fd->mm_flg) {
-X if ((tmp=fread(seq,(size_t)1,(size_t)(seqcnt),m_fd->libf))!=(size_t)(seqcnt)) {
-X fprintf(stderr," could not read sequence record: %lld %ld/%ld\n",
-X *libpos,tmp,seqcnt);
-X goto error;
-X }
-X m_fd->bl_lib_pos += tmp;
-X sptr = seq + seqcnt;
-X }
-X else {
-X sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);
-X m_fd->mmap_addr += seqcnt;
-X }
-X seq_len = 4*seqcnt;
-X c_len -= seqcnt;
-X /* undo the temporary back-off applied above */
-X if (c_len_set) {c_len += 64; maxs += 256;}
-X (*lcont)++;
-/* hopefully we don't need this because of c_len -= 64. */
-/*
-X if (c_len == 1) {
-#if !defined (USE_MMAP)
-X c_pad = fgetc(m_fd->libf);
-X *sptr=c_pad;
-#else
-X c_pad = *m_fd->mmap_addr++;
-X sptr = m_fd->mmap_addr;
-#endif
-X c_pad &= 0x3;
-X seq_len += c_pad;
-X seqcnt++;
-X lib_cnt++;
-X *lcont = 0;
-X }
-*/
-X }
-X
-X /* point to the last packed byte and to the end of the array
-X seqcnt is the exact number of bytes read
-X tptr points to the destination, use multiple of 4 to simplify math
-X sptr points to the source, note that the last byte will be read 4 cycles
-X before it is written
-X */
-X
-X tptr = seq + 4*seqcnt;
-X s_chunk = seqcnt/8;
-X /* unrolled: eight packed bytes (32 residues) per iteration */
-X while (s_chunk-- > 0) {
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X }
-X /* remaining (seqcnt % 8) packed bytes, one byte per iteration */
-X while (tptr>seq) {
-X stmp = *--sptr;
-X *--tptr = (stmp&3) +1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X *--tptr = ((stmp >>= 2)&3)+1;
-X }
-X /*
-X for (sptr=seq; sptr < seq+seq_len; sptr++) {
-X printf("%c",nt[*sptr]);
-X if ((int)(sptr-seq) % 60 == 59) printf("\n");
-X }
-X printf("\n");
-X */
-X
-X m_fd->lpos = lib_cnt;
-X if (seqcnt*4 >= seq_len) { /* there was enough room */
-X seq[seq_len]= EOSEQ;
-X /* printf("%d\n",seq_len); */
-X return seq_len;
-X }
-X else { /* not enough room */
-X seq[seqcnt*4]=EOSEQ;
-X seq_len -= 4*seqcnt;
-X return (4*seqcnt);
-X }
-X
-X /* reached only through goto from a failed fread() */
-error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);
-X fflush(stderr);
-X return (-1);
-}
-X
-X /* 0 1 2 3 4 5 6 7
-X 8 9 10 11 12 13 14 15
-X 16 17 */
-static char
-*db_type_arr[] = {"lcl","gib","gim","gii","gb","emb","pir","sp",
-X "pat","ref","gnl","gi","dbj","prf","pdb","tpg",
-X "tpe","tpd"};
-X
-/* ncbl2_ranlib() - build the description line for entry libpos in str
-X   (at most cnt-1 characters plus a terminating NUL) and reposition the
-X   library on that entry.
-X
-X   FORMATDBV3 headers are copied verbatim.  Otherwise the header is an
-X   ASN.1 record holding one or more definition lines which are decoded
-X   with parse_fastadl_asn():
-X     - m_fd->pref_db < 0: every definition line is appended, joined
-X       with '\001', until str is full;
-X     - otherwise only the first line whose database matches
-X       m_fd->pref_db is used;
-X     - if nothing matched, the last definition line parsed is used.
-X   Any '\001' separators are finally replaced by blanks.
-X
-X   On return m_fd->lpos and m_fd->bl_lib_pos refer to the entry and,
-X   when the library is not memory mapped, m_fd->libf has been
-X   positioned on its sequence data.  libstr is not used.
-*/
-void
-ncbl2_ranlib(char *str,
-X             int cnt,
-X             fseek_t libpos,
-X             char *libstr,
-X             struct lmf_str *m_fd)
-{
-X  int llen, lib_cnt;
-X  char *bp;
-X  unsigned char *my_buff=NULL;
-X  char descr[2048];
-X  unsigned char *abp;
-X  int gi, taxid;
-X  int my_db;
-X  char acc[20], name[20];
-X  char title[1024];
-X  int have_my_buff=0;
-X  int have_descr = 0;
-X
-X  lib_cnt = (int)libpos;
-X  llen = m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt];
-X
-X  fseek(m_fd->hfile,m_fd->d_pos_arr[libpos],0);
-X
-X  if (m_fd->bl_format_ver == FORMATDBV3) {
-X    if (llen >= cnt) llen = cnt-1;
-X    if (llen < 0) llen = 0;
-X    /* terminate at the number of bytes actually read so the '\001'
-X       scan below never walks over uninitialised memory */
-X    llen = (int)fread(str,(size_t)1,(size_t)(llen),m_fd->hfile);
-X    str[llen] = '\0';
-X  }
-X  else {
-X    if (llen >= m_fd->tmp_buf_max) {
-X      if ((my_buff=(unsigned char *)calloc(llen,sizeof(char)))==NULL) {
-X        fprintf(stderr," cannot allocate ASN.1 buffer: %d\n",llen);
-X        /* fall back to the shared buffer and a truncated record */
-X        my_buff = (unsigned char *)m_fd->tmp_buf;
-X        llen = m_fd->tmp_buf_max;
-X      }
-X      else have_my_buff = 1;
-X    }
-X    else {
-X      my_buff = (unsigned char *)m_fd->tmp_buf;
-X    }
-X    abp = my_buff;
-X    /* parse only what was really read */
-X    llen = (int)fread(my_buff,(size_t)1,llen,m_fd->hfile);
-X
-X    str[0] = '\0';
-X    do {
-X      /* the parser does not set gi for a record without a seqid */
-X      gi = 0;
-X      abp = parse_fastadl_asn(abp, my_buff+llen,
-X                              &gi, &my_db, acc, name,
-X                              title, sizeof(title), &taxid);
-X
-X      if (gi > 0) {
-X        sprintf(descr,"gi|%d|%s|%s|%s ",gi,db_type_arr[my_db],acc,name);
-X      }
-X      else {
-X        if (acc[0] != '\0') sprintf(descr,"%s ",acc);
-X        else descr[0] = '\0';
-X        if (name[0] != '\0' && strcmp(name,"BL_ORD_ID")!=0) sprintf(descr+strlen(descr),"%s ", name);
-X      }
-X      if (m_fd->pref_db < 0) {
-X        if (!have_descr) {
-X          strncpy(str,descr,cnt-1);
-X          str[cnt-1] = '\0';	/* strncpy() does not terminate on truncation */
-X          have_descr = 1;
-X        }
-X        else {
-X          strncat(str,"\001",cnt-strlen(str)-1);
-X          strncat(str,descr,cnt-strlen(str)-1);
-X        }
-X        strncat(str,title,cnt-strlen(str)-1);
-X        if (strlen(str) >= cnt-1) break;
-X      }
-X      else if (m_fd->pref_db == my_db) {
-X        have_descr = 1;
-X        strncpy(str,descr,cnt-1);
-X        str[cnt-1] = '\0';
-X        strncat(str,title,cnt-strlen(str)-1);
-X        break;
-X      }
-X    } while (abp);
-X
-X    if (!have_descr) {
-X      /* no line from the preferred database: use the last one parsed
-X         (identifier followed by its title, as in the branches above) */
-X      strncpy(str,descr,cnt-1);
-X      str[cnt-1] = '\0';
-X      strncat(str,title,cnt-strlen(str)-1);
-X    }
-X
-X    if (have_my_buff) free(my_buff);
-X  }
-X
-X  str[cnt-1]='\0';
-X
-X  bp = str;
-X  while((bp=strchr(bp,'\001'))!=NULL) {*bp++=' ';}
-X
-X  if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[libpos],0);
-X
-X  m_fd->lpos = lib_cnt;
-X  m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];
-}
-X
-/* bl2_uint4_cvt() - reverse the order of the four low-order bytes of val.
-X   When IS_BIG_ENDIAN is defined the value is returned unchanged. */
-unsigned int bl2_uint4_cvt(unsigned int val)
-{
-#ifdef IS_BIG_ENDIAN
-X  return val;
-#else /* it better be LITTLE_ENDIAN */
-X  unsigned int byte0 = val & 0xffu;
-X  unsigned int byte1 = (val >> 8) & 0xffu;
-X  unsigned int byte2 = (val >> 16) & 0xffu;
-X  unsigned int byte3 = (val >> 24) & 0xffu;
-X
-X  /* lowest byte becomes the highest and vice versa */
-X  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
-#endif
-}
-X
-/* bl2_long4_cvt() - like bl2_uint4_cvt() but for a value held in a long:
-X   the four low-order bytes of val are returned in reversed order.
-X   When IS_BIG_ENDIAN is defined val is simply narrowed through an int. */
-unsigned int bl2_long4_cvt(long val)
-{
-#ifdef IS_BIG_ENDIAN
-X  int val4;
-X
-X  val4 = val;
-X  return val4;
-#else /* it better be LITTLE_ENDIAN */
-X  unsigned int hi_pair, lo_pair;
-X
-X  /* bytes 0 and 1 end up in the upper half, bytes 2 and 3 in the lower */
-X  hi_pair = (unsigned int)(((val & 255) << 8) | ((val >> 8) & 255));
-X  lo_pair = (unsigned int)((((val >> 16) & 255) << 8) | ((val >> 24) & 255));
-X  return (hi_pair << 16) + lo_pair;
-#endif
-}
-X
-/* bl2_long8_cvt() - reverse the byte order of a 64-bit value.
-X When IS_BIG_ENDIAN is defined val is returned unchanged.  Otherwise
-X the bytes are re-assembled lowest byte first; this needs BIG_LIB64,
-X without which the function prints a message and calls exit(1). */
-int64_t bl2_long8_cvt(int64_t val)
-{
-X int64_t res;
-#ifdef IS_BIG_ENDIAN
-X return val;
-#else /* it better be LITTLE_ENDIAN */
-X /* bytes 0..3 of val become the four highest bytes built so far */
-X res = ((val&255)*256)+ ((val>>8)&255);
-X res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);
-#ifdef BIG_LIB64
-X /* append bytes 4..7 below them */
-X res = (res<<16) + (((val>>32)&255)*256) + ((val>>40)&255);
-X res = (res<<16) + (((val>>48)&255)*256) + ((val>>56)&255);
-#else
-X fprintf(stderr,"Cannot use bl2_long8_cvt without 64-bit longs\n");
-X exit(1);
-#endif
-X return res;
-#endif
-}
-X
-/* src_int4_read() - read a 4-byte big-endian integer from fd into *val.
-X
-X   The bytes are decoded explicitly, most significant first, which gives
-X   the same result on big- and little-endian hosts.  The value is
-X   assembled in unsigned arithmetic (shifting a signed int into its sign
-X   bit is undefined).  If fewer than four bytes can be read *val is set
-X   to 0 instead of being built from uninitialised bytes. */
-void src_int4_read(FILE *fd, int *val)
-{
-X  unsigned char b[4];
-X  unsigned int u;
-X
-X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != (size_t)4) {
-X    *val = 0;
-X    return;
-X  }
-X  u = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
-X    | ((unsigned int)b[2] << 8) | (unsigned int)b[3];
-X  *val = (int)u;
-}
-X
-/* src_long4_read() - read a 4-byte big-endian signed integer from fd and
-X   store it, sign-extended, in *valp.
-X
-X   The bytes are decoded explicitly in unsigned arithmetic, so the
-X   result does not depend on host byte order and no signed shift can
-X   overflow.  On a short read *valp is set to 0. */
-void src_long4_read(FILE *fd, long *valp)
-{
-X  unsigned char b[4];
-X  unsigned int u;
-X
-X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != (size_t)4) {
-X    *valp = 0;
-X    return;
-X  }
-X  u = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
-X    | ((unsigned int)b[2] << 8) | (unsigned int)b[3];
-X  /* go through int so that a set top bit yields a negative long */
-X  *valp = (long)(int)u;
-}
-X
-/* src_uint4_read() - read a 4-byte big-endian unsigned integer from fd
-X   into *valp.
-X
-X   Decoding is done byte by byte in unsigned arithmetic, independent of
-X   host byte order.  On a short read *valp is set to 0. */
-void src_uint4_read(FILE *fd, unsigned int *valp)
-{
-X  unsigned char b[4];
-X
-X  if (fread((char *)&b[0],(size_t)1,(size_t)4,fd) != (size_t)4) {
-X    *valp = 0;
-X    return;
-X  }
-X  *valp = ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16)
-X        | ((unsigned int)b[2] << 8) | (unsigned int)b[3];
-}
-X
-/* src_long8_read() - read an 8-byte big-endian integer from fd into *val.
-X
-X   The value is accumulated in an unsigned long, most significant byte
-X   first, so no signed shift can overflow and nothing is read directly
-X   into *val (which would overrun it where long is only 4 bytes wide).
-X   Where long is narrower than 64 bits only the low-order bytes are
-X   kept.  On a short read *val is set to 0. */
-void src_long8_read(FILE *fd, long *val)
-{
-X  unsigned char b[8];
-X  unsigned long acc = 0;
-X  int i;
-X
-X  if (fread((char *)&b[0],(size_t)1,(size_t)8,fd) != (size_t)8) {
-X    *val = 0;
-X    return;
-X  }
-X  for (i = 0; i < 8; i++) acc = (acc << 8) | (unsigned long)b[i];
-X  *val = (long)acc;
-}
-X
-/* ncbi_long8_read() - read an 8-byte little-endian integer (least
-X   significant byte first) from fd into *val.
-X
-X   The value is assembled in unsigned long long rather than long, so all
-X   64 bits survive on hosts where long is 32 bits wide.  On a short read
-X   *val is set to 0. */
-void ncbi_long8_read(FILE *fd, int64_t *val)
-{
-X  unsigned char b[8];
-X  unsigned long long acc = 0;
-X  int i;
-X
-X  if (fread((char *)&b[0],(size_t)1,(size_t)8,fd) != (size_t)8) {
-X    *val = 0;
-X    return;
-X  }
-X  /* b[7] is the most significant byte */
-X  for (i = 7; i >= 0; i--) acc = (acc << 8) | (unsigned long long)b[i];
-X  *val = (int64_t)acc;
-}
-X
-/* src_char_read() - read a single byte from fd into *val.
-X   The result of fread() is not examined; *val is untouched at EOF. */
-void src_char_read(FILE *fd, char *val)
-{
-X  const size_t one_byte = 1;
-X
-X  fread(val, one_byte, one_byte, fd);
-}
-X
-/* src_fstr_read() - read one fixed-length field of slen bytes from fd
-X   into val.  No terminating NUL is added and the result of fread() is
-X   not examined. */
-void src_fstr_read(FILE *fd, char *val, int slen)
-{
-X  const size_t field_len = (size_t)slen;
-X
-X  fread(val, field_len, (size_t)1, fd);
-}
-X
-/* newname() - build "<oname>.<suff>" in nname, a buffer of maxn bytes.
-X
-X   The result is always NUL-terminated and never longer than maxn-1
-X   characters; whatever does not fit is dropped.  Nothing is written
-X   when maxn is not positive. */
-void
-newname(char *nname, char *oname, char *suff, int maxn)
-{
-X  size_t room;
-X
-X  if (maxn <= 0) return;
-X  room = (size_t)(maxn - 1);	/* characters available before the NUL */
-X
-X  strncpy(nname,oname,room);
-X  nname[room] = '\0';		/* strncpy() does not terminate on truncation */
-X  /* strncat() writes up to n characters plus a NUL, so n must leave
-X     one byte of the buffer free */
-X  strncat(nname,".",room-strlen(nname));
-X  strncat(nname,suff,room-strlen(nname));
-}
-X
-#define ASN_SEQ 0x30
-#define ASN_IS_BOOL 1
-#define ASN_IS_INT 2
-#define ASN_IS_STR 26
-X
-/* get_asn_int() - decode an integer element at abp.
-X
-X   Expected layout: the ASN_IS_INT tag, a one-byte length, that many
-X   value bytes (most significant first) and two trailing bytes which are
-X   skipped.  *val receives the value and the address following the
-X   element is returned.  If the tag is missing a message is printed,
-X   *val is set to 0 and only the tag byte is consumed. */
-unsigned char *
-get_asn_int(unsigned char *abp, int *val) {
-X
-X  int n_bytes, k;
-X  int acc = 0;
-X
-X  if (*abp++ != ASN_IS_INT) {	/* check for int */
-X    fprintf(stderr," int missing\n");
-X    *val = acc;
-X    return abp;
-X  }
-X
-X  n_bytes = *abp++;
-X  for (k = 0; k < n_bytes; k++) {
-X    acc = acc * 256 + *abp++;
-X  }
-X  *val = acc;
-X  return abp + 2;		/* skip over null's */
-}
-X
-/* get_asn_text() - decode a string element at abp into text.
-X
-X   Expected layout: the ASN_IS_STR tag, a length and the characters,
-X   followed by two bytes that are skipped.  A length byte greater than
-X   128 holds, in its low seven bits, the number of following bytes that
-X   make up the real length.  At most t_len-1 characters are copied and
-X   text is always NUL-terminated; the returned address is past the whole
-X   element even when the copy was truncated.  If the tag is missing a
-X   message is printed, text is left empty and only the tag byte is
-X   consumed. */
-unsigned char *
-get_asn_text(unsigned char *abp, char *text, int t_len) {
-X  int len_byte, str_len, n_copy;
-X
-X  text[0] = '\0';
-X  if (*abp++ != ASN_IS_STR) {	/* check for str */
-X    fprintf(stderr," str missing\n");
-X    return abp;
-X  }
-X
-X  len_byte = *abp++;
-X  if (len_byte > 128) {		/* string length is in next bytes */
-X    len_byte &= 0x7f;		/* get number of bytes for len */
-X    for (str_len = 0; len_byte > 0; len_byte--) {
-X      str_len = (str_len << 8) + *abp++;
-X    }
-X  }
-X  else {
-X    str_len = len_byte;
-X  }
-X
-X  /* copy what fits, leaving room for the terminator */
-X  n_copy = (str_len < t_len-1) ? str_len : t_len-1;
-X  memcpy(text, abp, n_copy);
-X  text[n_copy] = '\0';
-X
-X  return abp + str_len + 2;
-}
-X
-/* something to try to skip over stuff we don't want */
-/* get_asn_junk() - walk over elements until a zero byte is found,
-X   discarding integers and strings and counting sequence headers, then
-X   skip two bytes for every sequence header seen.  Returns the address
-X   reached.
-X
-X   A byte that is none of the handled tags is stepped over two bytes at
-X   a time (the same default the other parsers in this file use);
-X   without that step the loop would never advance.
-X
-X   NOTE(review): ASN_IS_BOOL elements are passed to get_asn_int(), which
-X   reports " int missing" and consumes only the tag byte - confirm how
-X   booleans should be skipped. */
-unsigned char *
-get_asn_junk(unsigned char *abp) {
-X
-X  int seq_cnt = 0;
-X  int tmp;
-X  char string[256];
-X
-X  while (*abp) {
-X    if ( *abp  == ASN_SEQ) { abp += 2; seq_cnt++;}
-X    else if ( *abp == ASN_IS_BOOL ) {abp = get_asn_int(abp, &tmp);}
-X    else if ( *abp == ASN_IS_INT ) {abp = get_asn_int(abp, &tmp);}
-X    else if ( *abp == ASN_IS_STR ) {abp = get_asn_text(abp, string, sizeof(string)-1);}
-X    else { abp += 2; }	/* unknown tag: always make progress */
-X  }
-X
-X  while (seq_cnt-- > 0) abp += 2;
-X  return abp;
-}
-X
-/* get_asn_textseq_id() - decode a text sequence identifier at abp.
-X
-X   Elements are processed until a zero byte: the text tagged 0xa0 is
-X   stored in name, 0xa1 in acc, 0xa2 (release) is read and discarded and
-X   0xa3 is an integer version that is appended to acc as ".<version>".
-X   name and acc are treated as 20-byte buffers, so acc never exceeds 19
-X   characters.  Returns the address following the identifier. */
-unsigned char *
-get_asn_textseq_id(unsigned char *abp, 
-X                   char *name, char *acc)
-{
-X  char release[20];
-X  char ver_str[16];	/* '.', up to 11 characters of a formatted int, NUL */
-X  int version = 0;
-X  int seqcnt = 0;
-X  size_t acc_len;
-X
-X  ver_str[0]='\0';
-X
-X  if (*abp == ASN_SEQ) { abp += 2; seqcnt++;}
-X
-X  while (*abp) {
-X    switch (*abp) {
-X    case 0xa0:
-X      abp = get_asn_text(abp+2, name, 20);
-X      break;
-X    case 0xa1:
-X      abp = get_asn_text(abp+2, acc, 20);
-X      break;
-X    case 0xa2:
-X      abp = get_asn_text(abp+2, release, sizeof(release));
-X      break;
-X    case 0xa3:
-X      abp = get_asn_int(abp+2, &version);
-X      sprintf(ver_str,".%d",version);
-X      break;
-X    default: abp += 2;
-X    }
-X  }
-X  while (seqcnt-- > 0) abp += 4;
-X
-X  /* strncat() adds a NUL after the n characters it copies, so only
-X     19 - strlen(acc) characters may be appended to a 20-byte buffer */
-X  acc_len = strlen(acc);
-X  if (acc_len < 19) strncat(acc,ver_str,19-acc_len);
-X  acc[19]='\0';
-X  return abp;	/* skip 2 NULL's */
-}
-X
-/* get_asn_local_id() - decode a local identifier at abp into acc (a
-X   20-byte buffer).  An optional leading sequence header is stepped
-X   over, the text element two bytes further on is read, and four more
-X   bytes are skipped if a sequence header was present.  Returns the
-X   address reached. */
-unsigned char *
-get_asn_local_id(unsigned char *abp, char *acc)
-{
-X  const int in_seq = (*abp == ASN_SEQ);
-X
-X  if (in_seq) abp += 2;
-X
-X  abp = get_asn_text(abp+2, acc, 20);
-X
-X  if (in_seq) abp += 4;
-X  acc[19]='\0';
-X  return abp;	/* skip 2 NULL's */
-}
-X
-/* get_asn_dbtag() - decode a database tag at abp.
-X
-X   After an optional sequence header, the text tagged 0xa0 (the
-X   database) is stored in name.  The element tagged 0xa1 (the tag) is
-X   either an integer, stored in *id_p, or a text, stored in str.  A
-X   missing element is reported on stderr and stepped over.  Returns the
-X   address two bytes past the last element. */
-unsigned char *
-get_asn_dbtag(unsigned char *abp, char *name, char *str, int *id_p) {
-X
-X  if (*abp == ASN_SEQ) { abp += 2;}
-X
-X  /* db */
-X  if (*abp != 0xa0) {
-X    fprintf(stderr," missing dbtag:db %d %d\n",abp[0],abp[1]);
-X    abp += 2;
-X  }
-X  else {
-X    abp = get_asn_text(abp+2, name, 20);
-X  }
-X
-X  /* tag */
-X  if (*abp != 0xa1) {
-X    fprintf(stderr," missing dbtag:tag %2x %2x\n",abp[0],abp[1]);
-X    abp += 2;
-X  }
-X  else {
-X    abp += 4;		/* the 0xa1 header, then skip over id */
-X    abp = (*abp == 2) ? get_asn_int(abp,id_p)
-X                      : get_asn_text(abp+2, str, 20);
-X  }
-X  return abp+2;	/* skip 2 NULL's */
-}
-X
-/* get_asn_pdb_id() - decode a PDB identifier at abp.
-X
-X   Until a zero byte is reached: the text tagged 0xa0 (mol-id) is stored
-X   in acc, the integer tagged 0xa1 becomes the one-character string
-X   chain, an element tagged 0xa2 (date) is skipped up to and including
-X   its terminating zero bytes, and anything else is stepped over two
-X   bytes at a time.  Returns the address reached, plus four bytes if
-X   the identifier started with a sequence header. */
-unsigned char *
-get_asn_pdb_id(unsigned char *abp, char *acc, char *chain)
-{
-X  int ichain;
-X  const int in_seq = (*abp == ASN_SEQ);
-X
-X  if (in_seq) abp += 2;
-X
-X  while (*abp) {
-X    if (*abp == 0xa0) {		/* mol-id */
-X      abp = get_asn_text(abp+2, acc, 20);
-X    }
-X    else if (*abp == 0xa1) {
-X      abp = get_asn_int(abp+2, &ichain);
-X      chain[0] = ichain;
-X      chain[1] = '\0';
-X    }
-X    else if (*abp == 0xa2) {	/* ignore date - scan until NULL's */
-X      while (*abp++) {}
-X      abp += 2;			/* skip the NULL's */
-X    }
-X    else {
-X      abp += 2;
-X    }
-X  }
-X  if (in_seq) abp += 4;
-X  return abp;
-}
-X
-#define ASN_TYPE_MASK 31
-X
-/* get_asn_seqid() - decode a sequence identifier set at abp.
-X
-X   abp must point at a sequence header; otherwise a message is printed
-X   and abp is returned unchanged (with *gi_p set to 0).  A leading gi
-X   element (type 11) is stored in *gi_p.  The type of the following
-X   identifier, taken from the low five bits of its tag and mapped to 0
-X   when greater than 17, is stored in *db and selects the parser that
-X   fills acc and name.  Returns the address following the set. */
-unsigned char
-*get_asn_seqid(unsigned char *abp,
-X               int *gi_p, int *db, char *acc, char *name) {
-X
-X  int db_type, itmp, seq_cnt=0;
-X
-X  *gi_p = 0;
-X
-X  if (*abp != ASN_SEQ) {
-X    fprintf(stderr, "seqid - missing SEQ 1: %2x %2x\n",abp[0], abp[1]);
-X    return abp;
-X  }
-X  else { abp += 2; seq_cnt++;}
-X
-X  db_type = (*abp & ASN_TYPE_MASK);
-X
-X  if (db_type == 11) { /* gi */
-X    abp = get_asn_int(abp+2,gi_p);
-X  }
-X
-X  while (*abp == ASN_SEQ) {abp += 2; seq_cnt++;}
-X
-X  db_type = (*abp & ASN_TYPE_MASK);
-X  if (db_type > 17) {db_type = 0;}
-X  *db = db_type;
-X
-X  switch(db_type) {
-X  case 0: 
-X    abp = get_asn_local_id(abp+2, acc);
-X    break;
-X  case 1:
-X  case 2:
-X    abp = get_asn_int(abp+2,&itmp);
-X    abp += 2;
-X    break;
-X  case 11:
-X    abp = get_asn_int(abp+2,&itmp);
-X    break;
-X  case 4:
-X  case 5:
-X  case 6:
-X  case 7:
-X  case 9:
-X  case 12:
-X  case 13:
-X  case 15:
-X  case 16:
-X  case 17:
-X    abp = get_asn_textseq_id(abp+2,name,acc);
-X    break;
-X  case 10:
-X    abp = get_asn_dbtag(abp+2,name,acc,&itmp);
-X    /* without this break a general (dbtag) id was parsed a second
-X       time as a PDB id, overwriting acc and name */
-X    break;
-X  case 14:
-X    abp = get_asn_pdb_id(abp+2,acc,name);
-X    break;
-X  default: abp += 2;
-X  }
-X
-X  while (seq_cnt-- > 0) { abp += 4;}
-X  return abp; /* skip over 2 NULL's */
-}
-X
-#define ASN_FADL_TITLE 0xa0
-#define ASN_FADL_SEQID 0xa1
-#define ASN_FADL_TAXID 0xa2
-#define ASN_FADL_MEMBERS 0xa3
-#define ASN_FADL_LINKS 0xa4
-#define ASN_FADL_OTHER 0xa5
-X
-/* parse_fastadl_asn() - decode one definition line from the ASN.1 data
-X   between asn_buff and asn_max.
-X
-X   Elements are processed until a zero byte or asn_max is reached: the
-X   title goes to title (at most t_len-1 characters), the sequence id to
-X   *gi_p, *db, acc and name, the taxon id to *taxid_p; members, links
-X   and other data are skipped.  Every output is reset first, so a record
-X   that lacks an element leaves 0 or "" rather than a stale or
-X   indeterminate value.
-X
-X   Returns the address of the next non-zero byte, or NULL when asn_max
-X   has been reached. */
-unsigned char *
-parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,
-X                  int *gi_p, int *db, char *acc,
-X                  char *name, char *title, int t_len, int *taxid_p) {
-X  unsigned char *abp;
-X
-X  acc[0] = name[0] = title[0] = '\0';
-X  *db = 0;
-X  *gi_p = 0;
-X  *taxid_p = 0;
-X
-X  abp = asn_buff;
-X  while ( abp < asn_max && *abp) {
-X    if (*abp == ASN_SEQ) { abp += 2; }
-X    else if (*abp == ASN_FADL_TITLE) {
-X      abp = get_asn_text(abp+2, title, t_len);
-X    }
-X    else if (*abp == ASN_FADL_SEQID ) {
-X      abp = get_asn_seqid(abp+2, gi_p, db, acc, name);
-X      if (*db > 17) *db = 0;
-X    }
-X    else if (*abp == ASN_FADL_TAXID ) {
-X      abp = get_asn_int(abp+2, taxid_p);
-X    }
-X    else if (*abp == ASN_FADL_MEMBERS) {
-X      abp = get_asn_junk(abp+2);
-X    }
-X    else if (*abp == ASN_FADL_LINKS ) {
-X      abp = get_asn_junk(abp+2);
-X    }
-X    else if (*abp == ASN_FADL_OTHER ) {
-X      abp = get_asn_junk(abp+2);
-X    }
-X    else {
-X      /*      fprintf(stderr, " Error - missing ASN.1 %2x:%2x:%2x:%2x\n",
-X              abp[-2],abp[-1],abp[0],abp[1]); */
-X      abp += 2;
-X    }
-X  }
-X  /* step over the zero bytes that end this definition line */
-X  while (abp < asn_max && *abp == '\0'  ) abp++;
-X  if (abp >= asn_max) return NULL;
-X  else return abp;
-}
-X
-X
-/* parse_pal() - read keyword/value lines from fd.
-X
-X   Lines starting with '#' are ignored.  The first whitespace-delimited
-X   word after the keyword is stored as follows:
-X     DBLIST  -> dname      OIDLIST -> msk_name   (strings)
-X     NSEQ    -> *oid_seqs  MAXOID  -> *max_oid   (integers)
-X   Other lines are skipped; outputs without a matching line are left
-X   untouched. */
-void
-parse_pal(char *dname, char *msk_name,
-X          int *oid_seqs, int *max_oid,
-X          FILE *fd) {
-X
-X  char line[MAX_STR];
-X
-X  while (fgets(line,sizeof(line),fd)) {
-X    if (line[0] == '#') continue;
-X
-X    /* the value starts one character after the keyword */
-X    if (!strncmp(line, "DBLIST", 6)) {
-X      sscanf(line+7,"%s",dname);
-X      continue;
-X    }
-X    if (!strncmp(line, "OIDLIST", 7)) {
-X      sscanf(line+8,"%s",msk_name);
-X      continue;
-X    }
-X    if (!strncmp(line, "NSEQ", 4)) {
-X      sscanf(line+5,"%d",oid_seqs);
-X      continue;
-X    }
-X    if (!strncmp(line, "MAXOID", 6)) {
-X      sscanf(line+7,"%d",max_oid);
-X    }
-X  }
-}
-SHAR_EOF
-chmod 0644 ncbl2_mlib.c ||
-echo 'restore of ncbl2_mlib.c failed'
-Wc_c="`wc -c < 'ncbl2_mlib.c'`"
-test 42930 -eq "$Wc_c" ||
- echo 'ncbl2_mlib.c: original size 42930, current size' "$Wc_c"
-fi
-# ============= ncbl_head.h ==============
-if test -f 'ncbl_head.h' -a X"$1" != X"-c"; then
- echo 'x - skipping ncbl_head.h (File already exists)'
-else
-echo 'x - extracting ncbl_head.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ncbl_head.h' &&
-/* ncbl_head.h header files for blast1.3 format */
-X
-/* $Name: fa_34_26_5 $ - $Id: ncbl_head.h,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
-X
-#define AMINO_ACID_SEQTYPE 1
-#define AA_SEQTYPE AMINO_ACID_SEQTYPE
-#define NUCLEIC_ACID_SEQTYPE 2
-#define NT_SEQTYPE NUCLEIC_ACID_SEQTYPE
-X
-/* Filename extensions used by the two types of databases (a.a. and nt.) */
-#define AA_HEADER_EXT "ahd"
-#define AA_TABLE_EXT "atb"
-#define AA_SEARCHSEQ_EXT "bsq"
-#define NT_HEADER_EXT "nhd"
-#define NT_TABLE_EXT "ntb"
-#define NT_SEARCHSEQ_EXT "csq"
-X
-#define DB_TYPE_PRO 0x78857a4f /* Magic # for a protein sequence database */
-#define DB_TYPE_NUC 0x788325f8 /* Magic # for a nt. sequence database */
-X
-#define AAFORMAT 3 /* Latest a.a. database format ID number */
-#define NTFORMAT 6 /* Latest nt. database format ID number */
-X
-#define NULLB '\0' /* sentinel byte */
-#define NT_MAGIC_BYTE 0xfc /* Magic byte at end of compressed nt db */
-X
-#ifndef CHAR_BIT
-#define CHAR_BIT 8 /* these values should match blast */
-#endif
-X
-#define NBPN 2
-#define NSENTINELS 2
-SHAR_EOF
-chmod 0644 ncbl_head.h ||
-echo 'restore of ncbl_head.h failed'
-Wc_c="`wc -c < 'ncbl_head.h'`"
-test 1034 -eq "$Wc_c" ||
- echo 'ncbl_head.h: original size 1034, current size' "$Wc_c"
-fi
-# ============= ncbl_lib.c ==============
-if test -f 'ncbl_lib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping ncbl_lib.c (File already exists)'
-else
-echo 'x - extracting ncbl_lib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ncbl_lib.c' &&
-/* ncbl_lib.c functions to read ncbi-blast format files from
-X setdb (blastp 1.3.2) format files
-X
-X copyright (c) 1992 William R. Pearson
-*/
-X
-/* $Name: fa_34_26_5 $ - $Id: ncbl_lib.c,v 1.9 2006/10/05 18:22:07 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#ifndef WIN32
-#define RBSTR "r"
-#else
-#define RBSTR "rb"
-#endif
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#include "ncbl_head.h"
-#include "mm_file.h"
-X
-int ncbl_getliba(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-int ncbl_getlibn(char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-X
-void src_ulong_read();
-X
-#ifndef NCBL13_ONLY
-static void src_char_read();
-static void newname(char *, char *, char *, int);
-#else
-void src_char_read();
-void newname(char *, char *, char *, int);
-#endif
-X
-/* nt_btoa maps from blast 2bit format to ascii characters */
-static char nt_btoa[5] = {"ACGT"};
-X
-static char aa_btoa[27]= {"-ARNDCQEGHILKMFPSTWYVBZX*"};
-static int aa_btof[32]; /* maps to fasta alphabet */
-X
-static FILE *tfile=NULL, /* table of offsets, also DB info */
-X *hfile=NULL, /* description lines */
-X *sfile=NULL; /* binary sequence data */
-X
-static unsigned long lib_cnt, max_cnt, totlen, mxlen, dbline_len;
-static unsigned long *seq_beg, *hdr_beg;
-static unsigned char *ambiguity_ray;
-static long seq_format, dbtype, dbformat;
-static char dline[512];
-X
-#define NCBIBL13 11
-X
-/* ncbl_openlib() - open the table, header and sequence files of a
-X   blast 1.3 (setdb) database and load its index.
-X
-X   name is the database name and ldnaseq selects nucleotide (1) or
-X   protein (anything else) files.  The file type and format are checked,
-X   the title is read and discarded, and the sequence and header offset
-X   tables (plus the ambiguity bitmap for nucleotide databases) are
-X   loaded into file-level statics.
-X
-X   NOTE(review): the function is declared to return struct lmf_str *
-X   but returns the integer constants 1 (success) and -1 (failure), and
-X   src_ulong_read() is handed the addresses of long variables - confirm
-X   what callers expect before changing either.  Files that are already
-X   open are not closed on the error paths. */
-struct lmf_str *
-ncbl_openlib(char *name, int ldnaseq)
-{
-X  char hname[256];
-X  char sname[256];
-X  char tname[256];
-X  long title_len;
-X  char *title_str;
-X  int rdtmp;
-X  int i;
-X  unsigned long line_len, c_len, clean_count;
-X
-X  if (ldnaseq!=1) {
-X    newname(tname,name,AA_TABLE_EXT,(int)sizeof(tname));
-X    if ((tfile = fopen(tname,RBSTR))==NULL) {
-X      /* report the protein extension for the protein table */
-X      fprintf(stderr," cannot open %s (%s.%s) table file\n",
-X              name,tname,AA_TABLE_EXT);
-X      return (-1);
-X    }
-X    seq_format = AAFORMAT;
-X  }
-X  else {
-X    newname(tname,name,NT_TABLE_EXT,(int)sizeof(tname));
-X    if ((tfile = fopen(tname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s (%s.%s) table file\n",
-X              name,tname,NT_TABLE_EXT);
-X      return (-1);
-X    }
-X    seq_format = NTFORMAT;
-X  }
-X
-X  src_ulong_read(tfile,&dbtype);
-X  src_ulong_read(tfile,&dbformat);
-X
-X  if (seq_format == AAFORMAT && (dbformat != seq_format || dbtype !=
-X                                 DB_TYPE_PRO)) {
-X    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
-X            tname,dbtype,DB_TYPE_PRO,dbformat,seq_format);
-X    return (-1);
-X  }
-X  else if (seq_format == NTFORMAT && (dbformat != seq_format || dbtype !=
-X                                      DB_TYPE_NUC)) {
-X    fprintf(stderr,"error - %s wrong type (%ld/%d) or format (%ld/%ld)\n",
-X            tname,dbtype,DB_TYPE_NUC,dbformat,seq_format);
-X    return (-1);
-X  }
-X
-X  if (seq_format == AAFORMAT) {
-X    newname(hname,name,AA_HEADER_EXT,(int)sizeof(hname));
-X    if ((hfile = fopen(hname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s header file\n",hname);
-X      return (-1);
-X    }
-X    newname(sname,name,AA_SEARCHSEQ_EXT,(int)sizeof(sname));
-X    if ((sfile = fopen(sname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s sequence file\n",sname);
-X      return (-1);
-X    }
-X  }
-X  else {
-X    newname(hname,name,NT_HEADER_EXT,(int)sizeof(hname));
-X    if ((hfile = fopen(hname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s header file\n",hname);
-X      return (-1);
-X    }
-X    newname(sname,name,NT_SEARCHSEQ_EXT,(int)sizeof(sname));
-X    if ((sfile = fopen(sname,RBSTR))==NULL) {
-X      fprintf(stderr," cannot open %s sequence file\n",sname);
-X      return (-1);
-X    }
-X  }
-X
-/* all files should be open */
-X
-X  /* the title is padded to a multiple of four bytes; it is only read
-X     to advance the file, so the buffer is released straight away */
-X  src_ulong_read(tfile,&title_len);
-X  rdtmp = title_len + ((title_len%4 !=0 ) ? 4-(title_len%4) : 0);
-X  if ((title_str = calloc((size_t)rdtmp,sizeof(char)))==NULL) {
-X    fprintf(stderr," cannot allocate title string (%d)\n",rdtmp);
-X    return(-1);
-X  }
-X  fread(title_str,(size_t)1,(size_t)rdtmp,tfile);
-X  free(title_str);
-X
-X  lib_cnt = 0;
-X  if (seq_format == AAFORMAT) {
-X    src_ulong_read(tfile,&max_cnt);
-X    src_ulong_read(tfile,&totlen);
-X    src_ulong_read(tfile,&mxlen);
-X
-X    /* fprintf(stderr," max_cnt: %d, totlen: %d\n",max_cnt,totlen); */
-X
-X    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
-X      fprintf(stderr," cannot allocate sequence pointers\n");
-X      return -1;
-X    }
-X    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
-X      fprintf(stderr," cannot allocate header pointers\n");
-X      return -1;
-X    }
-X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
-X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
-X
-X    /* build the blast -> fasta residue map; unknown residues become 'X' */
-X    for (i=0; i<sizeof(aa_btoa); i++) {
-X      if ((rdtmp=aascii[aa_btoa[i]])<NA) aa_btof[i]=rdtmp;
-X      else aa_btof[i]=aascii['X'];
-X    }
-X  }
-X  else if (seq_format == NTFORMAT) {
-X    src_ulong_read(tfile,&dbline_len);	/* length of uncompress DB lines */
-X    src_ulong_read(tfile,&max_cnt);	/* number of entries */
-X    src_ulong_read(tfile,&mxlen);	/* maximum length sequence */
-X    src_ulong_read(tfile,&totlen);	/* total count */
-X    src_ulong_read(tfile,&c_len);	/* compressed db length */
-X    src_ulong_read(tfile,&clean_count);	/* count of nt's cleaned */
-X
-X    fseek(tfile,(size_t)((clean_count)*4),1);
-X    /* seek over clean_count */
-X    if ((seq_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
-X      fprintf(stderr," cannot allocate sequence pointers\n");
-X      return -1;
-X    }
-X    if ((hdr_beg=(unsigned long *)calloc((size_t)max_cnt+1,sizeof(long)))==NULL) {
-X      fprintf(stderr," cannot allocate header pointers\n");
-X      return -1;
-X    }
-X    if ((ambiguity_ray=
-X         (unsigned char *)calloc((size_t)max_cnt/CHAR_BIT+1,sizeof(char)))==NULL) {
-X      fprintf(stderr," cannot allocate ambiguity_ray\n");
-X      return -1;
-X    }
-X
-X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&seq_beg[i]);
-X    fseek(tfile,(size_t)((max_cnt+1)*4),1);
-X    /* seek over seq_beg */
-X    for (i=0; i<max_cnt+1; i++) src_ulong_read(tfile,&hdr_beg[i]);
-X    for (i=0; i<max_cnt/CHAR_BIT+1; i++)
-X      src_char_read(tfile,&ambiguity_ray[i]);
-X  }
-X  return 1;
-}
-X
-/* ncbl_closelib() - close whichever of the table, header and sequence
-X   files are open and reset their handles to NULL, so that calling it
-X   again is harmless. */
-void ncbl_closelib()
-{
-X  if (tfile != NULL) {
-X    fclose(tfile);
-X    tfile = NULL;
-X  }
-X  if (hfile != NULL) {
-X    fclose(hfile);
-X    hfile = NULL;
-X  }
-X  if (sfile != NULL) {
-X    fclose(sfile);
-X    sfile = NULL;
-X  }
-}
-X
-/* ncbl_getliba() - read the next protein entry, or the next chunk of a
-X   long one, from the sequence file and translate it to the fasta
-X   alphabet through aa_btof[].
-X
-X   *lcont is 0 when a new entry is started; it is reset to 0 when the
-X   entry has been read completely and incremented when more remains.
-X   *libpos receives the entry index and libstr is set to "".  Returns
-X   the number of residues placed in seq (terminated with EOSEQ), or -1
-X   at the end of the library or on a read error.
-X
-X   lcont is declared as a pointer: the body dereferences it and the
-X   prototype earlier in this file passes an int *.
-X   NOTE(review): that prototype also lists two further parameters
-X   (struct lmf_str *, long *) which this definition does not take. */
-int
-ncbl_getliba(char *seq, int maxs,
-X             char *libstr, int n_libstr,
-X             fseek_t *libpos,
-X             int *lcont)
-{
-X  register char *sptr;
-X  long seqcnt;
-X  long tmp;
-X  static long seq_len;	/* residues of the current entry still to read */
-X  
-X  *libpos = lib_cnt;
-X  if (*lcont==0) {
-X    if (lib_cnt >= max_cnt) return -1;
-X    seq_len = seq_beg[lib_cnt+1] - seq_beg[lib_cnt] -1;
-X    tmp=(long)fgetc(sfile);	/* skip the null byte */
-X    if (tmp!=NULLB)
-X      fprintf(stderr," phase error: %ld:%ld found\n",lib_cnt,tmp);
-X    libstr[0]='\0';
-X  }
-X  
-X  if (seq_len < maxs) {
-X    if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,sfile))!=(size_t)seq_len) {
-X      fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",
-X              *libpos,tmp,seq_len);
-X      goto error;
-X    }
-X    /* drop a trailing '*'; an empty entry has no last residue to test */
-X    if (seq_len > 0 && aa_btoa[seq[seq_len-1]]=='*') seqcnt = seq_len-1;
-X    else seqcnt=seq_len;
-X    lib_cnt++;
-X    *lcont = 0;
-X  }
-X  else {
-X    if (fread(seq,(size_t)1,(size_t)(maxs-1),sfile)!=(size_t)(maxs-1)) {
-X      fprintf(stderr," could not read sequence record: %ld %ld\n",
-X              *libpos,seq_len);
-X      goto error;
-X    }
-X    (*lcont)++;
-X    seqcnt = maxs-1;
-X    seq_len -= seqcnt;
-X  }
-X  sptr = seq+seqcnt;
-X  
-X  while (--sptr >= seq) *sptr = aa_btof[*sptr];
-X  
-X  seq[seqcnt]= EOSEQ;
-X  return (seqcnt);
-X  
-X  /* libstr is a string, so it is printed with %s (it was %ld) */
-error:	fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);
-X  fflush(stderr);
-X  return (-1);
-}
-X
-/* ncbl_getlibn() - read the next nucleotide entry, or the next chunk of
-X   a long one, from the 2-bit packed sequence file and expand it in
-X   place to one residue per byte, coded 1..4.
-X
-X   *lcont is 0 when a new entry is started; it is reset to 0 when the
-X   entry has been read completely and incremented when more remains.
-X   Every entry is expected to be bracketed by NT_MAGIC_BYTE; a missing
-X   marker is treated as an error.  *libpos receives the entry index and
-X   libstr is set to "".  Returns the number of residues placed in seq
-X   (terminated with EOSEQ), or -1 at the end of the library or on error.
-X
-X   NOTE(review): the prototype earlier in this file lists two further
-X   parameters (struct lmf_str *, long *) which this definition does not
-X   take. */
-int
-ncbl_getlibn(char *seq, int maxs,
-X             char *libstr, int n_libstr,
-X             fseek_t *libpos, int *lcont)
-{
-X  register char *sptr, *tptr, stmp;
-X  long seqcnt;
-X  long tmp;
-X  static long seq_len;
-X  static int c_len,c_pad;
-X
-X  *libpos = lib_cnt;
-X  if (*lcont==0) {
-X    if (lib_cnt >= max_cnt) return -1;
-X    /* seq_beg[] counts residues; CHAR_BIT/NBPN of them fit in a byte */
-X    c_len = seq_beg[lib_cnt+1]/(CHAR_BIT/NBPN)
-X          - seq_beg[lib_cnt]/(CHAR_BIT/NBPN);
-X    c_len -= NSENTINELS;
-X
-X    seq_len = c_len*(CHAR_BIT/NBPN);
-X    c_pad = seq_beg[lib_cnt] & ((CHAR_BIT/NBPN)-1);
-X    if (c_pad != 0) seq_len -= ((CHAR_BIT/NBPN) - c_pad);
-X
-X    tmp=fgetc(sfile);	/* skip the null byte */
-X    if (tmp!=NT_MAGIC_BYTE) {
-X      fprintf(stderr," phase error: %ld:%ld (%ld/%d) found\n",
-X              lib_cnt,seq_len,tmp,NT_MAGIC_BYTE);
-X      goto error;
-X    }
-X    libstr[0]='\0';
-X  }
-X
-X  if (seq_len < maxs-3) {
-X    seqcnt=(seq_len+3)/4;
-X    if (seqcnt==0) seqcnt++;
-X    if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,sfile))
-X        !=(size_t)seqcnt) {
-X      fprintf(stderr,
-X              " could not read sequence record: %s %ld %ld != %ld: %d\n",
-X              libstr,*libpos,tmp,seqcnt,*seq);
-X      goto error;
-X    }
-X    tmp=fgetc(sfile);	/* skip the null byte */
-X    if (tmp!=(unsigned char)NT_MAGIC_BYTE) {
-X      fprintf(stderr," phase2 error: %ld:%ld (%ld/%d) next ",
-X              lib_cnt,seqcnt,tmp,NT_MAGIC_BYTE);
-X
-X      goto error;
-X    }
-X    *lcont = 0;
-X    lib_cnt++;
-X  }
-X  else {
-X    seqcnt = ((maxs+3)/4)-1;
-X    if (fread(seq,(size_t)1,(size_t)(seqcnt),sfile)!=(size_t)(seqcnt)) {
-X      fprintf(stderr," could not read sequence record: %s %ld %ld\n",
-X              libstr,*libpos,seqcnt);
-X      goto error;
-X    }
-X    (*lcont)++;
-X  }
-X
-X  /* point to the last packed byte and to the end of the array
-X     seqcnt is the exact number of bytes read
-X     tptr points to the destination, use multiple of 4 to simplify math
-X     sptr points to the source, note that the last byte will be read 4 cycles
-X     before it is written
-X  */
-X
-X  sptr = seq + seqcnt;
-X  tptr = seq + 4*seqcnt;
-X  while (sptr>seq) {
-X    stmp = *--sptr;
-X    *--tptr = (stmp&3) +1;
-X    *--tptr = ((stmp >>= 2)&3)+1;
-X    *--tptr = ((stmp >>= 2)&3)+1;
-X    *--tptr = ((stmp >>= 2)&3)+1;
-X  }
-X  /*
-X    for (sptr=seq; sptr < seq+seq_len; sptr++) {
-X    printf("%c",nt[*sptr]);
-X    if ((int)(sptr-seq) % 60 == 59) printf("\n");
-X    }
-X    printf("\n");
-X  */
-X  if (seqcnt*4 >= seq_len) {	/* there was enough room */
-X    seq[seq_len]= EOSEQ;
-X    /* printf("%d\n",seq_len); */
-X    return seq_len;
-X  }
-X  else {			/* not enough room */
-X    seq[seqcnt*4]=EOSEQ;
-X    seq_len -= 4*seqcnt;
-X    return (4*seqcnt);
-X  }
-X
-X  /* libstr is a string, so it is printed with %s (it was %ld) */
-error:	fprintf(stderr," error reading %s at %ld\n",libstr,(long)*libpos);
-X  fflush(stderr);
-X  return (-1);
-}
-X
-/* ncbl_ranlib() - build the description line of entry libpos in str (a
-X   buffer of cnt bytes) and position the sequence file on that entry.
-X
-X   The header is read into a local 256-byte buffer.  If it looks like
-X   "db|id text" (a '|' at offset 9 or 10) the identifier is written
-X   left-justified in 10 columns followed by the rest of the line;
-X   otherwise the header is copied as it is.  For nucleotide databases an
-X   entry flagged in ambiguity_ray gets a leading '*'.
-X
-X   A header without the expected '|' or blank is handled instead of
-X   dereferencing a NULL pointer, and the text appended after the
-X   identifier is limited so that str never exceeds cnt-1 characters.
-X
-X   NOTE(review): the sprintf() of the identifier is still not limited
-X   by cnt - confirm callers always pass a buffer that can hold it. */
-void
-ncbl_ranlib(str,cnt,libpos)
-X     char *str; int cnt;
-X     long libpos;
-{
-X  char hline[256], *bp, *bp0;
-X  int llen, is_amb;
-X  long spos;
-X  
-X  lib_cnt = libpos;
-X  llen = hdr_beg[lib_cnt+1]-hdr_beg[lib_cnt];
-X  if (llen > sizeof(hline)) llen = sizeof(hline);
-X  if (llen < 1) llen = 1;
-X  fseek(hfile,hdr_beg[lib_cnt]+1,0);
-X  
-X  /* short headers must not leave hline[9] / hline[10] indeterminate */
-X  memset(hline,0,sizeof(hline));
-X  fread(hline,(size_t)1,(size_t)(llen-1),hfile);
-X  hline[llen-1]='\0';
-X  
-X  is_amb = (dbformat == NTFORMAT &&
-X            (ambiguity_ray[lib_cnt/CHAR_BIT]&(1<<lib_cnt%CHAR_BIT)));
-X
-X  bp0 = NULL;
-X  if (hline[9]=='|' || hline[10]=='|') bp0 = strchr(hline+3,'|');
-X
-X  if (bp0 != NULL) {
-X    /* split "id text" at the first blank, if there is one */
-X    if ((bp=strchr(bp0+1,' '))!=NULL) *bp='\0';
-X    if (is_amb) {
-X      sprintf(str,"*%-9s ",bp0+1);
-X    }
-X    else sprintf(str,"%-10s ",bp0+1);
-X    /* strncat() adds a NUL after the n characters it copies */
-X    if (bp != NULL && strlen(str) < (size_t)(cnt-1))
-X      strncat(str,bp+1,(size_t)(cnt-1)-strlen(str));
-X  }
-X  else {
-X    if (is_amb) {
-X      str[0]='*';
-X      strncpy(str+1,hline,cnt-1);
-X    }
-X    else strncpy(str,hline,cnt);
-X  }
-X  str[cnt-1]='\0';
-X  
-X  if (dbformat == AAFORMAT)
-X    fseek(sfile,seq_beg[lib_cnt]-1,0);
-X  else {
-X    spos = (seq_beg[lib_cnt])/(CHAR_BIT/NBPN);
-X    fseek(sfile,spos-1,0);
-X  }
-}
-X
-/* src_ulong_read() - read a 4-byte value stored most significant byte
-X   first from fd into *val.  With IS_BIG_ENDIAN defined the four bytes
-X   are read straight into *val; otherwise they are combined one by
-X   one.  The result of fread() is not examined. */
-void src_ulong_read(fd, val)
-X     FILE *fd;
-X     unsigned long *val;
-{
-#ifdef IS_BIG_ENDIAN
-X  fread((char *)val,(size_t)4,(size_t)1,fd);
-#else
-X  unsigned char raw[4];
-X  unsigned long acc = 0;
-X  int k;
-X
-X  fread((char *)raw,(size_t)1,(size_t)4,fd);
-X  for (k = 0; k < 4; k++) {
-X    acc = (acc << 8) + (unsigned long)raw[k];
-X  }
-X  *val = acc;
-#endif
-}
-X
-/* src_long_read() - read a 4-byte value stored most significant byte
-X   first from fd into *val.  With IS_BIG_ENDIAN defined the four bytes
-X   are read straight into *val; otherwise they are combined one by
-X   one.  The result of fread() is not examined. */
-void src_long_read(fd,val)
-X     FILE *fd;
-X     long *val;
-{
-#ifdef IS_BIG_ENDIAN
-X  fread((char *)val,(size_t)4,(size_t)1,fd);
-#else
-X  unsigned char raw[4];
-X  long acc = 0;
-X  int k;
-X
-X  fread((char *)raw,(size_t)1,(size_t)4,fd);
-X  for (k = 0; k < 4; k++) {
-X    acc = (acc << 8) + (long)raw[k];
-X  }
-X  *val = acc;
-#endif
-}
-X
-/* src_char_read() - read a single byte from fd into *val.  The function
-X   has external linkage only when NCBL13_ONLY is defined.  The result of
-X   fread() is not examined. */
-#ifndef NCBL13_ONLY
-static void
-#else
-void
-#endif
-src_char_read(fd, val)
-X     FILE *fd;
-X     char *val;
-{
-X  const size_t one_byte = 1;
-X
-X  fread(val, one_byte, one_byte, fd);
-}
-X
-/* src_fstr_read() - read one fixed-length field of slen bytes from fd
-X   into val.  No terminating NUL is added and the result of fread() is
-X   not examined.  The function has external linkage only when
-X   NCBL13_ONLY is defined. */
-#ifndef NCBL13_ONLY
-static void
-#else
-void
-#endif
-src_fstr_read(fd, val, slen)
-X     FILE *fd;
-X     char *val;
-X     long slen;
-{
-X  const size_t field_len = (size_t)slen;
-X
-X  fread(val, field_len, (size_t)1, fd);
-}
-X
-/* newname() - derive a file name from oname by replacing its extension
-X   with suff; the result is written to nname, a buffer of maxn bytes.
-X
-X   A leading '@' in oname is dropped.  Leading '.' characters are kept
-X   (as in "./db"); the name is then cut at the next '.', or at its end
-X   if there is none, and ".<suff>" is appended.
-X
-X   The result is always NUL-terminated and never longer than maxn-1
-X   characters; whatever does not fit is dropped.  Nothing is written
-X   when maxn is not positive.  The function has external linkage only
-X   when NCBL13_ONLY is defined. */
-#ifndef NCBL13_ONLY
-static void
-#else
-void
-#endif
-newname(char *nname, char *oname, char *suff, int maxn)
-{
-X  char *tptr;
-X  size_t room;
-X
-X  if (maxn <= 0) return;
-X  room = (size_t)(maxn - 1);	/* characters available before the NUL */
-X
-X  if (oname[0]=='@') strncpy(nname,&oname[1],room);
-X  else strncpy(nname,oname,room);
-X  nname[room] = '\0';		/* strncpy() does not terminate on truncation */
-X
-X  for (tptr=nname; *tptr=='.'; tptr++);		/* keep leading dots */
-X  for (; *tptr!='.'&& *tptr; tptr++);		/* get to '.' or EOS */
-X
-X  /* no room left even for the '.': keep the truncated name */
-X  if ((size_t)(tptr - nname) >= room) { *tptr = '\0'; return; }
-X
-X  *tptr++='.'; *tptr='\0';
-X  /* strncat() adds a NUL after the n characters it copies */
-X  strncat(nname,suff,room - (size_t)(tptr - nname));
-}
-X
-SHAR_EOF
-chmod 0644 ncbl_lib.c ||
-echo 'restore of ncbl_lib.c failed'
-Wc_c="`wc -c < 'ncbl_lib.c'`"
-test 12694 -eq "$Wc_c" ||
- echo 'ncbl_lib.c: original size 12694, current size' "$Wc_c"
-fi
-# ============= ngt.aa ==============
-# Extract ngt.aa unless it already exists and the first argument is not
-# "-c".  The here-document that ends at SHAR_EOF is the file's contents;
-# sed strips one leading X from each line that has it.
-if test -f 'ngt.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping ngt.aa (File already exists)'
-else
-echo 'x - extracting ngt.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ngt.aa' &&
->GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
-ILGYWN,
-DQYRMFEP,
-SRYIATP,
-KCLDAFP,
-EYTDS,
-SYDEKR,
-YTMGD,
-EKQKPEFL,
-VRGLTHP,
-TRMQLI,
-FKLGLDFP,
-NLPYLI,
-DGSHKIT,
-LRYLAR,
-KTIPEK,
-KRPWFA,
-ETEEERIR,
-GDKVTYVD,
-HWSNK
-SHAR_EOF
-chmod 0644 ngt.aa ||
-echo 'restore of ngt.aa failed'
-# Warn if the extracted file is not the recorded 217 bytes long.
-Wc_c="`wc -c < 'ngt.aa'`"
-test 217 -eq "$Wc_c" ||
- echo 'ngt.aa: original size 217, current size' "$Wc_c"
-fi
-# ============= ngts.aa ==============
-# Extract ngts.aa unless it already exists and the first argument is not
-# "-c".  The here-document that ends at SHAR_EOF is the file's contents;
-# sed strips one leading X from each line that has it.
-if test -f 'ngts.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping ngts.aa (File already exists)'
-else
-echo 'x - extracting ngts.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'ngts.aa' &&
->GT8.7 | 40001 90043 | transl. of pa875.con, 19 to 675
-ILGY*WN,
-EYTDS?,
-S?YDEKR,
-DQY*RMFEP,
-KCLDAFP,
-S*RY*IATP
-SHAR_EOF
-chmod 0644 ngts.aa ||
-echo 'restore of ngts.aa failed'
-# Warn if the extracted file is not the recorded 111 bytes long.
-Wc_c="`wc -c < 'ngts.aa'`"
-test 111 -eq "$Wc_c" ||
- echo 'ngts.aa: original size 111, current size' "$Wc_c"
-fi
-# ============= nmgetlib.c ==============
-if test -f 'nmgetlib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping nmgetlib.c (File already exists)'
-else
-echo 'x - extracting nmgetlib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'nmgetlib.c' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: nmgetlib.c,v 1.35 2007/01/08 15:38:46 wrp Exp $ */
-X
-/* May, June 1987 - modified for rapid read of database
-X
-X copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson
-X
-X revised (split) version of nmgetaa.c -> renamed nmgetlib.c
-X
-X This version seeks to be a thread safe, no global, library
-X reading program. While adjusting the routines in this file
-X should be relatively easy, ncbl2_mlib.c and mysql_lib.c may be
-X more difficult.
-X
-X nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
-X the same functions as nxgetaa.c if memory mapping is not used,
-X mmgetaa.c provides the database reading functions if memory
-X mapping is used. The decision to use memory mapping is made on
-X a file-by-file basis.
-X
-X June 2, 1987 - added TFASTA
-X March 30, 1988 - combined ffgetaa, fgetgb;
-X April 8, 1988 - added PIRLIB format for unix
-X Feb 4, 1989 - added universal subroutines for libraries
-X December, 1995 - added range option file.name:1-1000
-X September, 1999 - added option for mmap()ed files using ".xin" */
-X
-X
-/*
-X February 4, 1988 - this starts a major revision of the getaa
-X routines. The goal is to be able to search the following format
-X libraries:
-X
-X 0 - normal FASTA format
-X 1 - full Genbank tape format
-X 2 - NBRF/PIR CODATA format
-X 3 - EMBL/Swiss-prot format
-X 4 - Intelligentics format
-X 5 - NBRF/PIR VMS format
-X 6 - GCG 2bit format
-X
-X 11 - NCBI setdb/blastp (1.3.2) AA/NT
-X 12 - NCBI setdb/blastp (2.0) AA/NT
-X 16 - mySQL queries
-X
-X see file altlib.h to confirm numbers
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-X
-#include "defs.h"
-#include "structs.h"
-X
-#ifndef SFCHAR
-#define SFCHAR ':'
-#endif
-X
-#define EOSEQ 0
-X
-#include "uascii.h"
-/* #include "upam.h" */
-X
-#define LFCHAR '\015' /* for MWC 5.5 */
-X
-#include "altlib.h"
-X
-X
-#include <fcntl.h>
-#ifndef O_RAW
-#ifdef O_BINARY
-#define O_RAW O_BINARY
-#else
-#define O_RAW 0
-#endif /* O_BINARY */
-#endif /* O_RAW */
-X
-#ifdef WIN32
-#define RBSTR "rb" /* read file in binary mode */
-#else
-#define RBSTR "r"
-#endif
-X
-#include "mm_file.h"
-struct lmf_str *load_mmap(FILE *, char *, int, int, struct lmf_str *);
-struct lmf_str *ncbl2_reopen(struct lmf_str *);
-struct lmf_str *ncbl2_openlib(char *, int);
-X
-static struct lmf_str *last_m_fptr=NULL;
-X
-#ifdef MYSQL_DB
-struct lmf_str *mysql_openlib(char *, int, int *);
-struct lmf_str *mysql_reopen(struct lmf_str *);
-#endif
-X
-#ifdef PGSQL_DB
-struct lmf_str *pgsql_openlib(char *, int, int *);
-struct lmf_str *pgsql_reopen(struct lmf_str *);
-#endif
-X
-void closelib(struct lmf_str *m_fptr);
-extern void newname(char *nname, char *oname, char *suff, int maxn);
-X
-/* a file name for openlib may include a library type suffix */
-X
-/* openlib() - open a sequence library and return its descriptor.
-X
-X     lname   - library name; may carry an option after ':' (saved in
-X               opt_text) and a library type number after a space.
-X               lname is modified: it is cut at the ':' and, if the user
-X               is prompted for a replacement name, that name is copied
-X               back into it.
-X     ldnaseq - passed to the format-specific openers; lib_aa is set to
-X               (ldnaseq==0) for text libraries
-X     sascii  - residue mapping table stored in the descriptor
-X     outtty  - if non-zero, prompt on stderr/stdin for a new file name
-X               (at most 10 times) when the library cannot be opened
-X     om_fptr - a previously returned descriptor; if it is memory mapped
-X               it is rewound (lpos = 0) and returned as is
-X
-X   Returns the descriptor, or NULL if the library cannot be opened,
-X   memory cannot be allocated, or the first line cannot be read.
-X
-X   Fixes relative to the earlier version: the replacement name is copied
-X   back into lname bounded by the length lname had on entry (the old
-X   code used sizeof(lname), i.e. the size of a pointer); opnflg and
-X   libtype can no longer be read uninitialized; the file and descriptor
-X   are released when an allocation fails. */
-struct lmf_str *
-openlib(char *lname, int ldnaseq, int *sascii,
-X        int outtty, struct lmf_str *om_fptr)
-{
-X  char sname[MAX_FN], iname[MAX_FN], *bp;
-X  char opt_text[MAX_FN];	/* save text after ':' */
-X  size_t lname_cap;		/* bytes known to be writable at lname */
-X  int wcnt, opnflg;
-X  int libtype;
-X  FILE *libi=NULL;
-X  FILE *libf=NULL;
-X  int use_stdin;
-X  struct lmf_str *m_fptr=NULL;
-X
-X  /* this is currently unavailable - later it can return a value somewhere */
-X  /*
-X  if (lname[0]=='#') {return -9;}
-X  */
-X
-X  if (om_fptr != NULL && om_fptr->mm_flg) {
-X    om_fptr->lpos = 0;
-X    return om_fptr;
-X  }
-X
-X  wcnt = 0;	/* number of times to ask for file name */
-X
-X  /* The size of the caller's buffer is not passed in; the only space we
-X     can be sure of is what the incoming string already occupies. */
-X  lname_cap = strlen(lname)+1;
-X
-X  /* check to see if there is a file option ":1-100" */
-#ifndef WIN32
-X  bp = strchr(lname,':');
-#else
-X  /* the first 3 characters are skipped (presumably a drive prefix such
-X     as "C:\" - confirm); do not look past the end of a short name */
-X  bp = (strlen(lname) > 3) ? strchr(lname+3,':') : NULL;
-#endif
-X  if (bp!=NULL && *(bp+1)!='\0') {
-X    strncpy(opt_text,bp+1,sizeof(opt_text));
-X    opt_text[sizeof(opt_text)-1]='\0';
-X    *bp = '\0';
-X  }
-X  else opt_text[0]='\0';
-X
-X  if (lname[0] == '-' || lname[0] == '@') {
-X    use_stdin = 1;
-X  }
-X  else use_stdin=0;
-X
-X  strncpy(sname,lname,sizeof(sname));
-X  sname[sizeof(sname)-1]='\0';
-X  /* check for library type */
-X  libtype = 0;			/* default if no number can be parsed */
-X  if ((bp=strchr(sname,' '))!=NULL) {
-X    *bp='\0';
-X    sscanf(bp+1,"%d",&libtype);
-X    if (libtype<0 || libtype >= LASTLIB) {
-X      fprintf(stderr," invalid library type: %d (>%d)- resetting\n%s\n",
-X              libtype,LASTLIB,lname);
-X      libtype=0;
-X    }
-X  }
-X
-X  if (use_stdin && libtype !=0) {
-X    fprintf(stderr," @/- STDIN libraries must be in FASTA format\n");
-X    return NULL;
-X  }
-X
-X  /* check to see if file can be open()ed? */
-X
-X l1:
-X  opnflg = 0;	/* stays 0 if no opener is compiled in for libtype */
-X  if (libtype<=LASTTXT) {
-X    if (!use_stdin) {
-X      opnflg=((libf=fopen(sname,RBSTR))!=NULL);
-X    }
-X    else {
-X      libf=stdin;
-X      strncpy(sname,"STDIN",sizeof(sname));
-X      sname[sizeof(sname)-1]='\0';
-X      opnflg=1;
-X    }
-X  }
-#ifdef NCBIBL13
-X  else if (libtype==NCBIBL13) opnflg=(ncbl_openlib(sname,ldnaseq)!= -1);
-#endif
-#ifdef NCBIBL20
-X  else if (libtype==NCBIBL20) {
-X    opnflg=((m_fptr=ncbl2_openlib(sname,ldnaseq))!=NULL);
-X  }
-#endif
-X
-#ifdef MYSQL_DB
-X  /* a mySQL filename contains mySQL commands, not sequences */
-X  else if (libtype==MYSQL_LIB) {
-X    opnflg=((m_fptr=mysql_openlib(sname,ldnaseq,sascii))!=NULL);
-X  }
-#endif
-#ifdef PGSQL_DB
-X  /* PGSQL_LIB libraries are opened by pgsql_openlib() */
-X  else if (libtype==PGSQL_LIB) {
-X    opnflg=((m_fptr=pgsql_openlib(sname,ldnaseq,sascii))!=NULL);
-X  }
-#endif
-X
-X  if (!opnflg) {	/* here if open failed */
-X    if (outtty) {
-X      fprintf(stderr," cannot open %s library\n",sname);
-X      fprintf(stderr," enter new file name or <RET> to quit ");
-X      fflush(stderr);
-X      if (fgets(sname,sizeof(sname),stdin)==NULL) return NULL;
-X      if ((bp=strchr(sname,'\n'))!=0) *bp='\0';
-X      if (strlen(sname)==0) return NULL;
-X      if (++wcnt > 10) return NULL;
-X      /* report the new name to the caller, within the known space */
-X      strncpy(lname,sname,lname_cap-1);
-X      lname[lname_cap-1]='\0';
-X      goto l1;
-X    }
-X    else return NULL;
-X  }	/* !openflg */
-X
-X  if (libtype <= LASTTXT) {
-X    /* now allocate a buffer for the opened text file */
-X    if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
-X      fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
-X              (long)sizeof(struct lmf_str),sname);
-X      if (libf != NULL && libf != stdin) fclose(libf);
-X      return NULL;
-X    }
-X    if ((m_fptr->lline = calloc(MAX_STR,sizeof(char)))==NULL) {
-X      fprintf(stderr," cannot allocate lline (%d) for %s\n",
-X              MAX_STR,sname);
-X      free(m_fptr);
-X      if (libf != NULL && libf != stdin) fclose(libf);
-X      return NULL;
-X    }
-X
-X    strncpy(m_fptr->lb_name,sname,MAX_FN);
-X    m_fptr->lb_name[MAX_FN-1]='\0';
-X    strncpy(m_fptr->opt_text,opt_text,MAX_FN);
-X    m_fptr->opt_text[MAX_FN-1]='\0';
-X    m_fptr->sascii = sascii;
-X
-X    m_fptr->libf = libf;
-X    m_fptr->lb_type = libtype;
-X    m_fptr->getlib = getliba[libtype];
-X    m_fptr->ranlib = ranliba[libtype];
-X    m_fptr->mm_flg = 0;
-X    m_fptr->tot_len = 0;
-X    m_fptr->max_len = 0;
-X    m_fptr->lib_aa = (ldnaseq==0);
-X  }
-X  last_m_fptr = m_fptr;
-X
-#ifdef USE_MMAP
-X  /* check for possible mmap()ed files */
-X  if (!use_stdin && (libtype <= LASTTXT) && (getlibam[libtype]!=NULL)) {
-X    /* this is a file we can mmap() */
-X    /* look for .xin file */
-X    newname(iname,sname,"xin",sizeof(iname));
-X    if ((libi=fopen(iname,"r"))!=NULL) { /* have a *.xin file, use mmap */
-X      if (load_mmap(libi,sname,libtype,ldnaseq,m_fptr)!=NULL) {
-X        fclose(libi);	/* close index file */
-X        m_fptr->lb_type = libtype;
-X        m_fptr->getlib = getlibam[libtype];
-X        m_fptr->ranlib = ranlibam[libtype];
-X        m_fptr->mm_flg = 1;
-X        return m_fptr;
-X      }
-X      fclose(libi);	/* memory mapping failed, but still must close file */
-X    }
-X  }
-#endif
-X
-X  if (libtype <= LASTTXT) {
-X    m_fptr->lpos = 0;
-X    /* prime lline with the first line of the library */
-X    /* NOTE(review): on failure the descriptor and file stay allocated
-X       (last_m_fptr still refers to them) - unchanged from before */
-X    if (fgets(m_fptr->lline,MAX_STR,libf)==NULL) return NULL;
-X  }
-X  return m_fptr;
-}
-X
-/* closelib() - release the open file behind a library descriptor.
-X   The text stream m_fptr->libf is closed (and set to NULL) unless it is
-X   NULL or stdin; the format-specific close routines are then called for
-X   the library types that are compiled in.  The descriptor itself is not
-X   freed.
-X   NOTE(review): the early-return block below is keyed on MMAP and
-X   mm_flag, whereas openlib() uses USE_MMAP and mm_flg - confirm which
-X   spelling is intended. */
-void
-closelib(struct lmf_str *m_fptr) {
-X  FILE *text_fp;
-X
-#ifdef MMAP
-X  if (m_fptr->mm_flag) {
-/* don't close memory mapped files
-X    close_mmap(m_fptr);
-*/
-X    return;
-X  }
-#endif
-X
-X  text_fp = m_fptr->libf;
-X  if (text_fp != NULL) {
-X    if (text_fp != stdin) {	/* never close the standard input */
-X      fclose(text_fp);
-X      m_fptr->libf = NULL;
-X    }
-X  }
-X
-#ifdef NCBIBL13
-X  if (m_fptr->lb_type == NCBIBL13) ncbl_closelib(m_fptr);
-#endif
-#ifdef NCBIBL20
-X  if (m_fptr->lb_type == NCBIBL20) ncbl2_closelib(m_fptr);
-#endif
-#ifdef MYSQL_DB
-X  if (m_fptr->lb_type == MYSQL_LIB) mysql_closelib(m_fptr);
-#endif
-}
-X
-/* re_openlib() - make a previously opened library readable again.
-X
-X     om_fptr - descriptor returned earlier by openlib()
-X     outtty  - not used here
-X
-X   Memory-mapped (and, with MYSQL_DB, mySQL) libraries are returned
-X   unchanged.  A text library whose libf is NULL is fopen()ed again from
-X   lb_name; the other library types call their own open routines.
-X   Returns the descriptor to use (for NCBIBL20 this is the new one
-X   returned by ncbl2_openlib()), or NULL after printing a message if the
-X   library cannot be opened.
-X
-X   A failed ncbl2_openlib() no longer overwrites om_fptr with NULL
-X   before the error message dereferences it. */
-struct lmf_str *
-re_openlib(struct lmf_str *om_fptr, int outtty)
-{
-X  int opnflg;
-X
-X  /* if the file mmap()ed and has been opened - use it and return */
-X  if (om_fptr->mm_flg) {
-X    return om_fptr;
-X  }
-#ifdef MYSQL_DB
-X  /* if this is a mysql database - use it and return */
-X  else if (om_fptr->lb_type == MYSQL_LIB) {
-X    return om_fptr;
-X  }
-#endif
-X
-X  /* data is available, but file is closed or not memory mapped, open it */
-X  /* no longer check to memory map - because we could not do it before */
-X
-X  opnflg = 1;
-X  if (om_fptr->lb_type<=LASTTXT && om_fptr->libf==NULL)
-X    opnflg=((om_fptr->libf=fopen(om_fptr->lb_name,RBSTR))!=NULL);
-#ifdef NCBIBL13
-X  else if (om_fptr->lb_type==NCBIBL13)
-X    opnflg=(ncbl_openlib(om_fptr->lb_name,!om_fptr->lib_aa)!= -1);
-#endif
-#ifdef NCBIBL20
-X  else if (om_fptr->lb_type==NCBIBL20) {
-X    struct lmf_str *nm_fptr;
-X
-X    /* keep om_fptr valid for the error message if the open fails */
-X    nm_fptr = ncbl2_openlib(om_fptr->lb_name,!om_fptr->lib_aa);
-X    if (nm_fptr != NULL) om_fptr = nm_fptr;
-X    else opnflg = 0;
-X  }
-#endif
-#ifdef MYSQL_DB
-X  /* a mySQL filename contains mySQL commands, not sequences */
-X  else if (om_fptr->lb_type==MYSQL_LIB) 
-X    opnflg=(mysql_reopen(om_fptr)!=NULL);
-#endif
-X
-X  if (!opnflg) {
-X    fprintf(stderr,"*** could not re_open %s\n",om_fptr->lb_name);
-X    return NULL;
-X  }
-X
-X  /* use the old buffer for the opened text file */
-X  om_fptr->mm_flg = 0;
-X  last_m_fptr = om_fptr;
-X
-X  return om_fptr;
-}
-X
-#ifdef SUPERFAMNUM
-static char tline[512];
-extern int nsfnum; /* number of superfamily numbers */
-extern int sfnum[10]; /* superfamily number from types 0 and 5 */
-extern int nsfnum_n;
-extern int sfnum_n[10];
-#endif
-X
-void sf_sort(int *, int);
-X
-/* agetlib() - read the next entry, or the next piece of a long entry,
-X   from a text library whose entries start with '>' (or ';').
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-10 codes are stored per call
-X     libstr   - receives the header line without the leading character
-X     n_libstr - size of libstr
-X     libpos   - receives lm_fd->lpos for the entry
-X     lcont    - 0 on entry: start a new entry; otherwise continue the
-X                current one.  On return it is incremented if the buffer
-X                filled (more sequence may follow) and reset to 0 if not.
-X     lm_fd    - library descriptor (libf, lline, lpos, sascii are used)
-X     l_off    - set to 1, or to the number following "@C:" in the header
-X
-X   Returns the number of codes stored in seq, or -1 if end of file is
-X   reached while looking for a header line.
-X
-X   The loop that completes an over-long header line now stops when
-X   fgets() fails; previously it never terminated if the file ended
-X   before a newline was seen. */
-int
-agetlib(unsigned char *seq, int maxs,
-X        char *libstr, int n_libstr,
-X        fseek_t *libpos,
-X        int *lcont,
-X        struct lmf_str *lm_fd,
-X        long *l_off)
-{
-X  int i;
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  char *bp, *bp1, *bpa, *tp;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-9];	/* leave room for the 9-way unrolled loop */
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-X  if (*lcont==0) {
-X    *l_off = 1;
-X    /* skip forward to the next header line */
-X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
-X      if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X    }
-#ifdef SUPERFAMNUM
-X    /* parse superfamily numbers from a copy of the header; they are
-X       expected between two SFCHARs after the first blank */
-X    strncpy(tline,lm_fd->lline+1,sizeof(tline));
-X    tline[sizeof(tline)-1]='\0';
-X    sfnum[0]=nsfnum=0;
-X    if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
-X      if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';
-X      if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
-/*	fprintf(stderr," second %c missing: %s\n",SFCHAR,libstr); */
-X      }
-X      else {
-X        *bp1 = '\0';
-X        i = 0;
-X        if ((tp = strtok(bp+1," \t"))!=NULL) {
-X          sfnum[i++] = atoi(tp);
-X          while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
-X            if (isdigit(*tp)) sfnum[i++] = atoi(tp);
-X            if (i>=9) break;
-X          }
-X        }
-X        sfnum[nsfnum=i]= 0;
-X        if (nsfnum>1) sf_sort(sfnum,nsfnum);
-X        else {
-X          if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
-X        }
-X      }
-X    }
-X    else {
-X      sfnum[0] = nsfnum = 0;
-X    }
-#endif
-X
-X    if ((bp=strchr(lm_fd->lline,'@'))!=NULL && !strncmp(bp+1,"C:",2)) {
-X      sscanf(bp+3,"%ld",l_off);
-X    }
-X
-X    strncpy(libstr,lm_fd->lline+1,n_libstr-1);
-X    libstr[n_libstr-1]='\0';
-X    if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
-X    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
-X    if (n_libstr > MAX_UID) {
-X      tp = libstr;
-X      while (*tp++) if (*tp == '\001' || *tp== '\t') *tp = ' ';
-X    }
-X
-X    *libpos = lm_fd->lpos;
-X
-X    /* make certain we have the end of the line */
-X    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
-X      if (strlen(lm_fd->lline)<MAX_STR/2)
-X        bp = fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf);
-X      else
-X        bp = fgets(&lm_fd->lline[MAX_STR/2],MAX_STR/2,lm_fd->libf);
-X      if (bp == NULL) break;	/* EOF or read error: no more line to get */
-X    }
-X    lm_fd->lline[MAX_STR-1]='\0';
-X  }
-X
-X  lm_fd->lline[0]='\0';
-X  /* each line is read directly into seq and translated in place */
-X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
-X    if (*seqp=='>') goto new;
-X    if (*seqp==';') {
-X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
-X      continue;
-X    }
-X
-X    for (cp=seqp; seqp<seqm1; ) {
-X      if ((*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA) continue;
-X      if (*(--seqp)>NA) break;	/* codes above NA end the line */
-X    }
-X    if (*seqp==ES) goto done;
-X    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);
-X  }
-X  goto done;
-X new:
-X  /* the line just read is the next entry's header: save it in lline */
-X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
-X  lm_fd->lline[MAX_STR-1]='\0';
-X  /* be certain to get complete line, if possible */
-X  if (strchr(lm_fd->lline,'\n')==NULL)
-X    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
-X  lm_fd->lline[MAX_STR-1]='\0';
-X  if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL)
-X    lm_fd->lline[strlen(lm_fd->lline)-1]='\n';
-X  goto done;
-X
-X cont:
-X  /* a ';' line did not fit in the buffer: read the next line into
-X     lline and force the "buffer full" path below */
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  seqm1 = seqp;
-X done:
-X  if (seqp>=seqm1) (*lcont)++;
-X  else {
-X    *lcont=0;
-X  }
-X
-X  *seqp = EOSEQ;
-X  /*   if ((int)(seqp-seq)==0) return 1; */
-X  return (int)(seqp-seq);
-}
-X
-/* aranlib() - seek to an entry and return its description.
-X
-X     str    - receives the header line at offset seek, without its first
-X              character and line terminator; '\001' and tab characters
-X              (after the first character) become blanks
-X     cnt    - size of str
-X     seek   - file offset of the header line
-X     libstr - not used here
-X     lm_fd  - library descriptor; lline is overwritten
-X
-X   str is set to "" when the library is stdin, when the line cannot be
-X   read, or when the line does not start with '>' or ';'.  (A failed
-X   fgets() used to go unnoticed, so whatever was left in lline was
-X   returned as the description.) */
-void
-aranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd)
-{
-X  char *bp;
-X
-X  str[0]='\0';
-X  if (lm_fd->libf == stdin) return;	/* cannot seek on stdin */
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return;
-X
-X  if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') {
-X    strncpy(str,lm_fd->lline+1,cnt);
-X    str[cnt-1]='\0';
-X    if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
-X    if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X    bp = str;
-X    while (*bp++) if (*bp=='\001' || *bp=='\t') *bp=' ';
-X  }
-}
-X
-void lget_ann(struct lmf_str *, char *, int);
-X
-/* lgetlib() - read the next entry, or the next piece of a long entry,
-X   from a library whose entries run from a "LOCUS" line through an
-X   "ORIGIN" line and sequence lines to a line starting with '/'
-X   (presumably the GenBank flat-file library type - confirm in altlib.h).
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-12 codes are stored per call
-X     libstr   - receives 12 characters of the LOCUS line if
-X                n_libstr <= 21, otherwise the text built by lget_ann()
-X     libpos   - receives lm_fd->lpos, the offset recorded before the
-X                LOCUS line was read
-X     lcont    - 0: start a new entry; otherwise continue translating the
-X                saved line at lm_fd->cpsave.  Incremented on return if
-X                the buffer filled, reset to 0 if not.
-X     l_off    - always set to 1
-X
-X   Returns the number of codes stored, or -1 at end of file while
-X   looking for LOCUS or ORIGIN. */
-int
-lgetlib(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *lm_fd,
-X long *l_off)
-{
-X register unsigned char *cp, *seqp;
-X register int *ap;
-X unsigned char *seqm, *seqm1;
-X char *bp, *bp_gid;
-X
-X *l_off = 1;
-X
-X seqp = seq;
-X /* leave room for the 11-way unrolled translation loop below */
-X seqm = &seq[maxs-11];
-X seqm1 = seqm-1;
-X
-X ap = lm_fd->sascii;
-X
-X if (*lcont==0) {
-X /* the two character tests are a cheap pre-check before strncmp() */
-X while (lm_fd->lline[0]!='L' || lm_fd->lline[1]!='O' ||
-X strncmp(lm_fd->lline,"LOCUS",5)) { /* find LOCUS */
-X lm_fd->lpos = FTELL(lm_fd->libf);
-X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X if (lm_fd->lfflag) getc(lm_fd->libf);
-X }
-X *libpos= lm_fd->lpos;
-X
-X if (n_libstr <= 21) {
-X /* short form: 12 characters starting at column 12 of the LOCUS line */
-X strncpy(libstr,&lm_fd->lline[12],12);
-X libstr[12]='\0';
-X }
-X else {
-X /* long form: lget_ann() reads further lines from the file itself */
-X lget_ann(lm_fd,libstr,n_libstr);
-X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X }
-X
-X while (lm_fd->lline[0]!='O' || lm_fd->lline[1]!='R' ||
-X strncmp(lm_fd->lline,"ORIGIN",6)) { /* find ORIGIN */
-X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X if (lm_fd->lfflag) getc(lm_fd->libf);
-X }
-X }
-X else {
-X /* continuation: finish the line that was cut short by a full buffer */
-X for (cp= lm_fd->cpsave; seqp<seqm1; ) {
-X if ((*seqp++=ap[*cp++])<NA) continue;
-X if (*(--seqp)>NA) break;
-X }
-X }
-X
-X lm_fd->lline[0]='\0';
-X while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
-X if (lm_fd->lfflag) getc(lm_fd->libf);
-X if (lm_fd->lline[0]=='/') goto new;
-X /* translation starts at column 10 of each sequence line; codes equal
-X to NA are dropped, codes above NA end the line */
-X for (cp= (unsigned char *)&lm_fd->lline[10]; seqp<seqm1; ) {
-X if ((*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA &&
-X (*seqp++=ap[*cp++])<NA) continue;
-X if (*(--seqp)>NA) break;
-X }
-X }
-X goto done;
-new:
-X /* end of entry: record the offset and read the following line */
-X lm_fd->lpos = FTELL(lm_fd->libf);
-X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-done:
-X if (seqp>=seqm1) {
-X /* buffer full: remember where to resume in the current line */
-X lm_fd->cpsave = cp;
-X (*lcont)++;
-X }
-X else *lcont=0;
-X
-X *seqp = EOSEQ;
-X /* if ((int)(seqp-seq)==0) return 1; */
-X return (int)(seqp-seq);
-}
-X
-/* lget_ann() - build a FASTA-style description for the entry whose
-X   LOCUS line is in lm_fd->lline, reading the DEFINITION, ACCESSION and
-X   VERSION lines that follow it from lm_fd->libf.
-X
-X     lm_fd    - library descriptor; lline must hold the LOCUS line
-X     libstr   - receives "gi|<gi>|gb|" (only if the VERSION line has a
-X                ':'), then "<version>|" or else "<accession> ", then the
-X                locus name and the text of the DEFINITION line
-X     n_libstr - size of libstr; the result is truncated to fit
-X
-X   The file is left positioned after the last line examined.
-X
-X   Fixes relative to the earlier version: every search stops at end of
-X   file instead of looping forever; locus is always terminated; short
-X   or missing lines are no longer indexed past their end; all strncat()
-X   limits are the space actually left in libstr; once ORIGIN has been
-X   seen the VERSION search does not read further. */
-void
-lget_ann(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
-X  char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];
-X  int have_desc, have_acc, have_ver, saw_origin;
-X
-X  if (n_libstr < 1) return;
-X  libstr[0]='\0';
-X
-X  /* copy in locus from lm_fd->lline */
-X  locus[0]='\0';
-X  if (strlen(lm_fd->lline) > 12) {
-X    strncpy(locus,&lm_fd->lline[12],sizeof(locus));
-X    locus[sizeof(locus)-1]='\0';
-X  }
-X  /* keep the name and the one blank that follows it */
-X  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
-X
-X  /* get description */
-X  have_desc = 0;
-X  while (fgets(desc,sizeof(desc),lm_fd->libf)!=NULL) {
-X    if (strncmp(desc,"DEFINITION",10)==0) { have_desc = 1; break; }
-X  }
-X  if (!have_desc) desc[0]='\0';
-X  else if ((bp = strchr(desc,'\n'))!=NULL) *bp='\0';
-X
-X  /* get accession */
-X  have_acc = saw_origin = 0;
-X  while (fgets(acc,sizeof(acc),lm_fd->libf)!=NULL) {
-X    if (strncmp(acc,"ACCESSION",9)==0) { have_acc = 1; break; }
-X    if (strncmp(acc,"ORIGIN",6)==0) { saw_origin = 1; break; }
-X  }
-X  if (have_acc && strlen(acc) > 12) {
-X    if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';
-X    if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';
-X  }
-X  else have_acc = 0;
-X
-X  /* get version */
-X  have_ver = 0;
-X  bp_gid = NULL;
-X  if (!saw_origin) {
-X    while (fgets(ver,sizeof(ver),lm_fd->libf)!=NULL) {
-X      if (strncmp(ver,"VERSION",7)==0) { have_ver = 1; break; }
-X      if (strncmp(ver,"ORIGIN",6)==0) break;
-X    }
-X  }
-X  if (have_ver && strlen(ver) > 12) {
-X    if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';
-X
-X    /* extract gi:123456 from version line */
-X    bp_gid = strchr(&ver[12],':');
-X    if (bp_gid != NULL) {
-X      if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
-X      bp_gid++;
-X    }
-X    /* cut the version field at its first blank */
-X    if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';
-X  }
-X  else have_ver = 0;
-X
-X  /* build up FASTA header line */
-X  if (bp_gid != NULL) {
-X    strncpy(libstr,"gi|",n_libstr-1);
-X    libstr[n_libstr-1]='\0';
-X    strncat(libstr,bp_gid,n_libstr-1-strlen(libstr));
-X    strncat(libstr,"|gb|",n_libstr-1-strlen(libstr));
-X  }
-X
-X  /* if we have a version number, use it, otherwise accession,
-X     otherwise locus/description */
-X
-X  if (have_ver) {
-X    strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));
-X    strncat(libstr,"|",n_libstr-1-strlen(libstr));
-X  }
-X  else if (have_acc) {
-X    strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));
-X    strncat(libstr," ",n_libstr-1-strlen(libstr));
-X  }
-X
-X  strncat(libstr,locus,n_libstr-1-strlen(libstr));
-X  /* desc[11] is the blank before the definition text */
-X  if (strlen(desc) > 11)
-X    strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));
-X  libstr[n_libstr-1]='\0';
-}
-X
-X
-/* this code seeks to provide both the various accession numbers
-X necessary to identify the sequence, and also some description.
-X
-X Unfortunately, the various contributors to Genbank use three
-X slightly different formats for including the accession number.
-X
-(1)LOCUS HSJ214M20 107422 bp DNA HTG 16-JUN-2000
-X DEFINITION Homo sapiens chromosome 6 clone RP1-214M20 map p12.1-12.3, ***
-X SEQUENCING IN PROGRESS ***, in unordered pieces.
-X ACCESSION AL121969
-X
-(2)LOCUS AL359201 117444 bp DNA HTG 15-JUN-2000
-X DEFINITION Homo sapiens chromosome 1 clone RP4-671C13 map p13.2-21.1, ***
-X SEQUENCING IN PROGRESS ***, in unordered pieces.
-X ACCESSION AL359201
-X
-(3)LOCUS BB067000 280 bp mRNA EST 19-JUN-2000
-X DEFINITION BB067000 RIKEN full-length enriched, 15 days embryo male testis Mus
-X musculus cDNA clone 8030456L01 3', mRNA sequence.
-X ACCESSION BB067000
-X
-This makes it more difficult to both provide the accession number in a
-standard location and to conserve definition space
-*/
-X
-/* lranlib() - seek to an entry and return its description.
-X
-X     str    - receives the description built by lget_ann()
-X     cnt    - size of str
-X     seek   - file offset of the entry's first (LOCUS) line
-X     libstr - not used here
-X     lm_fd  - library descriptor
-X
-X   On return the file is positioned just after the line at seek, and
-X   that line is in lm_fd->lline.  If that line cannot be read, str is
-X   set to "" (previously the stale contents of lline were parsed).  Two
-X   unused MAX_STR-sized local arrays have been removed. */
-void
-lranlib(char *str,
-X        int cnt,
-X        fseek_t seek,
-X        char *libstr,
-X        struct lmf_str *lm_fd)
-{
-X  FSEEK(lm_fd->libf, seek, 0);
-X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
-X    if (cnt > 0) str[0]='\0';
-X    return;
-X  }
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-X  lget_ann(lm_fd, str, cnt);
-X  if (cnt > 0) str[cnt-1]='\0';
-X
-X  /* lget_ann() read ahead; go back to the start of the entry */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-}
-X
-/* pgetlib() - read the next entry, or the next piece of a long entry,
-X   from a library whose entries run from an "ENTRY" line through a
-X   "SEQUENCE" line (plus one extra line) and sequence lines to a line
-X   starting with '/' (presumably the NBRF/PIR CODATA library type -
-X   confirm in altlib.h).
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-12 codes are stored per call
-X     libstr   - receives 8 characters from column 16 of the ENTRY line;
-X                n_libstr is not consulted, so libstr must hold 9 bytes
-X     libpos   - receives lm_fd->lpos, the offset recorded before the
-X                ENTRY line was read
-X     lcont    - 0: start a new entry; otherwise continue translating the
-X                saved line at lm_fd->cpsave.  Incremented on return if
-X                the buffer filled, reset to 0 if not.
-X     l_off    - always set to 1
-X
-X   Returns the number of codes stored, or -1 at end of file while
-X   looking for ENTRY or SEQUENCE. */
-int
-pgetlib(unsigned char *seq,
-X int maxs,
-X char *libstr,
-X int n_libstr,
-X fseek_t *libpos,
-X int *lcont,
-X struct lmf_str *lm_fd,
-X long *l_off)
-{
-X int ic;
-X register unsigned char *cp, *seqp;
-X register int *ap;
-X unsigned char *seqm, *seqm1;
-X
-X *l_off = 1;
-X
-X seqp = seq;
-X seqm = &seq[maxs-11];
-X seqm1 = seqm-1;
-X
-X ap = lm_fd->sascii;
-X
-X if (*lcont==0) {
-X /* the two character tests are a cheap pre-check before strncmp() */
-X while (lm_fd->lline[0]!='E' || lm_fd->lline[1]!='N' || strncmp(lm_fd->lline,"ENTRY",5))
-X { /* find ENTRY */
-X lm_fd->lpos = FTELL(lm_fd->libf);
-X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X }
-X strncpy(libstr,&lm_fd->lline[16],8);
-X libstr[8]='\0';
-X *libpos = lm_fd->lpos;
-X while (lm_fd->lline[2]!='Q' || lm_fd->lline[0]!='S' || strncmp(lm_fd->lline,"SEQUENCE",8))
-X { /* find SEQUENCE */
-X if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X }
-X fgets(lm_fd->lline,MAX_STR,lm_fd->libf); /* get the extra line */
-X }
-X else {
-X /* continuation: finish the line that was cut short by a full buffer */
-X for (cp= lm_fd->cpsave; seqp<seqm1; ) {
-X if ((*seqp++=ap[*cp++])<NA) continue;
-X if (*(--seqp)>NA) break;
-X }
-X if (*seqp==ES) goto done;
-X }
-X
-X lm_fd->lline[0]='\0';
-X while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
-X if (lm_fd->lline[0]=='/') goto new;
-X /* translation starts at column 8 of each sequence line; codes equal
-X to NA are dropped, codes above NA end the line */
-X for (cp= (unsigned char *)&lm_fd->lline[8]; seqp<seqm1; ) {
-X if ((*seqp++=ap[*cp++])<NA) continue;
-X if (*(--seqp)>NA) break;
-X };
-X if (*seqp==ES) goto done;
-X }
-X goto done;
-new:
-X /* end of entry: record the offset and read the following line */
-X lm_fd->lpos = FTELL(lm_fd->libf);
-X fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X
-done:
-X if (seqp>=seqm1) {
-X /* buffer full: remember where to resume in the current line */
-X lm_fd->cpsave = cp;
-X (*lcont)++;
-X }
-X else *lcont=0;
-X
-X *seqp = EOSEQ;
-X /* if ((int)(seqp-seq)==0) return 1; */
-X return (int)(seqp-seq);
-}
-X
-/* pranlib() - seek to an entry and return its description.
-X
-X     str    - receives 8 characters from column 16 of the line at seek
-X              (the ENTRY line), followed by the text from column 16 of
-X              the next "TITLE" line; cut at the first newline
-X     cnt    - size of str
-X     seek   - file offset of the entry's first line
-X     libstr - not used here
-X     lm_fd  - library descriptor
-X
-X   On return the file is positioned just after the line at seek, and
-X   that line is in lm_fd->lline.
-X
-X   Fixes relative to the earlier version: the TITLE search stops at end
-X   of file instead of looping forever; the result is terminated at
-X   str[cnt-1] (it used to be cut at str[cnt-9], discarding 8 characters
-X   that had just been copied); a cnt below 9 no longer overruns str. */
-void
-pranlib(char *str,
-X        int cnt,
-X        fseek_t seek,
-X        char *libstr,
-X        struct lmf_str *lm_fd)
-{
-X  char *bp;
-X  int n_id, have_title;
-X
-X  if (cnt < 1) return;
-X  str[0]='\0';
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return;
-X
-X  n_id = (cnt-1 < 8) ? cnt-1 : 8;
-X  strncpy(str,&lm_fd->lline[16],n_id);
-X  str[n_id]='\0';
-X
-X  have_title = 0;
-X  while (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
-X    if (strncmp(lm_fd->lline,"TITLE",5)==0) { have_title = 1; break; }
-X  }
-X  if (have_title && cnt > 9) {
-X    strncpy(&str[8],&lm_fd->lline[16],cnt-9);
-X    str[cnt-1]='\0';
-X  }
-X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X
-X  /* go back to the start of the entry */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-}
-X
-/* egetlib() - read the next entry, or the next piece of a long entry,
-X   from a library whose entries run from an "ID" line through an "SQ"
-X   line and sequence lines to a line starting with '/' (presumably the
-X   EMBL/Swiss-Prot library type - confirm in altlib.h).
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-12 codes are stored per call
-X     libstr   - receives the first word after column 5 of the ID line,
-X                padded/truncated to 12 characters; n_libstr is not
-X                consulted, so libstr must hold 13 bytes
-X     libpos   - receives lm_fd->lpos, the offset recorded before the ID
-X                line was read
-X     lcont    - 0: start a new entry; otherwise continue translating the
-X                saved line at lm_fd->cpsave.  Incremented on return if
-X                the buffer filled, reset to 0 if not.
-X     l_off    - always set to 1
-X
-X   Returns the number of codes stored, or -1 at end of file while
-X   looking for ID or SQ.
-X
-X   The identifier used to be read with an unbounded "%s" into an 11-byte
-X   array, overrunning the stack for identifiers longer than 10
-X   characters; it is now read with a width limit into a larger buffer. */
-int
-egetlib(unsigned char *seq,
-X        int maxs,
-X        char *libstr,
-X        int n_libstr,
-X        fseek_t *libpos,
-X        int *lcont,
-X        struct lmf_str *lm_fd,
-X        long *l_off)
-{
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  char id[64];  /* Holds Identifier */
-X
-X  *l_off=1;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-11];	/* leave room for the 11-way unrolled loop */
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-X  if (*lcont==0) {
-X    while (lm_fd->lline[0]!='I' || lm_fd->lline[1]!='D') { /* find ID */
-X      lm_fd->lpos = FTELL(lm_fd->libf);
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X      if (lm_fd->lfflag) getc(lm_fd->libf);
-X    }
-X    id[0]='\0';
-X    sscanf(&lm_fd->lline[5],"%63s",id);
-X    sprintf(libstr,"%-12.12s",id);
-X    libstr[12]='\0';
-X    *libpos = lm_fd->lpos;
-X    while (lm_fd->lline[0]!='S' || lm_fd->lline[1]!='Q') { /* find SQ */
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X      if (lm_fd->lfflag) getc(lm_fd->libf);
-X    }
-X    /* the number at column 14 of the SQ line is kept in gcg_len */
-X    sscanf(&lm_fd->lline[14],"%ld",&lm_fd->gcg_len);
-X  }
-X  else {
-X    /* continuation: finish the line that was cut short by a full buffer */
-X    for (cp= lm_fd->cpsave; seqp<seqm1; ) {
-X      if ((*seqp++=ap[*cp++])<NA) continue;
-X      if (*(--seqp)>NA) break;
-X    }
-X    if (*seqp==ES) goto done;
-X  }
-X
-X  lm_fd->lline[0]='\0';
-X  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {
-X    if (lm_fd->lfflag) getc(lm_fd->libf);
-X    if (lm_fd->lline[0]=='/') goto new;
-X    lm_fd->lline[70]='\0';	/* ignore anything from column 70 on */
-X    for (cp= (unsigned char *)&lm_fd->lline[5]; seqp<seqm1; ) {
-X      if ((*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA) continue;
-X      if (*(--seqp)>NA) break;
-X    }
-X    if (*seqp==ES) goto done;
-X  }
-X  goto done;
-new:
-X  /* end of entry: record the offset and read the following line */
-X  lm_fd->lpos = FTELL(lm_fd->libf);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-X  goto done;
-X
-done:
-X  if (seqp>=seqm1) {
-X    /* buffer full: remember where to resume in the current line */
-X    lm_fd->cpsave = cp;
-X    (*lcont)++;
-X    lm_fd->gcg_len -= (long)(seqp-seq);
-X  }
-X  else *lcont=0;
-X
-X  *seqp = EOSEQ;
-X  /* if ((int)(seqp-seq)==0) return 1; */
-X  /*	if (*lcont==0 && (long)(seqp-seq)!=lm_fd->gcg_len)
-X	printf("%s read %d of %d\n",libstr,(int)(seqp-seq),lm_fd->gcg_len);
-X	*/
-X  return (int)(seqp-seq);
-}
-X
-/* eranlib() - seek to an entry and return its description.
-X
-X     str    - receives the first word after column 5 of the line at seek
-X              (the ID line) padded/truncated to 10 characters, a blank,
-X              and the text from column 5 of the next line starting with
-X              "DE"; cut at the first '\r' or '\n'.  str must hold at
-X              least 12 bytes.
-X     cnt    - size of str
-X     seek   - file offset of the entry's first line
-X     libstr - not used here
-X     lm_fd  - library descriptor
-X
-X   On return the file is positioned just after the line at seek, and
-X   that line is in lm_fd->lline.
-X
-X   Fixes relative to the earlier version: the identifier is read with a
-X   width limit (an unbounded "%s" into an 11-byte array overran the
-X   stack); the DE search stops at end of file instead of looping
-X   forever; the result is terminated at str[cnt-1] rather than being
-X   cut at str[cnt-11]. */
-void
-eranlib(char *str,
-X        int cnt,
-X        fseek_t seek,
-X        char *libstr,
-X        struct lmf_str *lm_fd)
-{
-X  char *bp;
-X  char id[64];  /* Holds Identifier */
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
-X    if (cnt > 0) str[0]='\0';
-X    return;
-X  }
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-X  id[0]='\0';
-X  sscanf(&lm_fd->lline[5],"%63s",id);
-X  sprintf(str,"%-10.10s ",id);
-X
-X  bp = fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-X  while (bp != NULL && (lm_fd->lline[0]!='D' || lm_fd->lline[1]!='E'))
-X    bp = fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (bp != NULL && cnt > 11) {	/* found a DE line */
-X    strncpy(&str[11],&lm_fd->lline[5],cnt-11);
-X    str[cnt-1]='\0';
-X  }
-X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
-X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X
-X  /* go back to the start of the entry */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-}
-X
-/* igetlib() - read the next entry, or the next piece of a long entry,
-X   from a library whose entries start with one or more ';' lines
-X   followed by a name line (presumably the Intelligenetics library type
-X   - confirm in altlib.h).
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-10 codes are stored per call
-X     libstr   - receives up to 12 characters of the first non-';' line,
-X                starting at its second character; n_libstr is not
-X                consulted, so libstr must hold 13 bytes
-X     libpos   - receives lm_fd->lpos, the offset recorded before the
-X                first ';' line was read
-X     lcont    - 0: start a new entry; otherwise continue the current
-X                one.  Incremented on return if the buffer filled, reset
-X                to 0 if not.
-X     l_off    - always set to 1
-X
-X   Returns the number of codes stored, or -1 at end of file while
-X   looking for the start of an entry.
-X
-X   Fixes relative to the earlier version: skipping the ';' lines stops
-X   (returning -1) at end of file instead of looping forever; when a '>'
-X   line had to be completed with a second read, the remainder is now
-X   appended to lline as agetlib() and vgetlib() do, rather than
-X   overwriting its beginning. */
-int
-igetlib(unsigned char *seq,
-X        int maxs,
-X        char *libstr,
-X        int n_libstr,
-X        fseek_t *libpos,
-X        int *lcont,
-X        struct lmf_str *lm_fd,
-X        long *l_off)
-{
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  char *bp;
-X
-X  *l_off = 1;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-9];	/* leave room for the 9-way unrolled loop */
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-X  if (*lcont==0) {
-X    while (lm_fd->lline[0]!=';') {
-X      lm_fd->lpos = FTELL(lm_fd->libf);
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X    }
-X    *libpos = lm_fd->lpos;
-X    /* skip the ';' lines; the next line carries the name */
-X    while (lm_fd->lline[0]==';') {
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X    }
-X    strncpy(libstr,lm_fd->lline+1,12);
-X    libstr[12]='\0';
-X    if((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
-X  }
-X
-X  lm_fd->lline[0]='\0';
-X  /* each line is read directly into seq and translated in place */
-X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
-X    if (*seqp=='>') goto new;
-X    if (*seqp==';') {
-X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
-X      continue;
-X    }
-X    for (cp=seqp; seqp<seqm1; ) {
-X      if ((*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA) continue;
-X      if (*(--seqp)>NA) break;	/* codes above NA end the line */
-X    }
-X    if (*seqp==ES) goto done;
-X    lm_fd->lpos = FTELL(lm_fd->libf);
-X  }
-X  goto done;
-new:
-X  /* the line just read starts with '>': save it in lline */
-X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
-X  lm_fd->lline[MAX_STR-1]='\0';
-X  if (strchr((char *)seqp,'\n')==NULL)
-X    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
-X  goto done;
-X
-cont:
-X  /* a ';' line did not fit in the buffer: read the next line into
-X     lline and force the "buffer full" path below */
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  seqm1 = seqp;
-X
-done:
-X  if (seqp>=seqm1) {
-X    (*lcont)++;
-X  }
-X  else {
-X    *lcont=0;
-X  }
-X
-X  *seqp = EOSEQ;
-X  /* if ((int)(seqp-seq)==0) return 1; */
-X  return (int)(seqp-seq);
-}
-X
-/* iranlib() - seek to an entry and return its description.
-X
-X     str    - receives the first blank-delimited word of the first line
-X              that does not start with ';', a blank, and then the text
-X              of the line at seek (without its first character) if that
-X              line starts with '>' or ';'
-X     cnt    - size of str
-X     seek   - file offset of the entry's first line
-X     libstr - not used here
-X     lm_fd  - library descriptor
-X
-X   On return the file is positioned just after the line at seek, and
-X   that line is in lm_fd->lline.
-X
-X   Fixes relative to the earlier version: a failed first read yields ""
-X   instead of parsing stale data, and skipping the ';' lines stops at
-X   end of file instead of looping forever. */
-void
-iranlib(char *str,
-X        int cnt,
-X        fseek_t seek,
-X        char *libstr,
-X        struct lmf_str *lm_fd)
-{
-X  char *bp;
-X  char tline[MAX_FN];
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
-X    if (cnt > 0) str[0]='\0';
-X    return;
-X  }
-X
-X  if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') {
-X    strncpy(tline,lm_fd->lline+1,sizeof(tline));
-X    tline[sizeof(tline)-1]='\0';
-X    if ((bp = strchr(tline,'\n'))!=NULL) *bp='\0';
-X  }
-X  else {
-X    tline[0]='\0';
-X  }
-X
-X  while (lm_fd->lline[0]==';') {
-X    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) {
-X      lm_fd->lline[0]='\0';	/* no name line before end of file */
-X      break;
-X    }
-X  }
-X  if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp=0;
-X  if ((bp=strchr(lm_fd->lline,' '))!=NULL) *bp=0;
-X  strncpy(str,lm_fd->lline,cnt);
-X  str[cnt-1]='\0';
-X  strncat(str,"  ",cnt-strlen(str)-1);
-X  strncat(str,tline,cnt-strlen(str)-1);
-X
-X  /* go back to the start of the entry */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-}
-X
-/* vgetlib() - read the next entry, or the next piece of a long entry,
-X   from a library whose entries have a '>' (or ';') header line with the
-X   name starting at column 4, followed by a description line and then
-X   the sequence (presumably the NBRF/PIR VMS library type - confirm in
-X   altlib.h).
-X
-X     seq      - output buffer; residues are stored as ap[] codes
-X     maxs     - size of seq; at most maxs-10 codes are stored per call
-X     libstr   - receives up to 12 characters of the name and, if
-X                n_libstr > 21, a blank and the description line
-X     n_libstr - size of libstr (must be at least 13)
-X     libpos   - receives lm_fd->lpos, the offset recorded before the
-X                header line was read
-X     lcont    - 0: start a new entry; otherwise continue the current
-X                one.  Incremented on return if the buffer filled, reset
-X                to 0 if not.
-X     l_off    - always set to 1
-X
-X   Returns the number of codes stored, or -1 at end of file while
-X   looking for a header line.
-X
-X   Fixes relative to the earlier version (SUPERFAMNUM builds): at most 9
-X   superfamily numbers are stored, as in agetlib(), so the terminating
-X   0 stays inside sfnum[10] (the old limit of 10 wrote sfnum[10]);
-X   strtok(NULL,...) is only called after a successful first strtok().
-X   A description line that cannot be read is treated as empty. */
-int
-vgetlib(unsigned char *seq,
-X        int maxs,
-X        char *libstr,
-X        int n_libstr,
-X        fseek_t *libpos,
-X        int *lcont,
-X        struct lmf_str *lm_fd,
-X        long *l_off)
-{
-X  int i, ich;
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  char *bp, *tp;
-X
-X  *l_off = 1;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-9];	/* leave room for the 9-way unrolled loop */
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-X  if (*lcont==0) {
-X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
-X      lm_fd->lpos = FTELL(lm_fd->libf);
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X      if (lm_fd->lfflag) getc(lm_fd->libf);
-X    }
-X
-#ifdef SUPERFAMNUM
-X    /* superfamily numbers follow the first SFCHAR after the first blank */
-X    if ((bp=strchr(&lm_fd->lline[1],' ')) &&
-X        (bp=strchr(bp+1,SFCHAR))) {
-X      i=0;
-X      if ((tp = strtok(bp+1," \t\n"))!=NULL) {
-X        sfnum[i++] = atoi(tp);
-X        while ((tp = strtok(NULL," \t")) != NULL) {
-X          sfnum[i++] = atoi(tp);
-X          if (i>=9) break;	/* keep sfnum[9] for the terminating 0 */
-X        }
-X      }
-X      sfnum[nsfnum=i]= 0;
-X      if (nsfnum>1) sf_sort(sfnum,nsfnum);
-X      else {
-X        if (nsfnum < 1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
-X      }
-X    }
-X    else sfnum[0]=nsfnum=0;
-#endif
-X
-X    if ((bp=strchr(lm_fd->lline,'\n'))!=NULL) *bp='\0';
-X    strncpy(libstr,&lm_fd->lline[4],12);
-X    libstr[12]='\0';
-X    if ((bp=strchr(libstr,' '))!=NULL) *bp='\0';
-X    if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
-X
-X    /* the line after the header is the description */
-X    if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) lm_fd->lline[0]='\0';
-X    if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-X    if (n_libstr > 21) {
-X      strcat(libstr," ");
-X      strncat(libstr,lm_fd->lline,n_libstr-1-strlen(libstr));
-X      if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
-X      libstr[n_libstr-1]='\0';
-X    }
-X    *libpos = lm_fd->lpos;
-X  }
-X
-X  lm_fd->lline[0]='\0';
-X  /* each line is read directly into seq and translated in place */
-X  while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),lm_fd->libf)!=NULL) {
-X    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
-X    if (*seqp=='>') goto new;
-X    if (*seqp==';') {
-X      if (strchr((char *)seqp,'\n')==NULL) goto cont;
-X      continue;
-X    }
-X    for (cp=seqp; seqp<seqm1; ) {
-X      if ((*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA &&
-X          (*seqp++=ap[*cp++])<NA) continue;
-X      if (*(--seqp)>NA) break;	/* codes above NA end the line */
-X    }
-X    if (*seqp==ES) goto done;
-X    lm_fd->lpos = FTELL(lm_fd->libf);
-X  }
-X  goto done;
-new:
-X  /* the line just read is the next entry's header: save it in lline */
-X  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);
-X  lm_fd->lline[MAX_STR-1]='\0';
-X  if (strchr((char *)seqp,'\n')==NULL) {
-X    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);
-X    if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
-X  }
-X  goto done;
-X
-cont:
-X  /* a ';' line did not fit in the buffer: read the next line into
-X     lline and force the "buffer full" path below */
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag && (ich=getc(lm_fd->libf))!=LFCHAR) ungetc(ich,lm_fd->libf);
-X  seqm1 = seqp;
-X
-done:
-X  if (seqp>=seqm1) {
-X    (*lcont)++;
-X  }
-X  else {
-X    *lcont=0;
-X  }
-X
-X  *seqp = EOSEQ;
-X  /*   if ((int)(seqp-seq)==0) return 1;*/
-X  return (int)(seqp-seq);
-}
-X
-/* vranlib() - rebuild the description of the entry at file offset `seek`.
-X
-X str, cnt   output buffer and its size; at most cnt-1 characters plus
-X            the terminating NUL are written
-X seek       file offset of the entry's header line
-X libstr     not used by this function
-X lm_fd      library file; lm_fd->lline is overwritten
-X
-X str receives the header text from column 4 (cut at the first ':' and
-X at the end of line), one blank, and then the following line.  If the
-X following line starts by repeating at least 5 characters of the
-X header text, that repeated prefix is skipped.  str is set to "" when
-X the line at `seek` does not look like ">xx;" or ">xx>".
-X
-X On return the file is positioned just after the header line again and
-X lm_fd->lline holds that line.
-*/
-void
-vranlib(char *str,
-X        int cnt,
-X        fseek_t seek,
-X        char *libstr,
-X        struct lmf_str *lm_fd)
-{
-X  char *bp, *llp;
-X  size_t slen;
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-X  if (lm_fd->lline[0]=='>'&&(lm_fd->lline[3]==';'||lm_fd->lline[3]=='>')) {
-X    strncpy(str,&lm_fd->lline[4],cnt-1);
-X    str[cnt-1]='\0';
-X
-X    if ((bp = strchr(str,':'))!=NULL) *bp='\0';
-X    if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
-X    else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X    else str[cnt-1]='\0';
-X
-X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X    if (lm_fd->lfflag) getc(lm_fd->libf);
-X
-X    /* skip over redundant stuff; the NUL test keeps the scan inside both
-X       strings when one is a prefix of (or equal to) the other */
-X    for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp==*bp; llp++,bp++);
-X    if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
-X
-X    if ((bp=strchr(llp,'\r'))!=NULL) *bp=' ';
-X    if ((bp=strchr(llp,'\n'))!=NULL) *bp='\0';
-X
-X    /* append " " + llp only while there is room: when str already holds
-X       cnt-1 characters the separator itself would overflow, and
-X       cnt-strlen(str)-1 would wrap around as a size_t */
-X    slen = strlen(str);
-X    if (slen+1 < (size_t)cnt) {
-X      str[slen++] = ' ';
-X      str[slen] = '\0';
-X      strncat(str,llp,(size_t)cnt-slen-1);
-X    }
-X  }
-X  else {
-X    str[0]='\0';
-X  }
-X
-X  /* leave the file positioned after the header line, as the sequential
-X     reader expects to find it in lline */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X  if (lm_fd->lfflag) getc(lm_fd->libf);
-}
-X
-static int gcg_bton[4]={2,4,1,3};
-X
-/* gcg_getlib() - read the next entry (or the next piece of a long entry)
-X from a GCG-format library file.
-X
-X seq, maxs  output buffer and its size; at most maxs-9 codes are stored
-X libstr     receives the first token of the header line (from column 4)
-X            and, when n_libstr > 21, the text of the following line
-X libpos     set to lm_fd->lpos, the offset saved before the header read
-X lcont      in: 0 means parse a new header first; out: incremented when
-X            part of the entry is still unread, otherwise reset to 0
-X l_off      always set to 1
-X
-X The header supplies the entry length (lm_fd->gcg_len) and a type
-X field; a type starting with '2' selects the packed form, in which each
-X byte read from the file is expanded into four codes via gcg_bton[].
-X
-X Returns the number of codes stored (an EOSEQ follows them), or -1 when
-X no further header line can be read.
-*/
-int
-gcg_getlib(unsigned char *seq,
-X           int maxs,
-X           char *libstr,
-X           int n_libstr,
-X           fseek_t *libpos,
-X           int *lcont,
-X           struct lmf_str *lm_fd,
-X           long *l_off)
-{
-X  char dummy[20];
-X  char gcg_date[10];
-X  char gcg_type[10];
-X  char hdr_fmt[48];
-X  register unsigned char *cp, *seqp, stmp;
-X  register int *ap;
-X  unsigned char *seqm;
-X  long r_block, b_block;
-X  char *bp, *rp;
-X  size_t ll;
-X
-X  *l_off = 1;
-X
-X  seqp = seq;
-X  seqm = &seq[maxs-9];
-X
-X  ap = lm_fd->sascii;
-X
-X  if (*lcont==0) {
-X    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {
-X      lm_fd->lpos = FTELL(lm_fd->libf);
-X      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);
-X    }
-X
-X    /* every %s gets a field width so that an over-long token cannot run
-X       past libstr[n_libstr] or the small local buffers */
-X    sprintf(hdr_fmt,"%%%ds %%9s %%9s %%19s %%ld",
-X            (n_libstr > 1) ? n_libstr-1 : 1);
-X    gcg_type[0] = '\0';		/* defined even if the header does not parse */
-X    sscanf(&lm_fd->lline[4],hdr_fmt,
-X           libstr,gcg_date,gcg_type,dummy,&(lm_fd->gcg_len));
-X
-X    lm_fd->gcg_binary = (gcg_type[0]=='2');
-X
-X    /* read the line after the header; if it is longer than the buffer
-X       keep reading, overwriting the tail, until its newline is consumed.
-X       Stop at end of file too, otherwise this loop would never end. */
-X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X    while (strchr((char *)lm_fd->lline,'\n')==NULL) {
-X      ll = strlen(lm_fd->lline);
-X      if (ll<MAX_STR/2)
-X        rp = fgets(&lm_fd->lline[ll],MAX_STR/2,lm_fd->libf);
-X      else
-X        rp = fgets(&lm_fd->lline[ll-MAX_STR/2],MAX_STR/2,lm_fd->libf);
-X      if (rp == NULL) break;
-X    }
-X    lm_fd->lline[MAX_STR-1]='\0';
-X    if (n_libstr <= 21) {
-X      libstr[12]='\0';
-X    }
-X    else {
-X      strncat(libstr," ",1);
-X      strncat(libstr,lm_fd->lline,n_libstr-1-strlen(libstr));
-X      if ((bp = strchr(libstr,'\n'))!=NULL) *bp='\0';
-X      libstr[n_libstr-1]='\0';
-X    }
-X    *libpos = lm_fd->lpos;
-X  }
-X
-X  lm_fd->lline[0]='\0';
-X
-X  /* b_block counts codes, r_block counts bytes to read from the file;
-X     the packed form holds four codes per byte */
-X  r_block = b_block = min((size_t)(seqm-seqp),lm_fd->gcg_len);
-X  if (lm_fd->gcg_binary) { r_block = (r_block+3)/4; }
-X
-X  fread((char *)seqp,(size_t)r_block,(size_t)1,lm_fd->libf);
-X  if (!lm_fd->gcg_binary)
-X    for (cp=seqp; seqp<seq+r_block; ) *seqp++ = ap[*cp++];
-X  else {
-X    /* expand in place from the back so that no packed byte is
-X       overwritten before it has been decoded */
-X    seqp = seq + r_block;
-X    cp = seq + 4*r_block;
-X    while (seqp > seq) {
-X      stmp = *--seqp;
-X      *--cp = gcg_bton[stmp&3];
-X      *--cp = gcg_bton[(stmp >>= 2)&3];
-X      *--cp = gcg_bton[(stmp >>= 2)&3];
-X      *--cp = gcg_bton[(stmp >>= 2)&3];
-X    }
-X  }
-X  if (4 * r_block >= lm_fd->gcg_len) {
-X    /* entry finished: pre-load the next line for the following call */
-X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X    *lcont = 0;
-X  }
-X  else {
-X    if (lm_fd->gcg_binary) b_block = 4*r_block;
-X    lm_fd->gcg_len -= b_block;
-X    (*lcont)++;
-X  }
-X
-X  seq[b_block] = EOSEQ;
-X  /*   if (b_block==0) return 1; else */
-X  return b_block;
-}
-X
-/* gcg_ranlib() - rebuild the description of the GCG entry at file offset
-X `seek`.
-X
-X str, cnt   output buffer and its size; at most cnt-1 characters plus
-X            the terminating NUL are written
-X seek       file offset of the entry's header line
-X libstr     not used by this function
-X lm_fd      library file; lm_fd->lline is overwritten
-X
-X str receives the first blank-delimited token of the header line (from
-X column 4), one blank, and then the following line.  If the following
-X line starts by repeating at least 5 characters of that token, the
-X repeated prefix is skipped.  str is set to "" when the line at `seek`
-X does not look like ">xx;" or ">xx>".
-X
-X On return the file is positioned just after the header line again and
-X lm_fd->lline holds that line.
-*/
-void
-gcg_ranlib(char *str,
-X           int cnt,
-X           fseek_t seek,
-X           char *libstr,
-X           struct lmf_str *lm_fd)
-{
-X  char *bp, *llp;
-X  size_t slen;
-X
-X  FSEEK(lm_fd->libf, seek, 0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X
-X  if (lm_fd->lline[0]=='>'&&(lm_fd->lline[3]==';'||lm_fd->lline[3]=='>')) {
-X    strncpy(str,&lm_fd->lline[4],cnt-1);
-X    str[cnt-1]='\0';
-X    if ((bp = strchr(str,' '))!=NULL) *bp='\0';
-X    else if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
-X    else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X    else str[cnt-1]='\0';
-X
-X    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-X
-X    /* check whether the beginning of the line duplicates the name; the
-X       NUL test keeps the scan inside both strings */
-X    for (llp=lm_fd->lline,bp=str; *llp!='\0' && *llp == *bp; llp++,bp++);
-X    if ((int)(llp-lm_fd->lline)<5) llp = lm_fd->lline;
-X
-X    /* (a disabled heuristic that skipped a leading species field used to
-X       sit here; the whole remainder of the line is now appended) */
-X
-X    if ((bp=strchr(llp,'\r'))!=NULL) *bp='\0';
-X    if ((bp=strchr(llp,'\n'))!=NULL) *bp='\0';
-X
-X    /* append " " + llp only while there is room; strncat() writes a NUL
-X       after the n characters it copies, so n must be cnt-strlen(str)-1 */
-X    slen = strlen(str);
-X    if (slen+1 < (size_t)cnt) {
-X      str[slen++] = ' ';
-X      str[slen] = '\0';
-X      strncat(str,llp,(size_t)cnt-slen-1);
-X    }
-X  }
-X  else {
-X    str[0]='\0';
-X  }
-X
-X  /* leave the file positioned after the header line */
-X  FSEEK(lm_fd->libf,seek,0);
-X  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);
-}
-X
-/* sf_sort() - sort the n ints in s[] into ascending order, in place.
-X Input that is already in order is detected first and left untouched. */
-void
-sf_sort(s,n)
-X     int *s, n;
-{
-X  int step, hi, lo, k;
-X  int held;
-X
-X  /* look for the first out-of-order neighbours; none (which includes
-X     n <= 1) means there is nothing to do */
-X  for (k=1; k<n && s[k-1]<=s[k]; k++) ;
-X  if (k >= n) return;
-X
-X  /* Shell sort with the step halved on every pass */
-X  for (step=n/2; step>0; step/=2) {
-X    for (hi=step; hi<n; hi++) {
-X      lo = hi-step;
-X      while (lo>=0 && s[lo]>s[lo+step]) {
-X        held = s[lo];
-X        s[lo] = s[lo+step];
-X        s[lo+step] = held;
-X        lo -= step;
-X      }
-X    }
-X  }
-}
-SHAR_EOF
-chmod 0644 nmgetlib.c ||
-echo 'restore of nmgetlib.c failed'
-Wc_c="`wc -c < 'nmgetlib.c'`"
-test 36301 -eq "$Wc_c" ||
- echo 'nmgetlib.c: original size 36301, current size' "$Wc_c"
-fi
-# ============= nr_to_sql.pl ==============
-if test -f 'nr_to_sql.pl' -a X"$1" != X"-c"; then
- echo 'x - skipping nr_to_sql.pl (File already exists)'
-else
-echo 'x - extracting nr_to_sql.pl (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'nr_to_sql.pl' &&
-#!/usr/bin/perl -w
-X
-# Load the FASTA files named on the command line into the prot, annot
-# and sp tables of the seq_demo MySQL database.  Each header line may
-# hold several \001-separated "gi|<num>|<db>|<acc>|<name> <descr>"
-# entries; every entry becomes one annot row linked to the prot row of
-# the sequence, and entries whose <db> is "sp" also get an sp row.
-X
-use DBI;
-X
-# promote every warning to a fatal error
-$SIG{__WARN__} = sub { die @_ };
-X
-# NOTE(review): the connection credentials are hard-coded here
-my $mysql = DBI->connect("DBI:mysql:database=seq_demo;user=seq_demo;password=demo_pass");
-X
-$mysql->do(q{LOCK TABLES prot WRITE,
-X annot WRITE,
-X sp WRITE });
-X
-my $EL = 125;
-my $NA = 123;
-X
-# character code -> residue code, indexed by the 7-bit character value
-my @aatrans = ($EL,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$EL,$NA,$NA,$EL,$NA,$NA,
-X $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
-X $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA, 24,$NA,$NA,$NA,$NA,$NA,
-X $NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,$NA,
-X $NA, 1, 21, 5, 4, 7, 14, 8, 9, 10,$NA, 12, 11, 13, 3,$NA,
-X 15, 6, 2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA,
-X $NA, 1, 21, 5, 4, 7, 14, 8, 9, 10,$NA, 12, 11, 13, 3,$NA,
-X 15, 6, 2, 16, 17,$NA, 20, 18, 23, 19, 22,$NA,$NA,$NA,$NA,$NA
-X );
-X
-my $ins_prot = $mysql->prepare(q{
-X INSERT INTO prot (seq,bin,len) VALUES (?, ?, ?)
-X });
-X
-my $ins_annot = $mysql->prepare(q{
-X INSERT INTO annot (gi, prot_id, db, descr) VALUES (?, ?, ?, ?)
-X });
-X
-my $ins_sp = $mysql->prepare(q{
-X INSERT INTO sp (gi, acc, name) VALUES (?, ?, ?)
-X });
-X
-use vars qw( $seq $bin $tot_seq $tot_annot $tot_sp );
-use vars qw( $gi $prot_id $db $desc $sp_acc $sp_name );
-use vars qw( $header @entries );
-use vars qw( $db_acc $db_name );
-X
-$tot_seq = $tot_annot = $tot_sp = 0;
-X
-for my $db_file ( @ARGV ) {
-X # three-argument open: the name is never interpreted as a mode or pipe
-X open(my $fh, '<', $db_file) or die "cannot open $db_file: $!";
-X local $/ = "\n>";
-X while (<$fh>) {
-X chomp; # remove trailing "\n>" record header
-X ($header, $seq) = $_ =~ m/^>? # record separator (first entry)
-X ( [^\n]* ) \n # header line
-X ( .* ) # the sequence
-X /osx; # compile once, '.' matches newline, extended syntax
-X
-X # keep only letters and underscores: drop whitespace, punctuation, digits
-X $seq =~ s/\W|\d//sg;
-X $bin = pack('C*', map { $aatrans[unpack('C', $_)] } split(//, $seq));
-X $ins_prot->execute($seq,$bin,length($seq));
-X $prot_id = $ins_prot->{mysql_insertid};
-X
-X $tot_seq++;
-X
-# print STDERR "Inserted $prot_id: ". length($seq)."\n";
-X
-X @entries = split(/\001/, $header);
-X
-X for ( @entries ) {
-X ($gi,$db,$db_acc,$db_name,$desc)=
-X $_ =~ /^gi\|(\d+)\|([a-z]+)\|(\S*)\|(\S*) (.*)$/o;
-# print "$prot_id: $gi\t$db\t$db_acc\t$desc\n";
-X $ins_annot->execute($gi,$prot_id,$db,$desc);
-X
-X $tot_annot++;
-X
-X if ($db eq "sp") {
-X $ins_sp->execute($gi,$db_acc,$db_name);
-X $tot_sp++;
-X }
-X }
-X }
-X close($fh);
-}
-X
-print "Inserted $tot_seq sequences; $tot_annot annotations; $tot_sp swissprot\n";
-X
-X
-X
-SHAR_EOF
-chmod 0755 nr_to_sql.pl ||
-echo 'restore of nr_to_sql.pl failed'
-Wc_c="`wc -c < 'nr_to_sql.pl'`"
-test 2452 -eq "$Wc_c" ||
- echo 'nr_to_sql.pl: original size 2452, current size' "$Wc_c"
-fi
-# ============= nrand.c ==============
-if test -f 'nrand.c' -a X"$1" != X"-c"; then
- echo 'x - skipping nrand.c (File already exists)'
-else
-echo 'x - extracting nrand.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'nrand.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: nrand.c,v 1.2 2005/09/23 16:27:25 wrp Exp $ */
-X
-#include <stdlib.h>
-#include <time.h>
-X
-/* irand() - seed the rand() generator.
-X n   seed to use; 0 selects an odd seed below 16381 derived from the
-X     current time
-X Returns the seed handed to srand().  The function is declared int, so
-X it must return a value rather than fall off the end. */
-int
-irand(int n)
-{
-X  if (n == 0) {
-X    n = time(NULL);
-X    n = n % 16381;
-X    if ((n % 2)==0) n++;
-X  }
-X  srand(n);
-X  return n;
-}
-X
-/* nrand() - return rand() reduced modulo n, i.e. a value between 0 and
-X n-1 (not 1 and n).  When RAND32 is defined the low 16 bits of rand()
-X are discarded first.  n must not be 0. */
-int
-nrand(int n)
-{
-X  long rn;
-X
-X  /* rand() is declared by <stdlib.h>; no local redeclaration needed */
-X  rn = rand();
-#ifdef RAND32
-X  rn = rn >> 16;
-#endif
-X  rn = rn % n;
-X  return (int)rn;
-}
-X
-X
-X
-X
-SHAR_EOF
-chmod 0644 nrand.c ||
-echo 'restore of nrand.c failed'
-Wc_c="`wc -c < 'nrand.c'`"
-test 566 -eq "$Wc_c" ||
- echo 'nrand.c: original size 566, current size' "$Wc_c"
-fi
-# ============= nrand48.c ==============
-if test -f 'nrand48.c' -a X"$1" != X"-c"; then
- echo 'x - skipping nrand48.c (File already exists)'
-else
-echo 'x - extracting nrand48.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'nrand48.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: nrand48.c,v 1.4 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdlib.h>
-#include <time.h>
-X
-/* irand() - seed the drand48 family of generators.  A seed of 0 is
-X replaced by an odd value below 16381 derived from the current time. */
-void
-irand(int n)
-{
-X  int seed = n;
-X
-X  if (seed == 0) {
-X    seed = time(NULL);
-X    seed %= 16381;
-X    if (seed % 2 == 0) seed += 1;
-X  }
-X  srand48(seed);
-}
-X
-/* nrand() - return a value between 0 and n-1: lrand48() with its low 16
-X bits discarded, reduced modulo n. */
-int
-nrand(int n)
-{
-X  int draw = lrand48();
-X
-X  return (draw >> 16) % n;
-}
-X
-SHAR_EOF
-chmod 0644 nrand48.c ||
-echo 'restore of nrand48.c failed'
-Wc_c="`wc -c < 'nrand48.c'`"
-test 533 -eq "$Wc_c" ||
- echo 'nrand48.c: original size 533, current size' "$Wc_c"
-fi
-# ============= nrandom.c ==============
-if test -f 'nrandom.c' -a X"$1" != X"-c"; then
- echo 'x - skipping nrandom.c (File already exists)'
-else
-echo 'x - extracting nrandom.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'nrandom.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: nrandom.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdlib.h>
-#include <time.h>
-X
-/* irand() - seed the random() generator.  A seed of 0 is replaced by an
-X odd value below 16381 derived from the current time. */
-void
-irand(n)
-X     int n;
-{
-X  int seed = n;
-X
-X  if (seed == 0) {
-X    seed = time(NULL);
-X    seed %= 16381;
-X    if (seed % 2 == 0) seed += 1;
-X  }
-X  srandom(seed);
-}
-X
-/* nrand() - return random() reduced modulo n, i.e. a value between 0 and
-X n-1. */
-int
-nrand(n)
-X     int n;
-{
-X  int draw = random();
-X
-X  return draw % n;
-}
-X
-SHAR_EOF
-chmod 0644 nrandom.c ||
-echo 'restore of nrandom.c failed'
-Wc_c="`wc -c < 'nrandom.c'`"
-test 532 -eq "$Wc_c" ||
- echo 'nrandom.c: original size 532, current size' "$Wc_c"
-fi
-# ============= oohu.aa ==============
-# Unpack oohu.aa unless it is already present; passing -c as the first
-# argument forces it to be overwritten.
-if [ -f 'oohu.aa' ] && [ X"$1" != X"-c" ]; then
-  echo 'x - skipping oohu.aa (File already exists)'
-else
-  echo 'x - extracting oohu.aa (Text)'
-  sed 's/^X//' << 'SHAR_EOF' > 'oohu.aa' &&
->OOHU | 1358 rhodopsin - human
-MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
-PLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
-KPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV
-HFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
-SNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
-SHAR_EOF
-  chmod 0644 oohu.aa ||
-    echo 'restore of oohu.aa failed'
-  # compare the extracted byte count with the size recorded in the archive
-  Wc_c="`wc -c < 'oohu.aa'`"
-  [ 385 -eq "$Wc_c" ] ||
-    echo 'oohu.aa: original size 385, current size' "$Wc_c"
-fi
-# ============= oohu.raa ==============
-# Unpack oohu.raa unless it is already present; passing -c as the first
-# argument forces it to be overwritten.
-if [ -f 'oohu.raa' ] && [ X"$1" != X"-c" ]; then
-  echo 'x - skipping oohu.raa (File already exists)'
-else
-  echo 'x - extracting oohu.raa (Text)'
-  sed 's/^X//' << 'SHAR_EOF' > 'oohu.raa' &&
->oohu.aa shuffled
-KLILINAIFT GLQNSGCTAQ PTPEFFVMAQ YLFAMVSNMG GVFFQTALLN SAGQGFYSWC
-IFIFMTYPGF MLFIQLTGAD FVTVNEGANL CMTFCTQVTA VEYAKPTPVN AAPSSYRILR
-VIGGPYQAIF HSIATVFINS PTTEELQFLR IVVIHIAFIV VAVPLTDPRA VKFNAGELTF
-GCIFYMQYYM VISLFAANPF YYAFIRVPFE VYCETELIMG PLCAKRYVLA AASNGAYLGW
-LKLLEVYSAF PSVKCLNMLR GHVFTTIPET QNAVMYKDVI SSTLFVLSEQ LSAWITSEYP
-VPGKCYWMPF GANTHKNINP DPFAEHEKEY ILVWMVCKFG LGMTVMAG
-SHAR_EOF
-  chmod 0644 oohu.raa ||
-    echo 'restore of oohu.raa failed'
-  # compare the extracted byte count with the size recorded in the archive
-  Wc_c="`wc -c < 'oohu.raa'`"
-  [ 401 -eq "$Wc_c" ] ||
-    echo 'oohu.raa: original size 401, current size' "$Wc_c"
-fi
-# ============= p2_complib.c ==============
-if test -f 'p2_complib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping p2_complib.c (File already exists)'
-else
-echo 'x - extracting p2_complib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'p2_complib.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: p2_complib.c,v 1.96 2007/01/12 20:15:16 wrp Exp $ */
-X
-/*
-X * pcomplib.c : Parallel library search
-X *
-X * #define FIRSTNODE 0/1 (in msg.h) can be used to reserve one node
-X * for collecting results
-X *
-X * Parallel specific options (from doinit.c):
-X * -J # jump to query #
-X * -I self-comparison, do (N choose 2) comparisons
-X * -T # number of workers
-X */
-X
-/* This version is modified to read all files, query and database,
-X through the manager process. Workers will now receive their
-X database from the manager, rather than reading it themselves. This
-X cuts down considerably on NFS traffic, simplifies searches of
-X multiple files, and allows use of clusters of slave nodes that do
-X not have NFS access
-*/
-X
-/* modified 5-November-2004 to ensure 15 byte (SEQ_PAD) NULL
-X padding
-X
-X modified 12-December-2006 to ensure n0>0 before SEQ_PAD padding.
-X */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <time.h>
-X
-#include <limits.h>
-#include <float.h>
-#include <math.h>
-X
-#include <unistd.h>
-#include <sys/types.h>
-#include <signal.h>
-#include <sys/stat.h>
-X
-#ifdef PVM_SRC
-#include "pvm3.h"
-char *mp_verstr="34.26, January 12, 2007 PVM";
-#endif
-X
-#ifdef MPI_SRC
-#include "mpi.h"
-char *mp_verstr="34.26, January 12, 2007 MPI";
-#endif
-X
-#include "msg.h"
-#include "defs.h"
-#include "mm_file.h"
-X
-#include "structs.h"
-#include "param.h"
-#include "p_mw.h"
-X
-#define XTERNAL
-#include "uascii.h"
-X
-char pgmdir[MAX_FN];
-char workerpgm[MAX_FN];
-char managepgm[MAX_FN];
-X
-#define XTERNAL
-#include "upam.h"
-#undef XTERNAL
-X
-/********************************/
-/* global variable declarations */
-/********************************/
-char gstring2[MAX_STR]; /* string for label */
-char gstring3[MAX_STR]; /* string for label */
-char hstring1[MAX_STR];
-X
-int nsfnum; /* number of superfamily numbers */
-int sfnum[10]; /* superfamily number from types 0 and 5 */
-int nsfnum_n;
-int sfnum_n[10];
-X
-/********************************/
-/* extern variable declarations */
-/********************************/
-extern char *prog_func; /* function label */
-extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
-X
-/********************************/
-/*extern function declarations */
-/********************************/
-X
-void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
-void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
-X
-extern void closelib();
-/* check for DNA sequence (nxgetaa.c) */
-extern int scanseq(unsigned char *seq, int n, char *str);
-extern void re_ascii(int *qascii, int *sascii);
-extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
-X
-/* 1d to 2d pam (initxx.c) */
-extern void initpam2 (struct pstruct *ppst);
-/* initialize environment (doinit.c) */
-extern void h_init (struct pstruct *ppst, struct mngmsg *, char *);
-extern void s_abort (char *p, char *p1);
-extern void query_parm (struct mngmsg *m_msp, struct pstruct *ppst);
-extern void last_init (struct mngmsg *, struct pstruct *, int);
-X
-extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
-X struct pstruct *ppst, unsigned char **aa0);
-X
-/* print hist, summaries, timing information */
-void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, int nstats, struct db_str, char *);
-void printsum(FILE *);
-extern void ptime (FILE *, time_t);
-X
-/* reset parameters if DNA sequence (initxx.c) */
-extern void resetp (struct mngmsg *, struct pstruct *);
-X
-/* read a sequence (nmgetlib.c) */
-struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
-X
-#define QGETLIB (q_file_p->getlib)
-#define LGETLIB (l_file_p->getlib)
-X
-/* these functions are in scaleswn.c */
-extern int process_hist(struct stat_str *sptr, int nstat,
-X struct mngmsg m_msg, struct pstruct pst,
-X struct hist_str *hist, void **pstat_void, int);
-extern double zs_to_E(double zs, int n1, int isdna, long, struct db_str ntt);
-extern double (*find_zp)(int score, double escore, int length, double comp, void *);
-void addhistz(double zscore, struct hist_str *); /* scaleswn.c */
-void last_stats(const unsigned char *aa0, int n0,
-X struct stat_str *sptr, int nstats,
-X struct beststr **bestp_arr, int nbest,
-X struct mngmsg m_msg, struct pstruct pst,
-X struct hist_str *histp, void *rs);
-X
-void selectbestz(struct beststr **, int, int);
-void sortbest(struct beststr **, int, int);
-X
-void showbest (FILE *fp, struct beststr **bptr, int nbest,
-X int qlib, struct mngmsg *m_msg, struct pstruct pst,
-X struct db_str ntt, char *gstring2);
-X
-void showalign (FILE *fp,
-X struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,
-X struct pstruct pst, char *gstring2);
-X
-#ifdef PVM_SRC
-char worknode[120];
-int pinums[MAXNOD],hosttid;
-int narch;
-struct pvmhostinfo *hostp;
-#endif
-X
-FILE *outfd; /* Output file */
-X
-extern time_t s_time (); /* fetches time for timing */
-X
-/* this information is global for fsigint() */
-time_t tstart, tscan, tprev, tdone; /* Timing */
-time_t tdstart, tddone, time();
-int max_nodes, nnodes; /* number of nodes */
-int node_map[MAXWRKR], node_id[MAXWRKR];
-int tot_speed,h_speed;
-int qlib = 0; /* number of sequences scanned */
-struct db_str ntt, qtt;
-X
-extern int max_workers, worker_1, worker_n;
-int wlsn [MAXWRKR + 1]; /* number of library sequences in worker */
-int clsn [MAXWRKR + 1]; /* number of 1st library sequence in worker */
-X
-int max_buf_cnt;
-X
-#ifdef PVM_SRC
-#ifndef WORKERPGM
-#define WORKERPGM "c34.work"
-#endif
-#endif
-X
-main (int argc, char *argv[])
-{
-X unsigned char *aa00, *aa01, *aa0p0, *aa0p1;
-X unsigned char *aa1, *aa1ptr, *aa1prev;
-X int aa1i, *aa1i_arr; /* integer offset of sequence in buffer */
-X
-X int n1;
-X int *n1tot_ptr=NULL, *n1tot_cur;
-X int n1tot_cnt=0;
-X int n1tot_v;
-X
-X long l_off;
-X char nodefile[240];
-X struct pstruct pst;
-X int i_score;
-X struct lmf_str *q_file_p;
-X struct lmf_str *l_file_p;
-X
-X /* from manage code */
-X struct mngmsg m_msg0, m_msg1; /* Message from host to manager */
-X struct mngmsg *m_msp0, *m_msp1; /* alternating pointers */
-X struct qmng_str qm_msg0, qm_msg1; /* stuff updated for each query */
-X char q_sqnam[4];
-X int sstart, sstop;
-X
-X struct qmng_str *qm_msp0, *qm_msp1; /* pointer to stuff updated */
-X int last_msg_b[10]; /* last set of numbers */
-X long curtype = ONETYPE; /* current message type */
-X int nclib;
-X struct beststr *best, /* array of best scores */
-X **bptr; /* array of pointers */
-X struct comstr bestr[BFR+1]; /* temporary structure array */
-X struct comstr2 bestr2[BFR2+1]; /* temporary structure array */
-X struct a_struct *aln_d_base=NULL; /* alignment info for -m 9 */
-X int qres_bufsize; /* buffer size for results */
-X struct stat_str *stats=NULL, *qstats=NULL;
-X int best_flag = 1; /* bptr[] must be re-initialized */
-X int fast_flag = 0; /* send new sequences before old displayed */
-X int nstats, nqstats, kstats, jstats;
-X int nbest, nres; /* number of best scores */
-X double zbestcut = -BIGNUM; /* z-value cutoff */
-X int lcnt; /* counters */
-X int nopt;
-X int i, j, k, is, id, iw, ires, naa0 = 0;
-X
-X FILE *fdata=NULL; /* file for full results */
-X struct sql *desptr;
-X struct sql *ldes; /* descriptive lines for all lib sequences */
-X char *bline_buf, *bline_bufp;
-X char *bline_buf_mx; /* buffer for blines */
-X char q_bline[256];
-X char t_bline[256];
-X int max_bline_b, bline_inc;
-X int *n1_arr, *m_seqnm_arr;
-X unsigned char *aa1_buf;
-X
-X char tlibstr[11]; /* used only for fdata *.res files */
-X
-X int node, snode, zero; /* Number of nodes */
-X int bufid, numt, tid;
-X
-X int ave_seq_len;
-X int max_sql;
-X int ntbuff, nseq, m_seqnm;
-X int iln, ocont, maxt;
-X long loffset;
-X
-X int leng; /* leng is length of the descriptive line */
-X fseek_t qseek,lseek; /* seek into library of current sequence */
-X int qlcont,lcont; /* continued sequence */
-X int n_proc, n_tmp;
-X char errstr[120];
-X int stats_done =0; /* flag for z-value processing */
-X int tm_best, t_rbest, t_qrbest, t_best, t_n1;
-X double e_score, tm_escore, t_rescore, t_qrescore;
-X double zscore; /* tmp value */
-X double k_H, k_comp;
-X char tmp_str[MAX_FN];
-X char pgm_abbr[MAX_SSTR];
-X char *bp;
-#ifdef MPI_SRC
-X MPI_Status mpi_status;
-#endif
-X
-X void fsigint();
-X
-X signal(SIGHUP,SIG_IGN);
-X if (signal(SIGINT,SIG_IGN) != SIG_IGN) signal(SIGINT,fsigint);
-X if (signal(SIGQUIT,SIG_IGN) != SIG_IGN) signal(SIGQUIT,fsigint);
-/* if (signal(SIGSEGV,SIG_IGN) != SIG_IGN) signal(SIGSEGV,fsigint); */
-X
-X /* Initialization */
-X
-X
-#if defined(UNIX)
-X m_msg0.quiet = !isatty(1);
-#endif
-X
-X /* BFR must be %6 = 0 for TFASTA */
-X if ((BFR%6) != 0) {
-X fprintf(stderr," BFR size %d not %%6=0 - recompile\n",BFR);
-X exit(1);
-X }
-X
-#ifdef MPI_SRC
-X MPI_Init(&argc, &argv);
-X MPI_Comm_rank(MPI_COMM_WORLD,&tid);
-X if (tid > 0) {
-X workcomp(tid);
-X MPI_Finalize();
-X exit(0);
-X }
-#endif
-X
-X printf("#");
-X for (i=0; i<argc; i++) {
-X if (strchr(argv[i],' ')) printf(" \"%s\"",argv[i]);
-X else printf(" %s",argv[i]);
-X }
-X printf("\n");
-X
-#ifdef MPI_SRC
-X MPI_Comm_size(MPI_COMM_WORLD,&nnodes);
-X if (nnodes <= 1) {
-X fprintf(stderr," nnodes = %d; no workers available\n",nnodes);
-X exit(1);
-X }
-X else fprintf(stderr," have %d nodes\n",nnodes);
-X
-X tot_speed = nnodes*100;
-#endif
-X
-X h_init (&pst,&m_msg0, pgm_abbr);
-X
-X initenv (argc, argv, &m_msg0, &pst, &aa00);
-X
-#ifdef PVM_SRC
-X strncpy (workerpgm, WORKERPGM,sizeof(workerpgm)-1);
-X strncat(workerpgm, pgm_abbr, sizeof(workerpgm)-strlen(workerpgm)-1);
-X workerpgm[sizeof(workerpgm)-1] = '\0';
-#endif
-X
-X strncpy(q_sqnam,"aa",sizeof(q_sqnam));
-X m_msg0.quiet = 1;
-X if (m_msg0.qdnaseq != SEQT_UNK &&
-X (m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq == SEQT_RNA))
-X strncpy(q_sqnam,"nt",sizeof(q_sqnam));
-X
-X m_msg0.pstat_void = NULL;
-X m_msg0.hist.hist_a = NULL;
-X
-X fprintf (stderr, "Pcomp library processor\n");
-X fprintf (stderr, "Using %s\n", prog_func);
-X
-X tstart = tscan = s_time();
-X tdstart = time(NULL);
-X
-X
-#ifdef PVM_SRC
-X if ((hosttid=pvm_mytid())<0) {
-X pvm_perror("initialization");
-X fprintf(stderr,"can't initialize %s\n", argv[0]);
-X pvm_exit();
-X exit(1);
-X }
-X
-X pvm_config(&nnodes,&narch,&hostp);
-X fprintf(stderr,"nnodes: %d, narch: %d\n",nnodes, narch);
-X max_nodes = nnodes;
-X
-#ifdef DEBUG
-X pvm_catchout(stderr);
-#endif
-X
-/* if (nnodes < 2 ) nnodes = 4; */
-X if (max_workers > 0 && nnodes > max_workers) {
-X nnodes = max_workers+FIRSTNODE;
-X fprintf(stderr," workers reset from %d to %d\n",
-X max_nodes,nnodes-FIRSTNODE);
-X }
-X else max_workers = nnodes;
-X
-X strncpy(nodefile,pgmdir,sizeof(nodefile)-1);
-X strncat(nodefile,workerpgm,sizeof(nodefile)-strlen(nodefile)-1);
-X nodefile[sizeof(nodefile)-1] = '\0';
-X
-X if (worker_1 > 0) {
-X /* remap configuration to specific nodes */
-X for (i=FIRSTNODE, j=worker_1; i<nnodes && j<=worker_n; i++,j++)
-X node_id[i]=j;
-X nnodes = i;
-X max_workers = i-FIRSTNODE;
-X fprintf(stderr," workers remapped from %d to %d\n",
-X max_nodes,nnodes-FIRSTNODE);
-X max_nodes = nnodes;
-X }
-X else {
-X for (i=0; i< nnodes; i++) node_map[i]=node_id[i] = i;
-X }
-X
-X if (nnodes < max_nodes) {
-X hostp++; /* bump over host name for spawn */
-X rand_nodes(node_map,nnodes,max_nodes-1);
-X for (i=FIRSTNODE; i<nnodes; i++) {
-X numt+=pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_map[i]].hi_name,
-X 1,&pinums[i]);
-X }
-X }
-X else {
-X /* i counts through nodes (machines) */
-X /* j counts through processes (multiple processes/node) */
-X /* node map maps the process (virtual node) to a physical node (machine) */
-X
-X for (i=j=FIRSTNODE; i<nnodes && j < MAXWRKR; i++) {
-X n_proc = hostp[node_id[i]].hi_speed%100;
-X if (n_proc == 0) n_proc = 1;
-X if (n_proc > max_workers) n_proc = max_workers;
-X
-X n_tmp =pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_id[i]].hi_name,
-X n_proc,&pinums[j]);
-X if (n_tmp < n_proc)
-X fprintf(stderr," spawn problem: %d\n", pinums[j]);
-X if (n_tmp > 0) {
-X for (k=j; k < j+n_tmp; k++) node_map[k]=node_id[i];
-X j += n_tmp;
-X }
-X }
-X nnodes = numt = j;
-X }
-X
-X if (numt < nnodes) {
-X if (numt <= 0) {
-X pvm_perror("");
-X pvm_exit();
-X exit(1);
-X }
-X nnodes = numt;
-X }
-X
-X for (tot_speed=0,i=FIRSTNODE; i<nnodes; i++) {
-X if (pinums[i]<0) {
-X fprintf(stderr," tids %d %8o\n",i,pinums[i]);
-X pvm_perror("");
-X pvm_exit();
-X exit(1);
-X }
-X else {
-X h_speed = hostp[node_map[tidtonode(pinums[i])]].hi_speed;
-X if (h_speed <= 0) h_speed = 100;
-X fprintf(stderr," tids %d %8o %s %5d\n",i,pinums[i],
-X hostp[node_map[tidtonode(pinums[i])]].hi_name,
-X h_speed);
-X tot_speed +=(hostp[node_map[tidtonode(pinums[i])]].hi_speed);
-X }
-X }
-X
-X strncpy(worknode,nodefile,sizeof(worknode));
-X fprintf (stderr, "%3d worker programs loaded from %s\n",
-X nnodes-FIRSTNODE,worknode);
-#endif
-X
-X /* need to allocate two aa0 arrays so that the old is saved for alignments */
-X
-X /* Allocate space for the query sequence */
-X if ((aa00 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1)* sizeof (char))) == NULL)
-X s_abort ("Unable to allocate query sequence", "");
-X
-X if ((aa01 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1) * sizeof (char))) == NULL)
-X s_abort ("Unable to allocate query sequence", "");
-X
-X fputs(iprompt0,stdout);
-X fprintf(stdout," %s%s\n",verstr,refstr);
-X
-X /* Query library */
-X if (m_msg0.tname[0] == '\0') {
-X if (m_msg0.quiet == 1) s_abort("query sequence undefined","");
-X
-X fprintf(stderr, "Pvcomplib [%s]\n",mp_verstr);
-X l1: fputs (iprompt1, stdout);
-X fflush (stdout);
-X if (fgets (m_msg0.tname, 80, stdin) == NULL)
-X s_abort ("Unable to read query library name","");
-X if ((bp=strchr(m_msg0.tname,'\n'))!=NULL) *bp='\0';
-X if (m_msg0.tname[0] == '\0') goto l1;
-X }
-X
-X /* Open query library */
-X if ((q_file_p=
-X openlib(m_msg0.tname, m_msg0.qdnaseq,qascii,!m_msg0.quiet,NULL))==NULL) {
-X s_abort(" cannot open library ",m_msg0.tname);
-X }
-X /*
-X else {
-X printf ("searching %s library\n",m_msg0.tname);
-X }
-X */
-X
-X ntt.entries = qtt.entries = 0;
-X ntt.carry = qtt.carry = 0;
-X ntt.length = qtt.length = 0l;
-X
-X /* Fetch first sequence */
-X qlcont = 0;
-X while (qlib < m_msg0.ql_start) { /* skip through query sequences */
-X pst.n0 = qm_msg0.n0 = m_msg0.n0 =
-X QGETLIB (aa00, MAXTST, q_bline, sizeof(q_bline), &qseek, &qlcont,
-X q_file_p,&m_msg0.sq0off);
-X
-X strncpy(qm_msg0.libstr,q_bline,sizeof(qm_msg0.libstr)-20);
-X qm_msg0.libstr[sizeof(qm_msg0.libstr)-21]='\0';
-X if ((bp=strchr(qm_msg0.libstr,' '))!=NULL) *bp='\0';
-X
-X /* if annotations are included in sequence, remove them */
-X if (m_msg0.ann_flg) {
-X pst.n0 = qm_msg0.n0 = m_msg0.n0 =
-X ann_scan(aa00, m_msg0.n0, &m_msg0, m_msg0.qdnaseq);
-#ifdef DEBUG
-X fprintf(stderr,"m_msp0->/aa0a is: %o/%o\n",&m_msg0,m_msg0.aa0a);
-#endif
-X }
-X
-X if (m_msg0.term_code &&
-X !(m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq==SEQT_RNA) &&
-X aa00[m_msg0.n0-1]!='*') {
-X aa00[m_msg0.n0++]='*';
-X aa00[m_msg0.n0]=0;
-X pst.n0 = qm_msg0.n0 = m_msg0.n0;
-X }
-X
-X /* check for subset */
-X if (q_file_p->opt_text[0]!='\0') {
-X if (q_file_p->opt_text[0]=='-') {
-X sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
-X }
-X else {
-X sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
-X sstart--;
-X if (sstop <= 0 ) sstop = BIGNUM;
-X }
-X for (id=0,is=sstart; is<min(m_msg0.n0,sstop); ) aa00[id++]=aa00[is++];
-X aa00[id]=0;
-X pst.n0 = qm_msg0.n0 = m_msg0.n0 = min(m_msg0.n0,sstop)-sstart;
-X if (m_msg0.sq0off==1) m_msg0.sq0off = sstart+1;
-X }
-X
-X qlib++;
-X
-X if (m_msg0.n0 <= 0)
-X s_abort ("Unable to fetch sequence from library: ", m_msg0.tname);
-X }
-X qtt.entries=1;
-X qm_msg0.slist = 0;
-X
-X /* now have correct query sequence - check sequence type and reset */
-X if (m_msg0.qdnaseq == SEQT_UNK) { /* check for DNA sequence */
-X if (m_msg0.n0 > 20 &&
-X (float)scanseq(aa00,m_msg0.n0,"ACGTUNacgtun")/(float)m_msg0.n0>0.85) {
-X pascii = nascii;
-X m_msg0.qdnaseq = SEQT_DNA;
-X }
-X else { /* its protein */
-X pascii = aascii;
-X m_msg0.qdnaseq = SEQT_PROT;
-X }
-X
-X re_ascii(qascii,pascii);
-X init_ascii(pst.ext_sq_set,qascii,m_msg0.qdnaseq);
-X m_msg0.n0 = recode(aa00,m_msg0.n0,qascii,pst.nsqx);
-X }
-X
-X /* for ALTIVEC, must pad with 15 NULL's */
-X for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
-X
-X qtt.length = m_msg0.n0;
-X
-X if (qlib <= 0) {
-X fprintf(stderr," no sequences found in query library\n");
-X exit(1);
-X }
-X
-X resetp (&m_msg0, &pst);
-X
-X sprintf(tmp_str," %d %s", qm_msg0.n0, q_sqnam);
-X leng = strlen (qm_msg0.libstr);
-X if (leng + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
-X qm_msg0.libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
-X strncat(&qm_msg0.libstr[0],tmp_str,
-X sizeof(qm_msg0.libstr)-strlen(qm_msg0.libstr)-1);
-X qm_msg0.libstr[sizeof(qm_msg0.libstr)-1]='\0';
-X
-X qm_msg0.seqnm = qlib-1;
-X
-X /* Library */
-X
-X if (strlen (m_msg0.lname) == 0) {
-X if (m_msg0.quiet == 1) s_abort("library name undefined","");
-X libchoice(m_msg0.lname, sizeof(m_msg0.lname), &m_msg0);
-X }
-X
-X libselect(m_msg0.lname, &m_msg0);
-X
-X /* Get additional parameters here */
-X if (!m_msg0.quiet) query_parm (&m_msg0, &pst);
-X
-X last_init(&m_msg0, &pst,nnodes-FIRSTNODE);
-X memcpy(&m_msg1, &m_msg0, sizeof(m_msg0));
-X
-X /* m_msg0.maxn needs to be set to MAXLIB or MAXTRN, depending on the
-X function - max_tot has the MAXTST + (MAXLIB|MAXTRN) */
-X if (m_msg0.maxn <= 0) m_msg0.maxn = m_msg0.max_tot - MAXTST;
-X
-X if (m_msg0.maxn < 2 * m_msg0.dupn) m_msg0.maxn = 5*m_msg0.dupn;
-X pst.maxlen = m_msg0.maxn;
-X
-X m_msg0.loff = m_msg0.dupn;
-X m_msg0.maxt3 = m_msg0.maxn-m_msg0.loff;
-X
-X
-X /* ******************** */
-X /* initial manager code */
-X /* ******************** */
-X
-X outfd = stdout;
-X if (m_msg0.outfile[0]!='\0') {
-X if ((outfd = fopen(m_msg0.outfile,"w"))==NULL) {
-X fprintf(stderr, "cannot open %s for output\n", m_msg0.outfile);
-X outfd = stdout;
-X }
-X }
-X
-X /* Label the output */
-X printf("Query library %s vs %s library\n", m_msg0.tname, m_msg0.lname);
-X
-X /* Allocate space for saved scores */
-X if ((best =
-X (struct beststr *)malloc((MAXBEST+1)*sizeof(struct beststr)))==NULL)
-X s_abort ("Cannot allocate best struct","");
-X if ((bptr =
-X (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
-X s_abort ("Cannot allocate bptr","");
-X
-X /* Initialize bptr */
-X for (nbest = 0; nbest < MAXBEST+1; nbest++)
-X bptr[nbest] = &best[nbest];
-X
-X best++; bptr++;
-X best[-1].score[0]=best[-1].score[1]=best[-1].score[2]=INT_MAX;
-X best[-1].zscore = FLT_MAX;
-X best[-1].escore = FLT_MIN;
-X best_flag = 0;
-X
-X if ((stats =
-X (struct stat_str *)calloc((size_t)MAXSTATS,sizeof(struct stat_str)))
-X ==NULL)
-X s_abort ("Cannot allocate stats struct","");
-X nstats = 0;
-X
-X /* Now open the second library, divide it, send sequences to all workers */
-X /* Set up buffer for reading the library:
-X
-X We will start by using a 2 Mbyte buffer for each worker. For
-X proteins, that means 5,000 sequences of length 400 (average).
-X For DNA, that means 2,000 sequences of length 1000. At the moment,
-X those are good averages.
-X */
-X
-X if (max_buf_cnt <= 0) {
-X if (m_msg0.ldnaseq==SEQT_DNA) max_buf_cnt = MAX_NT_BUF;
-X else max_buf_cnt = MAX_AA_BUF;
-X }
-X
-X if (m_msg0.ldnaseq==SEQT_DNA) ave_seq_len = AVE_NT_LEN;
-X else ave_seq_len = AVE_AA_LEN;
-X
-X /* however - buffer sizes should be a function of the number of
-X workers so that all the workers are kept busy. Assuming a 10,000
-X entry library is the smallest we want to schedule, then
-X */
-X
-X if (max_buf_cnt > 10000/(nnodes-FIRSTNODE))
-X max_buf_cnt = 10000/(2*(nnodes-FIRSTNODE));
-X
-X /* allocate space for sequence buffers */
-X
-X m_msg0.pbuf_siz=max_buf_cnt*ave_seq_len;
-X if (m_msg0.pbuf_siz < 5*m_msg0.maxn)
-X m_msg0.pbuf_siz = 5*m_msg0.maxn;
-X
-#ifdef PVM_SRC
-#ifdef ROUTE_DIRECT
-X pvm_setopt(PvmRoute,PvmRouteDirect);
-#endif
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&nnodes,1,1);
-X pvm_pkint(pinums,nnodes,1);
-X pvm_pkbyte((char *)&m_msg0,(int)sizeof(m_msg0),1);
-X for (node = FIRSTNODE; node<nnodes; node++)
-X if (pvm_send(pinums[node],STARTTYPE0)<0) {
-X pvm_perror("pvm_send1");
-X pvm_exit();
-X exit(1);
-X }
-#endif
-#ifdef MPI_SRC
-X for (node = FIRSTNODE; node<nnodes; node++) {
-X MPI_Send(&m_msg0,(int)sizeof(m_msg0),MPI_BYTE,node,STARTTYPE0,
-X MPI_COMM_WORLD);
-X }
-#endif
-X
-X /* now send pst, sascii */
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)&pst,(int)sizeof(pst),1);
-X pvm_pkbyte((char *)pascii,(int)sizeof(aascii),1);
-X
-X for (node = FIRSTNODE; node< nnodes; node++)
-X pvm_send(pinums[node],STARTTYPE1);
-X
-X /* send pam12 */
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(pam12,m_msg0.pamd1*m_msg0.pamd2,1);
-X for (node = FIRSTNODE; node< nnodes; node++)
-X pvm_send(pinums[node],STARTTYPE2);
-X
-X /* send pam12x */
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(pam12x,m_msg0.pamd1*m_msg0.pamd2,1);
-X for (node = FIRSTNODE; node< nnodes; node++)
-X pvm_send(pinums[node],STARTTYPE3);
-X
-#endif
-#ifdef MPI_SRC
-X for (node=FIRSTNODE; node < nnodes; node++) {
-X MPI_Send(&pst,(int)sizeof(pst),MPI_BYTE,node,STARTTYPE1,
-X MPI_COMM_WORLD);
-X MPI_Send(pascii,(int)sizeof(aascii),MPI_BYTE,node,STARTTYPE1,
-X MPI_COMM_WORLD);
-X MPI_Send(pam12,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE2,
-X MPI_COMM_WORLD);
-X MPI_Send(pam12x,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE3,
-X MPI_COMM_WORLD);
-X }
-#endif
-X
-X if ((n1_arr =
-X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
-X ==NULL) {
-X fprintf(stderr," cannot allocate n1_arr %d\n",max_buf_cnt+1);
-X s_abort(" cannot allocate n1_arr","");
-X exit(1);
-X }
-X
-X if ((aa1i_arr =
-X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
-X ==NULL) {
-X fprintf(stderr," cannot allocate aa1i_arr %d\n",max_buf_cnt+1);
-X s_abort(" cannot allocate aa1i_arr","");
-X exit(1);
-X }
-X
-X if ((m_seqnm_arr=
-X (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
-X ==NULL) {
-X fprintf(stderr," cannot allocate m_seqnm_arr %d\n",max_buf_cnt+1);
-X s_abort(" cannot allocate m_seqnm_arr","");
-X exit(1);
-X }
-X
-X if ((aa1_buf =
-X (unsigned char *)calloc((size_t)(m_msg0.pbuf_siz),sizeof(unsigned char)))
-X ==NULL) {
-X s_abort(" cannot allocate library buffer %d","");
-X exit(1);
-X }
-X
-X
-X /* also allocate space for descriptions. Assume max of 250,000 sequences/
-X worker for now
-X */
-X
-X /* max_sql is the maximum number of library sequences that can be stored */
-X max_sql = MAXSQL;
-X
-X if ((ldes=(struct sql *)calloc(max_sql,sizeof(struct sql)))==NULL) {
-X fprintf(stderr," failure to allocate ldes(%d) %ld\n",
-X max_sql,max_sql*sizeof(struct sql));
-X s_abort("cannot allocate ldes","");
-X exit(1);
-X }
-X
-X max_bline_b = MAXSQL * (m_msg0.aln.llen+1)/4;
-X bline_inc = m_msg0.aln.llen;
-X if (m_msg0.markx & MX_M9SUMM) bline_inc += 40;
-X
-X i = 4;
-X while (i-- > 0) {
-X if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL) break;
-X max_bline_b /= 2;
-X bline_inc /= 2;
-X }
-X if (bline_buf == NULL) {
-X fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
-X max_sql,max_bline_b);
-X s_abort(" cannot allocate bline_buf","");
-X }
-X
-X bline_bufp = bline_buf;
-X bline_buf_mx = bline_buf+max_bline_b;
-X
-X /* the code for filling the buffers is copied from comp_thr.c */
-X /* the major differences reflect the fact that all library descriptions
-X will be kept in memory, indexed by sequence number.
-X
-X As a result, one buffer is filled by this loop -
-X ldes[] has the descriptive information for every sequence
-X this array could potentially be quite large
-X */
-X
-X /* now open the library and start reading */
-X /* get a buffer and fill it up */
-X
-X ntbuff = 0;
-X m_seqnm = 0; /* m_seqnm is the number of this library sequence */
-X nseq = 0;
-X
-X node = FIRSTNODE;
-X
-X /* sqs2_buf[0].aa1 = aa1_buf; */
-X aa1 = aa1_buf;
-X
-X /* iln counts through each library */
-X for (iln = 0; iln < m_msg0.nln; iln++) {
-X if ((l_file_p=
-X openlib(m_msg0.lbnames[iln], m_msg0.ldnaseq,lascii,!m_msg0.quiet,NULL))==NULL) {
-X fprintf(stderr," cannot open library %s\n",m_msg0.lbnames[iln]);
-X continue;
-X }
-X else {
-X printf ("searching %s library\n",m_msg0.lbnames[iln]);
-X }
-X
-X lcont = ocont = 0;
-X n1tot_v = n1tot_cnt = 0;
-X n1tot_ptr = n1tot_cur = NULL;
-X maxt = m_msg0.maxn;
-X loffset = 0l;
-X
-X /* read sequence directly into buffer */
-X aa1ptr = aa1; /* = sqs2_buf[0].aa1; */
-X
-X while ((n1= LGETLIB(aa1ptr,maxt,t_bline,sizeof(t_bline),&lseek,&lcont,
-X l_file_p,&l_off))>=0) {
-X
-X /* skip sequences outside range */
-X if (n1 < m_msg0.n1_low || n1 > m_msg0.n1_high) goto loop1;
-X
-X /* add termination code for proteins, if asked */
-X if (m_msg0.term_code && !lcont &&
-X m_msg0.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg0.term_code) {
-X aa1ptr[n1++]=m_msg0.term_code;
-X aa1ptr[n1]=0;
-X }
-X
-X /* check for a continued sequence and provide a pointer to
-X the n1_tot array if lcont || ocont */
-X n1tot_v += n1;
-X if (lcont && !ocont) { /* get a new pointer */
-X if (n1tot_cnt <= 0) {
-X if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
-X fprintf(stderr," cannot allocate n1tot_ptr\n");
-X exit(1);
-X }
-X else {n1tot_cnt=1000;}
-X }
-X n1tot_cnt--;
-X n1tot_cur = n1tot_ptr++;
-X }
-X
-X if (bline_bufp + bline_inc > bline_buf_mx) {
-X i = 4;
-X while (i-- > 0) {
-X if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL)
-X break;
-X fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
-X max_sql,max_bline_b);
-X max_bline_b /= 2;
-X bline_inc /= 2;
-X }
-X if (bline_buf != NULL) {
-X bline_bufp = bline_buf;
-X bline_buf_mx = bline_buf+max_bline_b;
-X }
-X else {
-X s_abort("cannot allocate bline_buf ","");
-X exit(1);
-X }
-X }
-X
-X if (bline_bufp+bline_inc < bline_buf_mx ) {
-X strncpy(bline_bufp,t_bline,bline_inc);
-X ldes[m_seqnm].bline = bline_bufp;
-X bline_bufp[bline_inc]= '\0';
-X bline_bufp += bline_inc+1;
-X }
-X else {
-X fprintf(stderr," bline_buf overrun\n");
-X }
-X
-X ntt.entries++; /* inc number of sequences */
-X ntt.length += n1; /* update total library length */
-X if (ntt.length > LONG_MAX) {ntt.length -= LONG_MAX; ntt.carry++;}
-X
-#ifdef DEBUG
-X /* This discovers most reasons for core dumps */
-X if (pst.debug_lib)
-X for (i=0; i<n1; i++)
-X if (aa1[i]>pst.nsq) {
-X fprintf(stderr,
-X "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
-X qm_msg0.libstr,i,n1,aa1[i],pst.nsq,lcont,ocont,aa1ptr+i);
-X aa1[i]=0;
-X n1=i-1;
-X break;
-X }
-#endif
-X
-X /* for ALTIVEC, must pad with 15 NULL's */
-X for (i=0; i<SEQ_PAD+1; i++) {aa1ptr[n1+i]=0;}
-X
-X /* don't count long sequences more than once */
-X if (aa1!=aa1ptr) {
-X n1 += m_msg0.loff; m_msg0.db.entries--; ntt.entries--;
-X }
-X
-X if (n1>1) {
-X
-X desptr = &ldes[m_seqnm];
-X
-X aa1i_arr[nseq] = (int)(aa1-aa1_buf);
-X m_seqnm_arr[nseq] = m_seqnm;
-X desptr->n1 = n1_arr[nseq] = n1;
-X desptr->n1tot_p = n1tot_cur;
-X desptr->lseek = lseek;
-X desptr->loffset = loffset+l_off;
-X desptr->cont = ocont;
-X desptr->wrkr = node;
-X desptr->nsfnum = nsfnum;
-#ifdef SUPERFAMNUM
-X if ((desptr->sfnum[0]=sfnum[0])>0 &&
-X (desptr->sfnum[1]=sfnum[1])>0 &&
-X (desptr->sfnum[2]=sfnum[2])>0 &&
-X (desptr->sfnum[3]=sfnum[3])>0 &&
-X (desptr->sfnum[4]=sfnum[4])>0 &&
-X (desptr->sfnum[5]=sfnum[5])>0 &&
-X (desptr->sfnum[6]=sfnum[6])>0 &&
-X (desptr->sfnum[7]=sfnum[7])>0 &&
-X (desptr->sfnum[8]=sfnum[8])>0 &&
-X (desptr->sfnum[9]=sfnum[9])>0) ;
-#endif
-X m_seqnm++;
-X nseq++;
-X
-X if (m_seqnm >= max_sql) {
-X max_sql += MAXSQL;
-X if ((ldes=(struct sql *)realloc(ldes,max_sql*sizeof(struct sql)))
-X ==NULL) {
-X fprintf(stderr," failure to realloc ldes(%d) %ld\n",
-X max_sql,max_sql*sizeof(struct sql));
-X s_abort("cannot allocate ldes","");
-X exit(1);
-X }
-X }
-X
-X /* increment ptrs */
-X aa1prev = aa1;
-X
-X aa1 += n1+1+SEQ_PAD;
-X ntbuff += n1+1+SEQ_PAD;
-X
-X /* if the buffer is filled */
-X if (nseq >= max_buf_cnt || ntbuff >= m_msg0.pbuf_siz - m_msg0.maxn) {
-X /* provide filled buffer to workers */
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&nseq,1,1);
-X pvm_pkint(&ntbuff,1,1);
-X pvm_pkint(n1_arr,nseq,1);
-X pvm_pkint(aa1i_arr,nseq,1);
-X pvm_pkint(m_seqnm_arr,nseq,1);
-X pvm_send(pinums[node],STARTTYPE4);
-X
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)aa1_buf,ntbuff,1);
-X pvm_send(pinums[node],STARTTYPE5);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X
-X MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
-#endif
-X nseq = 0;
-X
-X aa1 = aa1_buf;
-X ntbuff = 0;
-X if (++node >= nnodes) node = FIRSTNODE;
-X }
-X
-X loop1:
-X if (lcont) {
-X memcpy(aa1,&aa1prev[n1-m_msg0.loff],m_msg0.loff);
-X aa1ptr = &aa1[m_msg0.loff];
-X ocont = lcont;
-X maxt = m_msg0.maxt3;
-X loffset += n1 - m_msg0.loff;
-X }
-X else {
-X if (ocont) *n1tot_cur = n1tot_v;
-X n1tot_v = 0;
-X n1tot_cur = NULL;
-X
-X ocont = 0;
-X aa1ptr = aa1;
-X maxt = m_msg0.maxn;
-X loffset = 0l;
-X }
-X }
-X }
-X } /* for (iln < nln) */
-X
-X if (nseq > 0) {
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&nseq,1,1);
-X pvm_pkint(&ntbuff,1,1);
-X pvm_pkint(n1_arr,nseq,1);
-X pvm_pkint(aa1i_arr,nseq,1);
-X pvm_pkint(m_seqnm_arr,nseq,1);
-X pvm_send(pinums[node],STARTTYPE4);
-X
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)aa1_buf,ntbuff,1);
-X pvm_send(pinums[node],STARTTYPE5);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X
-X MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
-#endif
-X }
-X
-X /* fprintf(stderr," all sequences sent\n"); */
-X
-X if (ntt.entries <= 0) {
-X s_abort("no reference library sequences found\n","");
-X }
-X
-X zero = 0;
-X for (node=FIRSTNODE; node < nnodes; node++) {
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&zero,1,1);
-X pvm_pkint(&zero,1,1);
-X pvm_pkint(n1_arr,1,1);
-X pvm_pkint(aa1i_arr,1,1);
-X pvm_pkint(m_seqnm_arr,1,1);
-X pvm_send(pinums[node],STARTTYPE4);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(n1_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(aa1i_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-X MPI_Send(m_seqnm_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
-#endif
-X }
-X
-X for (node = FIRSTNODE; node < nnodes; node++) {
-#ifdef PVM_SRC
-X bufid = pvm_recv(-1,STARTTYPE0);
-X pvm_bufinfo(bufid,NULL,NULL,&tid);
-X snode = tidtonode(tid);
-X pvm_upkint(&lcnt,1,1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(&lcnt,1,MPI_INT,MPI_ANY_SOURCE,STARTTYPE0,
-X MPI_COMM_WORLD,&mpi_status);
-X snode= mpi_status.MPI_SOURCE;
-#endif
-X wlsn [snode-FIRSTNODE] = lcnt;
-X fprintf(stderr," %d sequences at %d\n",lcnt,snode);
-X }
-X
-X /* print out all descriptions */
-X /*
-X for (node = FIRSTNODE; node < nnodes; node++)
-X for (lcnt = 0; lcnt < wlsn[node-FIRSTNODE]; lcnt ++)
-X printf("%2d:%3d\t%s\n",node,lcnt,ldes[lcnt].bline);
-X */
-X
-X /* Calculate cumulative totals and send to workers for a self search */
-X
-X clsn [0] = nclib= 0;
-X for (node = FIRSTNODE; node < nnodes-1; node++) {
-X /* clsn[] is for the next node */
-X clsn[node-FIRSTNODE+1] = nclib += wlsn[node-FIRSTNODE];
-X }
-X
-X if (m_msg0.self)
-X for (node = FIRSTNODE; node < nnodes; node++) {
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&clsn[node-FIRSTNODE],1,1);
-X pvm_send(pinums[node],STARTTYPE1);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(&clsn[node-FIRSTNODE],1,MPI_INT,node,STARTTYPE1,MPI_COMM_WORLD);
-#endif
-X fprintf(stderr,"sending lend: %d to worker %d\n",clsn[node-FIRSTNODE],node);
-X }
-X
-X last_msg_b[0] = m_msg0.nbr_seq = m_msg1.nbr_seq = ntt.entries;
-X
-X qres_bufsize = BFR;
-X /* if BFR is too big for this library, reduce it */
-X while ( ntt.entries*(m_msg0.nitt1+1)/(2*nnodes) < qres_bufsize) {
-X qres_bufsize /= 2;
-X if ((qres_bufsize%(m_msg0.nitt1+1))!= 0) {
-X qres_bufsize *= (m_msg0.nitt1+1);
-X break;
-X }
-X if (qres_bufsize < 50) break;
-X }
-X last_msg_b[1] = qres_bufsize;
-X
-X fprintf(stderr," using BFR=%d/%d\n",qres_bufsize,BFR);
-X
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(last_msg_b,2,1);
-X for (node=FIRSTNODE; node < nnodes; node++)
-X pvm_send(pinums[node],STARTTYPE0);
-#endif
-#ifdef MPI_SRC
-X for (node=FIRSTNODE; node < nnodes; node++)
-X MPI_Send(last_msg_b,2,MPI_INT,node,STARTTYPE0,MPI_COMM_WORLD);
-#endif
-X
-X tscan = tprev = s_time();
-X
-/**************************************
-X The logic of this section has been simplified to allow multistage
-X comparison functions to be used and alignments to be generated.
-X
-X send 1st query to workers
-X get next query sequence from host (m_msp1)
-X L1: get results from next-1 search (m_msp0)
-X sort the results of the next-1 search
-X (possibly) do additional stages of search
-X (possibly produce alignments for search
-X send next query to workers (m_msp1)
-X display result of next-1 search (m_msp0)
-X get next query sequence from host (m_msp1)
-X goto L1;
-X
-As a result of the interleaving, there must be two qm_msg structures,
-one for the next-1 sequence (which is required for labeling the
-output), and one for the next sequence (which is sent to the workers
-while the results are being displayed. qm_msp0 and qm_msp1 alternate
-between these two structures.
-***************************************/
-X
-/*
-X qm_msp0 points to the older qm_msg
-X qm_msp1 points to the newer qm_msg
-X the assignment below goes with curtype==ONETYPE
-*/
-X m_msp0 = &m_msg0;
-X m_msp1 = &m_msg1;
-X
-X qm_msp0 = &qm_msg0;
-X qm_msp1 = &qm_msg1;
-X
-X aa0p0 = aa00; /* aa0p0 is the "old" sequence */
-X aa0p1 = aa01; /* aa0p1 is the "new" sequence */
-X
-X last_params(aa00,m_msp0->n0,m_msp0,&pst,qm_msp0);
-X
-X /* process_hist() is called here to get find_zp(), and some other
-X structures initialized that would otherwise not be initialized
-X because z-scores are not being calculated */
-X
-X if (m_msp0->escore_flg) {
-X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
-X &m_msp0->hist,&m_msp0->pstat_void,0);
-X stats_done=1;
-X }
-X
-X if (m_msp0->qshuffle && qstats==NULL) {
-X if ((qstats =
-X (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
-X s_abort ("Cannot allocate qstats struct","");
-X }
-X nqstats = 0;
-X
-/* Send first query sequence to each worker */
-X
-X if (m_msg0.dfile[0] && (fdata=fopen(m_msg0.dfile,"w"))!=NULL)
-X fprintf(fdata,"%3d>%-50s\n",qlib,qm_msp0->libstr);
-X
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)qm_msp0,sizeof(qm_msg0),1);
-X if (qm_msp0->n0 > 0) {
-X pvm_pkbyte((char *)aa0p0,qm_msp0->n0+1+SEQ_PAD,1);
-X if (m_msg0.ann_flg) pvm_pkbyte((char *)m_msp0->aa0a,qm_msp0->n0+1,1);
-X }
-X for (node = FIRSTNODE; node < nnodes; node++)
-X pvm_send(pinums[node],MSEQTYPE);
-#endif
-#ifdef MPI_SRC
-X for (node = FIRSTNODE; node < nnodes; node++) {
-X MPI_Send(qm_msp0,sizeof(qm_msg0),MPI_BYTE,node,MSEQTYPE,MPI_COMM_WORLD);
-X if (qm_msp0->n0 > 0) {
-X MPI_Send(aa0p0,qm_msp0->n0+1+SEQ_PAD,MPI_BYTE,node,
-X MSEQTYPE1,MPI_COMM_WORLD);
-X if (m_msg0.ann_flg) {
-X if (m_msp0->aa0a == NULL) {
-X fprintf(stderr," m_msp0: %o/%oaa0a is null\n",m_msp0,m_msp0->aa0a);
-X }
-X MPI_Send(m_msp0->aa0a,qm_msp0->n0+1,MPI_BYTE,node, MSEQTYPE2,MPI_COMM_WORLD);
-X }
-X }
-X }
-#endif
-X
-X /* Get second query sequence (additional query sequences are read in
-X the main loop */
-X
-X m_msp1->n0 = qm_msp1->n0 =
-X QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
-X strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
-X if ((bp=strchr(qm_msp1->libstr,' '))!=NULL) *bp='\0';
-X
-X /* if annotations are included in sequence, remove them */
-X if (m_msg0.ann_flg) {
-X m_msp1->n0 = qm_msp1->n0 =
-X ann_scan(aa0p1,qm_msp1->n0,m_msp1,m_msp1->qdnaseq);
-#ifdef DEBUG
-X fprintf(stderr,"m_msp1->/aa0a is: %o/%o\n",m_msp1,m_msp1->aa0a);
-#endif
-X }
-X
-X if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
-X m_msg0.qdnaseq == SEQT_PROT &&
-X aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
-X aa0p1[m_msp1->n0++]=m_msg0.term_code;
-X aa0p1[m_msp1->n0]=0;
-X qm_msp1->n0 = m_msp1->n0;
-X }
-X
-X /* for ALTIVEC, must pad with 15 NULL's */
-X if (m_msp1->n0 > 0) {
-X for (i=0; i<SEQ_PAD+1; i++) {aa0p1[m_msp1->n0+i]=0;}
-X }
-X
-X qm_msp1->slist = 0;
-X qm_msp1->seqnm = qlib;
-X
-X last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
-X
-X sprintf(tmp_str," - %d %s", qm_msp1->n0, q_sqnam);
-X if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
-X strncat(qm_msp1->libstr,tmp_str,
-X sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
-X
-X naa0 = 0; /* reset node counter */
-X
-X /* sit in loop and collect results */
-X nbest = nopt = 0;
-X zbestcut = -BIGNUM;
-X
-X
-X while (1) {
-X
-#ifdef PVM_SRC
-X bufid = pvm_recv(-1,curtype);
-X pvm_bufinfo(bufid,NULL,NULL,&tid);
-X pvm_upkbyte((char *)&bestr[0],sizeof(struct comstr)*(qres_bufsize+1),1);
-X snode = tidtonode(tid);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(bestr,sizeof(struct comstr)*(qres_bufsize+1),
-X MPI_BYTE,MPI_ANY_SOURCE,curtype,MPI_COMM_WORLD,&mpi_status);
-X snode = mpi_status.MPI_SOURCE;
-#endif
-X
-X nres = bestr[qres_bufsize].seqnm & ~FINISHED;
-X
-#ifdef DEBUG
-X fprintf(stderr,"%d results from %d\n",nres,snode);
-#endif
-X
-X if (bestr[qres_bufsize].seqnm&FINISHED) { /* a worker is finished */
-X naa0++;
-X
-X /* fast_flag == 1 => send new sequences immediately */
-X fast_flag = ((m_msp0->stages==1) && !(m_msp0->markx & MX_M9SUMM) &&
-X (m_msp0->ashow == 0) && (m_msp0->last_calc_flg==0));
-X /* send a new query sequence if no more processing required */
-X if (fast_flag) {
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
-X if (qm_msp1->n0 != -1) {
-X pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
-X if (m_msp1->ann_flg) pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
-X }
-X pvm_send(tid,MSEQTYPE);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,snode,MSEQTYPE,MPI_COMM_WORLD);
-X if (qm_msp1->n0 != -1) {
-X MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,snode,MSEQTYPE1,MPI_COMM_WORLD);
-X if (m_msp1->ann_flg)
-X MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
-X }
-#endif
-X }
-X }
-X
-#ifdef DEBUG
-X if (pst.debug_lib)
-X fprintf(stderr," unpacking %d from %d; nbest %d\n",nres,snode,nbest);
-#endif
-X
-X /* this section is now more complex because can get groups of
-X sequence results; e.g. forward and reverse frame */
-X
-X t_best = t_rbest = t_qrbest = -1;
-X tm_escore = t_rescore = t_qrescore = FLT_MAX;
-X for (ires = 0; ires < nres; ires++) {
-X desptr = &ldes[bestr[ires].m_seqnm];
-X
-X /* save raw results */
-X if (fdata) {
-X strncpy(tlibstr,desptr->bline,10);
-X if ((bp=strchr(tlibstr,' '))!=NULL) *bp='\0';
-X fprintf(fdata,"%-10s\t%4d\t%4d\t%d\t%4d\t%4d\t%4d\t%8ld\n",
-X tlibstr,desptr->sfnum[0],desptr->n1,bestr[ires].frame,
-X bestr[ires].score[0],bestr[ires].score[1],bestr[ires].score[2],
-X desptr->lseek);
-X }
-X
-X i_score = bestr[ires].score[pst.score_ix];
-X e_score = bestr[ires].escore;
-X k_comp = bestr[ires].comp;
-X k_H = bestr[ires].H;
-X
-X t_n1 = desptr->n1;
-X if (i_score > t_best) {tm_best = t_best = i_score;}
-X if (e_score < tm_escore) tm_escore = e_score;
-X
-X if (m_msp0->qshuffle) {
-X if (bestr[ires].qr_score > t_qrbest)
-X t_qrbest = bestr[ires].qr_score;
-X if (bestr[ires].qr_escore < t_qrescore)
-X t_qrescore = bestr[ires].qr_escore;
-X
-X if (bestr[ires].frame==m_msp0->nitt1 &&
-X nqstats < m_msp0->shuff_max &&
-X bestr[ires].qr_score >= 0) {
-X qstats[nqstats].n1 = t_n1; /* save the best score */
-X qstats[nqstats].comp = bestr[ires].comp;
-X qstats[nqstats].H = bestr[ires].H;
-X qstats[nqstats].escore = t_qrescore;
-X qstats[nqstats++].score = t_qrbest;
-X t_qrbest = -1; /* reset t_qrbest, t_qrescore */
-X t_qrescore = FLT_MAX;
-X }
-X }
-X
-X if (pst.zsflag >= 10 && bestr[ires].r_score > t_rbest) {
-X t_rbest = bestr[ires].r_score;
-X t_rescore = bestr[ires].r_escore;
-X }
-X
-X if (nstats < MAXSTATS) {
-X if (bestr[ires].frame == m_msg0.nitt1) {
-X stats[nstats].n1 = t_n1;
-X stats[nstats].comp = k_comp;
-X stats[nstats].H = k_H;
-X
-X if (pst.zsflag > 10) {
-X tm_best = t_rbest;
-X tm_escore = t_rescore;
-X t_rbest = -1;
-X t_rescore = FLT_MAX;
-X }
-X stats[nstats].escore = tm_escore;
-X stats[nstats++].score = tm_best;
-X tm_escore = FLT_MAX;
-X t_best = -1;
-X }
-X }
-X else if (pst.zsflag >=0) { /* nstats >= MAXSTATS, zsflag >=0 */
-X if (!stats_done ) {
-X pst.n0 = qm_msp0->n0;
-X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
-X &m_msp0->hist, &m_msp0->pstat_void,0);
-X stats_done = 1;
-X kstats = nstats;
-X for (i=0; i<nbest; i++) {
-X bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
-X bptr[i]->escore,bptr[i]->n1,
-X bptr[i]->comp, m_msp0->pstat_void);
-X }
-X }
-#ifdef SAMP_STATS
-X if (!m_msp0->escore_flg) {
-X jstats = nrand(kstats++);
-X if (jstats < MAXSTATS) {
-X stats[jstats].n1 = t_n1; /* save the best score */
-X stats[jstats].comp = k_comp;
-X stats[jstats].H = k_H;
-X if (pst.zsflag >=10) t_best = t_rbest;
-X stats[jstats].score = t_best;
-X }
-X }
-#endif
-X }
-X
-X if (stats_done) {
-X zscore=(*find_zp)(i_score,e_score,desptr->n1,k_comp,
-X m_msp0->pstat_void);
-X if (bestr[ires].frame == m_msg0.nitt1) {
-X addhistz((*find_zp)(tm_best,tm_escore,t_n1,k_comp,
-X m_msp0->pstat_void),
-X &(m_msp0->hist));
-X t_best = t_rbest = -1;
-X }
-X
-X }
-X else zscore = (double) i_score;
-X
-X if (zscore > zbestcut) {
-X if (nbest>=MAXBEST) {
-X selectbestz(bptr, nbest-MAXBEST/4-1, nbest);
-X nbest -= MAXBEST/4;
-X zbestcut = bptr[nbest-1]->zscore;
-X best_flag = 0;
-X }
-X /* if zbestcut == -BIGNUM, bptr[] has not been reinitialized */
-X else if (best_flag) bptr[nbest]=&best[nbest];
-X
-X bptr[nbest]->m_seqnm = bestr[ires].m_seqnm ;
-X bptr[nbest]->seqnm = bestr[ires].seqnm;
-X bptr[nbest]->score[0] = bestr[ires].score[0];
-X bptr[nbest]->score[1] = bestr[ires].score[1];
-X bptr[nbest]->score[2] = bestr[ires].score[2];
-X bptr[nbest]->escore = bestr[ires].escore;
-X bptr[nbest]->segnum = bestr[ires].segnum;
-X bptr[nbest]->seglen = bestr[ires].seglen;
-X bptr[nbest]->comp = bestr[ires].comp;
-X bptr[nbest]->H = bestr[ires].H;
-X bptr[nbest]->zscore = zscore;
-X bptr[nbest]->wrkr = snode;
-X bptr[nbest]->desptr = desptr;
-X bptr[nbest]->lseek = desptr->lseek; /* needed for identifying alternate
-X strand scores from same sequence */
-X bptr[nbest]->n1 = desptr->n1;
-X bptr[nbest]->frame = bestr[ires].frame;
-X
-X /* this was used when -m 9 info was calculated in 1st scan */
-X /*
-X bptr[nbest]->sw_score = bestr[ires].sw_score;
-X if (bestr[ires].sw_score > -1) {
-X nopt++;
-X bptr[nbest]->a_len = bestr[ires].a_len;
-X bptr[nbest]->percent = bestr[ires].percent;
-X bptr[nbest]->gpercent = bestr[ires].gpercent;
-X bptr[nbest]->min0 = bestr[ires].min0;
-X bptr[nbest]->min1 = bestr[ires].min1;
-X bptr[nbest]->max0 = bestr[ires].max0;
-X bptr[nbest]->max1 = bestr[ires].max1;
-X bptr[nbest]->ngap_q = bestr[ires].ngap_q;
-X bptr[nbest]->ngap_l = bestr[ires].ngap_l;
-X }
-X else {
-X bptr[nbest]->percent = -1.0;
-X bptr[nbest]->min0 = bptr[nbest]->min1 = bptr[nbest]->max0 =
-X bptr[nbest]->max1 = 0;
-X }
-X */
-X
-X nbest++;
-X }
-X } /* for loop */
-X if (naa0 < nnodes-FIRSTNODE) continue;
-X
-X gstring2[0]='\0';
-X
-X /* get gstring2,3 - algorithm/parameter description */
-#ifdef PVM_SRC
-X bufid = pvm_recv(pinums[FIRSTNODE],PARAMTYPE);
-X pvm_upkbyte(gstring2,sizeof(gstring2),1);
-X pvm_upkbyte(gstring3,sizeof(gstring3),1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(gstring2,sizeof(gstring2),MPI_BYTE,FIRSTNODE,PARAMTYPE,
-X MPI_COMM_WORLD,&mpi_status);
-X MPI_Recv(gstring3,sizeof(gstring3),MPI_BYTE,FIRSTNODE,PARAMTYPE,
-X MPI_COMM_WORLD,&mpi_status);
-#endif
-X
-/* ********************** */
-/* analyze the results */
-/* ********************** */
-X
-X if (!stats_done) {
-X if (nbest < 20 || pst.zsflag <= 0) {
-X pst.zsflag_f = -1;
-X }
-X else {
-X pst.n0 = qm_msp0->n0;
-X pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
-X &m_msp0->hist, &m_msp0->pstat_void,stats_done);
-X
-X for (i=0; i<nbest; i++)
-X bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
-X bptr[i]->escore, bptr[i]->n1,
-X bptr[i]->comp, m_msp0->pstat_void);
-X }
-X }
-X
-X m_msp0->db.entries = ntt.entries;
-X m_msp0->db.length = ntt.length;
-X m_msp0->db.carry = ntt.carry;
-X
-X if (pst.zdb_size < 1) pst.zdb_size = ntt.entries;
-X
-X if (!qm_msp0->qshuffle) {
-X last_stats(aa0p0, m_msp0->n0,
-X stats,nstats, bptr,nbest, *m_msp0, pst,
-X &m_msp0->hist, &m_msp0->pstat_void);
-X }
-X else {
-X last_stats(aa0p0, m_msp0->n0,
-X qstats,nqstats, bptr,nbest, *m_msp0, pst,
-X &m_msp0->hist, &m_msp0->pstat_void);
-X }
-X
-X if (m_msp0->last_calc_flg) {
-X nbest = last_calc(bptr,nbest, *m_msp0, &pst,qm_msp0,
-X m_msp0->pstat_void);
-X }
-X
-X sortbeste(bptr,nbest);
-X scale_scores(bptr,nbest,m_msp0->db,pst,m_msp0->pstat_void);
-X
-X if (pst.zsflag >= 0 && bptr[0]->escore >= m_msg0.e_cut) goto no_results;
-X
-X /* else sortorder(bptr,nbest,wlsn,nnodes); */
-X
-/* if more than one stage or markx==9, calculate opt scores or do alignment */
-/* send results to workers as available */
-X
-X if (m_msg0.stages > 1 || m_msg0.markx & MX_M9SUMM) {
-X
-X /* to determine how many sequences to re-align (either for
-X do_opt() or calc_id() we need to modify m_msg.mshow to get
-X the correct number of alignments */
-X
-X if (m_msg0.mshow_flg != 1 && pst.zsflag >= 0) {
-X for (i=0; i<nbest && bptr[i]->escore< m_msg0.e_cut; i++) {}
-X m_msg0.mshow = i;
-X }
-X
-X /* allocate space for a_struct info */
-X if (m_msg0.markx & MX_M9SUMM && m_msg0.mshow > 0) {
-X if ((aln_d_base=(struct a_struct *)
-X calloc((size_t)m_msg0.mshow,sizeof(struct a_struct)))==NULL) {
-X fprintf(stderr," cannot allocate a_struct %d\n", m_msg0.mshow);
-X exit(1);
-X }
-X
-X for (is = 0; is < m_msg0.mshow; is++ ) {
-X bptr[is]->aln_d = &aln_d_base[is];
-X }
-X }
-X
-X do_stage2(bptr,m_msg0.mshow, *m_msp0, DO_OPT_FLG, qm_msp0);
-X }
-X
-X no_results:
-X tdone = s_time();
-X tddone = time(NULL);
-X
-X /* changed from >> to >>> because qm_msp0->libstr is missing '>' */
-X fprintf (outfd, "%3d>>>%s\n", qlib,qm_msp0->libstr);
-X
-X /* make certain that m_msp0->n0, libstr are current */
-X m_msp0->n0 = qm_msp0->n0;
-X /* strncpy(m_msp0->libstr,qm_msp0->libstr,sizeof(m_msg0.libstr)); */
-X
-X prhist (outfd,*m_msp0,pst,m_msp0->hist,nstats,m_msp0->db,gstring2);
-X
-X if (bptr[0]->escore < m_msg0.e_cut) {
-X
-X showbest (outfd, bptr, nbest, qlib, m_msp0,pst,ntt,gstring2);
-X
-X if (m_msg0.markx & MX_M9SUMM) {
-X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
-X m_msg0.tname,qlib,qm_msp0->libstr,
-X (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
-X m_msg0.lname);
-X }
-X else if (m_msg0.markx & MX_M10FORM) {
-X if ((bp=strchr(qm_msp0->libstr,' '))!=NULL) *bp = '\0';
-X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
-X m_msg0.tname,qlib,qm_msp0->libstr,
-X (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
-X m_msg0.lname);
-X if (bp!=NULL) *bp=' ';
-X fprintf(outfd,"; mp_name: %s\n",argv[0]);
-X fprintf(outfd,"; mp_ver: %s\n",mp_verstr);
-X fprintf(outfd,"; mp_argv:");
-X for (i=0; i<argc; i++)
-X fprintf(outfd," %s",argv[i]);
-X fputc('\n',outfd);
-X fputs(gstring3,outfd);
-X fputs(hstring1,outfd);
-X }
-X
-X /* ashow is -1 if not set, -d 0 indicates no alignments, > 0 if set */
-X /* if ashow is -1, m_msg.nshow (set by e_cut above) sets limit
-X in showalign */
-X
-X if (m_msp0->ashow != 0) {
-X /* showalign needs m_msp->qtitle, so fill it in */
-X strncpy(m_msp0->qtitle,qm_msp0->libstr,MAX_FN-1);
-X m_msp0->qtitle[MAX_FN-1]='\0';
-X showalign (outfd, bptr, nbest, qlib, *m_msp0, pst, gstring2);
-X }
-X }
-X else {
-X if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) {
-X fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
-X m_msg0.tname,qlib,qm_msp0->libstr,(m_msg0.revcomp ? "-":"\0"), qm_msg0.n0, m_msg0.sqnam,
-X m_msg0.lname);
-X fprintf(outfd,">>>!!! No sequences with E() < %f\n",m_msg0.e_cut);
-X }
-X else fprintf(outfd,"!! No sequences with E() < %f\n",m_msg0.e_cut);
-X }
-X
-X if (! (m_msg0.markx & (MX_M9SUMM + MX_M10FORM))) {
-X fprintf(outfd,"/** search time: ");
-X ptime(outfd,tdone-tprev);
-X fprintf(outfd," **/\n");
-X tprev = tdone;
-X }
-X else if (m_msg0.markx & MX_M9SUMM) {
-X if (aln_d_base != NULL) {
-X free((void *)aln_d_base);
-X aln_d_base = NULL;
-X }
-X fprintf(outfd,">>>***\n");
-X fprintf(outfd,"/** %s **/\n",gstring2);
-X fprintf(outfd,"/** %s **/\n",m_msp0->hist.stat_info);
-X fprintf(outfd,">>><<<\n");
-X }
-X else if (m_msg0.markx & MX_M10FORM) {
-X fprintf(outfd,">>><<<\n");
-X }
-X fflush(outfd);
-X
-/* *********************** */
-/* end of analysis/display */
-/* *********************** */
-X
-X
-/* *********************** */
-/* start the next search */
-/* *********************** */
-X
-X if (fdata) { /* label the results file */
-X fprintf(fdata,"/** %s **/\n",gstring2);
-X fprintf(fdata,"%3d>%-50s\n",qlib-1,qm_msp1->libstr);
-X fflush(fdata);
-X }
-X
-X if (m_msp1->escore_flg) { /* re-initialize some stats stuff before search */
-X pst.zsflag_f = process_hist(stats,nstats,*m_msp1,pst,
-X &m_msp1->hist,&m_msp1->pstat_void,0);
-X stats_done=1;
-X }
-X else stats_done = 0;
-X
-X /* set up qstats if necessary - different queries have different qshuffle */
-X if (m_msp1->qshuffle && qstats==NULL) {
-X if ((qstats =
-X (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
-X s_abort ("Cannot allocate qstats struct","");
-X }
-X
-X nqstats = nstats = 0;
-X
-X /* send new qm_msp, sequence */
-X if (!fast_flag) {
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
-X if (qm_msp1->n0 != -1) {
-X pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
-X if (m_msp1->ann_flg) {
-X pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
-X }
-X }
-X for (node = FIRSTNODE; node < nnodes; node++)
-X pvm_send(pinums[node],MSEQTYPE);
-#endif
-#ifdef MPI_SRC
-X for (node=FIRSTNODE; node < nnodes; node++) {
-X MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,node,MSEQTYPE,
-X MPI_COMM_WORLD);
-X if (qm_msp1->n0 != -1) {
-X MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,node,MSEQTYPE1,MPI_COMM_WORLD);
-X if (m_msp1->ann_flg)
-X MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
-X }
-X }
-#endif
-X }
-X
-X qlib++;
-X if (qm_msp1->n0 != -1) {
-X qtt.entries++;
-X qtt.length += qm_msp1->n0;
-X }
-X else goto done;
-X
-/* ******************************** */
-/* flip m_msg, qm_msg, aa0 pointers */
-/* ******************************** */
-X
-X naa0 = 0;
-X best_flag = 1;
-X nbest = nopt = 0;
-X zbestcut = -BIGNUM;
-X if (curtype == ONETYPE) {
-X curtype = TWOTYPE;
-X qm_msp0 = &qm_msg1;
-X qm_msp1 = &qm_msg0;
-X m_msp0 = &m_msg1;
-X m_msp1 = &m_msg0;
-X aa0p0 = aa01;
-X aa0p1 = aa00;
-X }
-X else {
-X curtype = ONETYPE;
-X qm_msp0 = &qm_msg0;
-X qm_msp1 = &qm_msg1;
-X m_msp0 = &m_msg0;
-X m_msp1 = &m_msg1;
-X aa0p0 = aa00;
-X aa0p1 = aa01;
-X }
-X
-X
-/* **********************************************************/
-/* all library sequences are done get next library sequence */
-/* **********************************************************/
-X
-X m_msp1->n0 = qm_msp1->n0 =
-X QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
-X strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
-X
-X if ((qlib+1) >= m_msg0.ql_stop) { qm_msp1->n0 = m_msp1->n0 = -1;}
-X
-X if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
-X m_msg0.qdnaseq==SEQT_PROT &&
-X aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
-X aa0p1[m_msp1->n0++]=m_msg0.term_code;
-X aa0p1[m_msp1->n0]=0;
-X qm_msp1->n0 = m_msp1->n0;
-X }
-X
-X /* for ALTIVEC, must pad with 15 NULL's */
-X if (m_msg0.n0 > 0) {
-X for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
-X }
-X
-X qm_msp1->slist = 0;
-X /*
-X leng = strlen (qm_msp1->libstr);
-X sprintf (&(qm_msp1->libstr[leng]), " %d %s", qm_msp1->n0, q_sqnam);
-X */
-X sprintf(tmp_str," %d %s", qm_msp1->n0, q_sqnam);
-X if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
-X strncat(qm_msp1->libstr,tmp_str,
-X sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
-X qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
-X
-X qm_msp1->seqnm = qlib;
-X
-X last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
-X
-X } /* while loop */
-X
-X /* ******************** */
-X /* end of library while */
-X /* ******************** */
-X
-X done:
-X tdone = s_time();
-X if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) fputs(">>>///\n",outfd);
-X printsum(outfd);
-X if (outfd!=stdout) printsum(stdout);
-X printsum(stderr);
-#ifdef PVM_SRC
-X pvm_exit();
-#endif
-#ifdef MPI_SRC
-X MPI_Finalize();
-#endif
-X
-X exit(0);
-} /* End of main program */
-X
-/* printsum() - write the end-of-run summary to fd.
-X
-X   Reports, in order: the query residue/sequence totals (qtt), the
-X   library residue/sequence totals (ntt), the processor and worker
-X   counts, the program version with the start/done timestamps, the
-X   loading and scan times, and the search function that was used.
-X
-X   fd must be an open, writable stream; nothing is returned.  Every
-X   other input (qtt, ntt, nnodes, tdstart, tddone, tstart, tscan,
-X   tdone, mp_verstr, prog_func, verstr) is declared outside this
-X   function.
-*/
-void
-printsum(FILE *fd)
-{
-X  double db_tt;
-X  const char *ts;
-X  char tstr1[26], tstr2[26];
-X
-X  /* ctime() can return NULL for a time it cannot convert; use an empty
-X     string instead of handing NULL to strncpy() */
-X  ts = ctime(&tdstart);
-X  strncpy(tstr1,(ts != NULL ? ts : ""),sizeof(tstr1));
-X  ts = ctime(&tddone);
-X  strncpy(tstr2,(ts != NULL ? ts : ""),sizeof(tstr2));
-X
-X  /* the ctime() text is 24 characters followed by a newline; cutting
-X     at [24] drops the newline and guarantees termination */
-X  tstr1[24]=tstr2[24]='\0';
-X
-X  /* Print timing to output file as well */
-X
-X  /* query totals: when carry is non-zero the count no longer fits in
-X     length alone, so rebuild it as carry*LONG_MAX + length and print
-X     it as a whole number (every digit, no fraction) */
-X  if (qtt.carry==0) {
-X    fprintf(fd, "\n%ld residues in %d query sequences\n", qtt.length, qtt.entries);
-X  }
-X  else {
-X    db_tt = (double)qtt.carry*(double)LONG_MAX + (double)qtt.length;
-X    fprintf(fd, "\n%.0f residues in %d query sequences\n", db_tt, qtt.entries);
-X  }
-X
-X  /* library totals, same carry handling as the query totals */
-X  if (ntt.carry==0) {
-X    fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
-X  }
-X  else {
-X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
-X    fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt.entries);
-X  }
-X
-X  /* workers are nodes FIRSTNODE..nnodes-1; the processor count is one
-X     more than the worker count */
-X  fprintf(fd," %d processors (%d workers) were used\n",
-X	  nnodes-FIRSTNODE+1,nnodes-FIRSTNODE);
-X  fprintf(fd," Pvcomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
-X  fprintf(fd," Loading time: ");
-X  ptime(fd, tscan - tstart);
-X  fprintf (fd," Scan time: ");
-X  ptime (fd, tdone - tscan);
-X  fprintf (fd,"\n");
-X  fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
-}
-X
-/* fsigint() - abandon the run after an interrupt.
-X
-X   Stamps the finish times, writes an "interrupted" marker and the run
-X   summary, shuts down the parallel environment and exits with
-X   status 1.  Presumably installed as the SIGINT handler; the
-X   registration is not in this part of the file.
-*/
-void fsigint()
-{
-X  int wkr;
-X
-X  /* printsum() reports these as the "done" time and the scan time */
-X  tdone = s_time();
-X  tddone = time(NULL);
-X
-X  /* the marker goes to the results file (when it is not stdout) and
-X     always to stderr */
-X  if (outfd != stdout) {
-X    fputs("/*** interrupted ***/\n",outfd);
-X  }
-X  fputs("/*** interrupted ***/\n",stderr);
-X
-X  /* summary to stdout first, then to the results file if distinct */
-X  printsum(stdout);
-X  if (outfd != stdout) {
-X    printsum(outfd);
-X  }
-X
-#ifdef PVM_SRC
-X  /* kill each worker task, then leave PVM */
-X  for (wkr=FIRSTNODE; wkr<nnodes; wkr++) {
-X    pvm_kill(pinums[wkr]);
-X  }
-X  pvm_exit();
-#endif
-#ifdef MPI_SRC
-X  MPI_Abort(MPI_COMM_WORLD,1);
-X  MPI_Finalize();
-#endif
-X  exit(1);
-}
-SHAR_EOF
-chmod 0644 p2_complib.c ||
-echo 'restore of p2_complib.c failed'
-Wc_c="`wc -c < 'p2_complib.c'`"
-test 55578 -eq "$Wc_c" ||
- echo 'p2_complib.c: original size 55578, current size' "$Wc_c"
-fi
-# ============= p2_workcomp.c ==============
-if test -f 'p2_workcomp.c' -a X"$1" != X"-c"; then
- echo 'x - skipping p2_workcomp.c (File already exists)'
-else
-echo 'x - extracting p2_workcomp.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'p2_workcomp.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: p2_workcomp.c,v 1.49 2007/01/02 17:24:36 wrp Exp $ */
-X
-/* This version is modified to read all files, query and database,
-X through the manager process. Workers will now receive their
-X database from the manager, rather than reading it themselves. This
-X cuts down considerably on NFS traffic, simplifies searches of
-X multiple files, and allows use of clusters of slave nodes that do
-X not have NFS access */
-X
-/* September, 1994 - this version has been modified to do two kinds of
-X searches, a general library search, or list of library sequences search.
-X The latter would be used to generate optimized scores for fasta and
-X to produce alignments */
-X
-/* modified July, 2002, to provide query shuffle */
-X
-/* modified October, 2005, to support struct a_res_str a_res -
-X coordinates of alignment in aa0[], aa1[]. Future modifications
-X will cause do_walign to be run only once - subsequent calls for
-X seqc[0,1] can be filled using a_res, by adding a_res to the
-X struct sqs2 array.
-X
-X 19-March-2006 - modifications to call do_walign() only once, and
-X use the resulting a_res structure for subsequent calls to calc_id,
-X calcons, calcons_a, have been implemented. Also, the -V option is
-X now valid with the parallel programs.
-X
-X 31-May-2006 - some functions (e.g. dropfs and dropff) do not store
-X complete information in a_res - thus they cannot use this shortcut
-X (yet).
-X
-*/
-X
-X
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#ifdef PVM_SRC
-#include "pvm3.h"
-#endif
-X
-#ifdef MPI_SRC
-#include "mpi.h"
-#endif
-X
-/*
-#define PvmDataDefault 0
-#define PvmTaskDefault 0
-*/
-#include "msg.h"
-#include "defs.h"
-#include "param.h"
-#include "w_mw.h"
-#include "structs.h"
-X
-#ifdef MPI_SRC
-#define XTERNAL
-#endif
-#include "upam.h"
-#include "uascii.h"
-X
-#ifdef PVM_SRC
-int worker, mytid;
-int nnodes, pinums[MAXNOD];
-#endif
-X
-#include "drop_func.h"
-X
-extern void alloc_pam (int d1, int d2, struct pstruct *ppst); /* allocate ppst->pam12,pam12x */
-extern int **alloc_pam2p (int len, int nsq);
-extern void w_init ();
-extern void irand(int);
-extern void revcomp(unsigned char *, int, int *);
-X
-X
-X
-extern void initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz);
-extern void freeseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca);
-X
-void send_bestr(int, int, struct comstr *, int, int);
-void send_bestr2(int, struct comstr2 *, int);
-void send_code(int, char *, int);
-X
-extern void get_param (struct pstruct *ppst, char *pstring2, char *pstring3);
-extern void update_param(struct qmng_str *qm_msg, struct mngmsg *m_msg,
-X struct pstruct *ppst);
-extern int shuffle(unsigned char *, unsigned char *, int);
-extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
-X
-extern char err_str[];
-X
-/* local function declarations */
-void free_ares(struct sqs2 *, int itt, int *, int walign_cnt, int worker);
-X
-X
-X
-void w_abort (p, p1)
-char *p, *p1;
-{
-X fprintf (stderr, " %s %s\n", p, p1);
-#ifdef PVM_SRC
-X pvm_exit();
-X exit (1);
-#endif
-#ifdef MPI_SRC
-X MPI_Abort(MPI_COMM_WORLD,1);
-#endif
-}
-X
-#ifdef PVM_SRC
-main ()
-#endif
-#ifdef MPI_SRC
-void
-workcomp(int worker)
-#endif
-{
-X unsigned char *aa0[6], *aa1s, *aa0s; /* Query and library sequences */
-X struct mngmsg m_msg; /* start message from manager to worker 1 */
-X struct qmng_str qm_msg; /* updated for each query */
-X int last_msg_b[10]; /* last set of numbers */
-X struct sqs2 *seqpt; /* sequence pointers for chunk */
-X int seqbuf_n,seqbuf_s; /* number of sequences, length of sequences */
-X int max_sql; /* maximum number of sequences/node */
-X int *n1_arr; /* array of sequence lengths in buffer */
-X int *m_seqnm_arr; /* array of sequence numbers in buffer */
-X int *aa1i_arr; /* array of offsets into the buffer */
-X unsigned char *seq_buf; /* space for sequence data */
-X int ntx;
-X int nsq; /* effective alphabet size */
-X long curtype = ONETYPE; /* current send message type */
-X int ieven=0; /* flag for window shuffle */
-X int cur_n0;
-X int n1, n1over; /* length of query, library sequences */
-X struct comstr bestr[BFR+1]; /* best structures */
-X struct comstr2 bestr2[BFR2+1]; /* best structures */
-X struct a_struct aln, *aln_dp;
-X int qres_bufsize; /* results buffer size */
-X int bestcnt = 0; /* how many best structures are full */
-X char gstring2[MAX_STR]; /* parameter string for manager */
-X char gstring3[MAX_STR]; /* parameter string for manager */
-X struct pstruct pst; /* parameter structure */
-X struct rstruct rst, qrst, rrst; /* results structure */
-X void *f_str[6], *qf_str;
-X int sw_score;
-X int lcnt, count, seqnm; /* counters */
-X int *walign_done[2], walign_cnt[2]; /* index of current valid a_res in seqpt */
-X int have_walign;
-X int *tres; /* allocated storage for seqpt[].a_res[].res */
-X int lend; /*global library sequence number information */
-X int lsn; /* library sequence number */
-X struct stage2_str *liblist=NULL; /* list of sequences to search */
-X int i, j; /* my turn to send sequence descriptions */
-X char libstr[21];
-X char errstr[128];
-X int itt=0;
-X int bufid;
-X char *seqc0, *seqc0a, *seqc1, *seqca;
-X char *seqc, *seqc_buff;
-X int seqc_buff_cnt, seqc_buff_len, seqc_flag;
-X int maxc, lc, nc, nident, ngap, aln_code_n;
-X float percent, gpercent;
-X int old_shuffle=0; /* did a qshuffle last time */
-X int hosttid=0;
-X char worker_str[5];
-X
-#ifdef MPI_SRC
-X MPI_Status mpi_status;
-#endif
-X
-#ifdef PVM_SRC
-X mytid = pvm_mytid();
-X hosttid = pvm_parent();
-#endif
-X
-X w_init(); /* sets up default sascii, hsq, sq */
-X
-X /* Allocate space for the query sequence */
-X if ((aa0[0] = (unsigned char *) malloc ((MAXTST+2+SEQ_PAD)*sizeof (char))) == NULL) {
-X w_abort ("Unable to allocate sequence array[0] - exiting!","");
-X }
-X *aa0[0]='\0';
-X aa0[0]++;
-X
-X /* initial messages set up various parameter structures:
-X
-X STARTTYPE0: &nnodes
-X pinums
-X &m_msg
-X
-X STARTTYPE1 &pst
-X
-X STARTTYPE2 pam12
-X STARTTYPE3 pam12x
-X */
-X
-#ifdef PVM_SRC
-#ifdef ROUTE_DIRECT
-X pvm_setopt(PvmRoute,PvmRouteDirect);
-#endif
-X /* get number of nodes, pinums */
-X bufid = pvm_recv(hosttid,STARTTYPE0);
-X pvm_upkint(&nnodes,1,1);
-X pvm_upkint(pinums,nnodes,1);
-X pvm_upkbyte((char *)&m_msg,(int)sizeof(m_msg),1);
-X worker = tidtonode(mytid);
-X pvm_freebuf(bufid);
-#endif
-X
-X sprintf(worker_str,"@%d",worker);
-X
-#ifdef MPI_SRC
-X MPI_Recv(&m_msg,sizeof(m_msg),MPI_BYTE,hosttid,STARTTYPE0,MPI_COMM_WORLD,
-X &mpi_status);
-#endif
-X
-X /* the aln structure needs some information from m_msg0.aln */
-X memcpy(&aln,&m_msg.aln,sizeof(struct a_struct));
-X
-X /*
-X fprintf(stderr,"d1: %d d2: %d\n",m_msg.pamd1,m_msg.pamd2);
-X */
-X
-X /* get pst params */
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,STARTTYPE1);
-X pvm_upkbyte((char *)&pst,(int)sizeof(pst),1);
-X /* 31t nsq = pst.nsq; */
-X pvm_upkbyte((char *)pascii,(int)sizeof(aascii),1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(&pst,(int)sizeof(pst),MPI_BYTE,hosttid,STARTTYPE1,MPI_COMM_WORLD,
-X &mpi_status);
-X
-X MPI_Recv(pascii,(int)sizeof(aascii)/sizeof(int),MPI_INT,hosttid,STARTTYPE1,MPI_COMM_WORLD,
-X &mpi_status);
-#endif
-X
-X if (pst.ext_sq_set) { nsq = pst.nsqx;}
-X else { nsq = pst.nsq;}
-X
-X aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
-X if (m_msg.qframe == 2) {
-X if ((aa0[1]=(unsigned char *)malloc((MAXTST+2)*sizeof (char)))==NULL)
-X w_abort ("Unable to allocate sequence[1] array - exiting!","");
-X *aa0[1]='\0';
-X aa0[1]++;
-X }
-X
-X if ((aa1s=(unsigned char *)malloc((m_msg.max_tot+1)*sizeof (char)))==NULL)
-X w_abort ("Unable to allocate shuffled library sequence", "");
-X *aa1s=0;
-X aa1s++;
-X
-X irand(0); /* necessary for shuffled sequences */
-X
-X /* this function allocates pam12, pam12x
-X assigns pst.pam[0][0]=pam12, pst.pam[1][0] = pam12x
-X and sets up the correct pst.pam[0][0][0] pointers */
-X
-X alloc_pam(m_msg.pamd1,m_msg.pamd2,&pst);
-X
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,STARTTYPE2);
-X pvm_upkint(pam12,m_msg.pamd1*m_msg.pamd2,1);
-X pvm_freebuf(bufid);
-X
-X bufid = pvm_recv(hosttid,STARTTYPE3);
-X pvm_upkint(pam12x,m_msg.pamd1*m_msg.pamd2,1);
-X pvm_freebuf(bufid);
-#endif
-X
-#ifdef DEBUG
-X if (worker==FIRSTNODE) {
-X fprintf(stderr,"ext?: %d\tnsq: %d\tnsqx: %d\n",pst.ext_sq_set,pst.nsq, pst.nsqx);
-X for (i=1; i<5; i++) {
-X for (j=1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sq[i],pst.sq[j],pst.pam2[0][i][j]);
-X fprintf(stderr,"\n");
-X }
-X for (i=pst.nsq+1; i<pst.nsq+5; i++) {
-X for (j=pst.nsq+1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[0][i][j]);
-X fprintf(stderr,"\n");
-X }
-X
-X for (i=1; i<5; i++) {
-X for (j=1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[1][i][j]);
-X fprintf(stderr,"\n");
-X }
-X for (i=pst.nsq+1; i<pst.nsq+5; i++) {
-X for (j=pst.nsq+1; j <= i; j++) fprintf(stderr," %c,%c:%2d",pst.sqx[i],pst.sqx[j],pst.pam2[1][i][j]);
-X fprintf(stderr,"\n");
-X }
-X }
-#endif
-X
-#ifdef MPI_SRC
-X MPI_Recv(pam12,m_msg.pamd1*m_msg.pamd2,MPI_INT,hosttid,STARTTYPE2,
-X MPI_COMM_WORLD,&mpi_status);
-X
-X MPI_Recv(pam12x,m_msg.pamd1*m_msg.pamd2,MPI_INT,hosttid,STARTTYPE3,
-X MPI_COMM_WORLD,&mpi_status);
-#endif
-X
-/*
-X We have the PAM matrices - get the library sequences
-*/
-X
-X /* Allocate space for the sequences */
-X max_sql = MAXSQL/2;
-X
-X if ((seqpt=(struct sqs2 *)calloc(max_sql,sizeof(struct sqs2)))==NULL)
-X w_abort("cannot allocate seqpt(sqs2)","");
-X
-X if ((n1_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
-X w_abort("cannot allocate n1_arr","");
-X
-X if ((aa1i_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
-X w_abort("cannot allocate n1_arr","");
-X
-X if ((m_seqnm_arr=(int *)calloc(m_msg.pbuf_siz+1,sizeof(int)))==NULL)
-X w_abort("cannot allocate m_seqnm_arr","");
-X
-/*****************************************************************/
-/* This section gets all the database sequences from the manager */
-/*****************************************************************/
-X
-X lcnt = 0;
-X while (1) {
-#ifdef PVM_SRC
-X /* get the number of sequences, sequence lengths */
-X bufid = pvm_recv(hosttid,STARTTYPE4);
-X pvm_upkint(&seqbuf_n,1,1); /* number of sequences */
-X pvm_upkint(&seqbuf_s,1,1); /* size of sequence buffer */
-X pvm_upkint(n1_arr,seqbuf_n,1); /* length of each sequence in buffer */
-X pvm_upkint(aa1i_arr,seqbuf_n,1); /* indexes for each sequence */
-X pvm_upkint(m_seqnm_arr,seqbuf_n,1); /* number of each library sequence */
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(&seqbuf_n,1,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
-X &mpi_status);
-X MPI_Recv(&seqbuf_s,1,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
-X &mpi_status);
-X MPI_Recv(n1_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
-X &mpi_status);
-X MPI_Recv(aa1i_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
-X &mpi_status);
-X MPI_Recv(m_seqnm_arr,seqbuf_n,MPI_INT,hosttid,STARTTYPE4,MPI_COMM_WORLD,
-X &mpi_status);
-#endif
-X
-X if (seqbuf_n <= 0) break;
-#ifdef DEBUG
-X /*
-X fprintf(stderr,"[%d] seqbuf_n: %d seqbuf_s: %d\n",
-X worker,seqbuf_n,seqbuf_s);
-X fprintf(stderr,"[%d] lcnt: %d n1: %d seqnm %d\n",
-X worker,0,n1_arr[0],m_seqnm_arr[0]);
-X fprintf(stderr,"[%d] lcnt: %d n1: %d seqnm %d\n",
-X worker,1,n1_arr[1],m_seqnm_arr[1]);
-X */
-#endif
-X
-X /* allocate space for sequences */
-X if ((seq_buf = (unsigned char *)calloc((size_t)seqbuf_s+1,sizeof(char)))
-X ==NULL) {
-X w_abort("cannot allocate tmp_seq","");
-X }
-X seq_buf++; /* leave a '\0' at the start */
-X
-X /* get the sequence buffer */
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,STARTTYPE5);
-X pvm_upkbyte((char *)seq_buf,seqbuf_s,1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(seq_buf,seqbuf_s,MPI_BYTE,hosttid,STARTTYPE5,MPI_COMM_WORLD,
-X &mpi_status);
-#endif
-X
-X /* now we have everything - update the pointers */
-X if (lcnt+seqbuf_n >= max_sql) {
-X max_sql += max(MAXSQL/2,seqbuf_n);
-X if ((seqpt=(struct sqs2 *)realloc(seqpt,max_sql*sizeof(struct sqs2)))
-X ==NULL)
-X w_abort("cannot allocate seqpt(sqs2)","");
-X }
-X
-X /* convert from offsets to pointers into buffer */
-X /* ntx = 0; */
-X for (i=0; i<seqbuf_n; i++,lcnt++) {
-X seqpt[lcnt].n1 = n1_arr[i];
-X seqpt[lcnt].m_seqnm = m_seqnm_arr[i];
-X seqpt[lcnt].aa1 = &seq_buf[aa1i_arr[i]];
-X /* ntx += n1_arr[i]+1 + SEQ_PAD */
-X
-#ifdef DEBUG
-X /* must have null's at both ends of sequence */
-X if (seqpt[lcnt].aa1[-1]!= '\0') {
-X fprintf(stderr,"Missing null at start: %d %d\n",
-X lcnt,seqpt[lcnt].aa1[-1]);
-X seqpt[lcnt].aa1[-1]='\0';
-X }
-X if (seqpt[lcnt].aa1[seqpt[lcnt].n1]!= '\0') {
-X fprintf(stderr,"Missing null at end: %d %d\n",
-X lcnt,seqpt[lcnt].aa1[seqpt[lcnt].n1]);
-X seqpt[lcnt].aa1[seqpt[lcnt].n1]='\0';
-X }
-#endif
-X }
-X }
-X /* all done - lcnt has the total number of library sequences */
-X
-#ifdef DEBUG
-X if (lcnt > 0)
-X for (i=0; i<10; i++) {
-X for (j=0; j<10; j++) libstr[j]=pst.sq[seqpt[i].aa1[j]];
-X libstr[10]='\0';
-X fprintf(stderr,"[%d] n1: %d seqnm: %d aa1: %s\n",
-X worker,seqpt[i].n1,seqpt[i].m_seqnm,libstr);
-X }
-#endif
-X
-X /* send back the number of descriptions received */
-X
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&lcnt,1,1);
-X pvm_send(hosttid,STARTTYPE0);
-#endif
-#ifdef MPI_SRC
-/* p4_dprintf(" have %d descriptions to send\n",lcnt); */
-X MPI_Send(&lcnt,1,MPI_INT,hosttid,STARTTYPE0,MPI_COMM_WORLD);
-#endif
-X
-/*****************************************************************/
-/* Library reads are finished, get ready to do searches */
-/*****************************************************************/
-X
-X /* get last set of numbers */
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,STARTTYPE0);
-X pvm_upkint(last_msg_b,2,1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(last_msg_b, 2, MPI_INT, hosttid, STARTTYPE0, MPI_COMM_WORLD,
-X &mpi_status);
-#endif
-X m_msg.nbr_seq = last_msg_b[0];
-X qres_bufsize = last_msg_b[1];
-X
-#ifdef DEBUG
-#ifdef PVM_SRC
-X fprintf(stderr,"[%d] have nbr_seq %d qres_bufsize %d\n",worker,
-X m_msg.nbr_seq, qres_bufsize);
-#endif
-#ifdef MPI_SRC
-X /* p4_dprintf("[%d] have nbr_seq %d qres_bufsize %d\n",worker,
-X m_msg.nbr_seq, qres_bufsize);
-X */;
-#endif
-#endif
-X /* If self search, receive sequence numbering data */
-X if (m_msg.self) {
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,STARTTYPE1);
-X pvm_upkint(&lend,1,1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(&lend,1,MPI_INT,hosttid,STARTTYPE1,MPI_COMM_WORLD,&mpi_status);
-#endif
-X }
-X
-X /* allocate space for a_res flag array */
-X
-X if ((walign_done[0] = (int *)calloc(lcnt,sizeof(int)))==NULL) {
-X w_abort("cannot allocate walign_done");
-X }
-X walign_cnt[0]=0;
-X
-X if ((walign_done[1] = (int *)calloc(lcnt,sizeof(int)))==NULL) {
-X w_abort("cannot allocate walign_done");
-X }
-X walign_cnt[1]=0;
-X
-X /* was commented in for only FASTX/TFASTX, but do it always to
-X simplify */
-X aainit(pst.tr_type, pst.debug_lib);
-X pst.maxlen = m_msg.maxn;
-X
-/*****************************************************************/
-/* Main search loop, which calles do_work() repeatedly */
-/*****************************************************************/
-X
-X cur_n0 = 0;
-X while (1) {
-/*
-#ifdef DEBUG
-#ifdef PVM_SRC
-X fprintf(stderr," W: %d waiting MSEQTYPE\n",worker);
-#endif
-#ifdef MPI_SRC
-X p4_dprintf(" W: %d waiting MSEQTYPE\n",worker);
-#endif
-#endif
-*/
-X
-/*****************************************************************/
-/* Wait for a query sequence from the manager */
-/*****************************************************************/
-X
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,MSEQTYPE);
-X pvm_upkbyte((char *)&qm_msg,sizeof(qm_msg),1);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(&qm_msg,sizeof(struct mngmsg),MPI_BYTE,hosttid,MSEQTYPE,
-X MPI_COMM_WORLD,&mpi_status);
-#endif
-#ifdef DEBUG
-X fprintf(stderr,"[%d] have MSEQTYPE n0: %d s_func: %d slist: %d qf: %d\n",
-X worker,qm_msg.n0,qm_msg.s_func,qm_msg.slist,qm_msg.qshuffle);
-#endif
-X
-/*****************************************************************/
-/* New query sequence indicated by qm_msg.slist=0 */
-/*****************************************************************/
-X
-X if (qm_msg.n0 > 0 && qm_msg.slist == 0) {
-X
-X if (cur_n0 > 0) {
-X
-/*****************************************************************/
-/* free everything associated with previous search */
-/*****************************************************************/
-X
-X close_work (aa0[0], cur_n0, &pst, &f_str[0]);
-X free_ares(seqpt, 0, walign_done[0], walign_cnt[0], worker);
-X walign_cnt[0] = 0;
-X if (m_msg.ann_flg) free(m_msg.aa0a);
-X
-X
-X if (m_msg.qframe == 2) {
-X close_work(aa0[1], cur_n0, &pst, &f_str[1]);
-X free_ares(seqpt, 1, walign_done[1], walign_cnt[1], worker);
-X walign_cnt[1] = 0;
-X }
-X if (old_shuffle) {
-X close_work(aa0s,cur_n0, &pst, &qf_str);
-X aa0s--;
-X free(aa0s);
-X old_shuffle = 0;
-X }
-X if (pst.pam_pssm) {
-X free_pam2p(pst.pam2p[0]);
-X free_pam2p(pst.pam2p[1]);
-X }
-X }
-X
-/*****************************************************************/
-/* Start allocating things for the next search */
-/*****************************************************************/
-X
-X pst.pam_pssm = qm_msg.pam_pssm;
-X cur_n0 = qm_msg.n0;
-X if (m_msg.ann_flg) {
-X if ((m_msg.aa0a = calloc(qm_msg.n0+1,sizeof(char)))==NULL) {
-X w_abort(" cannot allocate aa0a");
-X }
-X }
-X
-/*****************************************************************/
-/* Get the next query sequence */
-/*****************************************************************/
-X
-#ifdef PVM_SRC
-X pvm_upkbyte((char *)aa0[0],qm_msg.n0+1+SEQ_PAD,1);
-X if (m_msg.ann_flg) {
-X pvm_upkbyte((char *)m_msg.aa0a,qm_msg.n0+1,1);
-X }
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(aa0[0],qm_msg.n0+1+SEQ_PAD,MPI_BYTE,hosttid,
-X MSEQTYPE1,MPI_COMM_WORLD, &mpi_status);
-X if (m_msg.ann_flg) {
-X MPI_Recv(m_msg.aa0a,qm_msg.n0+1,MPI_BYTE,hosttid,
-X MSEQTYPE2,MPI_COMM_WORLD, &mpi_status);
-X }
-#endif
-X
-#ifdef DEBUG
-X /* must have null's at both ends of sequence */
-X if (aa0[0][-1]!= '\0') {
-X fprintf(stderr,"Missing null at start: %s %d\n",
-X qm_msg.libstr,aa0[0][-1]);
-X aa0[0][-1]='\0';
-X }
-X if (aa0[0][qm_msg.n0]!= '\0') {
-X fprintf(stderr,"Missing null at end: %s %d\n",
-X qm_msg.libstr,aa0[0][qm_msg.n0]);
-X aa0[qm_msg.n0]='\0';
-X }
-X
-X /* This discovers most reasons for core dumps */
-X if (pst.debug_lib)
-X for (j=0; j<qm_msg.n0; j++)
-X if (aa0[0][j]>pst.nsq) {
-X fprintf(stderr,
-X "seq: %s residue[%d/%d] %d range (%d)\n",
-X qm_msg.libstr,j,qm_msg.n0,aa0[0][j],pst.nsq);
-X aa0[0][j]=0;
-X qm_msg.n0=j-1;
-X break;
-X }
-#endif
-X update_params(&qm_msg,&m_msg,&pst);
-X }
-X
-/*****************************************************************/
-/* End of free()'s/ initialization for new sequence */
-/*****************************************************************/
-X
-#ifdef PVM_SRC
-X pvm_freebuf(bufid);
-#endif
-X
-X if (qm_msg.n0 == -1) {
-X
-/*****************************************************************/
-/* All done with searches */
-/*****************************************************************/
-/* printf(" %d: got n0 == -1\n",worker); */
-X break;
-X }
-X
-X /* p4_dprintf(" W:%d n0:%d slist:%d s_func:%d (%d)\n",worker,qm_msg.n0,qm_msg.slist,qm_msg.s_func,qres_bufsize); */
-X
-/*****************************************************************/
-/* if qm_msg.slist > 0, search specific sequences, to be sent */
-/*****************************************************************/
-X
-X if (qm_msg.slist > 0) { /* list search, not library search */
-X if (liblist != NULL) free(liblist);
-X
-X /* get the list of sequences */
-X if ((liblist=(struct stage2_str *)
-X calloc(qm_msg.slist,sizeof(struct stage2_str)))==NULL) {
-X sprintf(errstr,"sequence list %d",qm_msg.slist);
-X w_abort (errstr, "");
-X }
-X
-#ifdef PVM_SRC
-X bufid = pvm_recv(hosttid,LISTTYPE);
-X pvm_upkbyte((char *)liblist,qm_msg.slist*sizeof(struct stage2_str),1);
-X pvm_freebuf(bufid);
-#endif
-#ifdef MPI_SRC
-X MPI_Recv(liblist,qm_msg.slist*sizeof(struct stage2_str),MPI_BYTE,
-X hosttid,LISTTYPE,MPI_COMM_WORLD, &mpi_status);
-#endif
-X }
-X
-/*****************************************************************/
-/* have list of sequences to be compared/aligned */
-/*****************************************************************/
-X
-X /* Initial stuff */
-X if (qm_msg.slist == 0) {
-/*****************************************************************/
-/* New query - set up matrices and init_work() */
-/*****************************************************************/
-#ifdef DEBUG
-/*
-X fprintf(stderr,"n1: %d\t",qm_msg.n0);
-X for (i=0; i<10; i++) fprintf(stderr,"%c",nt[aa0[0][i]]);
-X fprintf(stderr,"\n");
-*/
-#endif
-X if (pst.pam_pssm) {
-X pst.pam2p[0] = alloc_pam2p(qm_msg.n0,nsq);
-X pst.pam2p[1] = alloc_pam2p(qm_msg.n0,nsq);
-X }
-X
-X init_work (aa0[0], qm_msg.n0, &pst, &f_str[0]);
-X f_str[5]=f_str[4]=f_str[3]=f_str[2]=f_str[1]=f_str[0];
-X
-X if (qm_msg.qshuffle) {
-X if ((aa0s=(unsigned char *)malloc((qm_msg.n0+2)*sizeof (char)))==NULL)
-X w_abort ("Unable to allocate aa0s array - exiting!","");
-X *aa0s='\0';
-X aa0s++;
-X
-X memcpy(aa0s,aa0[0],qm_msg.n0+1);
-X qshuffle(aa0s,qm_msg.n0,qm_msg.nm0);
-#ifdef DEBUG
-X fprintf(stderr,"[%d] shuffle: %d\n",worker,qm_msg.n0);
-X fputs(" ",stderr);
-X for (i=0; i<5; i++) {fprintf(stderr,"%c",pst.sq[aa0s[i]]);}
-X fputc('\n',stderr);
-#endif
-X
-X init_work (aa0s, qm_msg.n0, &pst, &qf_str);
-X old_shuffle=1;
-X }
-X
-X if (m_msg.qframe == 2) {
-X memcpy(aa0[1],aa0[0],qm_msg.n0+1);
-X revcomp(aa0[1],qm_msg.n0,&pst.c_nt[0]);
-X init_work (aa0[1], qm_msg.n0, &pst, &f_str[1]);
-X }
-#ifdef DEBUG
-/*
-X fprintf(stderr,"[%d] init_work qf: %d nf: %d\n",worker,m_msg.qframe,m_msg.nframe);
-*/
-#endif
-X }
-X
-/*****************************************************************/
-/* Finished with initialization, */
-/* start doing comparisons or alignments */
-/*****************************************************************/
-X
-X bestcnt = 0;
-X if (qm_msg.slist == 0) { /* library search */
-X
-/*****************************************************************/
-/* Start library search */
-/*****************************************************************/
-X
-X for (count=0; count < lcnt; count++) {
-X
-X for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
-X
-X rst.score[0] = rst.score[1] = rst.score[2] = 0;
-X if (m_msg.self) {
-X lsn = lend + count;
-X if ((qm_msg.seqnm > lsn) && (((qm_msg.seqnm + lsn) % 2) != 0)) {
-X do_work (aa0[itt], qm_msg.n0,seqpt[count].aa1, seqpt[count].n1,
-X itt, &pst, f_str[itt], 0, &rst);
-X }
-X else if ((qm_msg.seqnm <= lsn) && (((qm_msg.seqnm+lsn)%2) == 0)) {
-X do_work (aa0[itt], qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
-X itt, &pst, f_str[itt], 0, &rst);
-X }
-X else continue;
-X }
-X else {
-X do_work (aa0[itt], qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
-X itt, &pst, f_str[itt], 0, &rst);
-X if (qm_msg.qshuffle) {
-X do_work (aa0s, qm_msg.n0, seqpt[count].aa1, seqpt[count].n1,
-X itt, &pst, qf_str, 1, &qrst);
-X }
-X }
-#ifdef DEBUG
-/*
-X if (count < 10 || (count % 200 == 199)) {
-X fprintf(stderr,"[node %d] itt:%d/%d (%d) %3d %3d %3d - %d/%d\n",
-X worker,itt,m_msg.nitt1,count,
-X rst.score[0],rst.score[1],rst.score[2],
-X seqpt[count].m_seqnm,seqpt[count].n1);
-X }
-*/
-#endif
-X sw_score = -1;
-X
-X bestr[bestcnt].seqnm = count;
-X bestr[bestcnt].m_seqnm = seqpt[count].m_seqnm;
-X bestr[bestcnt].score[0] = rst.score[0];
-X bestr[bestcnt].score[1] = rst.score[1];
-X bestr[bestcnt].score[2] = rst.score[2];
-X bestr[bestcnt].escore = rst.escore;
-X bestr[bestcnt].segnum = rst.segnum;
-X bestr[bestcnt].seglen = rst.seglen;
-X bestr[bestcnt].frame = itt;
-X bestr[bestcnt].comp = rst.comp;
-X bestr[bestcnt].H = rst.H;
-X
-X bestr[bestcnt].qr_score = qrst.score[pst.score_ix];
-X bestr[bestcnt].qr_escore = qrst.escore;
-X
-X if (pst.zsflag >= 10) {
-X if (pst.zs_win > 0)
-X wshuffle(seqpt[count].aa1, aa1s,seqpt[count].n1,pst.zs_win,&ieven);
-X else
-X shuffle(seqpt[count].aa1, aa1s,seqpt[count].n1);
-X
-X do_work(aa0[itt],qm_msg.n0,aa1s,seqpt[count].n1,itt, &pst,
-X f_str[itt], 0, &rst);
-X bestr[bestcnt].r_score = rst.score[pst.score_ix];
-X }
-X
-X bestcnt++;
-X if (bestcnt >= qres_bufsize) {
-#ifdef DEBUG
-X fprintf(stderr," worker: %d sending %d results\n",worker,qres_bufsize);
-#endif
-X send_bestr(hosttid,curtype,bestr,qres_bufsize,bestcnt);
-X bestcnt = 0;
-X }
-X }
-X } /* END - for count loop */
-X send_bestr(hosttid, curtype, bestr,qres_bufsize, (bestcnt | FINISHED));
-X }
-X
-/*****************************************************************/
-/* End of library search section */
-/*****************************************************************/
-X
-/*****************************************************************/
-/* Do do_opt() from list s_func=DO_CALC_FLG */
-/*****************************************************************/
-X
-X else if (qm_msg.s_func== DO_CALC_FLG) { /* qm_msg.slist > 0 */
-X
-X bestcnt = 0;
-X for (count=0; count < qm_msg.slist; count++) {
-X rst.score[0] = rst.score[1] = rst.score[2] = 0;
-X itt = liblist[count].frame;
-X seqnm = bestr2[bestcnt].seqnm = liblist[count].seqnm;
-X bestr2[bestcnt].m_seqnm = seqpt[seqnm].m_seqnm;
-X
-X do_opt (aa0[itt], qm_msg.n0, seqpt[seqnm].aa1,
-X seqpt[seqnm].n1, itt,
-X &pst, f_str[itt], &rst);
-X
-X bestr2[bestcnt].score[0] = rst.score[0];
-X bestr2[bestcnt].score[1] = rst.score[1];
-X bestr2[bestcnt].score[2] = rst.score[2];
-X bestr2[bestcnt].escore = rst.escore;
-X bestr2[bestcnt].segnum = rst.segnum;
-X bestr2[bestcnt].seglen = rst.seglen;
-X bestr2[bestcnt].aln_code_n = 0;
-X bestcnt++;
-X
-X if (bestcnt >= BFR2) {
-X send_bestr2(hosttid,bestr2,bestcnt);
-X bestcnt = 0;
-X }
-X } /* END - for count loop */
-X
-X send_bestr2(hosttid,bestr2,(bestcnt|FINISHED));
-X }
-X
-/*****************************************************************/
-/* s_func=DO_OPT_FLG */
-/* */
-/* from list: */
-/* if (m_msg.stages > 1) do_opt() */
-/* do_walign() */
-/* calc_id or calc_code, no calcons */
-/*****************************************************************/
-X
-X /* s_func == 1 means do_opt if necessary */
-X else if (qm_msg.s_func== DO_OPT_FLG) { /* qm_msg.slist > 0 */
-#ifdef DEBUG
-X fprintf(stderr," [%d] starting s_func:1 slist: %d\n",
-X worker,qm_msg.slist);
-#endif
-X /* get the buffer once - re-use it for the entire slist */
-X if (m_msg.show_code == SHOW_CODE_ALIGN) {
-X seqc_buff_len = (BFR2+5)*256;
-X seqc = seqc_buff = (char *)calloc(seqc_buff_len,sizeof(char));
-X seqc_buff_cnt = 0;
-X if (seqc_buff == NULL) {
-X seqc_buff_cnt = seqc_buff_len = 0;
-X }
-X }
-X
-X bestcnt = 0;
-X for (count=0; count < qm_msg.slist; count++) {
-X rst.score[0] = rst.score[1] = rst.score[2] = 0;
-X itt = liblist[count].frame;
-X seqnm = liblist[count].seqnm;
-X
-X bestr2[bestcnt].seqnm = seqnm;
-X bestr2[bestcnt].m_seqnm = seqpt[seqnm].m_seqnm;
-X if (m_msg.stages > 1) {
-X do_opt (aa0[itt], qm_msg.n0, seqpt[seqnm].aa1,
-X seqpt[seqnm].n1, itt,
-X &pst, f_str[itt], &rst);
-X
-X bestr2[bestcnt].score[0] = rst.score[0];
-X bestr2[bestcnt].score[1] = rst.score[1];
-X bestr2[bestcnt].score[2] = rst.score[2];
-X }
-X
-X if (m_msg.markx & MX_M9SUMM) {
-#ifdef DEBUG
-X fprintf(stderr," [%d] starting do_walign seqnm: %d n1: %d\n",
-X worker,seqnm,seqpt[seqnm].n1);
-#endif
-X aln_dp = &bestr2[bestcnt].aln_d;
-X memcpy(aln_dp, &aln,sizeof(struct a_struct));
-X
-X sw_score = do_walign(aa0[itt], qm_msg.n0,
-X seqpt[seqnm].aa1, seqpt[seqnm].n1,
-X itt, &pst, f_str[itt],
-X &seqpt[seqnm].a_res[itt],
-X &have_walign);
-X seqpt[seqnm].sw_score[itt] = sw_score;
-X
-X /* the a_res[itt] provided by do_walign is re-used - so it
-X must be copied to a valid location */
-X
-X if (have_walign) {
-X if ((tres = calloc(seqpt[seqnm].a_res[itt].nres+1,sizeof(int)))==NULL) {
-X w_abort(" cannot allocate tres");
-X }
-X else {
-X memcpy(tres,seqpt[seqnm].a_res[itt].res,sizeof(int)*seqpt[seqnm].a_res[itt].nres);
-X seqpt[seqnm].a_res[itt].res = tres;
-X /*
-X fprintf(stderr, " [%d] saving %d:%d[%d]:%o\n", worker,
-X walign_cnt[itt],seqnm,itt, seqpt[seqnm].a_res[itt].res);
-X */
-X if (walign_cnt[itt] < lcnt) walign_done[itt][walign_cnt[itt]++] = seqnm;
-X else w_abort(" walign_cnt overrun");
-X seqpt[seqnm].walign_dflg[itt] = 1;
-X }
-X }
-X aln_func_vals(itt, aln_dp);
-X
-#ifdef DEBUG
-X fprintf(stderr," [%d] starting calc_id sw_score: %d\n",
-X worker,sw_score);
-X fprintf(stderr,"bi: %d seqc_buff_cnt: %d - seqc_buff_len: %d\n",
-X bestcnt, seqc_buff_cnt, seqc_buff_len);
-#endif
-X aln_code_n = 0; /* must be set in case no seqc_code */
-X if (m_msg.show_code == SHOW_CODE_ALIGN) {
-X if (seqc_buff_cnt < seqc_buff_len - 256) {
-X lc=calc_code(aa0[itt],qm_msg.n0,
-X seqpt[seqnm].aa1, seqpt[seqnm].n1,
-X aln_dp,seqpt[seqnm].a_res[itt],pst,
-X seqc,seqc_buff_len-seqc_buff_cnt-10,
-X f_str[itt]);
-X aln_code_n = strlen(seqc);
-X seqc_buff_cnt += aln_code_n + 1;
-/*
-X fprintf(stderr,"%d:%d:%d: %d/%d - [%d] %s\n",
-X worker,seqnm,bestcnt,aln_code_n,seqc_buff_cnt, seqc-seqc_buff,seqc);
-*/
-X seqc += aln_code_n;
-X *seqc++ = '\0';
-X }
-X }
-X else {
-X lc=calc_id(aa0[itt],qm_msg.n0,
-X seqpt[seqnm].aa1, seqpt[seqnm].n1,
-X aln_dp,seqpt[seqnm].a_res[itt],pst,f_str[itt]);
-X }
-X
-X nident = aln_dp->nident;
-X aln_dp->a_len = lc;
-X
-X if (lc > 0) percent = (100.0*(float)nident)/(float)lc;
-X else percent = 0.0;
-X
-X ngap = aln_dp->ngap_q + aln_dp->ngap_l;
-#ifndef SHOWSIM
-X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
-#else
-X if (lc > 0) gpercent =(100.0*(float)aln_dp->nsim)/(float)lc;
-#endif
-X else gpercent = -1.0;
-X
-X bestr2[bestcnt].sw_score = sw_score;
-X bestr2[bestcnt].percent = percent;
-X bestr2[bestcnt].gpercent = gpercent;
-X bestr2[bestcnt].aln_code_n = aln_code_n;
-X }
-X bestcnt++;
-X
-X if (bestcnt >= BFR2) {
-X send_bestr2(hosttid,bestr2,bestcnt);
-X if (m_msg.show_code == SHOW_CODE_ALIGN) {
-X send_code(hosttid,seqc_buff,seqc_buff_cnt);
-X memset(seqc_buff,0,seqc_buff_len);
-X seqc = seqc_buff;
-X seqc_buff_cnt = 0;
-X }
-X bestcnt = 0;
-X }
-X } /* END - for count loop */
-X
-X send_bestr2(hosttid,bestr2,(bestcnt|FINISHED));
-X if (m_msg.show_code == SHOW_CODE_ALIGN) {
-X send_code(hosttid,seqc_buff,seqc_buff_cnt);
-X if (seqc_buff) free(seqc_buff);
-X }
-X }
-X /* get alignments */
-X
-/*****************************************************************/
-/* s_list > */
-/* s_func=DO_ALIGN_FLG */
-/* */
-/* from list: */
-/* do_walign() if not done already */
-/* calcons() */
-/*****************************************************************/
-X
-X else if (qm_msg.s_func==DO_ALIGN_FLG) {
-X for (count=0; count < qm_msg.slist; count++) {
-X itt = liblist[count].frame;
-X seqnm = liblist[count].seqnm;
-/*
-X fprintf(stderr,"worker: %d; %s, frame: %d\n",worker,qm_msg.libstr,itt);
-*/
-X if (!seqpt[seqnm].walign_dflg[itt]) {
-X seqpt[seqnm].sw_score[itt] =
-X sw_score = do_walign (aa0[itt], qm_msg.n0,seqpt[seqnm].aa1,
-X seqpt[seqnm].n1, itt,
-X &pst, f_str[itt],
-X &seqpt[seqnm].a_res[itt],
-X &have_walign);
-X }
-X else {
-X sw_score = seqpt[seqnm].sw_score[itt];
-X pre_cons(seqpt[seqnm].aa1,seqpt[seqnm].n1,itt,f_str[itt]);
-X }
-X
-X aln_func_vals(itt, &aln);
-X
-X if (aln.showall==1)
-X maxc = seqpt[seqnm].a_res[itt].nres + max(seqpt[seqnm].a_res[itt].min0,seqpt[seqnm].a_res[itt].min1)+
-X max((qm_msg.n0-seqpt[seqnm].a_res[itt].max0),
-X (seqpt[seqnm].n1-seqpt[seqnm].a_res[itt].max1))+4;
-X else maxc = seqpt[seqnm].a_res[itt].nres + 4*aln.llen+4;
-X
-X initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
-X
-X if (!m_msg.ann_flg) {
-X nc=calcons(aa0[itt],qm_msg.n0,
-X seqpt[seqnm].aa1, seqpt[seqnm].n1,
-X &lc,&aln,seqpt[seqnm].a_res[itt],pst,
-X seqc0,seqc1,seqca,f_str[itt]);
-X memset(seqc0a,' ',nc);
-X seqc0a[nc]='\0';
-X }
-X else {
-X nc=calcons_a(aa0[itt],m_msg.aa0a,qm_msg.n0,
-X seqpt[seqnm].aa1, seqpt[seqnm].n1,
-X &lc,&aln,seqpt[seqnm].a_res[itt],pst,
-X seqc0,seqc0a,seqc1,seqca,
-X m_msg.ann_arr,f_str[itt]);
-X }
-X
-X /*
-X fprintf(stderr,"[%d] nident: %d nsim: %d lc: %d\n",aln.nident, aln.nsim, lc);
-X */
-X
-X maxc = max(strlen(seqc0),strlen(seqc1))+1;
-X nident = aln.nident;
-X percent = (100.0*(float)nident)/(float)lc;
-X ngap = aln.ngap_q+aln.ngap_l;
-#ifndef SHOWSIM
-X if (lc-ngap > 0) gpercent = (100.0*(float)nident)/(float)(lc-ngap);
-#else
-X if (lc > 0) gpercent = (100.0*(float)aln.nsim)/(float)lc;
-#endif
-X else gpercent = -1.0;
-X
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkint(&nc,1,1);
-X pvm_pkint(&lc,1,1);
-X pvm_pkint(&maxc,1,1);
-X pvm_pkfloat(&percent,1,1);
-X pvm_pkfloat(&gpercent,1,1);
-X pvm_pkint(&sw_score,1,1);
-X pvm_pkbyte((char *)&aln,sizeof(struct a_struct),1);
-X pvm_send(hosttid,ALN1TYPE);
-#ifdef DEBUG
-X fprintf(stderr,"[%d] ALN1TYPE sent: %d\n",worker,qm_msg.n0);
-#endif
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte(seqc0,maxc,1);
-X if (m_msg.ann_flg) pvm_pkbyte(seqc0a,maxc,1);
-X pvm_pkbyte(seqc1,maxc,1);
-X pvm_pkbyte(seqca,maxc,1);
-X pvm_send(hosttid,ALN2TYPE);
-#endif
-#ifdef MPI_SRC
-X last_msg_b[0]=nc;
-X last_msg_b[1]=lc;
-X last_msg_b[2]=maxc;
-X last_msg_b[3]=sw_score;
-X MPI_Send(last_msg_b,4,MPI_INT,hosttid,ALN1TYPE,MPI_COMM_WORLD);
-X MPI_Send(&percent,1,MPI_FLOAT,hosttid,ALN2TYPE,MPI_COMM_WORLD);
-X MPI_Send(&gpercent,1,MPI_FLOAT,hosttid,ALN2TYPE,MPI_COMM_WORLD);
-X
-/* p4_dprintf("[%d] sending aln\n",worker); */
-X MPI_Send(&aln,sizeof(struct a_struct),MPI_BYTE,hosttid,
-X ALN3TYPE,MPI_COMM_WORLD);
-X
-X MPI_Send(seqc0,maxc,MPI_BYTE,hosttid,ALN2TYPE,MPI_COMM_WORLD);
-X if (m_msg.ann_flg) MPI_Send(seqc0a,maxc,MPI_BYTE,hosttid,ALN2TYPE,MPI_COMM_WORLD);
-X MPI_Send(seqc1,maxc,MPI_BYTE,hosttid,ALN3TYPE,MPI_COMM_WORLD);
-X MPI_Send(seqca,maxc,MPI_BYTE,hosttid,ALN3TYPE,MPI_COMM_WORLD);
-#endif
-X freeseq(&seqc0,&seqc0a,&seqc1,&seqca);
-X }
-X }
-X
-/* send back parameter settings */
-X if (worker==FIRSTWORK && qm_msg.slist==0) {
-X get_param(&pst, gstring2,gstring3);
-#ifdef PVM_SRC
-X pvm_initsend(PvmDataRaw);
-X pvm_pkbyte(gstring2,sizeof(gstring2),1);
-X pvm_pkbyte(gstring3,sizeof(gstring3),1);
-X pvm_send(hosttid,PARAMTYPE);
-#endif
-#ifdef MPI_SRC
-X MPI_Send(gstring2,sizeof(gstring2),MPI_BYTE,
-X hosttid,PARAMTYPE,MPI_COMM_WORLD);
-X MPI_Send(gstring3,sizeof(gstring3),MPI_BYTE,
-X hosttid,PARAMTYPE,MPI_COMM_WORLD);
-#endif
-X }
-X
-X if (qm_msg.slist==0) {
-X if (curtype == ONETYPE) curtype = TWOTYPE;
-X else curtype = ONETYPE;
-X }
-X } /* END - while (1) loop */
-#ifdef PVM_SRC
-X pvm_exit();
-#endif
-#ifdef MPI_SRC
-/* MPI_Finalize(); */
-#endif
-}
-X
-/* send_bestr() - ship one buffer of search results to the host.
-X
-X   lastcnt (a result count, possibly OR-ed with FINISHED by the caller) is
-X   stored in the seqnm field of the extra slot bestr[buf_size]; then all
-X   buf_size+1 slots are sent as raw bytes to hosttid with message tag
-X   curtype, through PVM and/or MPI depending on PVM_SRC / MPI_SRC.
-X
-X   NOTE(review): bestr must have room for buf_size+1 entries - confirm at
-X   the allocation site. */
-void
-send_bestr(int hosttid, int curtype,
-X	   struct comstr *bestr, int buf_size, int lastcnt)
-{
-X  struct comstr *trailer = bestr + buf_size;
-X
-X  /* the slot after the last result carries the count/flag word */
-X  trailer->seqnm = lastcnt;
-X
-#ifdef PVM_SRC
-X  pvm_initsend(PvmDataRaw);
-X  pvm_pkbyte((char *)bestr, sizeof(struct comstr)*(buf_size+1), 1);
-X  pvm_send(hosttid, curtype);
-#endif
-#ifdef MPI_SRC
-X  MPI_Send(bestr, sizeof(struct comstr)*(buf_size+1), MPI_BYTE,
-X	   hosttid, curtype, MPI_COMM_WORLD);
-#endif
-}
-X
-/* send_bestr2() - ship the second-stage result buffer to the host.
-X
-X   The buffer always holds BFR2 result slots plus one trailer slot; the
-X   trailer's seqnm field receives lastcnt (a count, possibly OR-ed with
-X   FINISHED by the caller).  All BFR2+1 slots are sent as raw bytes with
-X   tag LISTRTYPE, through PVM and/or MPI depending on the build. */
-void
-send_bestr2(int hosttid, struct comstr2 *bestr2,
-X	    int lastcnt)
-{
-X  struct comstr2 *trailer = bestr2 + BFR2;
-X
-X  trailer->seqnm = lastcnt;
-X
-#ifdef PVM_SRC
-X  pvm_initsend(PvmDataRaw);
-X  pvm_pkbyte((char *)bestr2, sizeof(struct comstr2)*(BFR2+1), 1);
-X  pvm_send(hosttid, LISTRTYPE);
-#endif
-#ifdef MPI_SRC
-X  MPI_Send(bestr2, sizeof(struct comstr2)*(BFR2+1), MPI_BYTE,
-X	   hosttid, LISTRTYPE, MPI_COMM_WORLD);
-#endif
-}
-X
-/* send_code() - send the accumulated alignment-code text to the host.
-X
-X   seqc_buff_len is the number of bytes of seqc_buff to transmit (the
-X   callers in this file pass their running byte count).  The length is
-X   always sent; the bytes themselves are sent only when the length is
-X   positive.  Everything goes out with tag CODERTYPE: in the PVM build as
-X   a single packed message, in the MPI build as one or two MPI_Send calls. */
-void
-send_code(int hosttid, char *seqc_buff, int seqc_buff_len)
-{
-#ifdef PVM_SRC
-X  pvm_initsend(PvmDataRaw);
-X  pvm_pkint(&seqc_buff_len, 1, 1);
-X  if (seqc_buff_len > 0) {
-X    pvm_pkbyte(seqc_buff, seqc_buff_len, 1);
-X  }
-X  pvm_send(hosttid, CODERTYPE);
-#endif
-#ifdef MPI_SRC
-X  MPI_Send(&seqc_buff_len, 1, MPI_INT,
-X	   hosttid, CODERTYPE, MPI_COMM_WORLD);
-X  if (seqc_buff_len > 0) {
-X    MPI_Send(seqc_buff, seqc_buff_len, MPI_BYTE,
-X	     hosttid, CODERTYPE, MPI_COMM_WORLD);
-X  }
-#endif
-}
-X
-#ifdef PVM_SRC
-/* tidtonode() - map a PVM task id to its index in pinums[].
-X
-X   Searches pinums[FIRSTNODE .. nnodes-1] and returns the first index
-X   whose entry equals tid.  When there is no match a message is written
-X   to stderr and -1 is returned. */
-int tidtonode(tid)
-X     int tid;
-{
-X  int node = FIRSTNODE;
-X
-X  while (node < nnodes) {
-X    if (pinums[node] == tid) {
-X      return node;
-X    }
-X    node++;
-X  }
-X
-X  fprintf(stderr," cannot find tid %d\n",tid);
-X  return -1;
-}
-#endif
-X
-/* free_ares() - release the saved alignment results for one frame.
-X
-X   walign_done[0 .. walign_cnt-1] lists the seqpt[] indices whose
-X   a_res[itt].res was saved.  Each list entry is reset to 0; for each
-X   listed sequence the result array is freed (only when nres > 0, and
-X   nres is reset to 0 first) and walign_dflg[itt] is cleared.  A listed
-X   sequence whose walign_dflg[itt] is not set is reported via w_abort().
-X
-X   worker is not used by the active code.
-X
-X   NOTE(review): a result saved with nres == 0 is not freed here -
-X   confirm whether that case can occur. */
-void
-free_ares(struct sqs2 *seqpt, int itt, int *walign_done, int walign_cnt, int worker)
-{
-X  int i;
-X  struct sqs2 *sq;
-X
-X  for (i = 0; i < walign_cnt; i++) {
-X    sq = &seqpt[walign_done[i]];
-X    walign_done[i] = 0;
-X
-X    if (!sq->walign_dflg[itt]) {
-X      w_abort(" have walign_done but no walign_dflag");
-X    }
-X    else if (sq->a_res[itt].nres > 0) {
-X      sq->a_res[itt].nres = 0;
-X      free(sq->a_res[itt].res);
-X    }
-X
-X    sq->walign_dflg[itt] = 0;
-X  }
-}
-SHAR_EOF
-chmod 0644 p2_workcomp.c ||
-echo 'restore of p2_workcomp.c failed'
-Wc_c="`wc -c < 'p2_workcomp.c'`"
-test 37611 -eq "$Wc_c" ||
- echo 'p2_workcomp.c: original size 37611, current size' "$Wc_c"
-fi
-# ============= p_mw.h ==============
-if test -f 'p_mw.h' -a X"$1" != X"-c"; then
- echo 'x - skipping p_mw.h (File already exists)'
-else
-echo 'x - extracting p_mw.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'p_mw.h' &&
-/* Concurrent read version */
-X
-/* $Name: fa_34_26_5 $ - $Id: p_mw.h,v 1.17 2006/04/12 18:00:02 wrp Exp $ */
-X
-#ifndef FSEEK_T_DEF
-#ifndef USE_FSEEKO
-typedef long fseek_t;
-#else
-typedef off_t fseek_t;
-#endif
-#endif
-X
-/* struct beststr - one saved search result.
-X
-X   NOTE(review): comments marked "presumably" below are inferred from
-X   field names only; confirm against the code that fills the structure. */
-struct beststr {
-X int n1; /* sequence number */
-X /* NOTE(review): elsewhere in this archive n1 is passed as a library
-X    sequence length (e.g. seqpt[].n1 in p2_workcomp.c) - confirm which
-X    meaning applies here */
-X int score[3]; /* score */
-X int rscore; /* score from shuffled sequence */
-X int sw_score; /* optimal score from alignment */
-X double comp; /* karlin 1/lambda comp.parameter */
-X double H; /* karlin H information content */
-X double zscore;
-X double escore;
-X double r_escore; /* presumably escore for the shuffled sequence - TODO confirm */
-X int segnum;
-X int seglen;
-X int lib;
-X fseek_t lseek; /* fseek_t is long, or off_t when USE_FSEEKO is defined */
-X int cont;
-X int frame;
-X int m_seqnm;
-X int seqnm;
-X int wrkr; /* presumably the worker that produced the result - TODO confirm */
-X struct sql *desptr;
-X struct a_struct *aln_d;
-X char *aln_code;
-X int aln_code_n; /* presumably the length of aln_code - TODO confirm */
-X float percent, gpercent;
-};
-X
-/* struct stat_str - one score sample with the values that accompany it.
-X
-X   NOTE(review): presumably the per-sequence record used for the
-X   statistical estimates; the fields mirror score/comp/H/escore/segnum/
-X   seglen of struct beststr - confirm against the users of this type. */
-struct stat_str {
-X int score;
-X int n1;
-X double comp;
-X double H;
-X double escore;
-X int segnum;
-X int seglen;
-};
-X
-/* this structure passes library sequences to the worker threads
-X and returns scores */
-X
-#include "w_mw.h"
-X
-/*
-struct pbuf_head {
-X int buf_cnt;
-X unsigned char *start;
-X struct sqs2 *buf;
-};
-*/
-SHAR_EOF
-chmod 0644 p_mw.h ||
-echo 'restore of p_mw.h failed'
-Wc_c="`wc -c < 'p_mw.h'`"
-test 1096 -eq "$Wc_c" ||
- echo 'p_mw.h: original size 1096, current size' "$Wc_c"
-fi
-# ============= pam120.mat ==============
-if test -f 'pam120.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping pam120.mat (File already exists)'
-else
-echo 'x - extracting pam120.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pam120.mat' &&
-#
-# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
-#
-# PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
-#
-# Expected score = -1.64, Entropy = 0.979 bits
-#
-# Lowest score = -8, Highest score = 12
-#
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 3 -3 -1 0 -3 -1 0 1 -3 -1 -3 -2 -2 -4 1 1 1 -7 -4 0 0 -1 -1
-R -3 6 -1 -3 -4 1 -3 -4 1 -2 -4 2 -1 -5 -1 -1 -2 1 -5 -3 -2 -1 -2
-N -1 -1 4 2 -5 0 1 0 2 -2 -4 1 -3 -4 -2 1 0 -4 -2 -3 3 0 -1
-D 0 -3 2 5 -7 1 3 0 0 -3 -5 -1 -4 -7 -3 0 -1 -8 -5 -3 4 3 -2
-C -3 -4 -5 -7 9 -7 -7 -4 -4 -3 -7 -7 -6 -6 -4 0 -3 -8 -1 -3 -6 -7 -4
-Q -1 1 0 1 -7 6 2 -3 3 -3 -2 0 -1 -6 0 -2 -2 -6 -5 -3 0 4 -1
-E 0 -3 1 3 -7 2 5 -1 -1 -3 -4 -1 -3 -7 -2 -1 -2 -8 -5 -3 3 4 -1
-G 1 -4 0 0 -4 -3 -1 5 -4 -4 -5 -3 -4 -5 -2 1 -1 -8 -6 -2 0 -2 -2
-H -3 1 2 0 -4 3 -1 -4 7 -4 -3 -2 -4 -3 -1 -2 -3 -3 -1 -3 1 1 -2
-I -1 -2 -2 -3 -3 -3 -3 -4 -4 6 1 -3 1 0 -3 -2 0 -6 -2 3 -3 -3 -1
-L -3 -4 -4 -5 -7 -2 -4 -5 -3 1 5 -4 3 0 -3 -4 -3 -3 -2 1 -4 -3 -2
-K -2 2 1 -1 -7 0 -1 -3 -2 -3 -4 5 0 -7 -2 -1 -1 -5 -5 -4 0 -1 -2
-M -2 -1 -3 -4 -6 -1 -3 -4 -4 1 3 0 8 -1 -3 -2 -1 -6 -4 1 -4 -2 -2
-F -4 -5 -4 -7 -6 -6 -7 -5 -3 0 0 -7 -1 8 -5 -3 -4 -1 4 -3 -5 -6 -3
-P 1 -1 -2 -3 -4 0 -2 -2 -1 -3 -3 -2 -3 -5 6 1 -1 -7 -6 -2 -2 -1 -2
-S 1 -1 1 0 0 -2 -1 1 -2 -2 -4 -1 -2 -3 1 3 2 -2 -3 -2 0 -1 -1
-T 1 -2 0 -1 -3 -2 -2 -1 -3 0 -3 -1 -1 -4 -1 2 4 -6 -3 0 0 -2 -1
-W -7 1 -4 -8 -8 -6 -8 -8 -3 -6 -3 -5 -6 -1 -7 -2 -6 12 -2 -8 -6 -7 -5
-Y -4 -5 -2 -5 -1 -5 -5 -6 -1 -2 -2 -5 -4 4 -6 -3 -3 -2 8 -3 -3 -5 -3
-V 0 -3 -3 -3 -3 -3 -3 -2 -3 3 1 -4 1 -3 -2 -2 0 -8 -3 5 -3 -3 -1
-B 0 -2 3 4 -6 0 3 0 1 -3 -4 0 -4 -5 -2 0 0 -6 -3 -3 4 2 -1
-Z -1 -1 0 3 -7 4 4 -2 1 -3 -3 -1 -2 -6 -1 -1 -2 -7 -5 -3 2 4 -1
-XX -1 -2 -1 -2 -4 -1 -1 -2 -2 -1 -2 -2 -2 -3 -2 -1 -1 -5 -3 -1 -1 -1 -2
-X
-SHAR_EOF
-chmod 0644 pam120.mat ||
-echo 'restore of pam120.mat failed'
-Wc_c="`wc -c < 'pam120.mat'`"
-test 1922 -eq "$Wc_c" ||
- echo 'pam120.mat: original size 1922, current size' "$Wc_c"
-fi
-# ============= pam250.mat ==============
-if test -f 'pam250.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping pam250.mat (File already exists)'
-else
-echo 'x - extracting pam250.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pam250.mat' &&
-#
-# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
-#
-# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
-#
-# Expected score = -0.844, Entropy = 0.354 bits
-#
-# Lowest score = -8, Highest score = 17
-#
-X A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0
-R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1
-N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0
-D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1
-C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3
-Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1
-E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1
-G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1
-H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1
-I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1
-L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1
-K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1
-M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1
-F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2
-P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1
-S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0
-T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0
-W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4
-Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2
-V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1
-B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1
-Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1
-XX 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1
-X
-SHAR_EOF
-chmod 0644 pam250.mat ||
-echo 'restore of pam250.mat failed'
-Wc_c="`wc -c < 'pam250.mat'`"
-test 1923 -eq "$Wc_c" ||
- echo 'pam250.mat: original size 1923, current size' "$Wc_c"
-fi
-# ============= param.h ==============
-if test -f 'param.h' -a X"$1" != X"-c"; then
- echo 'x - skipping param.h (File already exists)'
-else
-echo 'x - extracting param.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'param.h' &&
-/* $Name: fa_34_26_5 $ - $Id: param.h,v 1.41 2007/04/26 18:37:19 wrp Exp $ */
-X
-X
-#ifndef P_STRUCT
-#define P_STRUCT
-X
-#define MAXSQ 50
-X
-X
-/* Concurrent read version */
-X
-/* struct fastr - first alternative of the param_u union in struct pstruct
-X   (member name "fa").
-X
-X   NOTE(review): the individual fields are not documented in this header;
-X   the names suggest word size (ktup), gap/optimization thresholds and
-X   flags for the FASTA-style algorithms - confirm against the code that
-X   sets them. */
-struct fastr {
-X int ktup;
-X int cgap;
-X int pgap;
-X int pamfact;
-X int scfact;
-X int bestoff;
-X int bestscale;
-X int bkfact;
-X int bktup;
-X int bestmax;
-X int altflag;
-X int optflag;
-X int iniflag;
-X int optcut;
-X int optcut_set; /* presumably non-zero when optcut was set explicitly - TODO confirm */
-X int optwid;
-};
-X
-/* struct prostr - second alternative of the param_u union in struct
-X   pstruct (member name "pr").
-X
-X   NOTE(review): presumably gap-open, gap-extend and band width for a
-X   profile-style search - confirm against the code that uses param_u.pr. */
-struct prostr {
-X int gopen;
-X int gextend;
-X int width;
-};
-X
-/* struct pstruct - the search/scoring parameter set.  The worker code in
-X   this archive passes a pointer to it (&pst) to do_work(), do_opt() and
-X   do_walign(), and passes it by value to calc_id()/calc_code()/calcons(). */
-struct pstruct /* parameters */
-{
-X int n0; /* length of query sequence, used for statistics */
-X int gdelval; /* value gap open (-10) */
-X int ggapval; /* value for additional residues in gap (-2) */
-X int gshift; /* frameshift for fastx, fasty */
-X int gsubs; /* nt substitution in fasty */
-X int p_d_mat; /* dna match penalty */
-X int p_d_mis; /* dna mismatch penalty */
-X int p_d_set; /* using match/mismatch */
-X int score_ix; /* index to sorted score */
-X int zsflag; /* use scalebest() */
-X int zsflag_f; /* use scalebest() */
-X /* NOTE(review): zsflag and zsflag_f carry the same comment; the
-X    difference between them is not documented here - TODO confirm */
-X int zs_win; /* when > 0 the worker uses wshuffle() with this window
-X		instead of shuffle() */
-X int histint; /* histogram interval */
-X char sq[MAXSQ+1];
-X int hsq[MAXSQ+1];
-X int nsq; /* length of normal sq */
-X int ext_sq_set; /* flag for using extended alphabet */
-X char sqx[MAXSQ]; /* NOTE(review): one element shorter than sq[] -
-X		     confirm this is intentional */
-X int hsqx[MAXSQ+1];
-X int c_nt[MAXSQ+1];
-X int nsqx; /* length of extended sq */
-X int dnaseq; /* -1 = not set (protein); 0 = protein; 1 = DNA; 2 = other, 3 RNA */
-X int nt_align; /* DNA/RNA alignment = 1 */
-X int debug_lib;
-X int tr_type; /* codon table */
-X int sw_flag;
-X char pamfile[120]; /* pam file type */
-X char pgpfile[120];
-X int pgpfile_type;
-X float pamscale;
-X int pam_pssm;
-X int pam_set;
-X int have_pam2;
-X int **pam2[2];
-X int **pam2p[2];
-X int pamoff; /* offset for pam values */
-X int pam_l, pam_h, pam_xx, pam_xm; /* lowest, highest pam value */
-X int pam_x_set;
-X int pam_ms; /* use a Mass Spec pam matrix */
-X int maxlen;
-X long zdb_size; /* force database size */
-X int pgm_id;
-X /* algorithm-specific parameters: fa (struct fastr) or pr (struct prostr) */
-X union {
-X struct fastr fa;
-X struct prostr pr;
-X } param_u;
-X int pseudocts;
-X int shuff_node;
-};
-X
-/* Result structure - do not remove */
-/* Scores and statistics for one query/library comparison.  The worker
-X  code in this archive passes &rst to do_work()/do_opt() and then copies
-X  score[0..2], escore, segnum, seglen, comp and H into its result buffer. */
-struct rstruct
-{
-X int score[3]; /* three scores; pstruct.score_ix selects the one used
-X		  for the shuffled-sequence score */
-X double comp;
-X double H;
-X double escore;
-X int segnum;
-X int seglen;
-};
-X
-#ifndef PCOMPLIB
-/* struct thr_str - per-thread argument block; compiled only when PCOMPLIB
-X   is not defined.
-X
-X   NOTE(review): presumably one of these is handed to each worker thread
-X   at start-up; field meanings below are inferred from names - confirm
-X   against the thread start routine. */
-struct thr_str {
-X int worker;
-X void *status;
-X int max_work_buf;
-X int qframe;
-X struct pstruct *ppst; /* pointer to the parameter set (struct pstruct) */
-X int qshuffle;
-X unsigned char *aa0; /* presumably the query sequence - TODO confirm */
-X int n0; /* presumably the query length - TODO confirm */
-X int nm0;
-X int max_tot;
-};
-X
-#include <sys/types.h>
-X
-/* this structure passes library sequences to the worker threads
-X and returns scores */
-X
-/* struct buf_str - one library-sequence entry in a work buffer; the
-X   embedded rst and the *_score / *_escore fields hold the results.
-X   Compiled only when PCOMPLIB is not defined.
-X
-X   NOTE(review): field meanings not commented below are undocumented in
-X   this header - confirm against the code that fills the buffers. */
-struct buf_str {
-X int n1;
-X int *n1tot_p;
-X unsigned char *aa1b;
-X /* file position: long, or off_t when USE_FSEEKO is defined (same choice
-X    as the fseek_t typedef used elsewhere in this archive) */
-#ifndef USE_FSEEKO
-X long lseek;
-#else
-X off_t lseek;
-#endif
-X struct lmf_str *m_file_p;
-X int cont;
-X int qframe;
-X int frame;
-X int nsfnum;
-X int sfnum[10];
-X char libstr[20]; /* set to MAX_UID */
-X struct rstruct rst; /* scores for this sequence */
-X int r_score, qr_score;
-X double r_escore, qr_escore;
-};
-X
-/* struct buf_head - header for an array of struct buf_str entries.
-X   Compiled only when PCOMPLIB is not defined.
-X
-X   NOTE(review): presumably buf_cnt is the number of valid entries in
-X   buf[], have_results flags a buffer already processed by a worker, and
-X   start points at the residue storage shared by the entries - confirm. */
-struct buf_head {
-X int buf_cnt;
-X int have_results;
-X unsigned char *start;
-X struct buf_str *buf;
-};
-X
-#endif
-X
-#endif /* P_STRUCT */
-X
-#include "aln_structs.h"
-SHAR_EOF
-chmod 0644 param.h ||
-echo 'restore of param.h failed'
-Wc_c="`wc -c < 'param.h'`"
-test 3002 -eq "$Wc_c" ||
- echo 'param.h: original size 3002, current size' "$Wc_c"
-fi
-# ============= pgsql_lib.c ==============
-if test -f 'pgsql_lib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping pgsql_lib.c (File already exists)'
-else
-echo 'x - extracting pgsql_lib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pgsql_lib.c' &&
-X
-/* pgsql_lib.c copyright (c) 2004 William R. Pearson */
-X
-/* $Name: fa_34_26_5 $ - $Id: pgsql_lib.c,v 1.3 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* functions for opening, reading, seeking a pgsql database */
-X
-/*
-X For the moment, this interface assumes that the file to be searched will
-X be specified in a single, long, string with 4 parts:
-X
-X (1) a database open string. This string has four fields, separated by
-X whitespace (' \t'):
-X hostname:port dbname user password
-X
-X '--' dashes at the beginning of lines are ignored -
-X thus the first line could be:
-X -- hostname:port dbname user password
-X
-X (2) a database query string that will return a unique ID (not
-X necessarily numeric, but it must be < 12 characters as libstr[12]
-X is used) and a sequence string
-X
-X (2a) a series of pgsql commands that do not generate results
-X starting with 'DO', followed by a select() statement.
-X
-X (3) a database select string that will return a description
-X given a unique ID
-X
-X (4) a database select string that will return a sequence given a
-X unique ID
-X
-X Lines (3) and (4) are not required for pv34comp* libraries, but
-X line (2) must generate a complete description as well as a sequence.
-X
-X
-X 18-July-2001
-X Additional syntax has been added to support multiline SQL queries.
-X
-X If the host line begins with '+', then the SQL is opened on the same
-X connection as the previous SQL file.
-X
-X If the host line contains '-' just before the terminal ';', then
-X the file will not produce any output.
-X
-X This string can contain "\n". ";" are used to separate the four
-X functions, which must be specified in the order shown above.
-X The last (fourth) query must terminate with a ';'
-X
-X 19-July-2004
-X
-X This file is designed for PostgreSQL, which uses a different syntax
-X for getting rows of data. Specifically, a select statement must be
-X associated with a "cursor", so that one can fetch a single row.
-X
-X This can be simply done with the statement:
-X
-X DECLARE next_seq CURSOR FOR "select statement ..."
-X
-X The need for a CURSOR complicates the getlib()/ranlib() design, which
-X assumes that ranlib() can set something up that getlib() can read.
-X This can be avoided by setting up an otherwise unnecessary cursor for
-X the ranlib statement that gets a sequence.
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-X
-#include <libpq-fe.h>
-#define PGSQL_LIB 17
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#define EOSEQ 0
-/* #include "upam.h" */
-X
-#ifdef SUPERFAMNUM
-int sfnum[10], nsfnum;
-#endif
-X
-int pgsql_getlib(unsigned char *, int, char *, int, fseek_t *, int *, struct lmf_str *, long *);
-void pgsql_ranlib(char *, int, fseek_t, char *, struct lmf_str *m_fd);
-X
-#define PGSQL_BUF 4096
-X
-/* pgsql_openlib() - parse a pgSQL library specification and open it.
-X
-X   sname   name of a file holding the specification or, when it begins
-X	   with '%', the specification text itself (after the '%').
-X   ldnaseq not used by this function.
-X   sascii  stored in the returned structure (m_fptr->sascii).
-X
-X   The specification is split on ';' into:
-X     - the connection fields "host[:port] dbname user password"
-X       (leading '-' ignored; '@' stands for an empty host, user or
-X       password);
-X     - optional "DO ..." commands, executed immediately;
-X     - the sequence query, stored prefixed with
-X       "DECLARE next_seq CURSOR FOR ";
-X     - the get-description query;
-X     - the get-sequence query.
-X
-X   On success returns a newly allocated struct lmf_str with the
-X   connection open, a transaction begun and the cursor declared.
-X   On any failure a message is written to stderr, everything acquired so
-X   far (buffers, file, connection) is released, and NULL is returned. */
-struct lmf_str *
-pgsql_openlib(char *sname, int ldnaseq, int *sascii) {
-X  FILE *sql_file;
-X  PGconn *conn;
-X  PGresult *res;
-X  char *tmp_str, *ttmp_str;
-X  int tmp_str_len;
-X  char *bp, *bps, *bdp, *tp, tchar;
-X  int qs_len, qqs_len;
-X  char *sql_db = NULL;
-X  char *sql_host, *sql_dbname, *sql_user, *sql_pass;
-X  char *sql_port;
-X  char *sql_do = NULL;
-X  int sql_do_cnt;
-X  struct lmf_str *m_fptr = NULL;
-X
-X  /* if (sql_reopen) return NULL; - should not be called for re-open */
-X
-X  tmp_str_len = PGSQL_BUF;
-X  if ((tmp_str=(char *)calloc(tmp_str_len,sizeof(char)))==NULL) {
-X    fprintf(stderr,"cannot allocate %d for pgSQL buffer\n",tmp_str_len);
-X    return NULL;
-X  }
-X
-X  if (sname[0] == '%') {
-X    strncpy(tmp_str,sname+1,tmp_str_len);
-X    /* terminate at the end of the buffer; sizeof(tmp_str) would be the
-X       size of a pointer and would truncate the specification */
-X    tmp_str[tmp_str_len-1]='\0';
-X  }
-X  else {
-X    if ((sql_file=fopen(sname,"r"))==NULL) {
-X      fprintf(stderr," cannot open pgSQL file: %s\n",sname);
-X      goto error_r;
-X    }
-X
-X    if ((qs_len=(int)fread(tmp_str,sizeof(char),tmp_str_len-1,sql_file))<=0) {
-X      fprintf(stderr," cannot read pgSQL file: %s\n",sname);
-X      fclose(sql_file);
-X      goto error_r;
-X    }
-X    tmp_str[qs_len]='\0';
-X    qqs_len = qs_len;
-X    /* buffer full: grow by PGSQL_BUF and keep reading */
-X    while (qqs_len >= tmp_str_len-1) {
-X      tmp_str_len += PGSQL_BUF;
-X      /* keep the old pointer until realloc() succeeds so that it can
-X	 still be freed on failure */
-X      if ((ttmp_str=(char *)realloc(tmp_str,tmp_str_len))==NULL) {
-X	fprintf(stderr,
-X		" cannot reallocate %d for pgSQL buffer\n",tmp_str_len);
-X	fclose(sql_file);
-X	goto error_r;
-X      }
-X      tmp_str = ttmp_str;
-X      ttmp_str = &tmp_str[qqs_len];
-X      qs_len=(int)fread(ttmp_str,sizeof(char),PGSQL_BUF,sql_file);
-X      /* fread() never returns a negative count; use ferror() instead */
-X      if (ferror(sql_file)) {
-X	fprintf(stderr," cannot read pgSQL file: %s\n",sname);
-X	fclose(sql_file);
-X	goto error_r;
-X      }
-X      ttmp_str[qs_len]='\0';
-X      qqs_len += qs_len;
-X    }
-X    fclose(sql_file);
-X  }
-X
-X  /* first field: connection information */
-X  bps = tmp_str;
-X  if ((bp=strchr(bps,';'))==NULL) {
-X    fprintf(stderr," cannot find database fields:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X
-X  *bp='\0';
-X  if ((sql_db=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
-X	    (int)strlen(bps),bps);
-X    goto error_r;
-X  }
-X
-X  /* have database name, parse the fields */
-X  strcpy(sql_db,bps);	/* strcpy OK because allocated strlen(bps) */
-X  bps = bp+1;		/* points to next char after ';' */
-X  while (isspace(*bps)) bps++;
-X  *bp=';';		/* replace ; */
-X  bp = sql_db;
-X  while (*bp=='-') {*bp++ = ' ';}
-X  sql_host = strtok(bp," \t\n");
-X  sql_dbname = strtok(NULL," \t\n");
-X  sql_user = strtok(NULL," \t\n");
-X  sql_pass = strtok(NULL," \t\n");
-X  /* all four fields are required; strtok() returns NULL when one is
-X     missing and the tests below would dereference it */
-X  if (sql_host==NULL || sql_dbname==NULL ||
-X      sql_user==NULL || sql_pass==NULL) {
-X    fprintf(stderr,
-X	    " cannot parse database fields (host dbname user password):\n%s\n",
-X	    tmp_str);
-X    goto error_r;
-X  }
-X  if (sql_host[0]=='@') sql_host="";
-X  if (sql_user[0]=='@') sql_user="";
-X  if (sql_pass[0]=='@') sql_pass="";
-X  if ((tp=strchr(sql_host,':'))!=NULL) {
-X    sql_port = tp+1;
-X    *tp='\0';
-X  }
-X  else sql_port = "";
-X
-X  /* we have all the info we need to open a database, allocate lmf_str */
-X  if ((m_fptr = (struct lmf_str *)calloc(1,sizeof(struct lmf_str)))==NULL) {
-X    fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
-X	    (long)sizeof(struct lmf_str),sname);
-X    goto error_r;
-X  }
-X
-X  /* have our struct, initialize it */
-X
-X  strncpy(m_fptr->lb_name,sname,MAX_FN);
-X  m_fptr->lb_name[MAX_FN-1]='\0';
-X
-X  m_fptr->sascii = sascii;
-X
-X  m_fptr->sql_db = sql_db;
-X  m_fptr->getlib = pgsql_getlib;
-X  m_fptr->ranlib = pgsql_ranlib;
-X  m_fptr->mm_flg = 0;
-X  m_fptr->sql_reopen = 0;
-X  m_fptr->lb_type = PGSQL_LIB;
-X
-X  /* now open the database, if necessary */
-X  conn = PQsetdbLogin(sql_host,
-X		      sql_port,
-X		      NULL,
-X		      NULL,
-X		      sql_dbname,
-X		      sql_user,
-X		      sql_pass);
-X
-X  if (PQstatus(conn) != CONNECTION_OK) {
-X    fprintf(stderr, "Connection to database '%s' failed.\n", PQdb(conn));
-X    fprintf(stderr, "%s", PQerrorMessage(conn));
-X    PQfinish(conn);	/* m_fptr->pgsql_conn is still NULL here */
-X    goto error_r;
-X  }
-X  else {
-X    m_fptr->pgsql_conn = conn;
-X    fprintf(stderr," Database %s opened on %s\n",sql_dbname,sql_host);
-X  }
-X
-X  /* check for 'DO' command - copy to 'DO' string */
-X  while (*bps == '-') { *bps++=' ';}
-X  if (isspace(bps[-1]) && toupper(bps[0])=='D' &&
-X      toupper(bps[1])=='O' && isspace(bps[2])) {
-X    /* have some 'DO' commands */
-X    /* check where the end of the last DO statement is */
-X
-X    sql_do_cnt = 1;	/* count up the number of 'DO' statements for later */
-X    bdp=bps+3;
-X    while ((bp=strchr(bdp,';'))!=NULL) {
-X      /* start right after the ';' (not two characters on) so that a ';'
-X	 at the very end of the text is not read past its terminator */
-X      tp = bp+1;
-X      while (isspace(*tp) || *tp == '-') {*tp++ = ' ';}
-X      if (toupper(*tp)=='D' && toupper(tp[1])=='O' && isspace(tp[2])) {
-X	sql_do_cnt++;		/* count the DO statements */
-X	bdp = tp+3;		/* move to the next DO statement */
-X      }
-X      else break;
-X    }
-X    if (bp == NULL) {
-X      fprintf(stderr," terminal ';' not found: %s\n",bps);
-X      goto error_r;
-X    }
-X
-X    /* end of the last DO, begin of select */
-X    tchar = *(bp+1);
-X    *(bp+1)='\0';		/* terminate DO strings */
-X    if ((sql_do = calloc(strlen(bps)+1, sizeof(char)))==NULL) {
-X      fprintf(stderr," cannot allocate %d for sql_do\n",(int)strlen(bps));
-X      goto error_r;
-X    }
-X    strcpy(sql_do,bps);
-X    *(bp+1)=tchar;		/* restore the character after ';' */
-X    bps = bp+1;
-X    while (isspace(*bps)) bps++;
-X
-X    /* all the DO commands are in sql_do in the form:
-X       DO command1; DO command2; DO command3; */
-X    bdp = sql_do;
-X    while (sql_do_cnt-- && (bp=strchr(bdp,';'))!=NULL) {
-X      /* do the pgsql statement on bdp+3 */
-X      /* check for error */
-X      *bp='\0';
-X      res = PQexec(m_fptr->pgsql_conn,bdp+3);
-X      if (PQresultStatus(res) != PGRES_COMMAND_OK) {
-X	fprintf(stderr,"*** Error %s - query failed:\n%s\n",
-X		PQerrorMessage(m_fptr->pgsql_conn), bdp+3);
-X	PQclear(res);
-X	goto error_r;
-X      }
-X      PQclear(res);
-X
-X      *bp=';';
-X      bdp = bp+1;
-X      while (isspace(*bdp)) bdp++;
-X    }
-X
-X    /* the DO text is only needed while it is being executed */
-X    free(sql_do);
-X    sql_do = NULL;
-X  }
-X
-X  /* copy 1st query field */
-X  if ((bp=strchr(bps,';'))!=NULL) {
-X    *bp='\0';
-X    if ((m_fptr->sql_query=calloc(strlen(bps)+41,sizeof(char)))==NULL) {
-X      fprintf(stderr, " cannot allocate space for query string [%d], %s\n",
-X	      (int)strlen(bps),bps);
-X      goto error_r;
-X    }
-X    /* have query, copy it */
-X    else {
-X      strncpy(m_fptr->sql_query,"DECLARE next_seq CURSOR FOR ",40);
-X      strcat(m_fptr->sql_query,bps);
-X      *bp=';';		/* replace ; */
-X      bps = bp+1;
-X      while(isspace(*bps)) bps++;
-X    }
-X  }
-X  else {
-X    fprintf(stderr," cannot find database query field:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X
-X  /* copy get_desc field */
-X  if ((bp=strchr(bps,';'))!=NULL) {
-X    *bp='\0';
-X    if ((m_fptr->sql_getdesc=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X      fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
-X	      (int)strlen(bps),bps);
-X      goto error_r;
-X    }
-X    /* have get_desc, copy it */
-X    else {
-X      strcpy(m_fptr->sql_getdesc,bps);
-X      *bp=';';		/* replace ; */
-X      bps = bp+1;
-X      while(isspace(*bps)) bps++;
-X    }
-X  }
-X  else {
-X    fprintf(stderr," cannot find getdesc field:\n%s\n",tmp_str);
-X    goto error_r;
-X  }
-X
-X  /* copy get_seq field; the terminating ';' is optional here */
-X  if ((bp=strchr(bps,';'))!=NULL) { *bp='\0';}
-X
-X  if ((m_fptr->sql_getseq=calloc(strlen(bps)+1,sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot allocate space for database name [%d], %s\n",
-X	    (int)strlen(bps),bps);
-X    goto error_r;
-X  }
-X
-X  if (strlen(bps) > 0) {
-X    strcpy(m_fptr->sql_getseq,bps);
-X  }
-X  else {
-X    fprintf(stderr," cannot find getseq field:\n%s\n",tmp_str);
-X    goto error_r;	/* release the connection and buffers */
-X  }
-X  if (bp!=NULL) *bp=';';
-X
-X  /* now do the fetch */
-X
-X  res = PQexec(m_fptr->pgsql_conn,"BEGIN;");
-X  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
-X    fprintf(stderr,"*** Error %s - BEGIN failed:\n",
-X	    PQerrorMessage(conn));
-X    PQclear(res);
-X    goto error_r;
-X  }
-X  PQclear(res);
-X
-X  res = PQexec(m_fptr->pgsql_conn, m_fptr->sql_query);
-X  if (PQresultStatus(res) != PGRES_COMMAND_OK) {
-X    fprintf(stderr,"*** Error %d:%s - query failed:\n%s\n",
-X	    PQresultStatus(res),PQerrorMessage(conn), m_fptr->sql_query);
-X    PQclear(res);
-X    goto error_r;
-X  }
-X  PQclear(res);
-X  m_fptr->pgsql_res=NULL;
-X
-X  /* every query string has been copied; the parse buffer is done */
-X  free(tmp_str);
-X  return m_fptr;
-X
-X error_r:
-X  /* common failure exit: every pointer is either NULL or owned here */
-X  if (m_fptr != NULL) {
-X    if (m_fptr->pgsql_conn != NULL) PQfinish(m_fptr->pgsql_conn);
-X    free(m_fptr->sql_getseq);
-X    free(m_fptr->sql_getdesc);
-X    free(m_fptr->sql_query);
-X    free(m_fptr);
-X  }
-X  free(sql_do);
-X  free(sql_db);
-X  free(tmp_str);
-X  return NULL;
-}
-X
-/* pgsql_reopen(): mark the library handle as re-opened (sql_reopen = 1)
-X   and give the very same handle back; no database call is made here. */
-struct lmf_str *
-pgsql_reopen(struct lmf_str *m_fptr)
-{
-X  struct lmf_str *handle = m_fptr;
-X
-X  handle->sql_reopen = 1;
-X  return handle;
-}
-X
-/* pgsql_closelib(): release the pending result (if any) and close the
-X   PostgreSQL connection held by m_fptr.  A NULL handle is ignored.
-X
-X   The released pointers are reset to NULL so that later calls cannot
-X   touch freed libpq objects: pgsql_getlib() tests pgsql_res against
-X   NULL, and PQclear(NULL)/PQfinish(NULL) are harmless, which also makes
-X   a second close safe. */
-void
-pgsql_closelib(struct lmf_str *m_fptr) {
-X
-X  if (m_fptr == NULL) return;
-X
-X  if (m_fptr->pgsql_res != NULL) {
-X    PQclear(m_fptr->pgsql_res);
-X    m_fptr->pgsql_res = NULL;
-X  }
-X
-X  PQfinish(m_fptr->pgsql_conn);
-X  m_fptr->pgsql_conn = NULL;
-X  m_fptr->sql_reopen=0;
-}
-X
-/*
-static char *sql_seq = NULL, *sql_seqp;
-static int sql_seq_len;
-*/
-X
-/* pgsql_getlib(): copy the next chunk of library sequence into seq[].
-X
-X   When *lcont is 0 a new row is taken: either the result already held
-X   in lm_fd->pgsql_res, or a fresh "FETCH next_seq".  Column 0 of the
-X   row is the identifier (copied to libstr and, through atol(), to
-X   *libpos), column 1 is the sequence text, and an optional column 2 is
-X   a description; an "@C:<offset>" tag in column 2 sets *l_off.
-X   When *lcont is non-zero the copy resumes at lm_fd->sql_seqp.
-X
-X   Residues are translated through lm_fd->sascii and seq[] is
-X   terminated with EOSEQ.  If the buffer (maxs bytes) fills before the
-X   sequence text ends, *lcont is incremented and the result is kept;
-X   otherwise *lcont is reset to 0 and the result is released.
-X
-X   Returns the number of residues stored, or -1 when the FETCH fails
-X   or returns no row. */
-int
-pgsql_getlib( unsigned char *seq,
-X              int maxs,
-X              char *libstr,
-X              int n_libstr,
-X              fseek_t *libpos,
-X              int *lcont,
-X              struct lmf_str *lm_fd,
-X              long *l_off)
-{
-X  register unsigned char *cp, *seqp;
-X  register int *ap;
-X  unsigned char *seqm, *seqm1;
-X  PGresult *res;
-X
-X  char *bp;
-X  /* int l_start, l_stop, len; */
-X
-X  seqp = seq;
-X  /* leave head-room for the 10-way unrolled copy loop below */
-X  seqm = &seq[maxs-9];
-X  seqm1 = seqm-1;
-X
-X  ap = lm_fd->sascii;
-X
-#ifdef SUPERFAMNUM
-X  sfnum[0]=nsfnum = 0;
-#endif
-X
-X  if (*lcont==0) {
-X    /* get a row, with UID, sequence */
-X    *l_off = 1;
-X
-X    /* check to see if we already have a valid result */
-X    if (lm_fd->pgsql_res==NULL) {
-X      res = PQexec(lm_fd->pgsql_conn,"FETCH next_seq");
-X      if (PQresultStatus(res) != PGRES_TUPLES_OK) {
-X        fprintf(stderr,"*** Error %s - getlib FETCH failed:\n%s\n",
-X                PQerrorMessage(lm_fd->pgsql_conn), lm_fd->sql_query);
-X        PQclear(res);
-X        lm_fd->pgsql_res = NULL;
-X        *lcont = 0;
-X        *seqp = EOSEQ;
-X        return -1;
-X      }
-X    }
-X    else {res = lm_fd->pgsql_res;}
-X
-X    if (PQntuples(res)>0) {
-X      lm_fd->pgsql_res = res;
-X      *libpos=(fseek_t)atol(PQgetvalue(res,0,0));
-X
-X      *l_off = 1;
-X      if (PQnfields(res) > 2 && (bp=strchr(PQgetvalue(res,0,2),'@'))!=NULL &&
-X          !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
-X
-X      lm_fd->sql_seqp = PQgetvalue(res,0,1);
-X
-X      /* because of changes in pgsql_ranlib(), it is essential that
-X         libstr return the unique identifier; thus we must use
-X         column 0, not column 2.  Using libstr as the UID allows
-X         one to use any UID, not just numeric ones.  *libpos is not
-X         used for pgsql libraries.
-X      */
-X
-X      if (n_libstr <= MAX_UID) {
-X        /* the normal case returns only GID/sequence */
-X        strncpy(libstr,PQgetvalue(res,0,0),MAX_UID-1);
-X        libstr[MAX_UID-1]='\0';
-X      }
-X      else {
-X        /* here we do not use the UID in libstr, because we are not
-X           going back into the db */
-X        /* the PVM case also returns a long description */
-X        if (PQnfields(res)>2) {
-X          strncpy(libstr,PQgetvalue(res,0,2),n_libstr-1);
-X        }
-X        else {
-X          strncpy(libstr,PQgetvalue(res,0,0),n_libstr-1);
-X        }
-X        libstr[n_libstr-1]='\0';
-X      }
-X    }
-X    else {
-X      /* no row: res is either the stored result or a freshly fetched
-X         one that was never stored, so clear res itself; clearing
-X         lm_fd->pgsql_res here would leak a fresh, empty result */
-X      PQclear(res);
-X      lm_fd->pgsql_res=NULL;
-X      *lcont = 0;
-X      *seqp = EOSEQ;
-X      return -1;
-X    }
-X  }
-X
-X  /* translate residues ten at a time; the chain stops at the first
-X     code that is not < NA (which includes the terminating NUL) */
-X  for (cp=(unsigned char *)lm_fd->sql_seqp; seqp<seqm1 && *cp; ) {
-X    if ((*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA &&
-X        (*seqp++=ap[*cp++])<NA) continue;
-X    --seqp;   /* drop the code that ended the chain */
-X    if (*(cp-1)==0) break;
-X  }
-X  lm_fd->sql_seqp = (char *)cp;
-X
-X  if (seqp>=seqm1) (*lcont)++;
-X  else {
-X    *lcont=0;
-X    PQclear(lm_fd->pgsql_res);
-X    lm_fd->pgsql_res = NULL;
-X  }
-X
-X  *seqp = EOSEQ;
-X  /* if ((int)(seqp-seq)==0) return 1; */
-X  return (int)(seqp-seq);
-}
-X
-/* pgsql_ranlib(): fetch the description and the sequence of one entry.
-X
-X   libstr holds the entry's identifier; it replaces the '#' placeholder
-X   in lm_fd->sql_getdesc and lm_fd->sql_getseq to build the two queries.
-X   The description (at most cnt-1 characters, cut at the first CR or
-X   LF) is returned in str.  The result of the sequence query is left in
-X   lm_fd->pgsql_res.  libpos is stored in lm_fd->lpos and appears in
-X   the error text written to str.
-X
-X   A failed sequence query terminates the program with exit(1).
-X
-X   NOTE(review): libstr is spliced into the SQL text without quoting or
-X   escaping - confirm that callers only pass identifiers that came from
-X   the database itself. */
-void
-pgsql_ranlib(char *str,
-X             int cnt,
-X             fseek_t libpos,
-X             char *libstr,
-X             struct lmf_str *lm_fd
-X             )
-{
-X  char tmp_query[1024];
-X  PGresult *res;
-X  char *bp;
-X
-X  str[0]='\0';
-X
-X  /* drop any result left over from an earlier call before it can be
-X     overwritten; done first so that every path below releases it */
-X  if (lm_fd->pgsql_res !=NULL) {
-X    PQclear(lm_fd->pgsql_res);
-X    lm_fd->pgsql_res = NULL;
-X  }
-X
-X  /* put the UID into the query string - cannot use sprintf because of
-X     "%' etc */
-X
-X  /* sprintf(tmp_query,lm_fd->sql_getdesc,libpos); */
-X
-X  if ((bp=strchr(lm_fd->sql_getdesc,'#'))==NULL) {
-X    fprintf(stderr, "no KEY position in %s\n",lm_fd->sql_getdesc);
-X    goto next1;
-X  }
-X  else {
-X    *bp = '\0';   /* temporarily split the template at the placeholder */
-X    strncpy(tmp_query,lm_fd->sql_getdesc,sizeof(tmp_query));
-X    tmp_query[sizeof(tmp_query)-1]='\0';
-X    /* strncat()'s count is the number of characters to append, so it
-X       must be the space still free in tmp_query, not its total size */
-X    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
-X    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
-X    *bp='#';
-X    lm_fd->lpos = libpos;
-X  }
-X
-X  /* fprintf(stderr," requesting: %s\n",tmp_query); */
-X
-X  res = PQexec(lm_fd->pgsql_conn,tmp_query);
-X  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
-X    lm_fd->pgsql_res = NULL;
-X
-X    sprintf(str,"gi|%ld ***Error - query failed***",(long)libpos);
-X    fprintf(stderr,"*** Error %s - ranlib DESC failed:\n%s\n",
-X            PQerrorMessage(lm_fd->pgsql_conn), tmp_query);
-X    PQclear(res);
-X    goto next1;
-X  }
-X
-X  if (PQntuples(res)<=0) {
-/*  fprintf(stderr,"*** Error = use result failed\n%s\n",
-X           pgsql_error(lm_fd->pgsql_conn)); */
-X    sprintf(str,"gi|%ld ***use result failed***",(long)libpos);
-X    goto next0;
-X  }
-X
-X  /* prefer the second column when the query returned one; asking
-X     PQnfields() avoids probing an out-of-range column */
-X  if (PQnfields(res) > 1) strncpy(str,PQgetvalue(res,0,1),cnt-1);
-X  else strncpy(str,PQgetvalue(res,0,0),cnt-1);
-X  str[cnt-1]='\0';
-X  /* change this later to support multiple row returns */
-X  /*
-X  while (strlen(str) < cnt-1 &&
-X         (lm_fd->sql_row = pgsql_fetch_row(lm_fd->pgsql_res))!=NULL) {
-X    strncat(str," ",cnt-2-strlen(str));
-X    if (lm_fd->sql_row[1]!=NULL)
-X      strncat(str,lm_fd->sql_row[1],cnt-2-strlen(str));
-X    else break;
-X  }
-X  */
-X
-X  str[cnt-1]='\0';
-X  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
-X  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
-X
-X next0:
-X  PQclear(res);
-X next1:
-X  lm_fd->pgsql_res = NULL;
-X
-X  /* get the sequence, set up for pgsql_getseq() */
-X  /* put the UID into the query string */
-X
-X  if ((bp=strchr(lm_fd->sql_getseq,'#'))==NULL) {
-X    fprintf(stderr, "no GID position in %s\n",lm_fd->sql_getseq);
-X    return;
-X  }
-X  else {
-X    *bp = '\0';
-X    strncpy(tmp_query,lm_fd->sql_getseq,sizeof(tmp_query));
-X    tmp_query[sizeof(tmp_query)-1]='\0';
-X    /* as above: bound each append by the space that is left */
-X    strncat(tmp_query,libstr,sizeof(tmp_query)-strlen(tmp_query)-1);
-X    strncat(tmp_query,bp+1,sizeof(tmp_query)-strlen(tmp_query)-1);
-X    *bp='#';
-X  }
-X
-X  res = PQexec(lm_fd->pgsql_conn,tmp_query);
-X  if (PQresultStatus(res) != PGRES_TUPLES_OK) {
-X    PQclear(res);
-X    lm_fd->pgsql_res = NULL;
-X    fprintf(stderr,"*** Error - ranlib SEQ failed:\n%s\n%s\n",tmp_query,
-X            PQerrorMessage(lm_fd->pgsql_conn));
-X    exit(1);
-X  }
-X  else {
-X    /* kept for the next pgsql_getlib() call, which consumes a non-NULL
-X       pgsql_res instead of issuing a FETCH */
-X    lm_fd->pgsql_res = res;
-X  }
-}
-SHAR_EOF
-chmod 0644 pgsql_lib.c ||
-echo 'restore of pgsql_lib.c failed'
-Wc_c="`wc -c < 'pgsql_lib.c'`"
-test 16978 -eq "$Wc_c" ||
- echo 'pgsql_lib.c: original size 16978, current size' "$Wc_c"
-fi
-# ============= pirpsd.sql ==============
-if test -f 'pirpsd.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping pirpsd.sql (File already exists)'
-else
-echo 'x - extracting pirpsd.sql (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pirpsd.sql' &&
-xdb.wrplab PIRPSD seq_demo demo_pass;
-SELECT PIRID, SEQUENCES, PIRID
-X FROM c_psdsequence;
-SELECT PIRID, concat(PIRID," ",TITLE) FROM c_psdmain
-X WHERE PIRID='#';
-SELECT PIRID, SEQUENCES, PIRID
-X FROM c_psdsequence
-X WHERE PIRID='#';
-SHAR_EOF
-chmod 0644 pirpsd.sql ||
-echo 'restore of pirpsd.sql failed'
-Wc_c="`wc -c < 'pirpsd.sql'`"
-test 230 -eq "$Wc_c" ||
- echo 'pirpsd.sql: original size 230, current size' "$Wc_c"
-fi
-# ============= print_pssm.c ==============
-if test -f 'print_pssm.c' -a X"$1" != X"-c"; then
- echo 'x - skipping print_pssm.c (File already exists)'
-else
-echo 'x - extracting print_pssm.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'print_pssm.c' &&
-/* print_pssm.c - 21-Jan-2005
-X
-X copyright (c) 2005 - William R. Pearson and the University of Virginia
-X
-X read a binary PSSM checkpoint file from blastpgp, and produce an ascii
-X formatted file
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <math.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "mm_file.h"
-#include "param.h"
-X
-#include "uascii.h"
-#include "upam.h"
-X
-void initenv(int, char **, struct pstruct *, char *);
-void read_pssm();
-void alloc_pam();
-int **alloc_pam2p();
-void initpam2();
-void fill_pam();
-double get_lambda();
-X
-extern int optind;
-extern char *optarg;
-X
-/* main(): set up a default protein scoring matrix (BL50), parse the
-X   command line with initenv(), read the query sequence named there,
-X   and pass the PSSM checkpoint file given with -P to read_pssm().
-X
-X   Returns 0 on success and 1 when no -P file was given, the profile
-X   matrices cannot be allocated, or the checkpoint file cannot be
-X   opened. */
-int
-main(int argc, char **argv) {
-X
-X  char *aa0;
-X  char libstr[MAX_FN];
-X  char qname[MAX_FN];
-X  int sq0off;
-X  int i, n0;
-X  FILE *fp;
-X  struct pstruct pst, *ppst;
-X
-X  /* pst lives on the stack: clear it so that fields which are only set
-X     by command line options (e.g. pam_pssm) start from a known value */
-X  memset(&pst, 0, sizeof(pst));
-X  /* initenv() fills qname only when a file name argument is present */
-X  qname[0] = '\0';
-X
-X  /* stuff from initfa.c/h_init() */
-X
-X  memcpy(qascii,aascii,sizeof(qascii));
-X
-X  /* initialize a pam matrix */
-X  ppst = &pst;
-X  strncpy(ppst->pamfile,"BL50",MAX_FN);
-X  standard_pam(ppst->pamfile,ppst,0,0);
-X
-X  /* this is always protein by default */
-X  ppst->nsq = naa;
-X  ppst->nsqx = naax;
-X  for (i=0; i<=ppst->nsqx; i++) {
-X    ppst->sq[i] = aa[i];
-X    ppst->hsq[i] = haa[i];
-X    ppst->sqx[i]=aax[i];	/* sq = aa */
-X    ppst->hsqx[i]=haax[i];	/* hsq = haa */
-X  }
-X  ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';
-X
-X  if ((aa0 = calloc(MAXTST,sizeof(char)))==NULL) {
-X    fprintf(stderr,"Cannot allocate aa0\n");
-X    exit(1);
-X  }
-X
-X  initenv(argc, argv, &pst, qname);
-X  alloc_pam(pst.nsq+1,pst.nsq+1, &pst);
-X  initpam2(&pst);
-X
-X  n0 = getseq (qname, qascii, aa0, MAXTST, libstr,&sq0off);
-X
-X  if (!pst.pam_pssm) {
-X    fprintf(stderr," ** ERROR ** No -P PSSM provided\n");
-X    return 1;
-X  }
-X
-X  ppst->pam2p[0] = alloc_pam2p(n0,pst.nsq);
-X  ppst->pam2p[1] = alloc_pam2p(n0,pst.nsq);
-X  /* alloc_pam2p() has already reported the failure */
-X  if (ppst->pam2p[0] == NULL || ppst->pam2p[1] == NULL) return 1;
-X
-X  if ((fp = fopen(pst.pgpfile,"rb"))==NULL) {
-X    fprintf(stderr," ** ERROR ** cannot open PSSM file %s\n",pst.pgpfile);
-X    return 1;
-X  }
-X
-X  read_pssm(aa0, n0, pst.nsq, pst.pamscale,fp,ppst);
-X  fclose(fp);
-X
-X  return 0;
-}
-X
-/* initenv(): parse the command line.
-X
-X   -P file   PSSM checkpoint file; stored in ppst->pgpfile and flagged
-X             with ppst->pam_pssm = 1
-X   -s name   scoring matrix; tried first as a built-in name with
-X             standard_pam(), otherwise loaded with initpam()
-X
-X   The first non-option argument, if any, is copied to qname (at most
-X   MAX_FN-1 characters). */
-void
-initenv(int argc, char **argv, struct pstruct *ppst, char *qname) {
-X  /* int, not char: getopt() returns an int, and on platforms where
-X     plain char is unsigned a char could never compare equal to EOF */
-X  int copt;
-X
-X  pascii = aascii;
-X
-X  while ((copt = getopt(argc, argv, "P:s:"))!=EOF) {
-X    switch (copt) {
-X    case 'P':
-X      strncpy(ppst->pgpfile,optarg,MAX_FN);
-X      ppst->pgpfile[MAX_FN-1]='\0';
-X      ppst->pam_pssm = 1;
-X      break;
-X
-X    case 's':
-X      strncpy (ppst->pamfile, optarg, 120);
-X      ppst->pamfile[120-1]='\0';
-X      if (!standard_pam(ppst->pamfile,ppst,0, 0)) {
-X        initpam (ppst->pamfile, ppst);
-X      }
-X      ppst->pam_set=1;
-X      break;
-X    }
-X  }
-X  /* step back one so that argv[optind+1] is the first non-option */
-X  optind--;
-X
-X  if (argc - optind > 1) {
-X    strncpy(qname, argv[optind+1], MAX_FN);
-X    /* strncpy() does not terminate the copy when the source is too long */
-X    qname[MAX_FN-1]='\0';
-X  }
-}
-X
-X
-/*
-X *aa0 - query sequence
-X n0 - length
-X pamscale - scaling for pam matrix - provided by apam.c, either
-X 0.346574 = ln(2)/2 (P120, BL62) or
-X 0.231049 = ln(2)/3 (P250, BL50)
-*/
-X
-#define N_EFFECT 20
-X
-/* read_pssm(): read a binary PSSM checkpoint from fp, print it as text
-X   on stdout, and rescale it into ppst->pam2p[0].
-X
-X   aa0       - query sequence (not used by the current code)
-X   n0        - query length; the profile length stored in the file
-X               must equal n0
-X   nsq       - alphabet size (not used by the current code)
-X   pamscale  - divisor applied to the log-odds values
-X   fp        - checkpoint file, opened for binary reading
-X   ppst      - supplies pam2[0] (reference lambda) and pam2p[0] (output)
-X
-X   The layout read here is: one int length, that many query
-X   characters, then N_EFFECT doubles per position.  The integer scores
-X   are scaled until their lambda matches that of ppst->pam2[0].  Any
-X   read or allocation failure terminates the program with exit(1). */
-void
-read_pssm(unsigned char *aa0, int n0, int nsq, double pamscale, FILE *fp, struct pstruct *ppst) {
-X  int i, len;
-X  int qi, rj;
-X  int **pam2p;
-X  int first, too_high;
-X  char *query;
-X  double freq, **freq2d, lambda, new_lambda;
-X  double scale, scale_high, scale_low;
-X
-X  pam2p = ppst->pam2p[0];
-X
-X  /* always assigned on the first pass of the bracketing loop below;
-X     given defined starting values for the compiler's benefit */
-X  too_high = 0;
-X  scale_high = scale_low = 1.0;
-X
-X  len = 0;	/* keeps the diagnostic below well defined if fread() fails */
-X  if(1 != fread(&len, sizeof(int), 1, fp)) {
-X    fprintf(stderr, "error reading from checkpoint file: %d\n", len);
-X    exit(1);
-X  }
-X
-X  if(len != n0) {
-X    fprintf(stderr, "profile length (%d) and query length (%d) don't match!\n",
-X            len,n0);
-X    exit(1);
-X  }
-X
-X  /* read over query sequence stored in BLAST profile; one extra zeroed
-X     byte keeps it NUL-terminated, because it is printed with %s and
-X     indexed by get_lambda() */
-X  if(NULL == (query = (char *) calloc((size_t)len + 1, sizeof(char)))) {
-X    fprintf(stderr, "Couldn't allocate memory for query!\n");
-X    exit(1);
-X  }
-X
-X  if((size_t)len != fread(query, sizeof(char), len, fp)) {
-X    fprintf(stderr, "Couldn't read query sequence from profile: %s\n", query);
-X    exit(1);
-X  }
-X
-X  printf("%d\n%s\n",len,query);
-X
-X  /* currently we don't do anything with query; ideally, we should
-X     check to see that it actually matches aa0 ... */
-X
-X  /* quick 2d array alloc: */
-X  if((freq2d = (double **) calloc(n0, sizeof(double *))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
-X    exit(1);
-X  }
-X
-X  if((freq2d[0] = (double *) calloc(n0 * N_EFFECT, sizeof(double))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for frequencies!\n");
-X    exit(1);
-X  }
-X
-X  /* a little pointer arithmetic to fill out 2d array: */
-X  for (qi = 1 ; qi < n0 ; qi++) {
-X    freq2d[qi] = freq2d[0] + (N_EFFECT * qi);
-X  }
-X
-X  for (qi = 0 ; qi < n0 ; qi++) {
-X    printf("%c",query[qi]);
-X    for (rj = 0 ; rj < N_EFFECT ; rj++) {
-X      if(1 != fread(&freq, sizeof(double), 1, fp)) {
-X        fprintf(stderr, "Error while reading frequencies!\n");
-X        exit(1);
-X      }
-X      printf(" %8.7g",freq*10.0);
-X
-X      if (freq > 1e-12) {
-X        /* log-odds against the background frequency of residue rj+1 */
-X        freq = log(freq /((double) (rrcounts[rj+1])/(double) rrtotal));
-X        freq /= pamscale; /* this gets us close to original pam scores */
-X        freq2d[qi][rj] = freq;
-X      }
-X      else {freq2d[qi][rj] = freq;}
-X    }
-X    printf("\n");
-X  }
-X
-X
-X  /* now figure out the right scale */
-X  scale = 1.0;
-X  lambda = get_lambda(ppst->pam2[0], 20, 20, "\0ARNDCQEGHILKMFPSTWYV");
-X
-X  /* should be near 1.0 because of our initial scaling by ppst->pamscale */
-X  fprintf(stderr, "real_lambda: %g\n", lambda);
-X
-X  /* get initial high/low scale values: */
-X  first = 1;
-X  while (1) {
-X    fill_pam(pam2p, n0, 20, freq2d, scale);
-X    new_lambda = get_lambda(pam2p, n0, 20, query);
-X
-X    if (new_lambda > lambda) {
-X      if (first) {
-X        first = 0;
-X        scale = scale_high = 1.0 + 0.05;
-X        scale_low = 1.0;
-X        too_high = 1;
-X      } else {
-X        /* crossed over from the other side: bracket found */
-X        if (!too_high) break;
-X        scale = (scale_high += scale_high - 1.0);
-X      }
-X    } else if (new_lambda > 0) {
-X      if (first) {
-X        first = 0;
-X        scale_high = 1.0;
-X        scale = scale_low = 1.0 - 0.05;
-X        too_high = 0;
-X      } else {
-X        if (too_high) break;
-X        scale = (scale_low += scale_low - 1.0);
-X      }
-X    } else {
-X      fprintf(stderr, "new_lambda (%g) <= 0; matrix has positive average score", new_lambda);
-X      exit(1);
-X    }
-X  }
-X
-X  /* now do binary search between low and high */
-X  for (i = 0 ; i < 10 ; i++) {
-X    scale = 0.5 * (scale_high + scale_low);
-X    fill_pam(pam2p, n0, 20, freq2d, scale);
-X    new_lambda = get_lambda(pam2p, n0, 20, query);
-X
-X    if (new_lambda > lambda) scale_low = scale;
-X    else scale_high = scale;
-X  }
-X
-X  scale = 0.5 * (scale_high + scale_low);
-X  fill_pam(pam2p, n0, 20, freq2d, scale);
-X
-X  fprintf(stderr, "final scale: %g\n", scale);
-X
-X  for (qi = 0 ; qi < n0 ; qi++) {
-X    fprintf(stderr, "%4d %c:  ", qi+1, query[qi]);
-X    for (rj = 1 ; rj <= 20 ; rj++) {
-X      fprintf(stderr, "%4d", pam2p[qi][rj]);
-X    }
-X    fprintf(stderr, "\n");
-X  }
-X
-X  free(freq2d[0]);
-X  free(freq2d);
-X
-X  free(query);
-}
-X
-/*
-X * alloc_pam(): builds the two d1 x d2 integer score tables.
-X *
-X * ppst->pam2[0] and ppst->pam2[1] each receive an array of d1 row
-X * pointers; the rows of pam2[0] point into one contiguous d1*d2 block
-X * kept in pam12, those of pam2[1] into a second block kept in pam12x.
-X * Any allocation failure is reported on stderr and ends the program.
-X */
-void
-alloc_pam (int d1, int d2, struct pstruct *ppst)
-{
-X  int row;
-X  int *cells;
-X
-X  /* row pointer vectors first */
-X  ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *));
-X  if (ppst->pam2[0] == NULL) {
-X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
-X    exit(1);
-X  }
-X
-X  ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *));
-X  if (ppst->pam2[1] == NULL) {
-X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
-X    exit(1);
-X  }
-X
-X  /* storage for table 0, then hook up its rows */
-X  pam12 = (int *) malloc (d1 * d2 * sizeof (int));
-X  if (pam12 == NULL) {
-X    fprintf(stderr,"Cannot allocate 2D pam matrix: %d",d1);
-X    exit(1);
-X  }
-X
-X  cells = pam12;
-X  for (row = 0; row < d1; row++) {
-X    ppst->pam2[0][row] = cells;
-X    cells += d2;
-X  }
-X
-X  /* storage for table 1, then hook up its rows */
-X  pam12x = (int *) malloc (d1 * d2 * sizeof (int));
-X  if (pam12x == NULL) {
-X    fprintf(stderr,"Cannot allocate 2d pam matrix: %d",d2);
-X    exit(1);
-X  }
-X
-X  cells = pam12x;
-X  for (row = 0; row < d1; row++) {
-X    ppst->pam2[1][row] = cells;
-X    cells += d2;
-X  }
-}
-X
-/* fill_pam(): for every position i in [0,n0) and residue j in [1,nsq],
-X   store scale * freq2d[i][j-1], rounded to the nearest integer (halves
-X   away from zero), in pam2p[i][j].  Column 0 of pam2p is not written. */
-void
-fill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {
-X  int pos, res;
-X  double scaled;
-X  double *src;
-X  int *dst;
-X
-X  for (pos = 0 ; pos < n0 ; pos++) {
-X    src = freq2d[pos];
-X    dst = pam2p[pos];
-X    for (res = 0 ; res < nsq ; res++) {
-X      scaled = scale * src[res];
-X      /* add or subtract one half so that the cast rounds, not truncates */
-X      scaled = (scaled < 0.0) ? scaled - 0.5 : scaled + 0.5;
-X      dst[res+1] = (int)(scaled);
-X    }
-X  }
-}
-X
-/*
-X * initpam2(struct pstruct *ppst): expands the 1-D lower-triangle pam[]
-X * array into the symmetric 2-D table ppst->pam2[0].
-X *
-X * Row 0 and column 0 are filled with -BIGNUM.  ppst->pam_l and
-X * ppst->pam_h track the lowest and highest score stored.  For RNA, the
-X * A:G, C:T and C:U cells are set to one less than the G:G score.  If
-X * ppst->pam_x_set is on, the 'X' and '*' rows/columns are overwritten
-X * with pam_xm / pam_xx; otherwise pam_xx and pam_xm are read back from
-X * the table.
-X */
-void initpam2 (struct pstruct *ppst)
-{
-X  int **mat;
-X  int row, col, next, nsq;
-X  int sa_x, sa_t, score;
-X
-X  mat = ppst->pam2[0];
-X  nsq = ppst->nsq;
-X  sa_x = pascii['X'];
-X  sa_t = pascii['*'];
-X
-X  mat[0][0] = -BIGNUM;
-X  ppst->pam_h = -1;
-X  ppst->pam_l = 1;
-X
-X  next = 0;
-X  for (row = 1; row <= nsq; row++) {
-X    mat[row][0] = -BIGNUM;
-X    mat[0][row] = -BIGNUM;
-X    for (col = 1; col <= row; col++) {
-X      score = pam[next++] - ppst->pamoff;
-X      mat[row][col] = score;
-X      mat[col][row] = score;
-X      if (ppst->pam_l > score) ppst->pam_l = score;
-X      if (ppst->pam_h < score) ppst->pam_h = score;
-X    }
-X  }
-X
-X  ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);
-X
-X  if (ppst->dnaseq == SEQT_RNA) {
-X    score = mat[nascii['G']][nascii['G']] - 1;
-X    mat[nascii['C']][nascii['U']] = score;
-X    mat[nascii['C']][nascii['T']] = score;
-X    mat[nascii['A']][nascii['G']] = score;
-X  }
-X
-X  if (!ppst->pam_x_set) {
-X    ppst->pam_xx = mat[sa_x][sa_x];
-X    ppst->pam_xm = mat[1][sa_x];
-X    return;
-X  }
-X
-X  for (row = 1; row <= nsq; row++) {
-X    mat[row][sa_x] = ppst->pam_xm;
-X    mat[sa_x][row] = ppst->pam_xm;
-X    mat[row][sa_t] = ppst->pam_xm;
-X    mat[sa_t][row] = ppst->pam_xm;
-X  }
-X  mat[sa_x][sa_x] = ppst->pam_xx;
-X  mat[sa_t][sa_t] = ppst->pam_xm;
-}
-X
-/* get_lambda(): estimate the Karlin lambda of a score table.
-X
-X   pam2p  - score table; rows are sequence positions, columns 1..nsq
-X            are residues
-X   n0     - number of rows to use
-X   nsq    - number of residue columns
-X   aa0    - residue character for each row, looked up through aascii
-X
-X   If row 0 is the -BIGNUM border (pam2p[0][1] == -BIGNUM) rows 1..n0
-X   are used, otherwise rows 0..n0-1.  Each score is weighted by the
-X   background counts rrcounts[] of its row and column residues, the
-X   resulting score distribution is normalised and passed to karlin().
-X
-X   Returns lambda, or 0.0 when karlin() reports failure (a message is
-X   written to stderr in that case). */
-double
-get_lambda(int **pam2p, int n0, int nsq, char *aa0) {
-X  double lambda, H;
-X  double *pr, tot, sum;
-X  int i, ioff, j, min, max;
-X
-X  /* defined result even if karlin() fails without storing a value */
-X  lambda = 0.0;
-X
-X  /* get min and max scores */
-X  min = BIGNUM;
-X  max = -BIGNUM;
-X  if(pam2p[0][1] == -BIGNUM) {
-X    ioff = 1;
-X    n0++;
-X  } else {
-X    ioff = 0;
-X  }
-X
-X  for (i = ioff ; i < n0 ; i++) {
-X    for (j = 1; j <= nsq ; j++) {
-X      if (min > pam2p[i][j])
-X        min = pam2p[i][j];
-X      if (max < pam2p[i][j])
-X        max = pam2p[i][j];
-X    }
-X  }
-X
-X  /*  fprintf(stderr, "min: %d\tmax:%d\n", min, max); */
-X
-X  if ((pr = (double *) calloc(max - min + 1, sizeof(double))) == NULL) {
-X    fprintf(stderr, "Couldn't allocate memory for score probabilities: %d\n", max - min + 1);
-X    exit(1);
-X  }
-X
-X  tot = (double) rrtotal * (double) rrtotal * (double) n0;
-X  for (i = ioff ; i < n0 ; i++) {
-X    for (j = 1; j <= nsq ; j++) {
-X      /* index through unsigned char: plain char may be negative */
-X      pr[pam2p[i][j] - min] +=
-X        (double) ((double) rrcounts[aascii[(unsigned char)aa0[i]]] * (double) rrcounts[j]) / tot;
-X    }
-X  }
-X
-X  sum = 0.0;
-X  for(i = 0 ; i <= max-min ; i++) {
-X    sum += pr[i];
-X    /*    fprintf(stderr, "%3d: %g %g\n", i+min, pr[i], sum); */
-X  }
-X  /*  fprintf(stderr, "sum: %g\n", sum); */
-X
-X  for(i = 0 ; i <= max-min ; i++) { pr[i] /= sum; }
-X
-X  if (!karlin(min, max, pr, &lambda, &H)) {
-X    fprintf(stderr, "Karlin lambda estimation failed\n");
-X  }
-X
-X  /*   fprintf(stderr, "lambda: %g\n", lambda); */
-X  free(pr);
-X
-X  return lambda;
-}
-X
-/* alloc_pam2p(): allocate a zero-filled table of len rows, each with
-X   nsq+1 ints, as one vector of row pointers plus one contiguous block
-X   of cells.  Returns the row vector, or NULL (after a message on
-X   stderr) when either allocation fails. */
-int **
-alloc_pam2p(int len, int nsq) {
-X  int row;
-X  int width;
-X  int **rows;
-X
-X  width = nsq + 1;
-X
-X  rows = (int **)calloc(len,sizeof(int *));
-X  if (rows == NULL) {
-X    fprintf(stderr," Cannot allocate pam2p: %d\n",len);
-X    return NULL;
-X  }
-X
-X  rows[0] = (int *)calloc(width*len,sizeof(int));
-X  if (rows[0] == NULL) {
-X    fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", width*len);
-X    free(rows);
-X    return NULL;
-X  }
-X
-X  /* every further row starts one row-width after the previous one */
-X  for (row = 1; row < len; row++) {
-X    rows[row] = rows[row-1] + width;
-X  }
-X
-X  return rows;
-}
-X
-/* free_pam2p(): release a table built by alloc_pam2p() - first the
-X   cell block that row 0 points to, then the row vector.  NULL is
-X   accepted and ignored. */
-void free_pam2p(int **pam2p) {
-X  if (pam2p == NULL) return;
-X
-X  free(pam2p[0]);
-X  free(pam2p);
-}
-X
-SHAR_EOF
-chmod 0644 print_pssm.c ||
-echo 'restore of print_pssm.c failed'
-Wc_c="`wc -c < 'print_pssm.c'`"
-test 11147 -eq "$Wc_c" ||
- echo 'print_pssm.c: original size 11147, current size' "$Wc_c"
-fi
-# ============= prio_atepa.aa ==============
-if test -f 'prio_atepa.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping prio_atepa.aa (File already exists)'
-else
-echo 'x - extracting prio_atepa.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'prio_atepa.aa' &&
->PRIO_ATEPA | 90377 | MAJOR PRION PROTEIN PRECURSOR (PRP) (PRP27-30) (PRP33-35C).
-MANLGYWMLVLFVATWSDLGLCKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGWGQPHGGGWGQPHGGGWGQP
-HGGGWGQPHGGGWGQAGGTHNQWNKPSKPKTNMKHMAGAAAAGAVVGGLGGYMLGSAMSRPLIHFGNDYEDRYYR
-ENMYRYPNQVYYRPVDQYNNQNNFVHDCVNITIKQHTVTTTTKGENLTETDVKMMERVVEQMCITQYERESQAYY
-QRGSSMVLFSSPPVILLISFLIFLIVG
-SHAR_EOF
-chmod 0644 prio_atepa.aa ||
-echo 'restore of prio_atepa.aa failed'
-Wc_c="`wc -c < 'prio_atepa.aa'`"
-test 340 -eq "$Wc_c" ||
- echo 'prio_atepa.aa: original size 340, current size' "$Wc_c"
-fi
-# ============= prot_test.lib ==============
-if test -f 'prot_test.lib' -a X"$1" != X"-c"; then
- echo 'x - skipping prot_test.lib (File already exists)'
-else
-echo 'x - extracting prot_test.lib (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'prot_test.lib' &&
->HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee
-VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV
-AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY
-R
->K1HUAG | 1091 | Ig kappa chain V-I region (Ag) - Human
-DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPSRFSGSGFGTD
-FTFTISGLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR/
->CCHU | 1 | Cytochrome c - Human
-MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLE
-NPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
->N2KF1U | 1021 | Long neurotoxin 1 - Many-banded krait
-IVCHTTATIPSSAVTCPPGENLCYRKMWCDAFCSSRGKVVELGCAATCPSKKPYEEVTCCSTDKCNHPPK
-RQPG
->TPHUCS | 1322 | Troponin C, skeletal muscle - Human
-DTQQAEARSYLSEEMIAEFKAAFDMFDADGGGDISVKELGTVMRMLGQTPTKEELDAIIEEVDEDGSGTI
-DFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIFRASGEHVTDEEIESLMKDGDK
-NNDGRIDFDEFLKMMEGVQ
->FEPE | 25 | Ferredoxin - Peptostreptococcus asaccharolyticus
-AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED
->RKMDS | 677 | Ribulose-bisphosphate carboxylase (EC 4.1.1.39) small chain - Cry
-MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKDPAAVMFEINA
-CRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSRNIRYTIQSYASTRPEGERY*
-X
->K3HU | 1099 | Ig kappa chain C region - Human
-/TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSS
-TLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC
->HMIVV | 2581 | Hemagglutinin precursor - Influenza A virus (2 strains)
-MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELVQSSSTGKICN
-NPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVPDYASLRSLVASSGTLEFINE
-GFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPVQNVTMPNNDNSDKLYIWGVHHPSTDKEQTN
-LYVQASGKVTVSTKRSQQTIIPNVGSRPWVRGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTG
-KSSI
-MRSDAPIGTCSSECITPNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAG
-FIENGWEGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVEGRIQ
-DLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMGNGCFKIYHKCDNAC
-IGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVVLLGFIMWACQKGNIRCN
-ICI
->OKBO2C | 296 | Protein kinase (EC 2.7.1.37), cAMP-dependent, catalytic chain - B
-GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLVKHMETGNHYA
-MKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVMEYVPGGEMFSHLRRIGRFSE
-PHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYIQVTDFGFAKRVKGRTWTLCGTPEYLAPEII
-LSKGYNKAVDWWALGVLIYEMAAGYPPFFADQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKR
-FGNLKDGVNDIKNHKWFATTDWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
->GT8.7 | 266 | transl. of pa875.con, 19 to 675
-MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKR
-YTMGDAPDFDRSQWLNEKFKLGLDFPNLPYLI
-DGSHKITQSNAILRYLARKHHLDGETEEERIR
-ADIVENQVMDTRMQLIMLCYNPDFEKQKPEFL
-KTIPEKMKLYSEFLGKRPWFAGDKVTYVDFLA
-YDILDQYRMFEPKCLDAFPNLRDFLARFEGLK
-KISAYMKSSRYIATPIFSKMAHWSNK
-SHAR_EOF
-chmod 0644 prot_test.lib ||
-echo 'restore of prot_test.lib failed'
-Wc_c="`wc -c < 'prot_test.lib'`"
-test 2741 -eq "$Wc_c" ||
- echo 'prot_test.lib: original size 2741, current size' "$Wc_c"
-fi
-# ============= prot_test.lseg ==============
-if test -f 'prot_test.lseg' -a X"$1" != X"-c"; then
- echo 'x - skipping prot_test.lseg (File already exists)'
-else
-echo 'x - extracting prot_test.lseg (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'prot_test.lseg' &&
->HAHU | 1114 | Hemoglobin alpha chain - Human, chimpanzee, and pygmy chimpanzee @P:1-50
-VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
-KVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA
-VHASLDKFLASVSTVLTSKYR
-X
->K1HUAG | 1091 | Ig kappa chain V-I region (Ag) - Human @P:51-90
-DIQMTQSPSSLSASVGDRVTITCQASQDINHYLNWYQQGPKKAPKILIYDASNLETGVPs
-rfsgsgfgtdftftisgLQPEDIATYYCQQYDTLPRTFGQGTKLEIKR*
-X
->CCHU | 1 | Cytochrome c - Human @P:25-85
-MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIW
-GEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE
-X
->N2KF1U | 1021 | Long neurotoxin 1 - Many-banded krait
-IVCHTTATIPSSAVTCPPGENLCYRKMWCDAFCSSRGKVVELGCAATCPSKKPYEEVTCC
-STDKCNHPPKRQPG
-X
->TPHUCS | 1322 | Troponin C, skeletal muscle - Human @P:50-125
-DTQQAEARSYLSEEMIAEfkaafdmfdadgggdISVKELGTVMRMLGQTPTKEELDAIIE
-EVDEDGSGTIDFEEFLVMMVRQMKEDAKGKSEEELAECFRIFDRNADGYIDPEELAEIFR
-ASGEHVTDEEIESLMKDGDKNNDGRIDFDEFLKMMEGVQ
-X
->FEPE | 25 | Ferredoxin - Peptostreptococcus asaccharolyticus
-AYVINDSCIACGACKPECPVNIQQGSIYAIDADSCIDCGSCASVCPVGAPNPED
-X
->RKMDS | 677 | Ribulose-bisphosphate carboxylase (EC 4.1.1.39) small chain - Cry
-MRLTQGAFSFLPDLTDEQIVKQIQYAISKNWALNVEWTDDPHPRNAYWDLWGLPLFGIKD
-PAAVMFEINACRKAKPACYVKVNAFDNSRGVESCCLSFIVQRPTSNEPGFQLIRSEVDSR
-NIRYTIQSYASTRPEGERY*
-X
->K3HU | 1099 | Ig kappa chain C region - Human
-TVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDS
-KDstyslsstltlsKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC
->HMIVV | 2581 | Hemagglutinin precursor - Influenza A virus (2 strains)
-MKTIIALSYIFCLVFAQDLPGNDNNSTATLCLGHHAVPNGTLVKTITNDQIEVTNATELV
-QSSSTGKICNNPHRILDGINCTLIDALLGDPHCDGFQNEKWDLFVERSKAFSNCYPYDVP
-DYASLRSLVASSGTLEFINEGFNWTGVTQNGGSSACKRGPDSGFFSRLNWLYKSGSTYPV
-QNVTMPNNDNSDKLYIWGVHHPSTDKEQTNLYVQASGKVTVSTKRSQQTIIPNVGSRPWV
-RGLSSRISIYWTIVKPGDILVINSNGNLIAPRGYFKMRTGKSSIMRSDAPIGTCSSECIT
-PNGSIPNDKPFQNVNKITYGACPKYVKQNTLKLATGMRNVPEKQTRGIFGAIAGFIENGW
-EGMIDGWYGFRHQNSEGTGQAADLKSTQAAIDQINGKLNRVIEKTNEKFHQIEKEFSEVE
-GRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTRRQLRENAEDMG
-NGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAIS
-CFLLCVVLLGFIMWACQKGNIRCNICI
->OKBO2C | 296 | Protein kinase (EC 2.7.1.37), cAMP-dependent, catalytic chain - B
-GNAAAAKKGSEQESVKEFLAKAKEDFLKKWENPAQNTAHLDQFERIKTLGTGSFGRVMLV
-KHMETGNHYAMKILDKQKVVKLKQIEHTLNEKRILQAVNFPFLVKLEFSFKDNSNLYMVM
-EYVPGGEMFSHLRRIGRFSEPHARFYAAQIVLTFEYLHSLDLIYRDLKPENLLIDQQGYI
-QVTDFGFAKRVKGRTWTLCGTPEYLAPEIILSKGYNKAVDWWALGVLIYEMAAGYPPFFA
-DQPIQIYEKIVSGKVRFPSHFSSDLKDLLRNLLQVDLTKRFGNLKDGVNDIKNHKWFATT
-DWIAIYQRKVEAPFIPKFKGPGDTSNFDDYEEEEIRVSINEKCGKEFSEF
->GT8.7 | 266 | transl. of pa875.con, 19 to 675 @P:21-180
-MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKLGLDFPNL
-pylidgshkitqsnailrylarkhhldget
-EEERIRADIVENQVMDTRMQLIMLCYNPDF
-ekqkpeflktipekmklyseflgkrpwfag
-DKVTYVDFLAYDILDQYRMFEPKCLDAFPN
-LRDFLARFEGLKKISAYMKSSRYIATPIFSKMAHWSNK
-X
-SHAR_EOF
-chmod 0644 prot_test.lseg ||
-echo 'restore of prot_test.lseg failed'
-Wc_c="`wc -c < 'prot_test.lseg'`"
-test 2786 -eq "$Wc_c" ||
- echo 'prot_test.lseg: original size 2786, current size' "$Wc_c"
-fi
-# ============= prss3.1 ==============
-if test -f 'prss3.1' -a X"$1" != X"-c"; then
- echo 'x - skipping prss3.1 (File already exists)'
-else
-echo 'x - extracting prss3.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'prss3.1' &&
-.TH PRSS3 1 local
-.SH NAME
-prss \- test a protein sequence similarity for significance
-.SH SYNOPSIS
-.B prss34
-\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -Z #
-.I -k # -v #
-]
-sequence-file-1 sequence-file-2
-[
-.I #-of-shuffles
-]
-X
-.B prfx34
-\&[-Q -A -f # -g # -H -O file -s SMATRIX -w # -z 1,3 -Z #
-.I -k # -v #
-]
-sequence-file-1 sequence-file-2
-[
-.I ktup
-]
-[
-.I #-of-shuffles
-]
-X
-.B prss34(_t)/prfx34(_t)
-[-AfghksvwzZ]
-\- interactive mode
-X
-.SH DESCRIPTION
-.B prss34
-and
-.B prfx34
-are used to evaluate the significance of a protein:protein, DNA:DNA
-(
-.B prss34
-), or translated-DNA:protein (
-.B prfx34
-) sequence similarity score
-by comparing two sequences and calculating optimal similarity scores,
-and then repeatedly shuffling the second sequence, and calculating
-optimal similarity scores using the Smith-Waterman algorithm. An
-extreme value distribution is then fit to the shuffled-sequence
-scores. The characteristic parameters of the extreme value
-distribution are then used to estimate the probability that each of
-the unshuffled sequence scores would be obtained by chance in one
-sequence, or in a number of sequences equal to the number of shuffles.
-This program is derived from
-.B rdf2\c
-\&, described by Pearson and Lipman, PNAS (1988) 85:2444-2448, and
-Pearson (Meth. Enz. 183:63-98). Use of the extreme value
-distribution for estimating the probabilities of similarity scores was
-described by Altschul and Karlin, PNAS (1990) 87:2264-2268. The
-'z-values' calculated by rdf2 are not as informative as the P-values
-and expectations calculated by prdf.
-.B prss34
-calculates optimal scores using the same rigorous Smith-Waterman
-algorithm (Smith and Waterman, J. Mol. Biol. (1983) 147:195-197) used by the
-.B ssearch34
-program.
-.B prfx34
-calculates scores using the FASTX algorithm (Pearson et al. (1997) Genomics 46:24-36).
-.PP
-.B prss34
-and
-.B prfx34
-also allow a more sophisticated shuffling method: residues can be shuffled
-within a local window, so that the order of residues 1-10, 11-20, etc,
-is destroyed but a residue in the first 10 is never swapped with a residue
-outside the first ten, and so on for each local window.
-.SH EXAMPLES
-.TP
-(1)
-.B prss34
-\& -v 10 musplfm.aa lcbo.aa
-.PP
-Compare the amino acid sequence in the file musplfm.aa with that
-in lcbo.aa, then shuffle lcbo.aa 200 times using a local shuffle with
-a window of 10. Report the significance of the
-unshuffled musplfm/lcbo comparison scores with respect to the shuffled
-scores.
-.TP
-(2)
-.B prss34
-musplfm.aa lcbo.aa 1000
-.PP
-Compare the amino acid sequence in the file musplfm.aa with the sequences
-in the file lcbo.aa, shuffling \fClcbo.aa\fP 1000 times. Shuffles can also be specified with the -k # option.
-.TP
-(3)
-.B prfx34
-mgstm1.esq xurt8c.aa 2 1000
-.PP
-Translate the DNA sequence in the \fCmgstm1.esq\fP file in all six
-frames and compare it to the amino acid sequence in the file
-\fCxurt8c.aa\fP, using ktup=2 and shuffling \fCxurt8c.aa\fP 1000
-times. Each comparison considers the best forward or reverse
-alignment with frameshifts, using the fastx algorithm (Pearson et al
-(1997) Genomics 46:24-36).
-.TP
-(4)
-.B prss34/prfx34
-.PP
-Run prss in interactive mode. The program will prompt for the file
-name of the two query sequence files and the number of shuffles to be
-used.
-.SH OPTIONS
-.PP
-.B prss34/prfx34
-can be directed to change the scoring matrix, gap penalties, and
-shuffle parameters by entering options on the command line (preceded
-by a `\-'). All of the options should precede the file names and number of
-shuffles.
-.TP
-\-A
-Show unshuffled alignment.
-.TP
-\-f #
-Penalty for opening a gap (-10 by default for proteins).
-.TP
-\-g #
-Penalty for additional residues in a gap (-2 by default) for proteins.
-.TP
-\-H
-Do not display histogram of similarity scores.
-.TP
-\-k #
-Number of shuffles (200 is the default)
-.TP
-\-Q -q
-"quiet" - do not prompt for filename.
-.TP
-\-O filename
-send copy of results to "filename."
-.TP
-\-s str
-specify the scoring matrix. BLOSUM50 is used by default for proteins;
-+5/-4 is used by default for DNA.
-.B prss34
-recognizes the same scoring matrices as fasta34, ssearch34, fastx34, etc;
-e.g. BL50, P250, BL62, BL80, MD10, MD20, and other matrices in BLAST1.4
-matrix format.
-.TP
-\-v #
-Use a local window shuffle with a window size of #.
-.TP
-\-z #
-Calculate statistical significance using the mean/variance
-(moments) approach used by fasta34/ssearch or from maximum likelihood
-estimates of lambda and K.
-.TP
-\-Z #
-Present statistical significance as if a '#' entry database had
-been searched (e.g. "-Z 50000" presents statistical significance as if
-50,000 sequences had been compared).
-.SH ENVIRONMENT VARIABLES
-.PP
-.B (SMATRIX)
-the filename of an alternative scoring matrix file. For protein
-sequences, BLOSUM50 is used by default; PAM250 can be used with the
-command line option
-.B -s P250\c
-(or with -s pam250.mat). BLOSUM62 (-s BL62) and PAM120 (-s P120) are also available.
-.SH "SEE ALSO"
-ssearch3(1), fasta3(1).
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-X
-SHAR_EOF
-chmod 0644 prss3.1 ||
-echo 'restore of prss3.1 failed'
-Wc_c="`wc -c < 'prss3.1'`"
-test 4969 -eq "$Wc_c" ||
- echo 'prss3.1: original size 4969, current size' "$Wc_c"
-fi
-# ============= prss3.rsp ==============
-# Restore prss3.rsp.  The file is skipped when it already exists, unless
-# the first argument to the archive is -c.
-if test -f 'prss3.rsp' -a X"$1" != X"-c"; then
- echo 'x - skipping prss3.rsp (File already exists)'
-else
-echo 'x - extracting prss3.rsp (Text)'
-# sed removes the leading X guard from payload lines while writing the file
-sed 's/^X//' << 'SHAR_EOF' > 'prss3.rsp' &&
-compacc.obj doinit.obj karlin.obj scaleswn.obj htime.obj apam.obj lib_sel.obj getopt.obj showrss.obj pssm_asn_subs.obj
-SHAR_EOF
-chmod 0644 prss3.rsp ||
-echo 'restore of prss3.rsp failed'
-Wc_c="`wc -c < 'prss3.rsp'`"
-# report a mismatch between the restored byte count and the archived size
-test 119 -eq "$Wc_c" ||
- echo 'prss3.rsp: original size 119, current size' "$Wc_c"
-fi
-# ============= psql_demo.sql ==============
-# Restore psql_demo.sql.  The file is skipped when it already exists,
-# unless the first argument to the archive is -c.
-if test -f 'psql_demo.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping psql_demo.sql (File already exists)'
-else
-echo 'x - extracting psql_demo.sql (Text)'
-# sed removes the leading X guard from payload lines while writing the file
-sed 's/^X//' << 'SHAR_EOF' > 'psql_demo.sql' &&
-@ seqdb_demo seqdb_demo @;
-SELECT acc, protein.seq, sp_name
-X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp';
-SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
-SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
-X WHERE annot.acc='#' AND db='sp';
-X
-SHAR_EOF
-chmod 0644 psql_demo.sql ||
-echo 'restore of psql_demo.sql failed'
-Wc_c="`wc -c < 'psql_demo.sql'`"
-# report a mismatch between the restored byte count and the archived size
-test 317 -eq "$Wc_c" ||
- echo 'psql_demo.sql: original size 317, current size' "$Wc_c"
-fi
-# ============= psql_demo1.sql ==============
-# Restore psql_demo1.sql.  The file is skipped when it already exists,
-# unless the first argument to the archive is -c.
-if test -f 'psql_demo1.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping psql_demo1.sql (File already exists)'
-else
-echo 'x - extracting psql_demo1.sql (Text)'
-# sed removes the leading X guard from payload lines while writing the file
-sed 's/^X//' << 'SHAR_EOF' > 'psql_demo1.sql' &&
-xdb.wrplab seqdb_demo wrplab gstmu;
-SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
-X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
-SELECT acc, 'sp|'||acc||'|'||sp_name||' '||descr FROM annot WHERE acc='#' AND db='sp';
-SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
-X WHERE annot.acc='#' AND db='sp';
-SHAR_EOF
-chmod 0644 psql_demo1.sql ||
-echo 'restore of psql_demo1.sql failed'
-Wc_c="`wc -c < 'psql_demo1.sql'`"
-# report a mismatch between the restored byte count and the archived size
-test 366 -eq "$Wc_c" ||
- echo 'psql_demo1.sql: original size 366, current size' "$Wc_c"
-fi
-# ============= psql_demo_pv.sql ==============
-# Restore psql_demo_pv.sql.  The file is skipped when it already exists,
-# unless the first argument to the archive is -c.
-if test -f 'psql_demo_pv.sql' -a X"$1" != X"-c"; then
- echo 'x - skipping psql_demo_pv.sql (File already exists)'
-else
-echo 'x - extracting psql_demo_pv.sql (Text)'
-# sed removes the leading X guard from payload lines while writing the file
-sed 's/^X//' << 'SHAR_EOF' > 'psql_demo_pv.sql' &&
-xdb.wrplab seqdb_demo wrplab gstmu;
-SELECT acc, protein.seq, 'sp|'||acc||'|'||sp_name||' '||descr
-X FROM annot INNER JOIN protein USING(prot_id) WHERE annot.db='sp' LIMIT 50000;
-SELECT acc, descr FROM annot WHERE acc='#' AND db='sp';
-SELECT acc,protein.seq FROM protein INNER JOIN annot USING(prot_id)
-X WHERE annot.acc='#' AND db='sp';
-X
-SHAR_EOF
-chmod 0644 psql_demo_pv.sql ||
-echo 'restore of psql_demo_pv.sql failed'
-Wc_c="`wc -c < 'psql_demo_pv.sql'`"
-# report a mismatch between the restored byte count and the archived size
-test 336 -eq "$Wc_c" ||
- echo 'psql_demo_pv.sql: original size 336, current size' "$Wc_c"
-fi
-# ============= pssm_asn_subs.c ==============
-if test -f 'pssm_asn_subs.c' -a X"$1" != X"-c"; then
- echo 'x - skipping pssm_asn_subs.c (File already exists)'
-else
-echo 'x - extracting pssm_asn_subs.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pssm_asn_subs.c' &&
-/* pssm_asn_subs.c */
-X
-X
-/* $Name: fa_34_26_5 $ - $Id: pssm_asn_subs.c,v 1.15 2007/04/02 18:08:11 wrp Exp $ */
-X
-/* copyright (C) 2005 by William R. Pearson and the U. of Virginia */
-X
-/* this code is designed to parse the ASN.1 binary encoded scoremat
-X object produced by blastpgp -C file.ckpt_asn -u 2 */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-X
-int parse_pssm_asn();
-int parse_pssm2_asn();
-X
-int
-parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
-X unsigned char **query, double ***freqs,
-X char *matrix, int *gap_open, int *gap_extend,
-X double *lambda);
-X
-X
-X
-#define COMPO_NUM_TRUE_AA 20
-X
-/**positions of true characters in protein alphabet*/
-/*
-static int trueCharPositions[COMPO_NUM_TRUE_AA] = {
-X 1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22
-};
-*/
-X
-#define COMPO_LARGEST_ALPHABET 28
-X
-/*
-static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
-X
-static int alphaConvert[COMPO_LARGEST_ALPHABET] = {
-X (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,
-X 16, 19, 17, (-1), 18, (-1), (-1), (-1), (-1), (-1)
-};
-*/
-X
-/* pssm_aa_order[k] is the position, within one parsed column of PSSM
-X   frequencies, of the k-th amino acid in the order shown by the letter
-X   beside each entry (A,R,N,D,C,Q,E,G,H,I,L,K,M,F,P,S,T,W,Y,V).
-X   parse_pssm_asn_fa() uses it to re-pack those 20 values into
-X   positions 0..19 of each column.
-X   NOTE(review): the index values look like positions in an NCBI
-X   protein alphabet -- confirm against the blastpgp documentation. */
-int pssm_aa_order[20] = { 1, /*A*/
-X 16, /*R*/
-X 13, /*N*/
-X 4, /*D*/
-X 3, /*C*/
-X 15, /*Q*/
-X 5, /*E*/
-X 7, /*G*/
-X 8, /*H*/
-X 9, /*I*/
-X 11, /*L*/
-X 10, /*K*/
-X 12, /*M*/
-X 6, /*F*/
-X 14, /*P*/
-X 17, /*S*/
-X 18, /*T*/
-X 20, /*W*/
-X 22, /*Y*/
-X 19}; /*V*/
-X
-X
-#define ASN_SEQ 48
-#define ASN_SEQOF 49
-X
-#define ASN_PSSM_QUERY 166
-#define ASN_PSSM2_QUERY 162
-X
-#define ASN_PSSM_IS_PROT 160
-#define ASN_PSSM2_MATRIX 161
-#define ASN_PSSM_NROWS 162
-#define ASN_PSSM_NCOLS 163
-X
-#define ASN_PSSM2_NCOLS 163
-#define ASN_PSSM2_NROWS 164
-#define ASN_PSSM_BYCOL 165
-#define ASN_PSSM_INTERMED_DATA 167
-#define ASN_PSSM_FREQS 162
-#define ASN_PSSM2_FREQS 165
-#define ASN_PSSM2_LAMBDA 166
-X
-#define ASN_IS_STR 26
-#define ASN_IS_INT 2
-#define ASN_IS_BOOL 1
-#define ASN_IS_ENUM 10
-X
-/* state of the buffered reader shared by the ASN.1 parsing routines */
-struct asn_bstruct {
-X FILE *fd; /* stream the buffer is refilled from */
-X unsigned char *buf; /* start of the read buffer (ASN_BUF bytes) */
-X unsigned char *abp; /* current parse position inside buf */
-X unsigned char *buf_max; /* one past the last valid byte in buf */
-X int len; /* count of valid bytes, updated on each refill */
-};
-X
-#define ASN_BUF 1024
-X
-/* chk_asn_buf() makes sure that at least v unread bytes are available
-X   at asnp->abp.  When fewer remain, the unread tail is moved to the
-X   front of the buffer and the rest of the buffer is refilled from
-X   asnp->fd.  Exits when v exceeds ASN_BUF or the bytes cannot be
-X   supplied.  Returns the (possibly relocated) parse position. */
-unsigned char *
-chk_asn_buf(struct asn_bstruct *asnp, int v) {
-X  int n_room, n_read;
-X
-X  if (v > ASN_BUF) {
-X    fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n",
-X            v, ASN_BUF);
-X    exit(1);
-X  }
-X
-X  /* enough data is already buffered - nothing to do */
-X  if (asnp->abp + v <= asnp->buf_max) { return asnp->abp; }
-X
-X  /* slide the unread tail down to the start of the buffer */
-X  asnp->len = asnp->buf_max - asnp->abp;
-X  memmove(asnp->buf, asnp->abp, asnp->len);
-X  asnp->abp = asnp->buf;
-X
-X  /* top up the remainder of the buffer from the file */
-X  n_room = ASN_BUF - asnp->len;
-X  if (!feof(asnp->fd)) {
-X    n_read = fread(asnp->buf + asnp->len, sizeof(char), n_room, asnp->fd);
-X    if (n_read != 0) { asnp->len += n_read; }
-X  }
-X  asnp->buf_max = asnp->buf + asnp->len;
-X
-X  if (asnp->len < v) {
-X    fprintf(stderr, " Unable to read %d bytes\n",v);
-X    exit(1);
-X  }
-X
-X  return asnp->abp;
-}
-X
-/* read_asn_dest() copies the next v bytes of ASN.1 data into oct_str,
-X   which has room for o_len bytes; it exits when v > o_len.  It is
-X   needed for ASN data entities longer than ASN_BUF (1024), which
-X   chk_asn_buf() cannot hold: buffered bytes are handed over first and
-X   the buffer is then refilled from asnp->fd as often as required.
-X   Exits if the input ends before v bytes have been delivered.
-X   Returns the parse position just past the copied data; the caller
-X   stores it in asnp->abp. */
-unsigned char *
-read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) {
-X  int n_read;
-X  unsigned char *oct_ptr;
-X
-X  if (v > o_len) {
-X    fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n",
-X            v, o_len);
-X    exit(1);
-X  }
-X
-X  /* everything requested is already buffered */
-X  if (asnp->abp + v <= asnp->buf_max) {
-X    memmove(oct_str, asnp->abp, v);
-X    return asnp->abp+v;
-X  }
-X
-X  /* hand over what is buffered, then refill until v is satisfied */
-X  asnp->len = asnp->buf_max - asnp->abp;
-X  memmove(oct_str, asnp->abp, asnp->len);
-X  oct_ptr = oct_str + asnp->len;
-X  v -= asnp->len;
-X
-X  asnp->abp = asnp->buf;
-X
-X  while (v > 0) {
-X    n_read = fread(asnp->buf, sizeof(char), ASN_BUF, asnp->fd);
-X    if (n_read == 0) {
-X      /* input ended early - report it instead of returning short data */
-X      fprintf(stderr, " Unable to read %d bytes\n",v);
-X      exit(1);
-X    }
-X    asnp->len = n_read;
-X    asnp->buf_max = asnp->buf + n_read;
-X
-X    if (v <= n_read) {  /* the remainder fits in this buffer-full */
-X      memmove(oct_ptr, asnp->buf, v);
-X      asnp->len -= v;
-X      asnp->abp = asnp->buf + v;
-X      return asnp->abp;
-X    }
-X
-X    /* take the whole buffer and advance the destination, so that the
-X       next chunk is appended rather than written over this one */
-X    memmove(oct_ptr, asnp->buf, n_read);
-X    oct_ptr += n_read;
-X    v -= n_read;
-X  }
-X  return asnp->abp;
-}
-X
-/* get_astr_bool() reads a tagged boolean (tag, length, value) followed
-X   by two trailing bytes, storing the value byte in *val (0 when the
-X   boolean tag is absent).  Returns the updated parse position. */
-unsigned char *
-get_astr_bool(struct asn_bstruct *asnp, int *val) {
-X  int b_len;
-X  int b_val = 0;
-X  unsigned char tag;
-X
-X  asnp->abp = chk_asn_buf(asnp,5);
-X
-X  tag = *asnp->abp++;
-X  if (tag == 1) {
-X    b_len = *asnp->abp++;
-X    /* an unexpected length is reported, but one value byte is still read */
-X    if (b_len != 1) {
-X      fprintf(stderr, "boolean length != 1 : %d\n", b_len);
-X    }
-X    b_val = *asnp->abp++;
-X  }
-X  else {
-X    fprintf(stderr," bool missing\n");
-X  }
-X
-X  asnp->abp += 2;	/* skip over null's */
-X  *val = b_val;
-X  return asnp->abp;
-}
-X
-/* get_astr_int() reads a tagged integer: tag (ASN_IS_INT), a length
-X   byte, that many big-endian value bytes, and two trailing nulls.
-X   The value is stored in *val (0 when the tag is absent).  A length
-X   larger than an int is reported; the extra bytes are still consumed
-X   so that parsing stays aligned.  Returns the updated parse position. */
-unsigned char *
-get_astr_int(struct asn_bstruct *asnp,
-X            int *val) {
-X
-X  int v_len, v;
-X
-X  v = 0;
-X
-X  /* tag + length + up to 4 value bytes + 2 trailing nulls */
-X  asnp->abp = chk_asn_buf(asnp,8);
-X
-X  if (*asnp->abp++ != ASN_IS_INT) { /* check for int */
-X    fprintf(stderr," int missing\n");
-X  }
-X  else {
-X    v_len = *asnp->abp++;
-X    if (v_len > (int)sizeof(int)) {
-X      /* more bytes than the initial check covered - make sure they
-X         are buffered before they are read */
-X      fprintf(stderr," int too long: %d bytes\n", v_len);
-X      asnp->abp = chk_asn_buf(asnp, v_len + 2);
-X    }
-X    while (v_len-- > 0) {
-X      /* accumulate as unsigned to avoid signed overflow */
-X      v = (int)(((unsigned int)v << 8) | *asnp->abp++);
-X    }
-X    asnp->abp += 2;	/* skip over null's */
-X  }
-X  *val = v;
-X  return asnp->abp;
-}
-X
-/* get_astr_enum() reads a tagged enumerated value: tag (ASN_IS_ENUM),
-X   a length byte, that many big-endian value bytes, and two trailing
-X   nulls.  The value is stored in *val (0 when the tag is absent).
-X   Returns the updated parse position. */
-unsigned char *
-get_astr_enum(struct asn_bstruct *asnp, int *val) {
-X
-X  int v_len, v;
-X
-X  /* tag + length + 1 value byte + 2 trailing nulls */
-X  asnp->abp = chk_asn_buf(asnp,5);
-X
-X  v = 0;
-X  if (*asnp->abp++ != ASN_IS_ENUM) { /* check for enum */
-X    fprintf(stderr," enum missing\n");
-X  }
-X  else {
-X    v_len = *asnp->abp++;
-X    if (v_len > 1) {
-X      /* longer than the initial check covered - make sure the value
-X         and the trailing nulls are buffered before they are read */
-X      if (v_len > (int)sizeof(int)) {
-X        fprintf(stderr," enum too long: %d bytes\n", v_len);
-X      }
-X      asnp->abp = chk_asn_buf(asnp, v_len + 2);
-X    }
-X    while (v_len-- > 0) {
-X      /* accumulate as unsigned to avoid signed overflow */
-X      v = (int)(((unsigned int)v << 8) | *asnp->abp++);
-X    }
-X    asnp->abp += 2;	/* skip over null's */
-X  }
-X  *val = v;
-X
-X  return asnp->abp;
-}
-X
-/* get_astr_packedfloat() reads a real number stored as tag 9, a length
-X   byte, a leading zero byte and then the number as text.  The parsed
-X   value is stored in *val:
-X     - 0 when the tag is absent, the length is 0, or the text is "0";
-X     - -1.0 when the leading zero byte is missing;
-X     - 0.0 (with a message) when the text cannot be parsed.
-X   At most sizeof(tmp_str)-1 characters of the text are scanned, but
-X   the whole entity is always skipped.  Returns the updated parse
-X   position. */
-unsigned char *
-get_astr_packedfloat(struct asn_bstruct *asnp, double *val) {
-X
-X  int v_len, n_copy;
-X  char tmp_str[64];
-X
-X  asnp->abp = chk_asn_buf(asnp,2);
-X
-X  if (*asnp->abp++ != 9) { /* check for packed float */
-X    fprintf(stderr," float missing\n");
-X    *val = 0;
-X    return asnp->abp;
-X  }
-X
-X  v_len = *asnp->abp++;
-X
-X  if (v_len > 63) {
-X    fprintf(stderr," real string too long: %d\n",v_len);
-X  }
-X
-X  /* an empty real has no content bytes to examine */
-X  if (v_len == 0) {
-X    *val = 0.0;
-X    return asnp->abp;
-X  }
-X
-X  asnp->abp = chk_asn_buf(asnp,v_len);
-X
-X  if (v_len == 2 && *asnp->abp == '\0' && *(asnp->abp+1)=='0') {
-X    asnp->abp += 2;
-X    *val = 0.0;
-X    return asnp->abp;
-X  }
-X
-X  /* copy and scan it */
-X  if (*asnp->abp != '\0') {
-X    fprintf(stderr, " packedfloat - expected 0, got %d\n", *asnp->abp);
-X    *val = -1.0;
-X    return asnp->abp;
-X  }
-X  asnp->abp++;
-X
-X  /* copy only bytes that belong to this entity and fit in tmp_str */
-X  n_copy = v_len - 1;
-X  if (n_copy > (int)sizeof(tmp_str) - 1) { n_copy = (int)sizeof(tmp_str) - 1; }
-X  memcpy(tmp_str, asnp->abp, n_copy);
-X  tmp_str[n_copy] = '\0';
-X
-X  if (sscanf(tmp_str,"%lg",val) != 1) {
-X    fprintf(stderr, " packedfloat - cannot parse: %s\n", tmp_str);
-X    *val = 0.0;
-X  }
-X  asnp->abp += v_len-1;
-X
-X  return asnp->abp;
-}
-X
-/* get_astr_str() reads a tagged string (ASN_IS_STR) into text[], which
-X   has room for t_len bytes, and NUL-terminates it.  The length is
-X   either the single byte after the tag or, when that byte is > 128,
-X   the big-endian number held in the following (byte & 0x7f) bytes.
-X   text is set to "" when the tag is absent.  Exits when the string
-X   does not fit in t_len-1 bytes.  Returns the updated parse position,
-X   after the two nulls that follow the string. */
-unsigned char *
-get_astr_str(struct asn_bstruct *asnp, char *text, int t_len) {
-X
-X  int v_len, n_len;
-X
-X  asnp->abp = chk_asn_buf(asnp,2);
-X
-X  text[0] = '\0';
-X  if (*asnp->abp++ != ASN_IS_STR) { /* check for str */
-X    fprintf(stderr," str missing\n");
-X    return asnp->abp;
-X  }
-X
-X  v_len = *asnp->abp++;
-X  if (v_len > 128) { /* need to read the length from the next bytes */
-X    /* n_len counts the length bytes; t_len must keep the capacity */
-X    n_len = v_len & 0x7f;
-X    if (n_len > (int)sizeof(int)) {
-X      fprintf(stderr," str length field too long: %d bytes\n", n_len);
-X      exit(1);
-X    }
-X
-X    asnp->abp = chk_asn_buf(asnp,n_len);
-X
-X    for (v_len = 0; n_len > 0; n_len--) { v_len = (v_len << 8) + *asnp->abp++; }
-X    if (v_len < 0) {
-X      fprintf(stderr," bad str length: %d\n", v_len);
-X      exit(1);
-X    }
-X  }
-X
-X  /* read v_len bytes, keeping one byte of text[] for the terminator */
-X  asnp->abp = read_asn_dest(asnp,v_len, (unsigned char *)text, t_len-1);
-X  text[v_len] = '\0';
-X
-X  /* make sure the two trailing nulls are buffered before skipping */
-X  asnp->abp = chk_asn_buf(asnp,2);
-X  asnp->abp += 2;
-X
-X  return asnp->abp;
-}
-X
-#define ASN_BIOSEQ_SEQ 160
-#define ASN_BIOSEQ_ID 160
-#define ASN_BIOSEQ_ID_VAL 160
-X
-#define ASN_BIOSEQ_ID_LOCAL 161
-#define ASN_BIOSEQ_ID_GIBBSQ 162
-#define ASN_BIOSEQ_ID_GIBBMT 163
-#define ASN_BIOSEQ_ID_GB 164
-#define ASN_BIOSEQ_ID_EMBL 165
-#define ASN_BIOSEQ_ID_PIR 166
-#define ASN_BIOSEQ_ID_SP 167
-#define ASN_BIOSEQ_ID_PATENT 168
-#define ASN_BIOSEQ_ID_OTHER 169
-#define ASN_BIOSEQ_ID_GEN 170
-#define ASN_BIOSEQ_ID_GI 171
-X
-#define ASN_BIOSEQ_TEXTID_NAME 160
-#define ASN_BIOSEQ_TEXTID_ACC 161
-#define ASN_BIOSEQ_TEXTID_REL 162
-#define ASN_BIOSEQ_TEXTID_VER 163
-X
-#define ASN_BIOSEQ_DESCR 161
-#define ASN_BIOSEQ_INST 162
-#define ASN_BIOSEQ_TITLE 164
-#define ASN_BIOSEQ_INST_REPR 160
-#define ASN_BIOSEQ_INST_MOL 161
-#define ASN_BIOSEQ_INST_LEN 162
-#define ASN_BIOSEQ_INST_TOPOL 166
-#define ASN_BIOSEQ_INST_SEQD 167
-#define ASN_OCTET_STR 65
-#define ASN_NCBIeaa 65
-X
-/* get_astr_seqdescr() reads the sequence description: an ASN_SEQOF
-X   wrapper, an ASN_BIOSEQ_TITLE tag and the title string, which is
-X   stored in descr (up to MAX_STR bytes).  A missing wrapper or title
-X   tag is reported on stderr.  Two trailing nulls are skipped.
-X   Returns the updated parse position. */
-unsigned char *
-get_astr_seqdescr(struct asn_bstruct *asnp,
-X                  char *descr) {
-X
-X  asnp->abp = chk_asn_buf(asnp,6);
-X
-X  /* seqof '1' */
-X  if (*asnp->abp != ASN_SEQOF) {
-X    fprintf(stderr, " missing ASN_SEQOF '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X  else {
-X    asnp->abp += 2;
-X  }
-X
-X  /* 164/128 - title, followed by the string itself */
-X  if (*asnp->abp != ASN_BIOSEQ_TITLE) {
-X    fprintf(stderr, " missing ASN_BIOSEQ_TITLE '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X  else {
-X    asnp->abp += 2;
-X    asnp->abp = get_astr_str(asnp, descr, MAX_STR);
-X  }
-X
-X  /* pop nulls */
-X  asnp->abp = chk_asn_buf(asnp,2);
-X  asnp->abp += 2;
-X
-X  return asnp->abp;
-}
-X
-/* get_astr_octstr() reads an ASN_NCBIeaa-tagged byte string into
-X   oct_str (room for o_len bytes plus a terminator) and NUL-terminates
-X   it.  The length is the byte after the tag or, when that byte is
-X   > 128, the big-endian number in the following (byte & 0x7f) bytes.
-X   When the tag does not match, only the tag byte is consumed.
-X   Returns the updated parse position. */
-unsigned char *
-get_astr_octstr(struct asn_bstruct *asnp,
-X                unsigned char *oct_str,
-X                int o_len) {
-X
-X  int n_bytes, n_len;
-X  unsigned char tag;
-X
-X  asnp->abp = chk_asn_buf(asnp,2);
-X
-X  tag = *asnp->abp++;
-X  if (tag != ASN_NCBIeaa) { return asnp->abp; }
-X
-X  if (*asnp->abp > 128) {
-X    /* long form: the low 7 bits give the number of length bytes */
-X    n_len = *asnp->abp++ & 0x7f;
-X
-X    asnp->abp = chk_asn_buf(asnp,n_len);
-X
-X    for (n_bytes = 0; n_len > 0; n_len--) {
-X      n_bytes = n_bytes * 256 + *asnp->abp++;
-X    }
-X  }
-X  else {
-X    n_bytes = *asnp->abp++ & 0x7f;
-X  }
-X
-X  asnp->abp = read_asn_dest(asnp, n_bytes, oct_str, o_len);
-X
-X  oct_str[min(n_bytes,o_len)]='\0';
-X
-X  asnp->abp += 2;	/* skip characters and NULL's */
-X
-X  return asnp->abp;
-}
-X
-/* get_astr_seqinst() reads the sequence instance of the query and
-X   returns the residues in a newly allocated, NUL-terminated buffer
-X   (*query, *nq + 1 bytes) and their count in *nq.  Expected layout:
-X     sequence '0'
-X     160/128/10/len/val - repr enum raw val
-X     161/128/10/len/val - mol enum aa val
-X     162/128/02/len/val - length int val
-X     166/128            - topology (empty)
-X     167/128            - seq-data
-X     65/len+128/len/octet_string
-X     nulls
-X   Missing elements are reported on stderr; a missing length or
-X   seq-data element ends parsing early.  Exits when the query buffer
-X   cannot be allocated.  Returns the updated parse position. */
-unsigned char *
-get_astr_seqinst(struct asn_bstruct *asnp,
-X                 unsigned char **query,
-X                 int *nq) {
-X
-X  int tmp;
-X
-X  asnp->abp = chk_asn_buf(asnp,12);
-X
-X  if (*asnp->abp == ASN_SEQ) {
-X    asnp->abp += 2;
-X  }
-X  else {
-X    fprintf(stderr, " missing ASN_SEQ '0': %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_INST_REPR && *(asnp->abp+1) == 128) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_enum(asnp, &tmp);
-X  }
-X  else {
-X    fprintf(stderr, " missing ASN_BIOSEQ_INST_REPR 160: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_INST_MOL && *(asnp->abp+1) == 128) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_enum(asnp, &tmp);
-X  }
-X  else {
-X    fprintf(stderr, " missing ASN_BIOSEQ_INST_MOL 161: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_INST_LEN) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_int(asnp, nq);
-X  }
-X  else {
-X    fprintf(stderr, " missing ASN_BIOSEQ_INST_LEN 162: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X
-X  /* the buffer is written by get_astr_octstr() below, so an allocation
-X     failure (or a nonsensical length) cannot be ignored */
-X  if (*nq < 0 ||
-X      (*query = (unsigned char *)calloc((size_t)*nq + 1, sizeof(char)))==NULL) {
-X    fprintf(stderr, " cannot read %d char query\n", *nq+1);
-X    exit(1);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_INST_TOPOL && *(asnp->abp+1) == 128 ) {
-X    asnp->abp += 2;
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_INST_SEQD) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_octstr(asnp, *query, *nq );
-X  }
-X  else {
-X    fprintf(stderr, " missing ASN_BIOSEQ_INST_SEQD 167: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X
-X  asnp->abp += 4;	/* skip over nulls */
-X
-X  return asnp->abp;
-}
-X
-X
-/* get_astr_textid() reads a text sequence id: an ASN_SEQ wrapper
-X   followed by any of the name, accession, release and version
-X   elements, up to the terminating null.  name and acc (MAX_SSTR bytes
-X   each) receive the name and accession, or "" when absent; release
-X   and version are read and discarded.  An unrecognised tag is
-X   reported and ends the scan, so the loop always terminates.
-X   Returns the updated parse position. */
-unsigned char *
-get_astr_textid( struct asn_bstruct *asnp,
-X                 char *name,
-X                 char *acc) {
-X  int end_seq = 0;
-X  int ver;
-X  char rel[MAX_SSTR];
-X
-X  chk_asn_buf(asnp,16);
-X
-X  if (*asnp->abp != ASN_SEQ) {
-X    fprintf(stderr, " Expected ASN_SEQ: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X  }
-X  else {asnp->abp += 2; end_seq++;}
-X
-X  name[0] = acc[0] = '\0';
-X
-X  while (*asnp->abp != '\0') {
-X    if (*asnp->abp == ASN_BIOSEQ_TEXTID_NAME) {
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
-X    }
-X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_ACC) {
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_str(asnp, acc, MAX_SSTR);
-X    }
-X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_REL) {
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_str(asnp, rel, MAX_SSTR);
-X    }
-X    else if (*asnp->abp == ASN_BIOSEQ_TEXTID_VER) {
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_int(asnp, &ver);
-X    }
-X    else {
-X      /* nothing here consumes this tag - stop instead of spinning */
-X      fprintf(stderr, " unexpected text-id tag: %0x %0x\n",*asnp->abp, asnp->abp[1]);
-X      break;
-X    }
-X  }
-X  asnp->abp += 4;
-X  while (end_seq-- > 0) { asnp->abp += 4; }
-X  return asnp->abp;
-}
-X
-/* get_astr_query() reads the query Bioseq: its id (a gi number, a
-X   local name, or a text id with name and accession), an optional
-X   description, and the sequence instance.
-X     gi     - receives the gi number (0 when none is read)
-X     name   - receives the local or text-id name   (MAX_SSTR bytes)
-X     acc    - receives the text-id accession       (MAX_SSTR bytes)
-X     descr  - receives the title, "" when absent   (MAX_STR bytes)
-X     query  - receives the allocated residue buffer
-X     nq     - receives the residue count
-X   A missing mandatory tag is reported on stderr and ends parsing
-X   early.  Returns the updated parse position. */
-unsigned char *
-get_astr_query(struct asn_bstruct *asnp,
-X               int *gi,
-X               char *name,
-X               char *acc,
-X               char *descr,
-X               unsigned char **query,
-X               int *nq
-X               ) {
-X
-X  /* give the outputs defined values even when parsing stops early */
-X  *gi = 0;
-X  name[0] = acc[0] = descr[0] = '\0';
-X
-X  asnp->abp = chk_asn_buf(asnp,32);
-X
-X  if (*asnp->abp != ASN_BIOSEQ_SEQ) {
-X    fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp != ASN_SEQ && *asnp->abp != ASN_SEQOF ) {
-X    fprintf(stderr, "Bioseq - missing SEQUENCE tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp != ASN_BIOSEQ_ID) {
-X    fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp != ASN_SEQOF) {
-X    fprintf(stderr, "missing bioseq/id/SEQOF tag: %d\n",*asnp->abp);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_int(asnp, gi);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
-X    *gi = 0;
-X    acc[0] = '\0';
-X
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
-X    asnp->abp += 2;
-X  }
-X  else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
-X           *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
-X           *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
-X
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_textid(asnp, name, acc);
-X  }
-X
-X  /* step over the null pairs that close the id */
-X  while (*asnp->abp == 0) asnp->abp += 2;
-X
-X  if (*asnp->abp == ASN_BIOSEQ_DESCR) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_seqdescr(asnp, descr);
-X    asnp->abp += 2; 		/* skip nulls */
-X  }
-X  else { descr[0] = '\0';}
-X
-X  if (*asnp->abp != ASN_BIOSEQ_INST) {
-X    fprintf(stderr, "Bioseq - missing INST tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X  asnp->abp = get_astr_seqinst(asnp, query, nq);
-X  asnp->abp += 2; 		/* skip nulls */
-X
-X  return asnp->abp;
-}
-X
-/* get_astr_query2() reads the query Bioseq of a version-2 PSSM file.
-X   It differs from get_astr_query() in that the outer wrapper must be
-X   ASN_SEQOF and the ASN_SEQOF wrapper inside the id is optional.
-X     gi     - receives the gi number (0 when none is read)
-X     name   - receives the local or text-id name   (MAX_SSTR bytes)
-X     acc    - receives the text-id accession       (MAX_SSTR bytes)
-X     descr  - receives the title, "" when absent   (MAX_STR bytes)
-X     query  - receives the allocated residue buffer
-X     nq     - receives the residue count
-X   A missing mandatory tag is reported on stderr and ends parsing
-X   early.  Returns the updated parse position. */
-unsigned char *
-get_astr_query2(struct asn_bstruct *asnp,
-X                int *gi,
-X                char *name,
-X                char *acc,
-X                char *descr,
-X                unsigned char **query,
-X                int *nq
-X                ) {
-X
-X  /* give the outputs defined values even when parsing stops early */
-X  *gi = 0;
-X  name[0] = acc[0] = descr[0] = '\0';
-X
-X  asnp->abp = chk_asn_buf(asnp,32);
-X
-X  if (*asnp->abp != ASN_BIOSEQ_SEQ) {
-X    fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp != ASN_SEQOF ) {
-X    fprintf(stderr, "Bioseq2 - missing SEQOF tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  if (*asnp->abp != ASN_BIOSEQ_ID) {
-X    fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X
-X  /* the SEQOF wrapper around the id is optional here */
-X  if (*asnp->abp == ASN_SEQOF) {
-X    asnp->abp += 2;
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_int(asnp, gi);
-X  }
-X
-X  if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
-X    *gi = 0;
-X    acc[0] = '\0';
-X
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
-X    asnp->abp += 2;
-X  }
-X  else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
-X           *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
-X           *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
-X
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_textid(asnp, name, acc);
-X  }
-X
-X  /* step over the null pairs that close the id */
-X  while (*asnp->abp == 0) asnp->abp += 2;
-X
-X  if (*asnp->abp == ASN_BIOSEQ_DESCR) {
-X    asnp->abp+=2;
-X    asnp->abp = get_astr_seqdescr(asnp, descr);
-X    asnp->abp += 2; 		/* skip nulls */
-X  }
-X  else { descr[0] = '\0';}
-X
-X  if (*asnp->abp != ASN_BIOSEQ_INST) {
-X    fprintf(stderr, "Bioseq - missing INST tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
-X    return asnp->abp;
-X  }
-X  asnp->abp += 2;
-X  asnp->abp = get_astr_seqinst(asnp, query, nq);
-X  asnp->abp += 2; 		/* skip nulls */
-X
-X  return asnp->abp;
-}
-X
-/* get_pssm_freqs() reads n_rows * n_cols packed floats into
-X   freqs[col][row].  by_row selects the order of the values in the
-X   input: non-zero means all columns of row 0 come first, zero means
-X   all rows of column 0 come first.  An optional ASN_SEQ wrapper and
-X   the trailing nulls are skipped.  Returns the updated parse
-X   position. */
-unsigned char *
-get_pssm_freqs(struct asn_bstruct *asnp,
-X               double **freqs,
-X               int n_rows,
-X               int n_cols,
-X               int by_row) {
-X
-X  int r, c;
-X  int wrapped;
-X  double cell;
-X
-X  asnp->abp = chk_asn_buf(asnp,4);
-X
-X  wrapped = (*asnp->abp == ASN_SEQ);
-X  if (wrapped) { asnp->abp += 2; }
-X
-X  if (by_row) {
-X    for (r = 0; r < n_rows; r++) {
-X      for (c = 0; c < n_cols; c++) {
-X        asnp->abp = get_astr_packedfloat(asnp, &cell);
-X        freqs[c][r] = cell;
-X      }
-X    }
-X  }
-X  else {
-X    for (c = 0; c < n_cols; c++) {
-X      for (r = 0; r < n_rows; r++) {
-X        asnp->abp = get_astr_packedfloat(asnp, &cell);
-X        freqs[c][r] = cell;
-X      }
-X    }
-X  }
-X
-X  /* two nulls always, plus two more to close the wrapper */
-X  asnp->abp += (wrapped ? 4 : 2);
-X  return asnp->abp;
-}
-X
-/* get_pssm_intermed() reads the intermediate-data section: when it
-X   starts with ASN_SEQ, an ASN_PSSM_FREQS element inside it is read
-X   into freqs by get_pssm_freqs().  The closing nulls are skipped.
-X   Returns the updated parse position. */
-unsigned char *
-get_pssm_intermed(struct asn_bstruct *asnp,
-X                  double **freqs,
-X                  int n_rows,
-X                  int n_cols,
-X                  int by_row) {
-X
-X  asnp->abp = chk_asn_buf(asnp,4);
-X
-X  /* no wrapper: only the closing nulls are consumed */
-X  if (*asnp->abp != ASN_SEQ) {
-X    asnp->abp += 2;
-X    return asnp->abp;
-X  }
-X
-X  asnp->abp += 2;
-X  if (*asnp->abp == ASN_PSSM_FREQS) {
-X    asnp->abp += 2;
-X    asnp->abp = get_pssm_freqs(asnp, freqs, n_rows, n_cols, by_row);
-X  }
-X
-X  /* nulls closing the wrapper, then nulls closing the section */
-X  asnp->abp += 4;
-X  return asnp->abp;
-}
-X
-X
-#define ASN_PSSM_PARAMS 161
-#define ASN_PSSM_PARAMS_PSEUDOCNT 160
-#define ASN_PSSM_PARAMS_RPSPARAMS 161
-#define ASN_PSSM_RPSPARAMS_MATRIX 160
-#define ASN_PSSM_RPSPARAMS_GAPOPEN 161
-#define ASN_PSSM_RPSPARAMS_GAPEXT 162
-X
-/* get_pssm_rpsparams() reads the optional matrix name, gap-open and
-X   gap-extend elements, in that order, from inside an optional ASN_SEQ
-X   wrapper.  matrix receives up to MAX_SSTR bytes; elements that are
-X   absent leave their output untouched.  Returns the updated parse
-X   position. */
-unsigned char *
-get_pssm_rpsparams(struct asn_bstruct *asnp,
-X                   char *matrix,
-X                   int *gap_open,
-X                   int *gap_ext) {
-X
-X  int in_seq;
-X
-X  asnp->abp = chk_asn_buf(asnp,4);
-X
-X  in_seq = (*asnp->abp == ASN_SEQ);
-X  if (in_seq) { asnp->abp += 2; }
-X
-X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_MATRIX) {
-X    asnp->abp += 2;
-X    asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
-X  }
-X
-X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPOPEN) {
-X    asnp->abp += 2;
-X    asnp->abp = get_astr_int(asnp, gap_open);
-X  }
-X
-X  if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPEXT) {
-X    asnp->abp += 2;
-X    asnp->abp = get_astr_int(asnp, gap_ext);
-X  }
-X
-X  /* step over the nulls that close the wrapper */
-X  if (in_seq) {
-X    chk_asn_buf(asnp, 2);
-X    asnp->abp += 2;
-X  }
-X  return asnp->abp;
-}
-X
-/* get_pssm_params() reads the optional pseudo-count element and the
-X   optional rps-parameters element (matrix name and gap penalties)
-X   from inside an optional ASN_SEQ wrapper.  Elements that are absent
-X   leave their outputs untouched.  Returns the updated parse
-X   position. */
-unsigned char *
-get_pssm_params(struct asn_bstruct *asnp,
-X                int *pseudo_cnts,
-X                char *matrix,
-X                int *gap_open,
-X                int *gap_ext) {
-X
-X  int in_seq;
-X
-X  asnp->abp = chk_asn_buf(asnp,6);
-X
-X  in_seq = (*asnp->abp == ASN_SEQ);
-X  if (in_seq) { asnp->abp += 2; }
-X
-X  if (*asnp->abp == ASN_PSSM_PARAMS_PSEUDOCNT) {
-X    asnp->abp += 2;
-X    asnp->abp = get_astr_int(asnp, pseudo_cnts);
-X  }
-X
-X  if (*asnp->abp == ASN_PSSM_PARAMS_RPSPARAMS) {
-X    asnp->abp += 2;
-X    asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open, gap_ext);
-X    asnp->abp += 2;
-X  }
-X
-X  /* step over the nulls that close the wrapper */
-X  if (in_seq) { asnp->abp += 2; }
-X  return asnp->abp;
-}
-X
-X
-/* get_pssm2_intermed() allocates an n_cols x n_rows frequency table as
-X   one contiguous block addressed through n_cols row pointers, stores
-X   it in *freqs, and fills it with get_pssm_freqs() in column-first
-X   order.  Exits when the dimensions are not positive or memory cannot
-X   be allocated.  Returns the updated parse position. */
-unsigned char *
-get_pssm2_intermed(struct asn_bstruct *asnp,
-X                   double ***freqs,
-X                   int n_rows,
-X                   int n_cols) {
-X
-X  int i;
-X  double **my_freqs;
-X
-X  /* the dimensions come from the input file - reject nonsense before
-X     it reaches calloc() */
-X  if (n_rows <= 0 || n_cols <= 0) {
-X    fprintf(stderr, " cannot allocate freq - bad rows/cols - %d/%d\n",
-X            n_rows, n_cols);
-X    exit(1);
-X  }
-X
-X  if ((my_freqs = (double **) calloc((size_t)n_cols, sizeof(double *)))==NULL) {
-X    fprintf(stderr, " cannot allocate freq cols - %d\n", n_cols);
-X    exit(1);
-X  }
-X
-X  /* multiply as size_t so that a large table cannot overflow an int */
-X  if ((my_freqs[0] = (double *) calloc((size_t)n_cols * (size_t)n_rows, sizeof(double)))==NULL) {
-X    fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", n_rows, n_cols);
-X    exit(1);
-X  }
-X
-X  for (i=1; i < n_cols; i++) {
-X    my_freqs[i] = my_freqs[i-1] + n_rows;
-X  }
-X
-X  *freqs = my_freqs;
-X
-X  chk_asn_buf(asnp, 8);
-X
-X  return get_pssm_freqs(asnp, my_freqs, n_rows, n_cols, 0);
-}
-X
-/* parse_pssm2_asn() parses the body of a version-2 PSSM file, after
-X   the file header and version number have been read: the query
-X   Bioseq, then a series of tagged elements (is-protein flag, matrix
-X   name, row and column counts, lambda, frequency data) up to the
-X   terminating null.
-X     gi, name, acc, descr, query, nq - query information
-X     n_rows, n_cols                  - dimensions of the table
-X     freqs                           - receives the allocated table
-X     pseudo_cnts                     - not set by this routine
-X     matrix                          - scoring matrix name (MAX_SSTR)
-X     lambda_p                        - receives lambda
-X   Returns 1 on success, -1 when the expected start bytes are missing
-X   or the frequency data precedes the row/column counts.  Exits on a
-X   non-positive row or column count. */
-int
-parse_pssm2_asn(struct asn_bstruct *asnp,
-X                int *gi,
-X                char *name,
-X                char *acc,
-X                char *descr,
-X                unsigned char **query,
-X                int *nq,
-X                int *n_rows,
-X                int *n_cols,
-X                double ***freqs,
-X                int *pseudo_cnts,
-X                char *matrix,
-X                double *lambda_p) {
-X
-X  int is_protein;
-X  int have_rows = 0, have_cols = 0;
-X
-X  chk_asn_buf(asnp, 32);
-X
-X  if (memcmp(asnp->abp, "\241\2000\200",4) != 0) {
-X    fprintf(stderr, "improper PSSM2 start\n");
-X    return -1;
-X  }
-X  else {asnp->abp+=4;}
-X
-X  if (*asnp->abp == ASN_BIOSEQ_SEQ ) {
-X    asnp->abp = get_astr_query2(asnp, gi, name, acc, descr, query, nq);
-X  }
-X
-X  /* finish up the nulls */
-X  while (*asnp->abp == '\0') { asnp->abp += 2;}
-X
-X  if (*asnp->abp == ASN_PSSM2_QUERY &&
-X      asnp->abp[2] != ASN_SEQ ) {
-X    fprintf(stderr, "improper PSSM2 start\n");
-X    return -1;
-X  }
-X  else {asnp->abp += 4;}
-X
-X  while (*asnp->abp != '\0' ) {
-X
-X    switch (*asnp->abp) {
-X    case ASN_PSSM_IS_PROT :
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_bool(asnp, &is_protein);
-X      break;
-X
-X    case ASN_PSSM2_MATRIX :
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
-X      break;
-X
-X    case ASN_PSSM2_NROWS :
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_int(asnp, n_rows);
-X
-X      if (*n_rows > 0) { have_rows = 1; }
-X      else {
-X        fprintf(stderr, " bad n_row count\n");
-X        exit(1);
-X      }
-X      break;
-X
-X    case ASN_PSSM2_NCOLS :
-X      asnp->abp+=2;
-X      asnp->abp = get_astr_int(asnp, n_cols);
-X      if (*n_cols > 0) {
-X        have_cols = 1;
-X      }
-X      else {
-X        fprintf(stderr, " bad n_col count\n");
-X        exit(1);
-X      }
-X      break;
-X
-X    case ASN_PSSM2_FREQS :
-X      asnp->abp += 4;
-X      if (*asnp->abp == '\0') { asnp->abp += 4;}
-X      break;
-X
-X    case ASN_PSSM2_LAMBDA :
-X      asnp->abp += 2;
-X      asnp->abp = get_astr_packedfloat(asnp,lambda_p);
-X      asnp->abp +=2;	/* skip over end of ASN_PSSM2_LAMBDA */
-X      break;
-X
-X    case ASN_PSSM_INTERMED_DATA :
-X      /* the table cannot be sized until both counts have been read */
-X      if (!have_rows || !have_cols) {
-X        fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",
-X                have_rows, have_cols);
-X        return -1;
-X      }
-X      asnp->abp += 2;
-X      asnp->abp = get_pssm2_intermed(asnp, freqs, *n_rows, *n_cols);
-X      asnp->abp += 4;
-X      break;
-X
-X    default: asnp->abp += 2;
-X    }
-X  }
-X
-X
-X  return 1;
-}
-X
-/* parse_pssm_asn() parses a binary ASN.1 PSSM file read from afd.
-X   After checking the file header it either hands a version-2 file to
-X   parse_pssm2_asn() (gap penalties are then set to 0), or reads the
-X   older layout itself: is-protein flag, row and column counts, storage
-X   order flag, query, frequency data and parameters.
-X     gi, name, acc, descr, query, nq - query information
-X     n_rows, n_cols                  - dimensions of the table
-X     freqs                           - receives the allocated table
-X     pseudo_cnts, matrix,
-X     gap_open, gap_ext               - scoring parameters
-X     lambda_p                        - set only for version-2 files
-X   The read buffer is private to this call and is released before
-X   returning.  Returns 1 on success and -1 on a bad header, a version
-X   mismatch, missing dimensions or an allocation failure.  Exits on a
-X   non-positive row or column count. */
-int
-parse_pssm_asn(FILE *afd,
-X               int *gi,
-X               char *name,
-X               char *acc,
-X               char *descr,
-X               unsigned char **query,
-X               int *nq,
-X               int *n_rows,
-X               int *n_cols,
-X               double ***freqs,
-X               int *pseudo_cnts,
-X               char *matrix,
-X               int *gap_open,
-X               int *gap_ext,
-X               double *lambda_p) {
-X
-X  int is_protein, pssm_version;
-X  int i;
-X  /* these stay 0 when the corresponding element is absent */
-X  int have_rows = 0, have_cols = 0, by_col = 0;
-X  int ret_val = 1;
-X  double **my_freqs;
-X
-X  struct asn_bstruct asn_str;
-X
-X  if ((asn_str.buf = (unsigned char *)calloc(ASN_BUF, sizeof(char))) == NULL ) {
-X    fprintf(stderr, " cannot allocate asn_buf (%d)\n",ASN_BUF);
-X    exit(1);
-X  }
-X
-X  /* start with an "empty" buffer so the first check reads the file */
-X  asn_str.fd = afd;
-X  asn_str.len = ASN_BUF;
-X  asn_str.abp = asn_str.buf_max = asn_str.buf + ASN_BUF;
-X
-X  chk_asn_buf(&asn_str, 32);
-X
-X  if (memcmp(asn_str.abp, "0\200\240\200",4) != 0) {
-X    fprintf(stderr, "improper PSSM header -");
-X    ret_val = -1;
-X    goto done;
-X  }
-X  asn_str.abp+=4;
-X
-X  if (*asn_str.abp == ASN_IS_INT) {
-X    asn_str.abp = get_astr_int(&asn_str, &pssm_version);
-X    if (pssm_version != 2) {
-X      fprintf(stderr, "PSSM2 version mismatch: %d\n",pssm_version);
-X      ret_val = -1;
-X      goto done;
-X    }
-X    *gap_open = *gap_ext = 0;
-X    ret_val = parse_pssm2_asn(&asn_str, gi, name, acc, descr,
-X                              query, nq,
-X                              n_rows, n_cols, freqs,
-X                              pseudo_cnts, matrix,
-X                              lambda_p);
-X    goto done;
-X  }
-X
-X  if (*asn_str.abp == ASN_SEQ) { asn_str.abp += 2; }
-X
-X  if (*asn_str.abp == ASN_PSSM_IS_PROT ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_astr_bool(&asn_str, &is_protein);
-X  }
-X
-X  if (*asn_str.abp == ASN_PSSM_NROWS ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_astr_int(&asn_str, n_rows);
-X
-X    if (*n_rows > 0) { have_rows = 1; }
-X    else {
-X      fprintf(stderr, " bad n_row count\n");
-X      exit(1);
-X    }
-X  }
-X
-X  if (*asn_str.abp == ASN_PSSM_NCOLS ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_astr_int(&asn_str, n_cols);
-X    if (*n_cols > 0) {
-X      have_cols = 1;
-X    }
-X    else {
-X      fprintf(stderr, " bad n_col count\n");
-X      exit(1);
-X    }
-X  }
-X
-X  if (*asn_str.abp == ASN_PSSM_BYCOL ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_astr_bool(&asn_str, &by_col);
-X  }
-X
-X  /* we have read everything up to the query
-X
-X     n_cols gives us the query length, which we can allocate;
-X  */
-X
-X  if (*asn_str.abp == ASN_PSSM_QUERY ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_astr_query(&asn_str, gi, name, acc, descr, query, nq);
-X    /* only trust n_cols when it was actually read */
-X    if (have_cols) { *nq = *n_cols; }
-X  }
-X
-X  /* finish up the nulls */
-X  while (*asn_str.abp == '\0') { asn_str.abp += 2;}
-X
-X  if (*asn_str.abp == ASN_PSSM_INTERMED_DATA) {
-X
-X    if (!have_rows || !have_cols) {
-X      fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",
-X              have_rows, have_cols);
-X      ret_val = -1;
-X      goto done;
-X    }
-X
-X    if ((my_freqs = (double **) calloc((size_t)*n_cols, sizeof(double *)))==NULL) {
-X      fprintf(stderr, " cannot allocate freq cols - %d\n", *n_cols);
-X      ret_val = -1;
-X      goto done;
-X    }
-X
-X    if ((my_freqs[0] = (double *) calloc((size_t)*n_cols * (size_t)*n_rows, sizeof(double)))==NULL) {
-X      fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", *n_rows, *n_cols);
-X      free(my_freqs);
-X      ret_val = -1;
-X      goto done;
-X    }
-X    for (i=1; i < *n_cols; i++) {
-X      my_freqs[i] = my_freqs[i-1] + *n_rows;
-X    }
-X
-X    *freqs = my_freqs;
-X
-X    asn_str.abp+=2;
-X    /* the flag read above is what get_pssm_intermed() calls by_row */
-X    asn_str.abp = get_pssm_intermed(&asn_str, my_freqs, *n_rows, *n_cols, by_col);
-X    asn_str.abp += 4;
-X  }
-X
-X  if (*asn_str.abp == ASN_PSSM_PARAMS ) {
-X    asn_str.abp+=2;
-X    asn_str.abp = get_pssm_params(&asn_str, pseudo_cnts, matrix, gap_open, gap_ext);
-X  }
-X  else if (*asn_str.abp == 0) {asn_str.abp+=2;}
-X
-X done:
-X  /* everything handed back to the caller was copied out of the buffer */
-X  free(asn_str.buf);
-X  return ret_val;
-}
-X
-/* parse_pssm_asn_fa() parses the PSSM file fd with parse_pssm_asn()
-X   and then re-packs every column of the frequency table so that
-X   entries 0..19 hold the 20 amino-acid frequencies in the order given
-X   by pssm_aa_order[].
-X     n_rows_p, n_cols_p - receive the table dimensions
-X     query              - receives the query residues
-X     freq2d             - receives the (re-ordered) table
-X     matrix             - receives the scoring matrix name
-X     gap_open_p,
-X     gap_extend_p       - receive the gap penalties
-X     lambda_p           - receives lambda, when the file supplies it
-X   Returns 1 on success, the parser's value when that is <= 0, and -1
-X   when the table has fewer than COMPO_NUM_TRUE_AA rows. */
-int
-parse_pssm_asn_fa( FILE *fd,
-X                   int *n_rows_p, int *n_cols_p,
-X                   unsigned char **query,
-X                   double ***freq2d,
-X                   char *matrix,
-X                   int *gap_open_p,
-X                   int *gap_extend_p,
-X                   double *lambda_p) {
-X
-X  int qi, rj, n_copy;
-X  int gi;
-X  double tmp_freqs[COMPO_LARGEST_ALPHABET];
-X  char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR];
-X  int nq;
-X  int pseudo_cnts, ret_val;
-X
-X  /* parse the file */
-X
-X  ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq,
-X                           n_rows_p, n_cols_p, freq2d,
-X                           &pseudo_cnts, matrix, gap_open_p, gap_extend_p,
-X                           lambda_p);
-X
-X  if (ret_val <=0) return ret_val;
-X
-X  /* each column must be able to hold the 20 re-packed values */
-X  if (*n_rows_p < COMPO_NUM_TRUE_AA) {
-X    fprintf(stderr, " too few PSSM rows: %d\n", *n_rows_p);
-X    return -1;
-X  }
-X
-X  /* never copy more rows than the scratch array can hold */
-X  n_copy = (*n_rows_p < COMPO_LARGEST_ALPHABET) ? *n_rows_p : COMPO_LARGEST_ALPHABET;
-X
-X  /* transform the frequencies */
-X
-X  for (qi = 0; qi < *n_cols_p; qi++) {
-X    /* rows the table does not have read as 0 */
-X    for (rj = 0; rj < COMPO_LARGEST_ALPHABET; rj++) { tmp_freqs[rj] = 0.0; }
-X    for (rj = 0; rj < n_copy; rj++) { tmp_freqs[rj] = (*freq2d)[qi][rj];}
-X
-X    for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) {
-X      (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]];
-X    }
-X  }
-X  return 1;
-}
-SHAR_EOF
-chmod 0644 pssm_asn_subs.c ||
-echo 'restore of pssm_asn_subs.c failed'
-Wc_c="`wc -c < 'pssm_asn_subs.c'`"
-test 26268 -eq "$Wc_c" ||
- echo 'pssm_asn_subs.c: original size 26268, current size' "$Wc_c"
-fi
-# ============= pthr_subs.h ==============
-if test -f 'pthr_subs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping pthr_subs.h (File already exists)'
-else
-echo 'x - extracting pthr_subs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pthr_subs.h' &&
-X
-X
-/* $Name: fa_34_26_5 $ - $Id: pthr_subs.h,v 1.1.1.1 1999/10/22 20:56:01 wrp Exp $ */
-X
-X
-#include <pthread.h>
-X
-/* error macro for thread calls */
-X
-#define check(status,string) \
-X if (status != 0) {fprintf(stderr,string); \
-X fprintf(stderr,"%s\n",strerror(status)); } /* error macro */
-X
-/*
-#define check(status,string) \
-X if (status == -1) perror(string) */ /* error macro for thread calls */
-X
-X
-/* Thread handles and synchronisation objects for the pthreads support
-X   code.  Everything down to the #endif is compiled only when XTERNAL
-X   is not defined.
-X   NOTE(review): the extern declarations repeat the definitions inside
-X   the same #ifndef branch, so an #else may be missing before them --
-X   confirm before including this header with XTERNAL defined. */
-#ifndef XTERNAL
-pthread_t threads[MAX_WORKERS];
-X
-/* mutex stuff */
-X
-pthread_mutex_t reader_mutex; /* empty buffer pointer structure lock */
-pthread_mutex_t worker_mutex; /* full buffer pointer structure lock */
-X
-/* condition variable stuff */
-X
-pthread_cond_t reader_cond_var; /* condition variable for reader */
-pthread_cond_t worker_cond_var; /* condition variable for workers */
-X
-pthread_mutex_t start_mutex; /* start-up synchronisation lock */
-pthread_cond_t start_cond_var; /* start-up synchronisation condition variable */
-X
-extern pthread_t threads[];
-X
-/* mutex stuff */
-X
-extern pthread_mutex_t reader_mutex;
-extern pthread_mutex_t worker_mutex;
-X
-/* condition variable stuff */
-X
-extern pthread_cond_t reader_cond_var;
-extern pthread_cond_t worker_cond_var;
-X
-extern pthread_mutex_t start_mutex;
-extern pthread_cond_t start_cond_var;
-/* start_thread is only declared here; it is not defined in this header */
-extern int start_thread;
-X
-#endif
-SHAR_EOF
-chmod 0644 pthr_subs.h ||
-echo 'restore of pthr_subs.h failed'
-Wc_c="`wc -c < 'pthr_subs.h'`"
-test 1301 -eq "$Wc_c" ||
- echo 'pthr_subs.h: original size 1301, current size' "$Wc_c"
-fi
-# ============= pthr_subs2.c ==============
-if test -f 'pthr_subs2.c' -a X"$1" != X"-c"; then
- echo 'x - skipping pthr_subs2.c (File already exists)'
-else
-echo 'x - extracting pthr_subs2.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pthr_subs2.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* modified to do more initialization of work_info here, rather than in main() */
-X
-/* $Name: fa_34_26_5 $ - $Id: pthr_subs2.c,v 1.9 2006/06/22 02:35:05 wrp Exp $ */
-X
-/* this file isolates the pthreads calls from the main program */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/types.h>
-#include <signal.h>
-X
-#include "defs.h"
-#include "structs.h" /* mngmsg, libstruct */
-#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
-X
-#include <pthread.h>
-#define XTERNAL
-#include "thr.h"
-#undef XTERNAL
-#include "pthr_subs.h"
-X
-extern void work_thread (struct thr_str *);
-X
-/* init_thr(): prepare the shared synchronisation objects and start
-X the worker threads.
-X
-X nthreads - number of threads to create; the program exits if this
-X   exceeds MAX_WORKERS
-X work_info - array with at least nthreads entries; entry i is filled
-X   in here and passed to thread i as its argument
-X m_msg - supplies n0, nm0, qframe, qshuffle and max_tot
-X ppst, aa0, max_work_buf - copied into every work_info[] entry
-X
-X A pthread call that fails is reported on stderr by check() and
-X execution continues; only the MAX_WORKERS test calls exit(). */
-X
-void init_thr(int nthreads, struct thr_str *work_info,
-X struct mngmsg m_msg, struct pstruct *ppst,
-X unsigned char *aa0, int max_work_buf)
-{
-X int status, i;
-X pthread_attr_t thread_attr;
-X
-X if (nthreads > MAX_WORKERS) {
-X   fprintf ( stderr," cannot start %d threads, max: %d\n",
-X     nthreads, MAX_WORKERS);
-X   exit(1);
-X }
-X
-X /* set up work_info[] structure, set parameters */
-X
-X for (i=0; i<nthreads; i++) {
-X   work_info[i].n0 = m_msg.n0;
-X   work_info[i].nm0 = m_msg.nm0;
-X   work_info[i].qframe = m_msg.qframe;
-X   work_info[i].qshuffle = m_msg.qshuffle;
-X   work_info[i].ppst = ppst;
-X   work_info[i].aa0 = aa0;
-X   work_info[i].max_work_buf=max_work_buf;
-X   work_info[i].worker=i;
-X   work_info[i].max_tot=m_msg.max_tot;
-X }
-X
-X /* mutex and condition variable initialisation */
-X
-X status = pthread_mutex_init(&reader_mutex, NULL);
-X check(status,"Reader_mutex init bad status\n");
-X
-X status = pthread_mutex_init(&worker_mutex, NULL);
-X check(status,"Worker_mutex init bad status\n");
-X
-X status = pthread_cond_init(&reader_cond_var, NULL);
-X check(status,"Reader_cond_var init bad status\n");
-X
-X status = pthread_cond_init(&worker_cond_var, NULL);
-X check(status,"Worker_cond_var init bad status\n");
-X
-X status = pthread_mutex_init(&start_mutex, NULL);
-X check(status,"Start_mutex init bad status\n");
-X
-X status = pthread_cond_init(&start_cond_var, NULL);
-X check(status,"Start_cond_var init bad status\n");
-X
-X /* thread attributes: only the contention scope is set below; no
-X    stack size is changed here */
-X
-X status = pthread_attr_init( &thread_attr );
-X check(status,"attribute create bad status\n");
-X
-#ifdef IRIX
-X /* pthread_attr_setscope() returns an int error code, so compare it
-X    with 0 rather than NULL.  The scope value 2 is not defined in this
-X    file - presumably an IRIX-specific scope; confirm.  If that call
-X    succeeds, status still holds the result of pthread_attr_init(). */
-X if (pthread_attr_setscope( &thread_attr, 2) != 0)
-X   status = pthread_attr_setscope( &thread_attr,PTHREAD_SCOPE_PROCESS);
-X check(status,"set scope on IRIX bad status\n");
-#endif
-X
-#ifdef FASTA_setscope
-X status = pthread_attr_setscope( &thread_attr, PTHREAD_SCOPE_SYSTEM);
-X check(status,"set scope bad status\n");
-#endif
-X
-X /* start the worker threads */
-X
-X for (i=0; i < nthreads; i++) {
-X   /**********************/
-X   status=pthread_create(&threads[i],&thread_attr,
-X     (void *(*)(void *))&work_thread,&work_info[i]);
-X   check(status,"Pthread_create failed\n");
-X }
-X
-X /* the attribute object is only needed while creating threads;
-X    release it so its resources are not leaked */
-X status = pthread_attr_destroy( &thread_attr );
-X check(status,"attribute destroy bad status\n");
-}
-X
-/* start_mutex/start_cond_var provide exclusive access to
-X extern int start_thread */
-X
-/* start_thr(): set start_thread to 0 while holding start_mutex and
-X broadcast on start_cond_var so that every thread waiting in
-X wait_thr() can proceed.  Failures are reported through check(). */
-X
-void start_thr()
-{
-X int status;
-X
-X /* tell threads to proceed */
-X
-X status = pthread_mutex_lock(&start_mutex);
-X check(status,"Start_mutex lock bad status in main\n");
-X
-X start_thread = 0; /* lower predicate */
-X
-X /* report a failed broadcast instead of overwriting its status */
-X status = pthread_cond_broadcast(&start_cond_var);
-X check(status,"Start_cond_var broadcast bad status in main\n");
-X
-X status = pthread_mutex_unlock(&start_mutex);
-X check(status,"Start_mutex unlock bad status in main\n");
-}
-X
-/* get_rbuf(): take the next buffer from reader_buf[] for filling.
-X
-X Blocks on reader_cond_var while num_reader_bufs is 0.  On return
-X *cur_buf holds reader_buf[reader_buf_readp]; the index is advanced
-X modulo max_work_buf and num_reader_bufs is decremented, all under
-X reader_mutex.  Failures are reported through check(). */
-X
-void get_rbuf(struct buf_head **cur_buf, int max_work_buf)
-{
-X int status;
-X
-X status = pthread_mutex_lock(&reader_mutex); /* lock reader_buf structure */
-X
-X check(status,"Reader_mutex lock in master bad status\n");
-X
-X /* no reader bufs: wait for signal to proceed */
-X while (num_reader_bufs == 0) {
-X   /* check the wait status, as wait_thr() does for its wait */
-X   status = pthread_cond_wait(&reader_cond_var,&reader_mutex);
-X   check(status,"Reader_cond_wait in master bad status\n");
-X }
-X
-X *cur_buf = reader_buf[reader_buf_readp]; /* get the buffer address */
-X reader_buf_readp = (reader_buf_readp+1)%(max_work_buf); /* increment index */
-X num_reader_bufs--;
-X
-X status = pthread_mutex_unlock(&reader_mutex); /* unlock structure */
-X check(status,"Reader_mutex unlock in master bad status\n");
-}
-X
-/* put_rbuf(): hand a filled buffer to the workers.
-X
-X Under worker_mutex, cur_buf is stored at worker_buf[worker_buf_readp],
-X the index is advanced modulo max_work_buf, num_worker_bufs is
-X incremented and worker_cond_var is signalled.  Failures are reported
-X through check(). */
-X
-void put_rbuf(struct buf_head *cur_buf, int max_work_buf)
-{
-X int status;
-X
-X /* give the buffer to a thread, and wait for more */
-X status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
-X check(status,"Worker_mutex lock in master bad status\n");
-X
-X /* Put buffer onto available for workers list */
-X worker_buf[worker_buf_readp] = cur_buf;
-X worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
-X num_worker_bufs++; /* increment number of buffers available to workers */
-X
-X /* Signal one worker to wake and start work; report a failed signal
-X    instead of overwriting its status */
-X status = pthread_cond_signal(&worker_cond_var);
-X check(status,"Worker_cond_var signal in master bad status\n");
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"Worker_mutex unlock in master bad status\n");
-}
-X
-/* put_rbuf_done(): hand over the last buffer and wait for the workers.
-X
-X Queues cur_buf exactly as put_rbuf() does, sets reader_done to 1,
-X broadcasts on worker_cond_var, and then joins threads[0..nthreads-1].
-X The threads' exit values are not used.  Failures are reported through
-X check(). */
-X
-void put_rbuf_done(int nthreads, struct buf_head *cur_buf, int max_work_buf)
-{
-X int status, i;
-X void *exit_value;
-X
-X /* give the buffer to a thread */
-X status = pthread_mutex_lock(&worker_mutex); /* lock worker_buf_structure */
-X check(status,"Worker_mutex lock in master bad status\n");
-X
-X /* Put buffer onto available for workers list */
-X worker_buf[worker_buf_readp] = cur_buf;
-X worker_buf_readp = (worker_buf_readp+1)%(max_work_buf);
-X num_worker_bufs++; /* increment number of buffers available to workers */
-X
-X /* Mark the reader as finished and wake every waiting worker, so
-X    that each can see reader_done in get_wbuf() */
-X
-X reader_done = 1;
-X status = pthread_cond_broadcast(&worker_cond_var);
-X check(status,"Worker_cond_var broadcast in master bad status\n");
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"Worker_mutex unlock in master bad status\n");
-X
-X /* wait for every worker thread to terminate */
-X
-X for (i=0; i < nthreads; i++) {
-X   status = pthread_join( threads[i], &exit_value);
-X   check(status,"Pthread_join bad status\n");
-X }
-}
-X
-/* wait_thr(): block the caller until extern int start_thread is 0 */
-X
-void wait_thr()
-{
-X int rc;
-X
-X /* start_thread is only examined while start_mutex is held */
-X rc = pthread_mutex_lock(&start_mutex);
-X check(rc,"Start_mutex lock bad status in worker\n");
-X
-X /* the predicate is re-tested after every wakeup */
-X while (start_thread != 0) {
-X   rc = pthread_cond_wait(&start_cond_var, &start_mutex);
-X   check(rc,"Start_cond_wait bad status in worker\n");
-X }
-X
-X rc = pthread_mutex_unlock(&start_mutex);
-X check(rc,"Start_mutex unlock bad status in worker\n");
-}
-X
-/* get_wbuf(): take the next filled buffer from worker_buf[].
-X
-X Blocks on worker_cond_var while num_worker_bufs is 0.
-X
-X Returns 1 with *cur_buf set to worker_buf[worker_buf_workp] (index
-X advanced modulo max_work_buf, num_worker_bufs decremented), or 0
-X without touching *cur_buf when no buffer is queued and reader_done
-X is set.  Failures are reported through check(). */
-X
-int get_wbuf(struct buf_head **cur_buf, int max_work_buf)
-{
-X int status;
-X
-X /* get a buffer to work on */
-X status = pthread_mutex_lock(&worker_mutex);
-X check(status,"First worker_mutex lock in worker bad status\n");
-X
-X /* No worker_bufs available: wait for reader to produce some */
-X while (num_worker_bufs == 0) {
-X   /* Exit if reader has finished */
-X   if (reader_done) {
-X     status = pthread_mutex_unlock(&worker_mutex);
-X     check(status,"Worker_mutex unlock at exit in worker bad status\n");
-X     return 0;
-X   }
-X   status = pthread_cond_wait(&worker_cond_var,&worker_mutex);
-X   check(status,"Worker_cond_wait in worker bad status\n");
-X } /* end while */
-X
-X /* Get the buffer from list */
-X *cur_buf = worker_buf[worker_buf_workp];
-X worker_buf_workp = (worker_buf_workp+1)%(max_work_buf);
-X num_worker_bufs--;
-X
-X status = pthread_mutex_unlock(&worker_mutex);
-X check(status,"First worker_mutex unlock in worker bad status\n");
-X return 1;
-}
-X
-/* put_wbuf(): return a processed buffer to the reader's list.
-X
-X Under reader_mutex, cur_buf is stored at reader_buf[reader_buf_workp],
-X the index is advanced modulo max_work_buf and num_reader_bufs is
-X incremented.  reader_cond_var is signalled only when the count has
-X just become 1.  Failures are reported through check(). */
-X
-void put_wbuf(struct buf_head *cur_buf, int max_work_buf)
-{
-X int status;
-X
-X /* put buffer back on list for reader */
-X status = pthread_mutex_lock(&reader_mutex);
-X check(status,"Reader_mutex lock in worker bad status\n");
-X
-X reader_buf[reader_buf_workp] = cur_buf;
-X reader_buf_workp = (reader_buf_workp+1)%(max_work_buf);
-X num_reader_bufs++;
-X
-X /* The list was empty before this buffer was added, so the reader
-X    may be blocked in get_rbuf(): wake it */
-X if (num_reader_bufs == 1) {
-X   status = pthread_cond_signal(&reader_cond_var);
-X   check(status,"Reader_cond_var signal in worker bad status\n");
-X }
-X
-X status = pthread_mutex_unlock(&reader_mutex);
-X check(status,"Reader_mutex unlock in worker bad status\n");
-}
-SHAR_EOF
-chmod 0644 pthr_subs2.c ||
-echo 'restore of pthr_subs2.c failed'
-Wc_c="`wc -c < 'pthr_subs2.c'`"
-test 7689 -eq "$Wc_c" ||
- echo 'pthr_subs2.c: original size 7689, current size' "$Wc_c"
-fi
-# ============= pvcomp.1 ==============
-if test -f 'pvcomp.1' -a X"$1" != X"-c"; then
- echo 'x - skipping pvcomp.1 (File already exists)'
-else
-echo 'x - extracting pvcomp.1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'pvcomp.1' &&
-.TH PVCOMPFA/PVCOMPSW/v3.4 1 "January, 2003"
-.SH NAME
-.B pv34compfa
-\- scan a protein or DNA sequence library for similar
-sequences using the FASTA algorithm in parallel on a network of
-machines running pvm3.
-X
-.B pv34compsw
-\- scan a protein or DNA sequence library for similar
-sequences using the Smith-Waterman algorithm in parallel on a network
-of machines running pvm3.
-X
-.B ps34compfa
-\- evaluate sequence comparison parameters using the FASTA
-algorithm and super-family-annotated libraries.
-X
-.B ps34compsw
-\- evaluate sequence comparison parameters using the
-Smith-Waterman algorithm and super-family-annotated libraries.
-X
-.SH SYNOPSIS
-.B pv34compfa
-[-Q|q -B -b # -d # -E # -f # -g # -H -i J # -n -o -p #
-\& -R
-.I STATFILE
-\& -r "+n/-m" \& -S -s
-.I SMATRIX
-\& -w # -1 ] query-library reference-library [
-.I ktup
-]
-.B pv34compfa
-[\-QBbcefgHiJnopRrSsw1] \- interactive mode
-X
-.B pv34compsw
-[-Q|q -B -b # -e -f delval -g gapval -i
-\& -n -p # -R -R
-.I STATFILE
-\& -r "+n/-m" \& -S -s
-\& -s
-.I SMATRIX
-X ] query-library reference-library [
-.I ktup
-]
-X
-.B pv34compsw
-[\-QBbefgnpRrsS] \- interactive mode
-X
-.SH DESCRIPTION
-.B pv34compfa
-and
-.B pv34compsw
-compare all of the sequences in one DNA or protein sequence library
-(the query library) with all of the entries in a reference sequence
-library using the FASTA (pv34compfa) or Smith-Waterman (pv34compsw)
-algorithms. For example,
-.B pv34compfa
-can compare a library of protein sequences to all of the sequences in
-the NBRF PIR protein sequence database.
-.B pv34compfa
-and
-.B pv34compsw
-are designed to run in parallel on networks of unix workstations using
-the PVM parallel programming system. (For more information on PVM,
-send email to "netlib@ornl.gov" with the message "send index for pvm3").
-.PP
-.B pv34compfa
-uses the rapid sequence comparison algorithm
-described in Pearson and Lipman, Proc. Natl. Acad. Sci. USA, (1988) 85:2444.
-The program can be invoked either with command line arguments or in
-interactive mode. The optional third argument,
-.I ktup
-sets the sensitivity and speed of the search. If
-.I ktup=2,
-similar regions in the two sequences being compared are found by
-looking at pairs of aligned residues; if
-.I ktup=1,
-single aligned amino acids are examined.
-.I ktup
-can be set to 2 or 1 for protein sequences, or from 1 to 6 for DNA sequences.
-The default if
-.I
-ktup
-is not specified is 2 for proteins and 6 for DNA.
-.PP
-.B pv34compfa
-compares a library of query sequences (there need be only one) to a
-reference sequence library. Normally
-.B pv34compfa
-sorts the output by the
-.I initn
-score. By using the
-.I \-1
-option, sequences are ranked by their
-.B init1
-score. Alternatively, the
-.I \-o
-option causes optimized scores to be calculated for every sequence
-greater than a threshold and the output to be sorted by the optimized
-scores.
-.PP
-.B pv34compsw
-uses the rigorous Smith-Waterman algorithm to compare protein or
-DNA sequences. The gap penalties and scoring matrices can be
-modified with the
-.I -f\c
-\&,
-.I -g\c
-\&, and
-.I -s
-options.
-.PP
-.B pv34compfa
-(and
-.B pv34compsw\c
-\&) will automatically decide whether the query sequence is DNA or
-protein by reading the query sequence as protein and determining
-whether the `amino-acid composition' is more than 85% A+C+G+T.
-.PP
-.B ps34compfa
-and
-.B ps34compsw
-are versions of
-.B pv34compfa
-and
-.B pv34compsw
-that evaluate the quality of a search by reporting how many
-high-scoring related sequences and low-scoring unrelated sequences
-were found. These programs require that both the query library and
-the reference library be annotated with superfamily numbers for every
-sequence in the library.
-.SH OPTIONS
-.LP
-.B Pv34compfa
-and
-.B pv34compsw
-now support all the options of the fasta3(_t) programs.
-.TP
-\-B
-Report z-score, rather than bit-score, in list of best hits.
-.TP
-\-b #
-The number of similarity scores to be shown (10 by default).
-.TP
-\-E #
-Expectation value limit for displaying best scores.
-.TP
-\-d #
-The number of alignments to be shown.
-.TP
-\-f #
-(delval) penalty for the first residue in a gap. -12 by default for proteins.
-.TP
-\-g #
-(gapval) penalty for additional residues in a gap after the first. -2
-by default for proteins.
-.TP
-\-H #
-turn on histogram display (off by default).
-.TP
-\-i
-invert (reverse complement) DNA sequence.
-.TP
-\-J M:N
-start at the M-th sequence in the query library and continue to the
-"N-th". By default, J=1 and the search begins with the first sequence
-and ends with the last, but sometimes it makes sense to start in the
-middle of the query library if a run partially completed, and to
-finish "early" if the analysis will be run on several parallel
-clusters.
-.TP
-\-n
-Force the program to use DNA sequence parameters.
-.TP
-\-p #
-Number of "slave" processors to use. Typically, one less than
-the number of processors available with
-.B pv34compfa
-so that one processor can be used to collate results. With
-.B pv34compsw\c
-\&, it is more efficient to use every processor as a slave and
-not use this option.
-.TP
-\-Q \-q
-Quiet option. The programs will not prompt for input.
-.TP
-\-R file
-(STATFILE) Causes
-.B pv34compfa
-and
-.B pv34compsw
-to write out the sequence identifier, superfamily number (if available),
-and similarity scores to
-.I STATFILE
-for every sequence in the library. These results are not sorted.
-.TP
-\-r
-specify DNA match/mismatch ratio as "+3/-2". Default is "+5/-4".
-The "+" and "-" are required.
-.TP
-\-S
-Treat lower case residues as low complexity regions.
-.TP
-\-s file
-the filename of an alternative scoring matrix file.
-.LP
-.B
-pv34compfa
-only
-.TP
-\-1
-sort similarity scores by
-.I init1
-scores instead of
-.I initn
-scores.
-.TP
-\-c #
-(OPTCUT) the threshold for optimization with the
-.B -o
-option.
-.TP
-\-o
-(no-optimize); causes
-.B pv34compfa
-not to perform the default optimization on all of the sequences in the library
-with
-.B initn
-scores greater than
-.B OPTCUT\c
-\&.
-.TP
-\-y #
-Width for limited optimization (32 by default).
-.SH FILES
-.LP
-Query library files must be in Pearson/FASTA format, e.g.
-.in +0.5i
-.nf
->seq-id | sfnum descriptive line
-tmlyrghi... (sequence)
-X
-.fi
-.in -0.5i
-.PP
-.B pv34compfa
-and
-.B pv34compsw
-recognize the following library formats: 0 - Pearson/FASTA; 1 - Genbank tape;
-2 - NBRF/PIR Codata; 3 - EMBL/SWISS-PROT; 5 - NBRF/PIR VMS.
-.PP
-.I Scoring matrices \-
-These programs use a different format for the scoring (PAM) matrix
-file from FASTA; they use the PAM matrix file that is used by BLASTP
-and produced by Altschul's "pam.c" program in the BLAST package.
-.SH BUGS
-The program has been tested extensively only with type 0 and type 5
-files. This documentation file may not be up to date.
-.SH AUTHOR
-Bill Pearson
-.br
-wrp@virginia.EDU
-SHAR_EOF
-chmod 0644 pvcomp.1 ||
-echo 'restore of pvcomp.1 failed'
-Wc_c="`wc -c < 'pvcomp.1'`"
-test 6657 -eq "$Wc_c" ||
- echo 'pvcomp.1: original size 6657, current size' "$Wc_c"
-fi
-# ============= qrhuld.aa ==============
-if test -f 'qrhuld.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping qrhuld.aa (File already exists)'
-else
-echo 'x - extracting qrhuld.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'qrhuld.aa' &&
->QRHULD LDL receptor precursor - Human
-MGPWGWKLRWTVALLLAAAGTAVGDRCERNEFQCQDGKCISYKWVCDGSAECQDGSDESQETCLSVTCKS
-GDFSCGGRVNRCIPQFWRCDGQVDCDNGSDEQGCPPKTCSQDEFRCHDGKCISRQFVCDSDRDCLDGSDE
-ASCPVLTCGPASFQCNSSTCIPQLWACDNDPDCEDGSDEWPQRCRGLYVFQGDSSPCSAFEFHCLSGECI
-HSSWRCDGGPDCKDKSDEENCAVATCRPDEFQCSDGNCIHGSRQCDREYDCKDMSDEVGCVNVTLCEGPN
-KFKCHSGECITLDKVCNMARDCRDWSDEPIKECGTNECLDNNGGCSHVCNDLKIGYECLCPDGFQLVAQR
-RCEDIDECQDPDTCSQLCVNLEGGYKCQCEEGFQLDPHTKACKAVGSIAYLFFTNRHEVRKMTLDRSEYT
-SLIPNLRNVVA
-LDTEVASNRIYWSDLSQRMICSTQLDRAHGVSSYDTVISRDIQAPDGLAVDWIHSNIYWTDSVLGTVSVA
-DTKGVKRKTLFRENGSKPRAIVVDPVHGFMYWTDWGTPAKIKKGGLNGVDIYSLVTENIQWPNGITLDLL
-SGRLYWVDSKLHSISSIDVNGGNRKTILEDEKRLAHPFSLAVFEDKVFWTDIINEAIFSANRLTGSDVNL
-LAENLLSPEDMVLFHNLTQPRGVNWCERTTLSNGGCQYLCLPAPQINPHSPKFTCACPDGMLLARDMRSC
-LTEAEAAVATQETSTVRLKVSSTAVRTQHTTTRPVPDTSRLPGATPGLTTVEIVTMSHQALGDVAGRGNE
-KKPSSVRALSIVLPIVLLVFLCLGVFLLWKNWRLKNINSINFDNPVYQKTTEDEVHICHNQDGYSYPSRQ
-MVSLEDDVA
-SHAR_EOF
-chmod 0644 qrhuld.aa ||
-echo 'restore of qrhuld.aa failed'
-Wc_c="`wc -c < 'qrhuld.aa'`"
-test 914 -eq "$Wc_c" ||
- echo 'qrhuld.aa: original size 914, current size' "$Wc_c"
-fi
-# ============= randtest.c ==============
-if test -f 'randtest.c' -a X"$1" != X"-c"; then
- echo 'x - skipping randtest.c (File already exists)'
-else
-echo 'x - extracting randtest.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'randtest.c' &&
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-X
-/* randtest: seed random() from the microsecond field of the current
-X time, print the seed, then print n values of random()%100, one per
-X line.  n is taken from argv[1] with atoi(); it is 10 when no
-X argument is given. */
-X
-int
-main(int argc, char **argv)
-{
-X int i, n;
-X struct timeval t;
-X
-X if (argc < 2) n = 10;
-X else n = atoi(argv[1]);
-X
-X gettimeofday(&t,NULL);
-X /* tv_usec is not an int: cast it to match the conversion */
-X printf(" seed: %ld\n",(long)t.tv_usec);
-X srandom((unsigned int)t.tv_usec);
-X
-X /* random() returns long, so print with %ld */
-X for (i=0; i< n; i++)
-X   printf("%3ld\n",random()%100);
-X
-X return 0;
-}
-SHAR_EOF
-chmod 0644 randtest.c ||
-echo 'restore of randtest.c failed'
-Wc_c="`wc -c < 'randtest.c'`"
-test 339 -eq "$Wc_c" ||
- echo 'randtest.c: original size 339, current size' "$Wc_c"
-fi
-# ============= re_getlib.c ==============
-if test -f 're_getlib.c' -a X"$1" != X"-c"; then
- echo 'x - skipping re_getlib.c (File already exists)'
-else
-echo 'x - extracting re_getlib.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 're_getlib.c' &&
-/* re_getlib.c - re-acquire a sequence given lseek, lcont */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "mm_file.h"
-X
-#define GETLIB (m_fptr->getlib)
-X
-/* re_getlib(): re-read a library sequence in up to lcont chunks by
-X calling m_fptr->getlib (GETLIB) repeatedly into aa1.
-X
-X *loffset is reset to 0 and then increased by (n1 - loff) for every
-X chunk after which getlib leaves icont non-zero.  After such a chunk
-X the last loff residues are copied to the start of aa1 and the next
-X chunk is read at &aa1[loff] with maxt3 as the length limit.
-X
-X Returns n1 from the last getlib call, after the term_code and loff
-X adjustments below, or -1 if lcont < 1 (the loop never runs).
-X
-X The meaning of the value getlib stores in icont is not visible in
-X this file - presumably a continuation counter; confirm against the
-X getlib implementations. */
-X
-int
-re_getlib(unsigned char *aa1,
-X int maxn, /* longest aa1 */
-X int maxt3, /* alternate maxn */
-X int loff, /* overlap */
-X int lcont,
-X int term_code,
-X long *loffset, /* offset from real start of sequence */
-X long *l_off_p, /* coordinate of sequence start */
-X struct lmf_str *m_fptr) {
-X
-X unsigned char *aa1ptr;
-X int icont, maxt, ccont, n1;
-X char libstr[20];
-X fseek_t lmark;
-X
-X aa1ptr = aa1;
-X icont=0;
-X
-X *loffset = 0l;
-X maxt = maxn;
-X n1 = -1;
-X for (ccont=0; ccont<=lcont-1; ccont++) {
-X
-X n1= GETLIB(aa1ptr,maxt,libstr,sizeof(libstr),&lmark,&icont,m_fptr,l_off_p);
-X
-X /* append term_code (and a 0 after it) unless it is already last.
-X    NOTE(review): aa1ptr[n1-1] is read without testing n1 > 0 -
-X    confirm getlib cannot return 0 or a negative length here */
-X if (term_code && m_fptr->lib_aa && aa1ptr[n1-1]!=term_code) {
-X aa1ptr[n1++]=term_code;
-X aa1ptr[n1]=0;
-X }
-X
-X /* this chunk was read after loff overlap residues: count them */
-X if (aa1ptr!=aa1) n1 += loff;
-X
-X if (icont>lcont-1) break;
-X
-X if (icont) {
-X maxt = maxt3;
-X /* keep the last loff residues as the overlap for the next chunk */
-X memcpy(aa1,&aa1[n1-loff],loff);
-X aa1ptr= &aa1[loff];
-X *loffset += n1 - loff;
-X }
-X else {
-X maxt = maxn;
-X aa1ptr=aa1;
-X }
-X }
-X return n1;
-}
-SHAR_EOF
-chmod 0644 re_getlib.c ||
-echo 'restore of re_getlib.c failed'
-Wc_c="`wc -c < 're_getlib.c'`"
-test 1184 -eq "$Wc_c" ||
- echo 're_getlib.c: original size 1184, current size' "$Wc_c"
-fi
-# ============= readme.mpi_3.3 ==============
-if test -f 'readme.mpi_3.3' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.mpi_3.3 (File already exists)'
-else
-echo 'x - extracting readme.mpi_3.3 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.mpi_3.3' &&
-X
-X $Name: fa_34_26_5 $ - $Id: readme.mpi_3.3,v 1.4 2001/08/20 21:18:47 wrp Exp $
-X
-20-August-2001
-X
-This file is obsolete - see readme.v34t0, readme.v33t0, and
-readme.pvm_3.4 for more up-to-date information. With version 3.4, the
-MPI programs are mp34comp*, mu34comp*, etc.
-X
-================
-X
-20 January 2000
-X
-This distribution includes the first full-function MPI implementation of
-the library-vs-library comparison programs. The following programs are
-available:
-X
-Programs to produce conventional scores and alignments:
-X
-mp3compfa protein vs protein, DNA vs DNA
-mp3compsw protein vs protein, DNA vs DNA
-mp3compfx/ DNA vs protein
-mp3comptfx/y protein vs DNA
-X
-Programs to summarize the effectiveness of a search (require
-super-family-labeled databases):
-X
-ms3compfa protein vs protein, DNA vs DNA
-ms3compsw protein vs protein, DNA vs DNA
-ms3compfx/ DNA vs protein
-ms3comptfx/y protein vs DNA
-X
-Programs to report the scores and alignments of the highest scoring
-unrelated sequence (require super-family-labeled databases). These
-programs are used to evaluate the super-family labeling.
-X
-mu3compfa protein vs protein, DNA vs DNA
-mu3compsw protein vs protein, DNA vs DNA
-mucompfx/ DNA vs protein
-mu3comptfx/y protein vs DNA
-X
-Note that the current parallel implementations distribute the second
-database among 'N' parallel workers by approximately dividing the
-database into 'N' parts by seeking into the middle of the database and
-finding the next entry. This strategy fails when the database is a
-single long sequence (the first worker gets the entire database, the
-others get nothing).
-X
-This version has been tested using the MPICH implementation of MPI,
-which is available from:
-X
-X ftp://ftp.mcs.anl.gov/mpi
-X
-See readme.pvm_3.3 for other information about the development of
-these programs. Both the PVM (pv3compfa, etc.) and MPI (mp3compfa,
-etc.) sets of programs use the same sets of source files; differences
-in the two implementations are specified with #define PVM_SRC and
-#define MPI_SRC.
-X
-SHAR_EOF
-chmod 0644 readme.mpi_3.3 ||
-echo 'restore of readme.mpi_3.3 failed'
-Wc_c="`wc -c < 'readme.mpi_3.3'`"
-test 1994 -eq "$Wc_c" ||
- echo 'readme.mpi_3.3: original size 1994, current size' "$Wc_c"
-fi
-# ============= readme.pvm_3.2 ==============
-if test -f 'readme.pvm_3.2' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.pvm_3.2 (File already exists)'
-else
-echo 'x - extracting readme.pvm_3.2 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.2' &&
---> August, 1999
-X
-Corrected problem with opt_cut initialization that only appeared
-with p?compfa programs.
-X
---> v3.26 July, 1999
-X
-pvcomp* programs now use the same method for working with forward and
-reverse strands as the standard fast*3(_t) programs. Thus, statistics
-for DNA sequences should be very similar for pvcompfa and fasta3 or
-fasta3_t.
-X
-X February, 1999
-X
-With release fasta32t02 of the FASTA package, the alignment
-routines for pvcompfa, pvcompsw, etc now work properly
-again.
-X
-The PVM versions of the FASTA and Smith-Waterman search programs
-should now be functionally identical to the multithreaded (fasta3_t,
-ssearch3_t) and non-threaded (fasta3, ssearch3) versions.
-X
-The programs have also been updated to provide similar -m 10
-information to the non-pvm versions. There are some slight
-differences, because the pvcomp* versions are designed to work with
-multiple sequences. But, in general, a script that looks for /^>>>/
-to start an alignment set and /^>>><<</ to end the set will work
-properly.
-X
---> v3.23 March, 1999
-X
-Modified Makefile.pvm, showsum.c so that showsum.c is used by
-both the complib/_thr and pvcomplib (pvm parallel) versions.
-X
-Corrected bug in reading first query for DNA sequences.
-X
---> v3.25 May, 1999
-X
-Fixed pvm_showalign.c so that FIRSTNODE (in msg.h) can be 1, rather
-than 0. #define FIRSTNODE 1 is recommended when the virtual machine
-has 8 or more nodes.
-X
-SHAR_EOF
-chmod 0644 readme.pvm_3.2 ||
-echo 'restore of readme.pvm_3.2 failed'
-Wc_c="`wc -c < 'readme.pvm_3.2'`"
-test 1404 -eq "$Wc_c" ||
- echo 'readme.pvm_3.2: original size 1404, current size' "$Wc_c"
-fi
-# ============= readme.pvm_3.3 ==============
-if test -f 'readme.pvm_3.3' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.pvm_3.3 (File already exists)'
-else
-echo 'x - extracting readme.pvm_3.3 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.3' &&
-X
-X $Name: fa_34_26_5 $ - $Id: readme.pvm_3.3,v 1.13 2000/08/04 18:45:15 wrp Exp $
-X
-================
-pvcomp* - FAQ's, November, 1999
-X
-(The comments below apply to the pv3comp* programs. This problem has
-been addressed in the pv4comp* programs, by dramatically changing
-the way databases are distributed.)
-X
-I believe that the number one reason why the pvcomp* programs do not
-work properly is that the second library must be fully specified.
-If you simply type:
-X
-X pv3compfa query.lib database.lib
-X
-The program will not be able to find database.lib on the worker machines.
-You need to use:
-X
-X pv3compfa query.lib /home/user/lib/database.lib
-X
-and /home/user/lib/database.lib must be accessible to all of the worker
-nodes.
-X
-To find error messages from the workers, look at /tmp/pvml.uid, where
-uid is your unix uid.
-X
-================
-Program summary:
-X
-Programs to produce conventional scores and alignments:
-X
-pv3compfa protein vs protein, DNA vs DNA
-pv3compsw protein vs protein, DNA vs DNA
-pv3compfx/ DNA vs protein
-pv3comptfx/y protein vs DNA
-X
-Programs to summarize the effectiveness of a search (require
-super-family-labeled databases):
-X
-ps3compfa protein vs protein, DNA vs DNA
-ps3compsw protein vs protein, DNA vs DNA
-ps3compfx/ DNA vs protein
-ps3comptfx/y protein vs DNA
-X
-Programs to report the scores and alignments of the highest scoring
-unrelated sequence (require super-family-labeled databases). These
-programs are used to evaluate the super-family labeling.
-X
-pu3compfa protein vs protein, DNA vs DNA
-pu3compsw protein vs protein, DNA vs DNA
-pucompfx/ DNA vs protein
-pu3comptfx/y protein vs DNA
-X
-Note that the current parallel implementations distribute the second
-database among 'N' parallel workers by approximately dividing the
-database into 'N' parts by seeking into the middle of the database and
-finding the next entry. This strategy fails when the database is a
-single long sequence (the first worker gets the entire database, the
-others get nothing).
-X
-================
-Release notes:
-X
---> July 18, 2000
-X
-Increase SQSZ in pxgetaa.c to 200000 for long Genbank entries. This
-may still not be long enough. This increase may allow overlaps to
-occur.
-X
---> July 10, 2000
-X
-Corrections to the code for breaking up very long sequences. The last
-portion of a long sequence did not have the correct offset.
-X
---> July 1, 2000
-X
-Modified pxgetaa.c to read Genbank flatfiles.
-X
-Additional pieces of a long sequence no longer have a '+' at the
-beginning.
-X
---> June 12, 2000
-X
-Restructured p_complib.c, p_workcomp.c to make the -m 9 display more
-consistent with the fast33(_t) set of programs. The alignment (%_id,
-swscore, boundary) information is now calculated at the do_opt() stage
-of the calculation. This rearrangement uncovered a problem with the
-do_opt() stage (s_func=1) that has been fixed. This has not yet been
-tested with the MPI implementation.
-X
-Many changes were made to allow k_H, k_comp information to be passed
-back so that the -z 6 scaleswn.c (proc_hist_mle2) function could be
-used.
-X
---> February 6, 2000
-X
-Corrected some problems with proc_hist_ml() to correctly reinitialize
-hist_db_size and num_db_entries.
-X
---> January 20, 2000
-X
-X The structure of the p[vsu]comp* programs has not changed, but the
-code has been modified to accommodate both PVM and MPI versions of
-the programs from the same source code. Thus, all of the PVM-specific
-code is now surrounded by #ifdef PVM_SRC/#endif. The source files
-pvcomplib.c and pvworkcomp.c have been replaced by p_complib.c and
-p_workcomp.c, respectively. Additional changes were made to ensure
-that "FIRSTNODE" is used appropriately. In general, FIRSTNODE=0 for
-PVM programs (although with > 8 nodes, FIRSTNODE=1 may be more
-effective), but FIRSTNODE=1 for MPI programs.
-X
-X Modest changes were made to reduce warning messages during
-compilation.
-X
---> January, 2000
-X
-X Modification to hxgetaa.c, pxgetaa.c to handle library sequences,
-such as those from NCBI/NR, with very long comment lines. Additional
-modifications to correct problems with long comments, long DNA
-sequences with pv3comptfx/tfy.
-X
---> v3.33 December, 1999
-X
-Substantial updates to pvcomplib.c/pvworkcomp.c to improve efficiency
-and to provide pv3compf[xy] and pv3comptf[xy]. Previous versions of
-pvcomplib.c/pvworkcomp.c passed the entire struct mngmsg (structs.h)
-each time a new query was initiated or alignments were required. This
-version sends struct mngmsg only once and sends struct qmng_str
-(w_msg.h), which is much smaller, for the queries and alignments. In
-addition, the buffer size for results is now variable (but can be as
-large as 1200, vs 600 previously), which may improve performance when
-large numbers of workers are available. The maximum number of library
-sequences per worker has been raised to 200,000 from 50,000.
-Nevertheless, very large databases (est_human) may have too many
-entries to be examined by 4 workers.
-X
-It is likely that pv3comptf[xy] may have problems with very long
-sequences. pv3compf[xy]/tf[xy] have not been tested extensively.
-X
---> v3.32 December, 1999
-X
-Substantial corrections to showsum.c (showbest()) for the case of DNA
-queries, where two scores are calculated for each query. As a result
-of the changes, bptr[] no longer mapped exactly to best[], which
-caused a bug that was very difficult to track down. To ensure that
-bptr[]=best[], bptr[] is now re-initialized for each query.
-X
-The output format has changed significantly as well. Lots of
-redundant /** **/ comments have been removed. An E() value has been
-added to the "equ num:" line in showsum.c.
-X
-The organization of the inner while() loop in pvcomplib.c has been
-modified so that new query sequences can be sent to workers
-immediately as soon as a worker is available, rather than waiting for
-all to finish and the statistical analysis.
-X
---> v3.30 October, 1999
-X
-The p*comp*/c.work* programs have been renamed to pv3compfa,
-ps3compfa, etc. and c3.work* so that the older version 3.2 programs
-can co-exist with this version.
-X
-Corrected problem with "-n" option that prevented it from functioning
-properly. Include "ACGTCN" in check for DNA query library.
-X
-(from readme.pvm_3.2)
-X
---> August, 1999
-X
-Corrected problem with opt_cut initialization that only appeared
-with p?compfa programs.
-X
---> v3.26 July, 1999
-X
-pvcomp* programs now use the same method for working with forward and
-reverse strands as the standard fast*3(_t) programs. Thus, statistics
-for DNA sequences should be very similar for pvcompfa and fasta3 or
-fasta3_t.
-X
-X February, 1999
-X
-With release fasta32t02 of the FASTA package, the alignment
-routines for pvcompfa, pvcompsw, etc now work properly
-again.
-X
-The PVM versions of the FASTA and Smith-Waterman search programs
-should now be functionally identical to the multithreaded (fasta3_t,
-ssearch3_t) and non-threaded (fasta3, ssearch3) versions.
-X
-The programs have also been updated to provide similar -m 10
-information to the non-pvm versions. There are some slight
-differences, because the pvcomp* versions are designed to work with
-multiple sequences. But, in general, a script that looks for /^>>>/
-to start an alignment set and /^>>><<</ to end the set will work
-properly.
-X
---> v3.23 March, 1999
-X
-Modified Makefile.pvm, showsum.c so that showsum.c is used by
-both the complib/_thr and pvcomplib (pvm parallel) versions.
-X
-Corrected bug in reading first query for DNA sequences.
-X
---> v3.25 May, 1999
-X
-Fixed pvm_showalign.c so that FIRSTNODE (in msg.h) can be 1, rather
-than 0. #define FIRSTNODE 1 is recommended when the virtual machine
-has 8 or more nodes.
-X
-SHAR_EOF
-chmod 0644 readme.pvm_3.3 ||
-echo 'restore of readme.pvm_3.3 failed'
-Wc_c="`wc -c < 'readme.pvm_3.3'`"
-test 7535 -eq "$Wc_c" ||
- echo 'readme.pvm_3.3: original size 7535, current size' "$Wc_c"
-fi
-# ============= readme.pvm_3.4 ==============
-if test -f 'readme.pvm_3.4' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.pvm_3.4 (File already exists)'
-else
-echo 'x - extracting readme.pvm_3.4 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.pvm_3.4' &&
-X
-X $Name: fa_34_26_5 $ - $Id: readme.pvm_3.4,v 1.3 2001/09/17 21:18:19 wrp Exp $
-X
-X
-20-August-2001
-X
-The pvm/mpi complib programs have been substantially updated with
-release 3.4. See readme.v34t0 for more information. With version
-3.4, the MPI programs are mp34comp*, mu34comp*, etc.
-X
-A major effect of this change is to disable automatic sequence type
-(protein/DNA) recognition with pv34compfa/mp34compfa. By default,
-protein libraries are assumed. Thus, pv34compfa/mp34compfa require
-the "-n" command line option when running pv34compfa/mp34compfa on DNA
-sequence libraries. This issue does not occur with the other
-programs, which will recognize the appropriate sequence type, because
-it is determined by the program (e.g. pv34compfx requires
-DNA:protein).
-X
-================
-pv4comp* - July, August, 2000
-X
-As noted in readme.pvm_3.3 - the major problem that users have had
-with the PVM/MPI version of the programs is in reading database files
-on the nodes. All previous versions of the program (pvcompfa,
-pv3compfa, etc) had the nodes read the databases in parallel. Thus,
-the database file had to be visible to the nodes, typically through
-NFS on modern clusters of workstations.
-X
-This strategy caused some problems. It did not work on beowulf-type
-systems, where most of the nodes are in an isolated local network and
-do not have NFS access to the outside world. And it made it
-complicated to read more than one database file. Because specialized
-functions were used, the nodes could not read the full set of library
-file formats available to the other fasta programs.
-X
-These problems have been addressed by significantly changing the the
-way the pv4comp*/mp4comp* programs read the second "reference"
-library. With these versions, both databases, but specifically the
-reference library, are read by a manager process. The manager process
-then sends the sequences to the workers. This solves problems with
-NFS reads from the workers (they don't do any), and uses exactly the
-same functions as the other fasta programs, so the full set of
-database formats can be read. In addition, the FASTLIBS database
-abbreviations are available. This also should also solve problems with
-searches of very long sequences (bacterial genomes); they can now be
-broken up into smaller pieces with the -N ##### option, as with
-fasta33/tfastx33.
-X
-Thus, you are encouraged to use the pv4comp*/mp4comp* versions of the
-programs, which should run more like fasta33.
-X
-================
-Program summary:
-X
-Programs to produce conventional scores and alignments:
-X
-pv4compfa protein vs protein, DNA vs DNA
-pv4compsw protein vs protein, DNA vs DNA
-pv4compfx/ DNA vs protein
-pv4comptfx/y protein vs DNA
-X
-Programs to summarize the effectiveness of a search (require
-super-family-labeled databases):
-X
-ps4compfa protein vs protein, DNA vs DNA
-ps4compsw protein vs protein, DNA vs DNA
-ps4compfx/ DNA vs protein
-ps4comptfx/y protein vs DNA
-X
-Programs to report the scores and alignments of the highest scoring
-unrelated sequence (require super-family-labeled databases). These
-programs are used to evaluate the super-family labeling.
-X
-pu4compfa protein vs protein, DNA vs DNA
-pu4compsw protein vs protein, DNA vs DNA
-pucompfx/ DNA vs protein
-pu4comptfx/y protein vs DNA
-X
-================
-Release notes:
-X
---> Aug. 4, 2000
-X
-Compiled and tested mp4compfa/mp4compsw programs.
-X
---> July 22, 2000
-X
-First release of restructured p2_complib.c/p2_workcomp.c, which use
-the manager program to read both sequence databases and send the
-"reference database" to the workers.
-X
-SHAR_EOF
-chmod 0644 readme.pvm_3.4 ||
-echo 'restore of readme.pvm_3.4 failed'
-Wc_c="`wc -c < 'readme.pvm_3.4'`"
-test 3539 -eq "$Wc_c" ||
- echo 'readme.pvm_3.4: original size 3539, current size' "$Wc_c"
-fi
-# ============= readme.v30 ==============
-if test -f 'readme.v30' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v30 (File already exists)'
-else
-echo 'x - extracting readme.v30 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v30' &&
-X
-Because of interdependencies in the Makefile, sometimes you must
-type "make" a second time to get everything built.
-X
-June 12, 1996 - fasta30t1
-X
-X Fixed bug in reading blast-format DNA sequence files.
-X Fixed core-dump for some large libraries on some machines.
-X
-June 19, 1996 - fasta30t2
-X
-X Fixed a serious bug in the Smith-Waterman alignment routines used
-X by both fasta3 (dropnfa.c) and ssearch3 (dropgsw.c) that caused
-X the amount of memory required to depend on the library sequence
-X size, rather than the query sequence size.
-X
-X Fixed some memory-overwrite errors in showalign.c
-X
-June 27, 1996 - fasta30t3
-X
-X Found and fixed bugs in comp_thr.c and nxgetaa.c that caused core
-X dumps when reading DNA libraries with long sequences in fasta
-X format.
-X
-July 6, 1996 - fasta30t4
-X
-X ibm_pthread_subs.c available, Makefile.ibm for multiprocessor
-X IBM RS/6000 AIX systems.
-X
-X Finally (?) fixed the previous bug that caused core dumps when
-X reading DNA libraries in fasta format.
-X
-X Corrections to the fastx algorithm.
-X
-July 10, 1996
-X
-X Fixed reading of compressed GCG DNA format.
-X
-SHAR_EOF
-chmod 0644 readme.v30 ||
-echo 'restore of readme.v30 failed'
-Wc_c="`wc -c < 'readme.v30'`"
-test 1070 -eq "$Wc_c" ||
- echo 'readme.v30: original size 1070, current size' "$Wc_c"
-fi
-# ============= readme.v30t6 ==============
-if test -f 'readme.v30t6' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v30t6 (File already exists)'
-else
-echo 'x - extracting readme.v30t6 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v30t6' &&
-X
->>August 24, 1996
-X
-New programs - tfastx3, tfastx3_t, compare a protein sequence to
-forward and reverse translations of a DNA sequence database. An excellent
-replacement for tfasta3.
-X
-Sun multiprocessing - change in thr_create() to use all CPU's if available.
-X
-GCG formats - now can search with simple GCG-format query sequences and
-results with GCG format Swissprot and Genpept are more readable.
-X
->>August 26, 1996
-X
-Fixed bugs in tfastx3(_t) and fastx3(_t) including an ancient problem
-with aatran(). Less redundancy in gcg_ranlib().
-X
-X
->>August 31, 1996
-X
-Included support for BLOSUM62 (-s BL62) as per documentation.
-X
-Rearranged Makefile's so that they would make everything in one pass.
-X
->>September 6, 1996
-X
-Corrected yet another problem with the fastx/tfastx code.
-X
-Noticed that searching without optimized scores gave no optimized
-scores on the final list of scores - fixed this.
-X
-The pvm version now does alignments - not thoroughly tested.
-X
->>September 13, 1996
-X
-Fixed display of best scores to stdout.
-X
-Fixed problem with alignments when -o flag used.
-X
-pvcompfa/pvcompsw have now been tested on DEC Alpha, Solaris X86, and
-SGI PVM implementations. Several bugs were corrected.
-X
->>September 18, 1996
-X
-Fixed bug selectbestz() that caused core dumps in pvcomplib.c
-(changes to pvcomplib.c, comp_thr.c, complib.c).
-X
->>September 23, 1996
-X
-Corrected showalign.c/pvm_showalign.c addressing bug found and fixed
-by Erik Wallin. (erikw@biokemi.su.se).
-X
->>October 15, 1996
-X
-Corrected bug so alternative scoring matrices are used.
-X
->>October 22, 1996
-X
-Remove singularities from regression routine.
-X
--z 0 now means no statistics (same as -z -1).
-X
-No longer show alignment for 0 score.
-X
->>October 26, 1996
-X
-Fix problem with -b, -d when Z-values disabled.
-X
->>November 1, 1996
-X
-Altschul-Gish statistical estimates (-z 3) now work properly.
-X
-Fix problem with mean_var==0.0.
-X
-SHAR_EOF
-chmod 0644 readme.v30t6 ||
-echo 'restore of readme.v30t6 failed'
-Wc_c="`wc -c < 'readme.v30t6'`"
-test 1871 -eq "$Wc_c" ||
- echo 'readme.v30t6: original size 1871, current size' "$Wc_c"
-fi
-# ============= readme.v30t7 ==============
-if test -f 'readme.v30t7' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v30t7 (File already exists)'
-else
-echo 'x - extracting readme.v30t7 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v30t7' &&
->> October 30, 1996
-X
-A new program, sc_to_e, can be used to calculate expectation values
-from the regression coefficients reported from a search. The
-expectation value is based on similarity score, sequence length, and
-database size.
-X
->> November 8, 1996
-X
-fasta30t7 differs from fasta30t6 in the amount of information provided
-with the -m 10 option.
-X
-(1) The query and library sequence identifiers are no longer abbreviated.
-X
-(2) New information about the program and program version are provided:
-X
-The new information provided is:
-X
-X mp_name: program name (actually argv[0])
-X mp_ver: main program version (can be different from function version)
-X mp_argv: command line arguments (duplicates argv[0])
-X
-X Some statistical information is provided as well:
-X mp_extrap: XXXX YYY - statistics extrapolated from XXX to YYY
-X mp_stats: indicates type of statistics used for E() value
-X mp_KS: Kolmogorov-Smirnoff statistic
-X
-The "mp_" (main program) information is function independent, while the "pg_"
-information is produced by a particular comparison function (ssearch,
-fastx, fasta, etc). "pg_" should probably be called "fn_", and "mp_"
-called "pg_", but I remain backwards compatible.
-X
-(3) The end of the "parseable" records is denoted with:
-X
-X >>><<<
-X
-(4) There now an compile-time option -DM10_CONS, that allows you to
-display a final alignment summary:
-X
-;al_cons:
-X .::.:- .:: .. :. .:.---: : .--.:. :
-.. .--- ..: :: ... :..: .::.:. . .---. . .:
-X : . . . : .. . :..: .--. . : .:. .. : .
-X .:.::: ..:. :
-X
-or, if M10_CONS_L is defined (in addition to M10_CONS), the output is:
-;al_cons:
-X p==p=-mmmp==mpzmm=pmmmmz=p---=mmm=mmp--p=zm=m
-pzmmp---mmzp=m==mzzzm=zp=mz==z=pmzmmz---pmmpmmmp=m
-m=mzmmzmpm=mmmmppmmmpmmmm=pp=mp--pmpm=mp=pmzzm=mmp
-mp=z===mmpz=zm=
-X
-where '=' indicates identical residues, '-' a gap in one or the other
-sequence, 'p' indicates a positive pam value, 'm' indicates a negative
-pam value, and 'z' indicates a zero pam value.
-X
-A typical run now looks like:
-X
->>>gtm1_mouse.aa, 217 aa vs s library
-; mp_name: fasta3_t
-; mp_ver: version 3.0t7 November, 1996
-; mp_argv: fasta3_t -q -m 10 gtm1_mouse.aa s
-; pg_name: FASTA
-; pg_ver: 3.06 Sept, 1996
-; pg_matrix: BL50
-; pg_gap-pen: -12 -2
-; pg_ktup: 2
-; pg_optcut: 24
-; pg_cgap: 36
-; mp_extrap: 50000 51933
-; mp_stats: Expectation fit: rho(ln(x))= 5.8855+/-0.000527; mu= 1.5386+/- 0.029; mean_var=73.0398+/-15.283
-; mp_KS: 0.0133 (N=29) at 42
->>GTM1_MOUSE GLUTATHIONE S-TRANSFERASE GT8.7 (EC 2.5.1.18) (GST 1-1) (CLASS-MU).
-; fa_initn: 1490
-; fa_init1: 1490
-; fa_opt: 1490
-; fa_z-score: 1754.6
-; fa_expect: 0
-; sw_score: 1490
-; sw_ident: 1.000
-; sw_overlap: 217
->GTM1_MOUSE ..
-; sq_len: 217
-; sq_type: p
-; al_start: 1
-; al_stop: 217
-; al_display_start: 1
-PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
-KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
-NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
-KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
-RYIATPIFSKMAHWSNK
->GTM1_MOUSE ..
-; sq_len: 217
-; sq_type: p
-; al_start: 1
-; al_stop: 217
-; al_display_start: 1
-PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
-KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
-NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
-KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
-RYIATPIFSKMAHWSNK
->>GTM1_RAT GLUTATHIONE S-TRANSFERASE YB1 (EC 2.5.1.18) (CHAIN 3) (CLASS-MU).
-; fa_initn: 1406
-; fa_init1: 1406
-; fa_opt: 1406
-; fa_z-score: 1656.3
-; fa_expect: 0
-; sw_score: 1406
-; sw_ident: 0.931
-; sw_overlap: 217
->GTM1_MOUSE ..
-; sq_len: 217
-; sq_type: p
-; al_start: 1
-; al_stop: 217
-; al_display_start: 1
-PMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKF
-KLGLDFPNLPYLIDGSHKITQSNAILRYLARKHHLDGETEEERIRADIVE
-NQVMDTRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
-KVTYVDFLAYDILDQYRMFEPKCLDAFPNLRDFLARFEGLKKISAYMKSS
-RYIATPIFSKMAHWSNK
->GTM1_RAT ..
-; sq_len: 217
-; sq_type: p
-; al_start: 1
-; al_stop: 217
-; al_display_start: 1
-PMILGYWNVRGLTHPIRLLLEYTDSSYEEKRYAMGDAPDYDRSQWLNEKF
-KLGLDFPNLPYLIDGSRKITQSNAIMRYLARKHHLCGETEEERIRADIVE
-NQVMDNRMQLIMLCYNPDFEKQKPEFLKTIPEKMKLYSEFLGKRPWFAGD
-KVTYVDFLAYDILDQYHIFEPKCLDAFPNLKDFLARFEGLKKISAYMKSS
-RYLSTPIFSKLAQWSNK
-;al_cons:
-:::::::::::::::::.:::::::::.::::.::::::.::::::::::
-::::::::::::::::.::::::::.::::::::: ::::::::::::::
-:::::.::::::::::::::::::::::::::::::::::::::::::::
-::::::::::::::::..::::::::::::.:::::::::::::::::::
-::..::::::.:.::::
->>><<<
-X
-X
-217 residues in 1 query sequences
-18531385 residues in 52205 library sequences
-X Tcomplib (4 proc)[version 3.0t7 November, 1996]
-X start: Fri Nov 8 18:20:26 1996 done: Fri Nov 8 18:20:41 1996
-X Scan time: 38.434 Display time: 2.166
-X
-Function used was FASTA
-X
-================================================================
-X
->> November 11, 1996
-X
-X --> v30t71
-X
-Made changes to complib.c, comp_thr.c, nxgetaa.c to allow scoring
-matrix to be modified in fastx3, fastx3_t.
-X
-================================================================
-X
->> November 15, 1996
-X
-X --> v30t72
-X
-nxgetaa.c now accepts query sequences from "stdin" by using "-" as the
-input file name. If DNA sequences are read in this mode, the "-n"
-option must be used.
-X
-> November 23, 1996
-X
-Included code in nxgetaa.c and Makefile.sgi to get around a bug in SGI's
-sscanf() that prevented compressed GCG databases from being read properly.
-X
-SHAR_EOF
-chmod 0644 readme.v30t7 ||
-echo 'restore of readme.v30t7 failed'
-Wc_c="`wc -c < 'readme.v30t7'`"
-test 5283 -eq "$Wc_c" ||
- echo 'readme.v30t7: original size 5283, current size' "$Wc_c"
-fi
-# ============= readme.v31t0 ==============
-if test -f 'readme.v31t0' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v31t0 (File already exists)'
-else
-echo 'x - extracting readme.v31t0 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v31t0' &&
-X
->>November 1, 1997
-X
-X --> v31t0
-X
-version 31t of the fasta program package uses a more modular
-structure for comparison functions. In addition to modular functions
-to initialize, calculate and align sequences, v31 provides a modular
-function for creating the alignment display. This was required for
-fasty and fastf, which have very different alignment strategies from
-the other search programs.
-X
->>February 13, 1998
-X
-modified nascii[] so that 0, 1, 2 are no longer end of sequence
-characters.
-X
-prss3 added. Unlike prss, prss3 uses -d # to specify the number of
-shuffles.
-X
->>March 18, 1998
-X
-First public release. Corrected problems with dropfz.c (which is
-used in fasty3, tfasty3). Makefile is well tested, but other Makefile's
-are not. PVM versions not tested.
-X
->>March 19, 1998
-X
-Problem with unthreaded tfastx3, tfasty3 caused by bug in complib.c
-fixed. All Makefiles (Makefile.alpha Makefile.sun, Makefile.sgi,
-Makefile.linux) have been tested and work properly. Threaded versions
-do not work on linux (yet). Function labeling problems with fasty3,
-tfasty3 corrected.
-X
->>March 20, 1998
-X
-X --> v31t02
-X
-Fixed problem with inconsistent openlib() calls that broke BLAST databases
-on some platforms.
-X
->>March 27, 1998
-X
-X --> v31t04
-X
-Fixed a long standing problem with fastx/tfastx and fasty/tfasty that
-caused various memory allocation problems and core dumps.
-X
-The PVM version works again, but cannot produce alignments. The
-change in the location of the modular display functions will require
-significant changes in the pvm display functions. For the moment,
-showalign() has been commented out.
-X
-Code tested on Macintosh without changes.
-X
-Added some additional information in the results file.
-X
-X
-Please report bugs to wrp@virginia.edu
-X
->>April 3, 1998
-X
-Removed some debugging code in faatran.c now that fastx/fasty bugs
-seem corrected.
-X
-X FASTA --> v3.14
-X
-Corrected uninitialized array elements in dropnfa.c.
-X
->>April 10, 1998
-X
-Added facility for specifying SRCH_URL (the URL string that will be
-used to re-search the database) and REF_RUL (the URL string that
-will be used to lookup the sequence) ini url_subs.c. This allows perl
-scripts to provide different databases for re-searching dynamically.
-X
->>April 16, 1998
-X
-X --> v31t05
-X
-Corrected problem with ignoring ','s in databases (','s are found in
-PIR).
-X
->>April 18, 1998
-X
-Corrected some problems with sequence names for Entrez lookups and
-re-searching databases.
-X
-Made minor modifications to nxgetaa.c and compacc.c for compatibility
-with Borland 'C' compiler for Win32 systems. Including makefile.tc
-fasta.rsp, prss.rsp, and test.bat for Borland 'C'/win32.
-X
->>April 24, 1998
-X
-X --> v31t06
-X
-Fixed another bug in fasty3/tfasty3 alignment routines.
-X
-Added additional information to the do_url1() (url_subs.c) function.
-The re-search URL can now reference the start, stop, and length of the
-library sequence to be re-searched with. For DNA library sequences,
-these values are always in nucleotides, even with tfasta/x/y.
-X
-X
->>May 12, 1998
-X
-(no version change as v31t06 was not released prior to this)
-X
-Correct nxgetaa.c GETLIB to deal correctly with BLAST NR database
-sequences with exceptionally long title lines.
-X
-Fix bug with long -O results files.
-X
->>May 18, 1998
-X
-X --> v31t07
-X
-Corrected some bugs in information string lengths (e.g. gstring1,
-stat_str), disabling statistics with -z 0, translation of 'X' by
-saatran() (faatran.c) that caused problems with FASTX.
-X
-A serious bug has been fixed in the FASTX alignment routines.
-For some pathological sequences, % identity increases from < 10%
-to 40%. The version number of the main program has not changed,
-but the version number of the fastx function has changed to 3.2.
-X
->>June 19, 1998
-X
-X --> v31t08
-X
-Corrected some problems with alignments with -m 10.
-X
-Added -Z db_size option to modify apparent database size for
-expectation value calculation (used only for protein/protein FASTA and
-SSEARCH, FASTX, FASTY, TFASTX, and TFASTY).
-X
->>July 1, 1998
-X
-X (no version change)
-X
-Corrected size of lbnames[], lb_size[] in structs.h to accomodate MAX_LF
-files.
-X
->>July 13, 1998
-X
-X --> v31t09
-X
-Corrected problem in nxgetaa.c encountered when reading long sequences
-(that must be split) in fasta format.
-X
-Corrected problem in statistics calculation encountered with a small number
-of very long DNA sequences.
-X
->>July 17, 1998
-X
-X (no version change, date change for ssearch3)
-X
-Corrected default expectation cutoff (it was 10, now it is 2.0) for
-DNA with ssearch3.
-X
-SHAR_EOF
-chmod 0644 readme.v31t0 ||
-echo 'restore of readme.v31t0 failed'
-Wc_c="`wc -c < 'readme.v31t0'`"
-test 4461 -eq "$Wc_c" ||
- echo 'readme.v31t0: original size 4461, current size' "$Wc_c"
-fi
-# ============= readme.v31t1 ==============
-if test -f 'readme.v31t1' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v31t1 (File already exists)'
-else
-echo 'x - extracting readme.v31t1 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v31t1' &&
->>July 22, 1998
-X
-X --> v31t10
-X
-Corrected problem with histogram when unscaled statistics used (e.g. prss3).
-X
-Corrected problems with prss3 shuffled sequence prompt. Provided option
-to enter number of shuffles, window size, for prss3. Number of shuffles
-for prss3 can be entered as an option (-d #) or as the third argument
-on the command line (prss3 query lib 1000).
-X
-Modified nrand.c, nrand48.c to use time to set random number.
-X
-Corrected problems reading GCG formatted files with prss3.
-X
-Corrected various problems with pvcomp* programs, but they still do
-not produce alignments with version 3.1.
-X
-Two new programs, fastf3(_t) and tfastf3(_t) are available. These
-programs compare a set of mixed peptide sequences from an Edman
-sequencer to a protein (fastf3) or DNA (tfastf3) database, using
-the database sequences to de-convolve the peptide mixture.
-X
-See fastf3.1
-X
->>August 11, 1998
-X
-(no version change)
-X
-Modified initfa.c so that using '-n' on the fastx/fasty command line
-would not cause problems.
-X
-Changed labeling of query sequence length for fastx/fasty from 'aa' to 'nt'.
-X
->>August 18, 1998
-X
-(no version change)
-X
-Modified complib.c, comp_thr.c scaleswn.c, to report E()-value for only
-one related sequence if -z 3 is used.
-X
->>August 23, 1998
-X
-X -->v31t11
-X
-Some serious problems with prss3 have been corrected:
-X
-(1) use dropnsw.c rather than dropgsw.c for more accurate low scores
-X
-(2) modify estimation program; use scaleswe.c rather than scaleswn.c.
-X scaleswe.c has some improvements for estimation by moments and can
-X use MLE as well as mu/var (-z 3).
-X
-(3) add p() estimate.
-X
-(4) correct bugs in nrand48, which caused bad sequences for llgetaa.c
-X
-(5) -Z number works properly for prss3 and other programs (fixed histogram).
-X
-(6) a new program, ssearch3e, is available that uses the same scaling
-X routines as prss3 (scaleswe.c). prss3 will save the random
-X sequences it generates when the -r file option is given; the
-X sequences are in file_rlib. ssearch3e (or ssearch3 or fasta) can
-X then do a search on exactly the same sequences that were used by prss3.
-X
-A bug reading GCG format compressed DNA databases was fixed.
-X
-Fixed a bug that caused query sequence not to be displayed with -m 10.
-X
-Simple optimization in dropnfa.c improves performance 10%.
-X
->>Sept. 1, 1998
-X
-(no version change)
-X
-Modified nxgetaa.c to recognize "ACGTX" as nucleotides.
-X
->>Sept. 7, 1998
-X
-X --> v31t12
-X
-Added -z 11 - 15, which use shuffled sequences, rather than real
-sequences to calculate statistical estimates. Because a shuffled
-sequence score is calculated for each sequence score, the search
-process takes twice as long. In this first version, codons are not
-preserved during shuffles, so tfasta/x/y shuffles may not be as
-informative as they should be.
-X
-Also fix a problem with prss3 shuffles.
-X
->>Sept. 14, 1998
-X
-X (no version change; previous version not released)
-X
-Corrected bugs in tfastx3/tfasty3 caused by using the -3 option with
-or without -i. With the bug fixes; "-3" and "-3 -i" work as expected;
-"-3" gives the forward three frames, while "-3 -i" gives the reverse
-three frames.
-X
-In addition, tfasta3/tfasta3_t was upgraded to perform the same way
-that tfastx/y3 does - i.e. a search with "-i -3" searches only frames
-4,5, and 6, while "-3" searches only frames 1, 2, and 3.
-X
->>Sept. 29, 1998
-X
-X --> v31t13
-X
-Corrected bugs in dropfx.c that were corrected in fasta30 last May,
-but lingered in fasta31. Also included code to ensure that tfastx/y
-alignments against long introns would not overrun the alignment
-buffer. Instead of overrunning the buffer, the message: ***aligment
-truncated *** is displayed.
-X
-SHAR_EOF
-chmod 0644 readme.v31t1 ||
-echo 'restore of readme.v31t1 failed'
-Wc_c="`wc -c < 'readme.v31t1'`"
-test 3632 -eq "$Wc_c" ||
- echo 'readme.v31t1: original size 3632, current size' "$Wc_c"
-fi
-# ============= readme.v32t0 ==============
-if test -f 'readme.v32t0' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v32t0 (File already exists)'
-else
-echo 'x - extracting readme.v32t0 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v32t0' &&
-X
-FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
-search both the forward and reverse strands by default.
-X
-The documentation in fasta3x.me/fasta3x.doc has been substantially
-revised.
-X
->>October 9, 1999
-X --> v32t08 (no version number change)
-X
-Added "-M low-high" option, where low and high are inclusion limits
-for library sequences. If a library sequence is shorter than "low" or
-longer than "high", it will not be considered in the search. Thus,
-"-M 200-250" limits the database search to proteins between 200 and
-250 residues in length. This should be particularly useful for fasts3
-and fastf3. This limit applies only to protein sequences.
-X
-Modified scaleswn.c to fall back to maximum likelihood estimates of
-lambda, K rather than mean/variance estimates. (This allows MLE
-estimation to be used instead of proc_hist_n when a limited range of
-scores is examined.)
-X
->>October 20, 1999
-(no version change)
-X
-Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
-X
->>October 9, 1999
-X --> v32t08 (no version number change)
-X
-Added "-M low-high" option, where low and high are inclusion limits
-for library sequences. If a library sequence is shorter than "low" or
-longer than "high", it will not be considered in the search. Thus,
-"-M 200-250" limits the database search to proteins between 200 and
-250 residues in length. This should be particularly useful for fasts3
-and fastf3. -M -500 searches library sequences < 500; -M 200 -
-searches sequences > 200. This limit applies only to protein
-sequences.
-X
-Modified scaleswn.c to fall back to maximum likelihood estimates of
-lambda, K rather than mean/variance estimates. (This allows MLE
-estimation to be used instead of proc_hist_n when a limited range of
-scores is examined.)
-X
->>October 2, 1999
-X --> v32t08
-X
-Many changes:
-X
-(1) memory mapped (mmap()ed) database reading - other database reading fixes
-(2) BLAST2 databases supported
-(3) true maximum likelihood estimates for Lambda, K
-(4) Misc. minor fixes
-X
-(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
-It is now possible to use mmap()ed access to FASTA format databases,
-if the "map_db" program has been used to produce an ".xin" file. If
-USE_MMAP is defined at compile time and a ".xin" file is present, the
-".xin" will be used to access sequences directly after the file is
-mmap()ed. On my 4-processor Alpha, this can reduce elapsed time by
-50%. It is not quite as efficient as BLAST2 format, but it is close.
-X
-Currently, memory mapping is supported for type 0 (FASTA), 5
-(PIR/GCG ascii), and 6 (GCG binary). Memory mapping is used if a
-".xin" file is present. ".xin" files are created by the new program
-"map_db". The syntax for "map_db" is:
-X
-X map_db [-n] "/dir/database.fa"
-X
-which creates the file /dir/database.fa.xin. Library types can be
-included in the filename; thus:
-X
-X map_db -n "/gcggenbank/gb_om.seq 6"
-X
-would be used for a type 6 GCG binary file.
-X
-The ".xin" file must be updated each time the database file changes.
-map_db writes the size of the database file into the ".xin" file, so
-that if the database file changes, making the ".xin" offset
-information invalid, the ".xin" file is not used. "list_db" is
-provided to print out the offset information in the ".xin" file.
-X
-(Oct 2, 1999) The memory mapping routines have been changed to
-allow several files to be memory mapped simultaneously. Indeed, once a
-database has been memory mapped, it will not be unmap()ed until the
-program finishes. This fixes a problem under Digital Unix, and should
-make re-access to mmap()ed files (as when displaying high scores and
-alignments) much more efficient. If no more memory is available for
-mmap()ing, the file will be read using conventional fread/fgets.
-X
-(Oct 2, 1999) The names of the database reading functions has been
-changed to allow both Blast1.4 and Blast2.0 databases to be read. In
-addition, Makefile.common now includes an option to link both
-ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
-However, Blast1.4 support has not been tested.
-X
-The Makefile structure has been improved. Each architecture specific
-Makefile (Makefile.alpha, Makefile.linux, etc) now includes
-Makefile.common. Thus, changes to the program structure should be
-correct for all platforms. "map_db" and "list_db" are not made with
-"make all".
-X
-The database reading functions in nxgetaa.c can now return a database
-length of 0, which indicates that no residues were read. Previously,
-0-length sequences returned a length of 1, which were ignored.
-Complib.c and comp_thr.c have changed to accommodate this
-modification. This change was made to ensure that each residue,
-including the last, of each sequence is read.
-X
-Corrected bug in nxgetaa.c with FASTA format files with very long
-(>512 char) definition lines.
-X
-(2) (September 20, 1999) BLAST2 format databases supported
-X
-This release supports NCBI Blast2.0 format databases, using either
-conventional file reading or memory mapped files. The Blast2.0 format
-can be read very efficiently, so there is only a modest improvement in
-performance with memory mapping. The decision to use mmap()'ed files
-is made at compile time, by defining USE_MMAP. My thanks to Eamonn
-O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
-providing mmap()'ed modifications to fasta3. On my machines, Blast2.0
-format reduces search time by about 30%. At the moment, ambiguous DNA
-sequences are not decoded properly.
-X
-(3) (September 30, 1999) A new statistical estimation option is
-available. -z 2 has been changed from ln()-scaling, which never
-should have been used, to scaling using Maximum Likelihood Estimates
-(MLEs) of Lambda and K. The MLE estimation routines were written by
-Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
-written by Sean Eddy. The MLE estimation examines the middle 95% of
-scores, if there are fewer than 10000 sequences in the database;
-otherwise it excludes (censors) the top 250 scores and the bottom 250
-scores. This approach seems to effectively prevent related sequences
-from contaminating the estimation process. As with -z 1, -z 12 causes
-the program to generate a shuffled sequence score for each of the
-library sequences; in this case, no censoring is done. If the
-estimation process is reliable, Lambda and K should not vary much with
-different queries or query lengths. Lambda appears not to vary much
-with the comparison algorithm, although K does.
-X
-(4) Minor changes include fixes to some of the alignment display routines,
-individual copies of the pstruct structure for each thread, and some
-changes to ensure that every last residue in a library is available
-for matching (sometime the last residue could be ignored). This
-version has undergone extensive testing with high-throughput sequences
-to confirm that long sequences are read properly. Problems with
-fastf3/fasts3 alignment display have also been addressed.
-X
->>August 26, 1999 (no version change - not released)
-X
-Corrected problem in "apam.c" that prevented scoring matrices from
-being imported for [t]fasts3/[t]fastf3.
-X
->>August 17, 1999
-X --> v32t07
-X
-Corrected problem with opt_cut initialization that only appeared
-with pvcomp* programs.
-X
-Improved calculation of FASTA optcut threshold for DNA sequence
-comparison for match scores much less than +5 (e.g. +3). The previous
-optcut theshold was too high when the match penalty was < 4 and
-ktup=6; it is now scaled more appropriately.
-X
-Optcut thresholds have also been raised slightly for
-fastx/y3/tfastx/y3. This should improve performance with minimal
-effects on sensitivity.
-X
->>July 29, 1999
-(no version change - date change)
-X
-Corrected various uninitialized variables and buffer overruns
-detected.
-X
->>July 26, 1999 - new distribution
-(no version change - v32t06, previous version not released)
-X
-Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
-programs.
-X
-Statistical calculations for tfasta/x/y in unthreaded version
-corrected. Statistical estimates for threaded and unthreaded versions
-of the tfasta/x/y/s/f programs should be much more consistent.
-X
-Substantial modifications in alignment coordinate calculation/
-presentation. Minor error in fastx/y/tfastx/y end of alignment
-corrected. Major problems with tfasta alignment coordinates
-corrected. tfasta and tfastx/y coordinates should now be consistent.
-X
-Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
-with long query sequences.
-X
-Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
-to try to avoid "cannot allocate diagonal arrays" error message.
-Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
-so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
-I am still getting this message, so it has not been completely
-successful. Makefile.linux now uses -DALLOCN0 to avoid this problem,
-at some cost in speed.
-X
-The pvcomp* programs have been updated to work properly with
-forward/reverse DNA searches. See readme.pvm_3.2.
-X
->>July 7, 1999 - not released
-X --> v32t06
-X
-Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
-dumps with "-o" option.
-X
-Corrected a subtle bug in fastx/y/tfastx/y alignment display.
-X
->>June 30, 1999 - new distribution
-(no version change)
-X
-Corrected doinit.c to allow DNA substitution matrices with -s matrix
-option.
-X
-Changed ".gbl" files to ".h" files.
-X
->>June 2 - 9, 1999 - new distribution
-(no version change)
-X
-Added additional DNA lambda/K/H to alt_param.h. Corrected some
-other problems with those tables for the case where (inf,inf)
-gap penalties were not included.
-X
-Fixed complib.c/comp_thr.c error message to properly report filename
-when library file is not found.
-X
-Included approximate Lambda/K/H for BL80 in alt_parms.h.
-BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
-X
-Included some additional perl files for searchfa.cgi, searchnn.cgi
-in the distribution (my-cgi.pl, cgi-lib.pl).
-X
->>May 30, 1999, June 2, 1999 - new distribution
-(no version number change)
-X
-Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h. Changed
-zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
-when only one sequence is compared and -z 3 is used.
-X
->>May 27, 1999
-(no version number change)
-X
-Corrected bug in alignment numbering on the % identity line
-X 27.4% identity in 234 aa (101-234:110-243)
-for reverse complements with offset coordinates (test.aa:101-250)
-X
->>May 23, 1999
-(no version number change)
-X
-Correction to Makefile.linux (tgetaa.o : failed to -DTFAST).
-X
->>May 19, 1999
-(no version number change)
-X
-Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
-Changes to showsum.c to change off-end reporting. (Neither of these
-changes is likely to affect anyone outside my research group.)
-X
->>May 12, 1999
-X --> v32t05
-X
-Fixed a serious bug in the fastx3/tfastx3 alignment display which
-caused t/fastx3 to produce incorrect alignments (and incorrectly low
-percent identities). The scores were correct, but the alignment
-percent identities were too low and the alignments were wrong.
-X
-Numbering errors were also corrected in fastx3/tfastx3 and
-fasty3/tfasty3 and when partial query sequences were used.
-X
->>May 7, 1999
-X
-Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
-incorrect Smith-Waterman scores after do_walign() had been called.
-This affected only pvcompsw searches with the "-m 9" option.
-X
->>May 5, 1999
-X
-Modified showalign.c to provide improved alignment information that
-includes explicitly the boundaries of the alignment. Default
-alignments now say:
-X
-Smith-Waterman score: 175; 24.645% identity in 211 aa overlap (5:207-7:207)
-X
->>May 3, 1999
-X
-Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
-"not" superfamily annotation for the query sequence only. The
-goal is to be able to specify that certain superfamily numbers be
-ignored in some of the search summaries. Thus, a description line
-of the form:
-X
->GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
-X
-says that GT8.7 belongs to superfamily 40001, but any library
-sequences with superfamily number 90043 should be ignored in any
-listing or summary of best scores.
-X
-In addition, it is now possible to make a fasta3r/prcompfa, which is
-the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
-unrelated sequences in a search using the superfamily annotation.
-fasta3r shows only the scores of related sequences. This might be
-used in combination with the -F e_val option to show the scores
-obtained by the most distantly related members of a family.
-X
->>April 25, 1999
-X
-X -->v32t04 (not distributed)
-X
-Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
-(necessary for a more rational Makefile structure). No code changes.
-X
->>April 19, 1999
-X
-Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
-(no version number change).
-X
->>April 17, 1999
-X
-X --> v32t03
-X
-A serious bug in DNA alignments when the sequence has been broken into
-multiple segments that was introduced in version fasta32 has been
-fixed. In addition, several minor problems with -z 3 statistics on
-DNA sequences were fixed.
-X
-Added -m 9 option, which unfortunately does different things in
-pvcompfa/sw and fasta3/ssearch3. In both programs, -m 9 provides the
-id's of the two sequences, length, E(), %_ident, and start and end of
-the alignment in both sequences. pvcompfa/sw provides this
-information with the list of high scoring sequences. fasta3/ssearch3
-provides the information in lieu of an alignment.
-X
->>March 18, 1999
-X
-X --> v32t02
-X
-Added information on the algorithm/parameter description line to
-report the range of the pam matrices. Useful for matrices like
-MD_10, _20, and _40 which require much higher gap penalties.
-X
->>March 13, 1999 (not distributed)
-X
-X --> v32t01
-X
-X -r results.file has been changed to -R results.file to accomodate
-X DNA match/mismatch penalties of the form: -r "+1/-3".
-X
->>February 10, 1999
-X
-Modify functions in scalesw*.c to prevent underflow after exp() on
-Alpha Linux machines. The Alpha/LINUX gcc compiler is buggy and
-doesn't behave properly with "denormalized" numbers, so "gcc -g -m
-ieee" is recommended.
-X
-Add "Display alignments also (y/n)[n] "
-X
-pvcomplib.c again provides alignments!! In addition, there is a
-new "-m 9" option, which reports alignments as:
-X
->>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
-HS5 30 HS5 30 1.873e-11 1.000 30 1 30 1 30
-HS5 30 HS2249 40 1.061e-07 0.774 31 1 30 7 37
-HS5 30 HS2221 38 1.207e-07 0.833 30 1 30 7 35
-HS5 30 HS2283 40 1.455e-07 0.774 31 1 30 7 37
-HS5 30 HS2239 38 1.939e-07 0.800 30 1 30 7 35
-X
-where the columns are:
-X
-query-name q-len lib-name lib-len E() %id align-len q-start q-end l-start l-end
-X
->>February 9, 1999
-X
-Corrected bug in showalign.c that offset reverse complement alignments
-by one.
-X
->>Febrary 2, 1999
-X
-Changed the formatting slightly in showbest.c to have columns line up better.
-X
->>January 11, 1999
-X
-Corrected some bugs introduced into fastf3(_t) in the previous version.
-X
->>December 28, 1998
-X
-Corrected various problems in dropfz.c affecting alignment scores
-and coordinates.
-X
-Introduced a new program, fasts3(_t), for searching with peptide
-sequences.
-X
->>November 11, 1998
-X
-X --> v32t0
-X
-Added code to correct problems with coordinate number in long library
-sequences with tfastx/tfasty. With this release, sequences should be
-numbered properly, and sequence numbers count down with reverse
-complement library sequences.
-X
-In addition, with this release, fastx/y and tfastx/y translated
-protein alignments are numbered as nucleotides (increasing by 3,
-labels every 30 nucleotides) rather than codons.
-X
-SHAR_EOF
-chmod 0644 readme.v32t0 ||
-echo 'restore of readme.v32t0 failed'
-Wc_c="`wc -c < 'readme.v32t0'`"
-test 15841 -eq "$Wc_c" ||
- echo 'readme.v32t0: original size 15841, current size' "$Wc_c"
-fi
-# ============= readme.v33t0 ==============
-if test -f 'readme.v33t0' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v33t0 (File already exists)'
-else
-echo 'x - extracting readme.v33t0 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v33t0' &&
-X
-X $Name: fa_34_26_5 $ - $Id: readme.v33t0,v 1.45 2001/07/10 18:03:42 wrp Exp $
-X
-================ readme.v33t0 ================
-X
-This release includes an MPI implementation of the parallel
-library-vs-library comparison code. See readme.mpi_3.3 and
-readme.pvm_3.3 for more information.
-X
-=====
->>July 9, 2001
-X
-Considerable changes to support no-global library functions.
-X
-(1) Separate ascii/sequence mapping arrays are used by the
-X query-reading (qascii), library-reading (lascii), and sequence
-X comparison function (pascii) routines. As a result, there is no
-X longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
-X functions.
-X
-(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
-X from complib.c/comp_thr.c/p2_complib.c. We no longer need
-X tcomp_thr.o, comp_thrx.o, etc. We still have a variety of
-X p2_complib.o variations to support the different c34.work* files.
-X
-(3) Because non-global openlib/getlib functions are available, exactly
-X the same open/get functions are available for reading both the
-X query and reference libraries in pv34comp* programs. The
-X host-specific openlib/getlib functions in hxgetaa.c are now
-X provided by nmgetlib.c, etc. This has two effect:
-X
-X (a) it is now possible to compare a query database generated by an
-X SQL query to a library database generated by a different SQL
-X query.
-X
-X (b) pv34comp* has lost (at least in this version) the ability to
-X automatically detect the query sequence type. To search with a
-X DNA query, you MUST use "-n".
-X
-(4) the resetp() function is now responsible for almost all of the
-X function specific (TFAST/FASTX/etc) initializations. All of the
-X function specific code has been removed from complib.c/comp_thr.c
-X and most of it has been moved to initfa.c/resetp().
-X
-(5) manageacc.c has been merged into compacc.c (mostly prhist()).
-X
-(6) Although it may reflect a subtle bug in my code, it is not
-X possible to reliably run threaded/memory mapped versions of the
-X fasta34_t code. I have spent considerable time tracking down the
-X problem, and have determined that, in threaded code, something
-X happens during the thread initialization to corrupt the
-X description offset information used when files are memory mapped.
-X This never occurs when the unthreaded versions of the code are
-X used. And it does not occur under MacOSX, Compaq Tru64Unix, Sun
-X Solaris/Sparc, or SGI IRIX.
-X
-X Thus, I cannot recommend using the threaded code versions (_t)
-X under Linux (RH6.2 or 7.1).
-X
-=====
->>June 1, 2001
-X
-Many changes to accomodate a new - no global variable - strategy for
-reading sequence databases. Every time a file is opened, a struct
-lmf_str is allocated which can be used for memory mapped files, ncbl2,
-files, and mysql files.
-X
-In addition, an open'ed file has a default sequence type: DNA or
-protein, or one can open a file in a mode that will allow the sequence
-type to be changed.
-X
-=====
->>May 18, 2001 CVS: fa33t09d0
-X
-A new compile time parameter - -DGAP_OPEN, is available to change the
-definition of the "-f gap-open" parameter from the penalty for the
-first residue in a gap to a true gap-open penalty, as is used in BLAST
-and many other comparison algorithms. This will probably become the
-default for fasta in version 3.4.
-X
-Fixes to conflicts between "-S" and "-s matrix". When a scoring
-matrix file was specified, lower-case alignments were not displayed
-with -S (although the scores were calculated properly).
-X
-More extensive testting of mysql_lib.c (mySQL query-libraries) with
-the pv4comp* and mp4comp* programs.
-X
-=====
->>April 5, 2001 CVS: fa33t08d4b3
-X
-Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
-descriptions for PCOMPLIB (pv4/mp3comp*). Also fix p2_complib.c to
-request DNA library for translated comparisons.
-X
-Fix for prss33(_t) to read both sequences from stdin.
-X
-=====
->>March 27, 2001 CVS: fa33t08d4
-X --> fa33t08d4
-X
-Problems in ncbl2_mlib.c found searching NCBI non-redundant nucleotide
-database "nt" were fixed. Testing revealed a minor memory leak, which
-was fixed by modifying showbest.c, showalign.c, comp_thr.c, complib.c,
-and p2_complib.c to remember the last opened database file more
-effectively.
-X
-Modifications to allow 64-bit fseek/ftell on machines like Sun,
-Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGE_FILE_SOURCE
-off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO. Machines
-with 64-bit long's do not need this option. Machines with 32-bit
-longs that allow files >2 Gb can do so with 64-bit file access
-functions, including fseeko() and ftello(), which work with off_t file
-offsets instead of long's.
-X
-=====
->>March 3, 2001 CVS: fa33t08d2
-X
-Corrected problems in nmgetaa.c and mysql_lib.c with parallel
-programs, and one serious problem with alternate DNA scoring matrices
-(initfa.c, initsw.c) not being set properly. A subtle problem with
-the merge of scaleswn.c and scaleswg.c is fixed.
-X
->>February 17, 2001
-X
-Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
-position of the GID. This change was made because sprintf() cannot be
-used reliably to generate an SQL string, as '"' and '%' are used in
-such strings.
-X
-=====
->>January 17, 2001
-(no version change, date change)
-X
-Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
-properly. "-n -s dna.mat" is required for the sequence/matrix to be
-recognized as DNA.
-X
->>January 16, 2001
--->v34t00
-X
-Merge of the main CVS trunk - fa33t06 with the latest release branch,
-fa33t08.
-X
-In addition, PCOMPLIB mods have been made to mysql_lib.c. Because
-p2_complib.c gets sequence description information during the first
-read of the database, the mysql_query must be changed to return:
-result[0]=GID, result[1]=description, result[2]=sequence. In the
-PCOMPLIB case, the other SQL queries (for GID description, sequence)
-are not necessary but must still be provided.
-X
-=====
->>January 16, 2001
-(no version change, previous version not released)
-X
-changes to p2_complib.c to correct openlib() incompatibility.
-X
-changes to nmgetaa.c, ncbl2_lib.c to incorporate PCOMPLIB. nxgetaa.c
-removed.
-X
-=====
->>January 12, 2001
-(no version change, previous version not released)
-X
-Change to initfa.c to move ktup check from query_parm() to last_init().
-X
-=====
->>January 10, 2001
---> v33t08
-X
-Fixes to complib.c, comp_thr.c to deal properly with long query
-protein sequences when a short library chunk (e.g. -N 5000) was given.
-In the case where the chunk size is too short, it will be reset to a
-length which allows the search to proceed, by including an amount of
-new sequence that is equal to the amount of overlap sequence.
-X
-scaleswn.c and scaleswg.c have been merged.
-X
-v33t08 includes the initial implementation for mySQL described below
-for v33t07x.
-X
-======
->>Dec. 20, 2000
---> v33t07x
-X
-Initial implementation of a syntax for mySQL database queries. A new
-file, mysql_lib.c has been added, and changes have been made to
-nmgetaa.c (which should now replace nxgetaa.c) and altlib.h. A mySQL
-database search needs a file with 4 parts:
-X
-(1) description of the database, user, password
-(2) a select statement that generates the set of protein sequences
-X as: UID, sequence
-(3) a select statement that generates a UID, description given a UID
-(4) a select statement that generats a single UID, sequence given a UID
-X
-Each of the four parts should be separated by ';'. For example, in
-the database that we are using for testing, a file "demo.sql" that
-contains:
-X
-================
-localhost taxonomy username secret;
-SELECT proteins.gid, proteins.sequence FROM proteins,swissprot WHERE proteins.gid=swissprot.gid AND swissprot.spid IS NOT NULL;
-select proteins.gid, concat(swissprot.spid," ",proteins.description) from proteins,swissprot where proteins.gid=%ld AND swissprot.gid=proteins.gid;
-select gid, sequence from proteins where gid=%ld;
-================
-X
-will find all the proteins in the BLAST "nr" database that also have
-SwissProt ID's when given the command line:
-X
-X fasta33 -q query.aa "demo.sql 16"
-X
-At least for simple queries, there is surprisingly little overhead for the
-search. For more complex queries involving several tables, the overhead
-can be significant.
-X
-At the moment, libraries that need the functions in mysql_lib.c will
-use library type 16. We may also use file type 17 for SQL queries
-that return binary sequences.
-X
-This implementation of mysql_lib.c was written to require a minimal
-amount of change to the other programs. Only nmgetaa.c and altlib.h
-needed to be changed to incorporate this new capability. One result
-of this limitation is that one cannot mix mySQL databases queries with
-other databases in the same search. Eventually, I would like to make
-a mySQL database like any other, so that several mysql database
-queries could be searched in the same run, and mysql databases could
-be mixed with other (flat file) databases, but this will require some
-changes in the function calls throughout the code. (Right now, the
-various programs do not distinguish between an openlib() that is made
-before searching a large database, and one before retrieving a single
-sequence. This must be changed for a database query like mySQL to
-behave like other databases.
-X
-Several mySQL demo files have been provided: mysql_demo*.sql.
-X
-(10 January 2001) The mySQL code has been tested on Intel Linux and
-Compaq/Alpha/Tru64 Unix.
-X
->>Dec. 9, 2000
-X
-Changes to apam.c that to tie different default gap penalties to
-alternate scoring matrices. In addition, changes to apam.c, to deal
-with user-specified matrices with or without '*'.
-X
->>Nov. 5, 2000 (date updated)
-X
-pst.dnaseq can now have 3 values, -1, or 0-> protein, 1->DNA, and 2->other.
-This becomes important for thing like init_karlin_a, which needs a
-background frequency of residues.
-X
->>Nov. 1, 2000
-X
-Significant bug fixes for the -z 6/-z 16 option. An uninitialized
-variable was fixed in karlin.c, and comp_thr.c did not pass the
-correct composition argument type in find_zp(). The -z 6/16 option
-has now been tested and works correctly on Alphas, Linux x86, SGI, Sun
-and Mac OSX. Another problem was fixed in scaleswn.c (simplex()) that
-prevented the code from being reused by the pv4/mp4 complib programs.
-X
->>Oct. 9, 2000
-X
-Several changes made to accomodate Mac OSX. Longer lists of superfamily
-numbers now supported in p[su]4comp/m[su]4comp programs.
-X
->>Sept 25, 2000
-X
-All global variables have been removed from scaleswn.c. The last to
-go, db_struct db, required many edits, because until now, the fasta
-programs have kept two versions of the db_struct data (entries,
-length). One version was kept by the main program, which updated entry
-number and db length as sequences were read; a second copy of this
-information was kept by the statistical estimation routines. Now
-there is only one copy, which means that the E() values will be a
-function of the complete database, not the database with some high
-scoring sequences removed.
-X
->>Sept 23, 2000
-X
-Continued removal of global variables from scaleswn.c. Only one
-global is left, db_struct db, which contains the number of entries in
-the database and the number of residues. It will be the next to go
-(changing all the zs_to_*() functions) and scaleswn. will be free
-of globals. scaleswg.c is gone - scaleswn.c compiles to scaleswg.c
-with -DNORMAL_DIST.
-X
->>Sept 20, 2000
-X
-Removal of histogram globals required changes in p2_complib.c as well.
-p_complib.c has not been updated. scaleswg.c has been modified to
-reflect the new histogram strategy.
-X
->>Sept 19, 2000
-X
-Substantial changes to remove globals for printing histogram. m_msg
-now contains a hist_str, which keeps histogram information.
-X
->>Sept. 19, 2000
-(no version change, previous version not released)
-X
-Correct bug introduced into scaleswn.c (inithist()) by changing
-score2_sums[], score_sums[] from int to double.
-X
-Reporting of version numbers is more consistent between fasta33,
-fasta33_t, and pv4compfa/mp4compfa. The programs now report the same
-numbers/dates in similar places.
-X
->>Sept. 15, 2000
---> v33t07
-X
-Changes to fix problems with statistical estimates when a large
-fraction (but not all) of the database is related. Several users
-reported problems when searching with rRNA genes with version 33t06.
-In some cases, a 100% identitical match over 1500 nt would not be
-statistically significant against a search of the bacterial division
-of Genbank. This problem was not seen with some releases of v33t05.
-X
-The cause of the problem was a change between v33t05 and v33t06 to
-allow scoring matrices with unusual scaling to be used. In v33t05,
-there was a line that excluded all scores > 300 from the statistical
-estimation procedure. While 300 is a high score with any "normal"
-scoring matrix, some investigators were using matrices scaled 10X, so
-that a score of 300 was really a score of 30 with a conventional
-matrix, and should not be excluded. Unfortunately, removing the test
-to exclude scores > 300 meant that when a rRNA sequence was used to
-search the bacterial division, tens of thousands of high scoring
-related sequences were treated as if they were unrelated, with the
-result that the variance estimates were much too high, and thus high
-real scores had low z-scores, and thus were not statistically
-significant. (There appear to be more than 20,000 rRNA sequences in
-the bacterial division of Genbank, almost 25% of all sequences).
-X
-The solution to the problem is a substantial enhancement in the
-strategies used to exclude high-scoring, related sequences, the -z 1,
-4, and 5 parameter estimation strategies. The programs now estimate
-the expected high scoring sequence by calculating an ungapped Lambda
-and K, and then use a relatively conservative threshold for excluding
-scores that are higher than would be expected 0.01 times by chance.
-By calculating Lambda and K, we can scale the cutoff thresholds to
-allow scoring matrices with unusual scales. For "normal" searches,
-there should be little change, but there should be an improvement for
-searches with large numbers of related sequences in the database.
-X
-As a result of testing for this change, a bug in the karlin() function
-used with -z 6 was found and corrected.
-X
-=======
->>Sept. 9, 2000
-X
-Changes to manshowbest.c to include correct display coordinates.
-X
-Significant changes to structs.h, param.h, p2_complib.c,
-p2_workcomp.c, to store and use a reliable a_struct for alignment
-coordinates.
-X
-Other cosmetic changes.
-X
->>Sept. 7, 2000
-X
-Minor changes to complib.c, showrss.c, so that prss33 -q uses 200
-shuffles and prss33 provides bit scores, rather than z-scores.
-(no version number change).
-X
-Modifications to p2_complib.c to include superfamily numbers for
-ps4comp* ms4comp*.
-X
->>Aug 22, 2000
-X
-Changes to mmgetaa.c, ncbl2_mlib.c, dropfs.c to accomodate AIX.
-00README.1st updated to reflect the current version and correct
-outdated information on threads.
-X
->>Aug. 3, 2000
-X
-Modifications to initpam2() in initsw.c to correct a problem with pam_x
-when the -S option is used.
-X
-Modifications to compacc.c, scaleswn.c to ensure that residue numbers
-are calculated properly when more than 2 Gb of sequence is searched.
-X
->>July 12, 2000
-X
-Modifications to dropnfa.c so that DNA matches to 'N' will be included
-in the "ungapped %identity". Thus, a sequence that is 100% identical
-for 100 nt on either side of a 100 nt region that has been masked to
-'NNNNN' will be reported as: "67% identical (100% ungapped)". This
-has been added to deal with masked BAC-end databases. It would be
-better if masking changed the letters to lowercase, but the mouse
-BAC-end sequences at TIGR use 'NNNNN'. This is currently available
-only for the fasta function, not [t]fast[x/y], etc, and only for DNA
-sequences.
-X
-mk_n_pam() in apam.c modified to ensure that mismatch scores of -1
-remain -1.
-X
->>June 25, 2000
-X
-Modification to nxgetaa.c, nmgetaa.c, mmgetaa.c to return Genbank Accession
-number as part of the descriptive string.
-X
->>June 11, 2000
-X
-(no version change - not yet released)
-X
-Modifications to calcons(), calc_id(), showbest(), p_workcomp.c to
-provide ngap_q (number of alignment gaps in query) , ngap_l (number
-of gaps in library) information for -m 9 output.
-X
->>June 6, 2000
-X
-(no version change - not yet released)
-X
-Modified scaleswn.c to provide better support for unconventional
-scoring scoring matrices, in particular, scoring matrices where every
-value is 50-times higher. Previous versions of the MLE estimator (-z
-2) started with lambda = 0.2, which is too high for a scoring matrix
-going from -500:+1500. The initial estimate for lambda is now
-calculated using the formula: lambda = pi/sqrt(6*variance). For the
-default -z 1, a restriction to limit scores to a maximum of 300 for
-the statistical analysis was removed.
-X
->>June 3, 2000
-X
-Modified aligment output, and -m 9 and -m10, to report an "ungapped"
-identity as well as the traditional "gapped" identity. The
-traditional "gapped" identity reports the number of identities divided
-by the overall length of the alignment, including gaps. The
-"ungapped" identity does not include gaps in the length of the
-alignment. This new value is included for alignments that include
-introns; thus, a tfastx33 search might find the 100% identical genomic
-sequence but report the gapped percent identity if a short intron were
-included in the alignment (the alignment probably would not span a
-long exon) as 66%. The "ungapped" identity would remain 100%. The
-ungapped identity value is also shown in the "-m 9" output line after
-the "gapped" fraction identical.
-X
->>June 1, 2000
-X
-Modified -m 9 output to provide fraction identical, alignment boundary
-information with the initial list of high scoring sequences, just as
-the pv3comp and mp_comp versions do. The -m 9 option now shows the
-same alignment display as -m 0, but the width of the alignment is
-increased by 40. Thus, by default, -m 9 will show the list of best
-hits, with percent identity, Smith-Waterman score, and alignment
-boundaries initially, and then show alignments standard (-m 0)
-alignments with 100 residues/line.
-X
->>May 29, 2000
-X
-Correct some problems with reading data files with <CR>'s under unix.
-X
-nmgetaa.c/nxgetaa.c/mmgetaa.c have been modified to convert <TAB>
-('\t') to <SPC> (' ') in descriptive lines.
-X
-=======
-X
->>May 3, 2000
-X
-X Corrected problem with very low mean_var in fit_llen() in scaleswn.c.
-X
->>May 2, 2000
-X (no version number change - previous version not released)
-X
-X Merged fasta33t05d2 with fasta33t06. Also removed restriction on
-"-M size-range" to proteins - the size range now can be applied to DNA
-as well.
-X
->>May 1, 2000
-X (changes to v33t05d merged into v33t06)
-X
-Introduced changes to include '*' as a valid sequence character, which
-indicates termination. Thus, 'TGA', 'TAG', and 'TAA' are now
-tranlated to '*' rather than 'X', and the protein PAM matrices have
-been modified to provide a match score of approximately 1/2 the max
-identity score for a '*:*' match. Otherise, '*' is the same as 'X'.
-This change only affects query sequences that include a '*' to
-indicate an end of sequence, the '*' is not there by default.
-X
-The inclusion of '*' broke some things in tfasts33, tfastf33, fasty33,
-and tfasty33, which were fixed today.
-X
->>March 28, 2000/April 24, 2000
-X --> v33t06
-X
-(a) -z 6 statistics that factor in composition
-(b) -smatrix-offset pam-offset parameter
-X
-(a) This release provides a new statistics option, -z 6, which
-provides a more sophisticated model that accounts for sequence
-composition. When -z 6 is used (only for fasta33(_t) and
-ssearch33(_t)), the program calculates a composition parameter
-comp=1/lambda using a modified version of the Karlin-Altschul karlin()
-function. As a result, every sequence in the database has an
-associated length (n1) and composition (comp).
-X
-The length n1 and composition comp are used in the maximum likelihood
-estimation described by Mott (1992) Bull. Math. Biol. 54:59-75. Four
-parameters are estimated, a0, a1, a2, and b1, and the probability of
-obtaining a score is then:
-X
-p(s >= x) = 1-exp(-exp(-( a0 + a1*comp + a2*comp*log(n0*n1) + x)/(b1*comp)))
-X
-The maximum likelihood estimates of a0, a1, a2, and b1 are calculated
-using the Nelder-Mead simplex search strategy.
-X
-The average Lambda is reported for the search using Lambda =
-1/(b1*ave_comp). Where ave_comp is the geometric mean of the comp values
-calculated during the statistical estimates.
-X
-The "lambda/comp" calculation can fail for sequences with very biased
-amino acid composition. When this occurs, 'comp' is set to -1.0 (as
-is 'H', the information content parameter) and the 'ave_comp' value is
-used to calculate statistical significance. (But obviously 'ave_comp'
-is not really appropriate, since if the sequence had an average 'comp'
-value, it would have been calculated.) When -z 6 is used, the
-alignment display shows the 'comp' and 'H' values for that library
-sequence.
-X
-(b) Scoring matrix offsets - The main reason that the "lambda/comp"
-calculation fails is that, for the particular query/library sequence
-pair, the expected score is not < 0, instead, Sum {p_ij S_ij} >= 0.0.
-This problem is reported to 'stderr' when it occurs. The simplest
-solution to the problem is to provide an offset to the scoring matrix;
-for example, to use Blosum62 - 1, which ranges from +10 to -5, rather
-than the standard +11 to -4. This option used to be available with
-the -S offset option, but -S is now used to specify a lower-case
-seg-ed database. The offset can now be specified as part of the
-scoring matrix name. Thus, "-s BL62-1" uses Blosum62 reduced by 1 at
-each entry. The '-' character is used to indicate an offset, so
-scoring matrix files must not have a '-' in their name.
-Alternatively, "-s BL80+1" or "-s BL80--1" would add one to each value.
-X
-nxgetaa.c, nmgetaa.c, and mmgetaa.c have been edited to avoid string
-run-off problems after strncpy().
-X
-Fixed problem where positive gap extension penalties in ssearch33
-were not converted to negative values.
-X
->>April 8, 2000
-X
-Fixed problem in calculating corrected sequence lengths for
-Altschul-Gish probabilities.
-X
->>March 30, 2000
-X (no version change, date updated to March 30, 2000)
-X
-Corrected problem with -m 9 option.
-X
-The '*' character is now available to allow translated alignments to
-extend through the termination codon. Thus, if a protein sequence ends
-with a '*', and matches in to a translated termination codon, the
-score will be increased. The *:* match score is set to 1/2 the max
-positive score for the matrix (see upam.h). This strategy can also be
-used to upweight a match that extends all the way to the end of a
-full-length sequence by putting '*' at the end of both the query and
-library protein sequences. Recognition of '*' will probably become a
-command line option.
-X
->>March 21, 2000
-X (no version change, previous version not distributed)
-X
-Changes to map_db.c, list_db.c, and mmgetaa.c to accomodate large
-sequence files. Long (64-bit on some systems) variables are now used
-to specify file and memory position for the memory mapped functions.
-As a result, there are now two *.xin (memory mapped index) file
-formats: MP0, which uses 32-bit longs, and MP1, which uses 64-bit
-longs. On 64-bit machines, MP0 32-bit indices are read properly, but
-limit the database size to 2 or 4 Gb; MP1 64-bit indices allow very
-large databases. Blast2.0 formatdb databases are still limited to
-4Gb. To compile map_db.c to generate 64-bit index files, include the
-compile time option -DBIG_LIB64 in the Makefile. (Currently this
-option has been tested only on the DEC Alpha and SGI platforms, and
-will work only with Unix versions that provide 64-bit longs and 64-bit
-ftell()'s.)
-X
-The -R results file now uses sfn_cmp() to report a matching
-superfamily number, if one exists, and '0' otherwise.
-X
->>March 12, 2000
-X (no version change, previous version not distributed)
-X
-Provide new strategy for specifying library abbreviations. In
-addition to:
-X
-X fasta33 query.aa %anr
-X
-one can also specify:
-X
-X fasta33 query.aa %pir1+sp+nr
-or
-X fasta33 query.aa +pir1+sp+nr
-or
-X fasta33 query.aa %+pir1+sp+nr
-X
-where the + anywhere in the library name string indicates that
-variable length library names, separated by '+', are being used (the
-last '+' is optional). The FASTLIBS file then becomes:
-X
-================
-PIR1 Annotated Protein Database (rel 56)$0+pir1+/slib2/blast/pir1.lseg
-NBRF Protein database (complete)$0+nbrf+@/seqlib/lib/NBRF.nam
-NRL_3d structure database$0D/seqlib/lib/nrl_3d.seq 5
-NCBI/Blast non-redundant proteins$0+nr+/slib2/blast/nr.lseg
-NCBI/Blast Swissprot$0+sp+/slib2/blast/swissprot.lseg
-================
-X
-The two abbreviation types, single letter and +word+, cannot be
-intermixed, and at least initially, +word+ specifiers are
-case-sensitive (single letter abbreviations are not) and will not be
-available interactively, only on the command line.
-X
-Removed 'K' estimate for Expectation_n, Expectation_i fits to the
-distribution of unrelated similarity scores. 'K' cannot be calculated
-from the data available. 'Lambda' can be calculated, it is
-1.28255/sqrt(mean_var), and is still available.
-X
->>March 3, 2000
-X (no version change)
-X
-changed Makefile33.common, Makefile.common, to incorporate $(NRAND)
-rather than "rand48". Provide nrandom.c which uses random(), as
-replacement for nrand.c, which uses rand48().
-X
->>February 8, 2000
-X --> v33t05
-X
-Fixes to scaleswn.c (proc_hist_ml) to set num_db_entries properly.
-Scaleswn.c also provides Lambda estimates for -z 1/11 (Expectation_n),
-and -z 1/14 (Expectation_i) statistical estimates.
-X
-Modifications to calc_id() to correct bug in counting identities.
-Modified showalign() to use calc_id() with -m 9, for simpler
-debugging.
-X
-Additional modifications to dropfa*.c files to deal properly with 'n's
-and 'x's.
-X
-Added new option: -x #, which allows one to override the penalty for a
-match against 'x' (or 'N') provided by the scoring matrix. This
-option is particularly useful in fast[x/y] searches, where out of
-frame low complexity regions can generate high scores.
-X
-The old function of '-x' - to specify an alternate coordinate system,
-is now available as '-X # #'.
-X
-Updated scaleswn.c to provide window shuffle information for -z 12.
-X
-Updated compacc.c, workacc.c, to fix serious bug in wshuffle()
-that destroyed aa1[n1]=0.
-X
->>January 25, 2000
-X --> v33t04
-X
-X A serious bug in all of the fasta related programs has been
-corrected. The new code in fasta33 which ignores certain residues
-failed to initialize one of the arrays properly. As a result, in
-pathological situations, a very strong match could be missed.
-X
-X Corrected minor bug in initsw.c that cause misplaced "ktup" command
-line argument, which should be ingnored by ssearch, to be read as -d
-ktup.
-X
-X Improved error message for 0 length query sequence.
-X
->>January 17, 2000
-X --> no external version number change
-X
-Modified mmgetaa.c, map_db.c, and nmgetaa.c to provide memory mapping
-of genbank flatfile (format=1) files. This format could be read much
-more efficiently, however.
-X
->>January 12, 2000
-X --> no external version number change
-X
-Changed the behavior of the options that set the number of high scores
-(-b) and alignments (-d) that are displayed. Previously, fasta33 -E
-10.0 -d 10 would show 50 best scores, rather than all the scores with
-E() < 10.0. To get the -E threshold to limit, -E 10.0 -b 10000 -d 10
-was required. This is now fixed. Setting "-d 10" does not affect the
-number of best scores shown.
-X
-Minor change in mw.h to remove unused defines.
-X
-fasta3x.me (fasta3x.doc) updated.
-X
->>January 6, 2000
-X --> v33t03
-X
-Corrected bug in memory mapped reads of gcg_binary format files
-that potentially caused the last 63 residues to be read improperly.
-X
-Changes to comp_thr.c, pthr_subs.c, uthr_subs.c, ibm_pthr_subs.c to
-ensure that each thread has its own work_info structure. This solves
-some minor race conditions that sometimes caused some parameters
-not to be reported properly.
-X
-Changes to most of the drop*.c files to correct some minor problems
-with sequence alphabets. Code in mmgetaa.c (memory mapped code for
-FASTA, GCG compressed files) reordered to prevent files from being
-memory mapped if appropriate index files are not available.
-X
-See readme.pvm_3.3 for updates to the pvm programs.
-X
->>December 10, 1999
-X (no version change - modifications largely affect ps3comp*)
-X
-Modifications to showsum.c to deal with 2 scores/sequence. Modifications
-to mmgetaa.c for superfamily numbers.
-X
->>December 7, 1999
-X (no version change, previous version not released)
-X
-Corrected problem in mmgetaa.c that caused searches on a memory mapped
-single long sequence (e.g. Chr22) to fail. Corrected bug in map_db.c
-that caused it to crash on some architectures if a filename was not
-specified. Corrected off-by-three error in fasty/tfasty. Corrected
-indexing error in dropfz2.c.
-X
->>December 5, 1999
-X --> v33t02
-X
-corrected some bugs in inifa.c/initsw.c/doinit.c that caused
-abbreviated function names to be lost.
-X
-modify showbest.c, showalign.c to include information on position in
-library sequence (bbp->cont) to distinguish subsegment of very long
-sequences. Currently, the new label is available only with -m 6.
-X
->>November 29, 1999
-X [t]fastz33 uses v33t02 of fasty function.
-X
-Replace dropfz.c with dropfz2.c. Dropfz2.c interprets any codons,
-that include the nucleotide 'N' as the amino 'X'. Previously, 'N' was
-treated as 'A', so 'NNN' ended up 'K'. This modification, together
-with the -S option and lower-case pseg'ed databases, should ensure
-that DNA queries with large numbers of 'N's do not match low
-complexity regions.
-X
->>November 20, 1999
-X (no version change, previous version not released)
-X
-Modify initfa.c to disply initn, init1 scores for [t]fast[fs].
-Include "-B" option to show previous z-scores.
-X
->>November 17, 1999
-X (no version change, previous version not released)
-X
-Modify dropfx.c to use saatran(), rather than aatran(). saatran
-translates any 'N' containing codon as 'X'. aatran() treats 'N' as
-an 'A'. Although more steps are required for translation, the program
-appears to run just as fast.
-X
->>November 7, 1999
-X --> v33t01
-X
-Substantial changes to the output format in showbest.c (the list of
-high scoring sequences) and showalign.c (the alignments). The classic
-list of best scores:
-X
-The best scores are: initn init1 opt z-sc E(82014)
-gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIO ( 218) 1497 1497 1497 1761.1 2.3e-91
-gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE ( 218) 1413 1413 1413 1662.9 6.7e-86
-X
-has been replaced by:
-X
-The best scores are: opt bits E(82138)
-gi|121716|sp|P10649|GTM1_MOUSE GLUTATHIONE S-TRAN ( 218) 1497 354 7.6e-98
-gi|121717|sp|P04905|GTM1_RAT GLUTATHIONE S-TRANSF ( 218) 1413 335 5.3e-92
-X
-This display provides more information and removes the outdated initn
-and init1 scores, which are no longer used. The "bit" score is
-comparable to the blast2 bit score. It is calculated as: (lambda*S -
-ln K)/ln 2, where S is the raw similarity score, lambda and K are
-statistical parameters estimated from the distribution of unrelated
-sequence similarity scores. All of the similarity scores, including
-init1, initn, and z-scores are reported with the alignment data.
-Z-scores are displayed instead of bit scores in the list of high
-scores if the command line option "-B" is specified.
-X
-In addition, the alignment score line has changed from:
-X
->>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER (220 aa)
-X initn: 954 init1: 954 opt: 958 Z-score: 1130.9 expect() 1.1e-56
-Smith-Waterman score: 958; 61.927% identity in 218 aa overlap (1-218:1-218)
-X
-to:
-X
->>gi|2506495|sp|P20136|GTM2_CHICK GLUTATHIONE S-TRANSFER (220 aa)
-X initn: 954 init1: 954 opt: 958 Z-score: 1130.9 bits: 216.4 E(): 2.8e-56
-Smith-Waterman score: 958; 61.927% identity in 218 aa overlap (1-218:1-218)
-X
-In addition to the addition of the "bits:" score, the "expect()" label
-has changed to "E()" to save some space.
-X
->>November 4,12, 1999
-(no version change)
-X
-Fixed serious bug in -z 2 lambda/K calculation in scaleswn.c
-X
-Fixed bugs in llgetaa.c (openlib()) and definition of superfamily
-numbers.
-X
->>October 21, 1999
-(no version change)
-X
-Begin using CVS for version control. Correct faulty error message in
-dropfs.c. Corrected bad "goto loopl;" in dropfz.c. Corrected prss3.rsp
-for Makefile.tc (Win32 version).
-X
->>October 18, 1999
-X --> v33t0
-X
-Corrected some serious bugs with the various fasta/x/y programs when
-the -DALLOCN0 was used to save memory. Improvements to fasta3x.me/.doc
-documentation.
-X
->>October 12, 1999
-X --> v33tx
-X
-For this initial release of version 33 of the FASTA programs, the
-Makefile's have been modified to make "fasta33(_t)", "fastx33(_t)",
-etc, so that you can test fasta33 while retaining fasta3 (from release
-v32t08). The FASTA33 programs are somewhat slower than previous
-releases, but I believe the ability to handle low complexity regions
-without 'X'ing them out outweighs the slowdown. By (temporarily)
-changing the names of the programs slightly, it will be easier for you
-to judge the relative cost and benefit. To "make" the programs as
-"fasta3(_t)", etc, simply replace "Makefile33.common" with
-"Makefile.common" in the "Makefile" that you use.
-X
->>September 30, 1999
-X
-ssearch3/fasta3/fastx3/fasty3 have been modified to search databases
-containing both upper and lower case letters, where lower case letters
-indicate low-complexity regions. With the modified programs, lower
-case letters are treated as 'X's' in the initial scan, but are then
-treated normally in the final alignment. In addition, alignments can
-contain lower case letters. Lower case letters are treated as
-low-complexity regions during the seach phase of the program, but as
-"conventional" residues during the alignment phase, with the "-S"
-option. Currently, lower case letters are mapped to 'X's during the
-scan of the entire library. In the future, alternate weights will be
-available. This is a substantial improvement for very large scale
-comparison, where one seeks both accurate statistical estimates and
-accurate %identities and alignments, and for translated DNA:protein
-comparisons, like "fastx3" and "fasty3", where out-of-frame
-translations tend to match low complexity regions (see Pearson et
-al. (1997) Genomics 46:24-36).
-X
-Protein databases (and query sequences) can be generated in the
-appropriate format using John Wooton's "pseg" program, available from
-ftp://ncbi.nlm.nih.gov/pub/seg/pseg. Once you have compiled the "pseg"
-program, use the command:
-X
-X pseg database.fasta -z 1 -q > database.lc_seg
-X
-Once you have database.lc_seg, run the command "map_db" to generate
-a ".xin" file that can be used to efficiently memory map the database.
-X
-You can then search database.lc_seg with or without the "-S" option.
-Without "-S", the database is treated as any other FASTA format file -
-all the residues are present. With "-S", lower case residues will be
-treated as 'x's' during the initial scan but as normal residues when
-final alignments are displayed.
-X
-When the -S option is used, the matrix information line is changed
-from: "BL50 matrix (15:-5)" to "BL50 matrix (15:-5)xS". The "-S"
-option is no longer available to provide a scoring matrix offset.
-X
-Unfortunately, Blast2.0 format files cannot contain lower case
-letters. We have addressed this problem by providing efficient memory
-mapped access to Fasta and GCG/PIR, and GCG/compressed-binary files in
-the last release of fasta32t08. The memory mapped file I/O
-improvements are provided in fasta33 as well.
-X
-================ readme.v32 ================
-X
-FASTX/Y and FASTA (DNA) are now half as fast, because the programs now
-search both the forward and reverse strands by default.
-X
-The documentation in fasta3x.me/fasta3x.doc has been substantially
-revised.
-X
->>October 20, 1999
-(no version change)
-X
-Modify nxgetaa.c/nmgetaa.c to recognize 'N' as a possible DNA character.
-X
->>October 9, 1999
-X --> v32t08 (no version number change)
-X
-Added "-M low-high" option, where low and high are inclusion limits
-for library sequences. If a library sequence is shorter than "low" or
-longer than "high", it will not be considered in the search. Thus,
-"-M 200-250" limits the database search to proteins between 200 and
-250 residues in length. This should be particularly useful for fasts3
-and fastf3. -M -500 searches library sequences < 500; -M 200 -
-searches sequences > 200. This limit applies only to protein
-sequences.
-X
-Modified scaleswn.c to fall back to maximum likelihood estimates of
-lambda, K rather than mean/variance estimates. (This allows MLE
-estimation to be used instead of proc_hist_n when a limited range of
-scores is examined.)
-X
->>October 2, 1999
-X --> v32t08
-X
-Many changes:
-X
-(1) memory mapped (mmap()ed) database reading - other database reading fixes
-(2) BLAST2 databases supported
-(3) true maximum likelihood estimates for Lambda, K
-(4) Misc. minor fixes
-X
-(1) (Sept. 26 - Oct. 2, 1999) Memory mapped database access.
-It is now possible to use mmap()ed access to FASTA format databases,
-if the "map_db" program has been used to produce an ".xin" file. If
-USE_MMAP is defined at compile time and a ".xin" file is present, the
-".xin" will be used to access sequences directly after the file is
-mmap()ed. On my 4-processor Alpha, this can reduce elapsed time by
-50%. It is not quite as efficient as BLAST2 format, but it is close.
-X
-Currently, memory mapping is supported for type 0 (FASTA), 5
-(PIR/GCG ascii), and 6 (GCG binary). Memory mapping is used if a
-".xin" file is present. ".xin" files are created by the new program
-"map_db". The syntax for "map_db" is:
-X
-X map_db [-n] "/dir/database.fa"
-X
-which creates the file /dir/database.fa.xin. Library types can be
-included in the filename; thus:
-X
-X map_db -n "/gcggenbank/gb_om.seq 6"
-X
-would be used for a type 6 GCG binary file.
-X
-The ".xin" file must be updated each time the database file changes.
-map_db writes the size of the database file into the ".xin" file, so
-that if the database file changes, making the ".xin" offset
-information invalid, the ".xin" file is not used. "list_db" is
-provided to print out the offset information in the ".xin" file.
-X
-(Oct 2, 1999) The memory mapping routines have been changed to
-allow several files to be memory mapped simultaneously. Indeed, once a
-database has been memory mapped, it will not be unmap()ed until the
-program finishes. This fixes a problem under Digital Unix, and should
-make re-access to mmap()ed files (as when displaying high scores and
-alignments) much more efficient. If no more memory is available for
-mmap()ing, the file will be read using conventional fread/fgets.
-X
-(Oct 2, 1999) The names of the database reading functions has been
-changed to allow both Blast1.4 and Blast2.0 databases to be read. In
-addition, Makefile.common now includes an option to link both
-ncbl_lib.o and ncbl2_lib.o, which provides support for both libraries.
-However, Blast1.4 support has not been tested.
-X
-The Makefile structure has been improved. Each architecture specific
-Makefile (Makefile.alpha, Makefile.linux, etc) now includes
-Makefile.common. Thus, changes to the program structure should be
-correct for all platforms. "map_db" and "list_db" are not made with
-"make all".
-X
-The database reading functions in nxgetaa.c can now return a database
-length of 0, which indicates that no residues were read. Previously,
-0-length sequences returned a length of 1, which were ignored.
-Complib.c and comp_thr.c have changed to accommodate this
-modification. This change was made to ensure that each residue,
-including the last, of each sequence is read.
-X
-Corrected bug in nxgetaa.c with FASTA format files with very long
-(>512 char) definition lines.
-X
-(2) (September 20, 1999) BLAST2 format databases supported
-X
-This release supports NCBI Blast2.0 format databases, using either
-conventional file reading or memory mapped files. The Blast2.0 format
-can be read very efficiently, so there is only a modest improvement in
-performance with memory mapping. The decision to use mmap()'ed files
-is made at compile time, by defining USE_MMAP. My thanks to Eamonn
-O'Toole of DEC/Compaq, and Daryl Madura of Sun Microsystems, for
-providing mmap()'ed modifications to fasta3. On my machines, Blast2.0
-format reduces search time by about 30%. At the moment, ambiguous DNA
-sequences are not decoded properly.
-X
-(3) (September 30, 1999) A new statistical estimation option is
-available. -z 2 has been changed from ln()-scaling, which never
-should have been used, to scaling using Maximum Likelihood Estimates
-(MLEs) of Lambda and K. The MLE estimation routines were written by
-Aaron Mackey, based on a discussion of MLE estimates of Lambda and K
-written by Sean Eddy. The MLE estimation examines the middle 95% of
-scores, if there are fewer than 10000 sequences in the database;
-otherwise it excludes (censors) the top 250 scores and the bottom 250
-scores. This approach seems to effectively prevent related sequences
-from contaminating the estimation process. As with -z 1, -z 12 causes
-the program to generate a shuffled sequence score for each of the
-library sequences; in this case, no censoring is done. If the
-estimation process is reliable, Lambda and K should not vary much with
-different queries or query lengths. Lambda appears not to vary much
-with the comparison algorithm, although K does.
-X
-(4) Minor changes include fixes to some of the alignment display routines,
-individual copies of the pstruct structure for each thread, and some
-changes to ensure that every last residue in a library is available
-for matching (sometime the last residue could be ignored). This
-version has undergone extensive testing with high-throughput sequences
-to confirm that long sequences are read properly. Problems with
-fastf3/fasts3 alignment display have also been addressed.
-X
->>August 26, 1999 (no version change - not released)
-X
-Corrected problem in "apam.c" that prevented scoring matrices from
-being imported for [t]fasts3/[t]fastf3.
-X
->>August 17, 1999
-X --> v32t07
-X
-Corrected problem with opt_cut initialization that only appeared
-with pvcomp* programs.
-X
-Improved calculation of FASTA optcut threshold for DNA sequence
-comparison for match scores much less than +5 (e.g. +3). The previous
-optcut theshold was too high when the match penalty was < 4 and
-ktup=6; it is now scaled more appropriately.
-X
-Optcut thresholds have also been raised slightly for
-fastx/y3/tfastx/y3. This should improve performance with minimal
-effects on sensitivity.
-X
->>July 29, 1999
-(no version change - date change)
-X
-Corrected various uninitialized variables and buffer overruns
-detected.
-X
->>July 26, 1999 - new distribution
-(no version change - v32t06, previous version not released)
-X
-Changed the location of "(reverse complement)" label in tfasta/x/y/s/f
-programs.
-X
-Statistical calculations for tfasta/x/y in unthreaded version
-corrected. Statistical estimates for threaded and unthreaded versions
-of the tfasta/x/y/s/f programs should be much more consistent.
-X
-Substantial modifications in alignment coordinate calculation/
-presentation. Minor error in fastx/y/tfastx/y end of alignment
-corrected. Major problems with tfasta alignment coordinates
-corrected. tfasta and tfastx/y coordinates should now be consistent.
-X
-Corrected problem with -N 5000 in tfasta/x/y3(_t) searches encountered
-with long query sequences.
-X
-Updated pthr_subs.c/Makefile.linux to increase the pthreads stacksize
-to try to avoid "cannot allocate diagonal arrays" error message.
-Pthreads stacksize can be changed with RedHat 6.0, but not RedHat 5.2,
-so Makefile.linux uses -DLINUX5 for RedHat5.* (no pthreads stack size).
-I am still getting this message, so it has not been completely
-successful. Makefile.linux now uses -DALLOCN0 to avoid this problem,
-at some cost in speed.
-X
-The pvcomp* programs have been updated to work properly with
-forward/reverse DNA searches. See readme.pvm_3.2.
-X
->>July 7, 1999 - not released
-X --> v32t06
-X
-Corrected bug in complib.c (fasta3, fastx3, etc) that caused core
-dumps with "-o" option.
-X
-Corrected a subtle bug in fastx/y/tfastx/y alignment display.
-X
->>June 30, 1999 - new distribution
-(no version change)
-X
-Corrected doinit.c to allow DNA substitution matrices with -s matrix
-option.
-X
-Changed ".gbl" files to ".h" files.
-X
->>June 2 - 9, 1999 - new distribution
-(no version change)
-X
-Added additional DNA lambda/K/H to alt_param.h. Corrected some
-other problems with those tables for the case where (inf,inf)
-gap penalties were not included.
-X
-Fixed complib.c/comp_thr.c error message to properly report filename
-when library file is not found.
-X
-Included approximate Lambda/K/H for BL80 in alt_parms.h.
-BL80 scoring matrix changed from 1/3 bit to 1/2 bit units.
-X
-Included some additional perl files for searchfa.cgi, searchnn.cgi
-in the distribution (my-cgi.pl, cgi-lib.pl).
-X
->>May 30, 1999, June 2, 1999 - new distribution
-(no version number change)
-X
-Added Makefile.NetBSD, if !defined(__NetBSD__) for values.h. Changed
-zs_to_E() and z_to_E() in scaleswn.c to correctly calculate E() value
-when only one sequence is compared and -z 3 is used.
-X
->>May 27, 1999
-(no version number change)
-X
-Corrected bug in alignment numbering on the % identity line
-X 27.4% identity in 234 aa (101-234:110-243)
-for reverse complements with offset coordinates (test.aa:101-250)
-X
->>May 23, 1999
-(no version number change)
-X
-Correction to Makefile.linux (tgetaa.o : failed to -DTFAST).
-X
->>May 19, 1999
-(no version number change)
-X
-Minor changes to pvm_showalign.c to allow #define FIRSTNODE 1.
-Changes to showsum.c to change off-end reporting. (Neither of these
-changes is likely to affect anyone outside my research group.)
-X
->>May 12, 1999
-X --> v32t05
-X
-Fixed a serious bug in the fastx3/tfastx3 alignment display which
-caused t/fastx3 to produce incorrect alignments (and incorrectly low
-percent identities). The scores were correct, but the alignment
-percent identities were too low and the alignments were wrong.
-X
-Numbering errors were also corrected in fastx3/tfastx3 and
-fasty3/tfasty3 and when partial query sequences were used.
-X
->>May 7, 1999
-X
-Fixed a subtle bug in dropgsw.c that caused do_work() to calculate
-incorrect Smith-Waterman scores after do_walign() had been called.
-This affected only pvcompsw searches with the "-m 9" option.
-X
->>May 5, 1999
-X
-Modified showalign.c to provide improved alignment information that
-includes explicitly the boundaries of the alignment. Default
-alignments now say:
-X
-Smith-Waterman score: 175; 24.645% identity in 211 aa overlap (5:207-7:207)
-X
->>May 3, 1999
-X
-Modified nxgetaa.c, showsum.c, showbest.c, manshowun.c to allow a
-"not" superfamily annotation for the query sequence only. The
-goal is to be able to specify that certain superfamily numbers be
-ignored in some of the search summaries. Thus, a description line
-of the form:
-X
->GT8.7 | 40001 ! 90043 | transl. of pa875.con, 19 to 675
-X
-says that GT8.7 belongs to superfamily 40001, but any library
-sequences with superfamily number 90043 should be ignored in any
-listing or summary of best scores.
-X
-In addition, it is now possible to make a fasta3r/prcompfa, which is
-the converse of fasta3u/pucompfa. fasta3u reports the highest scoring
-unrelated sequences in a search using the superfamily annotation.
-fasta3r shows only the scores of related sequences. This might be
-used in combination with the -F e_val option to show the scores
-obtained by the most distantly related members of a family.
-X
->>April 25, 1999
-X
-X -->v32t04 (not distributed)
-X
-Modified nxgetaa.c to remove the dependence of tgetaa.o on TFASTA
-(necessary for a more rational Makefile structure). No code changes.
-X
->>April 19, 1999
-X
-Fixed a bug in showalign.c that displayed incorrect alignment coordinates.
-(no version number change).
-X
->>April 17, 1999
-X
-X --> v32t03
-X
-A serious bug in DNA alignments when the sequence has been broken into
-multiple segments that was introduced in version fasta32 has been
-fixed. In addition, several minor problems with -z 3 statistics on
-DNA sequences were fixed.
-X
-Added -m 9 option, which unfortunately does different things in
-pvcompfa/sw and fasta3/ssearch3. In both programs, -m 9 provides the
-id's of the two sequences, length, E(), %_ident, and start and end of
-the alignment in both sequences. pvcompfa/sw provides this
-information with the list of high scoring sequences. fasta3/ssearch3
-provides the information in lieu of an alignment.
-X
->>March 18, 1999
-X
-X --> v32t02
-X
-Added information on the algorithm/parameter description line to
-report the range of the pam matrices. Useful for matrices like
-MD_10, _20, and _40 which require much higher gap penalties.
-X
->>March 13, 1999 (not distributed)
-X
-X --> v32t01
-X
-X -r results.file has been changed to -R results.file to accomodate
-X DNA match/mismatch penalties of the form: -r "+1/-3".
-X
->>February 10, 1999
-X
-Modify functions in scalesw*.c to prevent underflow after exp() on
-Alpha Linux machines. The Alpha/LINUX gcc compiler is buggy and
-doesn't behave properly with "denormalized" numbers, so "gcc -g -m
-ieee" is recommended.
-X
-Add "Display alignments also (y/n)[n] "
-X
-pvcomplib.c again provides alignments!! In addition, there is a
-new "-m 9" option, which reports alignments as:
-X
->>>/home/wrp/slib/hlibs/hum0.aa#5>HS5 gi:1280326 T-cell receptor beta chain 30 aa, 30 aa vs /home/wrp/slib/hlibs/hum0.seg library
-HS5 30 HS5 30 1.873e-11 1.000 30 1 30 1 30
-HS5 30 HS2249 40 1.061e-07 0.774 31 1 30 7 37
-HS5 30 HS2221 38 1.207e-07 0.833 30 1 30 7 35
-HS5 30 HS2283 40 1.455e-07 0.774 31 1 30 7 37
-HS5 30 HS2239 38 1.939e-07 0.800 30 1 30 7 35
-X
-where the columns are:
-X
-query-name q-len lib-name lib-len E() %id align-len q-start q-end l-start l-end
-X
->>February 9, 1999
-X
-Corrected bug in showalign.c that offset reverse complement alignments
-by one.
-X
->>Febrary 2, 1999
-X
-Changed the formatting slightly in showbest.c to have columns line up better.
-X
->>January 11, 1999
-X
-Corrected some bugs introduced into fastf3(_t) in the previous version.
-X
->>December 28, 1998
-X
-Corrected various problems in dropfz.c affecting alignment scores
-and coordinates.
-X
-Introduced a new program, fasts3(_t), for searching with peptide
-sequences.
-X
->>November 11, 1998
-X
-X --> v32t0
-X
-Added code to correct problems with coordinate number in long library
-sequences with tfastx/tfasty. With this release, sequences should be
-numbered properly, and sequence numbers count down with reverse
-complement library sequences.
-X
-In addition, with this release, fastx/y and tfastx/y translated
-protein alignments are numbered as nucleotides (increasing by 3,
-labels every 30 nucleotides) rather than codons.
-X
-SHAR_EOF
-chmod 0644 readme.v33t0 ||
-echo 'restore of readme.v33t0 failed'
-Wc_c="`wc -c < 'readme.v33t0'`"
-test 50697 -eq "$Wc_c" ||
- echo 'readme.v33t0: original size 50697, current size' "$Wc_c"
-fi
-# ============= readme.v34t0 ==============
-if test -f 'readme.v34t0' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.v34t0 (File already exists)'
-else
-echo 'x - extracting readme.v34t0 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.v34t0' &&
-X
-X $Name: fa_34_26_5 $ - $Id: readme.v34t0,v 1.167 2007/04/26 18:42:43 wrp Exp $
-X
->>April 26, 2007
-X
-Modify scaleswn.c to prevent mle_cen() from hanging when it fails to
-converge. Also, free() more arrays in work_thr.c; initialize
-m_msg.hist.entries=0 in comp_lib.c, and various clean-ups for a_res
-encoded alignments.
-X
->>March 22, 2007
-X
-Update faatran.c genetic codes (and documentation on -t option). Update
-ncbl2_mlib.c to parse non-NCBI format 12 databases better.
-X
->>March 21, 2007 fasta-34_26_2
-X
-Fix conflict between "-S" "-s matrix.file".
-X
->>February 26, 2007 fasta-34_26_2
-X
-Fix problem with dropfs2.c (curv.start = lpos before initialized).
-X
->>January 12, 2007
-X
-Fix a problem with pssm_asn_subs.c reading strings (sequences) longer
-than 1024 bytes.
-X
-Remove searchfa.cgi, searchnn.cgi, cgi-lib.pl, my-cgi.pl - this code
-was used for an ancient FASTA WWW implementation and has been replaced
-by the FASTA_WWW package.
-X
-FASTA Version numbers are being modified to make releases easier to
-track, thus fa34t26b5 has become fasta-34_26_1. I would prefer to use
-decimal versions, but CVS does not allow '.' in tags.
-X
->>January 4, 2007 fasta-34_26_1
-X
-Include scripts for building Mac OS X Universal binaries on a PPC
-machine. Programs are compiled first with Makefile.os_x (gcc-3.3 for
-PPC) and then installed into ./ppc/. Programs are next compiled with
-Makefile.os_x86 for i386, and the resulting executables installed into
-./i386/. Finally, the "make_osx_univ.sh" script is run to build the
-universal binaries from the two executables using "lipo".
-X
->>December 12, 2006
-X
-Fix some problems with p2_workcomp.c: (1) no longer initialize pad
-characters for non-existent sequences. (2) deal with small libraries
-consistently with the serial versions.
-X
->>November 17, 2006 fa34t26b5
-X
-Fixed a problem reading ASN.1 format 2 PSSM's. It is now possible to
-download a PSI-BLAST PSSM RID and search properly. Next, the query
-sequence from the PSSM should be used instead of the provided query
-sequence, so that the query sequence is ignored.
-X
->>October 19, 2006 fa34t26b4
-X
-Fixed problem with SSE2 code when PSSM's are used.
-X
->>October 6, 2006 fa34t26b3
-X
-A new set of WIN32 programs is now available that use the Intel C++
-9.1 compiler, rather than the much older Borland Turbo-C compiler. All
-of the unthreaded programs that are part of the Unix and MacOSX FASTA
-distributions are now available. Threaded (multiprocessor) versions
-of the program as available as well, as are sse2 accelerated versions
-of ssearch34 (ssearch34sse2.exe, ssearch34sse2_t.exe).
-X
-Th new WIN32 code also uses Microsoft's "nmake" program to build the
-programs, which allows much greater consistency between the Unix and
-Windows versions.
-X
-X
->>September 18, 2006
-X
-Static global alignment variables removed from dropnfa.c, dropfx.c,
-dropfz2.c. dropnfa.c, dropfx.c and dropfz2.c should be thread safe.
-Together with the earlier changes, all the FASTA functions should now
-be thread safe during the alignment process.
-X
->>August 17, 2006
-X
-Begin removal of static variables from Smith-Waterman alignment
-functions. These variables kept the functions from being thread-safe.
-Now dropgsw.c and dropnsw.c are thread-safe.
-X
->>August 15, 2006 fa34t26b2
-X
-Fixed a problem with pv34compfx/mp34compfx (and fy) producing
-improperly labeled alignments and de-allocating memory for the reverse
-complement.
-X
->>July 18, 2006
-X
-The library file name parsing programs now provide the option for
-environment variable substitions. For example, SLIB2=/slib2 as an
-environment variable (e.g. export SLIB2=/slib2 for ksh and bash), then
-X
-X fasta34 -q query.aa '${SLIB2}/swissprot.fa' expands as expected.
-X
-While this is not important for command lines, where the Unix shell
-would expand things anyway, it is very helpful for various
-configuration files, such as files of file names, where:
-X
-X <${SLIB2}/blast
-X swissprot.fa
-X
-now expands properly, and in FASTLIBS files the line:
-X
-X NCBI/Blast Swissprot$0S${SLIB2}/blast/swissprot.fa
-X
-expands properly. Currently, Environment variable expansion only
-takes place for library file names, and the <directory in a file of
-file names.
-X
->>July 14, 2006 fa34t26b1
-X
-Updated Farrar smith_waterman_sse2.c code to address possible bug
-(code from Michael Farrar). Include <sunmedia_intrin.h> for
-compilation with Sun compiler with Makefile.sun_x86.
-X
->>July 2, 2006 fa34t26b0
-X
-This release provides an extremely efficient SSE2 implementation of
-the Smith-Waterman algorithm for the SSE2 vector instructions written
-by Michael Farrar (farrar.michael@gmail.com). The SSE code speeds up
-Smith-Waterman 8 - 10-fold in my tests, making it comparable to Eric
-Lindahl's Altivec code for the Apple/IBM G4/G5 architecture.
-X
-The Farrar code is largely confined to smith_waterman_sse2.c and
-smith_waterman_sse2.h, which are copyright (2006) by Michael Farrar,
-and cannot be redistributed without his permission. Mr. Farrar has
-agreed to provide his code under the same policy used by FASTA -
-e.g. the code can be used without permission, but not redistributed.
-X
-The Farrar code uses GCC version 4.0 SSE2 intrinsic functions to avoid
-assembly language code. Unfortunately, in my hands, "gcc -O3" causes
-"out of memory" errors, and other problems, so "gcc -O" is used instead.
-X
->>June 23, 2006 fa34t25d10
-X
-Modifications to comp_lib.c, compacc.c, and other files to ensure that
-function-specific MAXTOT values are used properly. MAXTOT is now
-available as m_msg.max_tot, which is set in initfa.c (m_msg.max_tot =
-MAXTOT) to ensure that functions that need very large MAXTOT values
-(e.g. TFASTX) can get them. tfastx can now search successfully with
-titin, a 27,000 residue protein.
-X
-Other changes have been made to accommodate long query sequences.
-X
-A serious bug was found in fastx34(_t) that caused alignment
-coordinates to be calculated improperly when the DNA sequence was much
-longer than the protein sequence.
-X
->>May 31, 2006 fa34t25d9
-X
-Fixed some problems with fasts/fastf alignments when -m 9 options were
-used. Unlike the other algorithms, the a_res structure does not
-capture all the information to re-produce an alignment, so do_walign
-now sets bptr->have_ares to indicate whether the a_res structure is
-valid.
-X
-Various problems with bad library names, and short query titles were
-also fixed.
-X
-Updated version number/date on all drop*.c functions.
-X
->>May 24, 2006 fa34t25d8
-X
-Revised code for NCBI *.pal/*.nal databases has been tested on all
-architectures, including Windows.
-X
-In addition, support for ASN.1 PSSM:2 files provided by the NCBI
-PSI-BLAST WWW site is included. This code will not work with
-iteration 0 PSSM's (which have no PSSM information). For ASN.1
-PSSM's, which provide the matrix name (and in some cases the gap
-penalties), the scoring matrix and gap penalties are set appropriately
-if they were not specified on the command line. ASN.1 PSSM's are type 2:
-X ssearch34 -P "pssm.asn1 2" .....
-X
->>May 18, 2006
-X
-Support for NCBI Blast formatdb databases has been expanded. The
-FASTA programs can now read some NCBI *.pal and *.nal files, which are
-used to specify subsets of databases. Specifically, the
-swissprot.00.pal and pdbaa.00.pal files are supported. FASTA supports
-files that refer to *.msk files (i.e. swissprot.00.pal refers to
-swissprot.00.msk); it does not currently support .pal files that
-simply list other .pal or database files (e.g. FASTA does not support
-nr.pal or swissprot.pal).
-X
-In the process of providing this support, the routines used to read
-ASN.1 binary formatdb files were substantially improved. It is now
-possible to see multiple description lines for a single sequence.
-X
-IS_BIG_ENDIAN has been removed from all of the Makefiles. The code
-now looks for the definition of __BIG_ENDIAN__ or _BIG_ENDIAN to
-decide whether the architecture IS_BIG_ENDIAN. If, for some reason,
-one of these macros is not defined on a BIG_ENDIAN architecture, then
--DIS_BIG_ENDIAN is required.
-X
->>May 12, 2006 CVS fa34t25d7
-X
-Corrected serious problem with coordinate display calculation for
-fasta34 and ssearch34 - in some cases the coordinates and alignment
-symbols were off by the length of the context (typically 30 residues).
-X
-Added capability to read ASN.1 binary PSSM information. This
-information is provided (in an encoded form) from the NCBI PSI-BLAST
-WWW site. (What is actually provided from the WWW site is a bzip2-ed
-binary file that is converted to ASCII HEX. The ASCII HEX file must
-be converted to binary, and then bunzip'ed. This bunzip-ed file is
-binary ASN.1.) These files can also be generated by
-X
-X blastpgp -J T -C pssm.asn1_bin -u 2
-X
-I am parsing the ASN.1 binary manually, not using the NCBI toolkit, so
-there may be some files that are not parsed properly - if so, let me
-know.
-X
-(May 12, 2006 - The NCBI changed the format of the psi-blast ASN.1
-PSSM - and has not yet provided documentation of the new structure, so
-this code does not work. It does work with blastpgp v 2.2.13, but not
-with the web site version 2.2.14. A fix was provided 24-May-2006)
-X
->>April 18, 2006
-X
-Small modification in mshowbest.c to provide more consistent display
-widths with -m 9i in list of best hits.
-X
->>April 11, 2006 CVS fa34t25d6
-X
-Corrected a problem introduced with the new, more efficient method for
-displaying alignments. For the tfast* programs, which must translate
-the library sequence, translations were not done when alignments were
-re-displayed.
-X
-Corrected an older problem with tfastx34 against very long sequence
-databases - the code to more efficiently do the display alignment did
-not use the correct sequence coordinates.
-X
-Modifications to dropfs2.c to ensure that exact peptide matches are
-captured more frequently.
-X
->>March 16, 2006 CVS fa34t25d5
-X
-Change to initfa.c to allow lower case DNA libraries using the
--DDNALIB_LC compile time option.
-X
-Modify p2_complib.c, p2_worklib.c (and doinit.c, msg.h) to allow the
--V annotation option for the parallel programs. Also modify to allow
-specification of the query range (but only for the first query, like
-fasta34) for the parallel programs.
-X
-Modification of p2_workcomp.c to correct some problems presenting
-percent similarity. Also correct unreleased bugs in the alignment
-routines that allow more efficient alignment re-calculation.
-X
->>Nov 20, 2005
-X
-Changes to support asymmetric matrices - a scoring matrix read in from
-a file can be asymmetric. Default matrices are all symmetric.
-X
->>Oct 24, 2005
-X
-Modifications extended to p2_complib.c/p2_workcomp.c. Incorporation
-of drop_func.h into p2_workcomp.c greatly simplifies things. No
-changes in communication - struct a_res_str is internal to
-p2_workcomp.c.
-X
-Additional changes to do_walign() so that aln_func_vals() must be
-called to set llfact, qlfact, etc in a_struct aln before or after
-do_walign is called. do_walign produces a_res_str a_res, which has
-all the information necessary to produce a calcons() or calc_code()
-alignment.
-X
->>Oct 19, 2005 CVS fa34t26b0
-X
-Modifications to drop*.c and c_dispn.c to separate (and simplify) some
-of the alignment coordinate calculations. Before, the "a_struct" had
-the coordinates of the alignment used in the display (seqc0, seqc1)
-AND in the original sequences (aa0, aa1), as well as other information
-used to calculate alignment coordinates. In the new version, astruct
-coordinates always refer to seqc0,1, while a new structure, a_res_str,
-has coordinates for aa0, aa1 as well as the alignment encoding in res[nres].
-Eventually, this should make it possible to display multiple local
-alignments from the same two sequences.
-X
-In addition, the file "drop_func.h" has been added to the project, and
-is included by many of the files (all the drop*.c functions,
-mshowbest.c, mshowalign.c) to ensure that the various functions are
-declared and used consistently.
-X
->>Sept 19, 2005 CVS fa34t25d4
-X
-Changes to support Mac OS 10.4 - Tiger (include sys/types.h in more
-files). Documentation update for prss34/prfx34. Modifications to
-comp_lib.c to support prss34_t/prfx34_t. Shuffle numbers for
-prss/prfx can now be specified by "-k #".
-X
->>Sept 2, 2005
-X
-The prss34 program has been modified to use the same display routines
-as the other search programs. To be more consistent with the other
-programs, the old "-w shuffle-window-size" is now "-v window-size".
-X
-prss34/prfx34 will also show the optimal alignment for which the
-significance is calculated by using the "-A" option.
-X
-Since the new program reports results exactly like other
-fasta/ssearch/fastxy34 programs, parsing for statistical significance
-is considerably different. The old format program can be made using
-"make prss34o".
-X
->>Aug 26, 2005
-X
-Modifications to save_best() in comp_lib.c to support prss34_t. It
-did not work before.
-X
->>July 25, 2005
-X
-Modify mshowbest.c to suppress gi|12345 in HTML mode.
-X
->>July 18, 2005 CVS fa34t25d3
-X
-Modifications to Makefile.tc to support NCBI formatdb formats under
-Windows.
-X
->>May 19, 2005 CVS fa34t25d2
-X
-Modifications to dropfs2.c to fix an obscure bug that occurred when
-correctly ordered peptides aligned one residue apart.
-X
->>May 5, 2005 CVS fa34t25d1
-X
-Modification to the -x option, so that both an "X:X" match score and
-an "X:not-X" mismatch score can be specified. (This score is also used
-to give a positive score to a "*:*" match - the end of a reading frame,
-while giving a negative score to "*:not-*".)
-X
->>March 14, 2005 CVS fa34t25b4
-X
-Fixed some problems caused by padding characters required for
-Smith-Waterman ALTIVEC in the parallel (p2_complib.c, p2_workcomp.c)
-versions.
-X
->>Feb 24, 2005 CVS fa34t25b3
-X
-Changes to comp_lib.c (and Makefile.pcom) to support prss34_t.
-X
->>Feb 12, 2005
-X
-Modify dropfs.c to dynamically allocate space for alignments, so that
-queries with a large number of fragments can still place all the
-fragments on the alignment. Also fix a problem produced by removing
--DBIGMEM from most of the Makefile's, but not fixing defs.h to use
-BIGMEM sizes by default.
-X
->>Jan 24, 2005
-X
-Include a new program, "print_pssm", which reads a blastpgp binary
-checkpoint file and writes out the frequency values as text. These
-values can be used with a new option with ssearch34(_t) and prss34,
-which provides the ability to read a text PSSM file. To specify a
-text PSSM, use the option -P "query.ckpt 1" where the "1" indicates a
-text, rather than a binary checkpoint file. "initfa.c" has also been
-modified to work with PSSM files with zeros in the frequency
-table. Presumably these positions (at the ends) do not provide
-information. (Jan 26, 2005) blastpgp actually uses BLOSUM62 values
-when zero frequencies are provided, so read_pssm() has been modified
-to use scoring matrix values for zero frequencies as well.
-X
->>Jan 13, 2005
-X
-Change to initfa.c to have fasts34 do a protein comparison by default,
-rather than an unknown sequence type. Automatic checking for fasts34
-does not work reliably, because queries can be very short. Likewise
-for fastm34. [Jan 26, 2005] Undo this change, which broke DNA
-comparison when "-n" was specified.
-X
->>Jan 7, 2005
-X
-Changes to tatstats.h, dropfs2.c to allow larger numbers of peptides
-to match when fasts is used to show coverage on a proteomics
-experiment. Previously fasts could match no more than 30 peptides,
-that has been increased to 50. In addition, ktup=2 can be used
-to increase the likelihood that short exact matches trump longer
-mismatched regions.
-X
->>Nov 11, 2004 CVS fa34t25
-X
-Finished merge of earlier fa34t24 branch with HEAD. Correct
-labeling of TFASTM.
-X
->>Nov 4-8, 2004
-X
-Incorporation of Erik Lindahl "anti-diagonal" Altivec code for
-Smith-Waterman, only. Altivec SSEARCH is now faster than FASTA for
-query sequences < 250 amino acids.
-X
-Small modifications to output score display to ensure that the correct
-scores are shown, and that they are correctly labeled.
-X
->>Aug 25,26, 2004 CVS fa34t24b3
-X
-Small change in output format for p34comp* programs in
-">>>query_file#1 string" line before alignments. This line is not present
-in the non-parallel versions - it would be better for them to be consistent.
-X
-Change in last_stats.c to properly label fasts statistics with -z != 1.
-X
-Change in dropfs2.c to ensure that tatprobs are not precalculated with -z 4.
-X
-Modify -m 9i output option to show in HTML output.
-X
-Add "#ifdef NOOVERHANG" to dropfs2.c that causes overlapping
-alignments to score a 0, rather than the partial overlap score.
-Useful for SAGE alignments, because "fasts" requires global alignments
-(except for overhangs, unless NOOVERHANG is defined).
-X
->>Aug 23, 2004
-X
-Fix problem with very long definition lines with formatdb version4
-ASN databases. Fix mshowalign.c to re-enable "-L" option.
-X
->>July 28, 2004
-X
-Fix to re-enable -w window shuffle for PRSS. Modify comp_lib.c
-for PRSS to ensure that the unshuffled score and probability
-are shown, even for very high probability alignments.
-X
->>July 21, 2004
-X
-Modifications to support PostgreSQL databases with the same commands
-as MySQL databases. MySQL database libraries are type 16, PostgreSQL
-are type 17. Makefile.linux_sql and Makefile.pvm4_sql support both
-database types simultaneously.
-X
->>June 23, 2004 CVS fa34t24b2
-X
-Additional fixes to enable -n or -p with fasts34 and
-fastm34. Makefile.pcom was fixed for fastm34_t. A new file,
-mgstm1.nts, of DNA fragments from mgstm1.seq, is included for testing
-fasts34 and fastm34.
-X
->>May 4, 2004
-X
-Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM. This change
-introduced a bug that broke FASTS completely, but was fixed June 18,
-2004 (and retagged fa34t24b2).
-X
->>April 23, 2004 CVS fa34t24b1
-X
-Fix bug in initfa.c that caused tfasts/tfastf not to examine all six
-frames.
-X
->>May 4, 2004
-X
-Fixes to initfa.c to allow DNA:DNA for FASTS, FASTM.
-X
->>March 19, 2004 CVS fa34t24b0
-X
-Modify all the drop*.c files, plus mshowbest.c and mshowalign.c, to
-display percent similarity, rather than percent ungapped. An
-alignment is counted as similar if the score is greater than or equal
-to zero (the same criterion used for placing "."). To disable this
-change, remove -DSHOWSIM from the appropriate Makefile.*.
-X
->>March 18, 2004 CVS fa34t23b8
-X
-Fix bug in initfa.c tables that caused prss to generally compare
-proteins.
-X
->>March 15, 2004
-X
-Fix bug in calls to revcomp(); make revcomp() guarantee NULL termination.
-X
->>March 2, 2004 CVS fa34t23b7
-X
-Fix a very embarrassing and surprising bug that caused insertions
-in fasta alignments to appear in the wrong sequence.
-X
->>Feb 7, 2004 CVS fa34t23b6
-X
-Change initfa.c to allow "-i" (reverse complement) and "-i -3" with
-"fastx34" and "prfx34". In addition, "prfx34" now examines both query
-DNA strands in calculating the shuffled statistical significance.
-X
->>Feb 5, 2004
-X
-Reverse assignments for G:U base pairing in initfa.c.
-X
-Fix memory allocation error caused by doubling DNA alignment width.
-X
->>Jan 7, 2004 CVS fa34t23b5
-X
-Change in do_walign() in dropnfa.c to make final DNA alignments use a
-band that is 2X as large as the search band width.
-X
->>Dec 22, 2003 CVS fa34t23b4
-X
-Fix typo in p2_complib.c that prevented compilation. Fix problem
-with karlin.c for asymmetrical matrices, such as used with -U.
-X
->>Dec 10, 2003 CVS fa34t23b3
-X
-Fix problem in resetp()/initfa.c that disabled banded Smith-Waterman
-DNA alignments.
-X
-Allow spam() to do extended alignments for DNA if one of the sequences
-is < 50 nt.
-X
-Cause default ktup to drop for short sequences. For protein < 50, ktup=1;
-for DNA < 20, 50, 100 ktup = 1, 2, 3, respectively.
-X
->>Dec 7, 2003
-X
-A new option, "-U" is available for RNA sequence comparison. "-U"
-functions like "-n", indicating that the query is an RNA sequence. In
-addition, to account for "G:U" base pairs, "-U" modifies the scoring
-matrices so that a "G:A" match has the same score as a "G:G" match,
-and "T:C" match has the same score as a "T:T" match. The asymmetric
-matrix required changes in dropnfa.c that were similar to the changes
-in dropgsw.c required for profiles. In addition, m_msg.qdnaseq and pst.dnaseq
-X can now be SEQT_DNA, SEQT_RNA, SEQT_PROT, SEQT_UNK, or SEQT_OTHER.
-m_msg.ldnaseq does not use SEQT_RNA, only SEQT_DNA. A new member of
-struct pstruct: int nt_align, is used to indicate nucleotide
-alignments.
-X
->>Nov 19, 2003
-X
-Changes to Makefile's to distinguish between tatstats_fs.o and
-tatstats_ff.o.
-X
->>Nov 2, 2003
-X
-Substantial changes to comp_lib.c, p2_complib.c, mshowbest.c, and
-mshowalign.c to support more sophisticated display options.
-Previously, one could have only one "-m #" option, even though several
-of the options were orthogonal (-m 9c is independent of -m 1 and -m2,
-which is independent of -m 6 (HTML)). The programs now use a bitmask
-that allows independent options to be combined. In particular -m 9c
-can be combined with -m 6, which can be very helpful for runs that
-need HTML output but can also exploit the encoding provided by -m 9c.
-X
-The "-m 9" option now also allows "-m 9i", which shows the standard
-best score information, plus percent identity and alignment length.
-X
->>Oct 26, 2003 CVS fa34t23b1
-X
-Additional fixes to Makefiles to enable tfastf34(_t). Changes to
-support ossearch34 (a non-Phil Green optimized Smith-Waterman).
-X
->>Oct 8, 2003 CVS fa34t23b0
-X
-Fixes to get DNA queries working in both directions, and to fix PCOMPLIB
-programs for "-V" option. Currently, the parallel programs cannot use
-the "-V" option.
-X
->>Sept 25, 2003
-X
-A new option is available for annotating alignments. -V '@#?!'
-can be used to annotate sites in a sequence, e.g:
-X >GTM1_HUMAN ...
-X PMILGYWDIRGLAHAIRLLLEYTDS@S?YEEKKYT@MG
-X DAPDYDRS@QWLNEKFKLGLDFPNLPYLIDGAHKIT
-might mark known and expected (S,T) phosphorylation sites. These
-symbols are then displayed on the query coordinate line:
-X
-X 10 20 @? 30 @ 40 @ 50 60
-GTM1_H PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
-X ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-gtm1_h PMILGYWDIRGLAHAIRLLLEYTDSSYEEKKYTMGDAPDYDRSQWLNEKFKLGLDFPNLP
-X 10 20 30 40 50 60
-X
-This annotation is mostly designed to display post-translational
-modifications detected by MassSpec with FASTS, but is also available
-with FASTA and SSEARCH.
-X
->>Sept 22, 2003 CVS fa34t22b5
-X
-The Altivec Smith-Waterman code has been removed.
-X
->>Sept 17, 2003 CVS fa34t22b4
-X
-A variety of different bugs have been fixed. (1) All the functions in
-the old initsw.c are now in initfa.c; initsw.c will be removed.
-Specifically, the Profile/PSSM code is now in initfa.c. initfa.c is
-now fully table driven. (2) various problems with prss34 and prfx34
-have been fixed in initfa.c. (3) An additional ncbl2_mlib.c buffer
-overrun has been fixed. (4) fastf34 is now available in this package.
-Its performance is very similar to, but not identical to, fastf33. I
-am tracking down the differences. In general, the raw scores
-calculated by both programs are the same, but the statistical analysis
-seems to be slightly different.
-X
->>July 30, 2003 CVS fa34t22b3
-X
-Fix bug in ncbl2_mlib.c that caused buffer overrun with blast/formatdb
-v3 description lines.
-X
->>July 28, 2003
-X
-The initfa.c file has been substantially re-structured to use a
-table-driven approach to parameter setting, rather than the previous
-confusing combinations of #ifdef's. Two tables of parameters are
-used, pgm_def_arr[] and msg_def_arr[], which specify values like the
-program name, reference, scoring matrix, default gap penalties, etc.
-msg_def_arr[] has the sequence types for the query, library, and
-algorithm, as well as other parameters (qframe, nframe, nrelv, etc),
-which greatly simplifies the sequence recognition logic. ppst->pgm_id
-can be used to identify the program that is running. Eventually,
-almost all of the program specific #ifdef's will be removed from
-initfa.c. initfa.c now provides initsw.c functionality, so that
-initsw.c is no longer needed.
-X
->>July 25, 2003
-X
-A new file is included - fasta.defaults - that lists the scoring
-matrix, gap penalty, and other defaults for all of the fasta34
-programs. This file will be used soon to simplify parameter setting
-for the FASTA programs, and should also be used by Javascript WWW
-interfaces to the FASTA programs.
-X
->>July 22, 2003 CVS fa34t22b2
-X
-Fixes to dropfs2.c, tatprobs.c to ensure that negative probabilities
-cannot occur. Negative probabilities were never seen with standard
-matrices, but did occur with BL50. Another optimization in dropfs.c
-considerably improves fasts34 performance in some cases.
-X
-Fix a problem with formatdb v4 ASN.1 format files.
-X
->>July 12, 2003
-X
-Fix a bug that prevented "-L" (long sequence descriptions) from
-working.
-X
->>July 9, 2003
-X
-Fix reverse complement (M:K) error. Fix off-by-one error for FASTA
-DNA alignments that caused the first aligned residue pair to be
-missed.
-X
->>July 4 - 8, 2003
-X
-Incorporate blast-def-line ASN.1 parsing so that NCBI formatdb version
-4 files can be read.
-X
->>June 26, 2003
-X
-The strategy for displaying the match/mismatch line (" .:" for -m 0)
-has been changed dramatically to accommodate more sophisticated
-strategies for indicating conservative replacements, e.g. because of
-PSSM's. In addition to seqc0 and seqc1, which hold the aligned
-sequences for display, there is also seqca, which holds the alignment
-symbol. calcons(), do_show(), and discons() have all changed to
-include seqca. calcons() is somewhat more complex; discons() is much
-simpler. (June 29, 2003 - dropgsw.c calcons() now displays profile
-similarity accurately - it is very very illuminating.)
-X
->>June 16, 2003 version: fasta34t22
-X
-ssearch34 now supports PSI-BLAST PSSM/profiles. Currently, it only
-supports the "checkpoint" file produced by blastall, and only on
-certain architectures where byte-reordering is unnecessary. It has not
-been tested extensively with the -S option.
-X
-X ssearch34 -P blast.ckpt -f -11 -g -1 -s BL62 query.aa library
-X
-Will use the frequency information in the blast.ckpt file to do a
-position specific scoring matrix (PSSM) search using the
-Smith-Waterman algorithm. Because ssearch34 calculates scores for
-each of the sequences in the database, we anticipate that PSSM
-ssearch34 statistics will be more reliable than PSI-Blast statistics.
-X
-The Blast checkpoint file is mostly double precision frequency
-numbers, which are represented in a machine specific way. Thus, you
-must generate the checkpoint file on the same machine that you run
-ssearch34 or prss34 -P query.ckpt. To generate a checkpoint file,
-run:
-X
-blastpgp -j 2 -h 1e-6 -i query.fa -d swissprot -C query.ckpt -o /dev/null
-X
-(This searches swissprot for 2 iterations ("-j 2") using an E()
-threshold of 1e-6, saving the resulting position specific frequencies in
-query.ckpt. Note that the original query.fa and query.ckpt must
-match.)
-X
->>June 5, 2003
-X
-Fix to mshowbest.c to get -m 9 coordinates correct on reverse strand
-with pv34comp*. Some additional fixes for prfx34.
-X
->>May 22, 2003
-X
-Changes to llgetaa.c, getseq.c, comp_lib.c to provide a different
-library residue lookup table (sascii) for queries and libraries. This
-allows one to make a prfx34 (like prss34, but using the fastx
-algorithm). prfx34 is now available.
-X
->>May 13,14 2003
-X
-Fixes to most of the drop*.c files, and mshowbest.c, to ensure that
-coordinates displayed with -m 9(c) and the final alignment are
-consistent. They were consistent for fasta34/ssearch34/fasts34, but
-not for fastx34/fasty34. The alignment coordinate system has been
-revised for consistency in all the drop*.c programs (coordinates
-used to be off-by-one for some, but not other functions).
-X
-Fixes to -m 9c for fasty34/pv34compfy. In addition, a problem was
-fixed with fastx34/fasty34 that appeared when a protein sequence was
-considerably longer than the DNA query, e.g. an EST vs titin (26K
-residues). This problem only appeared on pv34compfx/fy on Xserve's
-under OS_X; but it should improve fastx34/fasty34 performance with
-very long protein sequences on all platforms.
-X
->>May 7,8 2003
-X
-Changes to p2_workcomp.c, compacc.c, and p_mw.h to fix persistent
-bugs in the -m 9c display. Previous pv34comp* programs would not
-return the correct coded alignment if more than 100 alignments came
-from the same node, or if an encoding was longer than 127 chars.
-X
-Also, fixes to p2_complib.c, comp_lib.c, to allow long query sequences
-to be segmented. Previously, only the first 20,000 residues were
-used. The segmented queries are not overlapped; segmented library
-sequences are.
-X
->>May 5, 2003
-X
-Changes to last_tat.c, scaleswt.c to ensure that all fasts alignments
-that are likely to have significant scores are displayed. In previous
-implementations, if the query had more than 10 fragments, only the 100
-best scores were shown. Now, we rescore up to 2500 alignments. The
-new approach allows large mixtures to be used for searches, where some
-of the fragments from the mixture match too many proteins
-(e.g. actins). Some differences between the fasts34 and pv34compfs
-implementations have been fixed. The two programs typically will not
-give exactly the same results, because of small differences in the
-sampling procedures, but the results are essentially equivalent.
-X
->>Apr 11, 2003 CVS fa34t21b3
-X
-Fixes for "-E" and "-F" with ssearch34, which were inadvertently disabled.
-X
-A new option, "-t t", is available to specify that all the protein
-sequences have implicit termination codons "*" at the end. Thus, all
-protein sequences are one residue longer, and full length matches are
-extended one extra residue and get a higher score. For
-fastx34/tfastx34, this helps extend alignments to the very end in
-cases where there may be a mismatch at the C-terminal residues.
-X
--m 9c has also been modified to indicate locations of termination
-codons ( *1).
-X
->>Mar 17, 2003 CVS fa34t21b2
-X
-A new option on scoring matrices "-MS" (e.g. "BL50-MS") can be used to
-turn the I/L, K/Q identities on or off. Thus, to make "fastm34" use
-the isobaric identities, use "-s M20-MS". To turn them off for "fasts34",
-use "-s M20".
-X
-More fixes for correct alignment coordinates. There was a conflict between
--m 9 and -m 9c and subsequent alignment displays.
-X
->>Mar 13, 2003
-X
-Various fixes to produce correct fastm34 alignments. Changes to all
-functions to correct potential problem with -m 9 alignment coordinates
-when both -m 9 and actual alignments are shown.
-X
->>Feb 25,27, 2003
-X
-Modifications to re-activate showsum.c, which included corrections to
-the showbest() call in p2_complib.c.
-X
->>Feb 13, 2003 CVS fa34t21b1
-X
-Modifications to dropfx.c to dramatically improve alignment speed for
-cases where the DNA sequence is considerably longer than the protein
-sequence. Previously, a 200 aa vs 5000 nt comparison would do a full
-200 x 5000 Smith-Waterman alignment; with this modification, no more
-than a 200 x 1200 (2x3x200) alignment is done. This optimization has
-not (yet) been applied to dropfz2.c (fasty/tfasty).
-X
->>Feb 11, 2003
-X
-Small modifications to comp_lib.c, p2_complib.c, and nmgetlib.c to
-pass openlib() a possibly old lmf_str. This allows openlib() to
-re-use memory mapped files. closelib() no longer releases memory
-mapped file buffers. Under Linux, memory mapped file buffers were not
-really released, so when comparing a set of sequences against nr, the
-program could not mmap() the database after several searches. This
-will also speed up memory mapped multiple sequence searches.
-X
->>Jan 28-31, 2003 CVS fa34t21b0
-X
-Fix another bug (all of v34t20) involved with overlapping long
-sequences. And another bug that occurred when using sampled
-statistics, but appeared only on the SGI platform - thanks to Dmitri
-Mikhailov. Several other issues have been addressed based on more
-instrumented runtime testing.
-X
-Fix an old (all v34) bug that caused problems with -z 11-16 (shuffled
-sequence array was not allocated properly). Fixed another bug with -z
-6/16 when using threaded (_t) searches in fasta34_t.
-X
-Restructure statistical analysis functions (scaleswn.c, scaleswt.c) to
-return the "final" statistical estimation routine done in pst.zsflag_f.
-This allows the program to cope with searches against a single sequence
-correctly.
-X
-Corrected an error for DNA sequences needing Altschul-Gish statistics.
-X
->>Jan 25, 2003
-X
-Add option "-J start:stop" to pv34comp*/mp34comp*. "-J x" used to
-allow one to start at query sequence "x"; now both start and stop can
-be specified.
-X
->>Jan 14, 2003
-X
-Changes to apam.c to provide an error message on stderr when a scoring
-matrix cannot be found.
-X
-Changes to dropfs2.c, initsw.c, initfa.c to provide -m9c information
-for fasts34 searches. Modify the alignment algorithm to use
-probabilistic scores properly.
-X
->>Dec 22, 2002
-X
-Change to compacc.c (sortbeste()) to do a second sort on zscore when
-several sequences have E() == 0.
-X
->>Nov 27, 2002
-X
-Change FSEEK_T to fseek_t to keep Borland BCC5 happy.
-X
->>Nov 14-22, 2002 CVS fa34t20b6
-X
-Include compile-time define (-DPGM_DOC) that causes all the fasta
-programs to provide the same command line echo that is provided by the
-PVM and MPI parallel programs. Thus, if you run the program:
-X
-X fasta34_t -q -S gtt1_drome.aa /slib/swissprot 12
-X
-the first lines of output from FASTA will be:
-X
-X # fasta34_t -q gtt1_drome.aa /slib/swissprot
-X FASTA searches a protein or DNA sequence data bank
-X version 3.4t20 Nov 10, 2002
-X Please cite:
-X W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
-X
-This has been turned on by default in most FASTA Makefiles.
-X
-Fix p2_complib.c so that qstats[] is always allocated before it is used.
-X
-Fix serious bug in non-threaded comp_lib.c that caused some high
-scoring sequences to be missed by fasts34. New tests are included in
-test.sh to detect this problem in the future.
-X
-The shell sort algorithm in sortbeste(), sortbestz(), and sortbesto()
-has been modified to use an improved algorithm that will not go
-quadratic in pathological cases.
-X
-nmgetlib.c and mmgetaa.c have been modified to remove "^A" in libstr
-when used with p2_complib.c.
-X
-Fix problem with MAXSEG in tatstats.h with IBM/AIX.
-X
-Changes to most Makefiles to use -DSAMP_STATS; fixes to p2_complib.c
-for SAMP_STATS.
-X
->>Oct 22, Nov 3, Nov 9, 2002 CVS tag fa34t20b5
-X
-Fix problem in comp_lib.c that caused the query sequence length to be
-counted twice.
-X
-Fixed problem with prss34 (updated find_zp in showrss.c).
-X
-Correct shuffling function in several places.
-X
-Add jitter back to addhistz() - improves appearance with prss34.
-X
-Changes to fix problems with aln_code using -m 9c.
-X
-Fix to serious bug in scaleswt.c (fasts34, etc) that caused sorts on
-the high scores to take much too long. The program is now 10X faster,
-and scales well on PVM/MPI.
-X
-Fix to llgetaa.c to work with new getseq() API with automatic alphabet
-recognition.
-X
->>Oct 12, 2002 CVS tag fa34t20b4
-X
-Several very obscure (and sometimes old) bugs that appeared in certain
-MPI environments have been fixed. This occurred because the pst.sq[]
-array did not always have a '\0' at the end. In addition,
-mshowalign.c/p2_workcomp.c sometimes failed to put the '\0' at the end
-of seqc0/seqc1. Correct bug introduced in fa34t20b3 for fasts34(_t).
-X
->>Oct 9, 2002 CVS tag fa34t20b3
-X
-Fix to apam.c build_xascii() to not zero-out qascii[0]. Fix
-Makefile.pvm4. Fix problem with -m 9c with compacc.c.
-X
->>Sept 28, 2002
-X
-Additional fixes to -m 9c in p2_complib.c/compacc.c/mshowbest.c.
-Remove restriction in fasts34(_t) to less than 30 peptides (though no
-more than 30 peptides can be aligned currently).
-X
->>Sept 24, 2002
-X
-Fix p2_workcomp.c so that e_scores are delivered correctly when
-last_calc flag is set, and -m 9c provides alignments when only one
-best hit is present.
-X
-Fix comp_lib.c to use different maxn and overlap for each different
-query sequence. fasta34 and fasta34_t now have identical results when
-a long sequence is searched.
-X
-Add '@C:101' support to memory mapped FASTA format files.
-X
-Fix mshowalign.c so that coordinates returned by cal_coord() use
-loffset+l_off.
-X
->>Sept 14, 2002 CVS tag fa34t20b2
-X
-Changes to p2_complib.c, compacc.c to fix statistics problems with
-pv34compfs on query sequences with more than 10 fragments.
-X
->>Aug 27, 2002
-X
-Modifications to mshowbest.c and drop*.c (and p2_workcomp.c,
-compacc.c, doinit.c, etc.) to provide more information about the
-alignment with the -m 9 option. There is now a "-m 9c" option, which
-displays an encoded alignment after the -m 9 alignment information.
-The encoding is a string of the form: "=#mat+#ins=#mat-#del=#mat".
-Thus, an alignment over 218 amino acids with no gaps (not necessarily
-100% identical) would be =218. The alignment:
-X
-X 10 20 30 40 50 60 70
-GT8.7 NVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQWLNEKFKL--GLDFPNLPYL-IDGSHKITQ
-X :.:: . :: :: . .::: : .: ::.: .: : ..:.. ::: :..:
-XXURTG NARGRMECIRWLLAAAGVEFDEK---------FIQSPEDLEKLKKDGNLMFDQVPMVEIDG-MKLAQ
-X 20 30 40 50 60
-X
-would be encoded: "=23+9=13-2=10-1=3+1=5". The alignment encoding is
-with respect to the beginning of the alignment, not the beginning of
-either sequence. The beginning of the alignment in either sequence is
-given by the an0/an1 values. This capability is particularly useful
-for [t]fast[xy], where it can be used to indicate frameshift positions
-"/#\#" compactly. If "-m 9c" is used, the "The best scores" title
-line includes "aln_code".
-X
->>Aug 14, 2002 CVS tag fa34t20
-X
-Changes to nmgetlib.c to allow multiple query searches coming from
-STDIN, either through pipes or input redirection. Thus, the command
-X
-X cat prot_test.lseg | fasta34 -q -S @ /seqlib/swissprot
-X
-produces 11 searches. If you use the multiple query functions, the
-query subset applies only to the first sequence.
-X
-Unfortunately, it is not possible to search against a STDIN library,
-because the FASTA programs do not keep the entire library in memory
-and need to be able to re-read high-scoring library sequences. Since
-it is not possible to fseek() against STDIN, searching against a STDIN
-library is not possible.
-X
->>Aug 5, 2002
-X
-fasts34(_t) and fastm34(_t) have been modified to allow searches with
-DNA sequences. This gives a new capability to search for DNA motifs,
-or to search for ordered or unordered DNA sequences spaced at
-arbitrary distances.
-X
->>Aug 4, 2002
-X
-comp_lib.c has been modified to provide comp_mlib.c function.
-comp_mlib.c is no longer used. comp_lib.c with the "mlib" function
-can now recognize protein or DNA sequences automatically, and reads
-from stdin can now detect DNA/protein sequence types automatically.
-Changes to compacc.c, getseq.c, doinit.c initfa.c, initsw.c, and
-nmgetlib.c to support automatic sequence type detection.
-X
->>July 28-31, 2002
-X
-(1) The various Makefile's have been "normalized". The fast*34[_t]
-X (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
-X Makefile.mpi4[_sql] make files all use a common set of filenames,
-X described in Makefile.fcom. This greatly simplifies adding
-X programs, but requires that all *.o files be deleted when moving
-X from fast*34* to pv34comp* to mp34comp*.
-X
-(2) showalign.c/p_showalign.c have been merged into mshowalign.c
-X showbest.c/manshowbest.c have been merged into mshowbest.c. Some
-X of the related files (showun.c, manshowun.c, have not been merged
-X or tested).
-X
-(3) Code for ranking scores with valid e_value's incorporated.
-X
-(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
-X provide identical statistics.
-X
->>July 26, 2002
-X
-Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
-by providing the worker program name from the h_init() function in the
-initfa.c/initsw.c files.
-X
->>July 24, 2002
-X
-Substantial modifications to param.h, structs.h to ensure that no
-sequence specific information is kept in struct pstruct. This
-structure now holds the pam[] matrix, and other scoring parameters,
-but nothing that is dependent on aa0. The aa0 dependent stuff (nm0,
-Lambda, K, etc) is now stored in struct mngmsg. This was mostly done
-to support the pv34comp* programs, which have separate mngmsg
-structures but the same pstructs.
-X
-The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
-successfully.
-X
->>July 19, 2002
-X
-Fix an old bug in the calculation of E()-values in DNA databases
-longer than 2147483647 residues on machines with 32-bit longs.
-X
-X
->>July 28-31, 2002
-X
-(1) The various Makefile's have been "normalized". The fast*34[_t]
-X (Makefile.34m.common[_sql]), Makefile.pvm4[_sql], and
-X Makefile.mpi4[_sql] make files all use a common set of filenames,
-X described in Makefile.fcom. This greatly simplifies adding
-X programs, but requires that all *.o files be deleted when moving
-X from fast*34* to pv34comp* to mp34comp*.
-X
-(2) showalign.c/p_showalign.c have been merged into mshowalign.c
-X showbest.c/manshowbest.c have been merged into mshowbest.c. Some
-X of the related files (showun.c, manshowun.c, have not been merged
-X or tested).
-X
-(3) Code for ranking scores with valid e_value's incorporated.
-X
-(4) Bug fixes in p2_complib.c, so that fasts34/fasts34_t/pvcompfs
-X provide identical statistics.
-X
->>July 26, 2002
-X
-Makefile.pvm4_sql and Makefile.pvm4 have been substantially simplified
-by providing the worker program name from the h_init() function in the
-initfa.c/initsw.c files.
-X
->>July 24, 2002
-X
-Substantial modifications to param.h, structs.h to ensure that no
-sequence specific information is kept in struct pstruct. This
-structure now holds the pam[] matrix, and other scoring parameters,
-but nothing that is dependent on aa0. The aa0 dependent stuff (nm0,
-Lambda, K, etc) is now stored in struct mngmsg. This was mostly done
-to support the pv34comp* programs, which have separate mngmsg
-structures but the same pstructs.
-X
-The fasts34, fasts34_t, and pv34compfs/c34.workfs have all been tested
-successfully.
-X
->>July 8, 2002
-X
-Modifications to comp_lib.c, initfa.c and new scaleswt.c, tatstats.c
-to support FASTS with Tatusov statistics.
-X
-last_params() has been introduced to allow aa0 dependent changes in m_msg/pstr.
-X
-sortbest() has been moved into initfa.c/initsw.c to make it function specific.
-X
-find_z() takes an additional parameter, escore.
-X
-The do_work() results structure, beststr, and stat_str all accommodate
-escores as well as integer scores (stat_str also saves segn and segl
-but doesn't need them).
-X
-In scaleswt.c, process_hist() now knows much more about Tatusov statistics.
-X
-last_stats() provided to accommodate rank-based statistical corrections.
-X
-scale_scores() is the last function to modify the beststr scores
-(final calculation of E-value).
-X
-Some sortbest*() calls and some bptr[i]->zscore=find_zp() loops have
-been moved into scale_scores();
-X
->>July 3,5, 2002
-X
-Modifications to allow mySQL comments (--) in "library.sql 16" files.
-Thus, a first line of:
-X
-X --host seqdb user password;
-X
-is read by FASTA as the login information to a mySQL server, but is
-ignored by mySQL. "DO" commands in FASTA mySQL files can also be
-rendered invisible to mySQL in this way. See "do.sql".
-X
-Modifications to mysql_lib.c to allow very long SQL statements. The
-buffer is now dynamically reallocated in 4Kb chunks.
-X
-The fasta3.1 man page has been updated and re-organized.
-X
->>June 26, 2002
-X
-Minor modifications to nmgetaa.c (openlib()) to use the same arguments
-for searching and PRSS. PRSS needs access to all of m_msg, but
-searches do not. Other small fixes to comp_mlib.c, towards the goal
-of merging comp_mlib.c and comp_lib.c.
-X
->>June 25, 2002
-X
-Modify the statistical estimation strategy to sample all the sequences
-in the database, not just the first 60,000. The histogram is still
-based only on the first 60,000 scores and lengths, though all scores
-and lengths are shown. The fit to the data may be better than the
-histogram indicates, but it should not be worse.
-X
-Currently, this modification is available only if the -DSAMPLE_STATS
-option is defined.
-X
->>June 23, 2002 CVS fa34t11d4
-X
-Fix a very long-standing bug in fasty/tfasty that caused 'NNN' to be
-translated as 'S', rather than 'X'. fastx/tfastx has done this
-correctly for many years, but the fasty/tfasty code that I received
-from Zheng Zhang was not implemented correctly (my fault, his code was
-fine).
-X
->>June 19, 2002
-X
-Added "-C #" option, where 6 <= # <= MAX_UID (20), to specify the
-length of the sequence name display on the alignment labels. Until
-now, only 6 characters were ever displayed. Now, up to MAX_UID
-characters are available.
-X
->>May 30, 2002 CVS fa34t11d3
-X
-Fixed problem with programs using the default -E cutoff when -b was
-provided. With this implementation, -E can override -b, but -b
-overrides the default -E.
-X
-Fixed problem with 64-bit file offsets in param.h (change USE_FSEEK0
--> USE_FSEEKO, include -D_LARGEFILE_SOURCE and -D_LARGEFILE64_SOURCE
-in Makefile.linux_sql). Put limits on alignment display length (200
-chars). More checks for null returns from SQL queries.
-X
->>Apr 17, 2002 CVS fa34t11d2
-X
-Fixed bug in mm_file.h/ncbl2_mlib.c that caused the SGI version to be
-unable to read blast2 format files.
-X
-Changed "mp_*" tags to "pg_*" for -m 10 option.
-X
->>Mar 30, 2002
-X
-Fix embarrassing bug in revcomp() (getseq.c) that failed to complement
-the central nucleotide in a sequence with an odd number of residues.
-X
-Small changes to dropfs.c for more segments.
-X
->>Mar 16, 2002
-X
-Added create_seq_demo.sql, nt_to_sql.pl to show how to build an SQL
-protein sequence database that can be used with the mySQL
-versions of the fasta34 programs. Once the mySQL seq_demo database
-has been installed, it can be searched using the command:
-X
-X fasta34 -q mgstm1.aa "seq_demo.sql 16"
-X
-mysql_lib.c has been modified to remove the restriction that mySQL
-protein sequence unique identifiers be integers. This allows the
-program to be used with the PIRPSD database. The RANLIB() function
-call has been changed to include "libstr", to support SQL text keys.
-Due to the size of libstr[], unique ID's must be < MAX_UID (20)
-characters.
-X
-A "pirpsd.sql" file is available for searching the mySQL distribution
-of the PIRPSD database. PIRPSD is available from
-ftp://nbrfa.georgetown.edu/pir_databases/psd/mysql.
-X
->>Mar 6, 2002
-X
-Fix showbest.c showbest() to report pst.zdb_size as database size.
-Fix dropnfa.c spam() to address off-by-one on end of run, and double
-counting on backwards scan. Fix dropnfa.c do_fasta() to fix another
-problem introduced by -S. Changes to comp_lib.c to ensure that both
-the beginning and end of the query and library sequence have '\0'
-present. Changes to initfa.c, initsw.c to ensure that a match to a
-lower-case letter with -S gets exactly the same score as a match to an
-'X'. Changes to mmgetlib.c to work with 64-bit longs in *.xin files.
-X
->>Feb 26, 2002
-X
-Fixes to doinit.c, initfa.c, initsw.c to allow DNA matrices using the
-"-s dna.mat" option. A new matrix, "d50ry.mat" is available that
-scores +5 for a match, -2 for a transition, and -5 for a
-transversion. "d50ry.mat" corresponds to DNA PAM50 with transitions
-twice as common as transversions. When "-s dna.mat" is used, "-n"
-MUST be used as well.
-X
-Query sequence names ("aa", "nt") should be more accurate.
-X
->>Feb 22, 2002
-X
-Fix to getseq.c to allow "plain" sequence files.
-X
->>Feb 12, 2002
-X
-Minor fix to res_stats.c.
-X
->>Jan 28, 2002
-X
-Fixes to resurrect res_stats.c. res_stats (cc -o res_stats
-res_stats.c scaleswn.c -lm) takes the output from a current "-R
-file.res" file and calculates statistical significance - this allows
-one to take exactly the same set of scores (and lengths) and calculate
-statistical estimates using different strategies.
-X
->>Jan 24, 2002
-X
-modifications to mmgetlib.c, ncbl2_mlib.c to more robustly read memory
-mapped files (*.xin, map_db) on machines lacking "native" 64-bit
-longs. If the machine provides some definition for a 64-bit long
-(e.g. "long long", "int64_t"), things should work. 64-bit offsets into
-memory mapped files work properly on Alpha, SGI, i386 Linux, and
-MacOSX. The current implementation depends either on 64 bit longs
-(Compaq Alpha's pre 4.0G) or the <sys/inttype.h> file. Makefile,
-Makefile.alpha, and Makefile.linux have been modified.
-X
-Modifications to nmgetlib.c, mmgetlib.c to provide GI numbers and
-Accession versions for Genbank searches. If the GI:123456 number is
-available, it will be used and the description line will be formatted:
-X
-X gi|123456|gb|ACC1234.1|LOCUS description
-X
-This should help FAST_PAN runs, where the version of a sequence
-changes frequently.
-X
->>Jan 10, 2002
-X
-Modifications to p2_complib.c, p2_workcomp.c to more reliably allocate
-space for library sequence descriptions on the master and workers.
-X
->>Jan 2-3, 2002 CVS fa34t10c/fa34t10d3
-X
-Fixes to comp_lib.c to support Macintosh and Windows/Turbo-C
-compilation. New Makefile.tc. Macintosh version supports both
-"Classic" and "Carbon" environments.
-X
-"<values.h>" has been replaced with the more modern "<limits.h>"
-X
-Fixes to p2_complib.c to support n_libstr (libstr length) in GETLIB().
-X
-comp_thr.c, complib.c removed.
-X
->>Dec 16, 2001
-X
-Complete integration of comp_mlib.c with both the unthreaded and
-threaded programs. Comp_mlib allows fasta34 and fasta34_t to compare
-a database with a second database, just as pv34compfa does. Using
-multiple queries with fasta34_t is not as efficient as pv34compfa (and
-it cannot use networks of Unix workstations), but it is much easier to
-use and install.
-X
-With the comp_mlib.c option, fasta34 cannot automatically recognize
-DNA sequences, just as pv34compfa no longer recognizes DNA sequences.
-You must use the "-n" option to search with DNA sequences. The other
-programs (fastx34, tfastx34, etc) "know" the type of the query and
-database sequences, so "-n" is only required for fasta34(_t).
-X
->>Dec 14, 2001 CVS tag fa34t10b
-X
-Fix problems reading DNA databases in blast2 format.
-X
->>Dec 11, 2001
-X
-Changes to spam() in dropnfa.c so that, for DNA sequences, the
-previous behavior for finding the boundaries of a local alignment
-region use the same algorithm as previous versions of fasta. For
-protein sequences, the algorithm will extend the local region beyond
-the "ktup" boundaries if a better score can be found. For DNA
-sequences, this raises the noise rather than increasing sensitivity,
-so it is turned off and "ktup" boundaries are respected. The old,
-"ktup" boundary algorithm is available with -DNOSPAM_EXT.
-X
-This version also includes a working res_stats.c, which can be used to
-test various statistical estimates on exactly the same set of scores.
-X
-Fixed problems with -m 9 percent identity for fastx/fasty/tfastx/tfasty.
-These errors have been present since -m 9 was implemented.
-X
->>Dec 10, 2001
-X
-Fix to map_db.c to work correctly with files > 2 Gb when 64-bit longs
-are available. It is not yet designed to work with ftello() and other
-offset types.
-X
->>Nov 11,21, 2001 CVS tag fa34t10a, fa34t10d1
-X
-Substantial changes to revcomp(), getseq(), and other functions to
-correct problems with -S on DNA sequences. Sequences with lower case
-nucleotides were not recognized or reverse complemented properly.
-X
-Fix to dropnfa.c (v34t07, Nov 21, 2001) bg_align() to re-initialize
-static globals - this fixes a problem encountered with pv34compfa. A
-new main program, comp_mlib.c has been added to the CVS archive,
-although it is not referenced in any of the Makefiles. comp_mlib.c
-works like p2_complib.c and compares a library against another
-library.
-X
->>Nov 4, 2001
-X
-Change to dropnfa.c spam () while(1) -> while(lpos <= dmax->stop).
-This fixes a problem with ktup=1 on Suns only, so far.
-X
->>Oct 4, 2001 CVS tag fa34t10
-X
-Add comp_lib.c file, which merges complib.c (unthreaded) and
-comp_thr.c (threaded) code into one file.
-X
-Modifications to nmgetlib.c, mmgetaa.c to allow Genbank flatfile
-format without DESCRIPTION or ACCESSION lines.
-X
-Additional fix for -S with ktup=1.
-X
->>Sept. 24, 2001
-X
-Fix to have correct gap-penalties for short scoring matrices with
-tfastx/fastx.
-X
->>Sept. 10, 2001 CVS tag fa34t05d6
-X
-Fix a bug introduced by -S fix in fa34t05d5. Also, try to remove
-changes in p34compfa compared to pv4compfa output.
-X
->>Sept. 6, 2001 CVS tag fa34t05d5
-X
-Fix the -S dropnfa/fx/fz2 bug that was not actually fixed in
-fa34t05d4. Incorporate the correct scaleswn.c referred to in
-fa34t05d4.
-X
->>Sept. 5, 2001 CVS tag fa34t05d4
-X
-Fix problem with m_msg.quiet that prevented interactive prompts for
-ktup, file name, etc with threaded programs.
-X
-Fix serious bug in dropnfa.c/dropfx.c/dropfz2.c that caused -S to work
-improperly on sequences with effective length of 3 or less.
-X
-Change to scaleswn.c to make mle_cen(), mle_cen2() more robust to cases
-where the top and bottom scores are the same.
-X
-Change p2_complib.c to avoid compiler complaints with (void *)wstage2p=NULL
-on some platforms.
-X
->>Aug. 30, 2001 CVS tag fa34t05d3
-X
-Fixed problem with uthr_subs.c for Suns, but changed Makefile.sun to
-use pthreads rather than Sun Unix threads. Removed SQL stuff from
-Makefile.mpi4/pvm4 and added Makefile.mpi4_sql/pvm4_sql.
-X
-fa34t05d2 - fix to map_db.c to provide *sascii.
-X
-fa34t05d1 - fixes to ibm_pthr_subs.c and Makefile.ibm from IBM.
-X
->>Aug. 20, 2001 CVS tag fa34t05d0
-X
-The pvm/mpi complib programs have been substantially updated with
-release 3.4. See readme.v34t0 for more information. With version
-3.4, the MPI programs are mp34comp*, mu34comp*, etc.
-X
-A major effect of this change is to disable automatic sequence type
-(protein/DNA) recognition with pv34compfa/mp34compfa. By default,
-protein libraries are assumed. Thus, pv34compfa/mp34compfa require
-the "-n" command line option when running pv34compfa/mp34compfa on DNA
-sequence libraries. This issue does not occur with the other
-programs, which will recognize the appropriate sequence type, because
-it is determined by the program (e.g. pv34compfx requires
-DNA:protein).
-X
-Fixed substantial problem with 64-bit file offsets for Linux in
-complib.c/comp_thr.c, p2_complib.c. This problem, solved by Doug
-Blair, was preventing the threaded versions from working properly in
-memory mapped mode.
-X
-In all earlier versions of fasta, when very long sequences were
-searched, the sequence length reported was that of the "chunk" that
-was actually searched (typically 80,000-query_length) rather than the
-actual library sequence length. This peculiar behavior has now changed,
-and the full length of the library sequence, not the sequence chunk,
-is reported as the library sequence length. Note that chunks are
-still used, however, which can cause the same alignment to be shown
-twice. In addition, the "-m 9" output format has changed to report
-the coordinates of the query and library sequence (see below), which
-may be different from 1-sequence_length because the query and
-library sequences may have been extracted from larger sequences. Four
-additional fields have been added, "pn0", "px0", "pn1", "px1" that are
-the positions for the beginning (pn0/1) and end (px0/1) of the
-query/library sequence. pn0/1 would typically be changed with the
-"@C:#" directive, described below.
-X
-Changes to doinit.c/initfa.c/initsw.c to provide a new function -
-f_lastenv() - that allows function-specific adjustments to parameters
-after the command line options have been read but before the first
-sequence is read. This change solved problems with "mp/pv34compfx -S".
-X
-fasts34/tfasts34 now recognize that 'I/L' are the same, as are 'Q/K'
-(which are apparently indistinguishable by Mass-Spec). The latter
-identity is on by default, but can be turned off with "-h 0".
-X
-The MPI/PVM versions of the programs have been tested extensively with
-compfa, compfx, and comptfx. Makefile.mpi4 now works properly.
-Changes to p2complib.c to support the PVM option "-T 1-4", which
-allows one to run on nodes 1-4 of a (presumably larger) PVM virtual
-machine. This option has no effect on the mp34comp* programs. The
-old "-T 4" to run on 4 nodes, is also available. If each node has 2
-cpu's, as indicated in the "pvmd hostfile", both CPU's will be used
-for a total, in this example, of 8 processes. This allows one to
-specify a large PVM machine and use separate parts of it
-independently.
-X
-Changes to nmgetlib.c to fix problems with longer dates in GCG files
-(Y2K). Fixes to faatran.c for extended alphabets and 'X's. Various
-code clean-ups to make "gcc -Wall" a little bit (not much) happier.
-X
-This is the first distributed fasta34 version.
-X
-================
->>Aug 9, 2001 CVS tag fa34t05
-X
-Corrections to initfa.c to allow -S to work with tfastx/y.
-Fix to manshowbest.c for query position with -m 9.
-X
->>July 18, 2001 CVS tag fa34t04
-X
-Various changes to complib.c, comp_thr.c, p2_complib.c, showbest.c,
-showalign.c to deal with overlapping alignments in long sequences that
-have been segmented. When long sequences are segmented (lcont>0), the
-eventual total length (n1tot_v) is saved at beststr->n1tot_p. If
-there was no lcont, then beststr->n1tot_p = NULL, and beststr->n1
-should be used as the sequence length. This has the advantage of
-requiring space only when long sequences are encountered, and
-requiring only one integer for several segments.
-X
-m_msg.noshow has been removed.
-X
-The -m 9 format has been changed - 5 fields have been added, 4
-(pmn0/pmx0/pmn1/pmx1) provide the beginning and end coordinates of the
-query and library sequence; the last (fs) reports the number of
-frameshifts. The names of the alignment boundaries have been changed
-from min0/max0/min1/max1 to amn0/amx0/amn1/amx1 (Alignment miN/maX).
-X
-The SQL format has been extended to provide for statements that do
-things but do not generate results, such as creating and selecting into a temporary table, e.g.:
-================
-X do
-X create temporary table seq_pos (
-X id int unsigned not null auto_increment primary key,
-X prot_id int unsigned not null default 0,
-X start int unsigned not null default 0,
-X length int unsigned not null default 0,
-X )
-X ;
-X do
-X insert into seq_pos (prot_id, start, length)
-X select id, 11, len-10
-X from protein, annot
-X where len > 100
-X and annot.protein_id = protein.id
-X and annot.pref=1
-X ;
-X select seq_pos.id,
-X substring(protein.seq, start, length),
-X concat("@C:", start, " ", descr)
-X from protein, seq_pos, annot
-X where protein.id = annot.protein_id
-X and protein.id = seq_pos.prot_id
-X and annot.pref = 1
-X ;
-X select prot_id,
-X concat("@C:", start, " ", descr)
-X from seq_pos, annot
-X where annot.protein_id = seq_pos.prot_id
-X and seq_pos.id = #
-X and annot.pref = 1
-X ;
-================
-X
-X In the current implementation, these statements must start with "DO"
-as the first two characters on the line, and come immediately after a
-line ending with ';'. The text from "DO" to the next ";", excluding
-the "DO", is executed when the database connection is made.
-X
-===== >>July 12, 2001
-X
-The allocation of the work_info data structure used to send
-information to the worker threads has been changed. The old method
-worked, possibly by accident.
-X
-A bug in p2_complib.c that caused E()-values to be calculated
-improperly for the first query sequence has been fixed.
-X
->>July 11, 2001 --> fa34t02
-X
-It is now possible to specify output coordinates in library sequences
-by including the string: "@C:number" on the description line, e.g.
-X
-X >gtm1_human gi|12345 human glutathione transferase M1 @C:21
-X
-would label the first residue in the library sequence "21" rather than
-"1". This capability has been included to provide accurate
-coordinates for searches done against subsequences generated by an SQL
-query. For example, one could use a query of the form:
-X
-X SELECT protein.id, substring(protein.seq,11,length(protein.seq)-20),
-X concat(protein.name," @C:11 ",protein.descr)
-X FROM protein;
-X
-to generate a sequence set with each sequence starting with residue
-11. Without the "@C:11" option on the description line, the program
-would number the alignment positions starting at 1, even though the
-first residue of the sequence really started at 11. "@C:11" allows
-one to correct the coordinate system.
-X
-Currently, "@C:offset" is available only with library type 1 (fasta
-format) and 16 (mySQL).
-X
-The SQL-generated database with "@C:offset" can be used with both the
-fast*34(_t) programs and with pv34comp*. However, the SQL syntax is
-used differently in the fasta34 and pv34compfa programs. fast*34(_t)
-requires three SQL statements during a search: (1) a statement to
-generate a large set of library sequences; (2) a statement to generate
-a description of a single sequence, given a unique identifier provided
-by (1); and (3) a statement to generate a single sequence given a
-unique identifier provided by (1). For fast*34 searches, the third
-(3) SQL statement must provide the "@C:offset" information in the
-third results field for the offset to be used. It is optional in (1)
-and (2).
-X
-The pv34comp* programs only require one SQL statement, statement (1)
-above, which must provide three fields, a unique identifier, the
-sequence, and a complete description that must include "@C:offset" if
-substrings are used. If SQL queries (2) and (3) are provided, they
-are ignored. Thus, the same files can be used by both programs, but
-the "@C:offset" is required in different SQL queries by the fast*34
-and pv34comp* programs.
-X
-Other changes:
-X
-Re-incorporation of GAP_OPEN option; fix to Altschul-Gish stats when
-GAP_OPEN is used.
-X
-Re-incorporation of A. Mackey's spam() improvement in dropnfa.
-X
-Fixes to include file ordering to allow fast*34(_t) pv34comp* programs
-to compile.
-X
-Fix to lascii[] for SQL database queries.
-X
-Fix to an old bug in comp_thr.c to send individual worker_info
-structures to threads (does not fix LINUX threads problems, however).
-X
-=====
->>July 9, 2001
-X
-Considerable changes to support no-global library functions.
-X
-(1) Separate ascii/sequence mapping arrays are used by the
-X query-reading (qascii), library-reading (lascii), and sequence
-X comparison function (pascii) routines. As a result, there is no
-X longer a need for tgetlib.o/lgetlib.o - lgetlib.o can serve both
-X functions.
-X
-(2) This also allows us to remove all #ifdef TFAST/FASTX conditionals
-X from complib.c/comp_thr.c/p2_complib.c. We no longer need
-X tcomp_thr.o, comp_thrx.o, etc. We still have a variety of
-X p2_complib.o variations to support the different c34.work* files.
-X
-(3) Because non-global openlib/getlib functions are available, exactly
-X the same open/get functions are available for reading both the
-X query and reference libraries in pv34comp* programs. The
-X host-specific openlib/getlib functions in hxgetaa.c are now
-X provided by nmgetlib.c, etc. This has two effects:
-X
-X (a) it is now possible to compare a query database generated by an
-X SQL query to a library database generated by a different SQL
-X query.
-X
-X (b) pv34comp* has lost (at least in this version) the ability to
-X automatically detect the query sequence type. To search with a
-X DNA query, you MUST use "-n".
-X
-(4) the resetp() function is now responsible for almost all of the
-X function specific (TFAST/FASTX/etc) initializations. All of the
-X function specific code has been removed from complib.c/comp_thr.c
-X and most of it has been moved to initfa.c/resetp().
-X
-(5) manageacc.c has been merged into compacc.c (mostly prhist()).
-X
-=====
->>June 1, 2001
-X
-Many changes to accommodate a new - no global variable - strategy for
-reading sequence databases. Every time a file is opened, a struct
-lmf_str is allocated which can be used for memory mapped files, ncbl2
-files, and mysql files.
-X
-In addition, an open'ed file has a default sequence type: DNA or
-protein, or one can open a file in a mode that will allow the sequence
-type to be changed.
-X
-=====
->>May 18, 2001 CVS: fa33t09d0
-X
-A new compile time parameter - -DGAP_OPEN, is available to change the
-definition of the "-f gap-open" parameter from the penalty for the
-first residue in a gap to a true gap-open penalty, as is used in BLAST
-and many other comparison algorithms. This will probably become the
-default for fasta in version 3.4.
-X
-Fixes to conflicts between "-S" and "-s matrix". When a scoring
-matrix file was specified, lower-case alignments were not displayed
-with -S (although the scores were calculated properly).
-X
-More extensive testing of mysql_lib.c (mySQL query-libraries) with
-the pv4comp* and mp4comp* programs.
-X
-=====
->>April 5, 2001 CVS: fa33t08d4b3
-X
-Changes in nmgetlib.c and ncbl2_mlib.c to return long sequence
-descriptions for PCOMPLIB (pv4/mp3comp*). Also fix p2_complib.c to
-request DNA library for translated comparisons.
-X
-Fix for prss33(_t) to read both sequences from stdin.
-X
-=====
->>March 27, 2001 CVS: fa33t08d4
-X
-Modifications to allow 64-bit fseek/ftell on machines like Sun,
-Linux/Intel, that support -D_FILE_OFFSET_BITS=64, -D_LARGEFILE_SOURCE,
-off_t, and fseeko(), ftello() with the option -DUSE_FSEEKO. Machines
-with 64-bit long's do not need this option. Machines with 32-bit
-longs that allow files >2 Gb can do so with 64-bit file access
-functions, including fseeko() and ftello(), which work with off_t file
-offsets instead of long's.
-X
-=====
->>March 3, 2001 CVS: fa33t08d2
-X
-Corrected problems in nmgetaa.c and mysql_lib.c with parallel
-programs, and one serious problem with alternate DNA scoring matrices
-(initfa.c, initsw.c) not being set properly. A subtle problem with
-the merge of scaleswn.c and scaleswg.c is fixed.
-X
->>February 17, 2001
-X
-Modified mysql_lib.c to use "#", rather than "%ld", to indicate the
-position of the GID. This change was made because sprintf() cannot be
-used reliably to generate an SQL string, as '"' and '%' are used in
-such strings.
-X
-=====
->>January 17, 2001
-(no version change, date change)
-X
-Minor fixes to initfa.c, initsw.c to deal with DNA scoring matrices
-properly. "-n -s dna.mat" is required for the sequence/matrix to be
-recognized as DNA.
-X
->>January 16, 2001
--->v34t00
-X
-Merge of the main CVS trunk - fa33t06 with the latest release branch,
-fa33t08.
-X
-In addition, PCOMPLIB mods have been made to mysql_lib.c. Because
-p2_complib.c gets sequence description information during the first
-read of the database, the mysql_query must be changed to return:
-result[0]=GID, result[1]=description, result[2]=sequence. In the
-PCOMPLIB case, the other SQL queries (for GID description, sequence)
-are not necessary but must still be provided.
-SHAR_EOF
-chmod 0644 readme.v34t0 ||
-echo 'restore of readme.v34t0 failed'
-Wc_c="`wc -c < 'readme.v34t0'`"
-test 66121 -eq "$Wc_c" ||
- echo 'readme.v34t0: original size 66121, current size' "$Wc_c"
-fi
-# ============= readme.w32 ==============
-if test -f 'readme.w32' -a X"$1" != X"-c"; then
- echo 'x - skipping readme.w32 (File already exists)'
-else
-echo 'x - extracting readme.w32 (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'readme.w32' &&
-October 6, 2006
-X
-The FASTA programs for Windows32 environments (WindowsNT, 2000, XP)
-have undergone a major upgrade, so that now all the programs in the
-Unix/MacOSX distribution are available to Windows users. Moreover,
-Windows users with modern (SSE2 compatible) processors can run greatly
-accelerated versions of the Smith-Waterman ssearch program.
-X
-Moreover, these programs work both with FASTA formatted files, and
-NCBI BLAST formatted files.
-X
-The following programs are available:
-X
-X fasta34.exe protein-protein or DNA-DNA database searches
-X fastf34.exe
-X fastm34.exe
-X fasts34.exe
-X fastx34.exe compare DNA query to protein library with frameshifts
-X fasty34.exe compare DNA query to protein library with frameshifts
-X prfx34.exe
-X prss34.exe evaluate statistical significance using shuffles
-X prss34sse2.exe
-X ssearch34.exe Smith-Waterman for prot-prot or DNA-DNA searches
-X ssearch34sse2.exe Smith-Waterman, accelerated with SSE2 extensions
-X tfastf34.exe
-X tfastm34.exe
-X tfasts34.exe
-X tfastx34.exe compare protein to DNA library with frameshifts
-X tfasty34.exe compare protein to DNA library with frameshifts
-X
-Each of these programs also has a "threaded" version, which can run on
-multiple processors (or dual cores) if they are available. However,
-they are built using the Unix pthreads API, so to use these programs,
-you must download the pthreadVC2.dll from:
-X
-ftp://sources.redhat.com/pub/pthreads-win32/dll-latest/lib/pthreadVC2.dll
-X
-see also http://sourceware.org/pthreads-win32/
-X
-X fasta34_t.exe
-X fastf34_t.exe
-X fastm34_t.exe
-X fasts34_t.exe
-X fastx34_t.exe
-X fasty34_t.exe
-X prfx34_t.exe
-X prss34_t.exe
-X prss34sse2_t.exe
-X ssearch34_t.exe
-X ssearch34sse2_t.exe
-X tfastf34_t.exe
-X tfasts34_t.exe
-X tfastx34_t.exe
-X tfasty34_t.exe
-X
-Without that DLL, the threaded programs will not run at all. The
-current compilation supports two threads, and speeds up searches about
-2-fold on dual-core processors.
-X
-The programs have been tested with protein and DNA databases in FASTA
-format, PIR/GCG-text format, and Genbank flatfile format. The program
-does not work properly with GCG binary format databases, but it seems
-unlikely that Windows users would need these.
-X
-Be certain to use a program that can work with long file names when
-unpacking the program source files.
-X
-Please report bugs to:
-X
-X wrp@virginia.edu
-SHAR_EOF
-chmod 0644 readme.w32 ||
-echo 'restore of readme.w32 failed'
-Wc_c="`wc -c < 'readme.w32'`"
-test 2402 -eq "$Wc_c" ||
- echo 'readme.w32: original size 2402, current size' "$Wc_c"
-fi
-# ============= res_stats.c ==============
-if test -f 'res_stats.c' -a X"$1" != X"-c"; then
- echo 'x - skipping res_stats.c (File already exists)'
-else
-echo 'x - extracting res_stats.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'res_stats.c' &&
-/* calculate stats from results file using scalesws.c */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include <limits.h>
-#include <math.h>
-X
-#define MAX_LLEN 200
-X
-#define LN_FACT 10.0
-X
-#include "defs.h"
-#include "structs.h"
-#include "param.h"
-X
-/* One ranked library entry.  main() fills an entry from each accepted
-X results line; the globals declared with the struct are: best (entry
-X storage from initbest), bptr (pointer array that gets sorted),
-X bestptr (entry currently being filled) and bbp (cursor in showbest). */
-struct beststr {
-X int score; /* smith-waterman score */
-X int sscore; /* duplicate for compatibility with fasta */
-X double comp; /* 5th field of the results line */
-X double H; /* 6th field of the results line */
-X double zscore; /* (*find_zp)() value, or the raw score before statistics exist */
-X double escore; /* zs_to_E() value, filled in after sorting by zscore */
-X int n1; /* 3rd field of the results line; summed into db.length */
-#ifndef USE_FTELLO
-X long lseek; /* position in library file */
-#else
-X off_t lseek;
-#endif
-X int cont; /* offset into sequence */
-X int frame;
-X int lib; /* 2nd field of the results line (lsfnum in main) */
-X char libstr[13]; /* first 12 characters of the entry name, NUL terminated */
-} *bbp, *bestptr, **bptr, *best;
-X
-/* One sample handed to process_hist(); main() stores up to MAXSTATS of
-X these, copying the values parsed from a results line. */
-struct stat_str {
-X int score; /* best score kept for the entry (max_s in main) */
-X int n1; /* 3rd field of the results line */
-X double comp; /* 5th field of the results line */
-X double H; /* 6th field of the results line */
-};
-X
-static struct db_str qtt = {0l, 0l, 0};
-X
-char gstring2[MAX_STR]; /* string for label */
-char gstring3[MAX_STR];
-char hstring1[MAX_STR];
-X
-FILE *outfd;
-X
-int nbest; /* number of sequences better than bestcut in best */
-int bestcut=1; /* cut off for getting into MAXBEST */
-int bestfull;
-X
-int dohist = 0;
-int zsflag = 1;
-int outtty=1;
-int llen=40;
-X
-/* statistics functions */
-extern void
-process_hist(struct stat_str *sptr, int nstat, struct pstruct pst,
-X struct hist_str *hist, void **);
-extern void addhistz(double, struct hist_str *); /* scaleswn.c */
-void selectbestz(struct beststr **, int, int );
-X
-extern double zs_to_E(double, int, int, long, struct db_str);
-extern double zs_to_Ec(double zs, long entries);
-X
-extern double (*find_zp)(int score, int length, double comp, void *);
-X
-void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str,
-X int, struct db_str, char *);
-X
-int nshow=20, mshow=50, ashow= -1;
-double e_cut=10.0;
-X
-/*
-X  res_stats - read a file of saved scores, fit the score statistics,
-X  and print a histogram followed by the best-scoring entries.
-X
-X  usage: res_stats [-c col] [-r bin_file] [-z zsflag] [-n] [-s ndup] [-q] file
-X
-X    -c col       1-based score column (1-3) to analyze; default 1
-X    -r bin_file  file opened for writing (nothing is written to it while
-X                 the do_bout() calls remain commented out)
-X    -z zsflag    value stored in pst.zsflag
-X    -n           sets pst.dnaseq to 1
-X    -s ndup      number of consecutive lines folded into one entry,
-X                 keeping the largest score; default 1
-X    -q           never prompt (forces outtty to 0)
-X
-X  Input format, as parsed below: the first line holds the query length.
-X  A line that starts with a C comment opener is echoed, kept as the
-X  report label, and ends the input.  A line starting with ';' supplies a
-X  number after '|' and the query length after '('.  Every other line is
-X  "name id n1 frame comp H s1 s2 s3".
-X
-X  Exits with status 1 on usage, open or allocation errors.
-*/
-main(argc, argv)
-X     int argc; char **argv;
-{
-X  FILE *fin;
-X  char line[512];
-X  int icol, iarg, i, qsfnum, lsfnum, n0, n1, s[3], frame;
-X  int nfields, parsing;
-X  double comp, H;
-X  int idup, ndup, max_s;
-X  char libstr[20], *bp;
-X  char bin_file[80];
-X  FILE *bout=NULL;
-X  struct mngmsg m_msg;		/* Message from host to manager */
-X  struct pstruct pst;
-X  struct stat_str *stats;
-X  int nstats;
-X  double zscor;
-X
-#if defined(UNIX)
-X  outtty = isatty(1);
-#else
-X  outtty = 1;
-#endif
-X
-X  if (argc < 2 ) {
-X    fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
-X    exit(1);
-X  }
-X
-X  /* start from all-zero structures: prhist() receives both by value and
-X     reads members (hist.maxh, hist.stat_info, ...) that are not set
-X     explicitly below */
-X  memset(&m_msg,0,sizeof(m_msg));
-X  memset(&pst,0,sizeof(pst));
-X
-X  m_msg.db.length = qtt.length = 0l;
-X  m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
-X  m_msg.pstat_void = NULL;
-X  m_msg.hist.hist_a = NULL;
-X  m_msg.nohist = 0;
-X  m_msg.markx = 0;
-X
-X  pst.n0 = 200;		/* sensible dummy value */
-X  pst.zsflag = 1;
-X  pst.dnaseq = 0;
-X  pst.histint = 2;
-X
-X  bin_file[0]='\0';
-X  icol = 1;
-X  iarg = 1;
-X  ndup = 1;
-X
-X  /* options; stop at the first argument that is not a known option.
-X     iarg is checked against argc so that argv[argc] (NULL) is never
-X     dereferenced. */
-X  parsing = 1;
-X  while (parsing && iarg < argc && argv[iarg][0]=='-') {
-X    switch (argv[iarg][1]) {
-X    case 'c':
-X    case 'r':
-X    case 'z':
-X    case 's':
-X      if (iarg+1 >= argc) {
-X	fprintf(stderr," option -%c requires a value\n",argv[iarg][1]);
-X	exit(1);
-X      }
-X      if (argv[iarg][1]=='c') sscanf(argv[iarg+1],"%d",&icol);
-X      else if (argv[iarg][1]=='z') sscanf(argv[iarg+1],"%d",&pst.zsflag);
-X      else if (argv[iarg][1]=='s') sscanf(argv[iarg+1],"%d",&ndup);
-X      else {
-X	strncpy(bin_file,argv[iarg+1],sizeof(bin_file)-1);
-X	bin_file[sizeof(bin_file)-1]='\0';	/* strncpy may not terminate */
-X      }
-X      iarg += 2;
-X      break;
-X    case 'n':
-X      pst.dnaseq = 1;
-X      iarg += 1;
-X      break;
-X    case 'q':
-X      outtty = 0;
-X      iarg += 1;
-X      break;
-X    default:
-X      parsing = 0;
-X      break;
-X    }
-X  }
-X
-X  if (iarg >= argc) {
-X    fprintf(stderr," useage - res_stats -c col -r bin_file file\n");
-X    exit(1);
-X  }
-X
-X  icol--;
-X  if (icol < 0 || icol > 2) {	/* s[] holds exactly three scores */
-X    fprintf(stderr," -c column must be 1, 2 or 3\n");
-X    exit(1);
-X  }
-X
-X  if ((fin=fopen(argv[iarg],"r"))==NULL) {
-X    fprintf(stderr," cannot open %s\n",argv[iarg]);
-X    exit(1);
-X  }
-X
-X  if (bin_file[0]!='\0' && ((bout=fopen(bin_file,"w"))==NULL)) {
-X    fprintf(stderr,"cannot open %s for output\n",bin_file);
-X  }
-X
-X  if ((stats =
-X       (struct stat_str *)malloc((MAXSTATS)*sizeof(struct stat_str)))==NULL)
-X    s_abort ("Cannot allocate stats struct","");
-X  nstats = 0;
-X
-X  initbest(MAXBEST+1);	/* +1 required for select() */
-X
-X  for (nbest=0; nbest<MAXBEST+1; nbest++)
-X    bptr[nbest] = &best[nbest];
-X  bptr++; best++;
-X  best[-1].score= BIGNUM;
-X
-X  nbest = 0;
-X
-X  pst.Lambda=0.232;
-X  pst.K = 0.11;
-X  pst.H = 0.34;
-X
-X  /* read the best scores from the results file */
-X
-X  max_s = -1;
-X  idup = 0;
-X
-X  /* get first line with sequence length */
-X  n0 = 0;
-X  if (fgets(line,sizeof(line),fin)!=NULL) sscanf(line,"%d",&n0);
-X  if (n0 > 0) pst.n0 = n0;
-X
-X  while (fgets(line,sizeof(line),fin)!=NULL) {
-X    if (line[0]=='/' && line[1]=='*') {
-X      fputs(line,stdout);
-X      strncpy(gstring2,line,sizeof(gstring2)-1);
-X      gstring2[sizeof(gstring2)-1]='\0';
-X      if ((bp=strchr(gstring2,'\n'))!=NULL) *bp = '\0';
-X      break;
-X    }
-X
-X    if (line[0]==';') {
-X      if ((bp=strchr(line,'|'))!=NULL) qsfnum = atoi(bp+1);
-X      else continue;
-X      if ((bp=strchr(line,'('))!=NULL) {
-X	n0 = atoi(bp+1);
-X	pst.n0 = n0;
-X      }
-X      else {
-X	fprintf(stderr, "cannot find n0:\n %s\n",line);
-X      }
-X      continue;
-X    }
-X
-X    /* score line: require every field up to the selected column, so
-X       that values left over from an earlier line are never reused */
-X    lsfnum = n1 = 0;
-X    nfields = sscanf(line,"%19s %d %d %d %lf %lf %d %d %d",
-X		     libstr,&lsfnum,&n1,&frame,&comp, &H, &s[0],&s[1],&s[2]);
-X    if (nfields < 7+icol || (lsfnum==0 && n1==0)) {
-X      fputs(line,stderr);
-X      continue;
-X    }
-X    if (n1 < 10 || s[icol]<=0) fputs(line,stderr);
-X    idup++;
-X
-X    if (s[icol] > max_s) max_s = s[icol];
-X    if (idup < ndup) continue;
-X
-X    m_msg.db.entries++;
-X    m_msg.db.length += n1;
-X
-X    if (nstats < MAXSTATS) {
-X      stats[nstats].n1 = n1;
-X      stats[nstats].comp = comp;
-X      stats[nstats].H = H;
-X      stats[nstats++].score = max_s;
-X    }
-X    else if (!dohist) {
-X      /* do_bout(bout,stats,nstats); */
-X      process_hist(stats,nstats,pst,&m_msg.hist, &m_msg.pstat_void);
-X      for (i=0; i<nbest; i++)
-X	bptr[i]->zscore =
-X	  (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,
-X		     m_msg.pstat_void);
-X      dohist = 1;
-X    }
-X
-X    /* each entry is added to the histogram exactly once; an earlier
-X       second addhistz() call ahead of the block above counted every
-X       entry twice once dohist was set */
-X    if (dohist) {
-X      zscor =(*find_zp)(max_s,n1,comp,m_msg.pstat_void);
-X      addhistz(zscor,&m_msg.hist);
-X    }
-X    else zscor = (double)max_s;
-X
-X    if (nbest >= MAXBEST) {
-X      bestfull = nbest-MAXBEST/4;
-X      selectz(bestfull-1,nbest);
-X      bestcut = (int)(bptr[bestfull-1]->zscore+0.5);
-X      nbest = bestfull;
-X    }
-X    bestptr = bptr[nbest];
-X    bestptr->score = max_s;
-X    bestptr->sscore = max_s;
-X    bestptr->n1 = n1;
-X    bestptr->comp = comp;
-X    bestptr->H = H;
-X    bestptr->lib = lsfnum;
-X    bestptr->zscore = zscor;
-X    strncpy(bestptr->libstr,libstr,12);
-X    bestptr->libstr[12]='\0';
-X    nbest++;
-X
-X    max_s = -1;
-X    idup = 0;
-X  }	/* done with reading results */
-X
-X  fclose(fin);
-X  if (bout!=NULL) fclose(bout);
-X
-X  if (!dohist) {
-X    if (nbest < 20) {
-X      zsflag = 0;
-X    }
-X    else {
-X      /* do_bout(bout,stats,nstats); */
-X      process_hist(stats,nstats,pst,&m_msg.hist,&m_msg.pstat_void);
-X      for (i=0; i<nbest; i++)
-X	bptr[i]->zscore =
-X	  (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,m_msg.pstat_void);
-X      dohist = 1;
-X    }
-X  }
-X
-X  printf(" using n0: %d\n",pst.n0);
-X
-X  /* print histogram, statistics */
-X
-X  m_msg.nbr_seq = m_msg.db.entries;
-X  pst.zdb_size = m_msg.db.entries;
-X  /* get_param(&pst, gstring2,gstring3); */
-X
-X  prhist(stdout,m_msg,pst,m_msg.hist,nstats,m_msg.db,gstring2);
-X
-X  if (!zsflag) sortbest();
-X  else {
-X    sortbestz(bptr,nbest);
-X    for (i=0; i<nbest; i++)
-X      bptr[i]->escore = zs_to_E(bptr[i]->zscore,bptr[i]->n1,pst.dnaseq,
-X				pst.zdb_size, m_msg.db);
-X  }
-X
-X  outfd = stdout;
-X  showbest(m_msg.db);	/* display best matches */
-X  return 0;
-}
-X
-/* Allocate the zero-filled arrays used for ranking: best[] receives
-X  nbest entries and bptr[] receives nbest entry pointers.  Prints a
-X  message and exits with status 1 if either allocation fails. */
-initbest(nbest)
-X     int nbest;
-{
-X  size_t count = (size_t)nbest;
-X
-X  best = (struct beststr *)calloc(count,sizeof(struct beststr));
-X  if (best == NULL) {
-X    fprintf(stderr,"cannot allocate best struct\n");
-X    exit(1);
-X  }
-X
-X  bptr = (struct beststr **)calloc(count,sizeof(struct beststr *));
-X  if (bptr == NULL) {
-X    fprintf(stderr,"cannot allocate bptr\n");
-X    exit(1);
-X  }
-}
-X
-/*
-X  prhist - print the score histogram and the summary statistics.
-X
-X  fd        stream that receives the report
-X  m_msg     nohist suppresses the histogram; markx selects the extra
-X            MX_M10FORM summary written into hstring1
-X  pst       zsflag < 0 means no statistics are available
-X  hist      histogram bins (hist_a, maxh, min_hist, histint), the
-X            entry count and the stat_info text
-X  nstats    number of samples; 10 or fewer prints only the totals
-X  ntt       residue/sequence totals for the library
-X  gstring2  label printed at the end of the report
-X
-X  Each histogram row shows the observed count as '=' signs and the
-X  expected count (differences of zs_to_Ec() between bin edges) as '*'.
-X  When the upper-quarter bins are much smaller than the largest bin an
-X  inset with its own scale is drawn from column 10.
-*/
-void
-prhist(FILE *fd, struct mngmsg m_msg,
-X       struct pstruct pst,
-X       struct hist_str hist,
-X       int nstats,
-X       struct db_str ntt,
-X       char *gstring2)
-{
-X  int i,j,hl,hll, el, ell, ev;
-X  char hline[80], pch, *bp;
-X  int mh1, mht;
-X  int maxval, maxvalt, dotsiz, ddotsiz,doinset;
-X  double cur_e, prev_e, f_int;
-X  double max_dev, x_tmp;
-X  double db_tt;
-X  int n_chi_sq, cum_hl, max_i;
-X
-X  fprintf(fd,"\n");
-X
-X  if (pst.zsflag < 0 || nstats <= 10) {
-X    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);
-X    fprintf(fd,"\n%s\n",gstring2);
-X    return;
-X  }
-X
-X  /* these are reported after the histogram loop; give them defined
-X     values for the cases where the loop is skipped (nohist, or fewer
-X     than two bins) or where no bin qualifies for the deviation test */
-X  max_dev = 0.0;
-X  n_chi_sq = 0;
-X  cum_hl = 0;
-X  max_i = 0;
-X  cur_e = 0.0;
-X  el = ell = 0;
-X
-X  mh1 = hist.maxh-1;
-X  mht = (3*hist.maxh-3)/4 - 1;
-X
-X  if (!m_msg.nohist && mh1 > 0) {
-X    for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
-X      if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
-X      if (i >= mht && hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
-X    }
-X    n_chi_sq = 0;
-X    cum_hl = -hist.hist_a[0];
-X    dotsiz = (maxval-1)/60+1;
-X    ddotsiz = (maxvalt-1)/50+1;
-X    doinset = (ddotsiz < dotsiz && dotsiz > 2);
-X
-X    if (pst.zsflag>=0)
-X      fprintf(fd," opt E()\n");
-X    else
-X      fprintf(fd," opt\n");
-X
-X    prev_e = zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);
-X    for (i=0; i<=mh1; i++) {
-X      pch = (i==mh1) ? '>' : ' ';
-X      pch = (i==0) ? '<' : pch;
-X      hll = hl = hist.hist_a[i];
-X      if (pst.zsflag>=0) {
-X	cum_hl += hl;
-X	f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
-X	cur_e = (double)zs_to_Ec(f_int,hist.entries);
-X	ev = el = ell = (int)(cur_e - prev_e + 0.5);
-X	/* track the largest gap between cumulative observed and
-X	   expected counts over the bins that qualify */
-X	if (hl > 0 && i > 5 && i < (90-hist.min_hist)/hist.histint) {
-X	  x_tmp = fabs(cum_hl - cur_e);
-X	  if ( x_tmp > max_dev) {
-X	    max_dev = x_tmp;
-X	    max_i = i;
-X	  }
-X	  n_chi_sq++;
-X	}
-X	if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
-X	if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
-X	fprintf(fd,"%c%3d %5d %5d:",
-X		pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
-X		mh1*hist.histint+hist.min_hist,hl,ev);
-X      }
-X      else fprintf(fd,"%c%3d %5d :",
-X		   pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
-X		   mh1*hist.histint+hist.min_hist,hl);
-X
-X      /* scale the counts to at most 60 (main) / 40 (inset) symbols */
-X      if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
-X      if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
-X      for (j=0; j<hl; j++) hline[j]='=';
-X      if (pst.zsflag>=0) {
-X	if (el <= hl ) {
-X	  if (el > 0) hline[el-1]='*';
-X	  hline[hl]='\0';
-X	}
-X	else {
-X	  for (j = hl; j < el; j++) hline[j]=' ';
-X	  hline[el-1]='*';
-X	  hline[hl=el]='\0';
-X	}
-X      }
-X      else hline[hl] = 0;
-X      if (i==1) {
-X	for (j=hl; j<10; j++) hline[j]=' ';
-X	sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
-X      }
-X      if (doinset && i == mht-2) {
-X	for (j = hl; j < 10; j++) hline[j]=' ';
-X	sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
-X      }
-X      if (i >= mht&& doinset ) {
-X	for (j = hl; j < 10; j++) hline[j]=' ';
-X	hline[10]=':';
-X	for (j = 11; j<11+hll; j++) hline[j]='=';
-X	hline[11+hll]='\0';
-X	if (pst.zsflag>=0) {
-X	  if (ell <= hll) hline[10+ell]='*';
-X	  else {
-X	    for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
-X	    hline[10+ell] = '*';
-X	    hline[11+ell] = '\0';
-X	  }
-X	}
-X      }
-X
-X      fprintf(fd,"%s\n",hline);
-X      prev_e = cur_e;
-X    }
-X  }
-X
-X  if (ntt.carry==0) {
-X    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length, ntt.entries);
-X  }
-X  else {
-X    db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
-X    fprintf(fd, "%.0f residues in %5ld library sequences\n", db_tt, ntt.entries);
-X  }
-X
-X  if (pst.zsflag>=0) {
-X    if (MAXSTATS < hist.entries)
-X      fprintf(fd," statistics extrapolated from %d to %ld sequences\n",
-X	      MAXSTATS,hist.entries);
-X    /* summ_stats(stat_info); */
-X    fprintf(fd," %s\n",hist.stat_info);
-X    if (!m_msg.nohist && cum_hl > 0)
-X      fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%d) at %3d\n",
-X	      max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
-X    if (m_msg.markx & MX_M10FORM) {
-X      /* hist is a by-value copy, so the caller's text is not changed */
-X      while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
-X      if (cum_hl <= 0) cum_hl = -1;
-X      sprintf(hstring1,"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
-X	      MAXSTATS,hist.entries,hist.stat_info,max_dev/(double)cum_hl, n_chi_sq,max_i*hist.histint+hist.min_hist);
-X    }
-X  }
-X  fprintf(fd,"\n%s\n",gstring2);
-X  fflush(fd);
-}
-X
-/*
-X  showbest - list the best-scoring entries from the sorted bptr[] array.
-X
-X  ntt  library totals; ntt.entries is shown in the E() column heading
-X
-X  Interactive (outtty != 0): asks how many scores to show and then
-X  whether to show more, until the answer is 0 or empty.  End of input
-X  on stdin exits the program with status 0.
-X  Non-interactive: shows min(20,nbest) entries, stops at the first
-X  entry with escore above e_cut, and keeps extending the list ten at a
-X  time while the last entry shown is below e_cut and entries remain.
-X
-X  Output goes to outfd and is repeated on stdout when they differ.
-*/
-showbest(struct db_str ntt)
-{
-X  int ib, istart, istop;
-X  char bline[200], fmt[40], pad[200];
-X  char rline[20];
-X  int ntmp;
-X
-X  sprintf(fmt,"%%-%ds (%%3d)",llen-10);
-X
-X  nshow = min(20,nbest);
-X  mshow = min(20,nbest);
-X
-X  if (outtty) {
-X    printf(" How many scores would you like to see? [%d] ",nshow);
-X    fflush(stdout);
-X    if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
-X    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
-X    if (nshow<=0) nshow = min(20,nbest);
-X  }
-X  else nshow=mshow;
-X
-X  memset(pad,' ',llen-10);
-X  pad[llen-31]='\0';
-X  if (zsflag)
-X    fprintf(outfd,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
-X  else
-X    fprintf(outfd,"The best scores are:%s s-w\n",pad);
-X
-X  if (outfd != stdout) {
-X    if (zsflag)
-X      fprintf(stdout,"The best scores are:%s s-w Z-score E(%ld)\n",pad,ntt.entries);
-X    else
-X      fprintf(stdout,"The best scores are:%s s-w\n",pad);
-X  }
-X
-X  istart = 0;
-X l1:	istop = min(nbest,nshow);
-X  for (ib=istart; ib<istop; ib++) {
-X    bbp = bptr[ib];
-X
-X    if (!outtty && zsflag && bbp->escore > e_cut) {
-X      nshow = ib;
-X      goto done;
-X    }
-X
-X    sprintf(bline,"%-12s %d",bbp->libstr,bbp->lib);
-X    bline[13]='\0';
-X
-X    fprintf(outfd,fmt,bline,bbp->n1);
-X
-X    if (zsflag)
-X      fprintf(outfd,"%4d %4.1f %6.2g\n",
-X	      bbp->score,bbp->zscore,
-X	      bbp->escore);
-X    else
-X      fprintf(outfd,"%4d\n",bbp->score);
-X
-X    if (outfd!=stdout) {
-X      fprintf(stdout,fmt,bline,bbp->n1);
-X      if (zsflag)
-X	printf("%4d %4.1f %6.2g\n",
-X	       bbp->score,bbp->zscore,
-X	       bbp->escore);
-X      else
-X	printf("%4d\n",bbp->score);
-X    }
-X  }
-X
-X  fflush(outfd); if (outfd!=stdout) fflush(stdout);
-X
-X  if (outtty) {
-X    printf(" More scores? [0] ");
-X    fflush(stdout);
-X    if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
-X    ntmp = 0;
-X    if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
-X    if (ntmp<=0) ntmp = 0;
-X    if (ntmp>0) {
-X      istart = istop;
-X      nshow += ntmp;
-X      mshow += ntmp;
-X      goto l1;
-X    }
-X  }
-X  /* Extend the list only if this pass printed something (so bbp is the
-X     last entry shown) and entries remain.  Without these two tests the
-X     code looped forever once every entry had been shown with
-X     escore < e_cut, and dereferenced bbp when nothing had been shown. */
-X  else if (zsflag && istop > istart && istop < nbest &&
-X	   bbp->escore < e_cut) {
-X    istart=istop;
-X    nshow += 10;
-X    goto l1;
-X  }
-X
-X done:
-X  if (outfd!=stdout) fprintf(outfd,"\n");
-}
-X
-/* Partial sort of the global bptr[0..n-1] pointer array by descending
-X zscore, narrowing the partitioned range toward position k.  main()
-X calls it to keep the higher-scoring entries when the array fills up.
-X NOTE(review): the inner scans have no bounds test of their own; the
-X downward scan can reach bptr[-1], which main() arranges to exist but
-X whose zscore is not set explicitly - confirm that it stops the scan. */
-selectz(k,n) /* k is rank in array */
-X int k,n;
-{
-X int t, i, j, l, r;
-X double v;
-X struct beststr *tmptr;
-X
-X l=0; r=n-1;
-X
-X while ( r > l ) {
-X i = l-1;
-X j = r;
-X /* the last element of the current range is the pivot */
-X v = bptr[r]->zscore;
-X do {
-X /* i moves up past larger values, j moves down past smaller ones */
-X while (bptr[++i]->zscore > v ) ;
-X while (bptr[--j]->zscore < v ) ;
-X tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
-X } while (j > i);
-X /* undo the final swap (made after the scans crossed) and move the pivot to i */
-X bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
-X /* continue in the side that contains position k */
-X if (i>=k) r = i-1;
-X if (i<=k) l = i+1;
-X }
-}
-X
-/* Order bptr[0..nbest-1] with the cmps() comparison (highest score first). */
-sortbest()
-{
-X  int cmps();
-X  int (*by_score)() = cmps;
-X
-X  ksort(bptr,nbest,by_score);
-}
-X
-/* Order bptr[0..nbest-1] with the cmpe() comparison (lowest escore first). */
-sortbeste()
-{
-X  int cmpe();
-X  int (*by_escore)() = cmpe;
-X
-X  ksort(bptr,nbest,by_escore);
-}
-X
-/* Order bptr[0..nbest-1] with the cmpz() comparison (highest zscore
-X  first).  The function takes no parameters and always works on the
-X  globals, whatever arguments a caller supplies. */
-sortbestz()
-{
-X  int cmpz();
-X  int (*by_zscore)() = cmpz;
-X
-X  ksort(bptr,nbest,by_zscore);
-}
-X
-/* Comparison for descending score order: 1 when ptr1 has the lower
-X  score, -1 when it has the higher score, 0 when they are equal. */
-cmps(ptr1,ptr2)
-X     struct beststr *ptr1, *ptr2;
-{
-X  int lower = (ptr1->score < ptr2->score);
-X  int higher = (ptr1->score > ptr2->score);
-X
-X  return lower - higher;
-}
-X
-/* Comparison for ascending escore order: -1 when ptr1 has the lower
-X  escore, 1 when it has the higher escore, 0 otherwise. */
-cmpe(ptr1,ptr2)
-X     struct beststr *ptr1, *ptr2;
-{
-X  int lower = (ptr1->escore < ptr2->escore);
-X  int higher = (ptr1->escore > ptr2->escore);
-X
-X  return higher - lower;
-}
-X
-/* Comparison for descending zscore order: 1 when ptr1 has the lower
-X  zscore, -1 when it has the higher zscore, 0 otherwise. */
-cmpz(ptr1,ptr2)
-X     struct beststr *ptr1, *ptr2;
-{
-X  int lower = (ptr1->zscore < ptr2->zscore);
-X  int higher = (ptr1->zscore > ptr2->zscore);
-X
-X  return lower - higher;
-}
-X
-/* Sort the n pointers in v[] in place with a diminishing-gap insertion
-X  sort (gaps n/2, n/4, ... 1).  comp(a,b) returns a value greater than
-X  zero when a must come after b. */
-ksort(v,n,comp)
-X     char *v[]; int n, (*comp)();
-{
-X  int step, hi, lo;
-X  char *held;
-X
-X  step = n/2;
-X  while (step > 0) {
-X    for (hi = step; hi < n; hi++) {
-X      lo = hi - step;
-X      /* move the element down while it is out of order with the
-X	 element one step below it */
-X      while (lo >= 0 && (*comp)(v[lo],v[lo+step]) > 0) {
-X	held = v[lo];
-X	v[lo] = v[lo+step];
-X	v[lo+step] = held;
-X	lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-X
-/*
-do_bout(FILE *bout,struct stat_str **bptr, int nbest)
-{
-X int i, min_hist, max_hist;
-X double mu, var;
-X
-X if (bout==NULL) return;
-X
-X inithist();
-X for (i = 0; i<nbest; i++)
-X addhist(bptr[i]->score,bptr[i]->n1);
-X
-X for (i=0; i<MAX_LLEN; i++)
-X if (llen_hist[i]>0) {
-X min_hist=i;
-X break;
-X }
-X
-X for (i=MAX_LLEN-1; i>=0; i--)
-X if (llen_hist[i]>0) {
-X max_hist=i;
-X break;
-X }
-X
-X for (i=min_hist; i<=max_hist; i++) {
-X mu=(double)score_sums[i]/(double)llen_hist[i];
-X if (llen_hist[i]>1) {
-X var = ((double)score2_sums[i]-(double)llen_hist[i]*mu*mu)/
-X (double)(llen_hist[i]-1);
-X
-X fprintf(bout,"%d\t%d\t%.1f\t%.1f\t%.1f\t%.4f\t%.4f\n",
-X i,llen_hist[i],exp(((double)(i))/LN_FACT),
-X score_sums[i],score2_sums[i],mu,var);
-X }
-X }
-X free_hist();
-X fclose(bout);
-}
-*/
-X
-s_abort()
-{
-X exit(1);
-}
-SHAR_EOF
-chmod 0644 res_stats.c ||
-echo 'restore of res_stats.c failed'
-Wc_c="`wc -c < 'res_stats.c'`"
-test 16277 -eq "$Wc_c" ||
- echo 'res_stats.c: original size 16277, current size' "$Wc_c"
-fi
-# ============= rna.mat ==============
-if test -f 'rna.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping rna.mat (File already exists)'
-else
-echo 'x - extracting rna.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'rna.mat' &&
-# Sample rna matrix with +2 for G:A, TU:C
-X A C G T U R Y M W S K D H V B N X
-A 5 -4 2 -4 -4 2 -1 2 2 -1 -1 1 1 1 -2 -1 -1
-C -4 5 -4 2 2 -1 2 2 -1 2 -1 -2 1 1 1 -1 -1
-G 2 -4 5 -4 -4 2 -1 -1 -1 2 2 1 -2 1 1 -1 -1
-T -4 2 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
-U -4 2 -4 5 5 -1 2 -1 2 -1 2 1 1 -2 1 -1 -1
-R 2 -1 2 -1 -1 2 -2 -1 1 1 1 1 -1 1 -1 -1 -1
-Y -1 2 -1 2 2 -2 2 -1 1 1 1 -1 1 -1 1 -1 -1
-M 2 2 -1 -1 -1 -1 -1 2 1 1 -1 -1 1 1 -1 -1 -1
-W 2 -1 -1 2 2 1 1 1 2 -1 1 1 1 -1 -1 -1 -1
-S -1 2 2 -1 -1 1 1 1 -1 2 1 -1 -1 1 1 -1 -1
-K -1 -1 2 2 2 1 1 -1 1 1 2 1 -1 -1 1 -1 -1
-D 1 -2 1 1 1 1 -1 -1 1 -1 1 1 -1 -1 -1 -1 -1
-H 1 1 -2 1 1 -1 1 1 1 -1 -1 -1 1 -1 -1 -1 -1
-V 1 1 1 -2 -2 1 -1 1 -1 1 -1 -1 -1 1 -1 -1 -1
-B -2 1 1 1 1 -1 1 -1 -1 1 1 -1 -1 -1 1 -1 -1
-N -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-XX -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-SHAR_EOF
-chmod 0644 rna.mat ||
-echo 'restore of rna.mat failed'
-Wc_c="`wc -c < 'rna.mat'`"
-test 998 -eq "$Wc_c" ||
- echo 'rna.mat: original size 998, current size' "$Wc_c"
-fi
-# ============= sc_to_e.c ==============
-if test -f 'sc_to_e.c' -a X"$1" != X"-c"; then
- echo 'x - skipping sc_to_e.c (File already exists)'
-else
-echo 'x - extracting sc_to_e.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'sc_to_e.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: sc_to_e.c,v 1.2 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* sc_to_e uses statistical parameters from search and
-X score, length, and database size to calculate E()
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-X
-double mean_var, mu, rho;
-X
-/*
-X  sc_to_e - convert score/length pairs to E() values.
-X
-X  usage: sc_to_e [rho mu mean_var]
-X
-X  The three parameters are taken from the command line when exactly
-X  three are given, otherwise they are read from the first input line.
-X  Each following line holds "score length [db_size]"; db_size defaults
-X  to 50000 when missing or less than 1.  An empty line or end of input
-X  ends the program.
-X
-X  Unparsable parameters, or mean_var <= 0 (s_to_zv() divides by its
-X  square root), exit with status 1.  A line without a score and a
-X  positive length (s_to_zv() takes its logarithm) is reported and
-X  skipped instead of being evaluated with undefined values.
-*/
-main(argc, argv)
-X     int argc; char **argv;
-{
-X  char line[128];
-X  int score, length, db_size;
-X  int nparams;
-X  double z_val, s_to_zv(), zv_to_E();
-X
-X  nparams = 0;
-X  if (argc == 4) {
-X    nparams += (sscanf(argv[1],"%lf",&rho) == 1);
-X    nparams += (sscanf(argv[2],"%lf",&mu) == 1);
-X    nparams += (sscanf(argv[3],"%lf",&mean_var) == 1);
-X  }
-X  else {
-X    fprintf(stderr," enter rho mu mean_var: ");
-X    if (fgets(line,sizeof(line),stdin)!=NULL)
-X      nparams = sscanf(line,"%lf %lf %lf",&rho, &mu, &mean_var);
-X  }
-X
-X  if (nparams != 3) {
-X    fprintf(stderr," cannot read rho mu mean_var\n");
-X    exit(1);
-X  }
-X  if (mean_var <= 0.0) {
-X    fprintf(stderr," mean_var must be greater than 0\n");
-X    exit(1);
-X  }
-X
-X  while (1) {
-X    fprintf(stderr," enter score length db_size: ");
-X    if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
-X    if (line[0]=='\n') exit(0);
-X
-X    db_size = 0;	/* stays 0 (-> default) when the third field is absent */
-X    if (sscanf(line,"%d %d %d",&score, &length, &db_size) < 2) {
-X      fprintf(stderr," cannot read score and length from: %s",line);
-X      continue;
-X    }
-X    if (length < 1) {
-X      fprintf(stderr," length must be greater than 0\n");
-X      continue;
-X    }
-X    if (db_size < 1) db_size = 50000;
-X
-X    z_val = s_to_zv(score, length);
-X
-X    printf(" s: %d (%d) E(%d): %4.2g\n",score,length,db_size,zv_to_E(z_val,db_size));
-X  }
-}
-X
-/* Scale a raw score by the file-level regression parameters:
-X  (score - rho*ln(length) - mu) / sqrt(mean_var). */
-double s_to_zv(int score, int length)
-{
-X  double deviation;
-X
-X  deviation = (double)score - rho * log((double)length);
-X  deviation -= mu;
-X  return deviation/sqrt(mean_var);
-}
-X
-/* Convert a scaled score zv to the number of sequences expected to
-X  score at least as well in a database of db_size entries, using an
-X  extreme value distribution: e = exp(-(pi/sqrt(6))*zv - gamma), and
-X  E = db_size * (1 - exp(-e)).  For e <= 0.01 the result is simply
-X  db_size * e.
-X
-X  The scale constant used to be written 1.282554983, a mistyped
-X  pi/sqrt(6) (1.2825498...); both constants now carry the same values
-X  as PI_SQRT6 and EULER_G in scaleswn.c. */
-double zv_to_E(double zv, int db_size)
-{
-X  double e;
-X
-X  e = exp(-1.28254983016186409554 * zv - 0.57721566490153286060);
-X  return (double)db_size * (e > .01 ? 1.0 - exp(-e) : e);
-}
-SHAR_EOF
-chmod 0644 sc_to_e.c ||
-echo 'restore of sc_to_e.c failed'
-Wc_c="`wc -c < 'sc_to_e.c'`"
-test 1427 -eq "$Wc_c" ||
- echo 'sc_to_e.c: original size 1427, current size' "$Wc_c"
-fi
-# ============= scaleswn.c ==============
-if test -f 'scaleswn.c' -a X"$1" != X"-c"; then
- echo 'x - skipping scaleswn.c (File already exists)'
-else
-echo 'x - extracting scaleswn.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'scaleswn.c' &&
-/* scaleswn.c */
-X
-/* $Name: fa_34_26_5 $ - $Id: scaleswn.c,v 1.60 2007/04/26 18:32:48 wrp Exp $ */
-X
-/* as of 24 Sept, 2000 - scaleswn uses no global variables */
-X
-/*
-X Provide statistical estimates using an extreme value distribution
-X
-X copyright (c) 1995, 1996, 2000 William R. Pearson
-X
-X This code provides multiple methods for scaling sequence
-X similarity scores to correct for length effects.
-X
-X Currently, the following methods are available:
-X
-X pst.zsflag = 0 - no scaling (AVE_STATS)
-X pst.zsflag = 1 - regression-scaled scores (REG_STATS)
-X pst.zsflag = 2 - (revised) MLE Lambda/K scaled scores (MLE_STATS)
-X pst.zsflag = 3 - scaling using Altschul's parameters (AG_STATS)
-X pst.zsflag = 4 - regression-scaled with iterative outlier removal (REGI_STATS)
-X pst.zsflag = 5 = like 1, but length scaled variance (REG2_STATS)
-X pst.zsflag = 6 = like 2, but uses lambda composition/scale (MLE2_STATS)
-X pst.zsflag = 11 = 10 + 1 - use random shuffles, method 1
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-X
-#include <limits.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "structs.h"
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#define MAXHIST 50
-#define MAX_LLEN 200
-#define LHISTC 5
-#define VHISTC 5
-#define MAX_SSCORE 300
-X
-#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
-X
-#define LN_FACT 10.0
-#ifndef M_LN2
-#define M_LN2 0.69314718055994530942
-#endif
-#define EULER_G 0.57721566490153286060
-#define PI_SQRT6 1.28254983016186409554
-X
-#ifndef M_SQRT2
-#define M_SQRT2 1.41421356237
-#endif
-#define LN200 5.2983173666
-#define ZS_MAX 400.0 /* used to prevent underflow on some machines */
-#define TOLERANCE 1.0e-12
-#define TINY 1.0e-6
-X
-/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
-/* Parameters of the regression-based fits; stored as the "rg" member of
-X the union in struct pstat_str and filled by fit_llen()/fit_llens(). */
-struct rstat_str {
-X double rho, rho_e, mu, mu_e, mean_var, var_e; /* ?_e:std. error of ? */
-/* used by REG2_STATS */
-X double rho2, mu2, var_cutoff;
-X /* n_trimmed is incremented by proc_hist_r() for scores whose z-score is below 20.0 or above the trim limit */
-X int n_trimmed; /* excluded because of high z-score */
-X int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
-};
-X
-/* used by AG_STATS, MLE_STATS */
-/* Stored as the "ag" member of the union in struct pstat_str.
-X NOTE(review): K, Lambda and H presumably mirror the pstruct values of
-X the same names; the a_n0 fields are not used in the code shown here. */
-struct ag_stat_str {
-X double K, Lambda, H, a_n0f, a_n0;
-};
-X
-/* used by MLE2_STATS */
-/* Stored as the "m2" member of the union in struct pstat_str.
-X NOTE(review): the members are not referenced in the code shown here;
-X their meaning should be confirmed against proc_hist_ml2()/find_ze2(). */
-struct mle2_stat_str {
-X double a_n0;
-X double mle2_a0, mle2_a1, mle2_a2, mle2_b1;
-X double ave_comp, max_comp, ave_H;
-};
-X
-/* Statistical state shared by the proc_hist_*() fitting functions and
-X the find_z*() scoring functions.  process_hist() allocates or clears
-X one of these and copies Lambda, K and H from its m_msg argument into
-X the ng* members. */
-struct pstat_str {
-X double ngLambda, ngK, ngH; /* passed to calc_thresh() by proc_hist_r() */
-X /* one parameter set per family of methods; only one is live at a time */
-X union {
-X struct rstat_str rg;
-X struct ag_stat_str ag;
-X struct mle2_stat_str m2;
-X } r_u;
-};
-X
-#define AVE_STATS 0 /* no length effect, only mean/variance */
-double find_zn(int score, double escore, int len, double comp, struct pstat_str *);
-X
-int proc_hist_n(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-X
-#define REG_STATS 1 /* length-regression scaled */
-#define REGI_STATS 4 /* length regression, iterative */
-double find_zr(int score, double escore, int len, double comp, struct pstat_str *);
-int proc_hist_r(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu);
-X
-#define MLE_STATS 2 /* MLE for lambda, K */
-double find_ze(int score, double escore, int len, double comp, struct pstat_str *);
-int proc_hist_ml(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-X
-#define AG_STATS 3 /* Altschul-Gish parameters */
-double find_za(int score, double escore, int len, double comp, struct pstat_str *);
-int proc_hist_a(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-X
-#define REG2_STATS 5 /* length regression on mean + variance */
-double find_zr2(int score, double escore, int len, double comp, struct pstat_str *);
-int proc_hist_r2(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-X
-#define MLE2_STATS 6 /* MLE stats using comp(lambda) */
-double find_ze2(int score, double escore, int length, double comp, struct pstat_str *);
-int proc_hist_ml2(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-X
-#ifdef USE_LNSTATS
-#define LN_STATS 2
-double find_zl(int score, double escore, int len, double comp, struct pstat_str *);
-int proc_hist_ln(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct pstat_str *);
-#endif
-X
-/* scaleswn.c local variables that belong in their own structure */
-X
-double (*find_zp)(int score, double escore, int len, double comp, struct pstat_str *) = &find_zr;
-X
-/* void s_sort (double **ptr, int nbest); */
-void ss_sort ( int *sptr, int n);
-X
-/* Working histogram used while fitting; set up by inithist(), filled by
-X addhist(), trimmed by prune_hist() and released by free_hist().
-X NOTE(review): the arrays are presumably indexed by a length bin -
-X confirm against inithist()/addhist(), which are not shown here. */
-struct llen_str {
-X int min, max;
-X int max_score, min_score; /* proc_hist_r() abandons the fit when these differ by less than 10 */
-X int *hist;
-X double *score_sums, *score2_sums;
-X double *score_var;
-X int max_length, min_length, zero_s;
-X int fit_flag; /* set to 1 before fitting; tested for 0 afterwards to detect a failed fit */
-};
-X
-static void inithist(struct llen_str *, struct pstruct, int);
-static void free_hist( struct llen_str *);
-static void addhist(struct llen_str *, int, int, int);
-static void prune_hist(struct llen_str *, int, int, int, long *);
-void inithistz(int, struct hist_str *histp);
-void addhistz(double zs, struct hist_str *histp);
-void addhistzp(double zs, struct hist_str *histp);
-X
-static void fit_llen(struct llen_str *, struct rstat_str *);
-static void fit_llen2(struct llen_str *, struct rstat_str *);
-static void fit_llens(struct llen_str *, struct rstat_str *);
-X
-extern void sortbeste(struct beststr **bptr, int nbest);
-X
-/* void set_db_size(int, struct db_str *, struct hist_str *); */
-X
-#ifdef DEBUG
-FILE *tmpf;
-#endif
-X
-/*
-X  process_hist - fit the statistical parameters for a set of scores and
-X  select the matching z-score function.
-X
-X  sptr, nstats  the sampled scores and their number
-X  m_msg         supplies Lambda, K and H, which are copied into the
-X                statistics structure
-X  pst           pst.zsflag selects the method; a value of 10 or more
-X                selects method (zsflag - 10) with trimming turned off.
-X                With fewer than 10 samples AG_STATS is used instead.
-X  histp         histogram; rebuilt from the samples when do_hist is 0
-X  ps_sp         in/out: the statistics structure.  It is allocated here
-X                when *ps_sp is NULL and cleared otherwise; it is set to
-X                NULL when pst.zsflag is negative.
-X  do_hist       when 0, the histogram is re-initialized with MAXHIST
-X                bins and the z-score of every sample is added to it
-X
-X  Sets the file-level find_zp pointer as a side effect.  Returns
-X  pst.zsflag as updated by the chosen proc_hist_*() function, or the
-X  unchanged negative value.  Exits with status 1 if the statistics
-X  structure cannot be allocated.
-*/
-int
-process_hist(struct stat_str *sptr, int nstats,
-X	     struct mngmsg m_msg,
-X	     struct pstruct pst,
-X	     struct hist_str *histp,
-X	     struct pstat_str **ps_sp,
-X	     int do_hist)
-{
-X  int zsflag, do_trim, i;
-X  struct pstat_str *ps_s;
-X
-X  if (pst.zsflag < 0) {
-X    *ps_sp = NULL;
-X    return pst.zsflag;
-X  }
-X
-X  if (*ps_sp == NULL) {
-X    if ((ps_s=(struct pstat_str *)calloc(1,sizeof(struct pstat_str)))==NULL) {
-X      /* sizeof yields a size_t; convert it to match the %ld conversion */
-X      fprintf(stderr," cannot allocate pstat_union: %ld\n",
-X	      (long)sizeof(struct pstat_str));
-X      exit(1);
-X    }
-X    else *ps_sp = ps_s;
-X  }
-X  else {
-X    ps_s = *ps_sp;
-X    memset(ps_s,0,sizeof(struct pstat_str));
-X  }
-X
-X  ps_s->ngLambda = m_msg.Lambda;
-X  ps_s->ngK = m_msg.K;
-X  ps_s->ngH = m_msg.H;
-X
-X  if (nstats < 10) pst.zsflag = AG_STATS;
-X
-X  zsflag = pst.zsflag;
-X
-/*
-#ifdef DEBUG
-X  if (pst.debug_lib) {
-X    tmpf=fopen("tmp_stats.res","w+");
-X    for (i=0; i<nstats; i++) fprintf(tmpf,"%d\t%d\n",sptr[i].score,sptr[i].n1);
-X    fclose(tmpf);
-X  }
-#endif
-*/
-X
-X  if (zsflag >= 10) {
-X    zsflag -= 10;
-X    do_trim = 0;
-X  }
-X  else do_trim = 1;
-X
-X  /* NOTE(review): this branch is guarded by USE_LNSCALE, while LN_STATS,
-X     find_zl and proc_hist_ln are declared under USE_LNSTATS earlier in
-X     the file - confirm which macro name is intended.  The call now
-X     passes pst, as the proc_hist_ln prototype requires. */
-#ifdef USE_LNSCALE
-X  if (zsflag==LN_STATS) {
-X    find_zp = &find_zl;
-X    pst.zsflag = proc_hist_ln(sptr, nstats, pst, histp, do_trim, ps_s);
-X  }
-#else
-X  if (zsflag==MLE_STATS) {
-X    find_zp = &find_ze;
-X    pst.zsflag = proc_hist_ml(sptr, nstats, pst, histp, do_trim, ps_s);
-X  }
-#endif
-X  else if (zsflag==REG_STATS) {
-X    find_zp = &find_zr;
-X    pst.zsflag = proc_hist_r(sptr, nstats,pst, histp, do_trim,  ps_s);
-X  }
-X  else if (zsflag==AG_STATS) {
-X    find_zp = &find_za;
-X    pst.zsflag = proc_hist_a(sptr, nstats, pst, histp, do_trim,  ps_s);
-X  }
-X  else if (zsflag==REGI_STATS) {
-X    find_zp = &find_zr;
-X    pst.zsflag = proc_hist_r2(sptr,nstats, pst, histp, do_trim,  ps_s);
-X  }
-X  else if (zsflag==REG2_STATS) {
-X    find_zp = &find_zr2;
-X    pst.zsflag = proc_hist_r(sptr,nstats,pst, histp, do_trim,  ps_s);
-X  }
-#if !defined(TFAST) && !defined(FASTX)
-X  else if (zsflag == MLE2_STATS) {
-X    find_zp = &find_ze2;
-X    pst.zsflag = proc_hist_ml2(sptr, nstats, pst, histp, do_trim,  ps_s);
-X  }
-#endif
-X  else {	/* AVE_STATS */
-X    find_zp = &find_zn;
-X    pst.zsflag = proc_hist_n(sptr,nstats, pst, histp, do_trim,  ps_s);
-X  }
-X
-X  if (!do_hist) {
-X    histp->entries = nstats; /* db->entries = 0; */
-X    inithistz(MAXHIST, histp);
-X    for (i = 0; i < nstats; i++) {
-X      /* samples carrying a negative length are switched back to positive */
-X      if (sptr[i].n1 < 0) sptr[i].n1 = -sptr[i].n1;
-X      addhistz(find_zp(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp,ps_s),
-X	       histp);
-X    }
-X  }
-X  return pst.zsflag;
-}
-X
-/*
-X  calc_thresh - choose the score ceiling and z-score trim limit used
-X  while fitting.
-X
-X  pst      pst.dnaseq selects the assumed average library sequence
-X           length (5000 for SEQT_DNA/SEQT_RNA, otherwise 400) and the
-X           lambda scale factor (1.0, otherwise 0.7); pst.n0 is the
-X           query length
-X  nstats   number of samples collected so far
-X  Lambda, K  ungapped statistical parameters (H is not used)
-X  zstrim   out: 10*z + 50, where z is derived from 1/nstats through the
-X           extreme value constants (or is 5.0 under NORMAL_DIST)
-X
-X  Returns the score at which 0.01 = nstats*K*n0*ave_n1*exp(-Lambda*
-X  l_fact*score), rounded to the nearest integer (100 under NORMAL_DIST).
-*/
-int
-calc_thresh(struct pstruct pst, int nstats,
-X	    double Lambda, double K, double H, double *zstrim)
-{
-X  int score_ceiling;
-X  double ave_n1, l_fact;
-X  double tmp_score, z;
-X
-X  /* protein values, replaced below for nucleotide sequences */
-X  ave_n1 = 400.0;
-X  l_fact = 0.7;
-X  if (pst.dnaseq == SEQT_DNA || pst.dnaseq == SEQT_RNA) {
-X    ave_n1 = 5000.0;
-X    l_fact = 1.0;
-X  }
-X
-#ifdef NORMAL_DIST
-X  score_ceiling = 100;
-X  z = 5.0;
-#else
-X  /* the 0.70 lambda factor errs toward a higher ceiling when going
-X     from ungapped to gapped scores */
-X  tmp_score = 0.01/((double)nstats*K*(double)pst.n0*ave_n1);
-X  tmp_score = -log(tmp_score)/(Lambda*l_fact);
-X  score_ceiling = (int)(tmp_score+0.5);
-X
-X  z = 1.0/(double)nstats;
-X  z = (log(z)+EULER_G)/(- PI_SQRT6);
-#endif
-X
-X  *zstrim = 10.0*z+50.0;
-X  return score_ceiling;
-}
-X
-int
-proc_hist_r(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X int i, max_hscore;
-X double zs, ztrim;
-X char s_string[128];
-X struct llen_str llen;
-X char *f_string;
-X llen.fit_flag=1;
-X llen.hist=NULL;
-X
-X max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
-X pu->ngK, pu->ngH, &ztrim);
-X
-X inithist(&llen,pst,max_hscore);
-X
-X f_string = &(histp->stat_info[0]);
-X
-X for (i = 0; i<nstats; i++)
-X addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
-X
-X if ((llen.max_score - llen.min_score) < 10) {
-X free_hist(&llen);
-X llen.fit_flag = 0;
-X find_zp = &find_zn;
-X return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
-X }
-X
-X fit_llen(&llen, &(pu->r_u.rg)); /* now we have rho, mu, rho2, mu2, mean_var
-X to set the parameters for the histogram */
-X
-X if (!llen.fit_flag) { /* the fit failed, fall back to proc_hist_ml */
-X free_hist(&llen);
-X find_zp = &find_ze;
-X return proc_hist_ml(sptr,nstats, pst, histp, do_trim, pu);
-X }
-X
-X pu->r_u.rg.n_trimmed= pu->r_u.rg.n1_trimmed = pu->r_u.rg.nb_trimmed = 0;
-X
-X if (do_trim) {
-X if (llen.fit_flag) {
-X for (i = 0; i < nstats; i++) {
-X zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
-X if (zs < 20.0 || zs > ztrim) {
-X pu->r_u.rg.n_trimmed++;
-X prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
-X &(histp->entries));
-X }
-X }
-X }
-X
-X /* fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", pu->r_u.rg.n_trimmed); */
-X
-X if (llen.fit_flag) fit_llens(&llen, &(pu->r_u.rg));
-X
-X /* fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", pu->r_u.rg.n1_trimmed,pu->r_u.rg.nb_trimmed); */
-X }
-X
-X free_hist(&llen);
-X
-X /* put all the scores in the histogram */
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X if (pst.zsflag == REG2_STATS || pst.zsflag == 10+REG2_STATS)
-X sprintf(f_string,"%s Expectation_v fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n rho2=%6.2f; mu2= %6.2f, 0's: %d Z-trim: %d B-trim: %d in %d/%d",
-X s_string, pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),
-X pu->r_u.rg.rho2,pu->r_u.rg.mu2,llen.zero_s,
-X pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot);
-X else
-X sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d B-trim: %d in %d/%d\n Lambda= %8.6f",
-X s_string,
-X pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e), pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),
-X llen.zero_s, pu->r_u.rg.n_trimmed, pu->r_u.rg.n1_trimmed, pu->r_u.rg.nb_trimmed, pu->r_u.rg.nb_tot,
-X PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
-X return REG_STATS;
-}
-X
-X
-int
-proc_hist_r2(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X int i, nit, nprune, max_hscore;
-X double zs, ztrim;
-X char s_string[128];
-X char *f_string;
-X struct llen_str llen;
-X
-X llen.fit_flag=1;
-X llen.hist=NULL;
-X
-X max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
-X pu->ngK, pu->ngH, &ztrim);
-X
-X inithist(&llen, pst,max_hscore);
-X f_string = &(histp->stat_info[0]);
-X
-X for (i = 0; i<nstats; i++)
-X addhist(&llen,sptr[i].score,sptr[i].n1,max_hscore);
-X
-X pu->r_u.rg.n_trimmed= pu->r_u.rg.n1_trimmed = pu->r_u.rg.nb_trimmed = 0;
-X if (do_trim) nit = 5;
-X else nit = 0;
-X
-X while (nit-- > 0) {
-X nprune = 0;
-X fit_llen2(&llen, &(pu->r_u.rg));
-X
-X for (i = 0; i < nstats; i++) {
-X if (sptr[i].n1 < 0) continue;
-X zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp,pu);
-X if (zs < 20.0 || zs > ztrim ) {
-X nprune++;
-X pu->r_u.rg.n_trimmed++;
-X prune_hist(&llen,sptr[i].score,sptr[i].n1,max_hscore,
-X &(histp->entries));
-X sptr[i].n1 = -sptr[i].n1;
-X }
-X }
-X /* fprintf(stderr," %d Z-trimmed at %d\n",nprune,nit); */
-X if (nprune < LHISTC) { break; }
-X }
-X
-X fit_llen(&llen, &(pu->r_u.rg));
-X
-X free_hist(&llen);
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X sprintf(f_string,"%s Expectation_i fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f;\n mean_var=%6.4f+/-%6.3f 0's: %d Z-trim: %d N-it: %d\n Lambda= %8.6f",
-X s_string,
-X pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),
-X pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),llen.zero_s,pu->r_u.rg.n_trimmed, nit,
-X PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
-X return REGI_STATS;
-}
-X
-/* this procedure implements Altschul's pre-calculated values for lambda, K */
-X
-#include "alt_parms.h"
-X
-int
-look_p(struct alt_p parm[], int gap, int ext,
-X double *K, double *Lambda, double *H);
-X
-int
-proc_hist_a(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X double Lambda, K, H;
-X char *f_string;
-X int r_v;
-X int t_gdelval, t_ggapval;
-X
-#ifdef OLD_FASTA_GAP
-X t_gdelval = pst.gdelval;
-X t_ggapval = pst.ggapval;
-#else
-X t_gdelval = pst.gdelval+pst.ggapval;
-X t_ggapval = pst.ggapval;
-#endif
-X
-X f_string = &(histp->stat_info[0]);
-X
-X if (strcmp(pst.pamfile,"BL50")==0 || strcmp(pst.pamfile,"BLOSUM50")==0)
-X r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"BL62")==0 || strcmp(pst.pamfile,"BLOSUM62")==0)
-X r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"BL80")==0 || strcmp(pst.pamfile,"BLOSUM80")==0)
-X r_v = look_p(bl80_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"P250")==0)
-X r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"P120")==0)
-X r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"MD_10")==0)
-X r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"MD_20")==0)
-X r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"MD_40")==0)
-X r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"DNA")==0 || strcmp(pst.pamfile,"+5/-4")==0)
-X r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"+3/-2")==0)
-X r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else if (strcmp(pst.pamfile,"+1/-3")==0)
-X r_v = look_p(nt13_p,t_gdelval,t_ggapval,&K,&Lambda,&H);
-X else r_v = 0;
-X
-X pu->r_u.ag.Lambda = Lambda;
-X pu->r_u.ag.K = K;
-X pu->r_u.ag.H = H;
-X
-X if (r_v == 0) {
-X fprintf(stderr,"Parameters not available for: %s: %d/%d\n",
-X pst.pamfile,t_gdelval,t_ggapval);
-X
-X find_zp = &find_zr;
-X return proc_hist_r(sptr, nstats,pst, histp, do_trim, pu);
-X }
-X
-X /*
-X fprintf(stderr," the parameters are: Lambda: %5.3f K: %5.3f H: %5.3f\n",
-X Lambda, K, H);
-X */
-X
-X pu->r_u.ag.a_n0 = (double)pst.n0;
-X pu->r_u.ag.a_n0f = log (K * pu->r_u.ag.a_n0)/H;
-X
-X sprintf(f_string,"Altschul/Gish params: n0: %d Lambda: %5.3f K: %5.3f H: %5.3f",
-X pst.n0,Lambda, K, H);
-X return AG_STATS;
-}
-X
-int
-ag_parm(char *pamfile, int gdelval, int ggapval, struct pstat_str *pu)
-{
-X double Lambda, K, H;
-X int r_v;
-X
-X if (strcmp(pamfile,"BL50")==0)
-X r_v = look_p(bl50_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"BL62")==0)
-X r_v = look_p(bl62_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"P250")==0)
-X r_v = look_p(p250_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"P120")==0)
-X r_v = look_p(p120_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"MD_10")==0)
-X r_v = look_p(md10_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"MD_20")==0)
-X r_v = look_p(md20_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"MD_40")==0)
-X r_v = look_p(md40_p,gdelval,ggapval,&K,&Lambda,&H);
-X else if (strcmp(pamfile,"DNA")==0 || strcmp(pamfile,"+5/-4")==0)
-X r_v = look_p(nt54_p,gdelval,ggapval, &K,&Lambda,&H);
-X else if (strcmp(pamfile,"+3/-2")==0)
-X r_v = look_p(nt32_p,gdelval,ggapval, &K,&Lambda,&H);
-X else if (strcmp(pamfile,"+1/-3")==0)
-X r_v = look_p(nt13_p,gdelval,ggapval, &K,&Lambda,&H);
-X else r_v = 0;
-X
-X pu->r_u.ag.K = K;
-X pu->r_u.ag.Lambda = Lambda;
-X pu->r_u.ag.H = H;
-X
-X if (r_v == 0) {
-X fprintf(stderr,"Parameters not available for: %s: %d/%d\n",
-X pamfile,gdelval,ggapval);
-X }
-X return r_v;
-}
-X
-int
-look_p(struct alt_p parm[], int gap, int ext,
-X double *K, double *Lambda, double *H)
-{
-X int i;
-X
-X gap = -gap;
-X ext = -ext;
-X
-X if (gap > parm[1].gap) {
-X *K = parm[0].K;
-X *Lambda = parm[0].Lambda;
-X *H = parm[0].H;
-X return 1;
-X }
-X
-X for (i=1; parm[i].gap > 0; i++) {
-X if (parm[i].gap > gap) continue;
-X else if (parm[i].gap == gap && parm[i].ext > ext ) continue;
-X else if (parm[i].gap == gap && parm[i].ext == ext) {
-X *K = parm[i].K;
-X *Lambda = parm[i].Lambda;
-X *H = parm[i].H;
-X return 1;
-X }
-X else break;
-X }
-X return 0;
-}
-X
-/* uncensored and censored maximum likelihood estimates developed
-X by Aaron Mackey based on a preprint from Sean Eddy */
-X
-int mle_cen (struct stat_str *, int, int, double, double *, double *);
-X
-int
-proc_hist_ml(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X double f_cen;
-X char s_string[128];
-X char *f_string;
-X
-X f_string = &(histp->stat_info[0]);
-X pu->r_u.ag.a_n0 = (double)pst.n0;
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X if (!do_trim) {
-X if (mle_cen(sptr, nstats, pst.n0, 0.0, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)
-X goto bad_mle;
-X sprintf(f_string,"%s MLE statistics: Lambda= %6.4f; K=%6.4g",
-X s_string,pu->r_u.ag.Lambda,pu->r_u.ag.K);
-X }
-X else {
-X if (nstats/20 > 1000) f_cen = 1000.0/(double)nstats;
-X else f_cen = 0.05;
-X if (mle_cen(sptr, nstats, pst.n0, f_cen, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)
-X goto bad_mle;
-X sprintf(f_string,"MLE_cen statistics: Lambda= %6.4f; K=%6.4g (cen=%d)",
-X pu->r_u.ag.Lambda,pu->r_u.ag.K,(int)((double)nstats*f_cen));
-X }
-X
-X return MLE_STATS;
-X bad_mle:
-X find_zp = &find_zn;
-X
-X return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
-}
-X
-int
-mle_cen2 (struct stat_str *, int, int, double, double *, double *, double *, double *);
-X
-X
-int
-proc_hist_ml2(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X int i, ns=0, nneg=0;
-X double f_cen, ave_lambda;
-X char s_string[128], ex_string[64];
-X char *f_string;
-X
-X f_string = &(histp->stat_info[0]);
-X pu->r_u.m2.a_n0 = (double)pst.n0;
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X pu->r_u.m2.ave_comp = 0.0;
-X pu->r_u.m2.max_comp = -1.0;
-X
-X ns = nneg = 0;
-X for (i=0; i<nstats; i++) {
-X if (sptr[i].comp > pu->r_u.m2.max_comp) pu->r_u.m2.max_comp = sptr[i].comp;
-X if (sptr[i].comp > 0.0) {
-X pu->r_u.m2.ave_comp += log(sptr[i].comp);
-X ns++;
-X }
-X else nneg++;
-X }
-X pu->r_u.m2.ave_comp /= (double)ns;
-X pu->r_u.m2.ave_comp = exp(pu->r_u.m2.ave_comp);
-X for (i=0; i<nstats; i++) if (sptr[i].comp < 0.0) {
-X sptr[i].comp = pu->r_u.m2.ave_comp;
-X }
-X
-X if (nneg > 0)
-X sprintf(ex_string,"composition = -1 for %d sequences",nneg);
-X else ex_string[0]='\0';
-X
-X if (!do_trim) {
-X if (mle_cen2(sptr, nstats, pst.n0, 0.0,
-X &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1,
-X &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1) == -1) goto bad_mle2;
-X ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
-X
-X sprintf(f_string,"%s MLE-2 statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f\n ave Lamdba: %6.4f",
-X s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1,ave_lambda);
-X }
-X else {
-X if (nstats/20 > 500) f_cen = 500.0/(double)nstats;
-X else f_cen = 0.05;
-X if (mle_cen2(sptr, nstats, pst.n0, f_cen, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1)== -1) goto bad_mle2;
-X
-X ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);
-X
-X sprintf(f_string,"%s MLE-2-cen statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f (cen=%d)\n ave Lambda:%6.4f",
-X s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1, (int)((double)nstats*f_cen),ave_lambda);
-X }
-X
-X return MLE2_STATS;
-X bad_mle2:
-X find_zp = &find_zn;
-X return proc_hist_n(sptr, nstats, pst, histp, do_trim, pu);
-}
-X
-double first_deriv_cen(double lambda, struct stat_str *sptr,
-X int start, int stop,
-X double sumlenL, double cenL,
-X double sumlenH, double cenH);
-X
-double second_deriv_cen(double lambda, struct stat_str *sptr,
-X int start, int stop,
-X double sumlenL, double cenL,
-X double sumlenH, double cenH);
-X
-static void
-st_sort (struct stat_str *v, int n) {
-X int gap, i, j;
-X int tmp;
-X
-X for (gap = 1; gap < n/3; gap = 3*gap +1) ;
-X
-X for (; gap > 0; gap = (gap-1)/3)
-X for (i = gap; i < n; i++)
-X for (j = i - gap; j >= 0; j -= gap) {
-X if (v[j].score <= v[j + gap].score) break;
-X
-X tmp = v[j].score;
-X v[j].score = v[j + gap].score;
-X v[j + gap].score = tmp;
-X
-X tmp = v[j].n1;
-X v[j].n1 = v[j + gap].n1;
-X v[j + gap].n1 = tmp;
-X }
-}
-X
-/* sptr[].score, sptr[].n1; sptr[] must be sorted
-X int n = total number of samples
-X int M = length of query
-X double fn = fraction of scores to be censored fn/2.0 from top, bottom
-X double *Lambda = Lambda estimate
-X double *K = K estimate
-*/
-X
-#define MAX_NIT 100
-X
-int
-mle_cen(struct stat_str *sptr, int n, int M, double fc,
-X double *Lambda, double *K) {
-X
-X double sumlenL, sumlenH, cenL, cenH;
-X double sum_s, sum2_s, mean_s, var_s, dtmp;
-X int start, stop;
-X int i, nf;
-X int nit = 0;
-X double deriv, deriv2, lambda, old_lambda, sum = 0.0;
-X /*
-X int sumlenL, int sumlenghtsR = sum of low (Left), right (High) seqs.
-X int cenL, cenH = censoring score low, high
-X */
-X
-X nf = (fc/2.0) * n;
-X start = nf;
-X stop = n - nf;
-X
-X st_sort(sptr,n);
-X
-X sum_s = sum2_s = 0.0;
-X for (i=start; i<stop; i++) {
-X sum_s += sptr[i].score;
-X }
-X dtmp = (double)(stop-start);
-X mean_s = sum_s/dtmp;
-X
-X for (i=start; i<stop; i++) {
-X sum2_s += sptr[i].score * sptr[i].score;
-X }
-X var_s = sum2_s/(dtmp-1.0);
-X
-X sumlenL = sumlenH = 0.0;
-X for (i=0; i<start; i++) sumlenL += (double)sptr[i].n1;
-X for (i=stop; i<n; i++) sumlenH += (double)sptr[i].n1;
-X
-X if (nf > 0) {
-X cenL = (double)sptr[start].score;
-X cenH = (double)sptr[stop].score;
-X }
-X else {
-X cenL = (double)sptr[start].score/2.0;
-X cenH = (double)sptr[start].score*2.0;
-X }
-X
-X if (cenL >= cenH) return -1;
-X
-X /* initial guess for lambda is 0.2 - this does not work for matrices
-X with very different scales */
-X /* lambda = 0.2; */
-X lambda = PI_SQRT6/sqrt(var_s);
-X if (lambda > 1.0) {
-X fprintf(stderr," Lambda initial estimate error: lambda: %6.4g; var_s: %6.4g\n",lambda,var_s);
-X lambda = 0.2;
-X }
-X
-X do {
-X deriv = first_deriv_cen(lambda, sptr, start, stop,
-X sumlenL, cenL, sumlenH, cenH);
-X /* (uncensored version)
-X first_deriv(lambda, &sptr[start], stop - start))
-X */
-X
-X /* (uncensored version)
-X deriv2 = second_deriv(lambda, &sptr[start], stop-start);
-X */
-X deriv2 = second_deriv_cen(lambda, sptr, start, stop,
-X sumlenL, cenL, sumlenH, cenH);
-X
-X old_lambda = lambda;
-X if (lambda - deriv/deriv2 > 0.0) lambda = lambda - deriv/deriv2;
-X else lambda = lambda/2.0;
-X nit++;
-X } while (fabs((lambda - old_lambda)/lambda) > TINY && nit < MAX_NIT);
-X
-X /* fprintf(stderr," mle_cen nit: %d\n",nit); */
-X
-X if (nit >= MAX_NIT) return -1;
-X
-X for(i = start; i < stop ; i++) {
-X sum += (double) sptr[i].n1 * exp(- lambda * (double)sptr[i].score);
-X }
-X
-X *Lambda = lambda;
-X /*
-X *K = (double)(stop-start)/((double)M*sum);
-X */
-X *K = (double)n/((double)M*
-X (sum+sumlenL*exp(-lambda*cenL)-sumlenH*exp(-lambda*cenH)));
-X return 0;
-}
-X
-/*
-double
-first_deriv(double lambda, struct stat_str *sptr, int n) {
-X
-X int i;
-X double sum = 0.0, sum1 = 0.0, sum2 = 0.0;
-X double s, l, es;
-X
-X for(i = 0 ; i < n ; i++) {
-X s = (double)sptr[i].score;
-X l = (double)sptr[i].n1;
-X es = exp(-lambda * s );
-X sum += s;
-X sum2 += l * es;
-X sum1 += s * l * es;
-X }
-X
-X return (1.0/lambda) - (sum/(double)n) + (sum1/sum2);
-}
-*/
-X
-/*
-double
-second_deriv(double lambda, struct stat_str *sptr, int n) {
-X double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
-X double s, l, es;
-X int i;
-X
-X for(i = 0 ; i < n ; i++) {
-X l = (double)sptr[i].n1;
-X s = (double)sptr[i].score;
-X es = exp(-lambda * s);
-X sum2 += l * es;
-X sum1 += l * s * es;
-X sum3 += l * s * s * es;
-X }
-X
-X return ((sum1*sum1)/(sum2*sum2)) - (sum3/sum2) - (1.0/(lambda*lambda));
-}
-*/
-X
-double
-first_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
-X double sumlenL, double cenL, double sumlenH, double cenH) {
-X int i;
-X double sum = 0.0, sum1 = 0.0, sum2 = 0.0;
-X double s, l, es;
-X
-X for(i = start ; i < stop ; i++) {
-X s = (double)sptr[i].score;
-X l = (double)sptr[i].n1;
-X es = exp(-lambda * s );
-X sum += s;
-X sum2 += l * es;
-X sum1 += s * l * es;
-X }
-X
-X sum1 += sumlenL*cenL*exp(-lambda*cenL) - sumlenH*cenH*exp(-lambda*cenH);
-X sum2 += sumlenL*exp(-lambda*cenL) - sumlenH*exp(-lambda*cenH);
-X
-X return (1.0 / lambda) - (sum /(double)(stop-start)) + (sum1 / sum2);
-}
-X
-double
-second_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,
-X double sumlenL, double cenL, double sumlenH, double cenH) {
-X
-X double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
-X double s, l, es;
-X int i;
-X
-X for(i = start ; i < stop ; i++) {
-X s = (double)sptr[i].score;
-X l = (double)sptr[i].n1;
-X es = exp(-lambda * s);
-X sum2 += l * es;
-X sum1 += l * s * es;
-X sum3 += l * s * s * es;
-X }
-X
-X sum1 += sumlenL*cenL*exp(-lambda*cenL) - sumlenH*cenH*exp(-lambda*cenH);
-X sum2 += sumlenL*exp(-lambda * cenL) - sumlenH*exp(-lambda * cenH);
-X sum3 += sumlenL*cenL*cenL * exp(-lambda * cenL) -
-X sumlenH*cenH*cenH * exp(-lambda * cenH);
-X return ((sum1 * sum1) / (sum2 * sum2)) - (sum3 / sum2)
-X - (1.0 / (lambda * lambda));
-}
-X
-double mle2_func(double *params,
-X double *consts,
-X struct stat_str *values,
-X int n, int start, int stop);
-X
-void simplex(double *fitparams,
-X double *lambda,
-X int nparam,
-X double (*minfunc) (double *tryparams, double *consts,
-X struct stat_str *data, int ndata,
-X int start, int stop),
-X double *consts,
-X void *data,
-X int ndata, int start, int stop
-X );
-X
-int
-mle_cen2(struct stat_str *sptr, int n, int M, double fc,
-X double *a0, double *a1, double *a2, double *b1) {
-X
-X double params[4], lambdas[4], consts[9];
-X double avglenL, avglenH, avgcompL, avgcompH, cenL, cenH;
-X int start, stop;
-X int i, nf;
-X
-X nf = (fc/2.0) * n;
-X start = nf;
-X stop = n - nf;
-X
-X st_sort(sptr,n);
-X
-X /* choose arithmetic or geometic mean for compositions by appropriate commenting */
-X
-X if (nf > 0) {
-X avglenL = avglenH = 0.0;
-X avgcompL = avgcompH = 0.0;
-X /* avgcompL = avgcompH = 1.0 */
-X for (i=0; i<start; i++) {
-X avglenL += (double)sptr[i].n1;
-X avgcompL += (double)sptr[i].comp;
-X /* avgcompL *= (double) sptr[i].comp; */
-X }
-X avglenL /= (double) start;
-X avgcompL /= (double) start;
-X /* avgcompL = pow(avgcompL, 1.0/(double) start); */
-X
-X for (i=stop; i<n; i++) {
-X avglenH += (double)sptr[i].n1;
-X avgcompH += (double)sptr[i].comp;
-X /* avgcompH *= (double) sptr[i].comp; */
-X }
-X avglenH /= (double) (n - stop);
-X avgcompH /= (double) (n - stop);
-X /* avgcompL = pow(avgcompL, 1.0/(double) (n - stop)); */
-X
-X cenL = (double)sptr[start].score;
-X cenH = (double)sptr[stop].score;
-X if (cenL >= cenH) return -1;
-X }
-X else {
-X avglenL = avglenH = cenL = cenH = 0.0;
-X avgcompL = avgcompH = 1.0;
-X }
-X
-X params[0] = 10.0;
-X params[1] = -10.0;
-X params[2] = 1.0;
-X params[3] = 1.0;
-X
-X lambdas[0] = 1.0;
-X lambdas[1] = 0.5;
-X lambdas[2] = 0.1;
-X lambdas[3] = 0.01;
-X
-X consts[0] = M;
-X consts[1] = (double) start;
-X consts[2] = (double) stop;
-X consts[3] = cenL;
-X consts[4] = cenH;
-X consts[5] = avglenL;
-X consts[6] = avglenH;
-X consts[7] = avgcompL;
-X consts[8] = avgcompH;
-X
-X simplex(params, lambdas, 4,
-X (double (*) (double *, double *, struct stat_str *, int, int, int) )mle2_func,
-X consts, sptr, n, start, stop);
-X
-X *a0 = params[0];
-X *a1 = params[1];
-X *a2 = params[2];
-X *b1 = params[3];
-X
-X return 0;
-}
-X
-double mle2_func(double *params,
-X double *consts,
-X struct stat_str *values,
-X int n, int start, int stop
-X ) {
-X
-X double a0, a1, a2, b1, M;
-X double score, length, comp;
-X double cenL, cenH, avglenL, avglenH, avgcompL, avgcompH;
-X double L, y;
-X
-X int i;
-X
-X a0 = params[0];
-X a1 = params[1];
-X a2 = params[2];
-X b1 = params[3];
-X
-X M = consts[0];
-X /*
-X start = (int) consts[1];
-X stop = (int) consts[2];
-X */
-X cenL = consts[3];
-X cenH = consts[4];
-X avglenL = consts[5];
-X avglenH = consts[6];
-X avgcompL = consts[7];
-X avgcompH = consts[8];
-X
-X L = 0;
-X y = 0;
-X
-X if (start > 0) {
-X y = -(cenL - (a0 + a1*avgcompL +a2*avgcompL*log(M*avglenL)))/(b1*avgcompL);
-X L += (double) start * exp(y);
-X }
-X
-X for(i = start ; i < stop ; i++) {
-X score = (double) values[i].score;
-X length = (double) values[i].n1;
-X comp = (double) values[i].comp;
-X
-X y = - (score - (a0 + a1*comp + a2 * comp * log(M*length))) / (b1*comp);
-X
-X L += -y + exp(y) + log(b1 * comp);
-X }
-X
-X if (stop < n) {
-X y = -(cenH -(a0 + a1*avgcompH + a2*avgcompH*log(M*avglenH)))/(b1*avgcompH);
-X L -= (double) (n - stop) * exp(y);
-X }
-X return L;
-}
-X
-/* Begin Nelder-Mead simplex code: */
-X
-double evalfunc(double **param,
-X double *vals,
-X double *psums,
-X double *ptry,
-X int nparam,
-X double (*minfunc) (double *params, double *consts,
-X struct stat_str *data, int ndata,
-X int start, int stop),
-X double *consts,
-X void *data,
-X int ndata, int start, int stop,
-X int ihi,
-X double factor);
-X
-void simplex(double *fitparams,
-X double *lambda,
-X int nparam,
-X double (*minfunc) (double *tryparams, double *consts,
-X struct stat_str *data, int ndata,
-X int start, int stop),
-X double *consts,
-X void *data,
-X int ndata,
-X int start,
-X int stop
-X )
-{
-X
-X int i, j, ilo, ihi, inhi;
-X double rtol, sum, tmp, ysave, ytry;
-X double *psum, *vals, *ptry, **param;
-X
-X
-X psum = (double *) calloc(nparam, sizeof(double));
-X ptry = (double *) calloc(nparam, sizeof(double));
-X
-X vals = (double *) calloc(nparam + 1, sizeof(double));
-X
-X param = (double **) calloc(nparam + 1, sizeof(double *));
-X param[0] = (double *) calloc((nparam + 1) * nparam, sizeof(double));
-X for( i = 1 ; i < (nparam + 1) ; i++ ) {
-X param[i] = param[0] + i * nparam;
-X }
-X
-X /* Get our N+1 initial parameter values for the simplex */
-X
-X for( i = 0 ; i < nparam ; i++ ) {
-X param[0][i] = fitparams[i];
-X }
-X
-X for( i = 1 ; i < (nparam + 1) ; i++ ) {
-X for( j = 0 ; j < nparam ; j++ ) {
-X param[i][j] = fitparams[j] + lambda[j] * ( (i - 1) == j ? 1 : 0 );
-X }
-X }
-X
-X /* calculate initial values at the simplex nodes */
-X
-X for( i = 0 ; i < (nparam + 1) ; i++ ) {
-X vals[i] = minfunc(param[i], consts, data, ndata, start, stop);
-X }
-X
-X /* Begin Nelder-Mead simplex algorithm from Numerical Recipes in C */
-X
-X for( j = 0 ; j < nparam ; j++ ) {
-X for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
-X sum += param[i][j];
-X }
-X psum[j] = sum;
-X }
-X
-X
-X while( 1 ) {
-/*
-X determine which point is highest (ihi), next highest (inhi) and
-X lowest (ilo) by looping over the points in the simplex
-*/
-X ilo = 0;
-X
-/* ihi = vals[0] > vals[1] ? (inhi = 1, 0) : (inhi = 0, 1); */
-X if(vals[0] > vals[1]) { ihi = 0; inhi = 1; }
-X else { ihi = 1; inhi = 0; }
-X
-X for( i = 0 ; i < nparam + 1 ; i++) {
-X if( vals[i] <= vals[ilo] ) ilo = i;
-X if( vals[i] > vals[ihi] ) {
-X inhi = ihi;
-X ihi = i;
-X } else if ( vals[i] > vals[inhi] && i != ihi ) inhi = i;
-X }
-X
-X /* Are we finished? */
-X
-X rtol = 2.0 * fabs(vals[ihi] - vals[ilo]) /
-X (fabs(vals[ihi]) + fabs(vals[ilo]) + TINY);
-X
-X if( rtol < TOLERANCE ) {
-X
-/* put the best value and best parameters into the first index */
-X
-X tmp = vals[0];
-X vals[0] = vals[ilo];
-X vals[ilo] = tmp;
-X
-X for( i = 0 ; i < nparam ; i++ ) {
-X tmp = param[0][i];
-X param[0][i] = param[ilo][i];
-X param[ilo][i] = tmp;
-X }
-X
-X /* et voila, c'est finis */
-X break;
-X }
-X
-X /* Begin a new iteration */
-X
-X /* first, extrapolate by -1 through the face of the simplex across from ihi */
-X
-X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
-X data, ndata, start, stop, ihi, -1.0);
-X
-X if( ytry <= vals[ilo] ) {
-X
-X /* Good result, try additional extrapolation by 2 */
-X
-X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
-X data, ndata, start, stop, ihi, 2.0);
-X
-X } else if ( ytry >= vals[inhi] ) {
-X
-X /* no good, look for an intermediate lower point by contracting */
-X
-X ysave = vals[ihi];
-X ytry = evalfunc(param, vals, psum, ptry, nparam, minfunc, consts,
-X data, ndata, start, stop, ihi, 0.5);
-X
-X if( ytry >= ysave ) {
-X
-X /* Still no good. Contract around lowest (best) point. */
-X
-X for( i = 0 ; i < nparam + 1 ; i++ ) {
-X if( i != ilo ) {
-X for ( j = 0 ; j < nparam ; j++ ) {
-X param[i][j] = psum[j] = 0.5 * (param[i][j] + param[ilo][j]);
-X }
-X vals[i] = minfunc(psum, consts, data, ndata, start, stop);
-X }
-X }
-X
-X
-X for( j = 0 ; j < nparam ; j++ ) {
-X for( sum = 0.0, i = 0 ; i < nparam + 1 ; i++ ) {
-X sum += param[i][j];
-X }
-X psum[j] = sum;
-X }
-X
-X }
-X }
-X }
-X
-X for( i = 0 ; i < nparam ; i++ ) {
-X fitparams[i] = param[0][i];
-X }
-X
-X if (ptry!=NULL) {
-X free(ptry);
-X ptry=NULL;
-X }
-X free(param[0]);
-X free(param);
-X free(vals);
-X free(psum);
-}
-X
-X
-double evalfunc(double **param,
-X double *vals,
-X double *psum,
-X double *ptry,
-X int nparam,
-X double (*minfunc)(double *tryparam, double *consts,
-X struct stat_str *data, int ndata,
-X int start, int stop),
-X double *consts,
-X void *data,
-X int ndata, int start, int stop,
-X int ihi,
-X double factor) {
-X
-X int j;
-X double fac1, fac2, ytry;
-X
-X
-X fac1 = (1.0 - factor) / nparam;
-X fac2 = fac1 - factor;
-X
-X for( j = 0 ; j < nparam ; j++ ) {
-X ptry[j] = psum[j] * fac1 - param[ihi][j] * fac2;
-X }
-X
-X ytry = minfunc(ptry, consts, data, ndata, start, stop);
-X
-X if( ytry < vals[ihi] ) {
-X vals[ihi] = ytry;
-X for( j = 0 ; j < nparam ; j++ ) {
-X psum[j] += ptry[j] - param[ihi][j];
-X param[ihi][j] = ptry[j];
-X }
-X }
-X
-X return ytry;
-}
-X
-/* end of Nelder-Mead simplex code */
-X
-int
-proc_hist_n(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct pstat_str *pu)
-{
-X int i, j;
-X double s_score, s2_score, ssd, ztrim;
-X int nit, max_hscore;
-X char s_string[128];
-X char *f_string;
-X
-X f_string = &(histp->stat_info[0]);
-X
-X max_hscore = calc_thresh(pst, nstats, pu->ngLambda,
-X pu->ngK, pu->ngH, &ztrim);
-X
-X s_score = s2_score = 0.0;
-X
-X for ( j = 0, i = 0; i < nstats; i++) {
-X if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
-X s_score += (ssd=(double)sptr[i].score);
-X s2_score += ssd * ssd;
-X j++;
-X }
-X }
-X
-X if (j > 1 ) {
-X pu->r_u.rg.mu = s_score/(double)j;
-X pu->r_u.rg.mean_var = s2_score - (double)j * pu->r_u.rg.mu * pu->r_u.rg.mu;
-X pu->r_u.rg.mean_var /= (double)(j-1);
-X }
-X else {
-X pu->r_u.rg.mu = 50.0;
-X pu->r_u.rg.mean_var = 10.0;
-X }
-X
-X if (pu->r_u.rg.mean_var < 0.01) {
-X pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;
-X }
-X
-X /* now remove some scores */
-X
-X nit = 5;
-X while (nit-- > 0) {
-X pu->r_u.rg.n_trimmed = 0;
-X
-X for (i=0; i< nstats; i++) {
-X if (sptr[i].n1 < 0) continue;
-X ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, pu);
-X if (ssd > ztrim || ssd < 20.0) {
-X /* fprintf(stderr,"removing %3d %3d %4.1f\n",
-X sptr[i].score, sptr[i].n1,ssd); */
-X ssd = sptr[i].score;
-X s_score -= ssd;
-X s2_score -= ssd*ssd;
-X j--;
-X pu->r_u.rg.n_trimmed++;
-X histp->entries--;
-X sptr[i].n1 = -sptr[i].n1;
-X }
-X }
-X
-X if (j > 1 ) {
-X pu->r_u.rg.mu = s_score/(double)j;
-X pu->r_u.rg.mean_var = s2_score - (double)j * pu->r_u.rg.mu * pu->r_u.rg.mu;
-X pu->r_u.rg.mean_var /= (double)(j-1);
-X }
-X else {
-X pu->r_u.rg.mu = 50.0;
-X pu->r_u.rg.mean_var = 10.0;
-X }
-X
-X if (pu->r_u.rg.mean_var < 0.01) {
-X pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;
-X }
-X
-X if (pu->r_u.rg.n_trimmed < LHISTC) {
-X /*
-X fprintf(stderr,"nprune %d at %d\n",nprune,nit);
-X */
-X break;
-X }
-X }
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X sprintf(f_string,"%s unscaled statistics: mu= %6.4f var=%6.4f; Lambda= %6.4f",
-X s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var,PI_SQRT6/sqrt(pu->r_u.rg.mean_var));
-X return AVE_STATS;
-}
-X
-/*
-This routine calculates the maximum likelihood estimates for the
-extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
-X
-X <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
-X <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
-X
-X The <a> parameter can be transformed into and K
-X of the formula: 1 - exp ( - K m n exp ( - lambda S ))
-X using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
-X 1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
-X
-X a = log(K m n) / lambda
-X a lambda = log (K m n)
-X exp(a lambda) = K m n
-X but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
-X so: K m n = (1/n sum{ exp( -x[i] *lambda)})
-X K = sum{}/(nlib m n )
-X
-*/
-X
-void
-alloc_hist(struct llen_str *llen)
-{
-X int max_llen, i;
-X max_llen = llen->max;
-X
-X if (llen->hist == NULL) {
-X llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
-X llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X }
-X
-X for (i=0; i< max_llen+1; i++) {
-X llen->hist[i] = 0;
-X llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
-X }
-}
-X
-void
-free_hist(struct llen_str *llen)
-{
-X if (llen->hist!=NULL) {
-X free(llen->score_var);
-X free(llen->score2_sums);
-X free(llen->score_sums);
-X free(llen->hist);
-X llen->hist=NULL;
-X }
-}
-X
-void
-inithist(struct llen_str *llen, struct pstruct pst, int max_hscore)
-{
-X llen->max = MAX_LLEN;
-X
-X llen->max_score = -1;
-X llen->min_score=10000;
-X
-X alloc_hist(llen);
-X
-X llen->zero_s = 0;
-X llen->min_length = 10000;
-X llen->max_length = 0;
-}
-X
-/* addhist: add one (score, length) observation to the length bins.
-X
-X  Non-positive scores and sequences shorter than LENGTH_CUTOFF are only
-X  counted in zero_s (and force min_score to 0).  Otherwise the score,
-X  capped at max_hscore, is accumulated in the bin
-X  (int)(LN_FACT*ln(length)+0.5), clamped to 0..llen->max. */
-void
-addhist(struct llen_str *llen, int score, int length, int max_hscore)
-{
-X  int bin;
-X  double ds;
-X
-X  if (!(score > 0 && length >= LENGTH_CUTOFF)) {
-X    llen->zero_s++;
-X    llen->min_score = 0;
-X    return;
-X  }
-X
-X  /* extremes are recorded from the raw score, before it is capped */
-X  if (llen->min_score > score) llen->min_score = score;
-X  if (llen->max_score < score) llen->max_score = score;
-X  if (llen->min_length > length) llen->min_length = length;
-X  if (llen->max_length < length) llen->max_length = length;
-X
-X  ds = (double)((score > max_hscore) ? max_hscore : score);
-X
-X  bin = (int)(LN_FACT*log((double)length)+0.5);
-X  if (bin < 0) bin = 0;
-X  if (bin > llen->max) bin = llen->max;
-X
-X  llen->hist[bin] += 1;
-X  llen->score_sums[bin] += ds;
-X  llen->score2_sums[bin] += ds * ds;
-}
-X
-/* inithistz: set up the z-score display histogram in *histp.
-X
-X  The histogram covers scaled z-scores (50 + 10*z) from min_hist (20)
-X  to max_hist (120); addhistz() clamps values outside that range into
-X  the end bins.  mh is the requested number of bins; the bin width
-X  (histint) and the actual bin count (maxh) are derived from it.
-X
-X  hist_a is allocated when it is NULL and zeroed when it already
-X  exists.  histflg is set to 1 after a successful allocation and to 0
-X  when the allocation fails (hist_a then stays NULL).
-*/
-X
-void
-inithistz(int mh, struct hist_str *histp )
-{
-X  int i;
-X  double range;
-X
-X  histp->z_calls = 0;
-X
-X  histp->min_hist = 20;
-X  histp->max_hist = 120;
-X
-X  range = (double)(histp->max_hist - histp->min_hist + 2);
-X
-X  /* a non-positive mh, or one more than about twice the range, would
-X     give a zero bin width and a division by zero here and in
-X     addhistz(); fall back to the narrowest usable width instead */
-X  if (mh < 1) mh = 1;
-X  histp->histint = (int)(range/(double)mh+0.5);
-X  if (histp->histint < 1) histp->histint = 1;
-X  histp->maxh = (int)(range/(double)histp->histint+0.5);
-X
-X  if (histp->hist_a==NULL) {
-X    if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))==
-X	NULL) {
-X      fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
-X      histp->histflg = 0;
-X    }
-X    else histp->histflg = 1;
-X  }
-X  else {
-X    for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;
-X  }
-X  histp->entries = 0;
-}
-X
-/* nrv: 100 fixed pseudo-random offsets.  addhistz() cycles through
-X  them and adds one to each z-score before it is truncated to an
-X  integer.  The table must hold exactly 100 comma-separated values;
-X  the last row previously lacked a comma after its first entry, which
-X  merged two values into one subtraction and left the final slot 0. */
-static double nrv[100]={
-X  0.3098900570,-0.0313400923, 0.1131975903,-0.2832547606, 0.0073672659,
-X  0.2914489107, 0.4209306311,-0.4630181404, 0.3326537896, 0.0050140359,
-X -0.1117435426,-0.2835630301, 0.2302997065,-0.3102716394, 0.0819894916,
-X -0.1676455701,-0.3782225018,-0.3204509938,-0.3594969187,-0.0308950398,
-X  0.2922813812, 0.1337170751, 0.4666577031,-0.2917784349,-0.2438179916,
-X  0.3002301394, 0.0231147123, 0.5687927366,-0.2318208709,-0.1476839273,
-X -0.0385043851,-0.1213476523, 0.1486341995, 0.1027917167, 0.1409192644,
-X -0.3280652579, 0.4232041455, 0.0775993309, 0.1159071787, 0.2769424442,
-X  0.3197284751, 0.1507346903, 0.0028580909, 0.4825103412,-0.0496843610,
-X -0.2754357656, 0.6021881753,-0.0816123956,-0.0899148991, 0.4847183201,
-X  0.2151621865,-0.4542246220, 0.0690709102, 0.2461894193, 0.2126042295,
-X -0.0767060668, 0.4819746149, 0.3323031326, 0.0177600676, 0.1143185210,
-X  0.2653977455, 0.0921872958,-0.1330986718, 0.0412287716,-0.1691604748,
-X -0.0529679078,-0.0194157955,-0.6117493924, 0.1199067932, 0.0210243193,
-X -0.5832259838,-0.1685528664, 0.0008591271,-0.1120347822, 0.0839125069,
-X -0.2787486831,-0.1937017962,-0.1915733940,-0.7888453635,-0.3316745163,
-X  0.1180885226,-0.3347001067,-0.2477492636,-0.2445697600, 0.0001342482,
-X -0.0015759812,-0.1516473992,-0.5202267615, 0.2136975210, 0.2500423188,
-X -0.2402926401,-0.1094186280,-0.0618869933,-0.0815221188, 0.2623337275,
-X  0.0219427302,-0.1774469919, 0.0828245026,-0.3271952808,-0.0632898028};
-X
-/* addhistz: add one scaled z-score to the display histogram.
-X
-X  zs is dithered with the next nrv[] offset and rounded to an integer.
-X  histp->entries is incremented when that integer lies in 0..120; the
-X  value is then clamped to min_hist..max_hist and its bin incremented.
-X  Nothing is binned when the histogram was never allocated. */
-void
-addhistz(double zs, struct hist_str *histp)
-{
-X  int ih, zi;
-X  double rv;
-X
-X  rv = nrv[histp->z_calls++ % 100];
-X  zi = (int)(zs + 0.5+rv );
-X
-X  if ((zi >= 0) && (zi <= 120)) histp->entries++;
-X
-X  /* inithistz() leaves hist_a NULL (histflg 0) when its allocation
-X     fails; a bin width below 1 would divide by zero below */
-X  if (histp->hist_a == NULL || histp->histint < 1) return;
-X
-X  if (zi < histp->min_hist) zi = histp->min_hist;
-X  if (zi > histp->max_hist) zi = histp->max_hist;
-X
-X  ih = (zi - histp->min_hist)/histp->histint;
-X
-X  histp->hist_a[ih]++;
-}
-X
-/* addhistzp() does not increase histp->entries since addhist did it already */
-/*
-void
-addhistzp(double zs, struct hist_str *histp)
-{
-X int ih, zi;
-X double rv;
-X
-X rv = nrv[histp->z_calls++ %100];
-X zi = (int)(zs + 0.5 + rv);
-X
-X if (zi < histp->min_hist) zi = histp->min_hist;
-X if (zi > histp->max_hist) zi = histp->max_hist;
-X
-X ih = (zi - histp->min_hist)/histp->histint;
-X
-X histp->hist_a[ih]++;
-}
-*/
-X
-/* prune_hist: remove one (score, length) observation from the length
-X  bins - the inverse of the accumulation done by addhist().  The
-X  score/length extremes and zero_s are not adjusted. */
-void
-prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
-X	   long *entries)
-{
-X  int bin;
-X  double ds;
-X
-X  if (!(score > 0 && length >= LENGTH_CUTOFF)) return;
-X
-X  ds = (double)((score > max_hscore) ? max_hscore : score);
-X
-X  bin = (int)(LN_FACT*log((double)length)+0.5);
-X  if (bin < 0) bin = 0;
-X  if (bin > llen->max) bin = llen->max;
-X
-X  llen->hist[bin] -= 1;
-X  llen->score_sums[bin] -= ds;
-X  llen->score2_sums[bin] -= ds * ds;
-X
-/*  (*entries)--; histp->entries is not yet initialized */
-}
-X
-/* fit_llen: no trimming
-X (1) regress scores vs log(n) using weighted variance
-X (2) calculate mean variance after length regression
-*/
-X
-/* fit_llen(): fit the binned scores to a straight line in the bin index.
-X
-X For each bin j, llen supplies the count hist[j] and the sums
-X score_sums[j] / score2_sums[j]; score_var[j] is overwritten here.
-X Bin j is treated as x = j + 0.5 and only bins holding more than one
-X score take part.
-X
-X Written to *pr: nb_tot, rho/mu (slope/intercept of the weighted fit)
-X and, when the fit is accepted, rho_e, mu_e, mean_var, var_e, rho2/mu2
-X (fit of the squared residuals against x) and var_cutoff.
-X When fewer than 5 bins qualify, or the determinant of the normal
-X equations is <= 0.001, the function sets llen->fit_flag = 0, rho = 0,
-X mu = the weighted mean score, and returns without setting the rest.
-X
-X NOTE(review): llen->fit_flag is tested below but never set to 1 in
-X this function - presumably the caller initializes it; confirm.
-*/
-void
-fit_llen(struct llen_str *llen, struct rstat_str *pr)
-{
-X int j;
-X int n;
-X int n_size;
-X double x, y2, u, z;
-X double mean_x, mean_y, var_x, var_y, covar_xy;
-X double mean_y2, covar_xy2, var_y2, dllj;
-X
-X double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;
-X
-/* now fit scores to best linear function of log(n), using
-X simple linear regression */
-X
-X /* locate the first occupied bin; llen->min ends one below it */
-X for (llen->min=0; llen->min < llen->max; llen->min++)
-X if (llen->hist[llen->min]) break;
-X llen->min--;
-X
-X /* per-bin variance of the mean, floored at 0.1; n_size counts the bins used */
-X for (n_size=0,j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] > 1) {
-X dllj = (double)llen->hist[j];
-X llen->score_var[j] = llen->score2_sums[j]/dllj
-X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-X llen->score_var[j] /= (double)(llen->hist[j]-1);
-X if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;
-X n_size++;
-X }
-X }
-X
-X pr->nb_tot = n_size;
-X
-X /* weighted sums for the normal equations, weight = 1/score_var[j] */
-X n_w = 0.0;
-X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-X for (j = llen->min; j < llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = j + 0.5;
-X dllj = (double)llen->hist[j];
-X n_w += dllj/llen->score_var[j];
-X sum_x += dllj * x / llen->score_var[j] ;
-X sum_y += llen->score_sums[j] / llen->score_var[j];
-X sum_x2 += dllj * x * x /llen->score_var[j];
-X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
-X }
-X
-X /* too few bins, or a degenerate system: no length correction */
-X if (n_size < 5 ) {
-X llen->fit_flag=0;
-X pr->rho = 0;
-X pr->mu = sum_y/n_w;
-X return;
-X }
-X else {
-X det = n_w * sum_x2 - sum_x * sum_x;
-X if (det > 0.001) {
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
-X pr->rho_e = n_w/det;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
-X pr->mu_e = sum_x2/det;
-X }
-X else {
-X llen->fit_flag = 0;
-X pr->rho = 0;
-X pr->mu = sum_y/n_w;
-X return;
-X }
-X }
-X
-X /* repeats the rho/mu computation just done in the det > 0.001 branch */
-X det = n_w * sum_x2 - sum_x * sum_x;
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
-X
-X n = 0;
-X mean_x = mean_y = mean_y2 = 0.0;
-X var_x = var_y = 0.0;
-X covar_xy = covar_xy2 = 0.0;
-X
-X /* unweighted moments; note this loop and the next include bin llen->max */
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1 ) {
-X n += llen->hist[j];
-X x = (double)j + 0.5;
-X mean_x += (double)llen->hist[j] * x;
-X mean_y += llen->score_sums[j];
-X var_x += (double)llen->hist[j] * x * x;
-X var_y += llen->score2_sums[j];
-X covar_xy += x * llen->score_sums[j];
-X }
-X mean_x /= n; mean_y /= n;
-X var_x = var_x / n - mean_x * mean_x;
-X var_y = var_y / n - mean_y * mean_y;
-X
-X covar_xy = covar_xy / n - mean_x * mean_y;
-/*
-X pr->rho = covar_xy / var_x;
-X pr->mu = mean_y - pr->rho * mean_x;
-*/
-X mean_y2 = covar_xy2 = var_y2 = 0.0;
-X /* y2 = sum over the bin of (score - u)^2, u being the fitted score for the bin */
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = (double)j + 0.5;
-X u = pr->rho * x + pr->mu;
-X y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;
-/*
-X dllj = (double)llen->hist[j];
-X fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],
-X llen->score_sums[j]/dllj,y2/dllj);
-*/
-X mean_y2 += y2;
-X var_y2 += y2 * y2;
-X covar_xy2 += x * y2;
-X /* fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",
-X x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */
-X }
-X
-X /* mean squared residual per score */
-X pr->mean_var = mean_y2 /= (double)n;
-X covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
-X
-X /* a near-zero residual variance is replaced by max(mu, 1.0) */
-X if (pr->mean_var <= 0.01) {
-X llen->fit_flag = 0;
-X pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
-X }
-X
-X /*
-X fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
-X pr->rho*LN_FACT,pr->mu,pr->mean_var);
-X */
-X if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
-X else pr->var_e = 0.0;
-X
-X /* rho2/mu2: linear fit of the squared residuals against x */
-X if (llen->fit_flag) {
-X pr->rho2 = covar_xy2 / var_x;
-X pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
-X }
-X else {
-X pr->rho2 = 0;
-X pr->mu2 = pr->mean_var;
-X }
-X
-X /* z: the length at which the predicted variance is evaluated for var_cutoff */
-X if (pr->rho2 < 0.0 )
-X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-X if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
-X
-X pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;
-}
-X
-/* fit_llens: trim high variance bins
-X (1) regress scores vs log(n) using weighted variance
-X (2) regress residuals vs log(n)
-X (3) remove high variance bins
-X (4) calculate mean variance after length regression
-*/
-X
-/* fit_llens(): as fit_llen(), but first discards high-variance bins.
-X
-X A preliminary weighted fit gives rho/mu and a residual fit gives
-X rho2/mu2 and var_cutoff.  Each bin with more than one score then gets
-X a deviation measure stored in score_var[j]; bins whose sqrt(score_var)
-X exceeds three times the root-mean value are emptied (hist[j] = 0) and
-X counted in pr->nb_trimmed / pr->n1_trimmed.  fit_llen() is finally
-X called on the remaining bins to produce the reported parameters.
-X
-X NOTE(review): nb_trimmed and n1_trimmed are only incremented here -
-X presumably the caller zeroes them; confirm.  det and n_u2 are used as
-X divisors without a zero check.
-*/
-void
-fit_llens(struct llen_str *llen, struct rstat_str *pr)
-{
-X int j;
-X int n, n_u2;
-X double x, y, y2, u, u2, v, z;
-X double mean_x, mean_y, var_x, var_y, covar_xy;
-X double mean_y2, covar_xy2;
-X double mean_u2, mean_3u2, dllj;
-X double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;
-X
-/* now fit scores to best linear function of log(n), using
-X simple linear regression */
-X
-X /* locate the first occupied bin; llen->min ends one below it */
-X for (llen->min=0; llen->min < llen->max; llen->min++)
-X if (llen->hist[llen->min]) break;
-X llen->min--;
-X
-X /* per-bin variance of the mean, floored at 1.0 (fit_llen() uses 0.1) */
-X for (j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] > 1) {
-X dllj = (double)llen->hist[j];
-X llen->score_var[j] = (double)llen->score2_sums[j]/dllj
-X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-X llen->score_var[j] /= (double)(llen->hist[j]-1);
-X if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
-X }
-X }
-X
-X /* weighted sums for the normal equations, weight = 1/score_var[j] */
-X n_w = 0.0;
-X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-X for (j = llen->min; j < llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = j + 0.5;
-X dllj = (double)llen->hist[j];
-X n_w += dllj/llen->score_var[j];
-X sum_x += dllj * x / llen->score_var[j] ;
-X sum_y += llen->score_sums[j] / llen->score_var[j];
-X sum_x2 += dllj * x * x /llen->score_var[j];
-X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
-X }
-X
-X det = n_w * sum_x2 - sum_x * sum_x;
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
-X
-/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
-X
-X n = 0;
-X mean_x = mean_y = mean_y2 = 0.0;
-X var_x = var_y = 0.0;
-X covar_xy = covar_xy2 = 0.0;
-X
-X /* unweighted moments; this loop and the next include bin llen->max */
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1 ) {
-X n += llen->hist[j];
-X x = (double)j + 0.5;
-X dllj = (double)llen->hist[j];
-X mean_x += dllj * x;
-X mean_y += llen->score_sums[j];
-X var_x += dllj * x * x;
-X var_y += llen->score2_sums[j];
-X covar_xy += x * llen->score_sums[j];
-X }
-X mean_x /= n; mean_y /= n;
-X var_x = var_x / n - mean_x * mean_x;
-X var_y = var_y / n - mean_y * mean_y;
-X
-X covar_xy = covar_xy / n - mean_x * mean_y;
-/* pr->rho = covar_xy / var_x;
-X pr->mu = mean_y - pr->rho * mean_x;
-*/
-X
-X /* y2 = sum over the bin of (score - u)^2, u being the fitted score for the bin */
-X mean_y2 = covar_xy2 = 0.0;
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = (double)j + 0.5;
-X u = pr->rho * x + pr->mu;
-X y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u;
-X mean_y2 += y2;
-X covar_xy2 += x * y2;
-X }
-X
-X /* rho2/mu2: linear fit of the squared residuals against x */
-X mean_y2 /= n;
-X covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
-X pr->rho2 = covar_xy2 / var_x;
-X pr->mu2 = mean_y2 - pr->rho2 * mean_x;
-X
-X if (pr->rho2 < 0.0 )
-X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-X if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
-X
-X pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
-X
-/* fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
-X pr->var_cutoff,z);
-*/
-X /* per bin: u2 = observed minus predicted sum of squared residuals;
-X score_var[j] is re-used to hold u2*u2/(hist[j]-1), or -1.0 for unused bins */
-X mean_u2 = 0.0;
-X n_u2 = 0;
-X for ( j = llen->min; j < llen->max; j++) {
-X y = j+0.5;
-X dllj = (double)llen->hist[j];
-X x = pr->rho * y + pr->mu;
-X v = pr->rho2 * y + pr->mu2;
-X if (v < pr->var_cutoff) v = pr->var_cutoff;
-X if (llen->hist[j]> 1) {
-X u2 = (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj;
-X mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1);
-X n_u2++;
-X /* fprintf(stderr," %d (%d) u2: %.2f v*ll: %.2f %.2f\n",
-X j,llen->hist[j],u2,v*dllj,sqrt(llen->score_var[j])); */
-X }
-X else llen->score_var[j] = -1.0;
-X }
-X
-X mean_u2 = sqrt(mean_u2/(double)n_u2);
-X /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
-X
-X mean_3u2 = mean_u2*3.0;
-X
-X /* empty every bin that deviates by more than three times the root-mean value */
-X for (j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] <= 1) continue;
-X if (sqrt(llen->score_var[j]) > mean_3u2) {
-X /* fprintf(stderr," removing %d %d %.2f\n",
-X j, (int)(exp((double)j/LN_FACT)-0.5),
-X sqrt(llen->score_var[j]));
-X */
-X pr->nb_trimmed++;
-X pr->n1_trimmed += llen->hist[j];
-X llen->hist[j] = 0;
-X }
-X }
-X /* final parameters come from a plain fit over the surviving bins */
-X fit_llen(llen, pr);
-}
-X
-/* one length bin as used by fit_llen2(): s = sum of squared residuals
-X for the bin, n = number of scores in the bin */
-struct s2str {double s; int n;};
-/* sorts sptr[0..n-1] into descending order of the s field */
-void s2_sort ( struct s2str *sptr, int n);
-X
-/* fit_llen2(): length regression with a trimmed estimate of the
-X  residual variance.
-X
-X  (1) llen->min / llen->max are narrowed to the first / last occupied
-X      bin (llen->max is modified).
-X  (2) scores are regressed on the bin index (x = j + 0.5) with weights
-X      1/score_var[j], using bins that hold more than one score.  With
-X      fewer than 5 such bins, or a determinant <= 0.001, the fit is
-X      abandoned: fit_flag = 0, rho = 0, mu = weighted mean score.
-X  (3) the per-bin sums of squared residuals are sorted (descending)
-X      and roughly 5% of the populated bins are dropped from each end
-X      before pr->mean_var is recomputed; the dropped bins are counted
-X      in pr->nb_trimmed / pr->n1_trimmed.
-X
-X  The fallback values of step (2) used to be overwritten by an
-X  unconditional second evaluation of the regression formulas, which
-X  could divide by a zero determinant; that duplicate has been removed
-X  (it produced identical values whenever the fit succeeded).  Moment
-X  sums whose results were never used have also been dropped.
-X
-X  If the work array cannot be allocated the function returns with
-X  rho/mu set but mean_var and var_e untouched.
-*/
-void
-fit_llen2(struct llen_str *llen, struct rstat_str *pr)
-{
-X  int j;
-X  int n_y2, llen_delta, llen_del05;
-X  int n_size;
-X  double x, y2, u;
-X  double mean_y2;
-X  struct s2str *ss2;
-X
-X  double sum_x, sum_y, sum_x2, sum_xy, det, n_w;
-X
-/* now fit scores to best linear function of log(n), using
-X   simple linear regression */
-X
-X  for (llen->min=0; llen->min < llen->max; llen->min++)
-X    if (llen->hist[llen->min]) break;
-X
-X  for ( ; llen->max > llen->min; llen->max--)
-X    if (llen->hist[llen->max]) break;
-X
-X  /* per-bin variance of the mean, floored at 1.0 */
-X  for (n_size=0,j = llen->min; j < llen->max; j++) {
-X    if (llen->hist[j] > 1) {
-X      llen->score_var[j] = llen->score2_sums[j]/(double)llen->hist[j]
-X	- (llen->score_sums[j]/(double)llen->hist[j])
-X	* (llen->score_sums[j]/(double)llen->hist[j]);
-X      llen->score_var[j] /= (double)(llen->hist[j]-1);
-X      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
-X      n_size++;
-X    }
-X  }
-X
-X  /* weighted sums for the normal equations */
-X  n_w = 0.0;
-X  sum_x = sum_y = sum_x2 = sum_xy = 0;
-X  for (j = llen->min; j < llen->max; j++)
-X    if (llen->hist[j] > 1) {
-X      x = j + 0.5;
-X      n_w += (double)llen->hist[j]/llen->score_var[j];
-X      sum_x += (double)llen->hist[j] * x / llen->score_var[j] ;
-X      sum_y += llen->score_sums[j] / llen->score_var[j];
-X      sum_x2 += (double)llen->hist[j] * x * x /llen->score_var[j];
-X      sum_xy += x * llen->score_sums[j]/llen->score_var[j];
-X    }
-X
-X  if (n_size < 5 ) {
-X    llen->fit_flag=0;
-X    pr->rho = 0;
-X    pr->mu = sum_y/n_w;
-X  }
-X  else {
-X    det = n_w * sum_x2 - sum_x * sum_x;
-X    if (det > 0.001) {
-X      pr->rho = (n_w * sum_xy - sum_x * sum_y)/det;
-X      pr->rho_e = n_w/det;
-X      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;
-X      pr->mu_e = sum_x2/det;
-X    }
-X    else {
-X      llen->fit_flag = 0;
-X      pr->rho = 0;
-X      pr->mu = sum_y/n_w;
-X    }
-X  }
-X
-/*   fprintf(stderr," rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
-X
-X  if ((ss2=(struct s2str *)calloc(llen->max+1,sizeof(struct s2str)))==NULL) {
-X    fprintf(stderr," cannot allocate ss2\n");
-X    return;
-X  }
-X
-X  /* residual sum of squares for every bin with more than VHISTC scores */
-X  mean_y2 = 0.0;
-X  n_y2 = 0;
-X  for (j = llen->min; j <= llen->max; j++)
-X    if (llen->hist[j] > VHISTC) {
-X      n_y2 += ss2[j].n = llen->hist[j];
-X      x = (double)j + 0.5;
-X      u = pr->rho * x + pr->mu;
-X      ss2[j].s = y2 = llen->score2_sums[j] - 2*llen->score_sums[j]*u + llen->hist[j]*u*u;
-X      mean_y2 += y2;
-X    }
-X  pr->mean_var = mean_y2/(double)n_y2;
-X
-X  /* largest residual sums first; unused bins (s = 0, n = 0) sort with
-X     the smallest values */
-X  s2_sort(ss2+llen->min,llen->max-llen->min+1);
-X
-X  /* fprintf(stderr,"llen->min: %d, max: %d\n",llen->min,llen->max); */
-X  llen_delta = 0;
-X  for (j=llen->min; j<=llen->max; j++) {
-X    if (ss2[j].n > 1) {
-X      llen_delta++;
-X    }
-X  }
-X
-X  /* drop llen_delta/20 entries from each end of the sorted list */
-X  llen_del05 = llen_delta/20;
-X  mean_y2 = 0.0;
-X  n_y2 = 0;
-X  for (j = llen->min; j<llen->min+llen_del05; j++) {
-X    pr->n1_trimmed += ss2[j].n;
-X    pr->nb_trimmed++;
-X  }
-X  for (j = llen->min+llen_del05; j <= llen->min+llen_delta-llen_del05; j++)
-X    if (ss2[j].n > 1) {
-X      mean_y2 += ss2[j].s;
-X      n_y2 += ss2[j].n;
-X    }
-X  for (j = llen->min+llen_delta-llen_del05+1; j< llen->max; j++) {
-X    pr->n1_trimmed += ss2[j].n;
-X    pr->nb_trimmed++;
-X  }
-X
-X  free(ss2);
-X  if (n_y2 > 1) pr->mean_var = mean_y2/(double)n_y2;
-X
-X  /* fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var: %.4f/%d\n",
-X     pr->rho*LN_FACT,pr->mu,pr->mean_var,n_y2); */
-X
-X  pr->var_e = 0.0;
-}
-X
-/* REG_STATS - scaled z-score from the length regression (rho, mu) and
-X  a single residual variance (mean_var).
-X  Returns 0 for non-positive scores and for sequences shorter than
-X  LENGTH_CUTOFF; otherwise 50 + 10*z.  escore and comp are unused. */
-double find_zr(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double log_len, resid;
-X
-X  if (score <= 0 || length < LENGTH_CUTOFF) return 0;
-X
-X  log_len = LN_FACT*log((double)(length));
-X
-X  /* observed score minus the score predicted for this length */
-X  resid = (double)score - pu->r_u.rg.rho * log_len - pu->r_u.rg.mu;
-X
-X  return (50.0 + (resid / sqrt(pu->r_u.rg.mean_var))*10.0);
-}
-X
-/* REG2_STATS - scaled z-score from the length regression (rho, mu)
-X  with a length-dependent variance rho2*log_len + mu2.
-X  When the predicted variance falls below var_cutoff, mean_var is used
-X  in its place.  Returns 0 for sequences shorter than LENGTH_CUTOFF
-X  (the score itself is not range-checked); otherwise 50 + 10*z.
-X  escore and comp are unused. */
-double find_zr2(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double log_len, var, resid;
-X
-X  if ( length < LENGTH_CUTOFF) return 0;
-X
-X  log_len = LN_FACT*log((double)(length));
-X
-X  var = pu->r_u.rg.rho2 * log_len + pu->r_u.rg.mu2;
-X  if (var < pu->r_u.rg.var_cutoff) {
-X    var = pu->r_u.rg.mean_var;
-X  }
-X
-X  resid = (double)score - pu->r_u.rg.rho * log_len - pu->r_u.rg.mu;
-X
-X  return (50.0 + (resid / sqrt(var))*10.0);
-}
-X
-#ifdef USE_LNSTATS
-/* LN_STATS - scaled z-score for a score normalised by ln(length):
-X  ls = score*LN200/ln(length), compared with mu / mean_var.
-X  Returns 50 + 10*z.  comp is unused. */
-double find_zl(int score, int length, double comp, struct pstat_str *pu)
-{
-X  double ls;
-X
-X  ls = (double)score*LN200/log((double)length);
-X
-X  return (50.0 + ((ls - pu->r_u.rg.mu) / sqrt(pu->r_u.rg.mean_var))*10.0);
-}
-#endif
-X
-/* MLE_STATS - scaled z-score from the extreme-value parameters
-X  Lambda and K: Lambda*score - ln(K*n*m) is shifted by Euler's
-X  constant and divided by pi/sqrt(6).  Both lengths (library sequence
-X  length and a_n0) are raised to at least 1.  Returns 50 + 10*z.
-X  escore and comp are unused. */
-double
-find_ze(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double z, mp, np;
-X
-X  np = (double)length;
-X  if (np < 1.0) np = 1.0;
-X
-X  mp = pu->r_u.ag.a_n0;
-X  if (mp < 1.0) mp = 1.0;
-X
-X  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * np * mp);
-X  z = (-z + EULER_G) / (- PI_SQRT6);
-X
-X  return (50.0 + z*10.0);
-}
-X
-/* MLE2_STATS - scaled z-score from the composition-dependent fit
-X  (mle2_a0, mle2_a1, mle2_a2, mle2_b1).
-X  A non-positive comp is replaced by the stored average ave_comp; both
-X  lengths are raised to at least 1.  Returns 50 + 10*z.
-X  escore is unused. */
-double
-find_ze2(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double z, mp, np, expect;
-X
-X  if (comp <= 0.0) comp = pu->r_u.m2.ave_comp;
-X
-X  /* avoid very biased comp estimates */
-X  /* comp = exp((4.0*log(comp)+log(pu->r_u.m2.ave_comp))/5.0); */
-X
-X  np = (double)length;
-X  if (np < 1.0) np = 1.0;
-X
-X  mp = pu->r_u.m2.a_n0;
-X  if (mp < 1.0) mp = 1.0;
-X
-X  /* location predicted for this composition and search space */
-X  expect = pu->r_u.m2.mle2_a0 + pu->r_u.m2.mle2_a1 * comp + pu->r_u.m2.mle2_a2 * comp * log(np * mp);
-X
-X  z = (-(expect) + score) / (pu->r_u.m2.mle2_b1 * comp);
-X  z = (-z + EULER_G) / (- PI_SQRT6);
-X
-X  return (50.0 + z*10.0);
-}
-X
-/* AG_STATS - scaled z-score from Altschul-Gish Lambda, K and H.
-X  Both lengths are reduced by a_n0f and by ln(length)/H before use and
-X  then raised to at least 1.  Returns 50 + 10*z.
-X  escore and comp are unused. */
-double
-find_za(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double z, mp, np, a_n1, a_n1f;
-X
-X  a_n1 = (double)length;
-X  a_n1f = log(a_n1)/pu->r_u.ag.H;
-X
-X  np = a_n1 - pu->r_u.ag.a_n0f - a_n1f;
-X  if (np < 1.0) np = 1.0;
-X
-X  mp = pu->r_u.ag.a_n0 - pu->r_u.ag.a_n0f - a_n1f;
-X  if (mp < 1.0) mp = 1.0;
-X
-X  z = pu->r_u.ag.Lambda * score - log(pu->r_u.ag.K * np * mp);
-X  z = (-z + EULER_G) / (- PI_SQRT6);
-X
-X  return (50.0 + z*10.0);
-}
-X
-/* find_zn: scaled z-score with no length correction:
-X  50 + 10*(score - mu)/sqrt(mean_var).
-X  escore, length and comp are unused. */
-double find_zn(int score, double escore, int length, double comp, struct pstat_str *pu)
-{
-X  double dev;
-X
-X  dev = (double)score - pu->r_u.rg.mu;
-X
-X  return (50.0 + (dev / sqrt(pu->r_u.rg.mean_var))*10.0);
-}
-X
-/* z_to_E: expected number of scores >= zs among n comparisons.
-X  zs is used as given (no 50/10 rescaling is undone here).  n is
-X  entries, or db.entries when entries < 1.  Values of zs above ZS_MAX
-X  return 0.  The extreme-value form is used unless NORMAL_DIST is
-X  defined. */
-double
-z_to_E(double zs, long entries, struct db_str db)
-{
-X  double e, n;
-X
-X  /* if (db->entries < 5) return (double)db.entries; */
-X  if (zs > ZS_MAX) return 0.0;
-X
-X  if (entries >= 1) { n = entries;}
-X  else { n = db.entries;}
-X
-#ifndef NORMAL_DIST
-X  e = exp(- PI_SQRT6 * zs - .577216);
-X  /* for small e, 1 - exp(-e) is replaced by e itself */
-X  if (e > .01) return n * (1.0 - exp(-e));
-X  return n * (e);
-#else
-X  return n * erfc(zs/M_SQRT2)/2.0;
-#endif
-}
-X
-/* zs_to_p: probability of a scaled z-score >= zs for one comparison.
-X
-X  zs is on the 50 + 10*z scale produced by the find_z*() functions; it
-X  is converted back to z first.  Values of z above ZS_MAX return 0.
-X  The extreme-value form is used unless NORMAL_DIST is defined, in
-X  which case the upper normal tail erfc(z/sqrt(2))/2 is returned.
-*/
-double
-zs_to_p(double zs)
-{
-X  double e, z;
-X
-X  /* if (db.entries < 5) return 0.0; */
-X
-X  z = (zs - 50.0)/10.0;
-X
-X  if (z > ZS_MAX) return 0.0;
-X
-#ifndef NORMAL_DIST
-X  e = exp(- PI_SQRT6 * z - EULER_G);
-X  return (e > .01 ? 1.0 - exp(-e) : e);
-#else
-X  /* the tail must be taken at the unscaled z, as zs_to_E() does;
-X     using the scaled zs made the result vanish for ordinary scores */
-X  return erfc(z/M_SQRT2)/2.0;
-#endif
-}
-X
-/* zs_to_bit: convert a scaled z-score (50 + 10*z) to a bit score for
-X  sequences of length n0 and n1:
-X  (pi/sqrt(6)*z + Euler's constant + ln(n0*n1)) / ln(2). */
-double
-zs_to_bit(double zs, int n0, int n1)
-{
-X  double z, a_n0, a_n1, nats;
-X
-X  a_n0 = (double)n0;
-X  a_n1 = (double)n1;
-X  z = (zs - 50.0)/10.0;
-X
-X  nats = PI_SQRT6 * z + EULER_G + log(a_n0*a_n1);
-X
-X  return (nats)/M_LN2 ;
-}
-X
-/* zs_to_E: E()-value for a scaled z-score (50 + 10*z).
-X
-X  The single-comparison probability is multiplied by k:
-X    - DNA/RNA searches: total library length (db.length, plus
-X      db.carry * LONG_MAX when carry > 0) divided by n1;
-X    - otherwise: entries, or db.entries when entries < 1.
-X  k is never taken below 1.  z above ZS_MAX returns 0.  The
-X  extreme-value form is used unless NORMAL_DIST is defined. */
-double
-zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
-{
-X  double e, z, k;
-X
-X  /* if (db->entries < 5) return 0.0; */
-X
-X  z = (zs - 50.0)/10.0;
-X
-X  if (z > ZS_MAX ) return 0.0;
-X
-X  if (entries < 1) entries = db.entries;
-X
-X  if (dnaseq != SEQT_DNA && dnaseq != SEQT_RNA) {
-X    k = (double)entries;
-X  }
-X  else {
-X    k = (double)db.length /(double)n1;
-X    if (db.carry > 0) {
-X      k += ((double)db.carry * (double)LONG_MAX)/(double)n1;
-X    }
-X  }
-X
-X  if (k < 1.0) k = 1.0;
-X
-#ifndef NORMAL_DIST
-X  e = exp(-(z * PI_SQRT6 + EULER_G));
-X  if (e > .01) return k * (1.0 - exp(-e));
-X  return k * (e);
-#else
-X  return k * erfc(z/M_SQRT2)/2.0;
-#endif
-}
-X
-#ifdef NORMAL_DIST
-double np_to_z(double, int *);
-#endif
-X
-/* computes the scaled z-score (50 + 10*z) that corresponds to a given
-X E()-value for a library of `entries` sequences - the inverse of the
-X z-score to E() conversions above.  E is first divided by entries to
-X give a per-comparison value.  With NORMAL_DIST the conversion uses
-X np_to_z() and returns 0.0 when that routine reports a fault.
-X NOTE(review): entries and E are used without range checks (entries
-X is a divisor, E/entries is passed to log()); confirm callers
-X guarantee positive values. */
-double
-E_to_zs(double E, long entries)
-{
-X double e, z;
-X int error;
-X
-X e = E/(double)entries;
-X
-#ifndef NORMAL_DIST
-X z = (log(e)+EULER_G)/(- PI_SQRT6);
-X return z*10.0+50.0;
-#else
-X z = np_to_z(1.0-e,&error);
-X
-X if (!error) return z*10.0+50.0;
-X else return 0.0;
-#endif
-}
-X
-/* computes 1.0 - E value for a given z value, assuming extreme value
-X distribution.
-X zs is on the 50 + 10*z scale.  Returns 0.0 when entries < 5 and 1.0
-X when z exceeds ZS_MAX; otherwise entries times the complement of the
-X single-comparison probability.
-X NOTE(review): the NORMAL_DIST branch returns entries*erf(z/sqrt(2))/2,
-X which is not the complement of the erfc(z/sqrt(2))/2 tail used by
-X zs_to_E(); and the z > ZS_MAX result (1.0) is not scaled by entries.
-X Confirm both are intended. */
-double
-zs_to_Ec(double zs, long entries)
-{
-X double e, z;
-X
-X if (entries < 5) return 0.0;
-X
-X z = (zs - 50.0)/10.0;
-X
-X if (z > ZS_MAX) return 1.0;
-X
-#ifndef NORMAL_DIST
-X e = exp(- PI_SQRT6 * z - EULER_G);
-X return (double)entries * (e > .01 ? exp(-e) : 1.0 - e);
-#else
-X return (double)entries*erf(z/M_SQRT2)/2.0;
-#endif
-}
-X
-/* E1_to_s: raw score threshold for a given E() value, from the
-X  Lambda, K and H stored in *pu.
-X  Both lengths (n0, n1) are shortened by ln(K*n0*n1)/H and raised to at
-X  least 1 before use.  The result is rounded to the nearest integer
-X  and never negative. */
-X
-int
-E1_to_s(double e_val, int n0, int n1, struct pstat_str *pu) {
-X  double mp, np, a_n0, a_n0f, a_n1;
-X  double s_real;
-X  int score;
-X
-X  a_n0 = (double)n0;
-X  a_n1 = (double)n1;
-X
-X  /* length correction shared by both sequences */
-X  a_n0f = log(pu->r_u.ag.K * a_n0 * a_n1)/pu->r_u.ag.H;
-X
-X  np = a_n1 - a_n0f;
-X  if (np < 1.0) np = 1.0;
-X
-X  mp = a_n0 - a_n0f;
-X  if (mp < 1.0) mp = 1.0;
-X
-X  s_real = (log( pu->r_u.ag.K * mp * np) - log(e_val))/pu->r_u.ag.Lambda +0.5;
-X  score = (int)(s_real);
-X
-X  return (score < 0) ? 0 : score;
-}
-X
-/* no longer used; stat_str returned by process_hist
-void
-summ_stats(char *s_str, struct pstat_str *pu)
-{
-X strcpy(s_str,f_string);
-}
-*/
-X
-/* vsort: sort v[0..n-1] into descending order (shell sort), applying
-X  the same exchanges to s[] so that s[i] stays paired with v[i].
-X  The parameter list is written as a prototype; old-style (K&R)
-X  definitions are obsolescent and were removed from the language in
-X  C23. */
-void
-vsort(double *v, int *s, int n)
-{
-X  int gap, i, j;
-X  double tmp;
-X  int itmp;
-X
-X  for (gap=n/2; gap>0; gap/=2)
-X    for (i=gap; i<n; i++)
-X      for (j=i-gap; j>=0; j -= gap) {
-X	/* stop as soon as the pair is already in descending order */
-X	if (v[j] >= v[j+gap]) break;
-X	tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
-X	itmp = s[j]; s[j]=s[j+gap]; s[j+gap]=itmp;
-X      }
-}
-X
-/*
-void s_sort (double **ptr, int nbest)
-{
-X int gap, i, j;
-X double *tmp;
-X
-X for (gap = nbest/2; gap > 0; gap /= 2)
-X for (i = gap; i < nbest; i++)
-X for (j = i - gap; j >= 0; j-= gap) {
-X if (*ptr[j] >= *ptr[j + gap]) break;
-X tmp = ptr[j];
-X ptr[j] = ptr[j + gap];
-X ptr[j + gap] = tmp;
-X }
-}
-*/
-X
-/* ss_sort: shell sort of ptr[0..n-1] into descending order. */
-void ss_sort (int *ptr, int n)
-{
-X  int gap, hi, lo;
-X  int held;
-X
-X  for (gap = n/2; gap > 0; gap /= 2) {
-X    for (hi = gap; hi < n; hi++) {
-X      lo = hi - gap;
-X      /* bubble the larger value towards the front, gap steps at a time */
-X      while (lo >= 0 && ptr[lo] < ptr[lo + gap]) {
-X	held = ptr[lo];
-X	ptr[lo] = ptr[lo + gap];
-X	ptr[lo + gap] = held;
-X	lo -= gap;
-X      }
-X    }
-X  }
-}
-X
-X
-/* s2_sort: shell sort of ptr[0..n-1] into descending order of the s
-X  field; the n field travels with its s value. */
-void s2_sort (struct s2str *ptr, int n)
-{
-X  int gap, hi, lo;
-X  double s_held;
-X  int n_held;
-X
-X  for (gap = n/2; gap > 0; gap /= 2) {
-X    for (hi = gap; hi < n; hi++) {
-X      lo = hi - gap;
-X      while (lo >= 0 && ptr[lo].s < ptr[lo + gap].s) {
-X	s_held = ptr[lo].s;
-X	n_held = ptr[lo].n;
-X	ptr[lo].s = ptr[lo + gap].s;
-X	ptr[lo].n = ptr[lo + gap].n;
-X	ptr[lo + gap].s = s_held;
-X	ptr[lo + gap].n = n_held;
-X	lo -= gap;
-X      }
-X    }
-X  }
-}
-X
-/* last_stats: intentionally empty in this file; it takes no action.
-X NOTE(review): presumably kept so that callers shared with other
-X statistics modules still link - confirm. */
-void last_stats() {}
-X
-/* scale_scores: compute a z-score and an E()-value for each of the
-X  nbest entries in bptr[] and re-sort them with sortbeste().
-X  The z-score comes from the current find_zp() function applied to
-X  score[pst.score_ix]; the E()-value from zs_to_E() with
-X  pst.zdb_size.  Nothing is done when either pst.zsflag or
-X  pst.zsflag_f is negative. */
-void
-scale_scores(struct beststr **bptr, int nbest, struct db_str db,
-X	     struct pstruct pst, struct pstat_str *rs)
-{
-X  int i;
-X  double zscore;
-X  struct beststr *bp;
-X
-X  if (!(pst.zsflag >= 0 && pst.zsflag_f >= 0)) return;
-X
-X  i = 0;
-X  while (i < nbest) {
-X    bp = bptr[i];
-X    zscore = find_zp(bp->score[pst.score_ix], bp->escore,
-X		     bp->n1,bp->comp,rs);
-X    bp->zscore = zscore;
-X    bp->escore = zs_to_E(zscore,bp->n1,pst.dnaseq, pst.zdb_size,db);
-X    i++;
-X  }
-X
-X  sortbeste(bptr,nbest);
-}
-X
-#ifdef NORMAL_DIST
-/* ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3
-X
-X Produces the normal deviate Z corresponding to a given lower
-X tail area of P; Z is accurate to about 1 part in 10**16.
-X
-X The hash sums below are the sums of the mantissas of the
-X coefficients. They are included for use in checking
-X transcription.
-*/
-X
-/* np_to_z: normal deviate for the lower-tail probability p.
-X
-X p     - lower tail area
-X fault - set to 0 on success; set to 1 (and 0.0 is returned) when the
-X         tail area min(p, 1-p) is not positive
-X
-X Three rational approximations are used, selected by |p - 0.5| and by
-X r = sqrt(-ln(min(p, 1-p))).  The sum_ab / sum_cd / sum_ef checksums
-X are only used by the commented-out self-test below. */
-double np_to_z(double p, int *fault) {
-X
-X double q, r, ppnd16;
-X
-X double zero = 0.0, one = 1.0, half = 0.5;
-X double split1 = 0.425, split2 = 5.0;
-X double const1 = 0.180625, const2 = 1.6;
-X
-/* Coefficients for P close to 0.5 */
-X
-X double a0 = 3.3871328727963666080e0;
-X double a1 = 1.3314166789178437745e+2;
-X double a2 = 1.9715909503065514427e+3;
-X double a3 = 1.3731693765509461125e+4;
-X double a4 = 4.5921953931549871457e+4;
-X double a5 = 6.7265770927008700853e+4;
-X double a6 = 3.3430575583588128105e+4;
-X double a7 = 2.5090809287301226727e+3;
-X double b1 = 4.2313330701600911252e+1;
-X double b2 = 6.8718700749205790830e+2;
-X double b3 = 5.3941960214247511077e+3;
-X double b4 = 2.1213794301586595867e+4;
-X double b5 = 3.9307895800092710610e+4;
-X double b6 = 2.8729085735721942674e+4;
-X double b7 = 5.2264952788528545610e+3;
-X
-X double sum_ab= 55.8831928806149014439;
-/*
-X Coefficients for P not close to 0, 0.5 or 1.
-*/
-X
-X double c0 = 1.42343711074968357734;
-X double c1 = 4.63033784615654529590;
-X double c2 = 5.76949722146069140550;
-X double c3 = 3.64784832476320460504;
-X double c4 = 1.27045825245236838258;
-X double c5 = 2.41780725177450611770e-1;
-X double c6 = 2.27238449892691845833e-2;
-X double c7 = 7.74545014278341407640e-4;
-X double d1 = 2.05319162663775882187;
-X double d2 = 1.67638483018380384940;
-X double d3 = 6.89767334985100004550e-1;
-X double d4 = 1.48103976427480074590e-1;
-X double d5 = 1.51986665636164571966e-2;
-X double d6 = 5.47593808499534494600e-4;
-X double d7 = 1.05075007164441684324e-9;
-X
-X double sum_cd=49.33206503301610289036;
-/*
-X Coefficients for P near 0 or 1.
-*/
-X double e0 = 6.65790464350110377720e0;
-X double e1 = 5.46378491116411436990e0;
-X double e2 = 1.78482653991729133580e0;
-X double e3 = 2.96560571828504891230e-1;
-X double e4 = 2.65321895265761230930e-2;
-X double e5 = 1.24266094738807843860e-3;
-X double e6 = 2.71155556874348757815e-5;
-X double e7 = 2.01033439929228813265e-7;
-X double f1 = 5.99832206555887937690e-1;
-X double f2 = 1.36929880922735805310e-1;
-X double f3 = 1.48753612908506148525e-2;
-X double f4 = 7.86869131145613259100e-4;
-X double f5 = 1.84631831751005468180e-5;
-X double f6 = 1.42151175831644588870e-7;
-X double f7 = 2.04426310338993978564e-15;
-X
-X double sum_ef=47.52583317549289671629;
-X
-X double sum_tmp = 0.0;
-X
-X /*
-X sum_tmp = a0+a1+a2+a3+a4+a5+a6+a7+b1+b2+b3+b4+b5+b6+b7;
-X if (fabs(sum_tmp - sum_ab) > 1e-12) {
-X fprintf (stderr," sum_ab error: %lg %lg\n",sum_tmp,sum_ab);
-X *fault = 1;
-X return zero;
-X }
-X
-X sum_tmp = c0+c1+c2+c3+c4+c5+c6+c7+d1+d2+d3+d4+d5+d6+d7;
-X if (fabs(sum_tmp - sum_cd) > 1e-12) {
-X fprintf (stderr," sum_cd error: %lg %lg\n",sum_tmp,sum_cd);
-X *fault = 1;
-X return zero;
-X }
-X sum_tmp = e0+e1+e2+e3+e4+e5+e6+e7+f1+f2+f3+f4+f5+f6+f7;
-X if (fabs(sum_tmp - sum_ef) > 1e-12) {
-X fprintf (stderr," sum_ef error: %lg %lg\n",sum_tmp,sum_ef);
-X *fault = 1;
-X return zero;
-X }
-X */
-X
-X *fault = 0;
-X q = p - half;
-X /* central region: |p - 0.5| <= 0.425 */
-X if (fabs(q) <= split1) {
-X r = const1 - q * q;
-X return q * (((((((a7 * r + a6) * r + a5) * r + a4) * r + a3)
-X * r + a2) * r + a1) * r + a0) /
-X (((((((b7 * r + b6) * r + b5) * r + b4) * r + b3)
-X * r + b2) * r + b1) * r + one);
-X }
-X else {
-X /* tails: r is the smaller of p and 1-p; it must be positive */
-X r = (q < zero) ? p : one - p;
-X if (r <= zero) {
-X *fault = 1;
-X return zero;
-X }
-X r = sqrt(-log(r));
-X if (r <= split2) {
-X r -= const2;
-X ppnd16 = (((((((c7 * r + c6) * r + c5) * r + c4) * r + c3)
-X * r + c2) * r + c1) * r + c0) /
-X (((((((d7 * r + d6) * r + d5) * r + d4) * r + d3)
-X * r + d2) * r + d1) * r + one);
-X }
-X else {
-X r -= split2;
-X ppnd16 = (((((((e7 * r + e6) * r + e5) * r + e4) * r + e3)
-X * r + e2) * r + e1) * r + e0) /
-X (((((((f7 * r + f6) * r + f5) * r + f4) * r + f3)
-X * r + f2) * r + f1) * r + one);
-X }
-X /* the approximation gives the magnitude; the sign follows the side of 0.5 */
-X if (q < zero) return -ppnd16;
-X else return ppnd16;
-X }
-}
-#endif
-SHAR_EOF
-chmod 0644 scaleswn.c ||
-echo 'restore of scaleswn.c failed'
-Wc_c="`wc -c < 'scaleswn.c'`"
-test 69722 -eq "$Wc_c" ||
- echo 'scaleswn.c: original size 69722, current size' "$Wc_c"
-fi
-# ============= scaleswt.c ==============
-if test -f 'scaleswt.c' -a X"$1" != X"-c"; then
- echo 'x - skipping scaleswt.c (File already exists)'
-else
-echo 'x - extracting scaleswt.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'scaleswt.c' &&
-/* scaleswt.c */
-X
-/* $Name: fa_34_26_5 $ - $Id: scaleswt.c,v 1.21 2006/04/12 18:50:01 wrp Exp $ */
-/* as of 24 Sept, 2000 - scaleswn uses no global variables */
-X
-/*
-X copyright (c) 1995, 1996, 2000, 2002 William R. Pearson
-X
-X This version is designed for fasts/f, which used Tatusov
-X probabilities for statistical estimates, but still needs a
-X quick-and-dirty linear regression fit to rank things
-X
-X For comparisons that obey tatusov statistics, we try whenever
-X possible to provide accurate e_scores, rather than raw scores. As a
-X result, no lambda/K fitting is required; and process_hist() can be
-X called at the very beginning of the search to initialize some of the
-X statistics structures and find_zp().
-X
-X find_zp() must still return a valid z_score surrogate, as
-X comp_lib.c/p2_complib.c continue to use z_score's to rank hits, save
-X the best, etc.
-X
-X If e_score's cannot be calculated, the process_hist() provides
-X linear regression fitting for conventional z_score estimates.
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include <limits.h>
-#include <float.h>
-#include <math.h>
-X
-#include <limits.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "structs.h"
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#define MAXHIST 50
-#define MAX_LLEN 200
-#define LHISTC 5
-#define VHISTC 5
-#define MAX_SSCORE 300
-X
-#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
-X
-#define LN_FACT 10.0
-#ifndef M_LN2
-#define M_LN2 0.69314718055994530942
-#endif
-X
-#define EULER_G 0.57721566490153286060
-#define PI_SQRT6 1.28254983016186409554
-X
-#ifndef M_SQRT2
-#define M_SQRT2 1.41421356237
-#endif
-#define LN200 5.2983173666
-#define ZS_MAX 400.0 /* used to prevent underflow on some machines */
-#define TOLERANCE 1.0e-12
-#define TINY 1.0e-6
-X
-/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
-/* regression / scaling parameters used by the statistics routines in
-X this file.
-X NOTE(review): the ng* and tat_* fields are not described in the part
-X of the file reviewed here - presumably ungapped Lambda/K/H and the
-X Tatusov-probability fit coefficients; confirm against their users. */
-struct rstat_str {
-X double ngLambda, ngK, ngH;
-X double rho, rho_e, mu, mu_e, mean_var, var_e; /* ?_e:std. error of ? */
-/* used by REG2_STATS */
-X double rho2, mu2, var_cutoff;
-X int n_trimmed; /* excluded because of high z-score */
-X int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
-X double tat_a, tat_b, tat_c, spacefactor;
-X int have_tat;
-X int tie_j;
-};
-X
-#define AVE_STATS 0 /* no length effect, only mean/variance */
-double find_zt(int score, double escore, int len, double comp, struct rstat_str *);
-X
-double find_zn(int score, double escore, int len, double comp, struct rstat_str *);
-X
-double power(double, int);
-X
-void sortbesto(double *, int );
-extern void sortbeste(struct beststr **bptr, int nbest);
-X
-int proc_hist_n(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp, int do_trim,
-X struct rstat_str *);
-X
-#define REG_STATS 1 /* length-regression scaled */
-double find_zr(int score, double escore, int len, double comp, struct rstat_str *);
-X
-int proc_hist_r(struct stat_str *sptr, int n,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct rstat_str *rs);
-X
-double (*find_zp)(int score, double escore, int len, double comp,
-X struct rstat_str *) = &find_zr;
-X
-struct llen_str {
-X int min, max; /* min: one below the first non-empty bin (set in fit_llen()); max: MAX_LLEN (set in inithist()) */
-X int max_score, min_score; /* range of scores seen by addhist() */
-X int *hist; /* per-bin counts; bin index is (int)(LN_FACT*log(length)+0.5) */
-X double *score_sums, *score2_sums; /* per-bin sum of scores and sum of squared scores */
-X double *score_var; /* per-bin work array filled by fit_llen()/fit_llens() */
-X int max_length, min_length, zero_s; /* length range seen; zero_s counts entries rejected by addhist() */
-X int fit_flag; /* cleared by fit_llen() when the regression cannot be used */
-};
-X
-static void inithist(struct llen_str *, struct pstruct, int);
-static void free_hist( struct llen_str *);
-static void addhist(struct llen_str *, int, int, int);
-static void prune_hist(struct llen_str *, int, int, int, long *);
-void inithistz(int, struct hist_str *histp);
-void addhistz(double zs, struct hist_str *histp);
-X
-static void fit_llen(struct llen_str *, struct rstat_str *);
-static void fit_llens(struct llen_str *, struct rstat_str *);
-X
-void linreg(double *lny, double *x, double *lnx, int n,
-X double *a, double *b, double *c, int start);
-X
-double calc_spacefactor(const unsigned char *, int, int, int);
-X
-double det(double a11, double a12, double a13,
-X double a21, double a22, double a23,
-X double a31, double a32, double a33);
-X
-double factorial (int a, int b);
-X
-/* void set_db_size(int, struct db_str *, struct hist_str *); */
-X
-#ifdef DEBUG
-FILE *tmpf;
-#endif
-X
-/* process_hist() - select the statistics used to scale scores and point
-X find_zp at the matching z-score function.
-X
-X sptr, nstats - sampled scores, passed on to proc_hist_r()
-X m_msg        - supplies escore_flg and the Lambda, K, H values
-X pst          - pst.zsflag < 0 disables statistics; pst.zsflag >= 10
-X                turns outlier trimming off
-X histp        - histogram/summary block that is (re)initialized
-X rs_sp        - in/out: statistics block; allocated here when *rs_sp
-X                is NULL, otherwise cleared and re-used
-X do_hist      - not used
-X
-X Returns pst.zsflag when it is negative (*rs_sp is set to NULL), 1 when
-X m_msg.escore_flg is set (find_zp = find_zt), -1 when fewer than 20
-X scores were sampled (*rs_sp is freed and set to NULL), otherwise the
-X value returned by proc_hist_r().  Calls exit(1) if allocation fails.
-*/
-int
-process_hist(struct stat_str *sptr, int nstats,
-X             struct mngmsg m_msg,
-X             struct pstruct pst,
-X             struct hist_str *histp,
-X             struct rstat_str **rs_sp,
-X             int do_hist
-X             )
-{
-X  int do_trim;
-X  struct rstat_str *rs_s;
-X
-X  if (pst.zsflag < 0) {
-X    *rs_sp = NULL;
-X    return pst.zsflag;
-X  }
-X
-X  if (*rs_sp == NULL) {
-X    if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
-X      /* sizeof yields a size_t; cast it so the argument matches %ld */
-X      fprintf(stderr," cannot allocate rs_s: %ld\n",(long)sizeof(struct rstat_str));
-X      exit(1);
-X    }
-X    *rs_sp = rs_s;
-X  }
-X  else {
-X    rs_s = *rs_sp;
-X    memset(rs_s,0,sizeof(struct rstat_str));
-X  }
-X
-X  /* the search already provides e_scores: no fitting required */
-X  if (m_msg.escore_flg) {
-X    find_zp = &find_zt;
-X    inithistz(MAXHIST,histp);
-X    return 1;
-X  }
-X
-X  if (nstats < 20) {
-X    fprintf(stderr," too few sequences for sampling: %d\n",nstats);
-X    free(rs_s);
-X    *rs_sp = NULL;
-X    return -1;
-X  }
-X
-X  rs_s->ngLambda = m_msg.Lambda;
-X  rs_s->ngK = m_msg.K;
-X  rs_s->ngH = m_msg.H;
-X
-X  /* zsflag values of 10 and above ask for statistics without trimming */
-X  do_trim = (pst.zsflag >= 10) ? 0 : 1;
-X
-X  find_zp = &find_zr;
-X  return proc_hist_r(sptr, nstats,pst, histp, do_trim, rs_s);
-}
-X
-int /* calc_thresh(): returns a high-score cutoff (max_hscore) and stores a
-X z-score trim limit in *zstrim; the H argument is not used */
-calc_thresh(struct pstruct pst, int nstats,
-X double Lambda, double K, double H, double *zstrim)
-{
-X int max_hscore;
-X double ave_n1, tmp_score, z, l_fact;
-X
-X if (pst.dnaseq == SEQT_DNA || pst.dnaseq == SEQT_RNA) {
-X ave_n1 = 5000.0; /* mean library sequence length assumed for DNA/RNA */
-X l_fact = 1.0;
-X }
-X else {
-X ave_n1 = 400.0; /* mean library sequence length assumed otherwise */
-X l_fact = 0.7; /* Lambda is scaled by this factor below */
-X }
-X
-/* max_hscore = MAX_SSCORE; */
-/* mean expected for pst.n0 * 400 for protein, 5000 for DNA */
-/* we want a number of offsets that is appropriate for the database size so
-X far (nstats)
-*/
-X
-/*
-X the calculation below sets a high-score threshold using an
-X ungapped lambda, but errs towards the high-score side by using
-X E()=0.001 and calculating with 0.70*lambda, which is the correction for
-X going from ungapped to -12/-2 gapped lambda with BLOSUM50
-X
-X NOTE(review): the code below uses 0.01, not 0.001 - confirm which
-X value is intended.
-*/
-X
-#ifndef NORMAL_DIST
-X tmp_score = 0.01/((double)nstats*K*(double)pst.n0*ave_n1); /* 0.01 / (nstats * K * n0 * ave_n1) */
-X tmp_score = -log(tmp_score)/(Lambda*l_fact); /* solve for the score */
-X max_hscore = (int)(tmp_score+0.5); /* round to nearest integer */
-X
-X z = 1.0/(double)nstats;
-X z = (log(z)+EULER_G)/(-PI_SQRT6); /* same transform as E_to_zs() with e = 1/nstats */
-#else
-X max_hscore = 100;
-X z = 5.0;
-#endif
-X *zstrim = 10.0*z+50.0; /* put z on the mean-50, 10-per-s.d. scale used by find_zr()/find_zn() */
-X return max_hscore;
-}
-X
-int /* proc_hist_r(): length-regression (REG_STATS) statistics.  Bins the
-X scores by log(length), fits them with fit_llen(), optionally trims
-X outliers and refits with fit_llens(), writes a summary line into
-X histp->stat_info and returns REG_STATS.  When the score range is
-X smaller than 10 or the fit fails, find_zp is switched to find_zn and
-X the result of proc_hist_n() is returned instead. */
-proc_hist_r(struct stat_str *sptr, int nstats,
-X struct pstruct pst, struct hist_str *histp,
-X int do_trim, struct rstat_str *rs)
-{
-X int i, max_hscore;
-X double zs, ztrim;
-X char s_string[128];
-X struct llen_str llen;
-X char *f_string;
-X llen.fit_flag=1;
-X llen.hist=NULL; /* tells alloc_hist() that the arrays must be allocated */
-X
-X max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
-X rs->ngK, rs->ngH, &ztrim);
-X
-X inithist(&llen,pst,max_hscore);
-X f_string = &(histp->stat_info[0]);
-X
-X for (i = 0; i<nstats; i++)
-X addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
-X histp->entries = nstats - llen.zero_s; /* entries rejected by addhist() are not counted */
-X
-X if ((llen.max_score - llen.min_score) < 10) { /* too little score variation for a regression */
-X free_hist(&llen);
-X llen.fit_flag = 0;
-X find_zp = &find_zn;
-X return proc_hist_n(sptr, nstats, pst, histp, do_trim, rs);
-X }
-X
-X fit_llen(&llen, rs); /* now we have rho, mu, rho2, mu2, mean_var
-X to set the parameters for the histogram */
-X
-X if (!llen.fit_flag) { /* the fit failed, fall back to proc_hist_n */
-X free_hist(&llen);
-X find_zp = &find_zn;
-X return proc_hist_n(sptr,nstats, pst, histp, do_trim, rs);
-X }
-X
-X rs->n_trimmed= rs->n1_trimmed = rs->nb_trimmed = 0;
-X
-X if (do_trim) {
-X if (llen.fit_flag) { /* always true here: a failed fit returned above */
-X for (i = 0; i < nstats; i++) {
-X zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
-X if (zs < 20.0 || zs > ztrim) { /* outside the accepted z-score window */
-X rs->n_trimmed++;
-X prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
-X &(histp->entries));
-X }
-X }
-X }
-X
-X /* fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", rs->n_trimmed); */
-X
-X if (llen.fit_flag) fit_llens(&llen, rs); /* refit after pruning; may clear llen.fit_flag, which is not checked again */
-X
-X /* fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", rs->n1_trimmed,rs->nb_trimmed); */
-X }
-X
-X
-X free_hist(&llen); /* releases only the arrays; llen.zero_s is still valid below */
-X
-X /* rst all the scores in the histogram */
-X
-X if (pst.zsflag < 10) s_string[0]='\0';
-X else if (pst.zs_win > 0)
-X sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X inithistz(MAXHIST, histp);
-X
-X sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d B-trim: %d in %d/%d\n Lambda= %6.4f",
-X s_string,
-X rs->rho*LN_FACT,sqrt(rs->rho_e),rs->mu,sqrt(rs->mu_e), rs->mean_var,sqrt(rs->var_e),
-X llen.zero_s, rs->n_trimmed, rs->n1_trimmed, rs->nb_trimmed, rs->nb_tot,
-X PI_SQRT6/sqrt(rs->mean_var)); /* NOTE(review): unbounded sprintf; the size of histp->stat_info is not visible here */
-X return REG_STATS;
-}
-X
-X
-/* ave_mu_var() - set rs->mu and rs->mean_var from the running sum
-X (s_score) and sum of squares (s2_score) of n scores.  With fewer than
-X two scores the defaults mu=50, var=10 are used; a variance below 0.01
-X is replaced by max(mu, 1.0) so that find_zn() never divides by
-X (nearly) zero.
-*/
-static void
-ave_mu_var(struct rstat_str *rs, double s_score, double s2_score, int n)
-{
-X  if (n > 1) {
-X    rs->mu = s_score/(double)n;
-X    rs->mean_var = s2_score - (double)n * rs->mu * rs->mu;
-X    rs->mean_var /= (double)(n-1);
-X  }
-X  else {
-X    rs->mu = 50.0;
-X    rs->mean_var = 10.0;
-X  }
-X
-X  if (rs->mean_var < 0.01) {
-X    rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
-X  }
-}
-X
-/* proc_hist_n() - unscaled (AVE_STATS) statistics: mean and variance of
-X the sampled scores, with no length correction.
-X
-X Scores in 1..max_hscore (from calc_thresh()) are accumulated; then, for
-X at most 5 rounds, entries whose find_zn() z-score is below 20 or above
-X the trim limit are removed and mu/mean_var are recomputed.  Trimming
-X stops early once a round removes fewer than LHISTC entries.
-X
-X Side effects: a trimmed entry is marked by negating sptr[i].n1;
-X histp->entries counts the scores left in the estimate; a summary is
-X written to histp->stat_info.  pst.zsflag/pst.zs_win only affect that
-X summary; do_trim is not used.  Returns AVE_STATS.
-X
-X An entry that never entered the sums (score <= 0 or > max_hscore) is
-X still marked and counted in rs->n_trimmed when it is an outlier, but it
-X is no longer subtracted from the sums, the count, or histp->entries.
-*/
-int
-proc_hist_n(struct stat_str *sptr, int nstats,
-X            struct pstruct pst, struct hist_str *histp,
-X            int do_trim, struct rstat_str *rs)
-{
-X  int i, j;
-X  double s_score, s2_score, ssd;
-X  double ztrim;
-X  int nit, max_hscore;
-X  int in_sums;
-X  char s_string[128];
-X  char *f_string;
-X
-X  f_string = &(histp->stat_info[0]);
-X
-X  max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
-X                           rs->ngK, rs->ngH, &ztrim);
-X
-X  s_score = s2_score = 0.0;
-X
-X  histp->entries = 0;
-X
-X  for ( j = 0, i = 0; i < nstats; i++) {
-X    if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
-X      s_score += (ssd=(double)sptr[i].score);
-X      s2_score += ssd * ssd;
-X      histp->entries++;
-X      j++;
-X    }
-X  }
-X
-X  ave_mu_var(rs, s_score, s2_score, j);
-X
-X  /* now remove some scores */
-X
-X  nit = 5;
-X  while (nit-- > 0) {
-X    rs->n_trimmed = 0;
-X
-X    for (i=0; i< nstats; i++) {
-X      if (sptr[i].n1 < 0) continue;     /* marked as trimmed already */
-X      ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
-X      if (ssd > ztrim || ssd < 20.0) {
-X        /* same test as the accumulation loop above */
-X        in_sums = (sptr[i].score > 0 && sptr[i].score <= max_hscore);
-X        if (in_sums) {
-X          ssd = sptr[i].score;
-X          s_score -= ssd;
-X          s2_score -= ssd*ssd;
-X          j--;
-X          histp->entries--;
-X        }
-X        rs->n_trimmed++;
-X        sptr[i].n1 = -sptr[i].n1;
-X      }
-X    }
-X
-X    ave_mu_var(rs, s_score, s2_score, j);
-X
-X    if (rs->n_trimmed < LHISTC) {
-X      break;
-X    }
-X  }
-X
-X  if (pst.zsflag < 10) s_string[0]='\0';
-X  else if (pst.zs_win > 0)
-X    sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-X  else strncpy(s_string,"(shuffled)",sizeof(s_string));
-X
-X  sprintf(f_string,"%s unscaled statistics: mu= %6.4f var=%6.4f; Lambda= %6.4f",
-X          s_string, rs->mu,rs->mean_var,PI_SQRT6/sqrt(rs->mean_var));
-X  return AVE_STATS;
-}
-X
-X
-/*
-This routine calculates the maximum likelihood estimates for the
-extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
-X
-X <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
-X <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
-X
-X The <a> parameter can be transformed into and K
-X of the formula: 1 - exp ( - K m n exp ( - lambda S ))
-X using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
-X 1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
-X
-X a = log(K m n) / lambda
-X a lambda = log (K m n)
-X exp(a lambda) = K m n
-X but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
-X so: K m n = (1/n sum{ exp( -x[i] *lambda)})
-X K = sum{}/(nlib m n )
-X
-*/
-X
-/* alloc_hist() - make sure the four per-bin arrays of llen exist and
-X are zeroed.  The arrays hold llen->max + 1 elements and are only
-X allocated when llen->hist is NULL; an existing set is cleared and
-X re-used.  Calls exit(1) when memory cannot be obtained, instead of
-X dereferencing a NULL pointer in the clearing loop.
-*/
-void
-alloc_hist(struct llen_str *llen)
-{
-X  int max_llen, i;
-X
-X  max_llen = llen->max;
-X
-X  if (llen->hist == NULL) {
-X    llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
-X    llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X    llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X    llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-X
-X    if (llen->hist == NULL || llen->score_sums == NULL ||
-X        llen->score2_sums == NULL || llen->score_var == NULL) {
-X      fprintf(stderr," cannot allocate %d length bins\n",max_llen+1);
-X      exit(1);
-X    }
-X  }
-X
-X  /* needed when the arrays are re-used; harmless right after calloc() */
-X  for (i=0; i< max_llen+1; i++) {
-X    llen->hist[i] = 0;
-X    llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
-X  }
-}
-X
-/* free_hist() - release the per-bin arrays of llen.  llen->hist doubles
-X as the "allocated" marker: nothing happens when it is NULL, and it is
-X reset to NULL afterwards so a second call is harmless.
-*/
-void
-free_hist(struct llen_str *llen)
-{
-X  if (llen->hist == NULL) return;
-X
-X  free(llen->score_var);
-X  free(llen->score2_sums);
-X  free(llen->score_sums);
-X  free(llen->hist);
-X  llen->hist = NULL;
-}
-X
-void /* inithist(): reset llen for a new set of scores and allocate/clear
-X its bin arrays via alloc_hist(); pst and max_hscore are not used */
-inithist(struct llen_str *llen, struct pstruct pst, int max_hscore)
-{
-X llen->max = MAX_LLEN; /* alloc_hist() sizes the arrays as max+1 */
-X
-X llen->max_score = -1; /* sentinels: the first score seen by addhist() replaces both */
-X llen->min_score=10000;
-X
-X alloc_hist(llen);
-X
-X llen->zero_s = 0;
-X llen->min_length = 10000; /* sentinels for the length range */
-X llen->max_length = 0;
-}
-X
-void /* addhist(): add one (score, length) pair to the log(length) bins of
-X llen; pairs with score <= 0 or length < LENGTH_CUTOFF are only
-X counted in llen->zero_s */
-addhist(struct llen_str *llen, int score, int length, int max_hscore)
-{
-X int llength;
-X double dscore;
-X
-X if ( score<=0 || length < LENGTH_CUTOFF) {
-X llen->min_score = 0; /* a rejected entry forces the recorded minimum score to 0 */
-X llen->zero_s++;
-X return ;
-X }
-X
-X if (score < llen->min_score) llen->min_score = score;
-X if (score > llen->max_score) llen->max_score = score;
-X
-X if (length > llen->max_length) llen->max_length = length;
-X if (length < llen->min_length) llen->min_length = length;
-X if (score > max_hscore) score = max_hscore; /* cap the score after min/max_score were recorded */
-X
-X llength = (int)(LN_FACT*log((double)length)+0.5); /* bin index: LN_FACT * ln(length), rounded */
-X
-X if (llength < 0 ) llength = 0; /* clamp into 0..llen->max */
-X if (llength > llen->max) llength = llen->max;
-X llen->hist[llength]++;
-X dscore = (double)score;
-X llen->score_sums[llength] += dscore;
-X llen->score2_sums[llength] += dscore * dscore;
-X
-X /*
-X db->entries++;
-X db->length += length;
-X if (db->length > LONG_MAX) {db->carry++;db->length -= LONG_MAX;}
-X */
-}
-X
-/* histogram will go from z-scores of 20 .. 120 (min_hist .. max_hist) with mean 50 and 10 units per s.d. */
-X
-X
-void /* inithistz(): set the z-score histogram range and bin width in histp
-X for about mh bins, then allocate histp->hist_a or clear an existing one */
-inithistz(int mh, struct hist_str *histp )
-{
-X int i;
-X
-X histp->min_hist = 20;
-X histp->max_hist = 120;
-X
-X histp->histint = (int)
-X ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5); /* bin width, rounded; NOTE(review): mh == 0 is not guarded */
-X histp->maxh = (int)
-X ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5); /* number of bins for that width */
-X
-X if (histp->hist_a==NULL) {
-X if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))==
-X NULL) {
-X fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
-X histp->histflg = 0; /* allocation failed: flag the histogram as unavailable */
-X }
-X else histp->histflg = 1;
-X }
-X else { /* re-use the existing array; histflg is left as it was */
-X for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;
-X }
-}
-X
-/* fasts/f will not show any histogram */
-void /* addhistz(): deliberately empty in this file; zs and histp are ignored */
-addhistz(double zs, struct hist_str *histp)
-{
-}
-X
-void /* prune_hist(): inverse of addhist() - remove one (score, length) pair
-X from the bins of llen and decrement *entries; pairs that addhist()
-X would have rejected are ignored */
-prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
-X long *entries)
-{
-X int llength;
-X double dscore;
-X
-X if (score <= 0 || length < LENGTH_CUTOFF) return;
-X
-X if (score > max_hscore) score = max_hscore; /* same cap that addhist() applied */
-X
-X llength = (int)(LN_FACT*log((double)length)+0.5); /* same bin index as addhist() */
-X
-X if (llength < 0 ) llength = 0;
-X if (llength > llen->max) llength = llen->max;
-X llen->hist[llength]--;
-X dscore = (double)score;
-X llen->score_sums[llength] -= dscore;
-X llen->score2_sums[llength] -= dscore * dscore;
-X
-X (*entries)--;
-X /*
-X if (length < db->length) db->length -= length;
-X else {db->carry--; db->length += (LONG_MAX - (unsigned long)length);}
-X */
-}
-X
-/* fit_llen: no trimming
-X (1) regress scores vs log(n) using weighted variance
-X (2) calculate mean variance after length regression
-*/
-X
-void /* fit_llen(): weighted linear regression of score against length bin
-X (x = j + 0.5), using only bins with more than one entry and
-X weights 1/score_var[j].  Sets pr->rho, pr->mu, pr->rho_e, pr->mu_e,
-X pr->nb_tot, pr->mean_var, pr->var_e, pr->rho2, pr->mu2 and
-X pr->var_cutoff.  Clears llen->fit_flag when fewer than 5 bins
-X qualify, when delta <= 0.001, or when mean_var <= 0.01. */
-fit_llen(struct llen_str *llen, struct rstat_str *pr)
-{
-X int j;
-X int n;
-X int n_size;
-X double x, y2, u, z;
-X double mean_x, mean_y, var_x, var_y, covar_xy;
-X double mean_y2, covar_xy2, var_y2, dllj;
-X
-X double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
-X
-/* now fit scores to best linear function of log(n), using
-X simple linear regression */
-X
-X for (llen->min=0; llen->min < llen->max; llen->min++)
-X if (llen->hist[llen->min]) break;
-X llen->min--; /* NOTE(review): becomes -1 if bin 0 is populated, and hist[-1] would be read below; addhist() rejects length < LENGTH_CUTOFF, so bin 0 presumably stays empty - confirm */
-X
-X for (n_size=0,j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] > 1) {
-X dllj = (double)llen->hist[j];
-X llen->score_var[j] = llen->score2_sums[j]/dllj
-X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-X llen->score_var[j] /= (double)(llen->hist[j]-1);
-X if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1; /* floor keeps the 1/score_var weights bounded */
-X n_size++;
-X }
-X }
-X
-X pr->nb_tot = n_size; /* number of bins that take part in the fit */
-X
-X n_w = 0.0;
-X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-X for (j = llen->min; j < llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = j + 0.5;
-X dllj = (double)llen->hist[j];
-X n_w += dllj/llen->score_var[j];
-X sum_x += dllj * x / llen->score_var[j] ;
-X sum_y += llen->score_sums[j] / llen->score_var[j];
-X sum_x2 += dllj * x * x /llen->score_var[j];
-X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
-X }
-X
-X if (n_size < 5 ) { /* too few bins for a regression */
-X llen->fit_flag=0;
-X pr->rho = 0;
-X pr->mu = sum_y/n_w; /* NOTE(review): n_w is 0.0 when no bin has more than one entry */
-X return;
-X }
-X else {
-X delta = n_w * sum_x2 - sum_x * sum_x;
-X if (delta > 0.001) {
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
-X pr->rho_e = n_w/delta;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-X pr->mu_e = sum_x2/delta;
-X }
-X else { /* (nearly) singular system */
-X llen->fit_flag = 0;
-X pr->rho = 0;
-X pr->mu = sum_y/n_w;
-X return;
-X }
-X }
-X
-X delta = n_w * sum_x2 - sum_x * sum_x; /* repeats the computation made in the branch above */
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-X
-X n = 0;
-X mean_x = mean_y = mean_y2 = 0.0;
-X var_x = var_y = 0.0;
-X covar_xy = covar_xy2 = 0.0;
-X
-X for (j = llen->min; j <= llen->max; j++) /* unweighted moments; this loop includes bin llen->max, the loops above do not */
-X if (llen->hist[j] > 1 ) {
-X n += llen->hist[j];
-X x = (double)j + 0.5;
-X mean_x += (double)llen->hist[j] * x;
-X mean_y += llen->score_sums[j];
-X var_x += (double)llen->hist[j] * x * x;
-X var_y += llen->score2_sums[j];
-X covar_xy += x * llen->score_sums[j];
-X }
-X mean_x /= n; mean_y /= n;
-X var_x = var_x / n - mean_x * mean_x;
-X var_y = var_y / n - mean_y * mean_y;
-X
-X covar_xy = covar_xy / n - mean_x * mean_y;
-/*
-X pr->rho = covar_xy / var_x;
-X pr->mu = mean_y - pr->rho * mean_x;
-*/
-X mean_y2 = covar_xy2 = var_y2 = 0.0;
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = (double)j + 0.5;
-X u = pr->rho * x + pr->mu; /* score predicted for this bin */
-X y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u; /* sum over the bin of (score - u)^2 */
-/*
-X dllj = (double)llen->hist[j];
-X fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],
-X llen->score_sums[j]/dllj,y2/dllj);
-*/
-X mean_y2 += y2;
-X var_y2 += y2 * y2;
-X covar_xy2 += x * y2;
-X /* fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",
-X x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */
-X }
-X
-X pr->mean_var = mean_y2 /= (double)n; /* mean squared residual over all n entries */
-X covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
-X
-X if (pr->mean_var <= 0.01) { /* degenerate variance: mark the fit as failed and substitute a usable value */
-X llen->fit_flag = 0;
-X pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
-X }
-X
-X /*
-X fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
-X pr->rho*LN_FACT,pr->mu,pr->mean_var);
-X */
-X if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
-X else pr->var_e = 0.0;
-X
-X if (llen->fit_flag) {
-X pr->rho2 = covar_xy2 / var_x; /* slope/intercept of squared residual against length bin */
-X pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
-X }
-X else {
-X pr->rho2 = 0;
-X pr->mu2 = pr->mean_var;
-X }
-X
-X if (pr->rho2 < 0.0 )
-X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-X if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF; /* never evaluate the cutoff below length 2*LENGTH_CUTOFF */
-X
-X pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2; /* predicted variance at length z */
-}
-X
-/* fit_llens: trim high variance bins
-X (1) regress scores vs log(n) using weighted variance
-X (2) regress residuals vs log(n)
-X (3) remove high variance bins
-X (4) calculate mean variance after length regression
-*/
-X
-void /* fit_llens(): repeat the weighted regression of fit_llen(), compute
-X for every bin how far its residual sum deviates from the predicted
-X variance, zero the bins whose deviation exceeds three times the
-X r.m.s. deviation (counted in pr->nb_trimmed / pr->n1_trimmed), and
-X finish by calling fit_llen() on the remaining bins. */
-fit_llens(struct llen_str *llen, struct rstat_str *pr)
-{
-X int j;
-X int n, n_u2;
-X double x, y, y2, u, u2, v, z;
-X double mean_x, mean_y, var_x, var_y, covar_xy;
-X double mean_y2, covar_xy2;
-X double mean_u2, mean_3u2, dllj;
-X double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
-X
-/* now fit scores to best linear function of log(n), using
-X simple linear regression */
-X
-X for (llen->min=0; llen->min < llen->max; llen->min++)
-X if (llen->hist[llen->min]) break;
-X llen->min--; /* NOTE(review): same -1 hazard as in fit_llen() if bin 0 is populated */
-X
-X for (j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] > 1) {
-X dllj = (double)llen->hist[j];
-X llen->score_var[j] = (double)llen->score2_sums[j]/dllj
-X - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-X llen->score_var[j] /= (double)(llen->hist[j]-1);
-X if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0; /* floor is 1.0 here, 0.1 in fit_llen() */
-X }
-X }
-X
-X n_w = 0.0;
-X sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-X for (j = llen->min; j < llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = j + 0.5;
-X dllj = (double)llen->hist[j];
-X n_w += dllj/llen->score_var[j];
-X sum_x += dllj * x / llen->score_var[j] ;
-X sum_y += llen->score_sums[j] / llen->score_var[j];
-X sum_x2 += dllj * x * x /llen->score_var[j];
-X sum_xy += x * llen->score_sums[j]/llen->score_var[j];
-X }
-X
-X delta = n_w * sum_x2 - sum_x * sum_x; /* NOTE(review): unlike fit_llen(), delta is not checked before dividing */
-X pr->rho = (n_w * sum_xy - sum_x * sum_y)/delta;
-X pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-X
-/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
-X
-X n = 0;
-X mean_x = mean_y = mean_y2 = 0.0;
-X var_x = var_y = 0.0;
-X covar_xy = covar_xy2 = 0.0;
-X
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1 ) {
-X n += llen->hist[j];
-X x = (double)j + 0.5;
-X dllj = (double)llen->hist[j];
-X mean_x += dllj * x;
-X mean_y += llen->score_sums[j];
-X var_x += dllj * x * x;
-X var_y += llen->score2_sums[j];
-X covar_xy += x * llen->score_sums[j];
-X }
-X mean_x /= n; mean_y /= n;
-X var_x = var_x / n - mean_x * mean_x;
-X var_y = var_y / n - mean_y * mean_y;
-X
-X covar_xy = covar_xy / n - mean_x * mean_y;
-/* pr->rho = covar_xy / var_x;
-X pr->mu = mean_y - pr->rho * mean_x;
-*/
-X
-X mean_y2 = covar_xy2 = 0.0;
-X for (j = llen->min; j <= llen->max; j++)
-X if (llen->hist[j] > 1) {
-X x = (double)j + 0.5;
-X u = pr->rho * x + pr->mu; /* score predicted for this bin */
-X y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u; /* sum over the bin of (score - u)^2 */
-X mean_y2 += y2;
-X covar_xy2 += x * y2;
-X }
-X
-X mean_y2 /= n;
-X covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
-X pr->rho2 = covar_xy2 / var_x;
-X pr->mu2 = mean_y2 - pr->rho2 * mean_x;
-X
-X if (pr->rho2 < 0.0 )
-X z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-X else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-X if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
-X
-X pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
-X
-/* fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
-X pr->var_cutoff,z);
-*/
-X mean_u2 = 0.0;
-X n_u2 = 0;
-X for ( j = llen->min; j < llen->max; j++) {
-X y = j+0.5;
-X dllj = (double)llen->hist[j];
-X x = pr->rho * y + pr->mu; /* here x is the predicted score and y the bin position */
-X v = pr->rho2 * y + pr->mu2; /* predicted variance, floored at var_cutoff */
-X if (v < pr->var_cutoff) v = pr->var_cutoff;
-X if (llen->hist[j]> 1) {
-X u2 = (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj; /* observed residual sum minus the expected one */
-X mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1); /* score_var[j] is re-used to hold the squared deviation */
-X n_u2++;
-X /* fprintf(stderr," %d (%d) u2: %.2f v*ll: %.2f %.2f\n",
-X j,llen->hist[j],u2,v*dllj,sqrt(llen->score_var[j])); */
-X }
-X else llen->score_var[j] = -1.0;
-X }
-X
-X mean_u2 = sqrt(mean_u2/(double)n_u2); /* NOTE(review): n_u2 is 0 when no bin has more than one entry */
-X /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
-X
-X mean_3u2 = mean_u2*3.0;
-X
-X for (j = llen->min; j < llen->max; j++) {
-X if (llen->hist[j] <= 1) continue;
-X if (sqrt(llen->score_var[j]) > mean_3u2) { /* drop bins deviating by more than 3x the r.m.s. */
-X /* fprintf(stderr," removing %d %d %.2f\n",
-X j, (int)(exp((double)j/LN_FACT)-0.5),
-X sqrt(llen->score_var[j]));
-X */
-X pr->nb_trimmed++;
-X pr->n1_trimmed += llen->hist[j];
-X llen->hist[j] = 0; /* only the count is cleared; fit_llen() skips bins with hist <= 1 */
-X }
-X }
-X fit_llen(llen, pr); /* final parameters come from the untrimmed-variance fit on the remaining bins */
-}
-X
-X
-/* REG_STATS - Z() from rho/mu/mean_var */
-/* find_zr() - z-score for REG_STATS: the score minus the value predicted
-X from LN_FACT*log(length) by rs->rho/rs->mu, divided by
-X sqrt(rs->mean_var) and put on the mean-50, 10-per-s.d. scale.
-X Returns 0.0 for score <= 0 or length < LENGTH_CUTOFF; escore and
-X comp are not used.
-*/
-double find_zr(int score, double escore, int length, double comp,
-X               struct rstat_str *rs)
-{
-X  double scaled_len, dev;
-X
-X  if (score <= 0 || length < LENGTH_CUTOFF) return 0.0;
-X
-X  scaled_len = LN_FACT*log((double)(length));
-X  dev = ((double)score - rs->rho * scaled_len - rs->mu) / sqrt(rs->mean_var);
-X
-X  return 50.0 + dev*10.0;
-}
-X
-/* find_zt() - z-score surrogate when e_scores are available:
-X -log2(escore), or the constant 744.440071/M_LN2 when escore is not
-X greater than zero.  score, length, comp and rs are not used.
-*/
-double find_zt(int score, double escore, int length, double comp,
-X               struct rstat_str *rs)
-{
-X  /* written as !(x > 0) so that a NaN takes the same branch as before */
-X  if (!(escore > 0.0)) return 744.440071/M_LN2;
-X
-X  return -log(escore)/M_LN2;
-}
-X
-/* find_zn() - z-score for AVE_STATS: (score - rs->mu)/sqrt(rs->mean_var)
-X on the mean-50, 10-per-s.d. scale.  escore, length and comp are not
-X used.
-*/
-double find_zn(int score, double escore, int length, double comp,
-X               struct rstat_str *rs)
-{
-X  double dev = ((double)score - rs->mu) / sqrt(rs->mean_var);
-X
-X  return 50.0 + dev*10.0;
-}
-X
-/* computes E value for a given z value, assuming extreme value distribution;
-X zs is used directly in exp(-PI_SQRT6 * zs - EULER_G), and the result is
-X multiplied by entries (or by db.entries when entries < 1) */
-double
-z_to_E(double zs, long entries, struct db_str db)
-{
-X double e, n;
-X
-X /* if (db->entries < 5) return (double)db.entries; */
-X if (entries < 1) { n = db.entries;}
-X else {n = entries;}
-X
-X if (zs > ZS_MAX) return 0.0; /* avoid underflow in exp() below */
-X
-X e = exp(-PI_SQRT6 * zs - EULER_G);
-X return n * (e > .01 ? 1.0 - exp(-e) : e); /* for small e, 1 - exp(-e) is replaced by e */
-}
-X
-double /* zs_to_p(): identity in this file - returns zs unchanged */
-zs_to_p(double zs)
-{
-X return zs;
-}
-X
-/* this version assumes the probability is in the ->zscore variable,
-X which is provided by this file after last_scale()
-*/
-X
-/* zs_to_bit() - returns zs + log2(n0*n1).  The product is formed in
-X double precision: n0*n1 as an int overflows once the two lengths
-X multiply to more than INT_MAX.
-*/
-double
-zs_to_bit(double zs, int n0, int n1)
-{
-X  return zs+log((double)n0*(double)n1)/M_LN2 ;
-}
-X
-/* computes an E-value as k * exp(-zs * M_LN2), i.e. k * 2^(-zs); zs is the
-X value returned by find_zt().  k is db.length/n1 for DNA/RNA and the
-X number of entries otherwise, and is never taken below 1.0 */
-double
-zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
-{
-X double e, z, k;
-X
-X /* if (db->entries < 5) return 0.0; */
-X
-X if (zs > ZS_MAX ) return 0.0;
-X
-X if (entries < 1) entries = db.entries;
-X
-X if (dnaseq == SEQT_DNA || dnaseq == SEQT_RNA) {
-X k = (double)db.length /(double)n1;
-X if (db.carry > 0) { k *= (double)db.carry * (double)LONG_MAX;} /* NOTE(review): multiplies by carry*LONG_MAX rather than adding the carried length - confirm */
-X }
-X else k = (double)entries;
-X
-X if (k < 1.0) k = 1.0;
-X
-X zs *= M_LN2; /* bits -> nats */
-X if ( zs > 100.0) e = 0.0;
-X else e = exp(-zs);
-X return k * e;
-}
-X
-/* computes the z-score (mean 50, 10 per s.d.) that corresponds to an
-X expectation E among `entries` scores, assuming extreme value distribution */
-double
-E_to_zs(double E, long entries)
-{
-X double e, z;
-X int error;
-X
-X e = E/(double)entries; /* NOTE(review): entries == 0 is not guarded */
-X
-#ifndef NORMAL_DIST
-X z = (log(e)+EULER_G)/(-PI_SQRT6);
-X return z*10.0+50.0;
-#else
-X z = np_to_z(1.0-e,&error); /* np_to_z() is not declared in the part of the file visible here */
-X
-X if (!error) return z*10.0+50.0;
-X else return 0.0;
-#endif
-}
-X
-/* computes 1.0 - E value for a given z value, assuming extreme value
-X distribution; zs is on the mean-50, 10-per-s.d. scale and the result
-X is multiplied by entries */
-double
-zs_to_Ec(double zs, long entries)
-{
-X double e, z;
-X
-X if (entries < 5) return 0.0;
-X
-X z = (zs - 50.0)/10.0; /* back to units of standard deviations */
-X
-X if (z > ZS_MAX) return 1.0; /* NOTE(review): this branch is not scaled by entries, unlike the return below */
-X
-X e = exp(-PI_SQRT6 * z - EULER_G);
-X return (double)entries * (e > .01 ? exp(-e) : 1.0 - e); /* for small e, exp(-e) is replaced by 1 - e */
-}
-X
-/* vsort() - Shell sort of v[0..n-1] into descending order; the parallel
-X array s[] is permuted together with v[].  The definition uses a
-X prototype, like the other functions in this file, instead of the
-X old-style parameter list.
-*/
-void
-vsort(double *v, int *s, int n)
-{
-X  int gap, i, j;
-X  double tmp;
-X  int itmp;
-X
-X  for (gap=n/2; gap>0; gap/=2)
-X    for (i=gap; i<n; i++)
-X      for (j=i-gap; j>=0; j -= gap) {
-X        if (v[j] >= v[j+gap]) break;
-X        tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
-X        itmp = s[j]; s[j]=s[j+gap]; s[j+gap]=itmp;
-X      }
-}
-X
-/* sort_escore() - Shell sort of v[0..n-1] into ascending order. */
-void
-sort_escore(double *v, int n)
-{
-X  int step, hi, lo;
-X  double held;
-X
-X  step = n/2;
-X  while (step > 0) {
-X    for (hi = step; hi < n; hi++) {
-X      lo = hi - step;
-X      /* !(a <= b) rather than a > b, so unordered values move as before */
-X      while (lo >= 0 && !(v[lo] <= v[lo+step])) {
-X        held = v[lo];
-X        v[lo] = v[lo+step];
-X        v[lo+step] = held;
-X        lo -= step;
-X      }
-X    }
-X    step /= 2;
-X  }
-}
-X
-/* scale_tat - compute 'a', 'b', 'c' coefficients for scaling fasts/f
-X escores
-X 5-May-2003 - also calculate index for high ties
-*/
-/* scale_tat() - fit ln(escore) against rank (see linreg()).
-X
-X escore, nstats - escores, expected to be sorted in ascending order;
-X                  leading negative values are skipped
-X db_entries     - number of library entries, used to scale the ranks
-X do_trim        - when non-zero, the lowest 5% (at most 500) of the
-X                  values are left out of the fit
-X rs             - receives tat_a, tat_b, tat_c and tie_j
-X
-X The skip loop is bounded by nstats, so an array holding only negative
-X values is no longer read past its end.  When nothing is left to fit,
-X the coefficients are set to a=1, b=c=0: with those values the factor
-X obs/exp(a*log(obs) + b*obs + c) used by scale_scores() and
-X scale_one_score() equals 1.
-X Calls exit(1) if memory cannot be allocated.
-*/
-void
-scale_tat(double *escore, int nstats,
-X          long db_entries, int do_trim,
-X          struct rstat_str *rs)
-{
-X  int i, j, k, start;
-X  double *x, *lnx, *lny;
-X
-X  /* sort_escore(escore, nstats); */
-X
-X  while (nstats > 0 && *escore<0.0) {escore++; nstats--; }
-X
-X  /* calculate tie_j - the index below which all scores are considered
-X     positional ties */
-X  rs->tie_j = 0.005 * db_entries;
-X
-X  if (nstats <= 0) {
-X    rs->tat_a = 1.0;
-X    rs->tat_b = rs->tat_c = 0.0;
-X    return;
-X  }
-X
-X  x = (double *) calloc(nstats, sizeof(double));
-X  if(x == NULL) {
-X    fprintf(stderr, "Couldn't calloc tatE/x\n");
-X    exit(1);
-X  }
-X
-X  lnx = (double *) calloc(nstats,sizeof(double));
-X  if(lnx == NULL) {
-X    fprintf(stderr, "Couldn't calloc tatE/lnx\n");
-X    exit(1);
-X  }
-X
-X  lny = (double *) calloc(nstats,sizeof(double));
-X  if(lny == NULL) {
-X    fprintf(stderr, "Couldn't calloc tatE/lny\n");
-X    exit(1);
-X  }
-X
-X  for(i = 0 ; i < nstats ; ) {
-X
-X    lny[i] = log(escore[i]);
-X
-X    /* [i, j) is a run of identical escores */
-X    for(j = i+1 ; j < nstats ; j++) {
-X      if(escore[j] != escore[i]) break;
-X    }
-X
-X    /* every member of the run gets the rank of the middle of the run */
-X    x[i] = ((((double)i + (double)(j - i - 1)/2.0)*(double)nstats/(double)db_entries)+1.0)/(double)nstats;
-X    lnx[i] = log(x[i]);
-X
-X    for(k = i+1 ; k < j ; k++) {
-X      lny[k]=lny[i];
-X      x[k] = x[i];
-X      lnx[k]=lnx[i];
-X    }
-X    i = k;
-X  }
-X
-X  if (!do_trim) {
-X    start = 0;
-X  } else {
-X    start = (int)(0.05 * (double) nstats);
-X    start = start > 500 ? 500 : start;
-X  }
-X
-X  linreg(lny, x, lnx, nstats, &rs->tat_a, &rs->tat_b, &rs->tat_c, start);
-X
-X  /* I have the coefficients I need - a, b, c; free arrays */
-X
-X  free(lny);
-X  free(lnx);
-X  free(x);
-}
-X
-void /* linreg(): least-squares fit of lny[i] = a*lnx[i] + b*x[i] + c over
-X i = start .. n-1; the 3x3 normal equations are solved with
-X Cramer's rule using det() */
-linreg(double *lny, double *x, double *lnx, int n,
-X double *a, double *b, double *c, int start) {
-X
-X double yf1, yf2, yf3;
-X double f1f1, f1f2, f1f3;
-X double f2f2, f2f3;
-X double f3f3, delta;
-X
-X int i;
-X
-X yf1 = yf2 = yf3 = 0.0;
-X f1f1 = f1f2 = f1f3 = f2f2 = f2f3 = f3f3 = 0.0;
-X
-X for(i = start; i < n; i++) { /* basis functions: f1 = lnx, f2 = x, f3 = 1 */
-X yf1 += lny[i] * lnx[i];
-X yf2 += lny[i] * x[i];
-X yf3 += lny[i];
-X
-X f1f1 += lnx[i] * lnx[i];
-X f1f2 += lnx[i] * x[i];
-X f1f3 += lnx[i];
-X
-X f2f2 += x[i] * x[i];
-X f2f3 += x[i];
-X
-X f3f3 += 1.0;
-X }
-X
-X delta = det(f1f1, f1f2, f1f3, f1f2, f2f2, f2f3, f1f3, f2f3, f3f3); /* NOTE(review): not checked for zero, e.g. when start >= n */
-X
-X *a = det(yf1, f1f2, f1f3, yf2, f2f2, f2f3, yf3, f2f3, f3f3) / delta;
-X *b = det(f1f1, yf1, f1f3, f1f2, yf2, f2f3, f1f3, yf3, f3f3) / delta;
-X *c = det(f1f1, f1f2, yf1, f1f2, f2f2, yf2, f1f3, f2f3, yf3) / delta;
-X
-}
-X
-/* det() - determinant of the 3x3 matrix
-X     | a11 a12 a13 |
-X     | a21 a22 a23 |
-X     | a31 a32 a33 |
-X expanded along its first row.
-*/
-double det(double a11, double a12, double a13,
-X           double a21, double a22, double a23,
-X           double a31, double a32, double a33)
-{
-X  double minor11, minor12, minor13, total;
-X
-X  minor11 = a22 * a33 - a32 * a23;
-X  minor12 = a21 * a33 - a31 * a23;
-X  minor13 = a21 * a32 - a31 * a22;
-X
-X  total = a11 * minor11;
-X  total -= a12 * minor12;
-X  total += a13 * minor13;
-X
-X  return total;
-}
-X
-/* last_stats() - final statistics pass for searches that report
-X e_scores.
-X
-X Allocates *rs_sp when it is NULL, sorts bestp_arr with sortbeste(),
-X stores calc_spacefactor() in the statistics block and, for
-X pst.zsflag 1..4, collects the escores <= 1.0 (from bestp_arr when
-X m_msg.escore_flg is set, else from sptr), sorts them with
-X sortbesto() and, when more than 100 were collected, fits them with
-X scale_tat().  A description of the statistics used is written to
-X histp->stat_info; histp->entries is reset to 0.
-X Calls exit(1) if memory cannot be allocated.
-*/
-void
-last_stats(const unsigned char *aa0, int n0,
-X           struct stat_str *sptr, int nstats,
-X           struct beststr **bestp_arr, int nbest,
-X           struct mngmsg m_msg, struct pstruct pst,
-X           struct hist_str *histp, struct rstat_str **rs_sp)
-{
-X  double *obs_escore;
-X  int i, nobs, do_trim;
-X  long db_entries;
-X  struct rstat_str *rs_s;
-X
-X  if (*rs_sp == NULL) {
-X    if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
-X      /* sizeof yields a size_t; cast it so the argument matches %ld */
-X      fprintf(stderr," cannot allocate rs_s: %ld\n",(long)sizeof(struct rstat_str));
-X      exit(1);
-X    }
-X    *rs_sp = rs_s;
-X  }
-X  else rs_s = *rs_sp;
-X
-X  histp->entries = 0;
-X
-X  sortbeste(bestp_arr,nbest);
-X
-X  rs_s->spacefactor =
-X    calc_spacefactor(aa0, n0, m_msg.nm0,pst.nsq);
-X
-X  if (pst.zsflag >= 1 && pst.zsflag <= 4) {
-X    if (m_msg.escore_flg) {
-X      nobs = nbest;
-X      do_trim = 1;
-X    }
-X    else {
-X      nobs = nstats;
-X      do_trim = 0;
-X    }
-X
-X    /* calloc(0, ...) may return NULL without being out of memory, so
-X       always request at least one element */
-X    if ((obs_escore = (double *)calloc((size_t)(nobs > 0 ? nobs : 1),sizeof(double)))==NULL) {
-X      fprintf(stderr," cannot allocate obs_escore[%d]\n",nobs);
-X      exit(1);
-X    }
-X
-X    /* from here on nobs counts the escores actually kept */
-X    if (m_msg.escore_flg) {
-X      for (i=nobs=0; i<nbest; i++) {
-X        if (bestp_arr[i]->escore<= 1.00)
-X          obs_escore[nobs++]=bestp_arr[i]->escore;
-X      }
-X      db_entries = m_msg.db.entries;
-X    }
-X    else {
-X      for (i=nobs=0; i<nstats; i++) {
-X        if (sptr[i].escore <= 1.00 ) obs_escore[nobs++]=sptr[i].escore;
-X      }
-X      db_entries = nobs;
-X      /* db_entries = m_msg.db.entries;*/
-X    }
-X
-X    sortbesto(obs_escore,nobs);
-X    if (nobs > 100) {
-X      scale_tat(obs_escore,nobs,db_entries,do_trim,rs_s);
-X      rs_s->have_tat=1;
-X      sprintf(histp->stat_info,"scaled Tatusov statistics (%d): tat_a: %6.4f tat_b: %6.4f tat_c: %6.4f",
-X              nobs,rs_s->tat_a, rs_s->tat_b, rs_s->tat_c);
-X    }
-X    else {
-X      rs_s->have_tat=0;
-X      sprintf(histp->stat_info,"Space_factor %.4g scaled statistics",
-X              rs_s->spacefactor);
-X    }
-X    free(obs_escore);
-X  }
-X  else {
-X    rs_s->have_tat=0;
-X    histp->stat_info[0] = '\0';
-X  }
-}
-X
-/* scale_scores() takes the best (real) scores and re-scales them;
-X beststr bptr[] must be sorted */
-X
-void /* scale_scores(): rescale the escores of the best hits in place.
-X Without a tat fit every escore is multiplied by rs->spacefactor;
-X with one, each run of ties gets the rank-based correction
-X obs/exp(a*log(obs) + b*obs + c).  Each escore is then converted to
-X a zscore (-log2) and multiplied by pst.zdb_size. */
-scale_scores(struct beststr **bptr, int nbest, struct db_str db,
-X struct pstruct pst, struct rstat_str *rs)
-{
-X int i, j, k;
-X double obs, r_a, r_b, r_c;
-X
-X /* this scale function absolutely requires that the results be sorted
-X before it is used */
-X
-X sortbeste(bptr,nbest);
-X
-X if (!rs->have_tat) {
-X for (i=0; i<nbest; i++) {
-X bptr[i]->escore *= rs->spacefactor;
-X }
-X }
-X else {
-X
-X /* here when last_stats() fitted tat_a/b/c (it does so when nobs > 100) */
-X
-X r_a = rs->tat_a; r_b = rs->tat_b; r_c = rs->tat_c;
-X
-X /* the problem with scaletat is that the E() value is related to
-X ones position in the list of top scores - thus, knowing the score
-X is not enough - one must know the rank */
-X
-X for(i = 0 ; i < nbest ; ) {
-X /* take the bottom 0.5%, and the ties, and treat them all the same */
-X j = i + 1;
-X while (j< nbest &&
-X (j <= (0.005 * db.entries) || bptr[j]->escore == bptr[i]->escore)
-X ) {
-X j++;
-X }
-X
-X /* observed frequency */
-X obs = ((double)i + ((double)(j - i - 1)/ 2.0) + 1.0)/(double)db.entries; /* NOTE(review): db.entries == 0 is not guarded */
-X
-X /* make certain ties all have the same correction */
-X for (k = i ; k < j ; k++) {
-X bptr[k]->escore *= obs/exp(r_a*log(obs) + r_b*obs + r_c);
-X }
-X i = k; /* k == j here: continue after the run of ties */
-X }
-X }
-X
-X for (i=0; i<nbest; i++) {
-X if(bptr[i]->escore > 0.01)
-X bptr[i]->escore = 1.0 - exp(-bptr[i]->escore);
-X if (bptr[i]->escore > 0.0)
-X bptr[i]->zscore = -log(bptr[i]->escore)/M_LN2; /* same transform as find_zt() */
-X else
-X bptr[i]->zscore = 744.440071/M_LN2;
-X bptr[i]->escore *= pst.zdb_size;
-X }
-}
-X /* scale_one_score() - re-scale a single E() value.
-X
-X ipos - rank of the score; a rank below rs->tie_j is replaced by
-X rs->tie_j/2 before use
-X escore - value to re-scale
-X db - only db.entries is read
-X rs - have_tat, spacefactor, tie_j and tat_a/tat_b/tat_c are read
-X
-X Returns escore * rs->spacefactor when rs->have_tat is 0; otherwise
-X escore * obs/exp(a*log(obs) + b*obs + c) with
-X obs = (ipos + 1)/db.entries. */
-double scale_one_score (int ipos, double escore,
-X struct db_str db,
-X struct rstat_str *rs) {
-X double obs;
-X double a, b, c;
-X
-X if (!rs->have_tat)
-X return escore * rs->spacefactor;
-X
-X if (ipos < rs->tie_j) ipos = rs->tie_j/2; /* presumably the mid-rank of the leading tie group - TODO confirm against where tie_j is set */
-X
-X a = rs->tat_a; b = rs->tat_b; c = rs->tat_c;
-X
-X obs = ((double)ipos + 1.0)/(double)db.entries; /* 1-based rank as a fraction of db.entries */
-X
-X escore *= obs/exp(a*log(obs) + b*obs + c);
-X
-X return escore;
-}
-X /* calc_spacefactor() - compute a "space factor" for a query.
-X
-X Without FASTF: returns 2^nm0 - 1; aa0, n0 and nsq are not used.
-X
-X With FASTF: aa0 is read as nm0 rows of nmoff residue codes, of which
-X the first nmoff-1 columns of each row are used
-X (nmoff = (n0 - nm0 + 1)/nm0 + 1). For every column j, counts[r][j]
-X is the number of rows holding residue code r, and factors[l][j]
-X is built up as the number of arrangements of length l that can be
-X drawn from that column's multiset. The result is
-X sum over i = 1..nm0 of ( prod over j of factors[i][j] ) / factorial(i,1).
-X
-X min() and factorial() are defined outside this function.
-X Calls exit(1) when an allocation fails. */
-double calc_spacefactor(const unsigned char *aa0, int n0,
-X int nm0, int nsq) {
-X
-#if !defined(FASTF)
-X return pow(2.0, (double) nm0) - 1.0;
-#else
-X
-X int i, j, n, l, nr, bin, k;
-X int nmoff;
-X int **counts;
-X int **factors;
-X double tmp, result = 0.0;
-X
-X nmoff = (n0 - nm0 + 1)/nm0+1; /* row stride in aa0; presumably fragment length plus one separator - TODO confirm */
-X
-X counts = (int **) calloc(nsq, sizeof(int *));
-X if(counts == NULL) {
-X fprintf(stderr, "couldn't calloc counts array!\n");
-X exit(1);
-X }
-X
-X counts[0] = (int *) calloc(nsq * (nmoff - 1), sizeof(int)); /* one contiguous table; row pointers are set just below */
-X if(counts[0] == NULL) {
-X fprintf(stderr, "couldn't calloc counts array!\n");
-X exit(1);
-X }
-X
-X for(i = 0 ; i < nsq ; i++) {
-X counts[i] = counts[0] + (i * (nmoff - 1));
-X }
-X
-X for(i = 0 ; i < nm0 ; i++) {
-X for(j = 0 ; j < (nmoff - 1) ; j++) {
-X counts[ aa0[nmoff * i + j] ] [ j ] ++; /* assumes every code in aa0 is < nsq - TODO confirm */
-X }
-X }
-X
-X factors = (int **) calloc(nm0 + 1, sizeof(int *));
-X if(factors == NULL) {
-X fprintf(stderr, "Couldn't calloc factors array!\n");
-X exit(1);
-X }
-X
-X factors[0] = (int *) calloc((nm0 + 1) * (nmoff - 1), sizeof(int)); /* rows 0..nm0, same contiguous layout as counts */
-X if(factors[0] == NULL) {
-X fprintf(stderr, "Couldn't calloc factors array!\n");
-X exit(1);
-X }
-X
-X for(i = 0 ; i <= nm0 ; i++) {
-X factors[i] = factors[0] + (i * (nmoff - 1));
-X }
-X
-X /*
-X this algorithm was adapted from the GAP4 library's NrArrangement function:
-X The GAP Group, GAP --- Groups, Algorithms, and Programming,
-X Version 4.1; Aachen, St Andrews, 1999.
-X (http://www-gap.dcs.st-and.ac.uk/ gap)
-X */
-X
-X /* calculate K factors for each column in query: */
-X for(j = 0 ; j < (nmoff - 1) ; j++) {
-X
-X /* only one way to select 0 elements */
-X factors[0][j] = 1;
-X
-X /* for each of the possible elements in this column */
-X for(n = 0 ; n < nsq ; n++) {
-X
-X /* if there aren't any of these, skip it */
-X if(counts[n][j] == 0) { continue; }
-X
-X /* loop over the possible lengths of the arrangement: K..0 */
-X for(l = nm0 ; l >= 0 ; l--) { /* descending: factors[l-i][j] read below has not yet been updated for element n */
-X nr = 0;
-X bin = 1;
-X
-X /*
-X compute the number of arrangements of length <l>
-X using only the first <n> elements of <mset>
-X */
-X for(i = 0, k = min(counts[n][j], l); i <= k ; i++) {
-X
-X /*
-X add the number of arrangements of length <l>
-X that consist of <l>-<i> of the first <n>-1 elements
-X and <i> copies of the <n>th element
-X */
-X nr += bin * factors[l-i][j];
-X bin = (int) ((float) bin * (float) (l - i) / (float) (i + 1)); /* C(l,i) -> C(l,i+1); NOTE(review): goes through float and truncates, int may overflow for large nm0 */
-X }
-X
-X factors[l][j] = nr;
-X }
-X }
-X }
-X
-X result = 0.0;
-X for(i = 1 ; i <= nm0 ; i++) {
-X tmp = 1.0;
-X for(j = 0 ; j < (nmoff - 1) ; j++) {
-X tmp *= (double) factors[i][j];
-X }
-X tmp /= factorial(i, 1);
-X result += tmp;
-X }
-X
-X free(counts[0]);
-X free(counts);
-X free(factors[0]);
-X free(factors);
-X
-X return result;
-#endif
-}
-X /* sortbesto() - sort obs[0..nobs-1] into ascending order, in place.
-X
-X Shell sort: one gapped insertion-sort pass per entry of the fixed,
-X decreasing gap table incs[]; the final pass uses gap 1. Gaps that
-X are >= nobs do nothing because the inner loop starts at i = gap. */
-void sortbesto (double *obs, int nobs)
-{
-X int gap, i, j, k;
-X double v;
-X int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
-X 13776, 4592, 1968, 861, 336,
-X 112, 48, 21, 7, 3, 1 };
-X
-X for ( k = 0; k < 16; k++)
-X for (gap = incs[k], i=gap; i < nobs; i++) {
-X v = obs[i]; /* value being inserted */
-X j = i;
-X while ( j >= gap && obs[j-gap] > v) { /* shift larger values up by one gap */
-X obs[j] = obs[j - gap];
-X j -= gap;
-X }
-X obs[j] = v;
-X }
-}
-SHAR_EOF
-chmod 0644 scaleswt.c ||
-echo 'restore of scaleswt.c failed'
-Wc_c="`wc -c < 'scaleswt.c'`"
-test 37581 -eq "$Wc_c" ||
- echo 'scaleswt.c: original size 37581, current size' "$Wc_c"
-fi
-# ============= search.html ==============
-if test -f 'search.html' -a X"$1" != X"-c"; then
- echo 'x - skipping search.html (File already exists)'
-else
-echo 'x - extracting search.html (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'search.html' &&
-<html>
-<head>
-<title>FASTA Sequence Comparison Engine</title></head>
-<body bgcolor="white" >
-X
-<h1 align=center>Search with FASTA</h1>
-X
-<form action="http://fasta.bioch.virginia.edu/fasta/cgi/searchnn.cgi" method=post>
-X
-<b>Choose program and database(s) to query:</b><br>
-<b>Program:</b>
-<select name = "program">
-X <option> FASTA
-X <option> FASTX
-X <option> FASTY
-X <option> FASTF
-X <option> FASTS
-X <option> TFASTX
-X <option> TFASTY
-X <option> TFASTF
-X <option> TFASTS
-X <option> SSEARCH
-</select><br><br>
-X
-<b>Databases:</b> <font color="blue">Blue databases</font> and possibly DNA databases can be re-searched<br>
-<table align=center cellspacing=10>
-<tr>
-<td><b>Protein</b><br>
-X <input type=checkbox name = "libpa" value="a">NBRF Annotated Protein Database (rel. 53)<br>
-X <input type=checkbox name = "libpp" value="p">NBRF Protein Database (complete)<br>
-X <input type=checkbox name = "libpd" value="d">NRL_3d structure database<br>
-X <input type=checkbox name = "libpn" value="n"><font color="blue">NCBI/Blast NR protein database</font><br>
-X <input type=checkbox name = "libpk" value="k"><font color="blue">NCBI/Blast NR protein database (seg)</font><br>
-X <input type=checkbox name = "libps" value="q"><font color="blue">NCBI/Blast Swiss-Prot</font><br>
-X <input type=checkbox name = "libpr" value="r"><font color="blue">NCBI/BLAST Swiss-Prot (seg)</font><br>
-X <input type=checkbox name = "libpo" value="o">OWL Nonredundant database<br>
-X <input type=checkbox name = "libpy" value="y">Yeast Proteins<br>
-</td>
-X
-<td><b>DNA</b><br>
-X <input type=checkbox name = "libnp" value="p">Primate<br>
-X <input type=checkbox name = "libnr" value="r">Rodent<br>
-X <input type=checkbox name = "libnm" value="m">Other Mammals<br>
-X <input type=checkbox name = "libnb" value="b">Vertebrates<br>
-X <input type=checkbox name = "libnh" value="h">High Throughput Genomics<br>
-X <input type=checkbox name = "libni" value="i">Invertebrates<br>
-X <input type=checkbox name = "libnl" value="l">Plants<br>
-X <input type=checkbox name = "libnt" value="t">Bacteria<br>
-</td>
-X
-<td valign=top><br>
-X <input type=checkbox name = "libns" value="s">Structural RNA<br>
-X <input type=checkbox name = "libnv" value="v">Viral<br>
-X <input type=checkbox name = "libng" value="g">Phage<br>
-X <input type=checkbox name = "libnz" value="z">Synthetics<br>
-X <input type=checkbox name = "libne" value="e">EST sequences<br>
-X <input type=checkbox name = "libnf" value="f"><font color="blue">BLAST human ESTs</font><br>
-X <input type=checkbox name = "libnc" value="c"><font color="blue">BLAST mouse ESTs</font><br>
-</td>
-</tr>
-</table>
-<p>
-<b>Sequence type:</b><br>
-<input type=radio name="seqtype" value=1 checked>Protein
-<input type=radio name="seqtype" value=2>DNA (both strands)
-<input type=radio name="seqtype" value=3>DNA (forward only)
-<input type=radio name="seqtype" value=4>DNA (rev-comp only)
-X
-<p>
-<b>Enter query sequence: </b><select name="in_seq"><option>FASTA format<option>Accession/GI number</select> <b>Subset range:</b>
-<input type=text name="ssr" maxlength=20 size=10></input>
-<table>
-<tr>
-<td>
-<textarea name="sequence" rows=6 cols=60 wrap=hard align=left></textarea>
-<td valign=top>
-<a href="http://www.ncbi.nlm.nih.gov/Entrez/protein.html" target="entrez_window">Entrez protein sequence browser</A><br><br>
-<a href="http://www.ncbi.nlm.nih.gov/Entrez/nucleotide.html" target="entrez_window">Entrez DNA sequence browser</A>
-<br><br>
-<input type=submit name="input" value="Submit Query">
-</table>
-<br><br>
-X
-<b>Other options:</b><br>
-<table>
-<tr>
-<td>
-<b>Ktup:</b><br>
-<input type=text name="ktup" maxlength=3 size=3></input>
-<td>
-<b>Protein matrix:</b><br>
-<select name = "pmatrix">
-X <option> Default
-X <option> Blosum50
-X <option> Blosum62
-X <option> Blosum80
-X <option> Pam250
-X <option> Pam120
-X <option> MD20
-X <option> MD10
-</select>
-<td>
-X <b>DNA matrix:</b><br>
-<select name = "dmatrix">
-X <option> Default
-X <option> +4/-3
-X <option> blastn2
-X <option> +4/-4
-X <option> +4/-8
-</select>
-<td>
-X <b>gap:</b><br>
-<input type=text name="gap" maxlength=4 size=3></input>
-<td>
-X <b>ext:</b><br>
-<input type=text name="ext" maxlength=4 size=3></input>
-<td>
-<b>misc:</b><br>
-<input type=text name="out_opt" maxlength=10 size=5></input>
-</tr>
-</table>
-<br>
-X
-<b>Output limits:</b><br>
-<b>E():</b><input type=text name="eval" maxlength=6 size=4></input>
-<b>Highest E():</b><input type=text name="etop" maxlength=6 size=4></input>
-<b>scores:</b><input type=text name="best" maxlength=3 size=3></input>
-<b>alignments:</b><input type=text name="align" maxlength=3 size=3></input>
-</form>
-<br>
-X
-<hr>
-<CENTER>
-<a href="http://fasta.bioch.virginia.edu/">FASTA Home</a> | <a href="search.html">Search FASTA</a> |
-<a href="ftp://ftp.virginia.edu/pub/fasta/"> Get FASTA</a> |
-<a href="http://www.people.virginia.edu/~wrp/pearson.html">About the Author </a>
-<hr>
-X
-<br>
-X
-<font size=-1><i><br>
-Copyright 1988, 1991, 1992, 1993, 1994, 1995, 1997, 1999 by
-William R. Pearson and the University of Virginia. All rights
-reserved. The FASTA program and documentation may not be sold or
-incorporated into a commercial product, in whole or in part, without
-written consent of William R. Pearson and the University of Virginia.
-X
-</i></font></center>
-X
-</body>
-</html>
-SHAR_EOF
-chmod 0644 search.html ||
-echo 'restore of search.html failed'
-Wc_c="`wc -c < 'search.html'`"
-test 5247 -eq "$Wc_c" ||
- echo 'search.html: original size 5247, current size' "$Wc_c"
-fi
-# ============= showrss.c ==============
-if test -f 'showrss.c' -a X"$1" != X"-c"; then
- echo 'x - skipping showrss.c (File already exists)'
-else
-echo 'x - extracting showrss.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'showrss.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: showrss.c,v 1.12 2006/04/12 18:00:02 wrp Exp $ */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#include "structs.h"
-#include "param.h"
-X
-extern double
-zs_to_E(double zs, int n1, int isdna, long entries,struct db_str db);
-extern double zs_to_bit(double zs, int n0, int n1);
-extern double zs_to_p(double zs);
-X
-extern double (*find_zp)(int score, double escore, int length, double comp, void *);
-X
-extern char *prog_func;
-X /* showbest() - shuffle-statistics report.
-X
-X Prints to fp: prog_func with m_msg->shuff_max shuffles, whether a
-X window shuffle (m_msg->shuff_wid > 0) or a uniform shuffle was used,
-X and then, for bptr[0] only, its score[0], bit score (zs_to_bit),
-X p-value (zs_to_p) and the expectation (zs_to_E) for pst.zdb_size
-X sequences.
-X
-X aa0, aa1, maxn, nbest, qlib, gstring2 and f_str are not referenced;
-X the locals zs and score are unused.
-X NOTE(review): bptr[0] is read without checking nbest - presumably
-X callers always pass at least one result; confirm. */
-void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1, int maxn,
-X struct beststr **bptr, int nbest, int qlib, struct mngmsg *m_msg,
-X struct pstruct pst, struct db_str db,
-X char *gstring2, void **f_str)
-{
-X double zs;
-X int score;
-X char *rlabel;
-X struct beststr *bbp;
-X
-X if ((rlabel=strrchr(m_msg->label,' '))==NULL) rlabel = m_msg->label; /* when a ' ' is found rlabel points at that blank, so the printed label keeps it */
-X
-X fprintf(fp,"\n %s - %d shuffles; ",prog_func,m_msg->shuff_max);
-X if (m_msg->shuff_wid > 0)
-X fprintf(fp," window shuffle, window size: %d\n",m_msg->shuff_wid);
-X else
-X fprintf(fp," uniform shuffle\n");
-X
-X bbp = bptr[0]; /* only the first entry is reported */
-X
-X fprintf(fp," unshuffled %s score: %d; bits(s=%d|n_l=%d): %4.1f p(%d) < %g\n",
-X rlabel,bbp->score[0],bbp->score[0], bbp->n1,
-X zs_to_bit(bbp->zscore,m_msg->n0,bbp->n1),bbp->score[0],zs_to_p(bbp->zscore));
-X
-X fprintf(fp,"For %ld sequences, a score >= %d is expected %4.4g times\n\n",
-X pst.zdb_size,bbp->score[0],zs_to_E(bbp->zscore,bbp->n1,0l,pst.zdb_size,db)); /* third argument (isdna) is passed as 0 */
-}
-X /* showalign() - empty stub: this file produces no alignment output.
-X All arguments are ignored. */
-void showalign (FILE *fp, unsigned char *aa0, unsigned char *aa1, int maxn,
-X struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,
-X struct pstruct pst, void *f_str, char *gstring2)
-{
-}
-X
-/* aancpy() - copy a residue-coded string, translating codes to letters.
-X
-X Copies bytes from 'from' to 'to' until 'count' bytes have been
-X copied or a 0 byte is reached, then appends '\0'. A byte whose
-X value is <= pst.nsq is replaced by pst.sq[value]; any other byte
-X is copied unchanged. 'to' must have room for the copied bytes
-X plus the terminator.
-X
-X Each byte is read as unsigned char: with a signed plain char a byte
-X >= 0x80 is negative, would pass the "<= pst.nsq" test and index
-X before the start of pst.sq[]. */
-void
-aancpy(char *to, char *from, int count,
-X struct pstruct pst)
-{
-X char *tp;
-X unsigned char c;
-X
-X tp=to;
-X while (count-- && *from) {
-X c = (unsigned char)*from++;
-X if (c <= pst.nsq) *tp++ = pst.sq[c];
-X else *tp++ = (char)c;
-X }
-X *tp='\0';
-}
-SHAR_EOF
-chmod 0644 showrss.c ||
-echo 'restore of showrss.c failed'
-Wc_c="`wc -c < 'showrss.c'`"
-test 2033 -eq "$Wc_c" ||
- echo 'showrss.c: original size 2033, current size' "$Wc_c"
-fi
-# ============= showsum.c ==============
-if test -f 'showsum.c' -a X"$1" != X"-c"; then
- echo 'x - skipping showsum.c (File already exists)'
-else
-echo 'x - extracting showsum.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'showsum.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: showsum.c,v 1.21 2006/06/22 15:00:51 wrp Exp $ */
-X
-/* 10 December 1999 --
-X
-X code modified to reflect the fact that there may be two scores for
-X each sequence - e.g. forward and reverse strand - and only one of them
-X - presumably the best - is a related score.
-*/
-X
-/* showsum.c should report statistics for success -
-X
-X given the sorted results
-X
-X (1) find the highest scoring unrelated sequence: unf_score0
-X find the number of related sequences missed: relm_num0
-X (2) find the 0.5% highest scoring unrelated sequence: unf_score05
-X find the number of related sequences missed: relm_num05
-X (3) find the score where the number of related sequences
-X missed and the number of unrelated sequences found
-X matches; report the score and the number: equ_score, equ_num;
-X
-The query sequence library number will be put in qsfnum.
-X
-*/
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-X
-#include "defs.h"
-#include "param.h"
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-#include "structs.h"
-X
-#ifndef SFCHAR
-#define SFCHAR ':'
-#define NSFCHAR '!'
-#endif
-X
-#ifdef PCOMPLIB
-#define BSFNUM(i) bptr[i]->desptr->sfnum
-#define QSFNUM qsfnum
-#define NQSFNUM qsfnum_n
-#else
-#define BSFNUM(i) bptr[i]->sfnum
-#define QSFNUM m_msg->qsfnum
-#define NQSFNUM m_msg->qsfnum_n
-#endif
-X
-#define MAX_BLINE 200
-X
-double E_to_zs(double, long);
-double zs_to_E(double,int,int,long,struct db_str db);
-double zs_to_bit(double,int,int);
-#ifdef PVM_SRC
-void sf_sort(int *s, int n);
-#endif
-void lnum_sort(struct beststr **s, int n);
-X
-/* showbest() - superfamily success statistics for one query.
-X
-X A library entry is "related" when sfn_cmp(BSFNUM(i),QSFNUM) is
-X non-zero and "unrelated" when it is zero for both QSFNUM and NQSFNUM.
-X Written to fp:
-X - position and score of the first unrelated entry, and the number of
-X related entries scoring at or below it (0.0 criterion);
-X - related-missed / unrelated-found counts at the z-score cutoffs
-X E_to_zs() returns for E() = 0.01, 0.02, 0.05 and 1.00;
-X - the point where unrelated found == related missed (equ);
-X - the lowest and third-lowest scoring related entries.
-X
-X bptr[0..nbest-1] is presumably sorted best-first by the caller. When
-X m_msg->qframe or m_msg->nframe is > 1, entries with the same lseek are
-X merged (best zscore kept) and bptr[] is re-sorted here, so bptr[] is
-X modified. m_msg->nshow is set to m_msg->ashow before returning.
-X
-X Returns early, after a message on stderr, when no unrelated entry is
-X found or (PCOMPLIB) when the query title has no SFCHAR.
-X aa0, aa1, maxn, gstring2 and f_str are not used.
-X
-X Every index left behind by a scanning loop is clamped or tested before
-X bptr[] is read with it: the loops can legitimately end with
-X i == nbest or j == -1. */
-void showbest (FILE *fp,
-#ifndef PCOMPLIB
-X unsigned char **aa0, unsigned char *aa1, int maxn,
-#endif
-X struct beststr **bptr,int nbest,
-X int qlib, struct mngmsg *m_msg, struct pstruct pst,
-X struct db_str db,
-X char *gstring2
-#ifndef PCOMPLIB
-X ,void *f_str
-#endif
-X )
-{
-X int i, j, k, rel_tot;
-X int irelv;
-X
-X int unf_num0, relm_num0;
-X int unf_num01,relm_num01;
-X int unf_num02, relm_num02;
-X int unf_num05, relm_num05;
-X int unf_num100, relm_num100;
-X int equ_num, rel_3_num, rel_1_num;
-X
-X double unf_score0, unf_score01, unf_score02 ,unf_score05;
-X double unf_score100, equ_score, rel_3_score, rel_1_score;
-X double unf_score0_b, unf_score01_b, unf_score02_b ,unf_score05_b;
-X double unf_score100_b, equ_score_b, rel_3_score_b, rel_1_score_b;
-X char *bp;
-X
-#ifdef PCOMPLIB
-X int qsfnum[10],qsfnum_n[10],isf,nsf,nsf_n;
-X char *bp1, *bpn, *tp;
-X char sfstr[MAX_FN];
-#endif
-X
-#ifdef PCOMPLIB
-X /* not done here because done in pvcomplib.c */
-X if ((bp=strchr(m_msg->qtitle,SFCHAR))!=NULL) {
-X strncpy(sfstr,bp+1,sizeof(sfstr));
-X sfstr[sizeof(sfstr)-1]='\0';
-X if ((bp1=strchr(sfstr,SFCHAR)) != NULL) { /* look for second | */
-X if ((bpn=strchr(sfstr,NSFCHAR))!=NULL) *bpn = '\0';
-X *bp1='\0';
-X /* at most 9 numbers: qsfnum[] has 10 slots and needs a 0 terminator;
-X an empty list leaves nsf == 0 instead of calling atoi(NULL) */
-X isf = 0;
-X for (tp=strtok(sfstr," \t"); tp!=NULL; tp=strtok(NULL," \t")) {
-X if (isf >= 9) {
-X fprintf(stderr," error - too many superfamilies: %d\n %s\n",
-X isf,m_msg->qtitle);
-X break;
-X }
-X qsfnum[isf++] = atoi(tp);
-X }
-X qsfnum[nsf=isf]=0;
-X sf_sort(qsfnum,nsf);
-X
-X /* now get negatives */
-X qsfnum_n[0]= nsf_n = 0;
-X if (bpn != NULL) {
-X isf = 0;
-X for (tp=strtok(bpn+1," \t"); tp!=NULL; tp=strtok(NULL," \t")) {
-X if (isf >= 9) {
-X fprintf(stderr,
-X " error - too many negative superfamilies: %d\n %s\n",
-X isf,m_msg->qtitle);
-X break;
-X }
-X qsfnum_n[isf++] = atoi(tp);
-X }
-X /* terminate the negative list itself (was written to qsfnum[]) */
-X qsfnum_n[nsf_n=isf]=0;
-X sf_sort(qsfnum_n,nsf_n);
-X }
-X }
-X else { /* only one sfnum */
-X qsfnum[0]=0; /* defined value if sscanf() converts nothing */
-X sscanf(bp+1,"%d",qsfnum);
-X qsfnum[1]=0;
-X qsfnum_n[0]= nsf_n = 0;
-X }
-X }
-X else {
-X fprintf(stderr," no query superfamily number\n %s\n",m_msg->qtitle);
-X return;
-X }
-#endif
-X
-X if (m_msg->qframe > 1 || m_msg->nframe > 1) {
-X
-X /* this code is included for cases where there are several scores -
-X forward and reverse, or six in the case of tfastf33s, for each
-X sequence
-X
-X lnum_sort sorts the library by lseek position, which will be
-X the same for the same sequence
-X */
-X
-X lnum_sort(bptr,nbest);
-X
-X /* merge, saving the best score */
-X i = j = 0;
-X
-X /* i has the source position we are currently examining
-X k has the adjacent alternative scores ( k > i)
-X j has the destination
-X */
-X
-X while (i<nbest) {
-X for (k=i+1; k < nbest && bptr[i]->lseek == bptr[k]->lseek; k++) {
-X if (bptr[i]->zscore < bptr[k]->zscore) bptr[i] = bptr[k];
-X }
-X bptr[j++]=bptr[i];
-X i = k;
-X }
-X
-X if (j != m_msg->nbr_seq) {
-X fprintf(stderr,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
-X j,nbest,m_msg->nbr_seq);
-X fprintf(stdout,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
-X j,nbest,m_msg->nbr_seq);
-X }
-X nbest = j;
-X
-X if (pst.zsflag >=0) sortbeste(bptr, nbest);
-X else sortbest(bptr,nbest,pst.score_ix);
-X }
-X
-/* fprintf(stderr," %1d label is %s (%s)\n",irelv,labptr,label); */
-X
-/* get the query superfamily */
-X
-X for (i=0; i<nbest; i++) {
-X /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)==0 && sfn_cmp(BSFNUM(i),NQSFNUM)==0) {
-X unf_num0=i;
-X unf_score0=bptr[i]->zscore;
-X unf_score0_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X break;
-X }
-X }
-X
-X /* also taken when nbest == 0, so nbest >= 1 from here on */
-X if (i>=nbest) {
-X fprintf(stderr," %s: %d\n error - no unrelated sequences\n",
-X m_msg->qtitle,QSFNUM[0]);
-X return;
-X }
-X
-X for (i=rel_tot=relm_num0=0; i<nbest; i++) {
-X /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)>0 ) {
-X rel_tot++; /* total related */
-X if (bptr[i]->zscore <= unf_score0) relm_num0++;
-#ifdef DEBUG
-X if (pst.debug_lib)
-X fprintf(stderr,"%d\t%ld\t%.1f\n",i,(long)bptr[i]->lseek,bptr[i]->zscore);
-#endif
-X }
-X }
-X
-X /* relm_num0, unf_num0, unf_score0 done */
-X
-X /* now calculate number missed at various expectation value cutoffs */
-X /* calculate z-score cutoff for E()=0.01, 0.02, 0.05 */
-X
-X unf_score01 = E_to_zs(0.01,db.entries);
-X unf_score02 = E_to_zs(0.02,db.entries);
-X unf_score05 = E_to_zs(0.05,db.entries);
-X unf_score100 = E_to_zs(1.00,db.entries);
-X
-X /* relm_num01, unf_num01, unf_score01 done */
-X
-X for (i=unf_num01=0,relm_num01=rel_tot;
-X i<nbest && bptr[i]->zscore >= unf_score01; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
-X if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num01++;
-X }
-X else relm_num01--;
-X }
-X if (i >= nbest) i = nbest-1; /* every score passed the cutoff */
-X unf_score01_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X for (i=unf_num02=0,relm_num02=rel_tot;
-X i<nbest && bptr[i]->zscore >= unf_score02; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
-X if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num02++;
-X }
-X else relm_num02--;
-X }
-X if (i >= nbest) i = nbest-1; /* every score passed the cutoff */
-X unf_score02_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X for (i=unf_num05=0,relm_num05=rel_tot;
-X i<nbest && bptr[i]->zscore >= unf_score05; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
-X if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num05++;
-X }
-X else relm_num05--;
-X }
-X if (i >= nbest) i = nbest-1; /* every score passed the cutoff */
-X unf_score05_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X for (i=unf_num100=0,relm_num100=rel_tot;
-X i<nbest && bptr[i]->zscore >= unf_score100; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
-X if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num100++;
-X }
-X else relm_num100--;
-X }
-X if (i >= nbest) i = nbest-1; /* every score passed the cutoff */
-X unf_score100_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X /* the final criterion finds the score and the number of sequences
-X where the number of unrelated sequences found == the number of
-X related sequences missed. */
-X
-X equ_num=0;
-X i = 0; j=nbest-1;
-X
-/* j is counting up the list of scores (actually down the array) from
-X the lowest scoring related sequence
-X
-X i is counting down the list of scores (actually up the array)
-X from the highest scoring unrelated sequence */
-X
-X for (i=0, j=nbest-1; j>=0 && i<nbest; i++,j--) {
-X /* i++ while sequences are related, stop at next unrelated */
-X while (i<nbest && (sfn_cmp(BSFNUM(i),QSFNUM) || sfn_cmp(BSFNUM(i),NQSFNUM))) i++;
-X /* j-- while sequences are unrelated, stop at next related */
-X while (j>=0 && ( sfn_cmp(BSFNUM(j),QSFNUM)==0)) j--;
-X /*
-X fprintf(stderr,"i: %3d %3d %4d; j: %3d %3d %4d\n",i,bptr[i]->zscore,
-X BSFNUM(i),j,bptr[j]->zscore,BSFNUM(j));
-X */
-X /* either scan may have run off its end of the array */
-X if (i>=nbest || j<0) break;
-X /* if unrelated [i] score <= related [j] score, quit */
-X if (bptr[i]->zscore <= bptr[j]->zscore) break;
-X equ_num++;
-X }
-X
-X /* equ_score_b is printed below on both paths, so give it a value */
-X equ_score = equ_score_b = 0.0;
-X if (i>=nbest || j<0) {
-#ifndef PCOMPLIB
-X if (pst.debug_lib)
-#endif
-X fprintf(stderr," i (%3d), j (%3d) off end\n %s\n", i, j,m_msg->qtitle);
-X equ_num = rel_tot+1; equ_score = 0.0;
-X }
-X else {
-X equ_score=bptr[i]->zscore;
-X equ_score_b =zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X }
-X
-X /* get the lowest scoring related */
-X for (i=0,rel_1_num=rel_tot-1; i<nbest && rel_1_num > 0; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_1_num--;
-X }
-X if (i >= nbest) i = nbest-1; /* scan consumed the whole list */
-X rel_1_num = i;
-X rel_1_score = bptr[i]->zscore;
-X rel_1_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X /* get the 3rd lowest scoring related */
-X for (i=0,rel_3_num=rel_tot-3; i<nbest && rel_3_num > 0; i++) {
-/* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
-X if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_3_num--;
-X }
-X if (i >= nbest) i = nbest-1; /* scan consumed the whole list */
-X rel_3_num = i;
-X rel_3_score = bptr[i]->zscore;
-X rel_3_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
-X
-X fprintf(fp,"%3d>%s - %d (%d/%d)\n",
-X qlib,m_msg->qtitle, QSFNUM[0],rel_tot,nbest);
-X fprintf(fp," 0.0 criterion- relm: %3d pos: %3d score: %5.1f exp: %6.4g\n",
-X relm_num0, unf_num0+1, unf_score0_b,
-X zs_to_E(unf_score0,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X fprintf(fp," 0.01 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
-X relm_num01, unf_num01, unf_score01_b,
-X zs_to_E(unf_score01,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X fprintf(fp," 0.02 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
-X relm_num02, unf_num02, unf_score02_b,
-X zs_to_E(unf_score02,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X fprintf(fp," 0.05 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
-X relm_num05, unf_num05, unf_score05_b,
-X zs_to_E(unf_score05,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X fprintf(fp," 1.00 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
-X relm_num100, unf_num100, unf_score100_b,
-X zs_to_E(unf_score100,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X
-X fprintf(fp," equ num: %3d score: %5.1f exp: %6.4g\n",equ_num,equ_score_b,
-X zs_to_E(equ_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X
-X fprintf(fp," rel[-1]: %3d score: %5.1f exp: %6.4g\n",rel_1_num+1,rel_1_score_b,
-X zs_to_E(rel_1_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X fprintf(fp," rel[-3]: %3d score: %5.1f exp: %6.4g\n",rel_3_num+1,rel_3_score_b,
-X zs_to_E(rel_3_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
-X
-X /*
-X fprintf(fp,"/ ** %s ** /\n",gstring2);
-X fflush(fp);
-X */
-X m_msg->nshow = m_msg->ashow;
-}
-X
-#ifdef PCOMPLIB /* parallel build: empty showalign() stub, plus sf_sort() behind a second guard */
-void showalign()
-{} /* no alignment output in this file */
-X
-#if !defined(MPI_SRC) && !defined(PCOMPLIB) /* NOTE(review): nested inside #ifdef PCOMPLIB, so this condition can never hold and sf_sort() below is never compiled from this file; presumably it is supplied elsewhere - confirm */
-void
-sf_sort(int *s, int n) /* sort s[0..n-1] into ascending order, in place */
-{
-X int gap, i, j;
-X int itmp;
-X
-X for (i=0; i<n-1; i++)
-X if (s[i]>s[i+1]) goto l2; /* first out-of-order pair: go sort */
-X return; /* already in ascending order */
-X
-l2:
-X for (gap=n/2; gap>0; gap/=2) /* Shell sort, gap halved each pass */
-X for (i=gap; i<n; i++)
-X for (j=i-gap; j>=0; j -= gap) {
-X if (s[j] <= s[j+gap]) break;
-X itmp = s[j];
-X s[j]=s[j+gap];
-X s[j+gap]=itmp;
-X }
-}
-X
-#endif
-#endif
-X /* lnum_sort() - sort s[0..n-1] in place by ascending lseek.
-X
-X Returns at once when no adjacent pair is out of order; otherwise
-X runs a Shell sort (gap halved each pass) that swaps the pointers,
-X not the structures they point to. */
-void
-lnum_sort(struct beststr **s, int n)
-{
-X int gap, i, j;
-X struct beststr *btmp;
-X
-X for (i=0; i<n-1; i++)
-X if (s[i]->lseek > s[i+1]->lseek) goto l2; /* first out-of-order pair: go sort */
-X return; /* already ordered */
-X
-l2:
-X for (gap=n/2; gap>0; gap/=2)
-X for (i=gap; i<n; i++)
-X for (j=i-gap; j>=0; j -= gap) {
-X if (s[j]->lseek <= s[j+gap]->lseek) break; /* pair in order: this chain is done */
-X btmp = s[j];
-X s[j]=s[j+gap];
-X s[j+gap]=btmp;
-X }
-}
-X
-#ifdef MPI_SRC
-/* aancpy() - copy a residue-coded string, translating codes to letters
-X (compiled only when MPI_SRC is defined).
-X
-X The alphabet is pst.sqx/pst.nsqx when pst.ext_sq_set is non-zero,
-X otherwise pst.sq/pst.nsq. Bytes are copied from 'from' to 'to' until
-X 'count' bytes have been copied or a 0 byte is reached, then '\0' is
-X appended. A byte whose value is <= nsq is replaced by sq[value]; any
-X other byte is copied unchanged. 'to' must have room for the copied
-X bytes plus the terminator.
-X
-X Each byte is read as unsigned char: with a signed plain char a byte
-X >= 0x80 is negative, would pass the "<= nsq" test and index before
-X the start of sq[]. */
-void
-aancpy(char *to, char *from, int count, struct pstruct pst)
-{
-X char *tp, *sq;
-X int nsq;
-X unsigned char c;
-X
-X if (pst.ext_sq_set) {
-X nsq = pst.nsqx;
-X sq = pst.sqx;
-X }
-X else {
-X nsq = pst.nsq;
-X sq = pst.sq;
-X }
-X
-X tp=to;
-X while (count-- && *from) {
-X c = (unsigned char)*from++;
-X if (c <= nsq) *tp++ = sq[c];
-X else *tp++ = (char)c;
-X }
-X *tp='\0';
-}
-#endif
-SHAR_EOF
-chmod 0644 showsum.c ||
-echo 'restore of showsum.c failed'
-Wc_c="`wc -c < 'showsum.c'`"
-test 12412 -eq "$Wc_c" ||
- echo 'showsum.c: original size 12412, current size' "$Wc_c"
-fi
-# ============= smith_waterman_altivec.c ==============
-if test -f 'smith_waterman_altivec.c' -a X"$1" != X"-c"; then
- echo 'x - skipping smith_waterman_altivec.c (File already exists)'
-else
-echo 'x - extracting smith_waterman_altivec.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_altivec.c' &&
-X
-/* Implementation of the Wozniak "anti-diagonal" vectorization
-X strategy for Smith-Waterman comparison, Wozniak (1997) Comp.
-X Appl. Biosci. 13:145-150
-X
-X November, 2004
-*/
-X
-/*
-X Written by Erik Lindahl, Stockholm Bioinformatics Center, 2004.
-X Please send bug reports and/or suggestions to lindahl@sbc.su.se.
-*/
-X
-#include <stdio.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "dropgsw.h"
-X
-#ifdef SW_ALTIVEC
-X
-int
-smith_waterman_altivec_word(unsigned char * query_sequence,
-X unsigned short * query_profile_word,
-X int query_length,
-X unsigned char * db_sequence,
-X int db_length,
-X unsigned short bias,
-X unsigned short gap_open,
-X unsigned short gap_extend,
-X struct f_struct * f_str)
-{
-X int i,j,k;
-X unsigned short * p;
-X unsigned short score;
-X unsigned char * p_dbseq;
-X int alphabet_size = f_str->alphabet_size;
-X unsigned short * workspace = (unsigned short *)f_str->workspace;
-X
-X vector unsigned short Fup,Hup1,Hup2,E,F,H,tmp;
-X vector unsigned char perm;
-X vector unsigned short v_maxscore;
-X vector unsigned short v_bias,v_gapopen,v_gapextend;
-X vector unsigned short v_score;
-X vector unsigned short v_score_q1;
-X vector unsigned short v_score_q2;
-X vector unsigned short v_score_q3;
-X vector unsigned short v_score_load;
-X vector unsigned char queue1_to_score = (vector unsigned char)(16,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
-X vector unsigned char queue2_to_queue1 = (vector unsigned char)(0,1,18,19,4,5,6,7,8,9,10,11,12,13,14,15);
-X vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,16,16,16,16,21,16,0,16,1,16,2,16,3,16,4);
-X vector unsigned char queue3_with_load = (vector unsigned char)(23,5,6,7,8,25,9,10,11,27,12,13,29,14,31,16);
-X
-X /* Load the bias to all elements of a constant */
-X v_bias = vec_lde(0,&bias);
-X perm = vec_lvsl(0,&bias);
-X v_bias = vec_perm(v_bias,v_bias,perm);
-X v_bias = vec_splat(v_bias,0);
-X
-X /* Load gap opening penalty to all elements of a constant */
-X v_gapopen = vec_lde(0,&gap_open);
-X perm = vec_lvsl(0,&gap_open);
-X v_gapopen = vec_perm(v_gapopen,v_gapopen,perm);
-X v_gapopen = vec_splat(v_gapopen,0);
-X
-X /* Load gap extension penalty to all elements of a constant */
-X v_gapextend = vec_lde(0,&gap_extend);
-X perm = vec_lvsl(0,&gap_extend);
-X v_gapextend = vec_perm(v_gapextend,v_gapextend,perm);
-X v_gapextend = vec_splat(v_gapextend,0);
-X
-X v_maxscore = vec_xor(v_maxscore,v_maxscore);
-X
-X // Zero out the storage vector
-X k = 2*(db_length+7);
-X
-X for(i=0,j=0;i<k;i++,j+=16)
-X {
-X // borrow the zero value in v_maxscore to have something to store
-X vec_st(v_maxscore,j,workspace);
-X }
-X
-X for(i=0;i<query_length;i+=8)
-X {
-X // fetch first data asap.
-X p_dbseq = db_sequence;
-X k = *p_dbseq++;
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // zero lots of stuff.
-X // We use both the VPERM and VSIU unit to knock off some cycles.
-X
-X E = vec_splat_u16(0);
-X F = vec_xor(F,F);
-X H = vec_splat_u16(0);
-X Hup2 = vec_xor(Hup2,Hup2);
-X v_score_q1 = vec_splat_u16(0);
-X v_score_q2 = vec_xor(v_score_q2,v_score_q2);
-X v_score_q3 = vec_splat_u16(0);
-X
-X // reset pointers to the start of the saved data from the last row
-X p = workspace;
-X
-X // PROLOGUE 1
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 2
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 3
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 4
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 5
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 6
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 7
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 8
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // reset pointers to the start of the saved data from the last row
-X p = workspace;
-X
-X for(j=8;j<db_length;j+=8)
-X {
-X // STEP 1
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup1 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 2
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup2 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup2 = vec_sld(Hup2,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 3
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup1 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 4
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup2 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup2 = vec_sld(Hup2,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 5
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup1 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 6
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup2 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup2 = vec_sld(Hup2,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 7
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup1 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup1 = vec_sld(Hup1,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // STEP 8
-X
-X // prefetch next residue
-X k = *p_dbseq++;
-X
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // prefetch score for next step
-X v_score_load = vec_ld(16*k,query_profile_word);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(256, p);
-X Hup2 = vec_ld(272, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,14);
-X Hup2 = vec_sld(Hup2,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X }
-X
-X v_score_load = vec_splat_u16(0);
-X
-X for(;j<db_length+7;j++)
-X {
-X // Create the actual diagonal score vector
-X // and update the queue of incomplete score vectors
-X //
-X // This could of course be done with only vec_perm or vec_sel,
-X // but since they use different execution units we have found
-X // it to be slightly faster to mix them.
-X v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 16; // move ahead 32 bytes
-X
-X // v_score_load contains all zeros
-X Fup = vec_sld(v_score_load,F,14);
-X Hup1 = vec_sld(v_score_load,H,14);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X }
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X
-X query_profile_word += 8*alphabet_size;
-X }
-X
-X // find largest score in the v_maxscore vector
-X tmp = vec_sld(v_maxscore,v_maxscore,8);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X tmp = vec_sld(v_maxscore,v_maxscore,4);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X tmp = vec_sld(v_maxscore,v_maxscore,2);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X
-X // store in temporary variable
-X vec_ste(v_maxscore,0,&score);
-X
-X // return largest score
-X return score;
-}
-X
-int
-smith_waterman_altivec_byte(unsigned char * query_sequence,
-X unsigned char * query_profile_byte,
-X int query_length,
-X unsigned char * db_sequence,
-X int db_length,
-X unsigned char bias,
-X unsigned char gap_open,
-X unsigned char gap_extend,
-X struct f_struct * f_str)
-{
-X int i,j,k,k8;
-X int overflow;
-X unsigned char * p;
-X unsigned char score;
-X int alphabet_size = f_str->alphabet_size;
-X unsigned char * workspace = (unsigned char *)f_str->workspace;
-X
-X vector unsigned char Fup,Hup1,Hup2,E,F,H,tmp;
-X vector unsigned char perm;
-X vector unsigned char v_maxscore;
-X vector unsigned char v_bias,v_gapopen,v_gapextend;
-X vector unsigned char v_score;
-X vector unsigned char v_score_q1;
-X vector unsigned char v_score_q2;
-X vector unsigned char v_score_q3;
-X vector unsigned char v_score_q4;
-X vector unsigned char v_score_q5;
-X vector unsigned char v_score_load1;
-X vector unsigned char v_score_load2;
-X vector unsigned char v_zero;
-X
-X vector unsigned char queue1_to_score = (vector unsigned char)(16,1,2,3,4,5,6,7,24,9,10,11,12,13,14,15);
-X vector unsigned char queue2_to_queue1 = (vector unsigned char)(16,17,2,3,4,5,6,7,24,25,10,11,12,13,14,15);
-X vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,17,18,3,4,5,6,7,24,25,26,11,12,13,14,15);
-X vector unsigned char queue4_to_queue3 = (vector unsigned char)(16,17,18,19,4,5,6,7,24,25,26,27,12,13,14,15);
-X vector unsigned char queue5_to_queue4 = (vector unsigned char)(16,17,18,19,20,2,3,4,24,25,26,27,28,10,11,12);
-X vector unsigned char queue5_with_load = (vector unsigned char)(19,20,21,5,6,22,7,23,27,28,29,13,14,30,15,31);
-X vector unsigned char merge_score_load = (vector unsigned char)(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
-X
-X v_zero = vec_splat_u8(0);
-X
-X /* Load the bias to all elements of a constant */
-X v_bias = vec_lde(0,&bias);
-X perm = vec_lvsl(0,&bias);
-X v_bias = vec_perm(v_bias,v_bias,perm);
-X v_bias = vec_splat(v_bias,0);
-X
-X /* Load gap opening penalty to all elements of a constant */
-X v_gapopen = vec_lde(0,&gap_open);
-X perm = vec_lvsl(0,&gap_open);
-X v_gapopen = vec_perm(v_gapopen,v_gapopen,perm);
-X v_gapopen = vec_splat(v_gapopen,0);
-X
-X /* Load gap extension penalty to all elements of a constant */
-X v_gapextend = vec_lde(0,&gap_extend);
-X perm = vec_lvsl(0,&gap_extend);
-X v_gapextend = vec_perm(v_gapextend,v_gapextend,perm);
-X v_gapextend = vec_splat(v_gapextend,0);
-X
-X v_maxscore = vec_xor(v_maxscore,v_maxscore);
-X
-X // Zero out the storage vector
-X k = (db_length+15);
-X for(i=0,j=0;i<k;i++,j+=32)
-X {
-X // borrow the zero value in v_maxscore to have something to store
-X vec_st(v_maxscore,j,workspace);
-X vec_st(v_maxscore,j+16,workspace);
-X }
-X
-X for(i=0;i<query_length;i+=16)
-X {
-X // zero lots of stuff.
-X // We use both the VPERM and VSIU unit to knock off some cycles.
-X
-X E = vec_splat_u8(0);
-X F = vec_xor(F,F);
-X H = vec_splat_u8(0);
-X Hup2 = vec_xor(Hup2,Hup2);
-X v_score_q1 = vec_splat_u8(0);
-X v_score_q2 = vec_xor(v_score_q2,v_score_q2);
-X v_score_q3 = vec_splat_u8(0);
-X v_score_q4 = vec_xor(v_score_q4,v_score_q4);
-X v_score_q5 = vec_splat_u8(0);
-X
-X // reset pointers to the start of the saved data from the last row
-X p = workspace;
-X
-X // start directly and prefetch score column
-X k = db_sequence[0];
-X k8 = k;
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = v_score_load1;
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // PROLOGUE 1
-X // prefetch next residue
-X k = db_sequence[1];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X // PROLOGUE 2
-X // prefetch next residue
-X k = db_sequence[2];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 3
-X // prefetch next residue
-X k = db_sequence[3];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 4
-X // prefetch next residue
-X k = db_sequence[4];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 5
-X // prefetch next residue
-X k = db_sequence[5];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X // PROLOGUE 6
-X // prefetch next residue
-X k = db_sequence[6];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 7
-X // prefetch next residue
-X k = db_sequence[7];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 8
-X // prefetch next residue
-X k = db_sequence[8];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X // PROLOGUE 9
-X // prefetch next residue
-X k = db_sequence[9];
-X k8 = db_sequence[1];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 10
-X // prefetch next residue
-X k = db_sequence[10];
-X k8 = db_sequence[2];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X // PROLOGUE 11
-X // prefetch next residue
-X k = db_sequence[11];
-X k8 = db_sequence[3];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 12
-X // prefetch next residue
-X k = db_sequence[12];
-X k8 = db_sequence[4];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X // PROLOGUE 13
-X // prefetch next residue
-X k = db_sequence[13];
-X k8 = db_sequence[5];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 14
-X // prefetch next residue
-X k = db_sequence[14];
-X k8 = db_sequence[6];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 15
-X // prefetch next residue
-X k = db_sequence[15];
-X k8 = db_sequence[7];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup1 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X // PROLOGUE 16
-X // prefetch next residue
-X k = db_sequence[16];
-X k8 = db_sequence[8];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch score for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(0, p);
-X Hup2 = vec_ld(16, p);
-X p += 32; // move ahead 32 bytes
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X p = workspace;
-X
-X for(j=16;j<db_length;j+=16)
-X {
-X // STEP 1
-X
-X // prefetch next residue
-X k = db_sequence[j+1];
-X k8 = db_sequence[j-7];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X // STEP 2
-X
-X // prefetch next residue
-X k = db_sequence[j+2];
-X k8 = db_sequence[j-6];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 3
-X
-X // prefetch next residue
-X k = db_sequence[j+3];
-X k8 = db_sequence[j-5];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 4
-X
-X // prefetch next residue
-X k = db_sequence[j+4];
-X k8 = db_sequence[j-4];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 5
-X
-X // prefetch next residue
-X k = db_sequence[j+5];
-X k8 = db_sequence[j-3];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 6
-X
-X // prefetch next residue
-X k = db_sequence[j+6];
-X k8 = db_sequence[j-2];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 7
-X
-X // prefetch next residue
-X k = db_sequence[j+7];
-X k8 = db_sequence[j-1];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 8
-X
-X // prefetch next residue
-X k = db_sequence[j+8];
-X k8 = db_sequence[j];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X
-X
-X
-X
-X
-X // STEP 9
-X
-X // prefetch next residue
-X k = db_sequence[j+9];
-X k8 = db_sequence[j+1];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 10
-X
-X // prefetch next residue
-X k = db_sequence[j+10];
-X k8 = db_sequence[j+2];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 11
-X
-X // prefetch next residue
-X k = db_sequence[j+11];
-X k8 = db_sequence[j+3];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 12
-X
-X // prefetch next residue
-X k = db_sequence[j+12];
-X k8 = db_sequence[j+4];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 13
-X
-X // prefetch next residue
-X k = db_sequence[j+13];
-X k8 = db_sequence[j+5];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 14
-X
-X // prefetch next residue
-X k = db_sequence[j+14];
-X k8 = db_sequence[j+6];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 15
-X
-X // prefetch next residue
-X k = db_sequence[j+15];
-X k8 = db_sequence[j+7];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup1 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup1 = vec_sld(Hup1,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X // STEP 16
-X
-X // prefetch next residue
-X k = db_sequence[j+16];
-X k8 = db_sequence[j+8];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load1 = vec_ld(16*k,query_profile_byte);
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X
-X // load values of F and H from previous row (one unit up)
-X Fup = vec_ld(512, p);
-X Hup2 = vec_ld(528, p);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32;
-X
-X // shift into place so we have complete F and H vectors
-X // that refer to the values one unit up from each cell
-X // that we are currently working on.
-X Fup = vec_sld(Fup,F,15);
-X Hup2 = vec_sld(Hup2,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup2,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X v_score_load1 = vec_perm(v_score_load1,v_score_load2,merge_score_load);
-X
-X // add score to H
-X H = vec_adds(Hup1,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X
-X }
-X
-X for(;j<db_length+15;j++)
-X {
-X k8 = db_sequence[j-7];
-X
-X v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score);
-X v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1);
-X v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2);
-X v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3);
-X v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4);
-X v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load);
-X
-X
-X // prefetch scores for next step
-X v_score_load2 = vec_ld(16*k8,query_profile_byte);
-X v_score_load1 = vec_perm(v_zero,v_score_load2,merge_score_load);
-X
-X // save old values of F and H to use on next row
-X vec_st(F, 0, p);
-X vec_st(H, 16, p);
-X p += 32; // move ahead 32 bytes
-X
-X Fup = vec_sld(v_zero,F,15);
-X Hup1 = vec_sld(v_zero,H,15);
-X
-X // do the dynamic programming
-X
-X // update E value
-X E = vec_subs(E,v_gapextend);
-X tmp = vec_subs(H,v_gapopen);
-X E = vec_max(E,tmp);
-X
-X // update F value
-X F = vec_subs(Fup,v_gapextend);
-X tmp = vec_subs(Hup1,v_gapopen);
-X F = vec_max(F,tmp);
-X
-X // add score to H
-X H = vec_adds(Hup2,v_score);
-X H = vec_subs(H,v_bias);
-X
-X // set H to max of H,E,F
-X H = vec_max(H,E);
-X H = vec_max(H,F);
-X
-X // Save value to use for next diagonal H
-X Hup2 = Hup1;
-X
-X // Update highest score encountered this far
-X v_maxscore = vec_max(v_maxscore,H);
-X }
-X vec_st(F, 512, p);
-X vec_st(H, 528, p);
-X
-X query_profile_byte += 16*alphabet_size;
-X
-X // End of this row (actually 16 rows due to SIMD).
-X // Before we continue, check for overflow.
-X tmp = vec_subs(vec_splat_u8(-1),v_bias);
-X overflow = vec_any_ge(v_maxscore,tmp);
-X
-X
-X }
-X
-X if(overflow)
-X {
-X return 255;
-X }
-X else
-X {
-X // find largest score in the v_maxscore vector
-X tmp = vec_sld(v_maxscore,v_maxscore,8);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X tmp = vec_sld(v_maxscore,v_maxscore,4);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X tmp = vec_sld(v_maxscore,v_maxscore,2);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X tmp = vec_sld(v_maxscore,v_maxscore,1);
-X v_maxscore = vec_max(v_maxscore,tmp);
-X
-X // store in temporary variable
-X vec_ste(v_maxscore,0,&score);
-X
-X // return largest score
-X return score;
-X }}
-X
-X
-#else
-X
-/* No Altivec support. Avoid compiler complaints about empty object */
-X
-int sw_dummy;
-X
-#endif
-SHAR_EOF
-chmod 0644 smith_waterman_altivec.c ||
-echo 'restore of smith_waterman_altivec.c failed'
-Wc_c="`wc -c < 'smith_waterman_altivec.c'`"
-test 113815 -eq "$Wc_c" ||
- echo 'smith_waterman_altivec.c: original size 113815, current size' "$Wc_c"
-fi
-# ============= smith_waterman_altivec.h ==============
-# Writes smith_waterman_altivec.h from the here-document below unless the
-# file already exists and the first argument is not -c.  sed removes the
-# leading X from each payload line; afterwards the mode is set and the byte
-# count is compared with the expected size.
-if test -f 'smith_waterman_altivec.h' -a X"$1" != X"-c"; then
- echo 'x - skipping smith_waterman_altivec.h (File already exists)'
-else
-echo 'x - extracting smith_waterman_altivec.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_altivec.h' &&
-#ifndef SMITH_WATERMAN_ALTIVEC_H
-#define SMITH_WATERMAN_ALTIVEC_H
-X
-/* Used only through pointers here, so an incomplete type suffices. */
-struct f_struct;
-X
-int
-smith_waterman_altivec_word(const unsigned char * query_sequence,
-X unsigned short * query_profile_word,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned short bias,
-X unsigned short gap_open,
-X unsigned short gap_extend,
-X struct f_struct * f_str);
-X
-X
-int
-smith_waterman_altivec_byte(const unsigned char * query_sequence,
-X unsigned char * query_profile_byte,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned char bias,
-X unsigned char gap_open,
-X unsigned char gap_extend,
-X struct f_struct * f_str);
-X
-#endif /* SMITH_WATERMAN_ALTIVEC_H */
-SHAR_EOF
-chmod 0644 smith_waterman_altivec.h ||
-echo 'restore of smith_waterman_altivec.h failed'
-# NOTE(review): 1337 = 1144 bytes recorded for the archived file plus the
-# 193 bytes added to the here-document above; confirm with wc -c.
-Wc_c="`wc -c < 'smith_waterman_altivec.h'`"
-test 1337 -eq "$Wc_c" ||
- echo 'smith_waterman_altivec.h: original size 1337, current size' "$Wc_c"
-fi
-# ============= smith_waterman_sse2.c ==============
-# Writes smith_waterman_sse2.c from the here-document below unless the file
-# already exists and the first argument is not -c.  sed removes the leading
-# X from each payload line; afterwards the mode is set and the byte count is
-# compared with the expected size.
-if test -f 'smith_waterman_sse2.c' -a X"$1" != X"-c"; then
- echo 'x - skipping smith_waterman_sse2.c (File already exists)'
-else
-echo 'x - extracting smith_waterman_sse2.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_sse2.c' &&
-/******************************************************************
-X Copyright 2006 by Michael Farrar. All rights reserved.
-X This program may not be sold or incorporated into a commercial product,
-X in whole or in part, without written consent of Michael Farrar. For
-X further information regarding permission for use or reproduction, please
-X contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-X
-/*
-X Written by Michael Farrar, 2006.
-X Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-X
-#include <stdio.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "dropgsw.h"
-#include "smith_waterman_sse2.h"
-X
-#ifdef __SUNPRO_C
-#include <sunmedia_intrin.h>
-#else
-#include <emmintrin.h>
-#endif
-X
-#ifdef SW_SSE2
-X
-/*
-X Word (16-bit, 8 lanes) Smith-Waterman scan of db_sequence against the
-X query profile. Lanes are kept biased by -32768 so the full range of a
-X signed short is usable; the return value has the bias removed again.
-X Vectors are zeroed before first use so no indeterminate value is read.
-*/
-int
-smith_waterman_sse2_word(const unsigned char * query_sequence,
-X unsigned short * query_profile_word,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned short gap_open,
-X unsigned short gap_extend,
-X struct f_struct * f_str)
-{
-X int i, j, k;
-X short score;
-X
-X int cmp;
-X int iter = (query_length + 7) / 8;
-X
-X __m128i *p;
-X __m128i *workspace = (__m128i *) f_str->workspace;
-X
-X __m128i E, F, H;
-X
-X __m128i v_maxscore;
-X __m128i v_gapopen;
-X __m128i v_gapextend;
-X
-X __m128i v_min;
-X __m128i v_minimums;
-X __m128i v_temp;
-X
-X __m128i *pHLoad, *pHStore;
-X __m128i *pE;
-X
-X __m128i *pScore;
-X
-X /* Load gap opening penalty to all elements of a constant */
-X v_gapopen = _mm_setzero_si128 ();
-X v_gapopen = _mm_insert_epi16 (v_gapopen, gap_open, 0);
-X v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
-X v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
-X
-X /* Load gap extension penalty to all elements of a constant */
-X v_gapextend = _mm_setzero_si128 ();
-X v_gapextend = _mm_insert_epi16 (v_gapextend, gap_extend, 0);
-X v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
-X v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
-X
-X /* load v_maxscore with the zeros. since we are using signed */
-X /* math, we will bias the maxscore to -32768 so we have the */
-X /* full range of the short. */
-X v_maxscore = _mm_setzero_si128 ();
-X v_maxscore = _mm_cmpeq_epi16 (v_maxscore, v_maxscore);
-X v_maxscore = _mm_slli_epi16 (v_maxscore, 15);
-X
-X v_minimums = _mm_shuffle_epi32 (v_maxscore, 0);
-X
-X v_min = _mm_shuffle_epi32 (v_maxscore, 0);
-X v_min = _mm_srli_si128 (v_min, 14);
-X
-X /* Zero out the storage vector */
-X k = 2 * iter;
-X
-X p = workspace;
-X for (i = 0; i < k; i++)
-X {
-X _mm_store_si128 (p++, v_maxscore);
-X }
-X
-X pE = workspace;
-X pHStore = pE + iter;
-X pHLoad = pHStore + iter;
-X
-X for (i = 0; i < db_length; ++i)
-X {
-X /* fetch first data asap. */
-X pScore = (__m128i *) query_profile_word + db_sequence[i] * iter;
-X
-X /* bias all elements in F to -32768 */
-X F = _mm_setzero_si128 ();
-X F = _mm_cmpeq_epi16 (F, F);
-X F = _mm_slli_epi16 (F, 15);
-X
-X /* load the next h value */
-X H = _mm_load_si128 (pHStore + iter - 1);
-X H = _mm_slli_si128 (H, 2);
-X H = _mm_or_si128 (H, v_min);
-X
-X p = pHLoad;
-X pHLoad = pHStore;
-X pHStore = p;
-X
-X for (j = 0; j < iter; j++)
-X {
-X /* load E values */
-X E = _mm_load_si128 (pE + j);
-X
-X /* add score to H */
-X H = _mm_adds_epi16 (H, *pScore++);
-X
-X /* Update highest score encountered this far */
-X v_maxscore = _mm_max_epi16 (v_maxscore, H);
-X
-X /* get max from H, E and F */
-X H = _mm_max_epi16 (H, E);
-X H = _mm_max_epi16 (H, F);
-X
-X /* save H values */
-X _mm_store_si128 (pHStore + j, H);
-X
-X /* subtract the gap open penalty from H */
-X H = _mm_subs_epi16 (H, v_gapopen);
-X
-X /* update E value */
-X E = _mm_subs_epi16 (E, v_gapextend);
-X E = _mm_max_epi16 (E, H);
-X
-X /* update F value */
-X F = _mm_subs_epi16 (F, v_gapextend);
-X F = _mm_max_epi16 (F, H);
-X
-X /* save E values */
-X _mm_store_si128 (pE + j, E);
-X
-X /* load the next h value */
-X H = _mm_load_si128 (pHLoad + j);
-X }
-X
-X /* reset pointers to the start of the saved data */
-X j = 0;
-X H = _mm_load_si128 (pHStore + j);
-X
-X /* the computed F value is for the given column. since */
-X /* we are at the end, we need to shift the F value over */
-X /* to the next column. */
-X F = _mm_slli_si128 (F, 2);
-X F = _mm_or_si128 (F, v_min);
-X v_temp = _mm_subs_epi16 (H, v_gapopen);
-X v_temp = _mm_cmpgt_epi16 (F, v_temp);
-X cmp = _mm_movemask_epi8 (v_temp);
-X
-X while (cmp != 0x0000)
-X {
-X E = _mm_load_si128 (pE + j);
-X
-X H = _mm_max_epi16 (H, F);
-X
-X /* save H values */
-X _mm_store_si128 (pHStore + j, H);
-X
-X /* update E in case the new H value would change it */
-X H = _mm_subs_epi16 (H, v_gapopen);
-X E = _mm_max_epi16 (E, H);
-X _mm_store_si128 (pE + j, E);
-X
-X /* update F value */
-X F = _mm_subs_epi16 (F, v_gapextend);
-X
-X j++;
-X if (j >= iter)
-X {
-X j = 0;
-X F = _mm_slli_si128 (F, 2);
-X F = _mm_or_si128 (F, v_min);
-X }
-X H = _mm_load_si128 (pHStore + j);
-X
-X v_temp = _mm_subs_epi16 (H, v_gapopen);
-X v_temp = _mm_cmpgt_epi16 (F, v_temp);
-X cmp = _mm_movemask_epi8 (v_temp);
-X }
-X }
-X
-X /* find largest score in the v_maxscore vector */
-X v_temp = _mm_srli_si128 (v_maxscore, 8);
-X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
-X v_temp = _mm_srli_si128 (v_maxscore, 4);
-X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
-X v_temp = _mm_srli_si128 (v_maxscore, 2);
-X v_maxscore = _mm_max_epi16 (v_maxscore, v_temp);
-X
-X /* extract the largest score */
-X score = _mm_extract_epi16 (v_maxscore, 0);
-X
-X /* return largest score biased by 32768 */
-X return score + 32768;
-}
-X
-X
-X
-X
-/*
-X Byte (8-bit, 16 lanes) Smith-Waterman scan of db_sequence against the
-X query profile. Each profile entry is added and bias subtracted with
-X unsigned saturation. Returns 255 if score + bias >= 255 (the score may
-X have overflowed), otherwise the largest lane value seen.
-X Vectors are zeroed before first use so no indeterminate value is read.
-*/
-int
-smith_waterman_sse2_byte(const unsigned char * query_sequence,
-X unsigned char * query_profile_byte,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned char bias,
-X unsigned char gap_open,
-X unsigned char gap_extend,
-X struct f_struct * f_str)
-{
-X int i, j, k;
-X int score;
-X
-X int dup;
-X int cmp;
-X int iter = (query_length + 15) / 16;
-X
-X __m128i *p;
-X __m128i *workspace = (__m128i *) f_str->workspace;
-X
-X __m128i E, F, H;
-X
-X __m128i v_maxscore;
-X __m128i v_bias;
-X __m128i v_gapopen;
-X __m128i v_gapextend;
-X
-X __m128i v_temp;
-X __m128i v_zero;
-X
-X __m128i *pHLoad, *pHStore;
-X __m128i *pE;
-X
-X __m128i *pScore;
-X
-X /* Load the bias to all elements of a constant */
-X dup = ((short) bias << 8) | bias;
-X v_bias = _mm_setzero_si128 ();
-X v_bias = _mm_insert_epi16 (v_bias, dup, 0);
-X v_bias = _mm_shufflelo_epi16 (v_bias, 0);
-X v_bias = _mm_shuffle_epi32 (v_bias, 0);
-X
-X /* Load gap opening penalty to all elements of a constant */
-X dup = ((short) gap_open << 8) | gap_open;
-X v_gapopen = _mm_setzero_si128 ();
-X v_gapopen = _mm_insert_epi16 (v_gapopen, dup, 0);
-X v_gapopen = _mm_shufflelo_epi16 (v_gapopen, 0);
-X v_gapopen = _mm_shuffle_epi32 (v_gapopen, 0);
-X
-X /* Load gap extension penalty to all elements of a constant */
-X dup = ((short) gap_extend << 8) | gap_extend;
-X v_gapextend = _mm_setzero_si128 ();
-X v_gapextend = _mm_insert_epi16 (v_gapextend, dup, 0);
-X v_gapextend = _mm_shufflelo_epi16 (v_gapextend, 0);
-X v_gapextend = _mm_shuffle_epi32 (v_gapextend, 0);
-X
-X /* initialize the max score */
-X v_maxscore = _mm_setzero_si128 ();
-X v_maxscore = _mm_xor_si128 (v_maxscore, v_maxscore);
-X
-X /* create a constant of all zeros for comparison */
-X v_zero = _mm_setzero_si128 ();
-X v_zero = _mm_xor_si128 (v_zero, v_zero);
-X
-X /* Zero out the storage vector */
-X k = iter * 2;
-X
-X p = workspace;
-X for (i = 0; i < k; i++)
-X {
-X _mm_store_si128 (p++, v_maxscore);
-X }
-X
-X pE = workspace;
-X pHStore = pE + iter;
-X pHLoad = pHStore + iter;
-X
-X for (i = 0; i < db_length; ++i)
-X {
-X /* fetch first data asap. */
-X pScore = (__m128i *) query_profile_byte + db_sequence[i] * iter;
-X
-X /* zero out F value. */
-X F = _mm_setzero_si128 ();
-X F = _mm_xor_si128 (F, F);
-X
-X /* load the next h value */
-X H = _mm_load_si128 (pHStore + iter - 1);
-X H = _mm_slli_si128 (H, 1);
-X
-X p = pHLoad;
-X pHLoad = pHStore;
-X pHStore = p;
-X
-X for (j = 0; j < iter; j++)
-X {
-X /* load values E. */
-X E = _mm_load_si128 (pE + j);
-X
-X /* add score to H */
-X H = _mm_adds_epu8 (H, *pScore++);
-X H = _mm_subs_epu8 (H, v_bias);
-X
-X /* Update highest score encountered this far */
-X v_maxscore = _mm_max_epu8 (v_maxscore, H);
-X
-X /* get max from H, E and F */
-X H = _mm_max_epu8 (H, E);
-X H = _mm_max_epu8 (H, F);
-X
-X /* save H values */
-X _mm_store_si128 (pHStore + j, H);
-X
-X /* subtract the gap open penalty from H */
-X H = _mm_subs_epu8 (H, v_gapopen);
-X
-X /* update E value */
-X E = _mm_subs_epu8 (E, v_gapextend);
-X E = _mm_max_epu8 (E, H);
-X
-X /* update F value */
-X F = _mm_subs_epu8 (F, v_gapextend);
-X F = _mm_max_epu8 (F, H);
-X
-X /* save E values */
-X _mm_store_si128 (pE + j, E);
-X
-X /* load the next h value */
-X H = _mm_load_si128 (pHLoad + j);
-X }
-X
-X /* reset pointers to the start of the saved data */
-X j = 0;
-X H = _mm_load_si128 (pHStore + j);
-X
-X /* the computed F value is for the given column. since */
-X /* we are at the end, we need to shift the F value over */
-X /* to the next column. */
-X F = _mm_slli_si128 (F, 1);
-X v_temp = _mm_subs_epu8 (H, v_gapopen);
-X v_temp = _mm_subs_epu8 (F, v_temp);
-X v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
-X cmp = _mm_movemask_epi8 (v_temp);
-X
-X while (cmp != 0xffff)
-X {
-X E = _mm_load_si128 (pE + j);
-X
-X H = _mm_max_epu8 (H, F);
-X
-X /* save H values */
-X _mm_store_si128 (pHStore + j, H);
-X
-X /* update E in case the new H value would change it */
-X H = _mm_subs_epu8 (H, v_gapopen);
-X E = _mm_max_epu8 (E, H);
-X _mm_store_si128 (pE + j, E);
-X
-X /* update F value */
-X F = _mm_subs_epu8 (F, v_gapextend);
-X
-X j++;
-X if (j >= iter)
-X {
-X j = 0;
-X F = _mm_slli_si128 (F, 1);
-X }
-X H = _mm_load_si128 (pHStore + j);
-X
-X v_temp = _mm_subs_epu8 (H, v_gapopen);
-X v_temp = _mm_subs_epu8 (F, v_temp);
-X v_temp = _mm_cmpeq_epi8 (v_temp, v_zero);
-X cmp = _mm_movemask_epi8 (v_temp);
-X }
-X }
-X
-X /* find largest score in the v_maxscore vector */
-X v_temp = _mm_srli_si128 (v_maxscore, 8);
-X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
-X v_temp = _mm_srli_si128 (v_maxscore, 4);
-X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
-X v_temp = _mm_srli_si128 (v_maxscore, 2);
-X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
-X v_temp = _mm_srli_si128 (v_maxscore, 1);
-X v_maxscore = _mm_max_epu8 (v_maxscore, v_temp);
-X
-X /* store in temporary variable */
-X score = _mm_extract_epi16 (v_maxscore, 0);
-X score = score & 0x00ff;
-X
-X /* check if we might have overflowed */
-X if (score + bias >= 255)
-X {
-X score = 255;
-X }
-X
-X /* return largest score */
-X return score;
-}
-#else
-X
-/* No SSE2 support. Avoid compiler complaints about empty object */
-X
-int sw_dummy;
-X
-#endif
-SHAR_EOF
-chmod 0644 smith_waterman_sse2.c ||
-echo 'restore of smith_waterman_sse2.c failed'
-# NOTE(review): 13078 = 12106 bytes recorded for the archived file plus the
-# 972 bytes added to the here-document above; confirm with wc -c.
-Wc_c="`wc -c < 'smith_waterman_sse2.c'`"
-test 13078 -eq "$Wc_c" ||
- echo 'smith_waterman_sse2.c: original size 13078, current size' "$Wc_c"
-fi
-# ============= smith_waterman_sse2.h ==============
-# Writes smith_waterman_sse2.h from the here-document below unless the file
-# already exists and the first argument is not -c.  sed removes the leading
-# X from each payload line; afterwards the mode is set and the byte count is
-# compared with the expected size.
-if test -f 'smith_waterman_sse2.h' -a X"$1" != X"-c"; then
- echo 'x - skipping smith_waterman_sse2.h (File already exists)'
-else
-echo 'x - extracting smith_waterman_sse2.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'smith_waterman_sse2.h' &&
-/******************************************************************
-X Copyright 2006 by Michael Farrar. All rights reserved.
-X This program may not be sold or incorporated into a commercial product,
-X in whole or in part, without written consent of Michael Farrar. For
-X further information regarding permission for use or reproduction, please
-X contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-X
-/*
-X Written by Michael Farrar, 2006.
-X Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-X
-#ifndef SMITH_WATERMAN_SSE2_H
-#define SMITH_WATERMAN_SSE2_H
-X
-/* Used only through pointers here, so an incomplete type suffices. */
-struct f_struct;
-X
-int
-smith_waterman_sse2_word(const unsigned char * query_sequence,
-X unsigned short * query_profile_word,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned short gap_open,
-X unsigned short gap_extend,
-X struct f_struct * f_str);
-X
-X
-int
-smith_waterman_sse2_byte(const unsigned char * query_sequence,
-X unsigned char * query_profile_byte,
-X const int query_length,
-X const unsigned char * db_sequence,
-X const int db_length,
-X unsigned char bias,
-X unsigned char gap_open,
-X unsigned char gap_extend,
-X struct f_struct * f_str);
-X
-#endif /* SMITH_WATERMAN_SSE2_H */
-SHAR_EOF
-chmod 0755 smith_waterman_sse2.h ||
-echo 'restore of smith_waterman_sse2.h failed'
-# NOTE(review): 1812 = 1723 bytes recorded for the archived file plus the
-# 89 bytes added to the here-document above; confirm with wc -c.
-Wc_c="`wc -c < 'smith_waterman_sse2.h'`"
-test 1812 -eq "$Wc_c" ||
- echo 'smith_waterman_sse2.h: original size 1812, current size' "$Wc_c"
-fi
-# ============= structs.h ==============
-if test -f 'structs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping structs.h (File already exists)'
-else
-echo 'x - extracting structs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'structs.h' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: structs.h,v 1.36 2006/06/22 02:35:05 wrp Exp $ */
-X
-#include "aln_structs.h"
-X
-struct hist_str {
-X int histflg;
-X int *hist_a;
-X int histint, min_hist, max_hist, maxh;
-X long entries;
-X int z_calls;
-X char stat_info[MAX_STR];
-};
-X
-/* Library size counters; struct mngmsg embeds one of these as its 'db'
-X member.
-X NOTE(review): 'carry' presumably extends 'length' when it overflows
-X -- confirm against the code that updates it. */
-struct db_str {
-X long entries;
-X unsigned long length;
-X int carry;
-};
-X
-/* struct mngmsg - search parameters and bookkeeping shared between the
-X host and the manager (see the per-field comments). It embeds the
-X histogram (hist), library counters (db) and alignment structures
-X (aln, a_res); the latter two types come from aln_structs.h or another
-X header, they are not declared in this file. */
-struct mngmsg /* Message from host to manager */
-{
-X int n0; /* Integer returned by hgetseq */
-X int nm0; /* number of segments */
-X int nmoff; /* length of fastf segment */
-X unsigned char *aa0a; /* annotation array */
-X char ann_arr[MAX_FN]; /* annotation characters */
-X int ann_flg; /* have annotation array, characters */
-X char tname[MAX_FN]; /* Query sequence name */
-X int tnamesize; /* Query name size */
-X int qsfnum[10];
-X int nqsfnum;
-X int qsfnum_n[10];
-X int nqsfnum_n;
-X char lname[MAX_FN]; /* Library file name */
-X char *lbnames[MAX_LF]; /* list of library files */
-X struct lmf_str *lb_mfd[MAX_LF]; /* list of opened file pointers */
-X
-X int max_tot; /* function defined total sequence area */
-X int maxn; /* longest library sequence chunk */
-X int dupn; /* overlap to use when segmenting sequence (p_comp) */
-X int qoff; /* overlap when segmenting long query sequence */
-X int loff; /* overlap when segmenting long library sequences */
-X int maxt3; /* overlap for translated sequences */
-X int qdnaseq; /* query is protein (0)/dna (1) */
-X int ldnaseq; /* library is protein (0)/dna (1) */
-X int qframe; /* number of possible query frames */
-X int nframe; /* frame for TFASTA */
-X int nitt1; /* nframe-1 */
-X int thr_fact; /* fudge factor for threads */
-X int s_int; /* sampling interval for statistics */
-X int ql_start; /* starting query sequence */
-X int ql_stop; /* ending query sequence */
-X int nln; /* number of library names */
-X int pbuf_siz; /* buffer size for sequences send in p2_complib */
-X char qtitle[MAX_FN]; /* query title */
-X char ltitle[MAX_FN]; /* library title */
-X char flstr[MAX_FN]; /* FASTLIBS string */
-X char outfile[MAX_FN];
-X char label [MAXLN]; /* Output label */
-X char f_id0[4]; /* function id for markx==10 */
-X char f_id1[4]; /* function id for markx==10 */
-X char sqnam[4]; /* "aa" or "nt" */
-X char sqtype[10]; /* "DNA" or "protein" */
-X int long_info; /* long description flag*/
-X long sq0off, sq1off; /* offset into aa0, aa1 */
-X int markx; /* alignment display type */
-X int seqnm; /* query sequence number */
-X int nbr_seq; /* number of library sequences */
-X int term_code; /* add termination codes to proteins if absent */
-X int n1_high; /* upper limit on sequence length */
-X int n1_low; /* lower limit on sequence length */
-X double e_cut; /* e_value for display */
-X double e_low; /* e_value for display -- NOTE(review): same text as e_cut; presumably the lower bound, confirm */
-X int e_cut_set; /* e_value deliberately set */
-X int pamd1; /* 1st dimension of pam matrix */
-X int pamd2; /* 2nd dimension of pam matrix */
-X int revcomp; /* flag to do reverse complement */
-X int quiet; /* quiet option */
-X int nrelv; /* number of interesting scores */
-X int srelv; /* number of scores to show in showbest */
-X int arelv; /* number of scores to show at alignment */
-X int z_bits; /* z_bits==1: show bit score, ==0 show z-score */
-X char alab[3][24]; /* labels for alignment scores */
-X int nohist; /* no histogram option */
-X int nshow;
-X int mshow; /* number of scores to show */
-X int mshow_flg;
-X int ashow; /* number of alignments to show */
-X int nmlen; /* length of name label */
-X int show_code; /* show alignment code in -m 9; ==1 => identity only, ==2 alignment code*/
-X int self; /* self comparison */
-X int thold; /* threshold */
-X int last_calc_flg; /* needs a last calculation stage */
-X int qshuffle; /* shuffle the query and do additional comparisons */
-X int shuff_max; /* number of shuffles to perform */
-X int shuff_node; /* number of shuffles/worker node */
-X int shuff_wid;
-X int stages; /* number of stages */
-X double Lambda, K, H; /* Karlin-Altschul parameters */
-X int escore_flg; /* use escore calculated by do_work() */
-X struct hist_str hist;
-X struct db_str db;
-X void *pstat_void;
-X struct a_struct aln; /* has llen, llnctx, llnctx_flg, showall */
-X struct a_res_str a_res; /* has individual alignment coordinates */
-X char dfile [MAX_FN]; /* file for dumping scores to */
-};
-X
-X
-SHAR_EOF
-chmod 0644 structs.h ||
-echo 'restore of structs.h failed'
-Wc_c="`wc -c < 'structs.h'`"
-test 4279 -eq "$Wc_c" ||
- echo 'structs.h: original size 4279, current size' "$Wc_c"
-fi
-# ============= tatstats.c ==============
-if test -f 'tatstats.c' -a X"$1" != X"-c"; then
- echo 'x - skipping tatstats.c (File already exists)'
-else
-echo 'x - extracting tatstats.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'tatstats.c' &&
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "param.h"
-#include "tatstats.h"
-X
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-X
-/* calc_priors() - calculate frequencies of amino-acids, possibly with counts */
-/* generate_tatprobs() - build the table of score probabilities if the
-X sequences are not too long */
-X
-double
-det(double a11, double a12, double a13,
-X double a21, double a22, double a23,
-X double a31, double a32, double a33);
-X
-/* power() - r raised to the integer power p, by repeated squaring.
-X A zero base gives 1.0 when p == 0 and 0.0 for any other p; for a
-X non-zero base a negative p gives the reciprocal of r to the -p. */
-double power(double r, int p)
-{
-X double acc = 1.0;
-X int invert = (p < 0);
-X
-X if (r == 0.0) {
-X if (p == 0) return 1.0;
-X return 0.0;
-X }
-X
-X if (invert) p = -p;
-X
-X /* consume the exponent one bit at a time, low bit first */
-X for ( ; p > 0; p >>= 1) {
-X if (p & 1) acc *= r;
-X if (p >> 1) r *= r; /* only square while bits remain */
-X }
-X
-X if (invert) return 1.0 / acc;
-X return acc;
-}
-X
-/* factorial() - product a * (a-1) * ... * (b+1), i.e. a!/b! for
-X 0 <= b <= a. Returns 1.0 when a == 0 or when a <= b. */
-double
-factorial (int a, int b) {
-X
-X double product;
-X int term;
-X
-X if (a == 0) return 1.0;
-X
-X product = 1.0;
-X for (term = a; term > b; term--) {
-X product *= (double) term;
-X }
-X
-X return product;
-}
-X
-/* calc_priors() - fill priors[1..ppst->nsq] with residue frequencies.
-X
-X n1 == 0 and f_str->priors[1] > 0.0: copy f_str->priors[] and return.
-X n1 == 0 otherwise: use the built-in table below when ppst->dnaseq is
-X SEQT_PROT, else equal counts for residues 1..4.
-X n1 != 0: count the residues of aa1[0..n1-1] that fall in 1..nsq and
-X blend them with f_str->priors[], weighted by pseudocts.
-X
-X NOTE(review): counts[] has 25 slots and the protein table only sets
-X slots 1..24, yet the loops below run to ppst->nsq; this assumes
-X nsq <= 24 -- confirm against param.h and the callers. */
-void
-calc_priors(double *priors,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X const unsigned char *aa1, int n1,
-X int pseudocts)
-{
-X long counts[25], sum;
-X int i;
-X
-X /* no library sequence, but priors were already stored: reuse them */
-X if(n1 == 0 && f_str->priors[1] > 0.0) {
-X for(i = 1 ; i <= ppst->nsq ; i++) {
-X priors[i] = f_str->priors[i];
-X }
-X return;
-X }
-X
-X if(n1 == 0) {
-X if (ppst->dnaseq==SEQT_PROT ) {
-X
-X /* Robinson & Robinson residue counts from Stephen Altschul */
-X counts[ 1] = 35155; /* A */
-X counts[ 2] = 23105; /* R */
-X counts[ 3] = 20212; /* N */
-X counts[ 4] = 24161; /* D */
-X counts[ 5] = 8669; /* C */
-X counts[ 6] = 19208; /* Q */
-X counts[ 7] = 28354; /* E */
-X counts[ 8] = 33229; /* G */
-X counts[ 9] = 9906; /* H */
-X counts[10] = 23161; /* I */
-X counts[11] = 40625; /* L */
-X counts[12] = 25872; /* K */
-X counts[13] = 10101; /* M */
-X counts[14] = 17367; /* F */
-X counts[15] = 23435; /* P */
-X counts[16] = 32070; /* S */
-X counts[17] = 26311; /* T */
-X counts[18] = 5990; /* W */
-X counts[19] = 14488; /* Y */
-X counts[20] = 29012; /* V */
-X counts[21] = 0; /* B */
-X counts[22] = 0; /* Z */
-X counts[23] = 0; /* X */
-X counts[24] = 0; /* * */
-X }
-X else { /* SEQT_DNA */
-X counts[1] = 250;
-X counts[2] = 250;
-X counts[3] = 250;
-X counts[4] = 250;
-X for (i=5; i<=ppst->nsq; i++) counts[i]=0;
-X }
-X } else {
-X memset(&counts[0], 0, sizeof(counts));
-X
-X /* tally library residues; codes outside 1..nsq are ignored */
-X for(i = 0 ; i < n1 ; i++) {
-X if(aa1[i] > ppst->nsq || aa1[i] < 1) continue;
-X counts[aa1[i]]++;
-X }
-X }
-X
-X sum = 0;
-X for(i = 1 ; i <= ppst->nsq ; i++) sum += counts[i];
-X
-X for(i = 1 ; i <= ppst->nsq ; i++) {
-X if(n1 == 0) {
-X priors[i] = (double) counts[i] / (double) sum;
-X } else {
-X /* observed counts plus pseudocts pseudo-observations drawn from
-X f_str->priors[]; NOTE(review): divides by zero if both sum and
-X pseudocts are 0 -- confirm callers never pass that */
-X priors[i] = ( ((double) pseudocts * f_str->priors[i]) + (double) counts[i] ) / ( (double) sum + (double) pseudocts );
-X }
-X }
-X
-X return;
-}
-X
-/* max_score() - largest value in scores[1..nsq] (scores[0] is not
-X examined); returns -BIGNUM when nsq < 1. */
-int
-max_score(int *scores, int nsq) {
-X
-X int idx = 1;
-X int best = -BIGNUM;
-X
-X while (idx <= nsq) {
-X best = (scores[idx] > best) ? scores[idx] : best;
-X idx++;
-X }
-X
-X return best;
-}
-X
-/* min_score() - smallest value in scores[1..nsq] (scores[0] is not
-X examined); returns BIGNUM when nsq < 1. */
-int
-min_score(int *scores, int nsq) {
-X
-X int idx = 1;
-X int worst = BIGNUM;
-X
-X while (idx <= nsq) {
-X worst = (scores[idx] < worst) ? scores[idx] : worst;
-X idx++;
-X }
-X
-X return worst;
-}
-X
-/* calc_tatusov() - probability for the chain of segments made of 'this'
-X followed by 'last' and its ->prev links.
-X
-X The segment lengths, segment count and scores of the whole chain are
-X accumulated first. A score distribution (struct tat_str) is then
-X either copied from the precalculated f_str->tatprobs[] (FASTS/FASTM
-X only, when every segment spans a complete query fragment) or built
-X by generate_tatprobs() from the residues of 'this', extending
-X last->tat when last != NULL. The distribution is stored in
-X this->newtat; it is allocated here and not freed here.
-X
-X Unless the precalculated f_str->intprobs[] supplied it, the tail
-X probability of scores >= the summed score is integrated from the
-X distribution. It is then scaled by a placement factor that depends
-X on n1, the summed length and the number of segments, and values
-X above 0.01 are converted with 1 - exp(-p).
-X
-X aa1, pseudocts, do_opt and zsflag are not used by the live code (they
-X only appear in the commented-out calc_priors call). Exits the
-X program if memory cannot be allocated. */
-double
-calc_tatusov ( struct slink *last,
-X struct slink *this,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int **pam2, int nsq,
-X struct f_struct *f_str,
-X int pseudocts,
-X int do_opt,
-X int zsflag
-X )
-{
-X int i, is, j, k;
-X
-X double *priors, my_priors[MAXSQ], tatprob, left_tatprob, right_tatprob;
-X unsigned char *query = NULL;
-X int length, maxlength, sumlength, sumscore, tmp, seg;
-X int start, stop;
-X struct slink *sl;
-X int N;
-X double *tatprobsptr;
-X
-#if defined(FASTS) || defined(FASTM)
-X int index = 0;
-X int notokay = 0;
-#endif
-X
-X struct tat_str *oldtat = NULL, *newtat = NULL;
-X
-#if defined(FASTS) || defined(FASTM)
-X start = this->vp->start - this->vp->dp + f_str->noff;
-X stop = this->vp->stop - this->vp->dp + f_str->noff;
-X tmp = stop - start + 1;
-#else
-X /*
-X FASTF alignments can also hang off the end of library sequences,
-X but no query residues are used up in the process, but we have to
-X keep track of which are
-X */
-X tmp = 0;
-X for(i = 0, j = 0 ; i < n0 ; i++) {
-X if (this->vp->used[i] == 1) {tmp++; }
-X }
-#endif
-X
-X sumlength = maxlength = length = tmp;
-X seg = 1;
-X sumscore = this->vp->score;
-X
-#if defined(FASTS) || defined(FASTM)
-X /* index collects a bit per fragment matched end to end; notokay
-X collects a bit per fragment matched only in part */
-X if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
-X index |= (1 << f_str->aa0i[start]);
-X } else {
-X notokay |= (1 << f_str->aa0i[start]);
-X }
-#endif
-X
-X /* add in every earlier segment of the chain */
-X for(sl = last; sl != NULL ; sl = sl->prev) {
-X
-#if defined(FASTS) || defined(FASTM)
-X start = sl->vp->start - sl->vp->dp + f_str->noff;
-X stop = sl->vp->stop - sl->vp->dp + f_str->noff;
-X tmp = stop - start + 1;
-#else
-X tmp = 0;
-X for(i = 0, j = 0 ; i < n0 ; i++) {
-X if(sl->vp->used[i] == 1) {
-X tmp++;
-X }
-X }
-#endif
-X sumlength += tmp;
-X maxlength = tmp > maxlength ? tmp : maxlength;
-X seg++;
-X sumscore += sl->vp->score;
-X
-#if defined(FASTS) || defined(FASTM)
-X if(f_str->aa0b[start] == start && f_str->aa0e[stop] == stop) {
-X index |= (1 << f_str->aa0i[start]);
-X } else {
-X notokay |= (1 << f_str->aa0i[start]);
-X }
-#endif
-X
-X }
-X
-X /* negative means "not yet known"; see the integration step below */
-X tatprob = -1.0;
-X
-#if defined(FASTS) || defined(FASTM)
-X
-X /* for T?FASTS, we try to use what we've precalculated: */
-X
-X /* with z = 3, do_opt is true, but we can use precalculated - with
-X all other z's we can use precalculated only if !do_opt */
-X if(!notokay && f_str->tatprobs != NULL) {
-X /* create our own newtat and copy f_str's tat into it */
-X index--;
-X
-X newtat = (struct tat_str *) malloc(sizeof(struct tat_str));
-X if(newtat == NULL) {
-X fprintf(stderr, "Couldn't malloc memory for newtat.\n");
-X exit(1);
-X }
-X
-X memcpy(newtat, f_str->tatprobs[index], sizeof(struct tat_str));
-X
-X newtat->probs = (double *) calloc(f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1, sizeof(double));
-X if(newtat->probs == NULL) {
-X fprintf(stderr, "Couldn't calloc memory for newtat->probs.\n");
-X exit(1);
-X }
-X
-X memcpy(newtat->probs, f_str->tatprobs[index]->probs,
-X (f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore + 1) * sizeof(double));
-X
-X
-X tatprob = f_str->intprobs[index][sumscore - f_str->tatprobs[index]->lowscore];
-X
-X } else { /* we need to recalculate from scratch */
-#endif
-X
-X /* for T?FASTF, we're always recalculating from scratch: */
-X
-X query = (unsigned char *) calloc(length, sizeof(unsigned char));
-X if(query == NULL) {
-X fprintf(stderr, "Couldn't calloc memory for query.\n");
-X exit(1);
-X }
-X
-#if defined(FASTS) || defined(FASTM)
-X start = this->vp->start - this->vp->dp + f_str->noff;
-X for(i = 0, j = 0 ; i < length ; i++) {
-X query[j++] = aa0[start + i];
-X }
-#else
-X for(i = 0, j = 0 ; i < n0 ; i++) {
-X if (this->vp->used[i] == 1) {query[j++] = aa0[i];}
-X }
-#endif
-X
-X /* calc_priors - not currently implemented for aa1 dependent */
-X /*
-X if( (do_opt && zsflag == 2) || zsflag == 4 ) {
-X priors = &my_priors[0];
-X calc_priors(priors, f_str, aa1, n1, pseudocts);
-X } else {
-X priors = f_str->priors;
-X }
-X */
-X
-X priors = f_str->priors;
-X oldtat = (last != NULL ? last->tat : NULL);
-X
-X generate_tatprobs(query, 0, length - 1, priors, pam2, nsq, &newtat, oldtat);
-X
-X free(query);
-#if defined(FASTS) || defined(FASTM)
-X } /* close the FASTS-specific if-else from above */
-#endif
-X
-X this->newtat = newtat;
-X
-X if(tatprob < 0.0) { /* hasn't been set by precalculated FASTS intprobs */
-X
-X /* integrate probabilities >= sumscore */
-X tatprobsptr = newtat->probs;
-X
-X /* is: index of the highest score; N: index of sumscore. N is
-X clamped to [0, is+1] so that neither tail sum can index outside
-X probs[0..is]. */
-X is = newtat->highscore - newtat->lowscore;
-X N = sumscore - newtat->lowscore;
-X if (N < 0) N = 0;
-X if (N > is + 1) N = is + 1;
-X
-X right_tatprob = 0;
-X for (i = is ; i >= N; i--) {
-X right_tatprob += tatprobsptr[i];
-X }
-X
-X /* the left tail is strictly below sumscore, so it is empty when
-X N == 0; starting it at probs[0] unconditionally would count that
-X entry in both tails */
-X left_tatprob = 0.0;
-X for (i = 0 ; i < N ; i++ ) {
-X left_tatprob += tatprobsptr[i];
-X }
-X
-X /* use whichever tail is smaller, then renormalise by the total */
-X if (right_tatprob < left_tatprob) {tatprob = right_tatprob;}
-X else {tatprob = 1.0 - left_tatprob;}
-X
-X tatprob /= (right_tatprob+left_tatprob);
-X }
-X
-X if (maxlength > 0) {
-X n1 += 2 * (maxlength - 1);
-X }
-X
-#ifndef FASTM
-X tatprob *= factorial(n1 - sumlength + seg, n1 - sumlength);
-#else
-X tatprob *= power(n1 - sumlength,seg)/(1<<seg);
-#endif
-X
-X if(tatprob > 0.01)
-X tatprob = 1.0 - exp(-tatprob);
-X
-X return tatprob;
-}
-X
-/* generate_tatprobs() - build the probability distribution of the summed
-X score of query[begin..end] against residues drawn from priors[].
-X
-X query residue codes, used as row indices into pam2
-X begin,end first and last query positions to include (inclusive)
-X priors priors[1..nsq], probability of each residue
-X pam2 scoring matrix, pam2[query residue][1..nsq]
-X nsq number of residue codes
-X tatarg out: receives a newly calloc'ed struct tat_str whose probs[]
-X has highscore - lowscore + 1 entries, probs[s - lowscore]
-X being the probability of total score s; it is not freed
-X here
-X oldtat NULL, or an existing distribution to extend; it is only
-X read, never modified or freed
-X
-X The distribution is built one query position at a time by convolving
-X the running distribution with that position's score distribution.
-X Exits the program if memory cannot be allocated. */
-void
-generate_tatprobs(const unsigned char *query,
-X int begin,
-X int end,
-X double *priors,
-X int **pam2,
-X int nsq,
-X struct tat_str **tatarg,
-X struct tat_str *oldtat)
-{
-X
-X int i, j, k, l, m, n, N, highscore, lowscore;
-X int *lowrange = NULL, *highrange = NULL;
-X double *probs = NULL, *newprobs = NULL, tmp;
-X struct tat_str *tatprobs = NULL;
-X int *pamptr, *pamptrsave;
-X
-X if((tatprobs = (struct tat_str *) calloc(1, sizeof(struct tat_str)))==NULL) {
-X fprintf(stderr, "Couldn't allocate individual tatprob struct.\n");
-X exit(1);
-X }
-X
-X n = end - begin + 1;
-X
-X if ( (lowrange = (int *) calloc(n, sizeof(int))) == NULL ) {
-X fprintf(stderr, "Couldn't allocate memory for lowrange.\n");
-X exit(1);
-X }
-X
-X if ( (highrange = (int *) calloc(n, sizeof(int))) == NULL ) {
-X fprintf(stderr, "Couldn't allocate memory for highrange.\n");
-X exit(1);
-X }
-X
-X /* calculate the absolute highest and lowest score possible for this */
-X /* segment. Also, set the range we need to iterate over at each position */
-X /* in the query: */
-X if(oldtat == NULL) {
-X highscore = lowscore = 0;
-X } else {
-X highscore = oldtat->highscore;
-X lowscore = oldtat->lowscore;
-X }
-X
-X for ( i = 0 ; i < n ; i++ ) {
-X
-X /* a zero residue code ends the range calculation early; the
-X remaining lowrange[]/highrange[] entries keep their calloc zeros */
-X if (query[begin+i] == 0) break;
-X
-X highscore =
-X (highrange[i] = highscore + max_score(pam2[query[begin + i]], nsq));
-X
-X lowscore =
-X (lowrange[i] = lowscore + min_score(pam2[query[begin + i]], nsq));
-X
-X /*
-X fprintf(stderr, "i: %d, max: %d, min: %d, high[i]: %d, low[i]: %d, high: %d, low: %d, char: %d\n",
-X i,
-X max_score(pam2[query[begin + i]], nsq),
-X min_score(pam2[query[begin + i]], nsq),
-X highrange[i], lowrange[i],
-X highscore, lowscore, query[begin + i]);
-X */
-X }
-X
-X /* allocate an array of probabilities for all possible scores */
-X /* i.e. if highest score possible is 50 and lowest score possible */
-X /* is -20, then there are 50 - (-20) + 1 = 71 possible different */
-X /* scores (including 0): */
-X N = highscore - lowscore;
-X if ( (probs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
-X fprintf(stderr, "Couldn't allocate probability matrix : %d.\n", N + 1);
-X exit(1);
-X }
-X
-X if(oldtat == NULL) {
-X /* for the first position, iterate over the only possible scores, */
-X /* summing the priors for the amino acids that can yield each score. */
-X pamptr = pam2[query[begin]];
-X for ( i = 1 ; i <= nsq ; i++ ) {
-X if(priors[i] > 0.0) {
-X probs[(pamptr[i] - lowscore)] += priors[i];
-X }
-X }
-X } else {
-X /* Need to copy the data out of oldtat->probs into probs */
-X memcpy( &probs[oldtat->lowscore - lowscore],
-X oldtat->probs,
-X (oldtat->highscore - oldtat->lowscore + 1) * sizeof(double));
-X }
-X
-X if ( (newprobs = (double *) calloc(N + 1, sizeof(double))) == NULL ) {
-X fprintf(stderr, "Couldn't allocate newprobs matrix.\n");
-X exit(1);
-X }
-X
-X /* now for each remaining residue in the segment ... */
-X for ( i = (oldtat == NULL ? 1 : 0) ; i < n ; i++ ) {
-X
-X pamptrsave = pam2[query[begin + i]];
-X
-X /* ... calculate new probability distribution .... */
-X
-X /* ... for each possible score (limited to current range) ... */
-X for ( j = lowrange[i] - lowscore,
-X k = highrange[i] - lowscore ;
-X j <= k ;
-X j++ ) {
-X
-X tmp = 0.0;
-X /* ... for each of the possible alignment scores at this position ... */
-X for ( l = 1 ;
-X l <= nsq ;
-X l++) {
-X
-X /* make sure we don't go past highest possible score, or past
-X the lowest possible score; not sure why this can happen */
-X m = j - pamptrsave[l];
-X if ( m <= N && m >= 0 ) {
-X /* update the probability of getting score j. The prior is
-X indexed by residue l, so that a skipped (out of range) m
-X cannot shift later residues onto the wrong prior, which
-X happened when a prior pointer was advanced only in here */
-X tmp += probs[m] * priors[l];
-X }
-X }
-X newprobs[j] += tmp;
-X }
-X
-X /* save the new set of probabilities, get rid of old; we don't
-X necessarily have to copy/clear all N+1 slots, we could use
-X high/low score boundaries -- not sure that's worth the
-X effort. */
-X memcpy(probs, newprobs, (N + 1) * sizeof(double));
-X memset(newprobs, 0, (N + 1) * sizeof(double));
-X }
-X
-X free(newprobs);
-X free(highrange);
-X free(lowrange);
-X
-X tatprobs->probs = probs;
-X /* tatprobs->intprobs = intprobs; */
-X tatprobs->lowscore = lowscore;
-X tatprobs->highscore = highscore;
-X
-X *tatarg = tatprobs;
-}
-X
-SHAR_EOF
-chmod 0644 tatstats.c ||
-echo 'restore of tatstats.c failed'
-Wc_c="`wc -c < 'tatstats.c'`"
-test 12998 -eq "$Wc_c" ||
- echo 'tatstats.c: original size 12998, current size' "$Wc_c"
-fi
-# ============= tatstats.h ==============
-if test -f 'tatstats.h' -a X"$1" != X"-c"; then
- echo 'x - skipping tatstats.h (File already exists)'
-else
-echo 'x - extracting tatstats.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'tatstats.h' &&
-#ifndef MAXSQ
-#include "param.h"
-#endif
-X
-#ifndef MAXSAV
-#ifdef FASTS
-#define MAXSAV 25
-#else
-#define MAXSAV 10
-#endif
-#endif
-X
-#if defined(IBM_AIX) && defined(MAXSEG)
-#undef MAXSEG
-#endif
-#define MAXSEG 30
-X
-/* struct savestr - one saved match (see the per-field comments).
-X struct slink refers to these through its 'vp' member, and struct
-X f_struct through vmax, vptr and lowmax. */
-struct savestr {
-X int score; /* pam score with segment optimization */
-X int score0; /* pam score of best single segment */
-X int start0; /* score from global match -- NOTE(review): the name suggests a start position, not a score; confirm */
-X int dp; /* diagonal of match */
-X int start; /* start of match in lib seq */
-X int stop; /* end of match in lib seq */
-X int exact; /* exact match */
-#if defined(FASTF)
-X int *used; /* array of positions in aa0 that were used */
-#endif
-};
-X
-/* struct dstruct - state of the run in progress on one diagonal;
-X struct f_struct holds a pointer to these in its 'diag' member. */
-struct dstruct { /* diagonal structure for saving current run */
-X int score; /* hash score of current match */
-X int start; /* start of current match */
-X int stop; /* end of current match */
-X struct savestr *dmax; /* location in vmax[] where best score data saved */
-};
-X
-/* struct tat_str - a score probability distribution, as filled in by
-X generate_tatprobs(). */
-struct tat_str {
-X double *probs; /* highscore - lowscore + 1 entries; probs[s - lowscore] is the probability of score s */
-X int lowscore; /* lowest score covered by probs[] */
-X int highscore; /* highest score covered by probs[] */
-};
-X
-/* struct f_struct - per-search working state (see the per-field
-X comments). The layout depends on the build: with FASTS or FASTM
-X defined, nmoff is an array, the per-segment aa0b/aa0e/aa0i/aa0s/aa0l
-X arrays and the precalculated tatprobs/intprobs tables are present,
-X and the hash arrays are plain ints; otherwise nmoff is a single int,
-X aa0/aa0ix are present and the hash arrays are struct hlstr. */
-struct f_struct {
-X struct dstruct *diag;
-X struct savestr *vmax; /* best matches saved for one sequence */
-X struct savestr **vptr;
-X struct slink *sarr;
-X struct savestr *lowmax;
-X int maxsav; /* max number of peptide alignments saved in search */
-X int maxsav_w; /* max number of peptide alignments saved in alignment */
-X int shuff_cnt;
-X int nsave;
-X int ndo;
-X int noff;
-X int nm0; /* number of fragments */
-#if defined(FASTS) || defined(FASTM)
-X int *nmoff; /* offset number, start */
-X int *nm_u;
-X int *aa0b; /* beginning of each segment */
-X int *aa0e; /* end of each segment */
-X int *aa0i; /* index of each segment */
-X int *aa0s; /* max score of each segment */
-X int *aa0l; /* longest possible peptide match */
-#else
-X int nmoff; /* offset number, start */
-X unsigned char *aa0;
-X int aa0ix;
-#endif
-X unsigned char *aa0t; /* temp location for peptides */
-X int *aa0ti; /* temp index for peptides */
-X int hmask; /* hash constants */
-X int *pamh1; /* pam based array */
-X int *pamh2; /* pam based kfact array */
-#if defined(FASTS) || defined(FASTM)
-X int *link, *harr, *l_end; /* hash arrays */
-#else
-X struct hlstr *link, *harr; /* hash arrays */
-#endif
-X int kshft; /* shift width */
-X int nsav, lowscor; /* number of saved runs, worst saved run */
-X unsigned char *aa1x; /* contains translated codons 111222333 */
-X unsigned char *aa1y; /* contains translated codons 123123123 */
-X int n10;
-X int *waa;
-X int *res;
-X int max_res;
-X double *priors; /* residue probabilities, indexed from 1 */
-#if defined(FASTS) || defined(FASTM)
-X struct tat_str **tatprobs; /* array of pointers to tat structs */
-X double **intprobs; /* array of integrated tatprobs */
-#endif
-X int dotat;
-X double spacefactor;
-};
-X
-/* struct slink - one node in a doubly linked chain of saved matches.
-X calc_tatusov() follows 'prev' from the node it is given, reads 'tat'
-X of the preceding node as the distribution to extend, and stores the
-X distribution it produces in 'newtat'. */
-struct slink {
-X int score;
-X double tatprob;
-X struct tat_str *tat; /* existing score distribution for this node */
-X struct tat_str *newtat; /* distribution produced by calc_tatusov() */
-X struct savestr *vp; /* the saved match this node refers to */
-X struct slink *next;
-X struct slink *prev;
-};
-X
-/* struct segstr - a probability paired with a length.
-X NOTE(review): not referenced by the code visible alongside this
-X header; presumably a per-segment summary -- confirm its users. */
-struct segstr {
-X double tatprob;
-X int length;
-};
-X
-void generate_tatprobs(const unsigned char *query,
-X int begin,
-X int end,
-X double *priors,
-X int **pam2,
-X int nsq,
-X struct tat_str **tatarg, struct tat_str *oldtat);
-X
-double
-calc_tatusov ( struct slink *last,
-X struct slink *this,
-X const unsigned char *aa0, int n0,
-X const unsigned char *aa1, int n1,
-X int **pam2, int nsq,
-X struct f_struct *f_str,
-X int pseudocts,
-X int do_opt,
-X int zsflag
-X );
-X
-double seg_tatprob(struct slink *start,
-X const unsigned char *aa0,
-X int n0,
-X const unsigned char *aa1,
-X int n1,
-X struct f_struct *f_str,
-X struct pstruct *ppst,
-X int do_opt);
-X
-void calc_priors(double *priors,
-X struct pstruct *ppst,
-X struct f_struct *f_str,
-X const unsigned char *aa1,
-X int n1, int pseudocts);
-X
-double factorial (int a, int b);
-X
-int max_score(int *scores, int nsq);
-X
-int min_score(int *scores, int nsq);
-X
-double calc_spacefactor(struct f_struct *f_str);
-X
-void linreg(double *lnx, double *x, double *lny,
-X int n,
-X double *a, double *b, double *c, int start);
-SHAR_EOF
-chmod 0644 tatstats.h ||
-echo 'restore of tatstats.h failed'
-Wc_c="`wc -c < 'tatstats.h'`"
-test 4126 -eq "$Wc_c" ||
- echo 'tatstats.h: original size 4126, current size' "$Wc_c"
-fi
-# ============= test.bat ==============
-if test -f 'test.bat' -a X"$1" != X"-c"; then
- echo 'x - skipping test.bat (File already exists)'
-else
-echo 'x - extracting test.bat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test.bat' &&
-rem ""
-rem "starting fasta34_t - protein on win32"
-rem ""
-fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
-fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
-rem "done"
-rem "starting fastxy34_t"
-fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
-fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
-fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
-fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
-rem "done"
-rem "starting fastxy34_t rev"
-fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
-fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
-fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
-rem "done"
-rem "starting ssearch34_t"
-ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
-ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
-rem "starting ssearch34_t"
-ssearch34sse2_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3sse2
-ssearch34sse2_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25sse2
-rem "done"
-rem "starting prss34"
-prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
-prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
-rem "done"
-rem "starting fasta34_t - DNA"
-fasta34_t -S -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
-fasta34_t -S -q mgstm1.rev %M 4 > test_m1.ok4r_t
-rem "done"
-rem "starting tfastxy34_t"
-tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa m > test_m1.tx2_t.html
-tfasty34_t -q -i -3 -N 5000 mgstm1.aa m > test_m1.ty2_t
-rem "done"
-rem "starting fastf34_t"
-fastf34_t -q m1r.aa q > test_mf.ff_t
-fastf34 -q m1r.aa q > test_mf.ff_s
-rem "done"
-rem "starting tfastf34_t"
-tfastf34_t -q m1r.aa %m > test_mf.tf_tr
-rem "done"
-rem "starting fasts34_t"
-fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
-fasts34_t -q ngt.aa q > test_m1.fs_t
-fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
-rem "done"
-rem "starting tfasts34_t"
-tfasts34_t -q n0.aa %m > test_m1.ts_r
-rem "done"
-rem "starting fasta34 - protein"
-fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
-fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
-rem "done"
-rem "starting fastx3"
-fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
-rem "done"
-rem "starting fasty3"
-fasty34 -q mgstm1.esq q > test_m1.ok2y
-rem "done"
-rem "starting fasta34 - DNA "
-fasta34 -m 9c -q mgstm1.seq M 4 > test_m1.ok4
-rem "done"
-rem "starting ssearch3"
-ssearch34 -S -q -z 2 mgstm1.aa a > test_m1.ss_z2
-ssearch34 -q -s P250 mgstm1.aa a > test_m1.ss_p25
-ssearch34 -S -q -s BL50 mgstm1.aa a > test_m1.ss_bl50
-ssearch34 -S -q -s blosum50.mat mgstm1.aa a > test_m1.ss_bl50f
-ssearch34sse2 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2_sse2
-ssearch34sse2 -q -s P250 mgstm1.aa q > test_m1.ss_p25_sse2
-rem "done"
-rem "starting tfastxy3"
-tfastx34 -q mgstm1.aa M > test_m1.tx2
-tfasty34 -m 9c -q mgstm1.aa M > test_m1.ty2
-rem "done"
-rem "starting fasts34"
-fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
-fasts34 -q ngt.aa q > test_m1.fs
-rem "done"
-SHAR_EOF
-chmod 0644 test.bat ||
-echo 'restore of test.bat failed'
-Wc_c="`wc -c < 'test.bat'`"
-test 2891 -eq "$Wc_c" ||
- echo 'test.bat: original size 2891, current size' "$Wc_c"
-fi
-# ============= test.sh ==============
-if test -f 'test.sh' -a X"$1" != X"-c"; then
- echo 'x - skipping test.sh (File already exists)'
-else
-echo 'x - extracting test.sh (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test.sh' &&
-#!/bin/csh -f
-echo ""
-echo "starting fasta34_t - protein" `date` "on" `hostname`
-echo `uname -a`
-echo ""
-fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
-fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
-echo "done"
-echo "starting fastxy34_t" `date`
-fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
-fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
-fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
-fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
-echo "done"
-echo "starting fastxy34_t rev" `date`
-fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
-fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
-fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
-echo "done"
-echo "starting ssearch34_t" `date`
-ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
-ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
-echo "done"
-echo "starting prss34" `date`
-prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
-prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
-echo "done"
-echo "starting fasta34_t - DNA" `date`
-fasta34_t -S -q -z 2 mgstm1.seq %RMB 4 > test_m1.ok4_tz2
-fasta34_t -S -q mgstm1.rev %RMB 4 > test_m1.ok4r_t
-echo "done"
-#echo "starting tfasta34_t" `date`
-#tfasta34_t -q mgstm1.aa %RMB > test_m1.tk2_t
-#echo "done"
-echo "starting tfastxy34_t" `date`
-tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2_t.html
-tfasty34_t -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2_t
-echo "done"
-echo "starting fastf34_t" `date`
-fastf34_t -q m1r.aa q > test_mf.ff_t
-fastf34 -q m1r.aa q > test_mf.ff_s
-echo "done"
-echo "starting tfastf34_t" `date`
-tfastf34_t -q m1r.aa %r > test_mf.tf_tr
-echo "done"
-echo "starting fasts34_t" `date`
-fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
-fasts34_t -q ngt.aa q > test_m1.fs_t
-fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
-echo "done"
-echo "starting tfasts34_t" `date`
-tfasts34_t -q n0.aa %r > test_m1.ts_r
-echo "done"
-echo "starting fasta34 - protein" `date`
-fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
-fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
-echo "done"
-echo "starting fastx3" `date`
-fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
-echo "done"
-echo "starting fasty3" `date`
-fasty34 -q mgstm1.esq q > test_m1.ok2y
-echo "done"
-echo "starting fasta34 - DNA " `date`
-fasta34 -m 9c -q mgstm1.seq %RMB 4 > test_m1.ok4
-echo "done"
-echo "starting ssearch3" `date`
-ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
-ssearch34 -S -q -s BL50 mgstm1.aa q > test_m1.ss_bl50
-ssearch34 -S -q -s blosum50.mat mgstm1.aa q > test_m1.ss_bl50f
-ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
-echo "done"
-#echo "starting tfasta3" `date`
-#tfasta34 -q mgstm1.aa %RMB > test_m1.tk2
-#echo "done"
-echo "starting tfastxy3" `date`
-tfastx34 -q mgstm1.aa %RMB > test_m1.tx2
-tfasty34 -m 9c -q mgstm1.aa %RMB > test_m1.ty2
-echo "done"
-echo "starting fasts34" `date`
-fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
-fasts34 -q ngt.aa q > test_m1.fs
-echo "done" `date`
-SHAR_EOF
-chmod 0755 test.sh ||
-echo 'restore of test.sh failed'
-Wc_c="`wc -c < 'test.sh'`"
-test 2996 -eq "$Wc_c" ||
- echo 'test.sh: original size 2996, current size' "$Wc_c"
-fi
-# ============= test2.bat ==============
-if test -f 'test2.bat' -a X"$1" != X"-c"; then
- echo 'x - skipping test2.bat (File already exists)'
-else
-echo 'x - extracting test2.bat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test2.bat' &&
-rem ""
-rem "starting fasta34_t - protein on win32"
-rem ""
-fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
-fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
-rem "done"
-rem "starting fastxy34_t"
-fastx34_t -m 9c -S -q mgtt2_x.seq q 1 > test_t2.xk1_t
-fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
-fastx34_t -m 9c -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
-fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
-rem "done"
-rem "starting fastxy34_t rev"
-fastx34_t -m 9c -q -m 5 mgstm1.rev q > test_m1.xk2r_t
-fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
-fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
-rem "done"
-rem "starting ssearch34_t"
-ssearch34_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
-ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
-rem "starting ssearch34_t"
-ssearch34sse2_t -m 9c -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3sse2
-ssearch34sse2_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25sse2
-rem "done"
-rem "starting prss34"
-prss34_t -q -k 1000 -A mgstm1.aa xurt8c.aa > test_m1.rss
-prfx34_t -q -k 1000 -A mgstm1.esq xurt8c.aa > test_m1.rfx
-rem "done"
-rem "starting fasta34_t - DNA"
-fasta34_t -S -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
-fasta34_t -S -q mgstm1.rev %M 4 > test_m1.ok4r_t
-rem "done"
-rem "starting tfastxy34_t"
-tfastx34_t -m 9c -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2_t.html
-tfasty34_t -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2_t
-rem "done"
-rem "starting fastf34_t"
-fastf34_t -q m1r.aa q > test_mf.ff_t
-fastf34 -q m1r.aa q > test_mf.ff_s
-rem "done"
-rem "starting tfastf34_t"
-tfastf34_t -q m1r.aa %r > test_mf.tf_tr
-rem "done"
-rem "starting fasts34_t"
-fasts34_t -q -V '*?@' ngts.aa q > test_m1.fs1_t
-fasts34_t -q ngt.aa q > test_m1.fs_t
-fasts34_t -q -n mgstm1.nts m > test_m1.nfs_t
-rem "done"
-rem "starting tfasts34_t"
-tfasts34_t -q n0.aa %r > test_m1.ts_r
-rem "done"
-rem "starting fasta34 - protein"
-fasta34 -q -z 2 mgstm1.aa q 1 > test_m1.ok1z2
-fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
-rem "done"
-rem "starting fastx3"
-fastx34 -m 9c -q mgstm1.esq q > test_m1.ok2x
-rem "done"
-rem "starting fasty3"
-fasty34 -q mgstm1.esq q > test_m1.ok2y
-rem "done"
-rem "starting fasta34 - DNA "
-fasta34 -m 9c -q mgstm1.seq M 4 > test_m1.ok4
-rem "done"
-rem "starting ssearch3"
-ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
-ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
-ssearch34sse2 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2_sse2
-ssearch34sse2 -q -s P250 mgstm1.aa q > test_m1.ss_p25_sse2
-rem "done"
-rem "starting tfastxy3"
-tfastx34 -q mgstm1.aa M > test_m1.tx2
-tfasty34 -m 9c -q mgstm1.aa M > test_m1.ty2
-rem "done"
-rem "starting fasts34"
-fasts34 -q -V '@?*' ngts.aa q > test_m1.fs1
-fasts34 -q ngt.aa q > test_m1.fs
-rem "done"
-SHAR_EOF
-chmod 0755 test2.bat ||
-echo 'restore of test2.bat failed'
-Wc_c="`wc -c < 'test2.bat'`"
-test 2775 -eq "$Wc_c" ||
- echo 'test2.bat: original size 2775, current size' "$Wc_c"
-fi
-# ============= test_osx.sh ==============
-if test -f 'test_osx.sh' -a X"$1" != X"-c"; then
- echo 'x - skipping test_osx.sh (File already exists)'
-else
-echo 'x - extracting test_osx.sh (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test_osx.sh' &&
-#!/bin/csh -f
-echo ""
-echo "starting fasta34_t - protein" `date` "on" `hostname`
-echo `uname -a`
-echo ""
-fasta34_t -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2_t.html
-fasta34_t -S -q -z 11 -O test_m1.ok2_t_p25 -s P250 mgstm1.aa:100-218 q
-echo "done"
-echo "starting fastxy34_t" `date`
-fastx34_t -m 9 -S -q mgtt2_x.seq q > test_t2.xk2_t
-fasty34_t -S -q mgtt2_x.seq q > test_t2.yk2_t
-fastx34_t -m 9 -S -q -z 2 mgstm1.esq a > test_m1.xk2_tz2
-fasty34_t -S -q -z 2 mgstm1.esq a > test_m1.yk2_tz2
-echo "done"
-echo "starting fastxy34_t rev" `date`
-fastx34_t -m 9 -q -m 5 mgstm1.rev q > test_m1.xk2r_t
-fasty34_t -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2r_tz2
-fasty34_t -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11_t
-echo "done"
-echo "starting ssearch34_t" `date`
-ssearch34_t -m 9 -S -z 3 -q mgstm1.aa q > test_m1.ss_tz3
-ssearch34_t -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_t_p25
-echo "done"
-echo "starting fasta34_t - DNA" `date`
-fasta34_t -q -z 2 mgstm1.seq %M 4 > test_m1.ok4_tz2
-fasta34_t -q mgstm1.rev %M 4 > test_m1.ok4r_t
-echo "done"
-echo "starting tfasta34_t" `date`
-tfasta34_t -q mgstm1.aa %M > test_m1.tk2_t
-echo "done"
-echo "starting tfastxy34_t" `date`
-tfastx34_t -m 9 -q -i -3 -m 6 mgstm1.aa %m > test_m1.tx2_t.html
-tfasty34_t -q -3 -N 5000 mgstm1.aa %m > test_m1.ty2_t
-echo "done"
-echo "starting fastf34_t" `date`
-fastf34_t -q m1r.aa q > test_mf.ff_s
-echo "done"
-echo "starting tfastf34_t" `date`
-tfastf34_t -q m1r.aa %m > test_mf.tf_r
-echo "done"
-echo "starting fasts34_t" `date`
-fasts34_t -q n0.aa q > test_m1.fs_s
-echo "done"
-echo "starting tfasts34_t" `date`
-tfasts34_t -q n0.aa %m > test_m1.ts_r
-echo "done"
-echo "starting fasta34 - protein" `date`
-fasta34 -q -z 2 mgstm1.aa q > test_m1.ok2z2
-fasta34 -q -s P250 mgstm1.aa q > test_m1.ok2_p25
-echo "done"
-echo "starting fastx3" `date`
-fastx34 -m 9 -q mgstm1.esq q > test_m1.ok2x
-echo "done"
-echo "starting fasty3" `date`
-fasty34 -q mgstm1.esq q > test_m1.ok2y
-echo "done"
-echo "starting fasta34 - DNA " `date`
-fasta34 -m 9 -q mgstm1.seq %m 4 > test_m1.ok4
-echo "done"
-echo "starting ssearch3" `date`
-ssearch34 -S -q -z 2 mgstm1.aa q > test_m1.ss_z2
-ssearch34 -q -s P250 mgstm1.aa q > test_m1.ss_p25
-echo "done"
-echo "starting tfasta3" `date`
-tfasta34 -q mgstm1.aa %m > test_m1.tk2
-echo "done"
-echo "starting tfastxy3" `date`
-tfastx34 -q mgstm1.aa %m > test_m1.tx2
-tfasty34 -m 9 -q mgstm1.aa %m > test_m1.ty2
-echo "done" `date`
-SHAR_EOF
-chmod 0755 test_osx.sh ||
-echo 'restore of test_osx.sh failed'
-Wc_c="`wc -c < 'test_osx.sh'`"
-test 2429 -eq "$Wc_c" ||
- echo 'test_osx.sh: original size 2429, current size' "$Wc_c"
-fi
-# ============= test_s.sh ==============
-if test -f 'test_s.sh' -a X"$1" != X"-c"; then
- echo 'x - skipping test_s.sh (File already exists)'
-else
-echo 'x - extracting test_s.sh (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test_s.sh' &&
-#!/bin/csh -f
-echo ""
-echo "starting fasta34 - protein" `date` "on" `hostname`
-echo `uname -a`
-echo ""
-fasta34 -q -m 6 -Z 100000 mgstm1.aa:1-100 q > test_m1.ok2.html
-fasta34 -S -q -z 11 -O test_m1.ok2_p25 -s P250 mgstm1.aa:100-218 q
-echo "done"
-echo "starting fastxy34" `date`
-fastx34 -m 9 -S -q mgtt2_x.seq q > test_t2.xk2
-fasty34 -S -q mgtt2_x.seq q > test_t2.yk2
-fastx34 -m 9 -S -q -z 2 mgstm1.esq a > test_m1.xk2z2
-fasty34 -S -q -z 2 mgstm1.esq a > test_m1.yk2z2
-echo "done"
-echo "starting fastxy34 rev" `date`
-fastx34 -m 9 -q -m 5 mgstm1.rev q > test_m1.xk2r
-fasty34 -q -m 5 -M 200-300 -z 2 mgstm1.rev q > test_m1.yk2rz2
-fasty34 -q -m 5 -z 11 mgstm1.rev q > test_m1.yk2rz11
-echo "done"
-echo "starting ssearch34" `date`
-ssearch34 -m 9 -S -z 3 -q mgstm1.aa q > test_m1.ssz3
-ssearch34 -q -M 200-300 -z 2 -Z 100000 -s P250 mgstm1.aa q > test_m1.ss_p25
-echo "done"
-echo "starting fasta34 - DNA" `date`
-fasta34 -q -z 2 mgstm1.seq %RMB 4 > test_m1.ok4z2
-fasta34 -q mgstm1.rev %RMB 4 > test_m1.ok4r
-echo "done"
-echo "starting tfasta34" `date`
-tfasta34 -q mgstm1.aa %RMB > test_m1.tk2
-echo "done"
-echo "starting tfastxy34" `date`
-tfastx34 -m 9 -q -i -3 -m 6 mgstm1.aa %p > test_m1.tx2.html
-tfasty34 -q -i -3 -N 5000 mgstm1.aa %p > test_m1.ty2
-echo "done"
-echo "starting fastf34" `date`
-fastf34 -q m1r.aa q > test_mf.ff_s
-echo "done"
-echo "starting tfastf34" `date`
-tfastf34 -q -E 0.0001 m1r.aa %r > test_mf.tf_r
-echo "done"
-echo "starting fasts34" `date`
-fasts34 -q n0.aa q > test_m1.fs_s
-echo "done"
-echo "starting tfasts34" `date`
-tfasts34 -q n0.aa %r > test_m1.ts_r
-echo "done"
-echo "done" `date`
-SHAR_EOF
-chmod 0755 test_s.sh ||
-echo 'restore of test_s.sh failed'
-Wc_c="`wc -c < 'test_s.sh'`"
-test 1597 -eq "$Wc_c" ||
- echo 'test_s.sh: original size 1597, current size' "$Wc_c"
-fi
-# ============= test_z.sh ==============
-if test -f 'test_z.sh' -a X"$1" != X"-c"; then
- echo 'x - skipping test_z.sh (File already exists)'
-else
-echo 'x - extracting test_z.sh (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'test_z.sh' &&
-#!/bin/csh -f
-echo "starting fasta34_t - protein" `date`
-foreach z ( 1 2 3 6 11 )
-fasta34_t -q -z $z mgstm1.aa a > test_m1_a.ok2_t_${z}
-fasta34_t -q -z $z oohu.aa a > test_m1_b.ok2_t_${z}
-fasta34_t -q -S -z $z prio_atepa.aa a > test_m1_c.ok2S_t_${z}
-fasta34_t -q -S -z $z h10_human.aa a > test_m1_d.ok2S_t_${z}
-end
-echo "done"
-echo "starting ssearch34_t" `date`
-foreach z ( 1 2 3 6 11 )
-ssearch34_t -q -z $z mgstm1.aa a > test_m1_a.ssS_t_${z}
-ssearch34_t -q -z $z oohu.aa a > test_m1_b.ssS_t_${z}
-ssearch34_t -q -sBL62 -S -f -11 -z $z prio_atepa.aa a > test_m1_c.ssSbl62_t_${z}
-ssearch34_t -q -sBL62 -S -f -11 -z $z h10_human.aa a > test_m1_d.ssSbl62_t_${z}
-end
-echo "done"
-echo "starting fasta34 - protein" `date`
-foreach z ( 1 2 3 6 11 )
-fasta34 -q -z $z mgstm1.aa a > test_m1_a.ok2_${z}
-fasta34 -q -z $z oohu.aa a > test_m1_b.ok2_${z}
-fasta34 -q -S -sBL62 -f -11 -z $z prio_atepa.aa a > test_m1_c.ok2Sbl62_${z}
-fasta34 -q -S -sBL62 -f -11 -z $z h10_human.aa a > test_m1_d.ok2Sbl62_${z}
-end
-echo "done"
-echo "starting ssearch3" `date`
-foreach z ( 1 2 3 6 11 )
-ssearch34 -q -z $z mgstm1.aa a > test_m1_a.ssS_${z}
-ssearch34 -q -z $z oohu.aa a > test_m1_b.ssS_${z}
-ssearch34 -q -S -z $z prio_atepa.aa a > test_m1_c.ssS_${z}
-ssearch34 -q -S -z $z h10_human.aa a > test_m1_d.ssS_${z}
-end
-echo "done" `date`
-SHAR_EOF
-chmod 0755 test_z.sh ||
-echo 'restore of test_z.sh failed'
-Wc_c="`wc -c < 'test_z.sh'`"
-test 1312 -eq "$Wc_c" ||
- echo 'test_z.sh: original size 1312, current size' "$Wc_c"
-fi
-# ============= tfasts3.rsp ==============
-if test -f 'tfasts3.rsp' -a X"$1" != X"-c"; then
- echo 'x - skipping tfasts3.rsp (File already exists)'
-else
-echo 'x - extracting tfasts3.rsp (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'tfasts3.rsp' &&
-compacc.obj doinit.obj showbest.obj htime.obj apam.obj scaleswt.obj karlin.obj last_tat.obj tatsttfs.obj c_dispn.obj lib_sel.obj url_subs.obj nrand.obj getopt.obj regetlib.obj lgetlib.obj ncbl2_mlib.obj
-SHAR_EOF
-chmod 0644 tfasts3.rsp ||
-echo 'restore of tfasts3.rsp failed'
-Wc_c="`wc -c < 'tfasts3.rsp'`"
-test 203 -eq "$Wc_c" ||
- echo 'tfasts3.rsp: original size 203, current size' "$Wc_c"
-fi
-# ============= thr.h ==============
-if test -f 'thr.h' -a X"$1" != X"-c"; then
- echo 'x - skipping thr.h (File already exists)'
-else
-echo 'x - extracting thr.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'thr.h' &&
-X
-/***************************************/
-/* thread global variable declarations */
-/***************************************/
-X
-/* $Name: fa_34_26_5 $ - $Id: thr.h,v 1.2 1999/12/30 01:26:59 wrp Exp $ */
-X
-#ifndef MAX_WORKERS
-#define MAX_WORKERS 2
-#endif
-#define NUM_WORK_BUF 2*MAX_WORKERS
-X
-#ifndef XTERNAL
-struct buf_head *worker_buf[NUM_WORK_BUF]; /* pointers to full buffers */
-struct buf_head *reader_buf[NUM_WORK_BUF]; /* pointers to empty buffers */
-X
-/* protected by worker_mutex/woker_cond_var */
-int worker_buf_workp, worker_buf_readp; /* indices into full-buffers ptrs */
-int num_worker_bufs;
-int reader_done;
-X
-/* protected by reader_mutex/reader_cond var */
-int reader_buf_workp, reader_buf_readp; /* indices into empty-buffers ptrs */
-int num_reader_bufs;
-X
-/* protected by start_mutex/start_cont_var */
-int start_thread=1; /* start-up predicate, 0 starts */
-#else
-extern struct buf_head *worker_buf[];
-extern struct buf_head *reader_buf[];
-extern int num_worker_bufs, reader_done, num_reader_bufs;
-extern int worker_buf_workp, worker_buf_readp;
-extern int reader_buf_workp, reader_buf_readp;
-X
-extern int start_thread;
-#endif
-X
-SHAR_EOF
-chmod 0644 thr.h ||
-echo 'restore of thr.h failed'
-Wc_c="`wc -c < 'thr.h'`"
-test 1144 -eq "$Wc_c" ||
- echo 'thr.h: original size 1144, current size' "$Wc_c"
-fi
-# ============= titin_hum.aa ==============
-if test -f 'titin_hum.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping titin_hum.aa (File already exists)'
-else
-echo 'x - extracting titin_hum.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'titin_hum.aa' &&
->gi|20143914|ref|NP_003310.2| titin isoform N2-B [Homo sapiens]
-MTTQAPTFTQPLQSVVVLEGSTATFEAHISGFPVPEVSWFRDGQVISTSTLPGVQISFSDGRAKLTIPAV
-TKANSGRYSLKATNGSGQATSTAELLVKAETAPPNFVQRLQSMTVRQGSQVRLQVRVTGIPTPVVKFYRD
-GAEIQSSLDFQISQEGDLYSLLIAEAYPEDSGTYSVNATNSVGRATSTAELLVQGEEEVPAKKTKTIVST
-AQISESRQTRIEKKIEAHFDARSIATVEMVIDGAAGQQLPHKTPPRIPPKPKSRSPTPPSIAAKAQLARQ
-QSPSPIRHSPSPVRHVRAPTPSPVRSVSPAARISTSPIRSVRSPLLMRKTQASTVATGPEVPPPWKQEGY
-VASSSEAEMRETTLTTSTQIRTEERWEGRYGVQEQVTISGAAGAAASVSASASYAAEAVATGAKEVKQDA
-DKSAAVATVVAAVDMARVREPVISAVEQTAQRTTTTAVHIQPAQEQVRKEAEKTAVTKVVVAADKAKEQE
-LKSRTKEVITTKQEQMHVTHEQIRKETEKTFVPKVVISAAKAKEQETRISEEITKKQKQVTQEAIMKETR
-KTVVPKVIVATPKVKEQDLVSRGREGITTKREQVQITQEKMRKEAEKTALSTIAVATAKAKEQETILRTR
-ETMATRQEQIQVTHGKVDVGKKAEAVATVVAAVDQARVREPREPGHLEESYAQQTTLEYGYKERISAAKV
-AEPPQRPASEPHVVPKAVKPRVIQAPSETHIKTTDQKGMHISSQIKKTTDLTTERLVHVDKRPRTASPHF
-TVSKISVPKTEHGYEASIAGSAIATLQKELSATSSAQKITKSVKAPTVKPSETRVRAEPTPLPQFPFADT
-PDTYKSEAGVEVKKEVGVSITGTTVREERFEVLHGREAKVTETARVPAPVEIPVTPPTLVSGLKNVTVIE
-GESVTLECHISGYPSPTVTWYREDYQIESSIDFQITFQSGIARLMIREAFAEDSGRFTCSAVNEAGTVST
-SCYLAVQVSEEFEKETTAVTEKFTTEEKRFVESRDVVMTDTSLTEEQAGPGEPAAPYFITKPVVQKLVEG
-GSVVFGCQVGGNPKPHVYWKKSGVPLTTGYRYKVSYNKQTGECKLVISMTFADDAGEYTIVVRNKHGETS
-ASASLLEEADYELLMKSQQEMLYQTQVTAFVQEPKVGETAPGFVYSEYEKEYEKEQALIRKKMAKDTVVV
-RTYVEDQEFHISSFEERLIKEIEYRIIKTTLEELLEEDGEEKMAVDISESEAVESGFDLRIKNYRILEGM
-GVTFHCKMSGYPLPKIAWYKDGKRIKHGERYQMDFLQDGRASLRIPVVLPEDEGIYTAFASNIKGNAICS
-GKLYVEPAAPLGAPTYIPTLEPVSRIRSLSPRSVSRSPIRMSPARMSPARMSPARMSPARMSPGRRLEET
-DESQLERLYKPVFVLKPVSFKCLEGQTARFDLKVVGRPMPETFWFHDGQQIVNDYTHKVVIKEDGTQSLI
-IVPATPSDSGEWTVVAQNRAGRSSISVILTVEAVEHQVKPMFVEKLKNVNIKEGSRLEMKVRATGNPNPD
-IVWLKNSDIIVPHKYPKIRIEGTKGEAALKIDSTVSQDSAWYTATAINKAGRDTTRCKVNVEVEFAEPEP
-ERKLIIPRGTYRAKEIAAPELEPLHLRYGQEQWEEGDLYDKEKQQKPFFKKKLTSLRLKRFGPAHFECRL
-TPIGDPTMVVEWLHDGKPLEAANRLRMINEFGYCSLDYGVAYSRDSGIITCRATNKYGTDHTSATLIVKD
-EKSLVEESQLPEGRKGLQRIEELERMAHEGALTGVTTDQKEKQKPDIVLYPEPVRVLEGETARFRCRVTG
-YPQPKVNWYLNGQLIRKSKRFRVRYDGIHYLDIVDCKSYDTGEVKVTAENPEGVIEHKVKLEIQQREDFR
-SVLRRAPEPRPEFHVHEPGKLQFEVQKVDRPVDTTETKEVVKLKRAERITHEKVPEESEELRSKFKRRTE
-EGYYEAITAVELKSRKKDESYEELLRKTKDELLHWTKELTEEEKKALAEEGKITIPTFKPDKIELSPSME
-APKIFERIQSQTVGQGSDAHFRVRVVGKPDPECEWYKNGVKIERSDRIYWYWPEDNVCELVIRDVTAEDS
-ASIMVKAINIAGETSSHAFLLVQAKQLITFTQELQDVVAKEKDTMATFECETSEPFVKVKWYKDGMEVHE
-GDKYRMHSDRKVHFLSILTIDTSDAEDYSCVLVEDENVKTTAKLIVEGAVVEFVKELQDIEVPESYSGEL
-ECIVSPENIEGKWYHNDVELKSNGKYTITSRRGRQNLTVKDVTKEDQGEYSFVIDGKKTTCKLKMKPRPI
-AILQGLSDQKVCEGDIVQLEVKVSLESVEGVWMKDGQEVQPSDRVHIVIDKQSHMLLIEDMTKEDAGNYS
-FTIPALGLSTSGRVSVYSVDVITPLKDVNVIEGTKAVLECKVSVPDVTSVKWYLNDEQIKPDDRVQAIVK
-GTKQRLVINRTHASDEGPYKLIVGRVETNCNLSVEKIKIIRGLRDLTCTETQNVVFEVELSHSGIDVLWN
-FKDKEIKPSSKYKIEAHGKIYKLTVLNMMKDDEGKYTFYAGENITSGKLTVAGGAISKPLTDQTVAESQE
-AVFECEVANPDSKGEWLRDGKHLPLTNNIRSESDGHKRRLIIAATKLDDIGEYTYKVATSKTSAKLKVEA
-VKIKKTLKNLTVTETQDAVFTVELTHPNVKGVQWIKNGVVLESNEKYAISVKGTIYSLRIKNCAIVDESV
-YGFRLGRLGASARLHVETVKIIKKPKDVTALENATVAFEVSVSHDTVPVKWFHKSVEIKPSDKHRLVSER
-KVHKLMLQNISPSDAGEYTAVVGQLECKAKLFVETLHITKTMKNIEVPETKTASFECEVSHFNVPSMWLK
-NGVEIEMSEKFKIVVQGKLHQLIIMNTSTEDSAEYTFVCGNDQVSATLTVTPIMITSMLKDINAEEKDTI
-TFEVTVNYEGISYKWLKNGVEIKSTDKCQMRTKKLTHSLNIRNVHFGDAADYTFVAGKATSTATLYVEAR
-HIEFRKHIKDIKVLEKKRAMFECEVSEPDITVQWMKDDQELQITDRIKIQKEKYVHRLLIPSTRMSDAGK
-YTVVAGGNVSTAKLFVEGRDVRIRSIKKEVQVIEKQRAVVEFEVNEDDVDAHWYKDGIEINFQVQERHKY
-VVERRIHRMFISETRQSDAGEYTFVAGRNRSSVTLYVNAPEPPQVLQELQPVTVQSGKPARFCAVISGRP
-QPKISWYKEEQLLSTGFKCKFLHDGQEYTLLLIEAFPEDAAVYTCEAKNDYGVATTSASLSVEVPEVVSP
-DQEMPVYPPAIITPLQDTVTSEGQPARFQCRVSGTDLKVSWYSKDKKIKPSRFFRMTQFEDTYQLEIAEA
-YPEDEGTYTFVASNAVGQVSSTANLSLEAPESILHERIEQEIEMEMKEFSSSFLSAEEEGLHSAELQLSK
-INETLELLSESPVYSTKFDSEKEGTGPIFIKEVSNADISMGDVATLSVTVIGIPKPKIQWFFNGVLLTPS
-ADYKFVFDGDDHSLIILFTKLEDEGEYTCMASNDYGKTICSAYLKINSKGEGHKDTETESAVAKSLEKLG
-GPCPPHFLKELKPIRCAQGLPAIFEYTVVGEPAPTVTWFKENKQLCTSVYYTIIHNPNGSGTFIVNDPQR
-EDSGLYICKAENMLGESTCAAELLVLLEDTDMTDTPCKAKSTPEAPEDFPQTPLKGPAVEALDSEQEIAT
-FVKDTILKAALITEENQQLSYEHIAKANELSSQLPLGAQELQSILEQDKLTPESTREFLCINGSIHFQPL
-KEPSPNLQLQIVQSQKTFSKEGILMPEEPETQAVLSDTEKIFPSAMSIEQINSLTVEPLKTLLAEPEGNY
-PQSSIEPPMHSYLTSVAEEVLSPKEKTVSDTNREQRVTLQKQEAQSALILSQSLAEGHVESLQSPDVMIS
-QVNYEPLVPSEHSCTEGGKILIESANPLENAGQDSAVRIEEGKSLRFPLALEEKQVLLKEEHSDNVVMPP
-DQIIESKREPVAIKKVQEVQGRDLLSKESLLSGIPEEQRLNLKIQICRALQAAVASEQPGLFSEWLRNIE
-KVEVEAVNITQEPRHIMCMYLVTSAKSVTEEVTIIIEDVDPQMANLKMELRDALCAIIYEEIDILTAEGP
-RIQQGAKTSLQEEMDSFSGSQKVEPITEPEVESKYLISTEEVSYFNVQSRVKYLDATPVTKGVASAVVSD
-EKQDESLKPSEEKEESSSESGTEEVATVKIQEAEGGLIKEDGPMIHTPLVDTVSEEGDIVHLTTSITNAK
-EVNWYFENKLVPSDEKFKCLQDQNTYTLVIDKVNTEDHQGEYVCEALNDSGKTATSAKLTVVKRAAPVIK
-RKIEPLEVALGHLAKFTCEIQSAPNVRFQWFKAGREIYESDKCSIRSSKYISSLEILRTQVVDCGEYTCK
-ASNEYGSVSCTATLTVTVPGGEKKVRKLLPERKPEPKEEVVLKSVLRKRPEEEEPKVEPKKLEKVKKPAV
-PEPPPPKPVEEVEVPTVTKRERKIPEPTKVPEIKPAIPLPAPEPKPKPEAEVKTIKPPPVEPEPTPIAAP
-VTVPVVGKKAEAKAPKEEAAKPKGPIKGVPKKTPSPIEAERRKLRPGSGGEKPPDEAPFTYQLKAVPLKF
-VKEIKDIILTESEFVGSSAIFECLVSPSTAITTWMKDGSNIRESPKHRFIADGKDRKLHIIDVQLSDAGE
-YTCVLRLGNKEKTSTAKLVVEELPVRFVKTLEEEVTVVKGQPLYLSCELNKERDVVWRKDGKIVVEKPGR
-IVPGVIGLMRALTINDADDTDAGTYTVTVENANNLECSSCVKVVEVIRDWLVKPIRDQHVKPKGTAIFAC
-DIAKDTPNIKWFKGYDEIPAEPNDKTEILRDGNHLYLKIKNAMPEDIAEYAVEIEGKRYPAKLTLGEREV
-ELLKPIEDVTIYEKESASFDAEISEADIPGQWKLKGELLRPSPTCEIKAEGGKRFLTLHKVKLDQAGEVL
-YQALNAITTAILTVKEIELDFAVPLKDVTVPERRQARFECVLTREANVIWSKGPDIIKSSDKFDIIADGK
-KHILVINDSQFDDEGVYTAEVEGKKTSARLFVTGIRLKFMSPLEDQTVKEGETATFVCELSHEKMHVVWF
-KNDAKLHTSRTVLISSEGKTHKLEMKEVTLDDISQIKAQVKELSSTAQLKVLEADPYFTVKLHDKTAVEK
-DEITLKCEVSKDVPVKWFKDGEEIVPSPKYSIKADGLRRILKIKKADLKDKGEYVCDCGTDKTKANVTVE
-ARLIKVEKPLYGVEVFVGETAHFEIELSEPDVHGQWKLKGQPLTASPDCEIIEDGKKHILILHNCQLGMT
-GEVSFQAANAKSAANLKVKELPLIFITPLSDVKVFEKDEAKFECEVSREPKTFRWLKGTQEITGDDRFEL
-IKDGTKHSMVIKSAAFEDEAKYMFEAEDKHTSGKLIIEGIRLKFLTPLKDVTAKEKESAVFTVELSHDNI
-RVKWFKNDQRLHTTRSVSMQDEGKTHSITFKDLSIDDTSQIRVEAMGMSSEAKLTVLEGDPYFTGKLQDY
-TGVEKDEVILQCEISKADAPVKWFKDGKEIKPSKNAVIKADGKKRMLILKKALKSDIGQYTCDCGTDKTS
-GKLDIEDREIKLVRPLHSVEVMETETARFETEISEDDIHANWKLKGEALLQTPDCEIKEEGKIHSLVLHN
-CRLDQTGGVDFQAANVKSSAHLRVKPRVIGLLRPLKDVTVTAGETATFDCELSYEDIPVEWYLKGKKLEP
-SDKVVPRSEGKVHTLTLRDVKLEDAGEVQLTAKDFKTHANLFVKEPPVEFTKPLEDQTVEEGATAVLECE
-VSRENAKVKWFKNGTEILKSKKYEIVADGRVRKLVIHDCTPEDIKTYTCDAKDFKTSCNLNVVPPHVEFL
-RPLTDLQVREKEMARFECELSRENAKVKWFKDGAEIKKGKKYDIISKGAVRILVINKCLLDDEAEYSCEV
-RTARTSGMLTVLEEEAVFTKNLANIEVSETDTIKLVCEVSKPGAEVIWYKGDEEIIETGRYEILTEGRKR
-ILVIQNAHLEDAGNYNCRLPSSRTDGKVKVHELAAEFISKPQNLEILEGEKAEFVCSISKESFPVQWKRD
-DKTLESGDKYDVIADGKKRVLVVKDATLQDMGTYVVMVGAARAAAHLTVIEKLRIVVPLKDTRVKEQQEV
-VFNCEVNTEGAKAKWFRNEEAIFDSSKYIILQKDLVYTLRIRDAHLDDQANYNVSLTNHRGENVKSAANL
-IVEEEDLRIVEPLKDIETMEKKSVTFWCKVNRLNVTLKWTKNGEEVPFDNRVSYRVDKYKHMLTIKDCGF
-PDEGEYIVTAGQDKSVAELLIIEAPTEFVEHLEDQTVTEFDDAVFSCQLSREKANVKWYRNGREIKEGKK
-YKFEKDGSIHRLIIKDCRLDDECEYACGVEDRKSRARLFVEEIPVEIIRPPQDILEAPGADVVFLAELNK
-DKVEVQWLRNNMVVVQGDKHQMMSEGKIHRLQICDIKPRDQGEYRFIAKDKEARAKLELAAAPKIKTADQ
-DLVVDVGKPLTMVVPYDAYPKAEAEWFKENEPLSTKTIDTTAEQTSFRILEAKKGDKGRYKIVLQNKHGK
-AEGFINLKVIDVPGPVRNLEVTETFDGEVSLAWEEPLTDGGSKIIGYVVERRDIKRKTWVLATDRAESCE
-FTVTGLQKGGVEYLFRVSARNRVGTGEPVETDNPVEARSKYDVPGPPLNVTITDVNRFGVSLTWEPPEYD
-GGAEITNYVIELRDKTSIRWDTAMTVRAEDLSATVTDVVEGQEYSFRVRAQNRIGVGKPSAATPFVKVAD
-PIERPSPPVNLTSSDQTQSSVQLKWEPPLKDGGSPILGYIIERCEEGKDNWIRCNMKLVPELTYKVTGLE
-KGNKYLYRVSAENKAGVSDPSEILGPLTADDAFVEPTMDLSAFKDGLEVIVPNPITILVPSTGYPRPTAT
-WCFGDKVLETGDRVKMKTLSAYAELVISPSERSDKGIYTLKLENRVKTISGEIDVNVIARPSAPKELKFG
-DITKDSVHLTWEPPDDDGGSPLTGYVVEKREVSRKTWTKVMDFVTDLEFTVPDLVQGKEYLFKVCARNKC
-GPGEPAYVDEPVNMSTPATVPDPPENVKWRDRTANSIFLTWDPPKNDGGSRIKGYIVERCPRGSDKWVAC
-GEPVAETKMEVTGLEEGKWYAYRVKALNRQGASKPSRPTEEIQAVDTQEAPEIFLDVKLLAGLTVKAGTK
-IELPATVTGKPEPKITWTKADMILKQDKRITIENVPKKSTVTIVDSKRSDTGTYIIEAVNVCGRATAVVE
-VNVLDKPGPPAAFDITDVTNESCLLTWNPPRDDGGSKITNYVVERRATDSEVWHKLSSTVKDTNFKATKL
-IPNKEYIFRVAAENMYGVGEPVQASPITAKYQFDPPGPPTRLEPSDITKDAVTLTWCEPDDDGGSPITGY
-WVERLDPDTDKWVRCNKMPVKDTTYRVKGLTNKKKYRFRVLAENLAGPGKPSKSTEPILIKDPIDPPWPP
-GKPTVKDVGKTSVRLNWTKPEHDGGAKIESYVIEMLKTGTDEWVRVAEGVPTTQHLLPGLMEGQEYSFRV
-RAVNKAGESEPSEPSDPVLCREKLYPPSPPRWLEVINITKNTADLKWTVPEKDGGSPITNYIVEKRDVRR
-KGWQTVDTTVKDTKCTVTPLTEGSLYVFRVAAENAIGQSDYTEIEDSVLAKDTFTTPGPPYALAVVDVTK
-RHVDLKWEPPKNDGGRPIQRYVIEKKERLGTRWVKAGKTAGPDCNFRVTDVIEGTEVQFQVRAENEAGVG
-HPSEPTEILSIEDPTSPPSPPLDLHVTDAGRKHIAIAWKPPEKNGGSPIIGYHVEMCPVGTEKWMRVNSR
-PIKDLKFKVEEGVVPDKEYVLRVRAVNAIGVSEPSEISENVVAKDPDCKPTIDLETHDIIVIEGEKLSIP
-VPFRAVPVPTVSWHKDGKEVKASDRLTMKNDHISAHLEVPKSVRADAGIYTITLENKLGSATASINVKVI
-GLPGPCKDIKASDITKSSCKLTWEPPEFDGGTPILHYVLERREAGRRTYIPVMSGENKLSWTVKDLIPNG
-EYFFRVKAVNKVGGGEYIELKNPVIAQDPKQPPDPPVDVEVHNPTAEAMTITWKPPLYDGGSKIMGYIIE
-KIAKGEERWKRCNEHLVPILTYTAKGLEEGKEYQFRVRAENAAGISEPSRATPPTKAVDPIDAPKVILRT
-SLEVKRGDEIALDASISGSPYPTITWIKDENVIVPEEIKKRAAPLVRRRKGEVQEEEPFVLPLTQRLSID
-NSKKGESQLRVRDSLRPDHGLYMIKVENDHGIAKAPCTVSVLDTPGPPINFVFEDIRKTSVLCKWEPPLD
-DGGSEIINYTLEKKDKTKPDSEWIVVTSTLRHCKYSVTKLIEGKEYLFRVRAENRFGPGPPCVSKPLVAK
-DPFGPPDAPDKPIVEDVTSNSMLVKWNEPKDNGSPILGYWLEKREVNSTHWSRVNKSLLNALKANVDGLL
-EGLTYVFRVCAENAAGPGKFSPPSDPKTAHDPISPPGPPIPRVTDTSSTTIELEWEPPAFNGGGEIVGYF
-VDKQLVGTNEWSRCTEKMIKVRQYTVKEIREGADYKLRVSAVNAAGEGPPGETQPVTVAEPQEPPAVELD
-VSVKGGIQIMAGKTLRIPAVVTGRPVPTKVWTKEEGELDKDRVVIDNVGTKSELIIKDALRKDHGRYVIT
-ATNSCGSKFAAARVEVFDVPGPVLDLKPVVTNRKMCLLNWSDPEDDGGSEITGFIIERKDAKMHTWRQPI
-ETERSKCDITGLLEGQEYKFRVIAKNKFGCGPPVEIGPILAVDPLGPPTSPERLTYTERTKSTITLDWKE
-PRSNGGSPIQGYIIEKRRHDKPDFERVNKRLCPTTSFLVENLDEHQMYEFRVKAVNEIGESEPSLPLNVV
-IQDDEVPPTIKLRLSVRGDTIKVKAGEPVHIPADVTGLPMPKIEWSKNETVIEKPTDALQITKEEVSRSE
-AKTELSIPKAVREDKGTYTVTASNRLGSVFRNVHVEVYDRPSPPRNLAVTDIKAESCYLTWDAPLDNGGS
-EITHYVIDKRDASRKKAEWEEVTNTAVEKRYGIWKLIPNGQYEFRVRAVNKYGISDECKSDKVVIQDPYR
-LPGPPGKPKVLARTKGSMLVSWTPPLDNGGSPITGYWLEKREEGSPYWSRVSRAPITKVGLKGVEFNVPR
-LLEGVKYQFRAMAINAAGIGPPSEPSDPEVAGDPIFPPGPPSCPEVKDKTKSSISLGWKPPAKDGGSPIK
-GYIVEMQEEGTTDWKRVNEPDKLITTCECVVPNLKELRKYRFRVKAVNEAGESEPSDTTGEIPATDIQEE
-PEVFIDIGAQDCLVCKAGSQIRIPAVIKGRPTPKSSWEFDGKAKKAMKDGVHDIPEDAQLETAENSSVII
-IPECKRSHTGKYSITAKNKAGQKTANCRVKVMDVPGPPKDLKVSDITRGSCRLSWKMPDDDGGDRIKGYV
-IEKRTIDGKAWTKVNPDCGSTTFVVPDLLSEQQYFFRVRAENRFGIGPPVETIQRTTARDPIYPPDPPIK
-LKIGLITKNTVHLSWKPPKNDGGSPVTHYIVECLAWDPTGTKKEAWRQCNKRDVEELQFTVEDLVEGGEY
-EFRVKAVNAAGVSKPSATVGPCDCQRPDMPPSIDLKEFMEVEEGTNVNIVAKIKGVPFPTLTWFKAPPKK
-PDNKEPVLYDTHVNKLVVDDTCTLVIPQSRRSDTGLYTITAVNNLGTASKEMRLNVLGRPGPPVGPIKFE
-SVSADQMTLSWFPPKDDGGSKITNYVIEKREANRKTWVHVSSEPKECTYTIPKLLEGHEYVFRIMAQNKY
-GIGEPLDSEPETARNLFSVPGAPDKPTVSSVTRNSMTVNWEEPEYDGGSPVTGYWLEMKDTTSKRWKRVN
-RDPIKAMTLGVSYKVTGLIEGSDYQFRVYAINAAGVGPASLPSDPATARDPIAPPGPPFPKVTDWTKSSA
-DLEWSPPLKDGGSKVTGYIVEYKEEGKEEWEKGKDKEVRGTKLVVTGLKEGAFYKFRVSAVNIAGIGEPG
-EVTDVIEMKDRLVSPDLQLDASVRDRIVVHAGGVIRIIAYVSGKPPPTVTWNMNERTLPQEATIETTAIS
-SSMVIKNCQRSHQGVYSLLAKNEAGERKKTIIVDVLDVPGPVGTPFLAHNLTNESCKLTWFSPEDDGGSP
-ITNYVIEKRESDRRAWTPVTYTVTRQNATVQGLIQGKAYFFRIAAENSIGMGPFVETSEALVIREPITVP
-ERPEDLEVKEVTKNTVTLTWNPPKYDGGSEIINYVLESRLIGTEKFHKVTNDNLLSRKYTVKGLKEGDTY
-EYRVSAVNIVGQGKPSFCTKPITCKDELAPPTLHLDFRDKLTIRVGEAFALTGRYSGKPKPKVSWFKDEA
-DVLEDDRTHIKTTPATLALEKIKAKRSDSGKYCVVVENSTGSRKGFCQVNVVDRPGPPVGPVSFDEVTKD
-YMVISWKPPLDDGGSKITNYIIEKKEVGKDVWMPVTSASAKTTCKVSKLLEGKDYIFRIHAENLYGISDP
-LVSDSMKAKDRFRVPDAPDQPIVTEVTKDSALVTWNKPHDGGKPITNYILEKRETMSKRWARVTKDPIHP
-YTKFRVPDLLEGCQYEFRVSAENEIGIGDPSPPSKPVFAKDPIAKPSPPVNPEAIDTTCNSVDLTWQPPR
-HDGGSKILGYIVEYQKVGDEEWRRANHTPESCPETKYKVTGLRDGQTYKFRVLAVNAAGESDPAHVPEPV
-LVKDRLEPPELILDANMAREQHIKVGDTLRLSAIIKGVPFPKVTWKKEDRDAPTKARIDVTPVGSKLEIR
-NAAHEDGGIYSLTVENPAGSKTVSVKVLVLDKPGPPRDLEVSEIRKDSCYLTWKEPLDDGGSVITNYVVE
-RRDVASAQWSPLSATSKKKSHFAKHLNEGNQYLFRVAAENQYGRGPFVETPKPIKALDPLHPPGPPKDLH
-HVDVDKTEVSLVWNKPDRDGGSPITGYLVEYQEEGTQDWIKFKTVTNLECVVTGLQQGKTYRFRVKAENI
-VGLGLPDTTIPIECQEKLVPPSVELDVKLIEGLVVKAGTTVRFPAIIRGVPVPTAKWTTDGSEIKTDEHY
-TVETDNFSSVLTIKNCLRRDTGEYQITVSNAAGSKTVAVHLTVLDVPGPPTGPINILDVTPEHMTISWQP
-PKDDGGSPVINYIVEKQDTRKDTWGVVSSGSSKTKLKIPHLQKGCEYVFRVRAENKIGVGPPLDSTPTVA
-KHKFSPPSPPGKPVVTDITENAATVSWTLPKSDGGSPITGYYMERREVTGKWVRVNKTPIADLKFRVTGL
-YEGNTYEFRVFAENLAGLSKPSPSSDPIKACRPIKPPGPPINPKLKDKSRETADLVWTKPLSDGGSPILG
-YVVECQKPGTAQWNRINKDELIRQCAFRVPGLIEGNEYRFRIKAANIVGEGEPRELAESVIAKDILHPPE
-VELDVTCRDVITVRVGQTIRILARVKGRPEPDITWTKEGKVLVREKRVDLIQDLPRVELQIKEAVRADHG
-KYIISAKNSSGHAQGSAIVNVLDRPGPCQNLKVTNVTKENCTISWENPLDNGGSEITNFIVEYRKPNQKG
-WSIVASDVTKRLIKANLLANNEYYFRVCAENKVGVGPTIETKTPILAINPIDRPGEPENLHIADKGKTFV
-YLKWRRPDYDGGSPNLSYHVERRLKGSDDWERVHKGSIKETHYMVDRCVENQIYEFRVQTKNEGGESDWV
-KTEEVVVKEDLQKPVLDLKLSGVLTVKAGDTIRLEAGVRGKPFPEVAWTKDKDATDLTRSPRVKIDTRAD
-SSKFSLTKAKRSDGGKYVVTATNTAGSFVAYATVNVLDKPGPVRNLKIVDVSSDRCTVCWDPPEDDGGCE
-IQNYILEKCETKRMVWSTYSATVLTPGTTVTRLIEGNEYIFRVRAENKIGTGPPTESKPVIAKTKYDKPG
-RPDPPEVTKVSKEEMTVVWNPPEYDGGKSITGYFLEKKEKHSTRWVPVNKSAIPERRMKVQNLLPDHEYQ
-FRVKAENEIGIGEPSLPSRPVVAKDPIEPPGPPTNFRVVDTTKHSITLGWGKPVYDGGAPIIGYVVEMRP
-KIADASPDEGWKRCNAAAQLVRKEFTVTSLDENQEYEFRVCAQNQVGIGRPAELKEAIKPKEILEPPEID
-LDASMRKLVIVRAGCPIRLFAIVRGRPAPKVTWRKVGIDNVVRKGQVDLVDTMAFLVIPNSTRDDSGKYS
-LTLVNPAGEKAVFVNVRVLDTPGPVSDLKVSDVTKTSCHVSWAPPENDGGSQVTHYIVEKREADRKTWST
-VTPEVKKTSFHVTNLVPGNEYYFRVTAVNEYGPGVPTDVPKPVLASDPLSEPDPPRKLEVTEMTKNSATL
-AWLPPLRDGGAKIDGYITSYREEEQPADRWTEYSVVKDLSLVVTGLKEGKKYKFRVAARNAVGVSLPREA
-EGVYEAKEQLLPPKILMPEQITIKAGKKLRIEAHVYGKPHPTCKWKKGEDEVVTSSHLAVHKADSSSILI
-IKDVTRKDSGYYSLTAENSSGTDTQKIKVVVMDAPGPPQPPFDISDIDADACSLSWHIPLEDGGSNITNY
-IVEKCDVSRGDWVTALASVTKTSCRVGKLIPGQEYIFRVRAENRFGISEPLTSPKMVAQFPFGVPSEPKN
-ARVTKVNKDCIFVAWDRPDSDGGSPIIGYLIERKERNSLLWVKANDTLVRSTEYPCAGLVEGLEYSFRIY
-ALNKAGSSPPSKPTEYVTARMPVDPPGKPEVIDVTKSTVSLIWARPKHDGGSKIIGYFVEACKLPGDKWV
-RCNTAPHQIPQEEYTATGLEEKAQYQFRAIARTAVNISPPSEPSDPVTILAENVPPRIDLSVAMKSLLTV
-KAGTNVCLDATVFGKPMPTVSWKKDGTLLKPAEGIKMAMQRNLCTLELFSVNRKDSGDYTITAENSSGSK
-SATIKLKVLDKPGPPASVKINKMYSDRAMLSWEPPLEDGGSEITNYIVDKRETSRPNWAQVSATVPITSC
-SVEKLIEGHEYQFRICAENKYGVGDPVFTEPAIAKNPYDPPGRCDPPVISNITKDHMTVSWKPPADDGGS
-PITGYLLEKRETQAVNWTKVNRKPIIERTLKATGLQEGTEYEFRVTAINKAGPGKPSDASKAAYARDPQY
-PPAPPAFPKVYDTTRSSVSLSWGKPAYDGGSPIIGYLVEVKRADSDNWVRCNLPQNLQKTRFEVTGLMED
-TQYQFRVYAVNKIGYSDPSDVPDKHYPKDILIPPEGELDADLRKTLILRAGVTMRLYVPVKGRPPPKITW
-SKPNVNLRDRIGLDIKSTDFDTFLRCENVNKYDAGKYILTLENSCGKKEYTIVVKVLDTPGPPVNVTVKE
-ISKDSAYVTWEPPIIDGGSPIINYVVQKRDAERKSWSTVTTECSKTSFRVANLEEGKSYFFRVFAENEYG
-IGDPGETRDAVKASQTPGPVVDLKVRSVSKSSCSIGWKKPHSDGGSRIIGYVVDFLTEENKWQRVMKSLS
-LQYSAKDLTEGKEYTFRVSAENENGEGTPSEITVVARDDVVAPDLDLKGLPDLCYLAKENSNFRLKIPIK
-GKPAPSVSWKKGEDPLATDTRVSVESSAVNTTLIVYDCQKSDAGKYTITLKNVAGTKEGTISIKVVGKPG
-IPTGPIKFDEVTAEAMTLKWAPPKDDGGSEITNYILEKRDSVNNKWVTCASAVQKTTFRVTRLHEGMEYT
-FRVSAENKYGVGEGLKSEPIVARHPFDVPDAPPPPNIVDVRHDSVSLTWTDPKKTGGSPITGYHLEFKER
-NSLLWKRANKTPIRMRDFKVTGLTEGLEYEFRVMAINLAGVGKPSLPSEPVVALDPIDPPGKPEVINITR
-NSVTLIWTEPKYDGGHKLTGYIVEKRDLPSKSWMKANHVNVPECAFTVTDLVEGGKYEFRIRAKNTAGAI
-SAPSESTETIICKDEYEAPTIVLDPTIKDGLTIKAGDTIVLNAISILGKPLPKSSWSKAGKDIRPSDITQ
-ITSTPTSSMLTIKYATRKDAGEYTITATNPFGTKVEHVKVTVLDVPGPPGPVEISNVSAEKATLTWTPPL
-EDGGSPIKSYILEKRETSRLLWTVVSEDIQSCRHVATKLIQGNEYIFRVSAVNHYGKGEPVQSEPVKMVD
-RFGPPGPPEKPEVSNVTKNTATVSWKRPVDDGGSEITGYHVERREKKSLRWVRAIKTPVSDLRCKVTGLQ
-EGSTYEFRVSAENRAGIGPPSEASDSVLMKDAAYPPGPPSNPHVTDTTKKSASLAWGKPHYDGGLEITGY
-VVEHQKVGDEAWIKDTTGTALRITQFVVPDLQTKEKYNFRISAINDAGVGEPAVIPDVEIVEREMAPDFE
-LDAELRRTLVVRAGLSIRIFVPIKGRPAPEVTWTKDNINLKNRANIENTESFTLLIIPECNRYDTGKFVM
-TIENPAGKKSGFVNVRVLDTPGPVLNLRPTDITKDSVTLHWDLPLIDGGSRITNYIVEKREATRKSYSTA
-TTKCHKCTYKVTGLSEGCEYFFRVMAENEYGIGEPTETTEPVKASEAPSPPDSLNIMDITKSTVSLAWPK
-PKHDGGSKITGYVIEAQRKGSDQWTHITTVKGLECVVRNLTEGEEYTFQVMAVNSAGRSAPRESRPVIVK
-EQTMLPELDLRGIYQKLVIAKAGDNIKVEIPVLGRPKPTVTWKKGDQILKQTQRVNFETTATSTILNINE
-CVRSDSGPYPLTARNIVGEVGDVITIQVHDIPGPPTGPIKFDEVSSDFVTFSWDPPENDGGVPISNYVVE
-MRQTDSTTWVELATTVIRTTYKATRLTTGLEYQFRVKAQNRYGVGPGITSACIVANYPFKVPGPPGTPQV
-TAVTKDSMTISWHEPLSDGGSPILGYHVERKERNGILWQTVSKALVPGNIFKSSGLTDGIAYEFRVIAEN
-MAGKSKPSKPSEPMLALDPIDPPGKPVPLNITRHTVTLKWAKPEYTGGFKITSYIVEKRDLPNGRWLKAN
-FSNILENEFTVSGLTEDAAYEFRVIAKNAAGAISPPSEPSDAITCRDDVEAPKIKVDVKFKDTVILKAGE
-AFRLEADVSGRPPPTMEWSKDGKELEGTAKLEIKIADFSTNLVNKDSTRRDSGAYTLTATNPGGFAKHIF
-NVKVLDRPGPPEGPLAVTEVTSEKCVLSWFPPLDDGGAKIDHYIVQKRETSRLAWTNVASEVQVTKLKVT
-KLLKGNEYIFRVMAVNKYGVGEPLESEPVLAVNPYGPPDPPKNPEVTTITKDSMVVCWGHPDSDGGSEII
-NYIVERRDKAGQRWIKCNKKTLTDLRYKVSGLTEGHEYEFRIMAENAAGISAPSPTSPFYKACDTVFKPG
-PPGNPRVLDTSRSSISIAWNKPIYDGGSEITGYMVEIALPEEDEWQIVTPPAGLKATSYTITGLTENQEY
-KIRIYAMNSEGLGEPALVPGTPKAEDRMLPPEIELDADLRKVVTIRACCTLRLFVPIKGRPAPEVKWARD
-HGESLDKASIESTSSYTLLIVGNVNRFDSGKYILTVENSSGSKSAFVNVRVLDTPGPPQDLKVKEVTKTS
-VTLTWDPPLLDGGSKIKNYIVEKRESTRKAYSTVATNCHKTSWKVDQLQEGCSYYFRVLAENEYGIGLPA
-ETAESVKASERPLPPGKITLMDVTRNSVSLSWEKPEHDGGSRILGYIVEMQTKGSDKWATCATVKVTEAT
-ITGLIQGEEYSFRVSAQNEKGISDPRQLSVPVIAKDLVIPPAFKLLFNTFTVLAGEDLKVDVPFIGRPTP
-AVTWHKDNVPLKQTTRVNAESTENNSLLTIKDACREDVGHYVVKLTNSAGEAIETLNVIVLDKPGPPTGP
-VKMDEVTADSITLSWGPPKYDGGSSINNYIVEKRDTSTTTWQIVSATVARTTIKACRLKTGCEYQFRIAA
-ENRYGKSTYLNSEPTVAQYPFKVPGPPGTPVVTLSSRDSMEVQWNEPISDGGSRVIGYHLERKERNSILW
-VKLNKTPIPQTKFKTTGLEEGVEYEFRVSAENIVGIGKPSKVSECYVARDPCDPPGRPEAIIVTRNSVTL
-QWKKPTYDGGSKITGYIVEKKELPEGRWMKASFTNIIDTHFEVTGLVEDHRYEFRVIARNAAGVFSEPSE
-STGAITARDEVDPPRISMDPKYKDTIVVHAGESFKVDADIYGKPIPTIQWIKGDQELSNTARLEIKSTDF
-ATSLSVKDAVRVDSGNYILKAKNVAGERSVTVNVKVLDRPGPPEGPVVISGVTAEKCTLAWKPPLQDGGS
-DIINYIVERRETSRLVWTVVDANVQTLSCKVTKLLEGNEYTFRIMAVNKYGVGEPLESEPVVAKNPFVVP
-DAPKAPEVTTVTKDSMIVVWERPASDGGSEILGYVLEKRDKEGIRWTRCHKRLIGELRLRVTGLIENHDY
-EFRVSAENAAGLSEPSPPSAYQKACDPIYKPGPPNNPKVIDITRSSVFLSWSKPIYDGGCEIQGYIVEKC
-DVSVGEWTMCTPPTGINKTNIEVEKLLEKHEYNFRICAINKAGVGEHADVPGPIIVEEKLEAPDIDLDLE
-LRKIINIRAGGSLRLFVPIKGRPTPEVKWGKVDGEIRDAAIIDVTSSFTSLVLDNVNRYDSGKYTLTLEN
-SSGTKSAFVTVRVLDTPSPPVNLKVTEITKDSVSITWEPPLLDGGSKIKNYIVEKREATRKSYAAVVTNC
-HKNSWKIDQLQEGCSYYFRVTAENEYGIGLPAQTADPIKVAEVPQPPGKITVDDVTRNSVSLSWTKPEHD
-GGSKIIQYIVEMQAKHSEKWSECARVKSLQAVITNLTQGEEYLFRVVAVNEKGRSDPRSLAVPIVAKDLV
-IEPDVKPAFSSYSVQVGQDLKIEVPISGRPKPTITWTKDGLPLKQTTRINVTDSLDLTTLSIKETHKDDG
-GQYGITVANVVGQKTASIEIVTLDKPDPPKGPVKFDDVSAESITLSWNPPLYTGGCQITNYIVQKRDTTT
-TVWDVVSATVARTTLKVTKLKTGTEYQFRIFAENRYGQSFALESDPIVAQYPYKEPGPPGTPFATAISKD
-SMVIQWHEPVNNGGSPVIGYHLERKERNSILWTKVNKTIIHDTQFKAQNLEEGIEYEFRVYAENIVGVGK
-ASKNSECYVARDPCDPPGTPEPIMVKRNEITLQWTKPVYDGGSMITGYIVEKRDLPDGRWMKASFTNVIE
-TQFTVSGLTEDQRYEFRVIAKNAAGAISKPSDSTGPITAKDEVELPRISMDPKFRDTIVVNAGETFRLEA
-DVHGKPLPTIEWLRGDKEIEESARCEIKNTDFKALLIVKDAIRIDGGQYILRASNVAGSKSFPVNVKVLD
-RPGPPEGPVQVTGVTSEKCSLTWSPPLQDGGSDISHYVVEKRETSRLAWTVVASEVVTNSLKVTKLLEGN
-EYVFRIMAVNKYGVGEPLESAPVLMKNPFVLPGPPKSLEVTNIAKDSMTVCWNRPDSDGGSEIIGYIVEK
-RDRSGIRWIKCNKRRITDLRLRVTGLTEDHEYEFRVSAENAAGVGEPSPATVYYKACDPVFKPGPPTNAH
-IVDTTKNSITLAWGKPIYDGGSEILGYVVEICKADEEEWQIVTPQTGLRVTRFEISKLTEHQEYKIRVCA
-LNKVGLGEATSVPGTVKPEDKLEAPELDLDSELRKGIVVRAGGSARIHIPFKGRPTPEITWSREEGEFTD
-KVQIEKGVNYTQLSIDNCDRNDAGKYILKLENSSGSKSAFVTVKVLDTPGPPQNLAVKEVRKDSAFLVWE
-PPIIDGGAKVKNYVIDKRESTRKAYANVSSKCSKTSFKVENLTEGAIYYFRVMAENEFGVGVPVETVDAV
-KAAEPPSPPGKVTLTDVSQTSASLMWEKPEHDGGSRVLGYVVEMQPKGTEKWSIVAESKVCNAVVTGLSS
-GQEYQFRVKAYNEKGKSDPRVLGVPVIAKDLTIQPSLKLPFNTYSIQAGEDLKIEIPVIGRPRPNISWVK
-DGEPLKQTTRVNVEETATSTVLHIKEGNKDDFGKYTVTATNSAGTATENLSVIVLEKPGPPVGPVRFDEV
-SADFVVISWEPPAYTGGCQISNYIVEKRDTTTTTWHMVSATVARTTIKITKLKTGTEYQFRIFAENRYGK
-SAPLDSKAVIVQYPFKEPGPPGTPFVTSISKDQMLVQWHEPVNDGGTKIIGYHLEQKEKNSILWVKLNKT
-PIQDTKFKTTGLDEGLEYEFKVSAENIVGIGKPSKVSECFVARDPCDPPGRPEAIVITRNNVTLKWKKPA
-YDGGSKITGYIVEKKDLPDGRWMKASFTNVLETEFTVSGLVEDQRYEFRVIARNAAGNFSEPSDSSGAIT
-ARDEIDAPNASLDPKYKDVIVVHAGETFVLEADIRGKPIPDVVWSKDGKELEETAARMEIKSTIQKTTLV
-VKDCIRTDGGQYILKLSNVGGTKSIPITVKVLDRPGPPEGPLKVTGVTAEKCYLAWNPPLQDGGANISHY
-IIEKRETSRLSWTQVSTEVQALNYKVTKLLPGNEYIFRVMAVNKYGIGEPLESGPVTACNPYKPPGPPST
-PEVSAITKDSMVVTWARPVDDGGTEIEGYILEKRDKEGVRWTKCNKKTLTDLRLRVTGLTEGHSYEFRVA
-AENAAGVGEPSEPSVFYRACDALYPPGPPSNPKVTDTSRSSVSLAWSKPIYDGGAPVKGYVVEVKEAAAD
-EWTTCTPPTGLQGKQFTVTKLKENTEYNFRICAINSEGVGEPATLPGSVVAQERIEPPEIELDADLRKVV
-VLRASATLRLFVTIKGRPEPEVKWEKAEGILTDRAQIEVTSSFTMLVIDNVTRFDSGRYNLTLENNSGSK
-TAFVNVRVLDSPSAPVNLTIREVKKDSVTLSWEPPLIDGGAKITNYIVEKRETTRKAYATITNNCTKTTF
-RIENLQEGCSYYFRVLASNEYGIGLPAETTEPVKVSEPPLPPGRVTLVDVTRNTATIKWEKPESDGGSKI
-TGYVVEMQTKGSEKWSTCTQVKTLEATISGLTAGEEYVFRVAAVNEKGRSDPRQLGVPVIARDIEIKPSV
-ELPFHTFNVKAREQLKIDVPFKGRPQATVNWRKDGQTLKETTRVNVSSSKTVTSLSIKEASKEDVGTYEL
-CVSNSAGSITVPITIIVLDRPGPPGPIRIDEVSCDSITISWNPPEYDGGCQISNYIVEKKETTSTTWHIV
-SQAVARTSIKIVRLTTGSEYQFRVCAENRYGKSSYSESSAVVAEYPFSPPGPPGTPKVVHATKSTMLVTW
-QVPVNDGGSRVIGYHLEYKERSSILWSKANKILIADTQMKVSGLDEGLMYEYRVYAENIAGIGKCSKSCE
-PVPARDPCDPPGQPEVTNITRKSVSLKWSKPHYDGGAKITGYIVERRELPDGRWLKCNYTNIQETYFEVT
-ELTEDQRYEFRVFARNAADSVSEPSESTGPIIVKDDVEPPRVMMDVKFRDVIVVKAGEVLKINADIAGRP
-LPVISWAKDGIEIEERARTEIISTDNHTLLTVKDCIRRDTGQYVLTLKNVAGTRSVAVNCKVLDKPGPPA
-GPLEINGLTAEKCSLSWGRPQEDGGADIDYYIVEKRETSHLAWTICEGELQMTSCKVTKLLKGNEYIFRV
-TGVNKYGVGEPLESVAIKALDPFTVPSPPTSLEITSVTKESMTLCWSRPESDGGSEISGYIIERREKNSL
-RWVRVNKKPVYDLRVKSTGLREGCEYEYRVYAENAAGLSLPSETSPLIRAEDPVFLPSPPSKPKIVDSGK
-TTITIAWVKPLFDGGAPITGYTVEYKKSDDTDWKTSIQSLRGTEYTISGLTTGAEYVFRVKSVNKVGASD
-PSDSSDPQIAKEREEEPLFDIDSEMRKTLIVKAGASFTMTVPFRGRPVPNVLWSKPDTDLRTRAYVDTTD
-SRTSLTIENANRNDSGKYTLTIQNVLSAASLTLVVKVLDTPGPPTNITVQDVTKESAVLSWDVPENDGGA
-PVKNYHIEKREASKKAWVSVTNNCNRLSYKVTNLQEGAIYYFRVSGENEFGVGIPAETKEGVKITEKPSP
-PEKLGVTSISKDSVSLTWLKPEHDGGSRIVHYVVEALEKGQKNWVKCAVAKSTHHVVSGLRENSEYFFRV
-FAENQAGLSDPRELLLPVLIKEQLEPPEIDMKNFPSHTVYVRAGSNLKVDIPISGKPLPKVTLSRDGVPL
-KATMRFNTEITAENLTINLKESVTADAGRYEITAANSSGTTKAFINIVVLDRPGPPTGPVVISDITEESV
-TLKWEPPKYDGGSQVTNYILLKRETSTAVWTEVSATVARTMMKVMKLTTGEEYQFRIKAENRFGISDHID
-SACVTVKLPYTTPGPPSTPWVTNVTRESITVGWHEPVSNGGSAVVGYHLEMKDRNSILWQKANKLVIRTT
-HFKVTTISAGLIYEFRVYAENAAGVGKPSHPSEPVLAIDACEPPRNVRITDISKNSVSLSWQQPAFDGGS
-KITGYIVERRDLPDGRWTKASFTNVTETQFIISGLTQNSQYEFRVFARNAVGSISNPSEVVGPITCIDSY
-GGPVIDLPLEYTEVVKYRAGTSVKLRAGISGKPAPTIEWYKDDKELQTNALVCVENTTDLASILIKDADR
-LNSGCYELKLRNAMGSASATIRVQILDKPGPPGGPIEFKTVTAEKITLLWRPPADDGGAKITHYIVEKRE
-TSRVVWSMVSEHLEECIITTTKIIKGNEYIFRVRAVNKYGIGEPLESDSVVAKNAFVTPGPPGIPEVTKI
-TKNSMTVVWSRPIADGGSDISGYFLEKRDKKSLGWFKVLKETIRDTRQKVTGLTENSDYQYRVCAVNAAG
-QGPFSEPSEFYKAADPIDPPGPPAKIRIADSTKSSITLGWSKPVYDGGSAVTGYVVEIRQGEEEEWTTVS
-TKGEVRTTEYVVSNLKPGVNYYFRVSAVNCAGQGEPIEMNEPVQAKDILEAPEIDLDVALRTSVIAKAGE
-DVQVLIPFKGRPPPTVTWRKDEKNLGSDARYSIENTDSSSLLTIPQVTRNDTGKYILTIENGVGEPKSST
-VSVKVLDTPAACQKLQVKHVSRGTVTLLWDPPLIDGGSPIINYVIEKRDATKRTWSVVSHKCSSTSFKLI
-DLSEKTPFFFRVLAENEIGIGEPCETTEPVKAAEVPAPIRDLSMKDSTKTSVILSWTKPDFDGGSVITEY
-VVERKGKGEQTWSHAGISKTCEIEVSQLKEQSVLEFRVFAKNEKGLSDPVTIGPITVKELIITPEVDLSD
-IPGAQVTVRIGHNVHLELPYKGKPKPSISWLKDGLPLKESEFVRFSKTENKITLSIKNAKKEHGGKYTVI
-LDNAVCRIAVPITVITLGPPSKPKGPIRFDEIKADSVILSWDVPEDNGGGEITCYSIEKRETSQTNWRMV
-CSSVARTTFKVPNLVKDAEYQFRVRAENRYGVSQPLVSSIIVAKHQFRIPGPPGKPVIYNVTSDGMSLTW
-DAPVYDGGSEVTGFHVEKKERNSILWQKVNTSPISGREYRATGLVEGLDYQFRVYAENSAGLSSPSDPSK
-FTLAVSPVDPPGTPDYIDVTRETITLKWNPPLRDGGSKIVGYSIEKRQGNERWVRCNFTDVSECQYTVTG
-LSPGDRYEFRIIARNAVGTISPPSQSSGIIMTRDENVPPIVEFGPEYFDGLIIKSGESLRIKALVQGRPV
-PRVTWFKDGVEIEKRMNMEITDVLGSTSLFVRDATRDHRGVYTVEAKNASGSAKAEIKVKVQDTPGKVVG
-PIRFTNITGEKMTLWWDAPLNDGCAPITHYIIEKRETSRLAWALIEDKCEAQSYTAIKLINGNEYQFRVS
-AVNKFGVGRPLDSDPVVAQIQYTVPDAPGIPEPSNITGNSITLTWARPESDGGSEIQQYILERREKKSTR
-WVKVISKRPISETRFKVTGLTEGNEYEFHVMAENAAGVGPASGISRLIKCREPVNPPGPPTVVKVTDTSK
-TTVSLEWSKPVFDGGMEIIGYIIEMCKADLGDWHKVNAEACVKTRYTVTDLQAGEEYKFRVSAINGAGKG
-DSCEVTGTIKAVDRLTAPELDIDANFKQTHVVRAGASIRLFIAYQGRPTPTAVWSKPDSNLSLRADIHTT
-DSFSTLTVENCNRNDAGKYTLTVENNSGSKSITFTVKVLDTPGPPGPITFKDVTRGSATLMWDAPLLDGG
-ARIHHYVVEKREASRRSWQVISEKCTRQIFKVNDLAEGVPYYFRVSAVNEYGVGEPYEMPEPIVATEQPA
-PPRRLDVVDTSKSSAVLAWLKPDHDGGSRITGYLLEMRQKGSDFWVEAGHTKQLTFTVERLVEKTEYEFR
-VKAKNDAGYSEPREAFSSVIIKEPQIEPTADLTGITNQLITCKAGSPFTIDVPISGRPAPKVTWKLEEMR
-LKETDRVSITTTKDRTTLTVKDSMRGDSGRYFLTLENTAGVKTFSVTVVVIGRPGPVTGPIEVSSVSAES
-CVLSWGEPKDGGGTEITNYIVEKRESGTTAWQLVNSSVKRTQIKVTHLTKYMEYSFRVSSENRFGVSKPL
-ESAPIIAEHPFVPPSAPTRPEVYHVSANAMSIRWEEPYHDGGSKIIGYWVEKKERNTILWVKENKVPCLE
-CNYKVTGLVEGLEYQFRTYALNAAGVSKASEASRPIMAQNPVDAPGRPEVTDVTRSTVSLIWSAPAYDGG
-SKVVGYIIERKPVSEVGDGRWLKCNYTIVSDNFFTVTALSEGDTYEFRVLAKNAAGVISKGSESTGPVTC
-RDEYAPPKAELDARLHGDLVTIRAGSDLVLDAAVGGKPEPKIIWTKGDKELDLCEKVSLQYTGKRATAVI
-KFCDRSDSGKYTLTVKNASGTKAVSVMVKVLDSPGPCGKLTVSRVTQEKCTLAWSLPQEDGGAEITHYIV
-ERRETSRLNWVIVEGECPTLSYVVTRLIKNNEYIFRVRAVNKYGPGVPVESEPIVARNSFTIPSPPGIPE
-EVGTGKEHIIIQWTKPESDGGNEISNYLVDKREKKSLRWTRVNKDYVVYDTRLKVTSLMEGCDYQFRVTA
-VNAAGNSEPSEASNFISCREPSYTPGPPSAPRVVDTTKHSISLAWTKPMYDGGTDIVGYVLEMQEKDTDQ
-WYRVHTNATIRNTEFTVPDLKMGQKYSFRVAAVNVKGMSEYSESIAEIEPVERIEIPDLELADDLKKTVT
-IRAGASLRLMVSVSGRPPPVITWSKQGIDLASRAIIDTTESYSLLIVDKVNRYDAGKYTIEAENQSGKKS
-ATVLVKVYDTPGPCPSVKVKEVSRDSVTITWEIPTIDGGAPVNNYIVEKREAAMRAFKTVTTKCSKTLYR
-ISGLVEGTMYYFRVLPENIYGIGEPCETSDAVLVSEVPLVPAKLEVVDVTKSTVTLAWEKPLYDGGSRLT
-GYVLEACKAGTERWMKVVTLKPTVLEHTVTSLNEGEQYLFRIRAQNEKGVSEPRETVTAVTVQDLRVLPT
-IDLSTMPQKTIHVPAGRPVELVIPIAGRPPPAASWFFAGSKLRESERVTVETHTKVAKLTIRETTIRDTG
-EYTLELKNVTGTTSETIKVIILDKPGPPTGPIKIDEIDATSITISWEPPELDGGAPLSGYVVEQRDAHRP
-GWLPVSESVTRSTFKFTRLTEGNEYVFRVAATNRFGIGSYLQSEVIECRSSIRIPGPPETLQIFDVSRDG
-MTLTWYPPEDDGGSQVTGYIVERKEVRADRWVRVNKVPVTMTRYRSTGLTEGLEYEHRVTAINARGSGKP
-SRPSKPIVAMDPIAPPGKPQNPRVTDTTRTSVSLAWSVPEDEGGSKVTGYLIEMQKVDQHEWTKCNTTPT
-KIREYTLTHLPQGAEYRFRVLACNAGGPGEPAEVPGTVKVTEMLEYPDYELDERYQEGIFVRQGGVIRLT
-IPIKGKPFPICKWTKEGQDISKRAMIATSETHTELVIKEADRGDSGTYDLVLENKCGKKAVYIKVRVIGS
-PNSPEGPLEYDDIQVRSVRVSWRPPADDGGADILGYILERREVPKAAWYTIDSRVRGTSLVVKGLKENVE
-YHFRVSAENQFGISKPLKSEEPVTPKTPLNPPEPPSNPPEVLDVTKSSVSLSWSRPKDDGGSRVTGYYIE
-RKETSTDKWVRHNKTQITTTMYTVTGLVPDAEYQFRIIAQNDVGLSETSPASEPVVCKDPFDKPSQPGEL
-EILSISKDSVTLQWEKPECDGGKEILGYWVEYRQSGDSAWKKSNKERIKDKQFTIGGLLEATEYEFRVFA
-ENETGLSRPRRTAMSIKTKLTSGEAPGIRKEMKDVTTKLGEAAQLSCQIVGRPLPDIKWYRFGKELIQSR
-KYKMSSDGRTHTLTVMTEEQEDEGVYTCIATNEVGEVETSSKLLLQATPQFHPGYPLKEKYYGAVGSTLR
-LHVMYIGRPVPAMTWFHGQKLLQNSENITIENTEHYTHLVMKNVQRKTHAGKYKVQLSNVFGTVDAILDV
-EIQDKPDKPTGPIVIEALLKNSAVISWKPPADDGGSWITNYVVEKCEAKEGAEWQLVSSAISVTTCRIVN
-LTENAGYYFRVSAQNTFGISDPLEVSSVVIIKSPFEKPGAPGKPTITAVTKDSCVVAWKPPASDGGAKIR
-NYYLEKREKKQNKWISVTTEEIRETVFSVKNLIEGLEYEFRVKCENLGGESEWSEISEPITPKSDVPIQA
-PHFKEELRNLNVRYQSNATLVCKVTGHPKPIVKWYRQGKEIIADGLKYRIQEFKGGYHQLIIASVTDDDA
-TVYQVRATNQGGSVSGTASLEVEVPAKIHLPKTLEGMGAVHALRGEVVSIKIPFSGKPDPVITWQKGQDL
-IDNNGHYQVIVTRSFTSLVFPNGVERKDAGFYVVCAKNRFGIDQKTVELDVADVPDPPRGVKVSDVSRDS
-VNLTWTEPASDGGSKITNYIVEKCATTAERWLRVGQARETRYTVINLFGKTSYQFRVIAENKFGLSKPSE
-PSEPTITKEDKTRAMNYDEEVDETREVSMTKASHSSTKELYEKYMIAEDLGRGEFGIVHRCVETSSKKTY
-MAKFVKVKGTDQVLVKKEISILNIARHRNILHLHESFESMEELVMIFEFISGLDIFERINTSAFELNERE
-IVSYVHQVCEALQFLHSHNIGHFDIRPENIIYQTRRSSTIKIIEFGQARQLKPGDNFRLLFTAPEYYAPE
-VHQHDVVSTATDMWSLGTLVYVLLSGINPFLAETNQQIIENIMNAEYTFDEEAFKEISIEAMDFVDRLLV
-KERKSRMTASEALQHPWLKQKIERVSTKVIRTLKHRRYYHTLIKKDLNMVVSAARISCGGAIRSQKGVSV
-AKVKVASIEIGPVSGQIMHAVGEEGGHVKYVCKIENYDQSTQVTWYFGVRQLENSEKYEITYEDGVAILY
-VKDITKLDDGTYRCKVVNDYGEDSSYAELFVKGVREVYDYYCRRTMKKIKRRTDTMRLLERPPEFTLPLY
-NKTAYVGENVRFGVTITVHPEPHVTWYKSGQKIKPGDNDKKYTFESDKGLYQLTINSVTTDDDAEYTVVA
-RNKYGEDSCKAKLTVTLHPPPTDSTLRPMFKRLLANAECQEGQSVCFEIRVSGIPPPTLKWEKDGQPLSL
-GPNIEIIHEGLDYYALHIRDTLPEDTGYYRVTATNTAGSTSCQAHLQVERLRYKKQEFKSKEEHERHVQK
-QIDKTLRMAEILSGTESVPLTQVAKEALREAAVLYKPAVSTKTVKGEFRLEIEEKKEERKLRMPYDVPEP
-RKYKQTTIEEDQRIKQFVPMSDMKWYKKIRDQYEMPGKLDRVVQKRPKRIRLSRWEQFYVMPLPRITDQY
-RPKWRIPKLSQDDLEIVRPARRRTPSPDYDFYYRPRRRSLGDISDEELLLPIDDYLAMKRTEEERLRLEE
-ELELGFSASPPSRSPPHFELSSLRYSSPQAHVKVEETRKDFRYSTYHIPTKAEASTSYAELRERHAQAAY
-RQPKQRQRIMAEREDEELLRPVTTTQHLSEYKSELDFMSKEEKSRKKSRRQREVTEITEIEEEYEISKHA
-QRESSSSASRLLRRRRSLSPTYIELMRPVSELIRSRPQPAEEYEDDTERRSPTPERTRPRSPSPVSSERS
-LSRFERSARFDIFSRYESMKAALKTQKTSERKYEVLSQQPFTLDHAPRITLRMRSHRVPCGQNTRFILNV
-QSKPTAEVKWYHNGVELQESSKIHYTNTSGVLTLEILDCHTDDSGTYRAVCTNYKGEASDYATLDVTGGD
-YTTYASQRRDEEVPRSVFPELTRTEAYAVSSFKKTSEMEASSSVREVKSQMTETRESLSSYEHSASAEMK
-SAALEEKSLEEKSTTRKIKTTLAARILTKPRSMTVYEGESARFSCDTDGEPVPTVTWLRKGQVLSTSARH
-QVTTTKYKSTFEISSVQASDEGNYSVVVENSEGKQEAEFTLTIQKARVTEKAVTSPPRVKSPEPRVKSPE
-AVKSPKRVKSPEPSHPKAVSPTETKPTPTEKVQHLPVSAPPKITQFLKAEASKEIAKLTCVVESSVLRAK
-EVTWYKDGKKLKENGHFQFHYSADGTYELKINNLTESDQGEYVCEISGEGGTSKTNLQFMGQAFKSIHEK
-VSKISETKKSDQKTTESTVTRKTEPKAPEPISSKPVIVTGLQDTTVSSDSVAKFAVKATGEPRPTAIWTK
-DGKAITQGGKYKLSEDKGGFFLEIHKTDTSDSGLYTCTVKNSAGSVSSSCKLTIKAIKDTEAQKVSTQKT
-SEITPQKKAVVQEEISQKALRSEEIKMSEAKSQEKLALKEEASKVLISEEVKKSAATSLEKSIVHEEITK
-TSQASEEVRTHAEIKAFSTQMSINEGQRLVLKANIAGATDVKWVLNGVELTNSEEYRYGVSGSDQTLTIK
-QASHRDEGILTCISKTKEGIVKCQYDLTLSKELSDAPAFISQPRSQNINEGQNVLFTCEISGEPSPEIEW
-FKNNLPISISSNVSISRSRNVYSLEIRNASVSDSGKYTIKAKNFRGQCSATASLMVLPLVEEPSREVVLR
-TSGDTSLQGSFSSQSVQMSASKQEASFSSFSSSSASSMTEMKFASMSAQSMSSMQESFVEMSSSSFMGIS
-NMTQLESSTSKMLKAGIRGIPPKIEALPSDISIDEGKVLTVACAFTGEPTPEVTWSCGGRKIHSQEQGRF
-HIENTDDLTTLIIMDVQKQDGGLYTLSLGNEFGSDSATVNIHIRSI
-X
-SHAR_EOF
-chmod 0644 titin_hum.aa ||
-echo 'restore of titin_hum.aa failed'
-Wc_c="`wc -c < 'titin_hum.aa'`"
-test 27376 -eq "$Wc_c" ||
- echo 'titin_hum.aa: original size 27376, current size' "$Wc_c"
-fi
-# ============= titin_hum.seq ==============
-if test -f 'titin_hum.seq' -a X"$1" != X"-c"; then
- echo 'x - skipping titin_hum.seq (File already exists)'
-else
-echo 'x - extracting titin_hum.seq (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'titin_hum.seq' &&
->gi|20143913|ref|NM_003319.2| Homo sapiens titin (TTN), transcript variant N2-B, mRNA
-AGCAGTCGTGCATTCCCAGCCTCGCCTCGGGTGTAGGGATTGCATAGAAAAGCAAAACTACACAGTCTTG
-ACTGTGTAGTTTTGTTTTTAGGATTAGAGGCTCACCGATTCATGTCGGAGATGGTCAGAAAAACCAACTC
-TCCATAGGACGTCGTTTCAGAAGCAACCTTGGGCTTAGTCCCACCCTTTTTAGGCACTCTTGAGAAATCA
-AGTGCCTAGAAAGATGACAACTCAAGCACCGACGTTTACGCAGCCGTTACAAAGCGTTGTGGTACTGGAG
-GGTAGTACCGCAACCTTTGAGGCTCACATTAGTGGTTTTCCAGTTCCTGAGGTGAGCTGGTTTAGGGATG
-GCCAGGTGATTTCCACTTCCACTCTGCCCGGCGTGCAGATCTCCTTTAGCGATGGCCGCGCTAAACTGAC
-GATCCCCGCCGTGACTAAAGCCAACAGTGGACGATATTCCCTGAAAGCCACCAATGGATCTGGACAAGCG
-ACTAGTACTGCTGAGCTTCTCGTGAAAGCTGAGACAGCACCACCCAACTTCGTTCAACGACTGCAGAGCA
-TGACCGTGAGACAAGGAAGCCAAGTGAGACTCCAAGTGAGAGTGACTGGAATCCCTACACCTGTGGTGAA
-GTTCTACCGGGATGGAGCCGAAATCCAGAGCTCCCTTGATTTCCAAATTTCACAAGAAGGCGACCTCTAC
-AGCTTACTGATTGCAGAAGCATACCCTGAGGACTCAGGGACCTATTCAGTAAATGCCACCAATAGCGTTG
-GAAGAGCTACTTCGACTGCTGAATTACTGGTTCAAGGTGAAGAAGAAGTACCTGCTAAAAAGACAAAGAC
-AATTGTTTCGACTGCTCAGATCTCAGAATCAAGACAAACCCGAATTGAAAAGAAGATTGAAGCCCACTTT
-GATGCCAGATCAATTGCAACAGTTGAGATGGTCATAGATGGTGCCGCTGGGCAACAGCTGCCACATAAAA
-CACCTCCCAGGATTCCTCCGAAGCCAAAGTCAAGATCCCCAACACCACCGTCTATTGCTGCCAAAGCACA
-GCTGGCTCGGCAGCAGTCCCCATCGCCCATAAGACACTCCCCTTCCCCGGTCAGACACGTGCGGGCACCG
-ACCCCATCTCCGGTCAGGTCCGTGTCTCCAGCAGCAAGAATCTCCACATCCCCCATCAGGTCTGTTAGGT
-CTCCATTGCTCATGCGTAAGACTCAGGCATCCACCGTGGCCACAGGTCCTGAAGTGCCTCCCCCTTGGAA
-GCAAGAGGGCTACGTGGCCTCCTCATCTGAGGCTGAGATGAGAGAGACAACGCTGACAACCTCTACTCAG
-ATCAGGACAGAAGAGAGATGGGAAGGGAGATACGGTGTCCAGGAGCAAGTGACCATCAGTGGTGCTGCGG
-GTGCTGCCGCCAGTGTGTCGGCCAGTGCTAGCTACGCAGCAGAGGCTGTTGCCACTGGTGCTAAAGAGGT
-GAAACAAGATGCTGACAAAAGTGCAGCTGTTGCGACTGTTGTTGCTGCCGTTGATATGGCCAGAGTGAGA
-GAACCAGTGATCAGCGCTGTAGAGCAGACTGCTCAGAGGACAACCACGACTGCTGTGCACATCCAACCTG
-CTCAAGAACAGGTAAGAAAGGAAGCGGAGAAGACTGCTGTAACTAAGGTAGTAGTGGCCGCCGATAAAGC
-CAAGGAACAAGAATTAAAATCAAGAACCAAAGAAGTAATTACCACAAAGCAAGAGCAGATGCACGTAACT
-CATGAGCAGATAAGAAAAGAAACTGAAAAAACATTTGTACCAAAGGTAGTAATTTCCGCAGCTAAAGCCA
-AAGAACAAGAAACTAGAATTTCTGAAGAAATTACTAAGAAACAGAAACAAGTAACTCAAGAAGCAATAAT
-GAAGGAAACTAGGAAAACAGTTGTACCTAAAGTCATAGTTGCCACACCCAAAGTCAAAGAACAAGATTTA
-GTATCAAGAGGTAGAGAAGGCATTACTACCAAAAGAGAACAAGTGCAAATAACTCAGGAGAAGATGAGAA
-AGGAAGCCGAGAAAACTGCCTTGTCTACAATAGCAGTTGCTACTGCTAAAGCCAAAGAACAAGAAACAAT
-ACTGAGAACTAGAGAAACTATGGCTACTAGACAAGAACAAATCCAAGTTACCCATGGAAAGGTGGACGTT
-GGAAAAAAGGCTGAAGCTGTAGCAACAGTTGTTGCTGCAGTAGACCAGGCCCGAGTCAGAGAGCCCAGAG
-AGCCTGGGCATCTTGAAGAATCCTATGCTCAGCAGACCACTTTGGAGTACGGATATAAGGAACGCATTTC
-CGCCGCAAAGGTAGCTGAGCCTCCCCAACGTCCAGCCTCAGAACCCCACGTTGTCCCTAAAGCAGTCAAG
-CCTAGAGTAATCCAGGCTCCTTCTGAGACTCATATCAAAACTACTGATCAAAAGGGAATGCACATATCAT
-CACAGATCAAGAAAACTACAGATCTAACAACGGAAAGATTAGTCCATGTGGATAAACGCCCCCGCACAGC
-TAGCCCTCACTTTACTGTTTCAAAAATTTCTGTTCCTAAGACAGAACATGGATATGAGGCATCAATAGCC
-GGTAGTGCTATTGCCACATTACAAAAAGAGTTGTCAGCCACATCTTCTGCTCAGAAGATCACCAAATCGG
-TGAAGGCTCCTACTGTGAAGCCCAGTGAGACTAGAGTAAGGGCAGAGCCCACACCCTTGCCACAGTTCCC
-CTTCGCTGACACACCAGATACTTACAAGAGTGAAGCTGGCGTTGAGGTGAAAAAGGAAGTAGGGGTGAGC
-ATCACTGGCACCACCGTCCGTGAAGAGCGCTTTGAAGTACTGCACGGACGCGAAGCCAAGGTAACAGAAA
-CAGCAAGAGTACCAGCACCTGTTGAAATTCCTGTTACTCCACCAACTTTGGTCTCGGGCTTAAAAAATGT
-GACTGTCATAGAAGGTGAATCTGTCACCTTGGAGTGCCACATCTCTGGATACCCATCCCCGACAGTGACA
-TGGTACAGGGAAGACTACCAAATCGAAAGTTCCATTGACTTCCAGATAACCTTCCAGAGTGGAATTGCTC
-GTCTTATGATTCGCGAAGCATTTGCGGAAGACAGCGGGCGATTTACTTGCAGTGCTGTAAATGAGGCTGG
-AACCGTCAGCACATCCTGCTATCTGGCTGTGCAGGTGTCAGAAGAATTTGAAAAGGAAACCACAGCCGTG
-ACTGAGAAATTTACTACAGAAGAGAAACGCTTTGTTGAGTCAAGAGATGTGGTTATGACTGATACTAGCC
-TCACAGAGGAACAAGCAGGGCCTGGAGAACCTGCCGCGCCTTACTTTATTACAAAACCAGTGGTCCAGAA
-ACTGGTGGAAGGTGGGAGCGTGGTGTTTGGATGCCAAGTTGGCGGCAACCCAAAGCCCCATGTATACTGG
-AAAAAATCTGGTGTTCCTCTAACCACTGGATACAGATACAAAGTGAGTTACAACAAACAAACCGGTGAAT
-GCAAGCTGGTGATTTCTATGACTTTTGCTGATGATGCTGGAGAATACACTATTGTTGTTCGCAATAAGCA
-TGGAGAAACTTCTGCATCTGCTTCCTTGCTTGAAGAAGCTGATTATGAGTTACTGATGAAGTCCCAGCAA
-GAAATGCTTTATCAGACACAAGTGACTGCATTTGTTCAAGAACCTAAAGTTGGAGAAACAGCACCTGGAT
-TTGTATACTCTGAGTATGAAAAAGAGTATGAAAAAGAACAAGCCTTAATTAGGAAGAAAATGGCCAAAGA
-TACTGTAGTGGTCAGAACTTATGTAGAAGATCAGGAATTCCATATTTCTTCCTTTGAAGAGAGACTTATT
-AAAGAAATTGAATATAGAATAATAAAGACTACATTAGAAGAACTTCTTGAAGAAGATGGAGAAGAAAAGA
-TGGCAGTTGACATTTCTGAATCTGAAGCTGTTGAATCAGGATTTGATTTAAGAATCAAGAATTATAGAAT
-TCTTGAGGGGATGGGTGTCACTTTTCATTGCAAGATGTCTGGATATCCATTACCAAAGATTGCTTGGTAC
-AAAGATGGCAAGCGCATCAAACATGGAGAAAGATACCAAATGGACTTTCTACAAGATGGCAGAGCTAGTC
-TGCGTATACCTGTTGTTCTTCCAGAAGATGAAGGAATCTACACTGCATTTGCCAGCAATATTAAAGGAAA
-TGCAATTTGCTCAGGGAAATTGTATGTGGAGCCTGCTGCACCACTTGGAGCTCCGACTTACATTCCCACA
-CTAGAGCCAGTGAGCAGAATCAGATCTCTCTCTCCACGTTCAGTGAGCAGGTCTCCTATACGCATGTCTC
-CTGCACGGATGTCACCTGCAAGGATGTCTCCTGCACGGATGTCCCCTGCAAGAATGTCCCCTGGACGTAG
-GCTGGAGGAGACAGATGAGTCACAACTTGAGAGACTATATAAACCAGTCTTTGTGTTAAAACCTGTTTCT
-TTCAAATGTTTAGAAGGGCAAACTGCCAGATTTGACTTAAAGGTTGTTGGTAGACCTATGCCAGAGACGT
-TCTGGTTTCATGATGGCCAGCAAATTGTCAATGACTATACCCATAAAGTAGTCATTAAAGAAGATGGTAC
-TCAATCACTAATTATTGTCCCTGCCACACCCAGTGATTCTGGGGAATGGACTGTGGTTGCCCAAAACAGG
-GCAGGCAGATCTTCAATTTCAGTGATTTTAACTGTGGAAGCTGTGGAACATCAGGTAAAACCGATGTTTG
-TAGAAAAACTGAAAAATGTCAATATAAAGGAAGGTTCCCGACTTGAAATGAAAGTCAGAGCTACGGGTAA
-CCCCAACCCTGACATTGTATGGTTGAAAAACAGTGACATCATTGTGCCTCATAAATATCCCAAAATCAGA
-ATTGAAGGAACCAAGGGAGAAGCTGCCCTTAAAATCGATTCCACTGTCAGCCAAGATTCTGCCTGGTATA
-CTGCGACTGCTATTAATAAAGCTGGCAGAGACACTACAAGATGCAAAGTAAATGTTGAAGTTGAGTTTGC
-AGAGCCTGAGCCAGAGAGAAAGTTAATCATCCCACGGGGGACATATAGAGCAAAGGAGATTGCAGCCCCA
-GAACTGGAGCCCCTCCATTTGCGATATGGCCAAGAGCAATGGGAAGAAGGTGATCTCTATGACAAAGAGA
-AACAACAGAAACCATTTTTCAAGAAAAAACTCACTTCCTTAAGACTTAAGCGCTTTGGGCCTGCCCACTT
-TGAATGCAGGCTAACACCCATTGGTGACCCAACGATGGTGGTGGAGTGGCTCCATGATGGAAAGCCACTT
-GAAGCAGCCAACAGGCTCCGTATGATCAATGAATTTGGGTACTGCAGCCTTGATTATGGCGTTGCATATT
-CTAGAGACAGTGGTATCATTACTTGCAGAGCCACTAACAAATATGGAACAGATCACACATCTGCTACCCT
-TATTGTTAAAGATGAGAAAAGTCTTGTGGAAGAATCCCAATTGCCTGAGGGGAGGAAAGGCTTACAGAGA
-ATTGAAGAATTAGAGAGAATGGCTCATGAAGGTGCACTTACAGGTGTAACAACAGATCAGAAAGAAAAGC
-AAAAGCCAGACATTGTCTTGTACCCAGAGCCAGTTAGAGTACTTGAAGGGGAGACTGCAAGGTTCCGCTG
-CAGGGTAACAGGCTACCCTCAGCCCAAAGTCAACTGGTACCTCAATGGACAGCTCATCCGCAAAAGCAAA
-AGGTTCAGAGTTCGCTATGATGGTATCCATTACCTGGACATCGTGGACTGCAAATCATATGACACAGGTG
-AAGTGAAGGTCACCGCGGAAAATCCTGAAGGTGTGATAGAGCATAAAGTGAAGCTTGAGATTCAACAGAG
-GGAAGATTTTAGGTCTGTCCTTAGGAGAGCTCCTGAACCAAGGCCTGAGTTTCACGTACATGAACCAGGA
-AAGCTTCAGTTTGAAGTACAAAAAGTGGATAGACCTGTTGACACCACTGAAACCAAAGAAGTTGTGAAGT
-TGAAAAGGGCTGAAAGAATTACCCATGAAAAAGTGCCTGAAGAGTCGGAAGAGCTGCGCAGTAAATTCAA
-GCGCAGAACAGAAGAGGGCTATTATGAAGCCATTACCGCTGTGGAGCTCAAGTCTCGAAAGAAGGATGAA
-TCCTATGAGGAACTCCTCAGGAAGACAAAAGATGAACTTCTCCACTGGACCAAAGAGTTAACTGAAGAGG
-AAAAGAAAGCTCTTGCCGAAGAAGGCAAAATCACGATTCCAACTTTTAAACCTGACAAGATTGAACTAAG
-TCCTAGTATGGAGGCTCCAAAAATCTTCGAAAGAATCCAGAGCCAAACAGTGGGCCAAGGATCTGATGCA
-CACTTCCGGGTCAGAGTCGTGGGGAAACCAGACCCCGAATGTGAATGGTACAAAAATGGTGTCAAAATTG
-AACGGTCTGACCGGATCTACTGGTACTGGCCCGAAGACAATGTTTGTGAATTGGTCATAAGAGATGTGAC
-TGCTGAGGACTCTGCCAGCATCATGGTAAAAGCCATCAACATAGCTGGAGAAACCTCCAGTCACGCATTC
-TTACTTGTCCAAGCCAAGCAATTGATCACTTTCACACAGGAATTACAAGATGTTGTTGCTAAGGAAAAAG
-ACACTATGGCAACCTTTGAATGTGAAACTTCAGAACCATTTGTCAAAGTGAAATGGTATAAAGATGGTAT
-GGAGGTTCATGAGGGAGATAAATACAGGATGCACTCTGACAGAAAGGTTCACTTCCTCTCCATACTGACC
-ATTGATACGTCTGATGCTGAAGATTACAGCTGTGTACTTGTGGAAGATGAAAATGTCAAAACGACTGCTA
-AACTTATTGTTGAAGGTGCAGTTGTTGAGTTTGTGAAAGAACTTCAGGACATAGAAGTTCCAGAATCATA
-TTCAGGAGAATTAGAGTGCATTGTATCCCCAGAAAATATAGAAGGAAAATGGTATCATAATGATGTGGAG
-CTTAAATCCAATGGCAAATATACAATTACATCTCGTCGTGGACGTCAGAACCTCACGGTCAAGGATGTAA
-CCAAGGAGGACCAGGGAGAATACAGCTTTGTCATCGACGGGAAAAAGACAACCTGTAAATTAAAGATGAA
-ACCCCGCCCCATTGCTATCCTACAAGGACTTAGTGACCAAAAAGTCTGTGAGGGTGACATTGTTCAGCTT
-GAAGTTAAAGTCTCCTTGGAAAGTGTGGAAGGCGTCTGGATGAAAGACGGCCAAGAAGTGCAGCCCAGTG
-ACAGGGTTCACATTGTGATAGACAAACAATCTCATATGCTGCTCATTGAAGACATGACTAAGGAAGATGC
-TGGAAATTACTCTTTCACCATTCCAGCCCTTGGCCTCTCCACCAGTGGGCGTGTCTCTGTCTATAGTGTG
-GACGTGATAACACCTCTAAAAGATGTTAATGTGATTGAAGGCACCAAGGCTGTGCTTGAATGTAAGGTGT
-CAGTCCCTGATGTGACTTCTGTTAAGTGGTACTTAAATGATGAACAAATCAAGCCTGATGACCGTGTACA
-GGCCATTGTGAAAGGTACTAAACAGCGACTAGTCATTAACCGAACTCATGCTTCAGACGAAGGACCTTAC
-AAGCTGATAGTTGGCAGAGTTGAAACCAACTGTAATCTCTCTGTAGAAAAAATTAAAATTATCAGAGGTC
-TTCGTGACCTTACCTGTACAGAAACTCAAAATGTGGTGTTTGAGGTTGAGCTGTCCCACTCTGGAATTGA
-TGTCCTGTGGAATTTTAAGGACAAGGAAATCAAGCCCAGTTCTAAATATAAAATTGAAGCACATGGAAAA
-ATATATAAATTGACAGTTCTAAATATGATGAAAGATGATGAAGGAAAATACACATTTTACGCGGGAGAAA
-ATATCACATCTGGAAAACTTACTGTGGCAGGTGGGGCCATCTCCAAGCCACTCACAGATCAGACCGTAGC
-TGAATCCCAGGAAGCTGTGTTTGAATGTGAAGTTGCCAACCCAGATTCCAAAGGCGAATGGTTGAGGGAT
-GGCAAACACCTACCACTGACTAACAACATCAGAAGTGAGTCTGATGGCCACAAAAGGAGACTTATCATTG
-CTGCCACCAAATTAGATGACATTGGAGAATATACCTACAAGGTGGCCACCTCCAAAACATCTGCCAAACT
-CAAAGTTGAAGCTGTCAAAATTAAGAAGACTCTGAAGAACCTCACAGTGACAGAAACACAGGATGCTGTT
-TTCACTGTCGAGCTTACACACCCTAATGTCAAAGGTGTCCAGTGGATCAAAAATGGAGTTGTGCTGGAAT
-CCAATGAAAAGTATGCTATCTCTGTCAAAGGAACAATTTACTCTCTGAGGATTAAAAACTGTGCCATCGT
-GGATGAGTCTGTTTATGGCTTCAGGCTTGGAAGGCTTGGAGCCAGTGCCAGACTGCACGTGGAGACTGTC
-AAGATCATTAAAAAGCCAAAGGATGTGACAGCCTTGGAAAATGCCACTGTTGCCTTTGAAGTTAGTGTTT
-CCCATGACACTGTTCCAGTAAAATGGTTCCATAAGAGTGTGGAAATTAAGCCAAGTGACAAACACAGACT
-GGTCTCAGAAAGGAAAGTCCACAAGCTGATGCTGCAGAACATCTCCCCCTCAGATGCTGGGGAATACACA
-GCTGTGGTCGGGCAATTGGAATGCAAAGCAAAACTGTTTGTGGAGACATTACATATTACAAAAACCATGA
-AAAATATCGAGGTGCCTGAGACCAAAACTGCCTCTTTTGAGTGTGAGGTGTCCCACTTCAATGTCCCTTC
-CATGTGGCTGAAGAATGGTGTGGAAATTGAGATGAGTGAAAAGTTCAAGATAGTTGTGCAGGGAAAACTC
-CATCAGCTGATCATCATGAACACCAGCACAGAGGACTCGGCAGAATACACATTTGTCTGTGGCAATGACC
-AAGTCAGTGCCACCCTGACAGTCACCCCAATCATGATTACTTCCATGCTGAAAGACATCAACGCTGAAGA
-AAAAGACACTATTACTTTTGAGGTGACAGTGAACTATGAAGGCATCTCTTACAAATGGTTAAAGAATGGT
-GTGGAAATCAAATCAACTGACAAGTGCCAGATGAGAACCAAAAAGCTCACACACTCACTGAACATCAGGA
-ATGTTCACTTTGGGGATGCTGCTGACTACACCTTTGTGGCTGGAAAAGCAACATCAACAGCCACACTTTA
-TGTGGAAGCTCGTCATATAGAATTTAGGAAACACATTAAGGACATTAAGGTACTGGAGAAGAAGCGAGCC
-ATGTTTGAATGTGAAGTTTCTGAACCTGACATCACTGTACAGTGGATGAAAGATGACCAGGAACTGCAGA
-TCACAGACAGAATAAAGATTCAGAAGGAGAAATATGTCCACCGCCTTCTGATCCCATCCACCCGGATGTC
-TGATGCTGGGAAGTACACAGTGGTGGCAGGAGGCAACGTGTCAACTGCAAAACTCTTTGTAGAAGGCAGA
-GATGTTCGCATCCGAAGTATTAAAAAGGAGGTTCAGGTCATTGAGAAACAGCGTGCTGTTGTTGAATTTG
-AGGTCAATGAAGACGATGTTGATGCCCACTGGTATAAAGATGGCATTGAAATCAATTTCCAAGTTCAAGA
-ACGACACAAATATGTAGTGGAAAGAAGAATCCACCGAATGTTTATCTCTGAGACCAGACAGAGCGATGCA
-GGAGAATACACCTTTGTGGCAGGAAGGAACAGGAGTTCTGTCACTCTCTATGTCAATGCTCCTGAACCGC
-CCCAAGTTCTGCAGGAGCTCCAGCCTGTCACTGTGCAGTCTGGCAAGCCTGCCCGCTTCTGTGCCGTGAT
-ATCCGGAAGACCACAGCCCAAAATTTCCTGGTACAAGGAAGAGCAGCTGCTTTCCACTGGCTTCAAGTGC
-AAATTTCTTCATGATGGGCAAGAGTACACGCTTTTGCTAATTGAAGCCTTCCCAGAGGATGCGGCAGTCT
-ATACCTGTGAAGCCAAGAATGACTATGGTGTTGCCACAACATCAGCTTCACTCTCAGTGGAAGTTCCAGA
-AGTTGTGTCTCCTGATCAGGAAATGCCTGTTTATCCACCTGCCATCATCACCCCGCTTCAGGACACTGTC
-ACTTCTGAAGGGCAGCCAGCCCGTTTTCAATGCCGGGTTTCTGGAACAGATCTAAAAGTGTCGTGGTACA
-GCAAAGACAAGAAAATCAAGCCATCTCGGTTCTTTAGAATGACTCAATTTGAAGACACTTATCAACTGGA
-AATTGCCGAAGCTTATCCAGAAGATGAAGGAACTTACACGTTTGTTGCTAGTAATGCTGTAGGCCAAGTA
-TCAAGCACAGCCAACCTGAGTCTGGAAGCTCCTGAATCAATTTTGCATGAGAGGATTGAACAAGAGATTG
-AGATGGAAATGAAAGAGTTTTCTAGTTCTTTTCTGTCTGCCGAGGAAGAAGGACTTCATAGCGCCGAACT
-TCAATTATCTAAAATAAATGAAACACTTGAACTTTTGTCTGAATCTCCAGTTTACTCAACTAAATTTGAT
-TCCGAAAAGGAAGGCACTGGCCCAATTTTCATCAAAGAAGTGTCAAATGCTGATATAAGCATGGGGGATG
-TGGCTACACTGTCTGTAACTGTCATTGGCATCCCCAAACCTAAAATTCAGTGGTTCTTTAATGGAGTGCT
-ATTAACCCCTTCTGCTGACTACAAATTTGTTTTTGACGGTGATGATCATAGCCTGATCATTCTGTTCACC
-AAATTGGAGGATGAGGGAGAGTATACATGTATGGCCAGTAATGACTATGGAAAGACAATATGTAGTGCCT
-ATCTAAAAATTAATTCCAAAGGAGAGGGTCACAAAGACACTGAAACAGAATCAGCAGTGGCAAAATCTCT
-GGAAAAGCTGGGAGGTCCTTGTCCTCCTCACTTCCTTAAGGAGTTAAAACCAATTCGCTGTGCTCAAGGG
-CTTCCTGCCATCTTTGAGTACACAGTGGTTGGAGAGCCTGCCCCTACTGTTACATGGTTCAAAGAAAACA
-AGCAGCTTTGCACCAGTGTTTATTACACTATCATTCATAACCCTAATGGCTCTGGAACTTTCATTGTCAA
-TGACCCTCAGAGGGAAGACAGTGGCCTCTATATCTGTAAAGCAGAGAATATGTTGGGTGAGTCCACCTGT
-GCAGCAGAGCTGCTTGTGCTTCTGGAAGACACAGACATGACTGATACCCCCTGCAAAGCAAAGTCCACAC
-CAGAGGCTCCTGAGGATTTTCCACAGACACCCTTAAAGGGTCCCGCAGTTGAAGCACTTGACTCAGAGCA
-GGAAATTGCAACGTTTGTAAAAGACACCATTTTGAAAGCTGCTTTAATTACAGAAGAAAACCAGCAACTA
-TCTTATGAGCATATTGCTAAAGCCAATGAATTGAGCAGTCAGCTTCCTTTGGGAGCTCAGGAATTGCAAT
-CCATTTTGGAGCAAGACAAGCTCACTCCTGAAAGCACCAGGGAATTTCTTTGCATCAATGGCAGTATTCA
-CTTTCAGCCTCTCAAGGAACCATCTCCCAACCTACAGCTGCAGATTGTACAGTCCCAGAAAACCTTCTCC
-AAAGAAGGTATTCTAATGCCTGAAGAGCCTGAGACACAGGCAGTTCTATCAGATACCGAGAAAATCTTCC
-CAAGTGCCATGTCCATAGAACAAATTAATTCATTAACAGTTGAGCCTCTGAAAACTTTATTAGCTGAACC
-TGAAGGGAATTATCCACAGTCTTCAATAGAACCTCCAATGCATTCTTATCTAACCTCTGTGGCTGAGGAA
-GTACTTTCACCAAAAGAAAAGACAGTATCTGACACCAACAGAGAGCAAAGAGTGACTCTTCAAAAGCAAG
-AGGCACAAAGTGCGCTCATCTTGAGTCAGAGCTTAGCTGAGGGACACGTGGAGAGTCTCCAGAGTCCTGA
-TGTCATGATCTCTCAGGTAAACTATGAGCCCCTAGTCCCTTCAGAACACTCATGCACAGAAGGAGGTAAA
-ATTTTGATAGAAAGTGCAAATCCACTGGAAAATGCAGGGCAAGATTCTGCGGTCAGAATTGAGGAAGGCA
-AGTCCTTAAGATTTCCACTAGCACTTGAAGAAAAGCAGGTACTGCTCAAAGAAGAGCATTCTGACAACGT
-GGTGATGCCCCCAGACCAAATCATTGAGTCTAAAAGAGAGCCCGTGGCAATAAAGAAAGTGCAGGAGGTA
-CAGGGAAGGGACCTTCTTTCTAAGGAAAGCTTGCTTTCTGGTATTCCAGAAGAGCAGAGATTAAACCTGA
-AAATTCAAATCTGCCGGGCTTTGCAAGCAGCCGTGGCCAGCGAGCAGCCAGGTCTTTTCTCTGAGTGGCT
-AAGAAATATTGAAAAGGTGGAGGTCGAGGCTGTAAACATCACCCAAGAGCCCAGACACATCATGTGCATG
-TACCTTGTTACTTCGGCAAAGTCTGTAACAGAAGAAGTAACCATCATTATTGAAGATGTTGATCCTCAAA
-TGGCTAACCTGAAAATGGAACTTAGGGATGCTTTGTGTGCTATTATATATGAGGAAATAGACATCCTAAC
-AGCTGAGGGTCCTAGAATTCAGCAAGGAGCCAAAACAAGTTTGCAAGAAGAAATGGATTCTTTTTCAGGT
-TCACAGAAGGTTGAACCCATTACTGAACCAGAAGTTGAATCTAAATATCTGATCTCAACTGAAGAGGTCA
-GTTATTTTAACGTGCAAAGTAGGGTTAAATATTTGGATGCCACACCTGTCACTAAAGGGGTTGCTTCAGC
-TGTTGTCTCTGACGAAAAACAAGATGAGAGTCTGAAACCATCAGAGGAAAAAGAGGAGTCTTCCTCTGAA
-AGTGGTACTGAGGAGGTTGCTACAGTAAAGATACAGGAAGCTGAGGGTGGCTTAATCAAAGAGGATGGCC
-CCATGATACATACACCTTTAGTGGACACTGTTTCTGAGGAAGGTGATATTGTACACCTCACAACATCCAT
-AACAAATGCTAAAGAGGTGAATTGGTATTTTGAGAATAAACTGGTGCCTTCAGATGAAAAGTTCAAGTGT
-TTACAAGATCAAAATACATATACGCTAGTCATCGACAAAGTAAATACCGAAGACCATCAAGGAGAGTATG
-TCTGTGAGGCCTTGAATGACAGCGGAAAAACAGCAACTTCAGCCAAACTCACTGTAGTAAAAAGAGCTGC
-CCCAGTGATCAAGAGGAAAATCGAACCCCTGGAAGTAGCACTGGGCCACCTAGCCAAATTCACCTGTGAG
-ATCCAAAGTGCTCCCAATGTCCGGTTCCAGTGGTTTAAAGCTGGCCGAGAAATTTATGAGAGTGACAAGT
-GTTCTATTCGATCTTCAAAGTATATCTCCAGCCTTGAAATCCTGAGAACCCAGGTGGTTGACTGCGGCGA
-GTATACATGCAAAGCTTCCAATGAGTATGGCAGTGTCAGCTGTACAGCCACACTAACTGTGACAGTGCCT
-GGAGGTGAAAAGAAAGTTCGCAAATTACTTCCGGAACGTAAACCTGAACCAAAGGAAGAAGTTGTTCTGA
-AAAGCGTTCTAAGAAAAAGACCTGAAGAAGAAGAACCTAAAGTAGAACCTAAAAAACTAGAAAAAGTTAA
-AAAACCTGCAGTACCAGAACCACCACCTCCAAAACCTGTTGAAGAGGTTGAAGTACCTACTGTTACAAAA
-AGGGAAAGGAAGATTCCTGAACCAACAAAAGTGCCTGAAATCAAGCCAGCAATACCTCTCCCTGCACCTG
-AACCGAAACCAAAGCCCGAAGCAGAAGTGAAAACAATCAAACCACCTCCTGTGGAACCTGAACCAACCCC
-CATCGCTGCCCCAGTAACAGTGCCAGTGGTTGGAAAGAAAGCAGAAGCCAAAGCACCTAAGGAAGAGGCT
-GCCAAGCCAAAAGGTCCTATCAAAGGTGTACCCAAAAAGACTCCTTCACCAATAGAAGCCGAAAGGAGAA
-AGTTAAGGCCAGGAAGTGGTGGAGAGAAACCTCCTGATGAAGCCCCGTTCACCTACCAGCTAAAGGCTGT
-GCCACTGAAGTTTGTGAAAGAAATCAAAGACATCATCTTGACAGAATCAGAGTTCGTTGGCTCTTCAGCA
-ATCTTTGAATGTTTGGTCTCCCCTTCCACTGCAATTACAACCTGGATGAAAGACGGTAGCAATATCCGTG
-AGAGTCCCAAGCACAGGTTTATTGCAGATGGTAAAGACAGAAAGCTGCACATCATTGATGTTCAACTTTC
-CGATGCTGGTGAATACACCTGTGTTTTACGTTTGGGAAACAAAGAAAAGACCTCCACGGCTAAACTTGTT
-GTAGAAGAACTTCCTGTGCGTTTTGTAAAAACACTGGAAGAGGAAGTCACAGTGGTCAAAGGACAGCCAT
-TGTACTTGAGCTGCGAGTTAAACAAAGAGCGTGACGTGGTCTGGAGGAAGGATGGCAAGATTGTGGTGGA
-GAAACCTGGCCGAATTGTGCCAGGCGTCATTGGCTTGATGCGGGCTCTGACCATCAACGATGCAGATGAC
-ACAGATGCTGGAACATACACAGTTACTGTGGAAAACGCCAACAACCTGGAGTGTTCATCTTGCGTAAAAG
-TAGTAGAAGTCATTAGAGATTGGCTGGTGAAACCTATACGAGACCAGCATGTGAAACCCAAGGGGACAGC
-TATTTTTGCCTGTGATATAGCAAAAGATACTCCAAACATTAAGTGGTTCAAAGGATATGATGAAATCCCT
-GCGGAACCAAATGATAAGACTGAAATACTGAGAGATGGAAATCATCTGTACCTCAAAATTAAGAATGCTA
-TGCCAGAAGATATTGCTGAGTATGCAGTGGAAATTGAAGGAAAAAGATACCCTGCAAAGCTGACACTTGG
-AGAGCGTGAAGTTGAACTGCTTAAACCAATAGAGGACGTTACCATTTATGAGAAAGAAAGTGCAAGCTTT
-GATGCAGAAATCTCAGAGGCAGACATTCCTGGACAATGGAAACTGAAAGGAGAACTTCTAAGGCCCTCAC
-CTACTTGTGAAATCAAAGCAGAAGGTGGAAAACGCTTCTTAACTTTGCACAAAGTCAAACTGGACCAAGC
-TGGTGAAGTCCTCTACCAGGCCCTTAATGCAATTACAACTGCCATTTTGACAGTAAAAGAAATCGAACTT
-GACTTTGCTGTGCCCCTGAAGGATGTCACTGTTCCAGAAAGGCGACAGGCTCGATTCGAATGTGTCCTCA
-CCCGAGAGGCAAATGTTATATGGTCCAAAGGACCTGATATAATTAAGTCATCTGACAAATTTGATATCAT
-CGCTGATGGAAAGAAACATATTCTTGTTATTAATGATTCTCAATTTGATGATGAAGGGGTCTATACTGCT
-GAGGTGGAGGGCAAGAAGACCTCAGCTCGGTTGTTTGTCACAGGTATAAGACTGAAATTCATGTCACCTC
-TTGAAGATCAAACAGTAAAAGAAGGTGAAACAGCAACTTTTGTTTGTGAACTTTCTCATGAAAAAATGCA
-TGTAGTCTGGTTCAAAAATGATGCCAAACTCCATACAAGCAGAACAGTACTCATCTCTTCTGAGGGCAAG
-ACTCACAAATTGGAAATGAAAGAAGTGACATTGGATGATATATCTCAGATAAAAGCTCAAGTCAAGGAGC
-TGAGCTCCACAGCACAGCTGAAGGTCTTAGAGGCCGATCCCTACTTCACTGTGAAATTACATGACAAAAC
-TGCAGTGGAGAAGGATGAGATTACTTTGAAGTGTGAAGTGAGCAAAGATGTACCAGTGAAATGGTTCAAA
-GATGGTGAAGAGATTGTCCCTTCACCCAAATATTCTATCAAGGCAGATGGCCTGCGCCGCATCTTAAAAA
-TCAAAAAGGCGGACCTTAAAGATAAAGGCGAATATGTGTGTGACTGTGGCACAGACAAGACCAAGGCAAA
-TGTTACTGTTGAGGCTCGACTAATAAAAGTGGAAAAGCCTCTGTACGGAGTAGAGGTGTTTGTTGGTGAA
-ACAGCCCACTTTGAAATTGAACTTTCTGAACCTGATGTTCACGGCCAGTGGAAGCTGAAAGGACAGCCTT
-TGACAGCTTCCCCTGACTGTGAAATCATTGAGGATGGAAAGAAGCATATTCTGATCCTTCATAACTGTCA
-GCTGGGTATGACAGGAGAGGTTTCCTTCCAGGCTGCTAATGCCAAATCTGCAGCCAATCTGAAAGTGAAA
-GAATTGCCTCTTATCTTCATCACACCTCTCAGTGATGTTAAAGTCTTCGAGAAAGATGAGGCTAAGTTTG
-AGTGTGAAGTATCCAGGGAGCCCAAAACATTCCGTTGGCTAAAAGGAACCCAGGAAATCACAGGTGATGA
-CAGATTTGAGCTTATAAAGGATGGCACTAAGCATTCAATGGTGATCAAGTCAGCTGCTTTTGAAGATGAA
-GCAAAATACATGTTTGAAGCTGAAGATAAGCACACAAGTGGCAAACTGATCATTGAAGGAATCCGGCTCA
-AATTCCTCACCCCTCTCAAAGATGTAACTGCCAAAGAGAAGGAAAGTGCTGTATTTACTGTGGAGTTATC
-TCATGATAACATCCGAGTTAAATGGTTCAAGAATGACCAGCGCCTACACACCACCAGGTCGGTCTCAATG
-CAAGACGAAGGGAAAACTCATTCGATCACATTCAAAGACCTGTCTATTGATGACACCTCCCAAATTAGAG
-TAGAAGCTATGGGGATGAGTTCAGAAGCTAAACTCACTGTGCTTGAGGGAGACCCATATTTTACAGGAAA
-ACTTCAAGATTATACTGGTGTAGAGAAAGATGAAGTTATTCTACAGTGTGAAATTAGCAAAGCAGATGCA
-CCAGTGAAATGGTTTAAGGATGGGAAGGAAATAAAGCCATCCAAAAATGCTGTTATTAAGGCAGATGGCA
-AGAAACGCATGCTAATCCTAAAGAAAGCCTTGAAATCAGATATTGGACAGTACACCTGTGACTGTGGGAC
-AGATAAGACCTCAGGAAAACTTGACATTGAGGATCGGGAAATTAAACTGGTGCGACCCCTGCACAGTGTG
-GAGGTGATGGAGACTGAGACAGCACGCTTTGAAACCGAAATCTCTGAAGATGATATCCACGCCAACTGGA
-AACTCAAGGGAGAGGCCCTACTCCAAACACCTGATTGTGAAATTAAGGAAGAAGGCAAAATACACTCCCT
-TGTTTTGCACAACTGTCGCCTGGACCAGACGGGTGGGGTGGATTTCCAAGCTGCCAATGTTAAATCTAGT
-GCCCACCTCCGAGTTAAGCCACGAGTAATTGGTCTTCTGAGGCCTTTAAAGGATGTCACCGTGACTGCAG
-GGGAAACAGCCACCTTCGACTGCGAGCTCTCCTACGAAGATATCCCAGTGGAATGGTATCTCAAAGGGAA
-GAAACTAGAGCCCAGCGATAAGGTGGTCCCACGTTCAGAAGGAAAAGTTCATACACTTACTCTGAGGGAT
-GTAAAGTTAGAAGATGCTGGGGAAGTCCAACTAACAGCAAAAGATTTCAAAACTCACGCCAACCTCTTTG
-TGAAAGAACCCCCAGTTGAATTCACTAAGCCTCTTGAGGACCAGACGGTCGAAGAGGGAGCCACTGCAGT
-GCTGGAGTGTGAAGTCTCCAGAGAAAATGCTAAGGTGAAATGGTTCAAAAATGGGACAGAAATCCTCAAA
-AGCAAGAAGTATGAAATTGTTGCTGATGGCAGGGTCAGAAAACTTGTTATACATGACTGTACCCCAGAGG
-ATATTAAAACATACACTTGTGATGCTAAGGATTTTAAGACTTCCTGTAACCTGAATGTCGTGCCTCCTCA
-TGTGGAATTCTTAAGACCACTCACCGACCTTCAAGTTAGAGAAAAAGAAATGGCTCGATTTGAGTGTGAA
-CTTTCCCGAGAAAATGCTAAGGTTAAGTGGTTTAAAGATGGTGCTGAAATTAAAAAGGGCAAAAAATATG
-ACATCATATCCAAGGGAGCAGTGCGCATTCTTGTCATCAACAAATGTCTACTGGATGATGAAGCTGAATA
-TTCCTGTGAAGTAAGGACAGCGAGAACTTCTGGCATGCTGACAGTTCTGGAAGAAGAAGCTGTCTTTACC
-AAAAATCTTGCCAACATTGAAGTTAGTGAAACAGACACTATAAAACTGGTTTGTGAAGTCTCCAAACCTG
-GCGCAGAAGTGATTTGGTATAAAGGGGATGAGGAGATCATTGAAACAGGAAGATATGAAATACTGACTGA
-AGGACGGAAGAGAATCCTGGTCATTCAGAACGCTCACCTTGAGGATGCTGGCAACTACAACTGTCGACTC
-CCAAGCTCTCGAACCGATGGCAAAGTCAAAGTACATGAACTGGCTGCTGAATTTATCTCAAAGCCTCAAA
-ACCTTGAAATACTTGAAGGAGAAAAGGCTGAATTTGTCTGCTCTATATCAAAAGAAAGCTTTCCAGTCCA
-GTGGAAGAGGGATGATAAGACACTTGAATCTGGAGATAAATATGACGTTATTGCTGATGGTAAAAAGAGG
-GTCCTAGTTGTGAAAGATGCCACATTACAAGATATGGGCACTTACGTTGTCATGGTAGGGGCCGCCAGAG
-CAGCAGCTCACTTGACAGTCATTGAAAAACTCAGGATCGTAGTTCCTCTTAAGGACACCCGGGTGAAGGA
-ACAACAGGAAGTTGTCTTCAACTGTGAAGTCAATACTGAAGGTGCCAAAGCCAAATGGTTCAGAAATGAA
-GAAGCTATATTTGATAGTTCAAAATACATCATTCTCCAAAAAGACCTAGTCTACACCCTCAGAATTAGAG
-ATGCACACTTAGATGACCAAGCCAACTATAATGTGTCTTTGACCAATCACAGAGGTGAAAATGTTAAAAG
-TGCAGCCAATCTAATAGTAGAAGAGGAAGACCTTAGGATTGTTGAGCCTCTTAAAGATATTGAAACAATG
-GAGAAGAAATCTGTCACATTCTGGTGCAAGGTGAATCGTCTCAATGTAACACTGAAGTGGACCAAAAATG
-GTGAAGAAGTGCCTTTTGACAACCGTGTCTCATACAGAGTTGATAAGTACAAGCACATGTTAACCATTAA
-AGACTGTGGCTTCCCAGATGAAGGTGAATACATTGTCACTGCTGGACAAGATAAATCTGTTGCTGAGCTT
-CTCATCATAGAAGCCCCGACAGAATTTGTGGAACACTTGGAAGATCAGACAGTCACTGAGTTCGATGACG
-CTGTCTTCTCCTGCCAGCTCTCCAGAGAGAAAGCCAATGTAAAATGGTACAGAAATGGGAGAGAAATCAA
-AGAAGGCAAAAAATACAAATTTGAAAAAGATGGAAGTATACACAGACTCATTATAAAAGATTGCAGGCTG
-GATGATGAGTGTGAATATGCTTGCGGGGTAGAAGACAGGAAGTCTCGTGCTAGACTTTTTGTGGAAGAAA
-TTCCTGTTGAGATCATCAGGCCTCCACAAGATATTCTTGAAGCCCCTGGTGCTGATGTTGTCTTTTTAGC
-AGAACTCAATAAAGATAAGGTGGAAGTCCAATGGCTAAGAAATAACATGGTTGTTGTCCAGGGTGATAAA
-CACCAGATGATGAGTGAAGGAAAGATACATCGACTACAGATTTGTGATATTAAGCCCCGTGACCAGGGTG
-AATACAGATTTATTGCCAAAGACAAAGAAGCCAGAGCTAAGCTTGAACTGGCAGCTGCACCAAAAATCAA
-GACAGCTGACCAAGACCTTGTGGTTGATGTTGGCAAGCCTCTGACAATGGTGGTGCCATATGATGCCTAC
-CCCAAAGCAGAAGCTGAATGGTTTAAAGAAAATGAACCTTTATCTACAAAAACCATTGATACTACGGCTG
-AACAAACTTCTTTCAGAATTTTAGAAGCCAAGAAAGGAGACAAAGGGAGGTATAAAATTGTGCTTCAGAA
-CAAACATGGAAAAGCAGAAGGATTCATCAATTTAAAAGTTATTGATGTTCCTGGGCCAGTACGTAACTTA
-GAAGTGACAGAAACATTTGATGGTGAAGTGAGCCTTGCTTGGGAAGAACCTTTAACTGATGGTGGAAGCA
-AAATCATAGGTTACGTTGTTGAAAGACGTGACATTAAGAGAAAGACCTGGGTTCTGGCCACAGACCGTGC
-AGAGAGTTGTGAGTTTACTGTCACTGGTCTACAGAAAGGAGGAGTTGAGTACCTATTCCGTGTGAGTGCA
-AGAAACAGAGTTGGCACTGGTGAGCCAGTAGAAACTGACAATCCTGTAGAAGCAAGGAGTAAATATGATG
-TTCCAGGCCCTCCTTTGAATGTAACCATCACTGATGTGAATCGATTTGGTGTCTCACTGACATGGGAACC
-ACCAGAGTATGATGGAGGTGCTGAGATCACAAACTACGTCATTGAATTAAGAGACAAGACTTCTATCAGG
-TGGGATACTGCCATGACTGTGAGAGCTGAAGACCTGTCTGCAACTGTTACTGATGTGGTAGAAGGACAGG
-AGTACAGTTTCCGAGTGAGAGCCCAAAATCGAATTGGAGTTGGAAAACCAAGTGCAGCCACACCCTTCGT
-CAAAGTTGCTGATCCAATTGAGAGACCAAGTCCTCCTGTAAACCTAACTTCCTCAGATCAGACTCAGTCA
-TCAGTTCAGCTCAAATGGGAACCTCCTCTGAAAGATGGAGGAAGCCCAATATTAGGCTATATAATTGAGC
-GATGCGAAGAAGGAAAAGATAATTGGATTCGTTGCAATATGAAACTTGTCCCTGAACTGACTTACAAGGT
-TACCGGATTGGAAAAAGGAAATAAATATTTATATAGAGTATCTGCAGAAAATAAAGCTGGTGTTTCAGAT
-CCATCTGAAATTCTTGGTCCTCTCACCGCTGACGATGCATTTGTTGAACCAACAATGGATTTAAGTGCAT
-TTAAAGATGGTCTGGAAGTTATTGTCCCAAATCCTATCACGATCCTGGTTCCAAGTACAGGCTATCCAAG
-GCCAACTGCAACCTGGTGTTTTGGAGATAAAGTACTAGAAACAGGGGACCGGGTGAAAATGAAGACCTTG
-TCTGCCTATGCCGAACTTGTCATTTCTCCAAGTGAACGTTCAGACAAGGGCATTTATACACTGAAATTAG
-AAAACCGTGTGAAAACAATTTCTGGGGAAATTGATGTCAATGTAATTGCTCGCCCAAGTGCACCCAAAGA
-ATTGAAATTTGGTGATATAACCAAGGACTCAGTACATTTGACTTGGGAACCACCTGATGATGATGGAGGA
-AGTCCGTTAACTGGATACGTTGTTGAAAAACGAGAAGTCAGCCGGAAAACATGGACTAAAGTTATGGACT
-TTGTGACTGATCTAGAATTCACAGTTCCTGATCTTGTTCAAGGAAAAGAGTACTTATTTAAAGTTTGTGC
-TCGTAACAAATGTGGCCCTGGAGAACCTGCATATGTTGATGAACCTGTAAATATGTCAACTCCTGCAACG
-GTACCTGACCCACCAGAGAATGTTAAATGGAGAGATCGAACAGCCAATAGCATCTTCTTAACATGGGATC
-CACCTAAAAATGATGGTGGTTCACGCATCAAAGGATATATAGTTGAAAGATGTCCACGTGGTTCTGATAA
-ATGGGTTGCCTGTGGAGAACCTGTTGCAGAAACAAAAATGGAAGTGACAGGTCTTGAGGAAGGCAAATGG
-TATGCCTACCGCGTGAAGGCCTTAAACAGGCAGGGTGCTAGCAAACCAAGCAGACCCACAGAGGAAATCC
-AGGCTGTGGACACACAAGAGGCCCCAGAAATCTTCCTCGATGTGAAGCTCCTTGCTGGTCTCACTGTAAA
-AGCTGGGACCAAGATTGAACTTCCTGCCACCGTAACCGGAAAACCTGAACCTAAAATAACTTGGACAAAG
-GCTGATATGATTCTGAAGCAGGACAAAAGAATTACCATTGAAAATGTCCCTAAGAAATCCACAGTGACTA
-TTGTTGATAGTAAGAGAAGTGACACTGGCACATATATCATTGAGGCTGTGAATGTGTGTGGCCGGGCCAC
-TGCTGTGGTGGAAGTGAACGTCTTAGATAAACCCGGACCACCAGCTGCCTTTGACATCACAGATGTAACC
-AATGAGTCATGTCTTCTAACATGGAACCCACCACGCGATGATGGTGGATCTAAGATCACAAACTATGTTG
-TGGAGAGACGAGCAACTGATAGTGAAGTGTGGCACAAGCTCTCATCCACCGTCAAGGATACAAACTTCAA
-GGCCACCAAATTAATCCCCAATAAAGAGTACATCTTCAGAGTTGCTGCAGAAAACATGTATGGTGTTGGT
-GAACCAGTTCAGGCCTCTCCAATAACAGCCAAATATCAGTTTGATCCACCTGGTCCTCCAACTCGCCTAG
-AACCTTCTGATATCACTAAAGACGCAGTGACTCTCACATGGTGTGAGCCAGATGATGATGGTGGCAGCCC
-AATCACAGGATACTGGGTTGAAAGACTGGATCCTGATACAGATAAATGGGTTAGATGCAATAAGATGCCA
-GTAAAGGACACAACATACAGAGTGAAAGGTCTCACTAATAAGAAAAAATACAGATTCCGTGTGTTGGCTG
-AAAATCTTGCTGGACCTGGAAAACCAAGCAAATCAACTGAACCAATCTTAATAAAGGATCCCATAGATCC
-TCCATGGCCCCCTGGAAAACCAACTGTAAAAGATGTAGGCAAAACATCAGTAAGGTTGAATTGGACAAAA
-CCAGAACATGATGGAGGTGCAAAGATTGAGTCTTATGTCATTGAAATGCTGAAGACTGGAACAGATGAGT
-GGGTCAGAGTGGCGGAAGGGGTTCCCACCACTCAGCACTTGCTCCCAGGGCTCATGGAAGGACAGGAATA
-CTCATTCCGAGTTAGAGCTGTGAATAAGGCTGGGGAAAGTGAACCCAGTGAACCCAGTGACCCTGTGCTT
-TGCCGGGAGAAGCTATATCCTCCATCACCACCACGCTGGCTTGAAGTTATTAATATCACAAAAAATACAG
-CAGACCTAAAATGGACAGTTCCTGAGAAAGATGGAGGGTCCCCCATCACCAACTACATTGTGGAAAAGAG
-AGACGTCAGGCGAAAAGGCTGGCAAACAGTGGATACCACTGTCAAGGACACCAAGTGCACAGTCACCCCA
-CTGACTGAGGGCTCTTTATATGTGTTCCGAGTTGCTGCAGAAAATGCTATAGGACAAAGCGACTACACCG
-AAATTGAGGACTCTGTGCTGGCCAAAGACACCTTTACCACTCCTGGACCACCCTACGCCCTGGCAGTGGT
-TGATGTGACAAAACGACATGTTGACCTAAAGTGGGAGCCACCTAAAAATGATGGTGGAAGACCAATACAG
-AGATATGTCATTGAGAAGAAAGAAAGGTTAGGTACCCGTTGGGTGAAAGCTGGAAAGACTGCAGGACCTG
-ACTGTAACTTCAGAGTAACTGATGTCATCGAAGGAACAGAGGTCCAGTTTCAGGTTCGGGCTGAAAATGA
-AGCTGGAGTTGGCCACCCAAGTGAACCCACAGAAATCCTATCCATTGAAGATCCAACAAGTCCTCCCTCA
-CCACCCCTTGACCTACATGTGACTGATGCTGGGAGAAAACACATTGCCATTGCTTGGAAGCCTCCAGAGA
-AAAATGGTGGAAGTCCTATCATAGGATACCATGTTGAAATGTGTCCAGTAGGCACTGAGAAATGGATGAG
-AGTTAATTCTCGCCCAATAAAGGACTTGAAATTCAAGGTTGAAGAAGGTGTTGTTCCTGACAAAGAATAT
-GTCCTGAGAGTGAGAGCAGTCAATGCTATTGGTGTCAGCGAGCCATCTGAAATCTCTGAAAATGTGGTTG
-CCAAAGACCCAGACTGCAAGCCAACAATTGACCTGGAGACTCATGACATTATTGTTATTGAAGGTGAAAA
-GTTAAGCATTCCTGTTCCCTTCAGAGCTGTCCCAGTTCCAACTGTTAGTTGGCATAAAGATGGCAAAGAA
-GTTAAAGCAAGTGATAGATTAACAATGAAGAATGATCACATCTCTGCACACCTTGAAGTTCCCAAGAGTG
-TCCGTGCAGATGCCGGAATTTATACCATTACACTGGAGAATAAGCTCGGCTCAGCAACAGCCTCAATCAA
-TGTCAAAGTCATAGGCCTACCTGGACCATGCAAAGATATTAAAGCAAGTGACATTACCAAGAGTTCTTGT
-AAGTTAACTTGGGAACCTCCAGAATTTGATGGTGGAACCCCAATTCTTCATTATGTCCTGGAGCGCAGAG
-AAGCTGGGAGGAGAACATATATACCAGTCATGTCTGGTGAGAACAAACTGTCATGGACTGTGAAGGATCT
-CATACCAAATGGTGAATACTTCTTCCGTGTTAAAGCAGTCAACAAGGTTGGTGGAGGAGAATATATTGAA
-CTGAAAAATCCAGTCATTGCTCAAGATCCAAAGCAACCCCCTGATCCACCTGTAGATGTAGAGGTTCATA
-ATCCTACAGCGGAGGCAATGACTATTACATGGAAGCCACCTTTGTATGATGGAGGGAGCAAGATAATGGG
-CTACATCATAGAGAAGATTGCTAAGGGTGAAGAAAGGTGGAAGAGATGCAATGAACACCTGGTACCAATC
-CTGACCTATACAGCAAAAGGACTTGAAGAGGGGAAAGAGTACCAATTCCGTGTGCGAGCAGAGAACGCCG
-CGGGTATTAGTGAACCTTCTCGGGCTACTCCTCCAACCAAAGCTGTAGATCCCATTGATGCCCCCAAAGT
-CATTCTGAGAACAAGCCTAGAAGTGAAACGAGGTGATGAAATAGCACTTGATGCAAGTATTTCTGGATCA
-CCTTACCCAACTATTACATGGATAAAGGATGAAAATGTTATTGTACCAGAGGAAATTAAGAAGCGTGCAG
-CACCCTTGGTTAGGAGAAGGAAGGGTGAAGTTCAAGAAGAAGAACCATTTGTCCTGCCTCTGACACAGCG
-TTTGAGTATTGACAACAGCAAAAAGGGAGAATCTCAGCTACGCGTCCGAGATTCTCTCCGACCTGACCAT
-GGTCTGTATATGATCAAAGTTGAAAATGACCACGGTATTGCAAAAGCTCCTTGTACTGTCAGTGTGTTAG
-ATACACCGGGACCACCAATCAACTTTGTATTTGAAGATATCAGAAAGACCTCAGTCCTTTGTAAATGGGA
-ACCACCCCTTGATGATGGTGGCAGTGAAATCATAAACTACACTTTGGAAAAGAAAGACAAGACAAAACCC
-GACTCAGAATGGATTGTTGTCACTTCAACACTTAGACATTGCAAATATTCAGTAACAAAACTGATTGAAG
-GAAAAGAGTACCTCTTCCGTGTAAGAGCTGAAAACAGATTTGGGCCAGGTCCACCATGTGTTTCAAAGCC
-ACTTGTGGCTAAAGATCCATTTGGACCACCTGATGCACCAGATAAGCCCATTGTGGAAGATGTTACCAGC
-AACAGTATGCTAGTGAAATGGAATGAACCAAAAGATAATGGAAGCCCCATTTTGGGTTACTGGCTTGAAA
-AACGTGAAGTTAACAGTACACATTGGTCTCGTGTCAACAAAAGCCTTCTGAATGCCTTGAAAGCCAATGT
-AGATGGCTTATTAGAAGGACTCACCTATGTCTTCAGAGTATGTGCTGAAAATGCAGCTGGACCTGGAAAG
-TTCAGTCCACCTTCAGATCCCAAAACAGCACATGATCCAATCTCTCCTCCTGGGCCACCTATCCCAAGAG
-TCACTGACACAAGCTCTACAACTATTGAACTAGAATGGGAACCCCCAGCTTTCAATGGTGGTGGGGAAAT
-TGTTGGCTATTTTGTTGATAAGCAGTTGGTTGGCACAAATGAATGGTCACGCTGCACAGAGAAGATGATC
-AAGGTCCGTCAGTACACCGTCAAAGAAATCCGAGAGGGTGCTGATTACAAACTTCGGGTGAGTGCTGTCA
-ATGCCGCAGGGGAAGGACCGCCTGGAGAAACACAACCTGTTACTGTGGCTGAACCACAAGAGCCTCCAGC
-TGTGGAACTGGATGTTTCTGTCAAGGGTGGAATACAAATAATGGCTGGGAAGACTCTTAGAATTCCAGCT
-GTGGTGACTGGTCGCCCTGTACCTACAAAAGTATGGACCAAAGAAGAAGGGGAGCTGGATAAAGACCGTG
-TTGTAATAGACAACGTTGGAACCAAATCTGAACTAATTATCAAGGATGCACTGCGAAAAGACCATGGCAG
-ATATGTGATTACAGCTACAAATAGCTGTGGTTCCAAATTTGCAGCAGCCAGGGTAGAAGTTTTTGATGTC
-CCTGGTCCAGTTCTTGACTTAAAACCTGTTGTAACAAACAGAAAAATGTGTCTACTTAACTGGTCTGATC
-CAGAAGATGATGGAGGAAGTGAAATAACAGGCTTTATCATTGAAAGAAAAGATGCCAAGATGCATACTTG
-GAGACAACCAATAGAGACTGAGAGATCTAAATGTGACATCACAGGTCTGCTTGAGGGACAAGAATATAAG
-TTCCGTGTTATTGCCAAGAACAAGTTTGGCTGTGGCCCTCCTGTTGAAATAGGACCAATTCTTGCAGTTG
-ATCCACTAGGTCCTCCAACATCTCCAGAGAGGCTCACATACACTGAAAGGACAAAGTCCACTATCACACT
-TGACTGGAAAGAGCCCCGCAGTAATGGTGGCAGTCCCATCCAAGGATATATCATTGAAAAACGGCGTCAT
-GACAAACCTGACTTTGAAAGAGTTAACAAGCGACTCTGCCCAACCACATCTTTTCTGGTTGAAAATCTTG
-ATGAACACCAAATGTATGAGTTCCGTGTCAAAGCTGTCAATGAAATTGGTGAAAGTGAACCATCCCTACC
-TCTTAATGTAGTCATACAAGATGATGAAGTGCCTCCAACTATTAAGTTGCGTCTGAGTGTTCGAGGAGAC
-ACTATCAAAGTTAAGGCAGGAGAGCCTGTCCACATCCCTGCAGATGTGACAGGCCTTCCAATGCCTAAGA
-TTGAATGGTCCAAAAATGAAACTGTAATTGAAAAACCCACTGATGCACTTCAGATAACCAAGGAAGAGGT
-ATCCCGAAGTGAGGCAAAAACTGAGCTTAGCATTCCCAAAGCGGTCCGGGAGGACAAAGGCACTTACACA
-GTTACTGCTTCCAATCGCCTTGGCTCAGTGTTCCGAAATGTTCACGTTGAAGTATATGACCGCCCATCCC
-CACCAAGAAATCTTGCTGTTACTGACATTAAAGCTGAATCTTGCTACTTGACATGGGATGCCCCTCTTGA
-TAATGGTGGCAGTGAAATCACCCATTATGTTATTGACAAACGTGATGCAAGTAGGAAGAAAGCAGAATGG
-GAGGAAGTCACCAACACTGCTGTAGAGAAAAGATATGGGATCTGGAAACTTATCCCCAATGGTCAGTATG
-AGTTCCGAGTCAGGGCAGTGAATAAATATGGAATCAGTGATGAGTGCAAATCAGATAAAGTAGTCATTCA
-AGATCCTTATCGCCTTCCTGGACCTCCAGGAAAACCAAAAGTTTTGGCACGCACCAAAGGATCAATGCTA
-GTGAGCTGGACTCCTCCTTTGGACAATGGTGGCTCTCCAATTACTGGCTACTGGCTGGAGAAAAGAGAAG
-AGGGAAGTCCTTATTGGTCACGTGTTAGCCGAGCACCAATAACCAAAGTGGGATTGAAAGGCGTGGAATT
-TAATGTTCCTCGTTTGCTTGAAGGCGTTAAATACCAGTTCAGAGCCATGGCAATAAATGCTGCAGGAATT
-GGTCCTCCCAGTGAACCATCAGATCCAGAGGTTGCAGGAGATCCCATATTTCCACCGGGGCCACCTTCTT
-GCCCAGAAGTTAAAGATAAAACGAAGTCAAGCATCTCACTAGGATGGAAACCTCCAGCCAAAGATGGTGG
-CAGCCCAATCAAAGGATACATTGTAGAAATGCAAGAAGAAGGTACTACTGACTGGAAAAGAGTAAATGAA
-CCAGACAAACTTATAACTACCTGTGAATGTGTGGTGCCTAATCTGAAAGAGCTCAGGAAGTACAGATTCA
-GAGTGAAAGCTGTCAATGAAGCTGGTGAATCTGAACCAAGTGATACAACTGGGGAGATCCCTGCCACTGA
-TATTCAAGAGGAACCAGAAGTTTTCATTGACATTGGAGCACAGGACTGTCTGGTTTGTAAAGCTGGCTCA
-CAGATTAGGATTCCTGCTGTCATCAAGGGACGCCCAACACCAAAATCATCTTGGGAATTTGATGGAAAGG
-CAAAGAAAGCAATGAAGGATGGAGTTCATGACATACCCGAAGATGCACAGCTGGAGACTGCTGAAAACTC
-CTCAGTAATTATTATTCCGGAGTGTAAACGATCTCATACAGGCAAATACAGCATCACAGCCAAGAATAAA
-GCAGGACAAAAGACTGCAAATTGCAGAGTTAAAGTCATGGATGTACCAGGCCCACCCAAAGATCTGAAAG
-TCAGTGATATCACAAGGGGTAGTTGCAGACTTTCATGGAAGATGCCAGACGACGATGGAGGAGACAGGAT
-CAAAGGCTATGTTATTGAGAAGAGGACTATTGATGGAAAAGCCTGGACCAAAGTCAATCCAGACTGTGGA
-AGCACCACATTTGTAGTGCCTGATCTCCTCTCTGAACAGCAATATTTCTTCCGTGTGCGAGCAGAAAACC
-GTTTTGGTATTGGCCCACCTGTGGAAACCATTCAGAGGACCACTGCCAGAGATCCGATATATCCTCCTGA
-TCCTCCTATTAAACTCAAGATTGGCCTCATCACAAAGAACACAGTGCATCTGTCATGGAAACCCCCGAAG
-AATGATGGGGGCTCCCCTGTTACCCACTATATTGTTGAGTGCCTTGCATGGGACCCTACTGGGACAAAGA
-AAGAAGCCTGGAGGCAGTGCAATAAGCGTGATGTGGAAGAACTGCAATTTACTGTTGAAGACCTAGTAGA
-AGGTGGGGAATATGAATTCCGAGTCAAAGCTGTCAATGCTGCAGGAGTCAGCAAGCCTTCAGCCACTGTT
-GGGCCCTGTGACTGTCAAAGACCAGACATGCCACCATCAATTGATCTAAAAGAATTCATGGAGGTTGAAG
-AAGGAACCAATGTTAACATTGTGGCCAAAATTAAAGGTGTGCCATTCCCGACACTAACCTGGTTTAAAGC
-TCCTCCAAAGAAGCCTGATAACAAAGAACCTGTTCTCTATGACACCCATGTCAACAAACTGGTGGTAGAT
-GATACTTGCACTTTAGTTATTCCGCAGTCTCGCAGGAGTGACACTGGCTTATATACCATCACAGCTGTAA
-ATAATCTGGGAACAGCATCAAAGGAGATGAGACTGAATGTCCTGGGTCGTCCTGGCCCTCCAGTGGGACC
-CATAAAATTTGAATCTGTTTCAGCAGATCAAATGACACTATCTTGGTTTCCACCTAAAGATGATGGTGGG
-TCTAAGATTACAAACTATGTAATTGAGAAAAGAGAAGCTAACAGGAAGACATGGGTCCATGTCTCCAGTG
-AACCTAAGGAGTGCACGTACACGATTCCCAAATTGCTAGAAGGCCATGAATATGTATTCCGAATCATGGC
-CCAGAATAAATATGGCATTGGAGAACCTCTTGACAGTGAACCTGAAACAGCAAGAAACCTCTTCTCTGTC
-CCTGGAGCACCAGATAAACCAACAGTTAGCAGCGTGACTCGTAACTCCATGACTGTCAACTGGGAAGAGC
-CAGAATATGATGGAGGCTCTCCTGTGACAGGGTACTGGCTGGAAATGAAAGACACCACTTCAAAGAGATG
-GAAGAGAGTTAACCGAGATCCTATCAAAGCCATGACTTTGGGTGTTTCTTATAAAGTGACTGGTCTTATT
-GAAGGTTCCGACTATCAATTCCGGGTATATGCAATCAATGCTGCTGGCGTGGGTCCAGCAAGTCTGCCAT
-CAGACCCAGCGACTGCTAGAGATCCAATTGCCCCTCCTGGTCCTCCATTTCCCAAAGTGACAGATTGGAC
-TAAATCATCTGCAGATCTGGAGTGGTCTCCCCCACTAAAAGATGGTGGATCCAAAGTAACTGGATACATC
-GTTGAATATAAAGAAGAAGGAAAAGAAGAATGGGAAAAGGGTAAAGATAAAGAAGTGAGAGGAACAAAGC
-TCGTTGTGACAGGATTAAAGGAAGGAGCATTCTACAAATTTAGAGTTAGTGCAGTCAACATTGCTGGCAT
-TGGAGAACCTGGAGAGGTCACAGATGTCATTGAAATGAAGGACAGACTTGTTTCACCTGACCTTCAGCTA
-GATGCCAGTGTCAGAGATAGAATTGTTGTCCATGCTGGAGGGGTGATCCGAATCATTGCCTATGTGTCTG
-GAAAGCCTCCTCCAACCGTCACCTGGAACATGAATGAAAGAACCTTACCTCAAGAAGCCACCATTGAGAC
-CACAGCCATTAGCTCATCCATGGTCATCAAGAACTGCCAGAGGAGCCATCAAGGCGTCTATTCTCTTCTT
-GCCAAAAATGAAGCCGGAGAAAGAAAGAAGACAATTATTGTTGATGTATTAGATGTTCCAGGTCCCGTTG
-GAACACCATTCCTAGCTCACAACCTAACCAATGAGTCCTGCAAACTGACATGGTTTTCTCCAGAAGATGA
-TGGAGGCTCTCCAATCACCAATTATGTCATTGAAAAGCGTGAATCTGACCGCAGAGCATGGACCCCAGTG
-ACATATACAGTTACCCGACAAAATGCTACTGTCCAGGGTCTCATTCAAGGAAAAGCCTACTTTTTCCGAA
-TTGCGGCTGAAAATAGTATTGGCATGGGTCCATTTGTTGAGACATCAGAGGCACTTGTTATCAGAGAGCC
-AATAACTGTACCAGAGCGTCCTGAAGACCTGGAAGTCAAAGAAGTTACTAAAAATACTGTAACTTTGACT
-TGGAATCCTCCTAAGTATGATGGTGGGTCAGAAATTATTAACTATGTCCTAGAAAGTCGGCTCATTGGGA
-CTGAGAAGTTCCACAAAGTTACAAATGACAACTTGCTTAGCAGAAAATACACTGTTAAAGGCTTAAAAGA
-AGGTGATACCTATGAGTACCGTGTCAGTGCTGTCAACATTGTTGGACAAGGCAAACCATCATTTTGCACC
-AAACCAATTACTTGCAAGGATGAGCTGGCACCCCCAACGCTTCACCTCGACTTCAGAGATAAGCTCACGA
-TTCGAGTTGGTGAAGCTTTTGCCCTCACTGGCCGTTACTCAGGCAAACCAAAGCCTAAGGTTTCCTGGTT
-CAAAGATGAAGCTGATGTGCTGGAAGATGATCGCACTCATATAAAGACTACACCAGCAACACTTGCTTTA
-GAGAAGATCAAGGCCAAACGTTCAGATTCCGGCAAATACTGTGTGGTTGTGGAGAACAGTACAGGCTCTA
-GGAAAGGTTTCTGTCAAGTTAATGTTGTTGACCGTCCTGGACCACCAGTAGGACCAGTTAGTTTTGATGA
-GGTGACCAAAGATTACATGGTTATCTCTTGGAAGCCTCCTTTAGATGATGGAGGCAGTAAAATCACCAAT
-TATATTATTGAGAAGAAGGAAGTGGGTAAAGACGTCTGGATGCCAGTGACATCTGCAAGTGCTAAAACAA
-CATGCAAAGTTTCTAAACTACTTGAAGGAAAAGATTATATTTTCCGGATACATGCTGAAAATCTGTATGG
-AATAAGTGATCCTCTGGTGTCTGATTCAATGAAAGCCAAAGATCGTTTCAGGGTTCCTGATGCACCTGAT
-CAGCCAATTGTTACAGAAGTTACCAAAGACTCTGCATTAGTAACCTGGAATAAGCCACATGATGGAGGAA
-AACCCATCACAAACTACATCCTGGAAAAGAGAGAAACTATGTCTAAACGATGGGCTAGAGTTACCAAAGA
-TCCTATTCATCCATACACTAAATTTAGGGTTCCTGATCTTCTAGAAGGATGTCAGTATGAATTCCGGGTT
-TCTGCAGAAAATGAAATTGGTATTGGAGATCCAAGCCCACCATCCAAACCAGTCTTTGCTAAAGATCCAA
-TTGCTAAACCAAGTCCACCTGTTAATCCTGAAGCAATAGATACAACATGCAATTCAGTCGATCTAACTTG
-GCAGCCACCACGTCATGATGGTGGGAGCAAGATTCTGGGTTATATTGTTGAGTACCAGAAAGTTGGAGAT
-GAAGAGTGGAGAAGAGCCAATCACACCCCTGAGTCATGTCCTGAAACTAAATATAAAGTCACCGGTCTTC
-GGGACGGTCAAACCTATAAGTTTAGAGTGTTAGCAGTCAATGCAGCTGGTGAATCAGATCCAGCTCATGT
-TCCGGAGCCAGTCCTAGTAAAAGACAGGCTTGAACCCCCTGAGTTGATTCTTGATGCCAACATGGCAAGA
-GAACAACACATTAAAGTTGGTGATACTCTAAGACTTAGTGCCATCATCAAAGGAGTGCCATTCCCAAAAG
-TAACTTGGAAAAAAGAAGACAGAGATGCTCCAACTAAAGCAAGAATTGATGTGACTCCAGTTGGTAGCAA
-GCTTGAAATTCGTAATGCTGCCCATGAAGATGGTGGAATTTATTCTTTAACAGTGGAGAATCCAGCTGGT
-TCAAAAACTGTCTCAGTAAAAGTACTTGTATTAGATAAACCTGGGCCACCTAGAGATCTGGAAGTCAGTG
-AAATTAGGAAAGATTCATGTTACCTTACTTGGAAAGAACCACTGGATGATGGTGGTTCTGTTATTACCAA
-TTATGTGGTTGAGAGGAGAGATGTTGCCAGCGCCCAGTGGTCACCTCTCTCAGCTACATCAAAGAAAAAG
-AGTCACTTCGCTAAGCATCTGAATGAAGGCAACCAGTACCTCTTCCGAGTAGCTGCGGAGAACCAGTATG
-GACGTGGTCCTTTTGTTGAAACACCAAAACCAATCAAGGCTTTGGATCCTCTCCATCCCCCAGGGCCACC
-CAAGGACCTGCACCATGTAGATGTTGACAAGACTGAAGTCTCCCTAGTCTGGAATAAGCCGGATCGTGAT
-GGTGGTTCTCCAATCACTGGATATTTGGTAGAATATCAAGAAGAAGGCACCCAGGACTGGATTAAATTTA
-AGACTGTGACAAACTTAGAGTGTGTGGTTACTGGACTACAACAAGGAAAGACCTATAGATTCCGTGTAAA
-AGCTGAAAACATTGTGGGTCTTGGTCTCCCTGACACAACTATCCCGATAGAATGTCAAGAAAAACTAGTG
-CCTCCATCCGTGGAGCTAGATGTGAAATTAATTGAAGGTCTTGTGGTAAAGGCTGGAACCACAGTCAGAT
-TCCCTGCTATTATAAGAGGTGTGCCTGTTCCTACTGCAAAGTGGACAACCGATGGGAGTGAGATTAAAAC
-CGATGAGCACTACACAGTTGAAACAGACAACTTCTCATCAGTACTTACCATTAAGAACTGCTTAAGGAGA
-GACACTGGGGAATATCAAATCACAGTTTCCAATGCAGCCGGTAGCAAAACAGTAGCCGTACATCTTACTG
-TTCTTGATGTTCCTGGGCCACCAACAGGTCCTATTAATATTCTGGATGTTACTCCTGAACACATGACTAT
-CTCATGGCAGCCACCTAAGGATGATGGAGGAAGCCCTGTGATAAATTATATTGTTGAGAAACAAGATACA
-AGGAAAGACACGTGGGGTGTTGTCTCTTCCGGAAGCAGTAAGACAAAGCTGAAAATCCCACATCTGCAGA
-AGGGCTGTGAATATGTTTTCCGAGTTAGAGCAGAGAATAAGATAGGTGTTGGTCCTCCCCTTGACTCCAC
-ACCTACTGTTGCTAAGCATAAATTTAGTCCTCCGTCTCCTCCTGGTAAACCAGTGGTTACTGACATTACT
-GAAAATGCAGCAACAGTGTCTTGGACCCTGCCAAAATCTGATGGTGGCAGTCCAATAACTGGCTACTATA
-TGGAACGTCGAGAAGTAACTGGCAAATGGGTGAGGGTCAACAAAACACCTATCGCTGACCTGAAGTTCAG
-AGTGACTGGACTCTATGAAGGAAATACATATGAGTTTAGAGTTTTTGCTGAAAATCTTGCAGGACTAAGC
-AAACCATCCCCAAGTTCTGATCCAATAAAAGCTTGCCGGCCCATCAAACCACCTGGACCACCTATTAATC
-CTAAACTGAAAGACAAGAGCAGAGAAACAGCTGATTTGGTGTGGACAAAGCCTCTCAGTGATGGTGGTAG
-CCCCATTCTAGGATATGTAGTGGAATGTCAGAAACCTGGCACGGCACAATGGAACAGGATTAATAAAGAT
-GAACTCATTAGGCAATGTGCCTTTAGGGTACCTGGACTAATTGAAGGAAATGAGTACAGATTCCGTATAA
-AGGCAGCTAATATTGTAGGAGAGGGTGAGCCAAGAGAACTAGCAGAATCTGTGATTGCAAAAGATATCCT
-TCATCCTCCAGAAGTAGAACTTGATGTTACTTGTCGTGATGTTATTACCGTGAGAGTAGGCCAAACTATC
-CGCATTCTAGCTCGAGTCAAAGGCAGACCTGAACCAGACATAACTTGGACTAAGGAAGGCAAAGTATTGG
-TCCGAGAAAAGAGGGTGGACCTTATTCAGGATCTACCTCGTGTTGAGTTACAAATTAAAGAAGCTGTTAG
-AGCTGATCATGGCAAGTATATCATCTCAGCTAAGAACAGCAGTGGACATGCCCAAGGTTCAGCCATCGTT
-AACGTCCTTGACAGACCTGGGCCTTGCCAGAATTTGAAGGTTACCAATGTAACCAAAGAGAACTGTACAA
-TTTCTTGGGAAAACCCACTAGATAATGGTGGCTCAGAAATAACAAACTTCATAGTAGAATATCGCAAACC
-AAACCAGAAAGGCTGGTCAATTGTTGCATCAGATGTCACTAAACGATTAATCAAGGCCAACCTTTTAGCC
-AACAATGAATACTATTTCCGAGTTTGTGCAGAGAATAAAGTAGGTGTTGGGCCAACCATCGAAACAAAAA
-CTCCCATTCTGGCTATTAACCCTATTGACAGACCAGGTGAGCCTGAAAACCTTCACATTGCAGATAAAGG
-AAAGACATTTGTCTATCTAAAGTGGCGGAGGCCTGACTATGATGGTGGCAGTCCAAATCTGTCATATCAT
-GTTGAGAGAAGGCTTAAGGGCTCCGATGACTGGGAAAGAGTGCATAAAGGAAGCATTAAAGAAACTCACT
-ACATGGTTGACAGATGTGTTGAAAACCAGATTTATGAGTTCAGAGTGCAAACAAAGAATGAAGGTGGGGA
-AAGTGACTGGGTGAAGACAGAGGAAGTTGTTGTGAAAGAAGACTTACAAAAACCAGTACTTGATCTGAAA
-TTAAGTGGGGTCCTAACTGTCAAAGCAGGGGACACCATTAGGCTTGAGGCAGGGGTTAGAGGCAAACCAT
-TCCCAGAAGTTGCATGGACCAAGGACAAAGACGCTACAGACTTAACAAGATCACCAAGGGTCAAGATTGA
-TACCCGTGCTGATTCATCTAAATTTTCTCTTACTAAAGCAAAGCGAAGTGATGGGGGTAAATATGTAGTT
-ACGGCAACTAACACGGCTGGCAGTTTTGTGGCCTATGCCACTGTCAATGTTTTAGATAAGCCTGGTCCTG
-TGAGAAATCTGAAAATTGTTGATGTGTCCAGTGATAGGTGTACTGTTTGCTGGGATCCACCAGAAGATGA
-TGGTGGCTGTGAAATCCAAAATTATATTCTAGAAAAATGTGAGACAAAGCGAATGGTTTGGTCTACCTAT
-TCTGCTACTGTCTTGACACCTGGTACTACAGTAACACGTCTCATAGAAGGAAATGAATATATTTTCAGAG
-TCCGTGCAGAAAATAAAATAGGCACAGGGCCTCCAACAGAAAGTAAACCAGTCATAGCCAAAACCAAGTA
-TGATAAACCTGGTCGCCCTGATCCCCCAGAAGTCACTAAAGTAAGCAAAGAAGAGATGACTGTGGTTTGG
-AATCCACCTGAATATGATGGTGGAAAGTCTATAACTGGATACTTTTTGGAGAAAAAGGAAAAGCATTCAA
-CACGATGGGTCCCTGTCAACAAGAGTGCAATCCCTGAGAGACGTATGAAAGTACAGAATCTCCTCCCAGA
-CCATGAATATCAGTTCCGTGTCAAGGCAGAAAATGAAATTGGAATTGGAGAACCAAGCTTGCCTTCAAGA
-CCGGTGGTGGCAAAAGACCCCATAGAGCCACCTGGTCCACCAACCAATTTCAGAGTGGTTGATACAACCA
-AACATTCCATAACTCTTGGGTGGGGAAAACCAGTCTATGATGGTGGTGCACCGATCATTGGATATGTTGT
-GGAAATGAGACCAAAAATAGCAGATGCGTCTCCTGATGAAGGCTGGAAACGGTGTAATGCTGCAGCACAG
-CTTGTACGCAAGGAATTCACTGTTACCAGCTTGGATGAAAACCAGGAATATGAGTTCAGGGTGTGTGCCC
-AAAACCAAGTTGGTATTGGGCGCCCTGCAGAGCTAAAGGAAGCTATCAAACCTAAAGAAATACTAGAACC
-TCCGGAGATTGATTTGGATGCCAGCATGAGGAAACTGGTCATAGTGAGAGCAGGATGCCCTATTCGTCTC
-TTTGCTATAGTGAGAGGACGACCAGCCCCTAAAGTCACTTGGCGAAAAGTTGGCATTGATAATGTGGTCA
-GAAAAGGACAAGTTGATCTGGTTGACACTATGGCCTTCCTTGTCATCCCCAATTCTACCCGTGATGACTC
-AGGAAAATATTCCTTAACACTTGTGAACCCAGCAGGAGAAAAGGCTGTATTCGTAAATGTCAGAGTATTA
-GACACTCCTGGGCCTGTGTCTGATTTAAAAGTTTCAGATGTCACTAAAACATCATGCCATGTGTCCTGGG
-CCCCTCCTGAAAACGACGGTGGGAGCCAAGTGACACATTATATCGTGGAGAAACGTGAGGCAGACAGAAA
-GACATGGTCGACCGTTACCCCAGAAGTTAAGAAAACAAGCTTCCATGTAACCAATCTTGTCCCTGGGAAT
-GAGTATTACTTCAGAGTAACTGCTGTCAACGAATATGGCCCTGGCGTCCCAACAGATGTCCCAAAACCAG
-TGCTTGCATCAGATCCTCTAAGTGAGCCGGATCCCCCAAGGAAATTAGAAGTGACTGAAATGACCAAGAA
-CAGTGCCACCTTAGCCTGGTTACCTCCCCTACGTGATGGAGGTGCTAAAATCGATGGCTACATCACTAGT
-TACAGAGAAGAAGAGCAGCCTGCAGATCGCTGGACAGAGTACTCAGTGGTAAAAGATCTGAGCCTTGTTG
-TCACTGGCCTAAAGGAAGGAAAGAAATACAAATTTAGAGTAGCGGCCAGAAATGCTGTTGGAGTCAGTTT
-GCCAAGAGAAGCTGAAGGAGTGTATGAAGCCAAAGAACAACTGTTGCCACCAAAGATCCTTATGCCAGAG
-CAAATAACTATCAAAGCTGGGAAAAAACTCCGAATTGAAGCCCATGTGTATGGAAAGCCTCATCCCACCT
-GTAAATGGAAAAAAGGAGAAGATGAAGTTGTCACATCCAGCCACCTGGCAGTGCATAAAGCAGACAGCTC
-TTCAATTCTGATCATAAAAGATGTGACTAGGAAAGACAGTGGTTACTACAGCCTCACAGCAGAGAACAGT
-TCTGGGACAGACACTCAGAAAATCAAAGTTGTAGTCATGGATGCCCCCGGCCCCCCTCAGCCTCCATTTG
-ACATTTCTGATATAGACGCTGATGCTTGCTCCCTGTCATGGCACATCCCTCTGGAGGACGGAGGCAGTAA
-CATCACCAATTATATAGTGGAGAAGTGTGATGTAAGCCGAGGTGACTGGGTCACGGCTCTAGCTTCAGTC
-ACAAAAACTTCCTGCAGGGTTGGAAAGCTGATCCCAGGCCAGGAGTACATCTTCCGGGTCCGTGCTGAAA
-ACCGATTTGGCATTTCAGAGCCTCTCACATCTCCAAAGATGGTTGCGCAGTTCCCATTTGGTGTTCCTAG
-TGAACCAAAGAATGCACGAGTCACCAAAGTCAACAAGGACTGTATTTTTGTTGCTTGGGACAGACCAGAT
-AGTGATGGAGGGAGCCCCATTATTGGTTATCTGATTGAACGCAAGGAAAGAAACAGTTTGCTGTGGGTGA
-AAGCCAATGATACTCTTGTCCGGTCAACTGAATATCCTTGTGCTGGCCTTGTAGAAGGTCTTGAGTATTC
-ATTCAGAATCTATGCCCTAAACAAAGCTGGATCCAGCCCACCCAGCAAACCCACAGAATATGTAACTGCA
-AGAATGCCAGTTGATCCTCCTGGGAAACCTGAGGTTATTGATGTCACCAAGAGTACTGTATCTCTGATCT
-GGGCTCGTCCAAAGCATGATGGAGGCAGTAAAATTATTGGCTATTTCGTAGAAGCTTGCAAACTTCCTGG
-TGATAAATGGGTACGGTGCAATACTGCACCTCACCAGATTCCCCAGGAAGAGTACACAGCTACTGGCCTA
-GAAGAGAAAGCTCAGTATCAATTTAGAGCTATTGCCAGGACCGCGGTAAACATTAGCCCACCTTCTGAAC
-CTTCTGATCCAGTGACTATCCTCGCAGAAAATGTCCCTCCCAGGATAGACCTGAGTGTGGCTATGAAATC
-TTTGCTTACTGTGAAAGCTGGAACTAATGTCTGCTTGGATGCTACTGTTTTTGGTAAACCGATGCCAACA
-GTTTCTTGGAAAAAAGATGGCACACTGCTAAAACCAGCAGAAGGCATAAAGATGGCCATGCAGCGGAATC
-TGTGCACCTTGGAGCTATTCAGCGTGAACCGGAAGGACTCAGGAGACTATACCATTACTGCTGAAAATTC
-AAGTGGTTCTAAATCAGCCACCATTAAGCTTAAAGTGTTAGATAAACCGGGTCCTCCAGCATCTGTTAAA
-ATCAACAAAATGTATTCAGATCGTGCTATGCTTTCTTGGGAACCGCCTCTTGAAGATGGAGGCTCAGAAA
-TCACCAACTATATTGTTGACAAACGTGAAACAAGCAGGCCCAACTGGGCTCAAGTCTCTGCAACTGTGCC
-TATCACCAGCTGCAGCGTGGAGAAACTTATAGAGGGCCATGAGTATCAGTTCCGTATTTGTGCTGAAAAT
-AAATATGGAGTAGGCGATCCAGTCTTCACTGAACCAGCAATTGCCAAAAACCCATATGACCCACCAGGAC
-GCTGTGATCCTCCTGTTATTAGCAACATAACCAAAGATCACATGACAGTCAGCTGGAAGCCACCAGCAGA
-TGATGGGGGCTCACCCATCACTGGCTATTTGCTTGAAAAGCGGGAAACCCAGGCTGTTAACTGGACTAAG
-GTCAACAGAAAACCTATTATAGAAAGAACATTAAAAGCAACAGGTCTTCAAGAAGGTACCGAATATGAGT
-TCCGTGTTACAGCTATAAATAAAGCTGGACCAGGCAAACCCAGTGACGCATCCAAGGCCGCTTATGCTCG
-GGACCCTCAGTATCCTCCTGCGCCACCGGCTTTCCCTAAAGTATATGATACAACTCGCAGCTCTGTGAGT
-CTATCTTGGGGCAAGCCAGCCTATGACGGCGGCAGCCCTATCATTGGTTATCTCGTTGAAGTAAAACGGG
-CTGACTCCGATAACTGGGTGAGGTGCAACTTACCACAGAATCTACAGAAAACCCGCTTTGAGGTTACTGG
-CCTGATGGAAGACACACAATATCAATTCCGTGTGTATGCCGTTAATAAGATTGGATACAGTGACCCCAGT
-GATGTGCCAGATAAACACTATCCCAAGGACATCTTAATTCCACCTGAGGGAGAACTTGATGCGGACTTAA
-GGAAGACACTCATATTACGTGCTGGAGTTACTATGAGACTATATGTACCAGTAAAAGGACGCCCACCTCC
-AAAGATTACTTGGTCTAAACCAAATGTCAATCTAAGAGACAGGATTGGACTGGACATAAAGTCAACTGAC
-TTTGACACTTTCTTGCGCTGTGAAAATGTGAACAAATATGATGCAGGAAAATATATCTTAACCCTGGAGA
-ACAGCTGTGGTAAAAAGGAATATACCATTGTTGTGAAAGTGCTTGATACTCCTGGGCCACCTGTCAATGT
-GACTGTTAAGGAAATATCCAAAGACTCTGCTTATGTTACCTGGGAGCCTCCCATTATTGATGGCGGAAGC
-CCCATCATAAACTATGTGGTACAAAAACGTGATGCAGAGAGGAAATCCTGGTCTACAGTGACAACTGAGT
-GCTCCAAAACAAGCTTCAGAGTAGCTAATTTGGAGGAGGGAAAATCCTACTTCTTCCGAGTGTTTGCTGA
-AAATGAGTATGGCATTGGTGATCCCGGTGAAACTCGTGATGCTGTCAAAGCTTCCCAAACTCCTGGACCA
-GTTGTGGACCTGAAAGTGAGGTCTGTATCTAAGTCATCCTGTAGCATTGGCTGGAAAAAGCCTCACAGTG
-ATGGTGGAAGTCGGATTATTGGATATGTAGTTGATTTCCTGACTGAAGAAAATAAGTGGCAACGAGTTAT
-GAAATCCTTAAGCCTACAGTACTCTGCAAAAGATTTGACTGAAGGGAAGGAATATACCTTCAGAGTGAGT
-GCTGAGAATGAAAATGGAGAAGGAACCCCAAGCGAAATCACTGTTGTGGCAAGGGATGATGTTGTGGCTC
-CTGATCTTGACTTAAAGGGTCTACCTGATTTGTGCTACTTGGCTAAAGAAAACAGCAACTTCCGGCTTAA
-GATCCCCATAAAAGGCAAGCCAGCTCCATCAGTCTCCTGGAAGAAAGGGGAAGATCCTCTAGCAACTGAC
-ACTAGAGTCAGTGTTGAGTCATCTGCGGTTAACACAACTCTTATAGTGTACGATTGCCAAAAATCTGATG
-CTGGAAAATACACAATCACACTTAAGAATGTTGCTGGCACCAAGGAAGGAACTATCTCCATAAAGGTTGT
-TGGCAAGCCTGGCATCCCCACTGGACCAATCAAATTTGATGAAGTCACAGCAGAAGCCATGACCTTAAAG
-TGGGCTCCTCCAAAGGATGATGGAGGTTCTGAAATCACCAACTATATCCTAGAGAAGAGGGATTCTGTGA
-ACAACAAGTGGGTGACGTGCGCCTCAGCTGTCCAGAAAACCACCTTTAGAGTAACCAGACTTCATGAGGG
-CATGGAATATACCTTCAGGGTCAGTGCCGAAAATAAATATGGTGTAGGGGAAGGCCTGAAATCGGAGCCA
-ATTGTTGCGAGACATCCATTTGATGTGCCTGATGCTCCCCCACCTCCCAATATTGTGGATGTCAGACACG
-ATTCAGTATCTCTAACTTGGACTGACCCCAAGAAAACTGGTGGTTCTCCAATTACAGGGTATCATCTCGA
-GTTCAAGGAAAGAAACAGCCTTTTGTGGAAGAGAGCTAACAAGACTCCGATAAGGATGAGAGACTTTAAA
-GTGACAGGATTAACTGAAGGTCTTGAATATGAATTCCGAGTTATGGCAATCAATTTAGCAGGTGTGGGCA
-AGCCAAGCCTACCATCAGAGCCTGTTGTGGCACTGGACCCAATTGATCCTCCTGGAAAACCTGAGGTTAT
-TAACATAACAAGGAATTCAGTGACTCTCATTTGGACTGAACCTAAATATGACGGTGGTCATAAGTTAACT
-GGATATATAGTGGAGAAGCGAGATCTACCTTCGAAGTCTTGGATGAAAGCCAACCATGTTAATGTCCCAG
-AATGTGCCTTTACTGTAACTGACCTTGTTGAGGGTGGAAAATATGAATTCAGAATTAGAGCAAAGAATAC
-AGCAGGTGCTATCAGTGCTCCATCAGAAAGTACAGAAACCATTATTTGCAAGGATGAATACGAGGCACCA
-ACAATTGTCCTTGATCCCACAATAAAAGATGGGCTAACAATTAAAGCAGGGGATACCATTGTTTTGAATG
-CCATTAGCATTCTTGGCAAACCCCTTCCAAAATCAAGTTGGTCCAAGGCAGGAAAAGACATTAGACCATC
-AGATATCACTCAGATAACTTCAACCCCAACATCTTCCATGCTTACTATCAAGTATGCCACTAGAAAAGAT
-GCGGGTGAATATACCATCACTGCTACCAATCCTTTTGGCACGAAGGTGGAACATGTGAAGGTAACAGTCC
-TTGATGTACCTGGTCCCCCAGGTCCTGTTGAAATCAGTAATGTTTCTGCTGAAAAAGCAACACTTACATG
-GACACCTCCCTTGGAAGATGGCGGCTCACCAATTAAGTCCTATATACTTGAAAAGAGAGAAACCAGCCGA
-CTTTTGTGGACAGTGGTTTCTGAAGATATTCAGTCTTGCAGGCATGTGGCAACCAAACTTATCCAAGGAA
-ATGAGTACATCTTCCGGGTCTCAGCTGTAAACCACTATGGCAAAGGAGAACCTGTACAGTCTGAACCTGT
-CAAAATGGTAGACAGATTTGGTCCCCCTGGCCCTCCTGAAAAACCAGAGGTATCAAATGTCACTAAGAAC
-ACTGCCACTGTCAGCTGGAAAAGGCCAGTGGATGATGGTGGCAGCGAAATTACAGGATATCATGTAGAAA
-GGAGAGAAAAGAAAAGCCTGCGATGGGTGAGAGCAATAAAAACACCAGTTTCCGATCTCAGGTGCAAAGT
-AACAGGACTGCAAGAAGGAAGCACCTACGAATTCCGTGTCAGTGCAGAAAACAGAGCAGGAATTGGTCCA
-CCCAGTGAGGCTTCAGATTCTGTTCTGATGAAAGATGCAGCATATCCTCCAGGACCACCTTCAAATCCGC
-ATGTCACTGATACTACCAAGAAATCTGCTTCTTTGGCATGGGGCAAGCCTCATTATGATGGTGGACTTGA
-AATCACTGGCTATGTCGTGGAGCATCAAAAAGTAGGAGACGAGGCCTGGATAAAAGATACCACAGGAACC
-GCCCTCAGAATCACTCAGTTCGTTGTTCCTGATCTTCAGACTAAAGAAAAATACAACTTCAGAATCAGTG
-CCATCAACGATGCAGGTGTTGGGGAGCCAGCGGTGATTCCAGATGTTGAAATCGTAGAACGGGAGATGGC
-TCCTGATTTTGAACTAGATGCCGAGCTTCGAAGAACACTTGTTGTTAGAGCAGGACTCAGTATTAGGATA
-TTTGTGCCAATTAAAGGTCGTCCTGCTCCTGAAGTGACATGGACCAAAGATAACATCAACCTGAAAAACC
-GAGCCAACATTGAAAATACGGAATCATTTACTCTTCTGATTATCCCAGAATGTAACAGATATGATACCGG
-TAAATTTGTCATGACCATTGAAAACCCGGCTGGGAAGAAAAGTGGCTTTGTGAACGTCAGAGTCTTGGAC
-ACGCCAGGCCCAGTCCTCAACCTGCGGCCTACAGACATCACAAAGGACAGTGTCACCCTGCACTGGGACC
-TCCCTCTGATAGATGGAGGCTCACGTATAACAAACTACATTGTAGAGAAACGTGAAGCAACACGGAAATC
-TTATTCCACAGCCACCACTAAGTGCCATAAATGCACATATAAAGTTACCGGCTTGTCTGAAGGGTGTGAA
-TATTTCTTCAGAGTGATGGCAGAGAATGAATATGGAATTGGTGAGCCAACAGAAACTACAGAGCCCGTAA
-AAGCCTCTGAAGCACCATCTCCACCAGACAGCCTTAACATCATGGACATAACTAAGAGCACCGTCAGCCT
-GGCATGGCCTAAGCCCAAACACGATGGTGGCAGCAAGATCACTGGCTATGTGATTGAAGCCCAAAGAAAA
-GGCTCTGACCAGTGGACCCACATCACAACCGTGAAAGGGTTAGAATGTGTTGTGAGGAATCTAACTGAAG
-GAGAGGAATATACCTTCCAAGTGATGGCAGTGAACAGCGCGGGGAGAAGTGCCCCTAGAGAAAGCAGACC
-CGTCATTGTCAAGGAGCAGACAATGCTTCCAGAGCTGGATCTCCGTGGCATCTATCAGAAACTGGTCATT
-GCCAAAGCTGGTGACAACATCAAAGTTGAAATTCCAGTGCTCGGTCGACCGAAGCCCACAGTGACATGGA
-AAAAAGGAGACCAAATTCTTAAACAGACACAGAGAGTTAATTTTGAAACCACAGCGACTTCAACCATTTT
-AAATATCAATGAGTGTGTCAGAAGTGATAGTGGGCCCTATCCATTAACAGCAAGGAACATTGTAGGAGAG
-GTTGGTGATGTCATCACCATTCAAGTCCATGATATCCCAGGGCCACCTACTGGACCAATCAAATTTGATG
-AAGTTTCATCTGATTTTGTAACCTTCTCTTGGGACCCACCTGAGAACGATGGTGGTGTACCAATAAGCAA
-CTATGTAGTGGAAATGCGGCAGACTGACAGTACTACCTGGGTTGAGTTAGCAACCACCGTTATACGTACT
-ACCTATAAAGCCACCCGCCTTACTACTGGATTAGAGTATCAGTTCCGTGTAAAAGCTCAGAATAGATATG
-GAGTTGGACCAGGCATCACATCAGCATGCATAGTTGCCAACTATCCATTTAAGGTTCCTGGACCTCCTGG
-TACCCCTCAGGTAACTGCAGTTACCAAGGATTCAATGACAATTAGCTGGCATGAGCCACTTTCTGATGGT
-GGAAGCCCCATTTTAGGATATCATGTTGAAAGAAAAGAACGAAATGGTATTCTCTGGCAGACTGTGAGCA
-AAGCTTTAGTACCAGGCAACATTTTCAAATCAAGTGGACTTACAGATGGTATTGCTTATGAGTTCCGGGT
-GATTGCAGAAAACATGGCAGGCAAAAGTAAGCCAAGCAAGCCATCAGAACCTATGTTGGCTCTGGATCCC
-ATTGACCCACCTGGAAAACCAGTACCTCTAAATATTACAAGACACACAGTAACACTTAAATGGGCTAAGC
-CTGAATATACTGGGGGCTTTAAAATTACCAGTTATATCGTTGAAAAGAGAGACCTTCCTAATGGACGGTG
-GCTGAAGGCCAACTTCAGCAACATTTTGGAGAATGAATTTACAGTCAGTGGCCTAACAGAAGATGCTGCA
-TATGAATTCCGTGTGATCGCCAAAAATGCTGCAGGTGCCATCAGTCCACCATCTGAGCCATCTGATGCTA
-TCACTTGCAGGGATGATGTTGAGGCACCAAAGATAAAGGTGGATGTTAAATTTAAGGACACGGTTATATT
-AAAAGCAGGTGAAGCATTCAGACTGGAAGCTGATGTTTCAGGCCGCCCACCTCCAACAATGGAATGGAGC
-AAAGATGGAAAAGAGCTGGAAGGCACAGCAAAGTTAGAAATAAAAATTGCAGATTTCTCTACTAATCTGG
-TAAACAAAGATTCAACAAGAAGGGATAGTGGTGCCTATACCCTTACAGCGACTAATCCTGGTGGCTTTGC
-TAAACACATTTTCAATGTCAAAGTTCTTGACAGACCAGGCCCACCTGAAGGACCTTTGGCTGTAACTGAA
-GTGACATCAGAAAAGTGTGTACTATCATGGTTCCCTCCACTGGATGATGGAGGTGCCAAAATTGATCATT
-ACATAGTACAGAAACGTGAAACCAGCAGATTGGCATGGACAAATGTAGCCTCAGAAGTCCAAGTAACAAA
-GCTAAAGGTCACTAAACTCTTGAAAGGCAATGAATACATATTCCGTGTCATGGCTGTAAATAAATATGGA
-GTGGGAGAGCCACTGGAATCAGAGCCTGTGCTTGCAGTGAATCCTTATGGACCCCCTGATCCGCCCAAAA
-ACCCTGAAGTGACAACTATTACTAAAGATTCGATGGTTGTCTGCTGGGGACATCCTGATTCTGATGGTGG
-AAGTGAAATCATCAATTATATTGTGGAACGGCGTGATAAAGCTGGCCAACGCTGGATTAAATGCAACAAA
-AAAACTCTTACTGATTTAAGATATAAAGTGTCTGGACTGACAGAAGGACATGAATATGAGTTCAGGATTA
-TGGCTGAAAATGCTGCTGGAATTAGTGCACCAAGTCCTACCAGTCCATTTTACAAGGCTTGTGACACTGT
-GTTTAAACCTGGACCACCAGGTAACCCACGTGTTCTGGATACAAGCAGATCATCCATTTCAATCGCTTGG
-AATAAACCTATCTATGATGGTGGTTCAGAAATCACTGGGTATATGGTTGAGATTGCCCTGCCAGAGGAAG
-ATGAATGGCAGATTGTCACTCCACCAGCAGGACTCAAGGCAACTTCGTATACTATCACTGGCCTCACAGA
-GAATCAGGAATATAAGATCCGCATCTATGCCATGAATTCCGAAGGACTTGGGGAACCTGCCCTTGTTCCT
-GGAACTCCAAAGGCTGAAGACAGAATGCTGCCTCCAGAAATTGAACTGGATGCTGACCTGCGCAAAGTTG
-TTACTATAAGGGCCTGCTGCACCCTGAGACTTTTTGTTCCCATCAAAGGAAGGCCTGCACCTGAGGTGAA
-GTGGGCCCGGGACCATGGAGAATCTTTAGATAAAGCTAGCATCGAATCCACAAGCTCTTACACCCTGCTT
-ATTGTTGGAAATGTAAACAGATTTGACAGTGGCAAATATATACTAACTGTAGAAAATAGTTCAGGCAGCA
-AGTCTGCATTTGTCAATGTTAGAGTTCTCGATACACCAGGCCCCCCACAGGATCTGAAGGTAAAAGAGGT
-CACTAAGACATCTGTCACACTCACATGGGACCCACCTCTCCTTGATGGAGGTTCAAAAATCAAGAACTAT
-ATTGTTGAAAAGCGGGAATCAACAAGAAAAGCATATTCAACTGTTGCAACAAACTGCCACAAGACTTCCT
-GGAAGGTAGACCAGCTTCAAGAAGGCTGTAGCTACTATTTCAGGGTTCTCGCAGAAAATGAATATGGCAT
-TGGGCTGCCTGCTGAAACCGCAGAATCTGTGAAAGCATCAGAACGACCTCTTCCTCCAGGAAAAATAACT
-TTGATGGATGTCACAAGAAATAGTGTGTCACTCTCTTGGGAGAAACCAGAGCATGATGGAGGCAGCCGAA
-TTCTAGGCTACATTGTGGAGATGCAGACCAAAGGCAGTGACAAATGGGCCACGTGTGCCACAGTCAAGGT
-CACTGAAGCCACTATCACTGGATTAATTCAGGGTGAAGAATACTCTTTCCGTGTTTCAGCTCAGAATGAA
-AAGGGCATCAGTGATCCTAGACAACTGAGTGTGCCAGTGATCGCCAAAGATCTTGTCATTCCACCAGCCT
-TCAAACTCCTGTTCAATACTTTCACTGTACTGGCAGGTGAAGACCTAAAAGTTGATGTTCCATTCATTGG
-CCGCCCTACCCCAGCTGTAACCTGGCATAAAGATAATGTACCACTGAAGCAGACAACTAGAGTAAATGCA
-GAGAGCACAGAAAATAATTCACTACTGACAATAAAGGACGCCTGCCGAGAAGATGTTGGCCATTATGTGG
-TTAAACTGACTAACTCAGCTGGTGAAGCTATTGAAACCCTTAATGTTATCGTTCTTGACAAACCAGGGCC
-TCCAACTGGACCAGTTAAAATGGATGAAGTGACAGCTGATAGTATTACTCTTTCCTGGGGCCCACCCAAG
-TATGATGGTGGAAGTTCTATCAATAATTACATTGTTGAGAAACGGGACACTTCCACAACCACCTGGCAAA
-TTGTATCAGCTACAGTTGCAAGGACAACAATAAAGGCTTGCAGACTGAAGACTGGATGTGAATATCAGTT
-TAGAATTGCAGCTGAAAACAGATATGGGAAGAGTACCTACCTCAATTCAGAGCCTACTGTAGCCCAATAT
-CCATTCAAAGTTCCTGGTCCTCCTGGCACTCCAGTTGTCACACTGTCCTCCAGGGACAGCATGGAAGTAC
-AATGGAATGAGCCAATCAGTGATGGAGGAAGTAGAGTCATTGGCTATCATCTAGAACGCAAGGAAAGAAA
-TAGCATCCTCTGGGTTAAGTTGAATAAAACACCTATTCCTCAAACCAAGTTTAAGACAACTGGCCTTGAA
-GAAGGTGTTGAATATGAATTTAGAGTCTCTGCAGAGAACATCGTGGGCATTGGCAAGCCGAGTAAAGTAT
-CAGAATGTTATGTGGCTCGTGACCCATGTGATCCACCAGGACGGCCAGAGGCAATCATTGTCACAAGGAA
-TTCTGTGACTCTTCAGTGGAAGAAACCCACCTATGACGGTGGAAGCAAGATCACTGGTTATATTGTTGAG
-AAGAAAGAATTACCTGAGGGCCGTTGGATGAAAGCCAGTTTTACAAATATTATTGACACTCATTTTGAAG
-TAACTGGCCTAGTTGAAGATCACAGATATGAGTTCCGGGTTATAGCCCGAAATGCCGCAGGAGTGTTTAG
-TGAGCCTTCAGAAAGCACAGGAGCAATAACAGCTAGAGATGAGGTAGATCCACCACGAATAAGTATGGAT
-CCAAAATACAAAGACACAATCGTGGTTCATGCTGGTGAATCATTCAAGGTTGATGCAGATATTTATGGCA
-AACCAATACCAACCATTCAGTGGATAAAAGGTGATCAGGAGCTTTCAAACACAGCTCGATTAGAAATAAA
-GAGCACCGACTTTGCCACCAGTCTCAGTGTAAAAGATGCAGTACGTGTCGACAGTGGAAATTACATACTG
-AAGGCCAAAAATGTTGCAGGAGAAAGATCAGTTACTGTGAATGTCAAGGTTCTTGACAGACCAGGGCCAC
-CTGAAGGACCTGTTGTTATCTCAGGAGTTACAGCAGAAAAATGCACACTAGCTTGGAAACCCCCACTTCA
-GGATGGTGGGAGTGACATCATAAATTATATTGTGGAAAGGAGAGAAACCAGCCGCTTAGTTTGGACTGTG
-GTTGATGCCAATGTGCAGACTCTCAGCTGCAAGGTTACTAAGCTTCTTGAAGGCAATGAATATACTTTCC
-GTATAATGGCAGTAAACAAATATGGTGTTGGTGAACCTCTTGAATCTGAGCCAGTAGTTGCCAAGAATCC
-ATTTGTAGTACCAGATGCACCAAAAGCTCCAGAAGTCACAACAGTGACCAAGGACTCAATGATTGTTGTA
-TGGGAAAGACCAGCATCTGATGGTGGTAGTGAAATTCTTGGATATGTTCTTGAGAAACGGGATAAAGAAG
-GCATTAGATGGACAAGATGCCATAAGCGTCTGATTGGAGAGTTGCGCCTGAGAGTAACTGGACTCATAGA
-AAATCACGATTATGAGTTCAGAGTTTCTGCTGAGAATGCTGCTGGACTTAGTGAACCAAGCCCTCCTTCT
-GCTTACCAAAAGGCTTGTGATCCTATTTATAAACCAGGACCCCCAAACAACCCCAAAGTCATAGACATAA
-CCAGATCTTCAGTATTCCTTTCTTGGAGCAAACCAATATATGATGGTGGCTGTGAAATTCAAGGATACAT
-TGTTGAAAAATGTGATGTGAGTGTTGGTGAATGGACAATGTGCACTCCACCAACAGGAATTAATAAAACA
-AACATAGAAGTAGAGAAGCTGTTGGAAAAGCATGAATACAACTTCCGTATCTGTGCTATTAATAAAGCTG
-GAGTTGGAGAACATGCTGACGTCCCTGGACCTATTATAGTTGAAGAAAAATTAGAAGCACCAGACATTGA
-TCTTGACCTAGAACTAAGGAAAATCATAAATATAAGGGCAGGTGGCTCCTTAAGGTTATTTGTTCCTATA
-AAAGGTCGTCCTACACCAGAAGTTAAATGGGGAAAGGTGGATGGTGAAATCCGAGATGCAGCTATAATTG
-ATGTCACTAGCAGTTTCACCTCTCTTGTTCTTGACAATGTCAACCGATATGATAGTGGAAAATATACGCT
-TACATTAGAAAACAGCAGTGGAACAAAGTCTGCCTTTGTTACTGTGAGAGTTCTGGACACGCCAAGTCCA
-CCTGTTAACCTGAAAGTCACAGAAATCACCAAAGACTCAGTATCAATTACATGGGAACCTCCTTTGTTGG
-ATGGGGGATCCAAAATAAAAAATTACATTGTTGAGAAACGTGAAGCCACAAGAAAATCATATGCTGCTGT
-TGTAACTAACTGCCATAAGAATTCTTGGAAAATCGATCAGCTCCAAGAAGGTTGCAGTTATTACTTTAGA
-GTCACAGCTGAGAATGAGTATGGTATTGGCCTTCCTGCCCAGACTGCTGATCCAATTAAGGTTGCAGAAG
-TGCCACAACCTCCTGGAAAAATAACTGTGGATGATGTCACCAGAAACAGTGTCTCTCTGAGTTGGACAAA
-ACCTGAACATGATGGTGGCAGTAAAATCATTCAGTATATTGTGGAAATGCAAGCTAAACACAGTGAGAAA
-TGGTCAGAGTGTGCTCGAGTAAAGTCTCTTCAGGCAGTAATTACCAACCTAACTCAAGGGGAAGAATATC
-TTTTTAGAGTTGTTGCTGTAAATGAAAAGGGGAGAAGTGATCCTCGGTCCCTTGCAGTTCCAATAGTTGC
-CAAAGATCTGGTAATTGAGCCAGATGTAAAACCTGCATTCAGTAGTTACAGTGTACAGGTTGGCCAAGAT
-TTGAAAATAGAAGTGCCAATTTCTGGACGTCCTAAGCCAACCATTACCTGGACTAAAGATGGTCTCCCAC
-TGAAGCAGACCACAAGAATCAATGTTACCGATTCACTGGATCTCACCACACTCAGTATTAAAGAAACTCA
-TAAGGATGATGGTGGACAATATGGAATCACAGTTGCCAATGTTGTTGGTCAGAAGACAGCATCCATCGAA
-ATTGTAACTCTAGATAAACCTGATCCTCCAAAAGGACCTGTTAAATTTGATGACGTCAGTGCTGAAAGTA
-TTACATTATCTTGGAACCCTCCATTATATACAGGGGGCTGCCAAATCACCAACTACATTGTTCAGAAAAG
-AGATACAACCACCACAGTATGGGATGTTGTTTCTGCTACTGTTGCTAGAACTACACTCAAAGTGACCAAA
-CTGAAAACTGGTACAGAATACCAATTTAGAATATTTGCCGAAAACAGATATGGACAAAGCTTTGCCTTAG
-AGTCTGATCCAATTGTAGCTCAATATCCCTACAAAGAACCAGGCCCTCCAGGTACACCATTTGCCACAGC
-CATTTCCAAAGACTCCATGGTCATACAGTGGCATGAACCAGTCAACAATGGTGGAAGCCCCGTCATAGGT
-TACCACCTGGAGAGAAAAGAAAGAAACAGTATTTTGTGGACAAAGGTCAACAAAACTATTATTCATGACA
-CCCAATTCAAAGCACAGAATCTTGAAGAAGGCATTGAATATGAATTCAGAGTGTATGCTGAAAATATTGT
-TGGTGTAGGCAAAGCAAGCAAGAATTCTGAATGCTATGTAGCCAGAGATCCCTGTGACCCACCAGGAACC
-CCAGAACCAATAATGGTTAAAAGAAATGAAATCACTTTACAGTGGACCAAACCTGTGTATGATGGTGGAA
-GTATGATTACAGGCTACATTGTAGAGAAACGTGATTTGCCTGATGGTCGTTGGATGAAAGCTAGCTTTAC
-AAATGTCATTGAAACTCAATTTACTGTGTCAGGTCTTACTGAAGATCAAAGATATGAATTCAGAGTCATT
-GCAAAGAATGCAGCTGGTGCAATAAGTAAACCCTCTGACAGTACTGGACCAATAACTGCCAAGGATGAGG
-TTGAACTCCCAAGAATTTCAATGGATCCAAAATTCAGAGACACAATTGTGGTAAATGCTGGAGAAACATT
-CAGACTTGAGGCTGATGTCCATGGAAAGCCCCTACCTACCATTGAGTGGTTAAGAGGAGATAAGGAAATT
-GAAGAATCTGCTAGATGTGAAATAAAGAACACAGATTTCAAGGCTTTACTTATTGTAAAAGATGCAATTA
-GAATTGATGGTGGGCAGTATATTTTAAGAGCTTCCAATGTTGCAGGTTCTAAGTCATTCCCAGTAAATGT
-AAAAGTATTAGATAGACCAGGACCTCCAGAAGGGCCAGTCCAGGTTACTGGAGTCACTTCTGAAAAATGC
-TCTTTAACATGGTCTCCACCACTTCAAGATGGTGGCAGTGACATTTCTCACTATGTTGTTGAAAAGCGAG
-AAACCAGTCGACTTGCCTGGACTGTTGTTGCTTCAGAAGTTGTGACCAATTCTCTGAAAGTTACCAAACT
-CTTAGAAGGTAATGAATATGTTTTCCGTATAATGGCTGTCAACAAATATGGTGTTGGAGAGCCTTTGGAA
-TCTGCACCAGTACTAATGAAAAATCCATTTGTGCTTCCTGGACCACCAAAAAGCTTGGAAGTCACAAATA
-TTGCCAAAGACTCCATGACCGTCTGTTGGAACCGTCCAGATAGTGATGGTGGAAGTGAGATTATTGGTTA
-CATTGTAGAGAAAAGAGACAGAAGTGGCATTCGATGGATAAAATGTAATAAACGCCGCATTACAGATTTG
-CGTCTAAGAGTGACAGGATTAACAGAAGATCATGAGTATGAATTCAGGGTCTCTGCAGAAAATGCTGCTG
-GAGTTGGGGAACCAAGTCCAGCTACAGTTTATTATAAAGCCTGTGATCCTGTGTTCAAACCTGGCCCACC
-TACCAATGCACACATTGTAGACACCACTAAAAATTCAATCACACTTGCCTGGGGTAAACCCATCTATGAT
-GGCGGCAGTGAGATCTTGGGATATGTAGTAGAAATCTGTAAAGCAGATGAAGAAGAATGGCAAATAGTTA
-CTCCACAGACTGGCCTGAGAGTCACTCGATTTGAAATTTCAAAACTCACTGAACACCAAGAGTATAAAAT
-ACGAGTCTGTGCCCTCAACAAAGTTGGTTTAGGTGAGGCTACATCAGTTCCTGGTACTGTGAAACCAGAA
-GATAAACTTGAAGCACCTGAACTTGACCTTGACTCCGAATTAAGAAAAGGAATTGTTGTAAGAGCTGGTG
-GATCTGCCAGAATTCACATTCCATTCAAAGGTCGTCCAACGCCTGAGATCACTTGGTCTCGAGAGGAAGG
-TGAATTCACAGATAAGGTCCAAATTGAAAAGGGAGTAAACTATACCCAACTATCAATAGATAACTGTGAT
-AGAAATGATGCTGGAAAATACATTCTTAAGTTGGAAAACAGCAGTGGATCAAAGTCTGCTTTTGTAACTG
-TGAAAGTTCTTGACACTCCAGGACCACCACAGAATTTGGCAGTCAAAGAAGTGAGAAAAGATTCTGCCTT
-CCTGGTATGGGAGCCACCCATCATTGATGGAGGGGCAAAGGTCAAGAACTATGTGATTGACAAACGTGAG
-TCAACCAGAAAAGCGTATGCTAATGTGAGTAGTAAATGCAGCAAAACAAGTTTTAAAGTGGAAAACCTTA
-CAGAAGGAGCCATTTATTACTTCAGAGTCATGGCTGAAAATGAATTTGGAGTTGGTGTTCCAGTGGAAAC
-TGTTGATGCCGTGAAAGCTGCTGAACCTCCTTCCCCACCAGGAAAGGTTACACTCACTGATGTGTCCCAG
-ACCAGTGCATCACTTATGTGGGAGAAACCTGAACATGATGGCGGTAGCAGAGTCCTGGGGTACGTTGTTG
-AAATGCAGCCCAAAGGAACTGAAAAATGGAGCATTGTGGCTGAATCCAAAGTCTGTAATGCAGTTGTTAC
-TGGTTTGAGTTCTGGACAAGAATATCAGTTCCGTGTCAAGGCTTATAATGAGAAAGGAAAAAGCGATCCA
-AGAGTGTTGGGTGTTCCTGTCATAGCCAAGGACTTGACTATACAGCCTAGTTTAAAGTTACCATTTAACA
-CATATAGTATCCAAGCTGGAGAAGATCTTAAAATAGAAATTCCAGTTATAGGCCGACCAAGACCTAACAT
-TTCTTGGGTCAAAGATGGTGAGCCTCTTAAACAGACAACAAGAGTAAACGTTGAAGAAACAGCTACCTCA
-ACTGTTTTGCACATTAAAGAAGGTAACAAAGATGACTTTGGAAAATACACCGTAACGGCAACAAATAGTG
-CAGGCACAGCAACAGAAAATCTCAGTGTTATCGTTTTAGAAAAGCCTGGACCTCCAGTTGGCCCAGTTCG
-GTTTGATGAAGTTAGTGCAGACTTTGTAGTCATATCTTGGGAACCTCCAGCCTATACTGGTGGCTGCCAA
-ATAAGCAACTACATTGTAGAGAAGCGAGATACAACCACCACCACTTGGCACATGGTATCAGCAACAGTTG
-CAAGAACAACAATTAAAATAACCAAACTGAAAACAGGCACGGAGTACCAGTTTAGAATTTTTGCTGAAAA
-CAGGTATGGAAAAAGTGCCCCACTGGATTCTAAGGCAGTTATTGTACAATATCCATTTAAAGAACCTGGA
-CCACCTGGAACTCCTTTTGTGACATCAATCTCAAAAGATCAGATGCTTGTGCAATGGCATGAGCCAGTGA
-ATGATGGAGGCACCAAAATTATTGGCTACCATCTTGAACAGAAAGAAAAGAACAGTATTTTATGGGTCAA
-GTTAAATAAGACCCCCATTCAGGACACCAAATTCAAAACAACTGGGCTTGATGAGGGCCTTGAGTATGAG
-TTCAAAGTTTCTGCTGAAAATATTGTTGGCATTGGCAAGCCTAGCAAAGTGTCAGAATGCTTTGTTGCTC
-GTGATCCATGTGACCCACCTGGTCGCCCTGAAGCCATTGTTATTACAAGAAACAATGTCACACTGAAATG
-GAAGAAACCTGCCTATGATGGTGGTAGCAAAATAACAGGTTATATTGTAGAAAAGAAAGATCTACCTGAT
-GGCCGCTGGATGAAAGCCAGCTTTACCAACGTATTAGAAACTGAATTTACAGTGAGTGGACTTGTAGAAG
-ACCAAAGATATGAATTTAGAGTAATTGCAAGAAATGCAGCTGGAAACTTTAGTGAACCATCTGATAGTAG
-TGGTGCCATTACTGCAAGAGATGAAATTGATGCACCAAATGCCTCTCTGGATCCAAAATATAAAGATGTC
-ATCGTTGTTCATGCAGGAGAGACTTTTGTTCTTGAAGCCGACATCCGTGGCAAACCTATACCTGATGTTG
-TTTGGTCAAAAGATGGAAAAGAACTTGAAGAAACAGCTGCTAGAATGGAAATTAAATCTACTATTCAGAA
-AACAACTCTTGTTGTCAAAGACTGTATACGGACTGATGGAGGACAATATATTCTGAAACTCAGCAATGTT
-GGTGGTACAAAGTCTATACCCATCACTGTAAAGGTACTTGACAGGCCAGGGCCTCCTGAAGGGCCTCTGA
-AAGTTACTGGAGTTACTGCGGAAAAATGTTACCTGGCATGGAACCCACCTTTGCAAGATGGTGGTGCTAA
-TATTTCACATTACATCATTGAAAAGAGGGAGACAAGCCGACTCTCTTGGACCCAGGTTTCAACTGAGGTA
-CAGGCCCTTAACTACAAAGTTACTAAACTTCTTCCTGGTAATGAGTACATTTTCCGTGTCATGGCTGTGA
-ATAAATATGGAATTGGAGAGCCCTTGGAATCTGGGCCTGTTACGGCCTGTAATCCTTATAAGCCACCAGG
-TCCTCCCTCAACACCTGAAGTCTCAGCAATCACCAAAGATTCTATGGTAGTAACATGGGCACGCCCAGTA
-GACGACGGAGGTACCGAAATTGAGGGCTACATTCTTGAAAAACGAGATAAGGAAGGCGTTAGATGGACCA
-AGTGCAACAAGAAAACATTAACGGATCTGCGGCTCAGGGTAACTGGTCTTACCGAAGGCCATTCCTATGA
-ATTCAGAGTTGCTGCTGAAAATGCAGCTGGTGTGGGAGAACCTAGTGAGCCATCTGTTTTCTACCGTGCG
-TGTGATGCCTTGTATCCACCAGGTCCCCCAAGCAATCCAAAAGTGACGGACACTTCCAGATCTTCTGTCT
-CCCTGGCATGGAGTAAGCCAATTTATGATGGTGGCGCACCTGTTAAAGGCTATGTTGTAGAGGTCAAAGA
-AGCTGCTGCGGATGAATGGACAACCTGCACTCCACCAACAGGATTACAAGGAAAGCAGTTCACAGTGACC
-AAGCTTAAAGAAAACACTGAATATAACTTCCGTATTTGTGCCATCAATTCTGAAGGTGTAGGTGAACCTG
-CAACTCTACCTGGCTCAGTGGTTGCTCAGGAGAGGATAGAGCCACCAGAAATAGAACTCGATGCTGATCT
-CAGAAAGGTGGTCGTTCTGCGTGCAAGTGCTACTTTACGCTTATTTGTCACTATCAAAGGTCGACCAGAA
-CCCGAAGTTAAATGGGAAAAGGCAGAAGGCATTCTCACTGACAGGGCTCAGATAGAGGTGACCAGCTCAT
-TTACAATGTTGGTGATTGATAATGTTACCAGATTTGACAGTGGTCGGTATAATCTGACATTAGAAAATAA
-TAGTGGCTCCAAAACAGCTTTTGTTAACGTCAGAGTTCTTGACTCACCAAGTGCCCCTGTGAATTTGACC
-ATAAGAGAAGTGAAGAAAGACTCAGTGACGTTGTCCTGGGAACCACCACTTATTGATGGTGGAGCTAAGA
-TTACAAACTACATTGTCGAAAAACGAGAAACTACAAGAAAAGCCTATGCTACCATTACAAATAATTGCAC
-TAAAACTACTTTCAGAATTGAAAATCTACAAGAAGGATGTTCTTACTACTTCCGAGTCTTGGCTTCCAAT
-GAATATGGGATTGGTTTGCCAGCTGAAACAACAGAACCCGTTAAAGTGTCTGAACCACCCCTCCCACCTG
-GAAGAGTAACTCTTGTTGATGTGACCCGTAATACAGCTACAATTAAGTGGGAGAAACCAGAAAGTGATGG
-TGGCAGCAAAATTACTGGTTATGTGGTTGAAATGCAGACTAAAGGGAGTGAAAAGTGGAGCACCTGCACA
-CAAGTTAAGACTCTAGAAGCAACTATATCTGGCTTAACTGCAGGAGAAGAGTATGTCTTCAGGGTAGCTG
-CAGTTAACGAAAAGGGAAGAAGTGATCCAAGACAACTTGGAGTGCCAGTAATTGCAAGGGATATTGAAAT
-AAAGCCTTCAGTTGAGCTTCCTTTCCATACTTTCAATGTAAAGGCTAGAGAACAACTTAAGATTGATGTG
-CCATTCAAAGGAAGACCTCAAGCTACTGTGAACTGGAGAAAAGATGGTCAGACTCTTAAAGAGACAACTA
-GAGTCAATGTTTCTTCTTCAAAGACTGTAACATCACTATCTATTAAGGAAGCTTCAAAGGAAGATGTTGG
-AACTTATGAATTATGTGTTTCAAACAGTGCTGGATCCATAACAGTTCCTATTACTATAATTGTCCTTGAC
-AGACCAGGACCTCCAGGTCCTATACGTATTGATGAGGTTAGTTGTGACAGCATAACCATTTCTTGGAATC
-CTCCAGAATATGATGGTGGCTGCCAAATTAGCAATTACATTGTTGAAAAGAAAGAAACCACCTCTACAAC
-ATGGCACATAGTTTCACAAGCAGTTGCAAGAACATCCATTAAAATAGTTCGCCTGACAACAGGAAGTGAG
-TATCAGTTCCGTGTTTGTGCAGAAAACCGCTATGGAAAGAGCTCCTACAGTGAATCTTCAGCTGTTGTTG
-CAGAGTATCCATTCAGTCCCCCAGGTCCTCCTGGTACTCCTAAAGTTGTGCATGCCACAAAATCTACCAT
-GCTTGTAACCTGGCAAGTGCCAGTTAATGATGGAGGAAGTCGAGTAATTGGCTATCATCTTGAGTATAAA
-GAAAGAAGCAGCATTCTTTGGTCAAAAGCAAATAAAATCCTCATTGCTGATACTCAAATGAAAGTCTCCG
-GCCTTGATGAAGGACTGATGTATGAGTATCGTGTATATGCTGAAAATATTGCTGGAATTGGTAAATGCAG
-TAAATCTTGTGAACCAGTCCCTGCAAGAGATCCTTGTGACCCTCCTGGACAACCTGAAGTCACAAATATC
-ACAAGAAAATCAGTGTCACTTAAATGGTCTAAACCACATTATGATGGTGGAGCTAAGATCACAGGATACA
-TTGTTGAACGCAGAGAACTACCAGATGGCCGGTGGCTGAAGTGCAATTATACTAATATACAAGAAACATA
-CTTTGAAGTAACTGAACTTACTGAAGATCAGCGTTATGAATTCCGGGTTTTTGCAAGGAATGCTGCTGAC
-TCAGTTAGTGAGCCATCTGAATCCACTGGGCCTATTATAGTTAAAGATGATGTTGAGCCTCCAAGAGTTA
-TGATGGATGTCAAGTTCCGAGACGTTATTGTTGTCAAAGCTGGAGAGGTCCTTAAGATAAATGCAGACAT
-TGCAGGGCGACCTCTGCCAGTAATTTCCTGGGCCAAGGATGGTATAGAAATTGAAGAAAGAGCAAGAACA
-GAAATCATCTCAACAGACAATCATACTTTGTTAACAGTTAAAGACTGTATAAGACGAGACACTGGGCAAT
-ATGTACTAACACTGAAGAATGTTGCCGGCACTCGGTCTGTGGCCGTTAATTGCAAAGTACTTGATAAGCC
-TGGTCCACCAGCAGGACCACTTGAAATAAATGGCCTCACTGCTGAGAAATGCTCTCTTTCCTGGGGACGT
-CCCCAAGAAGATGGTGGTGCAGATATCGACTATTACATCGTAGAAAAACGTGAAACAAGCCACCTTGCAT
-GGACAATATGTGAAGGAGAGTTACAGATGACATCCTGTAAAGTAACCAAGTTACTCAAAGGCAATGAATA
-TATATTTAGAGTAACTGGTGTTAATAAATATGGTGTTGGTGAGCCCCTAGAGAGTGTAGCTATAAAGGCA
-CTAGATCCATTTACAGTTCCAAGTCCACCCACGTCTTTGGAAATTACTTCTGTGACCAAAGAATCTATGA
-CACTTTGCTGGTCAAGACCCGAGAGTGATGGAGGTAGTGAAATATCTGGATATATAATTGAAAGGCGAGA
-GAAAAATAGCCTAAGATGGGTGCGTGTAAACAAAAAACCAGTTTATGATCTAAGAGTGAAATCAACAGGA
-CTTCGGGAAGGATGTGAATATGAATATCGTGTTTATGCAGAAAATGCTGCTGGCCTAAGTCTTCCAAGTG
-AAACCTCTCCCTTAATTAGGGCAGAAGATCCAGTGTTCCTACCATCTCCTCCATCCAAACCCAAAATTGT
-GGACTCAGGCAAGACAACTATAACTATTGCCTGGGTTAAGCCGCTGTTTGATGGTGGGGCCCCGATAACT
-GGATATACTGTAGAATACAAAAAATCTGATGACACTGACTGGAAAACTTCCATTCAGAGCTTACGAGGGA
-CAGAATATACAATAAGCGGACTAACAACAGGAGCTGAATATGTTTTCAGAGTAAAATCTGTCAATAAGGT
-TGGTGCTAGTGACCCCAGTGATAGCTCTGACCCTCAGATAGCAAAGGAAAGAGAAGAAGAACCTTTATTT
-GATATTGACAGTGAAATGAGGAAGACCTTGATTGTCAAGGCTGGTGCCTCATTTACCATGACTGTGCCTT
-TCCGAGGAAGACCAGTACCCAATGTCTTGTGGAGTAAGCCAGACACTGACCTCCGTACTAGAGCTTATGT
-TGATACCACAGACTCCCGTACATCACTGACCATTGAAAATGCCAACAGAAATGACTCTGGAAAGTACACA
-TTAACAATTCAGAATGTTTTGAGTGCTGCTTCACTGACCTTAGTTGTCAAAGTTTTAGATACCCCAGGTC
-CTCCAACCAACATTACTGTGCAAGATGTAACCAAAGAGTCTGCAGTGTTATCCTGGGATGTTCCTGAAAA
-CGATGGTGGAGCACCAGTGAAGAATTACCACATAGAAAAACGTGAGGCCAGCAAGAAAGCATGGGTCTCT
-GTGACCAACAACTGTAACCGCCTCTCCTACAAAGTTACCAATTTACAAGAAGGAGCTATCTATTACTTCA
-GAGTCTCTGGAGAAAATGAGTTTGGTGTTGGTATACCAGCTGAAACAAAGGAAGGAGTAAAAATAACAGA
-AAAACCAAGCCCACCTGAAAAACTTGGAGTAACAAGTATATCCAAAGACAGTGTTTCCCTGACCTGGCTG
-AAGCCTGAACATGATGGCGGAAGCAGAATTGTACACTATGTCGTTGAAGCACTAGAAAAAGGACAGAAAA
-ACTGGGTTAAATGTGCAGTGGCAAAGTCAACCCATCACGTTGTTTCCGGTCTGAGAGAGAATTCTGAATA
-CTTTTTCCGAGTGTTTGCTGAAAATCAAGCTGGCCTGAGTGACCCGAGAGAGCTTCTGCTTCCTGTTCTT
-ATTAAGGAGCAACTAGAACCACCTGAAATTGATATGAAGAATTTCCCAAGTCACACTGTATATGTTAGAG
-CTGGTTCAAACCTTAAAGTTGACATTCCAATCTCTGGAAAACCACTTCCCAAAGTGACCTTATCAAGAGA
-TGGTGTCCCCCTTAAGGCAACCATGAGATTTAATACCGAAATTACTGCTGAGAACCTGACCATCAATCTC
-AAAGAAAGTGTTACAGCTGACGCTGGGAGATATGAAATCACTGCTGCCAACTCCAGTGGTACAACCAAAG
-CTTTCATTAACATTGTTGTGCTAGACAGGCCTGGTCCTCCAACTGGCCCTGTTGTTATTAGTGATATAAC
-TGAAGAAAGTGTGACTCTCAAATGGGAGCCACCTAAGTATGACGGTGGAAGTCAAGTTACCAACTACATT
-CTACTCAAAAGAGAAACAAGTACTGCAGTGTGGACTGAAGTGTCTGCAACAGTTGCAAGAACCATGATGA
-AAGTCATGAAACTGACCACAGGAGAAGAATACCAATTCCGCATCAAGGCAGAAAACCGCTTTGGCATCAG
-TGATCATATAGATTCAGCTTGTGTGACTGTCAAACTACCATACACAACACCTGGACCACCATCTACACCA
-TGGGTCACTAATGTTACTCGAGAAAGCATCACTGTGGGCTGGCATGAACCAGTGTCAAATGGAGGCAGTG
-CAGTCGTAGGCTATCACCTGGAAATGAAAGACAGAAACAGTATTTTATGGCAAAAAGCCAACAAACTGGT
-CATCCGCACAACTCACTTCAAAGTCACAACAATCAGTGCTGGACTTATTTATGAATTCAGGGTGTATGCA
-GAAAATGCTGCTGGAGTTGGAAAACCTAGCCATCCTTCTGAACCAGTCTTGGCAATTGATGCTTGTGAAC
-CCCCAAGAAATGTTCGTATCACTGATATTTCAAAGAACTCTGTCAGCCTTTCATGGCAACAACCAGCTTT
-CGATGGAGGTAGCAAGATTACAGGCTACATTGTTGAGAGACGTGACCTTCCAGATGGCAGATGGACCAAG
-GCCAGCTTCACCAATGTTACTGAAACTCAATTCATCATCTCTGGCTTGACTCAGAATTCCCAGTATGAAT
-TCCGTGTCTTTGCTAGGAATGCTGTTGGTTCCATTAGCAATCCATCTGAGGTTGTAGGGCCCATTACTTG
-CATCGATTCTTATGGTGGTCCTGTAATTGATTTGCCTCTAGAATATACAGAAGTTGTCAAATACAGAGCA
-GGTACATCTGTGAAGCTCAGAGCTGGCATTTCTGGCAAACCTGCGCCTACTATTGAGTGGTATAAAGATG
-ATAAAGAATTACAAACCAATGCACTGGTGTGTGTTGAAAATACCACGGACCTCGCATCTATACTCATCAA
-AGATGCCGATCGCCTTAATAGTGGATGCTATGAATTAAAACTAAGGAATGCCATGGGCTCAGCCTCAGCC
-ACCATCAGAGTACAGATCCTTGACAAACCAGGCCCACCTGGTGGACCAATTGAATTTAAGACTGTAACTG
-CTGAGAAGATCACCCTTCTCTGGCGGCCTCCAGCTGATGATGGTGGTGCAAAAATCACTCACTACATTGT
-GGAAAAGCGTGAGACAAGCCGCGTTGTGTGGTCTATGGTGTCTGAACATTTGGAAGAGTGCATCATTACA
-ACCACCAAAATTATCAAAGGAAATGAATACATCTTCCGGGTCCGAGCCGTGAACAAATATGGAATTGGCG
-AGCCACTGGAATCTGATTCCGTTGTAGCCAAGAACGCATTTGTTACACCTGGGCCACCAGGCATACCAGA
-AGTGACAAAGATTACCAAGAATTCGATGACTGTTGTATGGAGCAGGCCAATTGCAGATGGCGGTAGTGAT
-ATAAGTGGCTATTTCCTTGAAAAACGAGACAAGAAGAGCCTAGGATGGTTTAAAGTACTAAAAGAGACTA
-TCCGTGACACCAGACAAAAAGTAACAGGACTCACAGAAAACAGTGACTATCAATACAGAGTTTGTGCTGT
-AAACGCTGCTGGACAGGGTCCATTTTCTGAACCATCTGAATTCTACAAAGCTGCTGATCCTATTGATCCT
-CCAGGTCCACCTGCTAAGATAAGAATCGCAGATTCAACCAAGTCATCCATCACCCTTGGCTGGAGTAAGC
-CTGTCTATGATGGGGGCAGTGCTGTTACTGGGTATGTTGTCGAGATAAGACAAGGAGAGGAAGAGGAATG
-GACTACTGTCTCTACCAAAGGAGAGGTCAGAACTACAGAATATGTGGTATCCAACCTGAAACCTGGAGTC
-AATTACTACTTCCGGGTATCTGCTGTAAACTGTGCTGGACAAGGAGAACCTATAGAAATGAATGAACCTG
-TACAAGCTAAAGATATACTTGAGGCACCAGAGATTGACCTGGATGTGGCTCTCAGAACTTCTGTTATTGC
-CAAAGCTGGTGAAGATGTACAAGTGTTGATTCCCTTTAAAGGCAGACCTCCACCTACTGTCACATGGAGA
-AAAGATGAGAAGAATCTTGGCAGTGATGCCAGATACAGCATTGAAAACACTGATTCATCCTCATTACTCA
-CCATTCCTCAAGTTACTCGCAATGATACAGGAAAATATATTCTCACAATAGAAAATGGAGTTGGTGAACC
-TAAGTCTTCAACTGTGAGTGTTAAAGTGCTTGACACACCAGCTGCCTGCCAGAAACTACAGGTTAAACAT
-GTTTCTCGAGGCACAGTCACTTTGCTCTGGGATCCTCCTCTCATTGATGGAGGATCTCCAATAATTAATT
-ATGTCATTGAAAAGAGAGATGCCACCAAGAGAACATGGTCTGTCGTGTCACACAAATGTTCTAGCACATC
-CTTCAAGCTAATAGATTTGTCGGAGAAGACTCCATTCTTCTTCAGAGTTCTTGCAGAAAATGAAATTGGA
-ATTGGGGAACCCTGTGAAACTACAGAGCCAGTGAAGGCTGCTGAAGTACCAGCTCCTATACGTGATCTCT
-CAATGAAAGACTCAACAAAGACATCTGTCATCCTCAGCTGGACCAAACCTGACTTTGATGGTGGTAGCGT
-CATCACAGAATATGTTGTAGAAAGGAAAGGTAAAGGTGAACAGACGTGGTCCCACGCTGGCATAAGTAAG
-ACATGTGAAATTGAGGTTAGCCAACTTAAGGAGCAGTCAGTCCTGGAGTTCAGAGTGTTTGCCAAAAATG
-AGAAAGGACTGAGTGATCCTGTCACTATTGGGCCAATTACAGTGAAAGAACTTATTATTACACCTGAAGT
-TGACCTGTCAGATATCCCTGGGGCACAAGTCACTGTGAGAATTGGGCACAATGTGCACCTTGAATTACCT
-TATAAGGGAAAACCCAAACCATCCATCAGTTGGCTGAAAGATGGCTTGCCACTGAAAGAAAGTGAATTTG
-TTCGCTTCAGTAAAACTGAAAACAAAATTACTTTGAGTATTAAGAATGCCAAGAAGGAGCATGGAGGAAA
-ATACACTGTTATTCTTGATAATGCAGTGTGTAGAATTGCAGTCCCCATTACAGTCATCACCCTTGGCCCA
-CCATCAAAGCCCAAAGGACCCATTCGATTTGATGAAATCAAGGCTGATAGTGTCATCCTGTCATGGGATG
-TACCTGAAGATAATGGAGGAGGAGAAATTACTTGTTACAGCATCGAGAAGCGGGAAACTTCACAAACTAA
-CTGGAGGATGGTGTGTTCAAGTGTTGCCAGAACGACTTTCAAAGTTCCTAATCTAGTCAAAGATGCTGAG
-TACCAGTTTAGAGTGAGAGCAGAAAACAGATACGGAGTCAGCCAACCACTTGTCTCAAGCATTATTGTGG
-CAAAACACCAGTTCAGGATTCCTGGTCCCCCAGGAAAGCCAGTTATATACAATGTGACTTCTGATGGCAT
-GTCACTAACTTGGGATGCTCCAGTTTATGATGGTGGTTCAGAAGTTACTGGATTCCATGTTGAAAAGAAA
-GAAAGAAATAGCATCCTCTGGCAAAAAGTTAATACATCACCAATCTCTGGAAGAGAATATAGAGCCACTG
-GACTGGTAGAAGGTCTGGATTACCAATTCCGTGTATATGCTGAAAATTCTGCTGGCCTAAGCTCACCTAG
-TGACCCAAGCAAATTTACCTTAGCTGTTTCTCCAGTAGACCCACCTGGCACTCCTGACTACATTGATGTC
-ACCCGGGAAACCATCACACTTAAATGGAACCCACCATTGCGTGATGGAGGCAGTAAGATTGTGGGCTATA
-GCATTGAGAAACGGCAAGGAAATGAACGCTGGGTGAGATGCAACTTTACTGACGTCAGTGAATGTCAGTA
-CACAGTTACAGGACTCAGTCCTGGGGATCGCTATGAGTTCAGAATAATTGCAAGAAATGCTGTTGGAACT
-ATAAGCCCGCCCTCACAGTCTTCTGGCATTATTATGACAAGAGATGAAAATGTTCCACCAATAGTAGAGT
-TTGGCCCTGAATACTTTGATGGTCTCATTATTAAGTCCGGAGAGAGCCTTAGAATTAAAGCTTTGGTACA
-AGGAAGACCAGTGCCTCGAGTAACTTGGTTCAAAGATGGAGTGGAAATCGAAAAGAGGATGAATATGGAA
-ATAACCGACGTACTTGGATCCACCAGCCTATTTGTTAGAGATGCTACTCGGGACCATCGTGGTGTATACA
-CAGTGGAAGCCAAAAATGCATCTGGTTCTGCAAAAGCAGAAATTAAAGTGAAAGTACAAGATACACCAGG
-AAAAGTAGTTGGGCCAATAAGATTCACCAATATTACTGGGGAGAAGATGACTCTGTGGTGGGATGCCCCA
-CTCAATGACGGTTGTGCTCCCATAACCCACTACATCATTGAAAAACGGGAAACCAGCAGACTTGCCTGGG
-CACTAATTGAGGATAAATGTGAAGCCCAAAGTTACACTGCCATTAAACTAATAAACGGCAATGAATACCA
-ATTCCGTGTTTCTGCAGTTAACAAGTTTGGTGTTGGCAGGCCACTTGATTCTGATCCAGTGGTTGCTCAA
-ATACAATATACTGTTCCTGATGCCCCTGGCATTCCAGAACCTAGCAACATAACAGGCAACAGCATTACCC
-TGACATGGGCAAGGCCAGAATCAGATGGTGGCAGTGAAATTCAACAGTATATCCTTGAAAGAAGAGAAAA
-GAAAAGCACAAGATGGGTAAAAGTGATCAGCAAACGACCAATCTCTGAAACAAGATTCAAAGTCACTGGT
-CTGACAGAAGGCAATGAGTATGAATTCCATGTCATGGCTGAAAATGCTGCAGGAGTTGGACCTGCAAGTG
-GCATCTCAAGACTCATTAAATGTAGAGAGCCCGTCAACCCACCAGGTCCTCCCACAGTGGTCAAAGTAAC
-AGACACATCAAAGACAACTGTGAGCTTAGAATGGTCCAAACCAGTGTTTGATGGTGGCATGGAAATAATT
-GGGTATATTATTGAAATGTGTAAGGCCGACTTAGGAGACTGGCACAAGGTGAATGCAGAGGCATGTGTGA
-AAACAAGATATACAGTCACTGATCTACAAGCAGGTGAAGAATACAAATTCCGAGTTAGTGCTATCAATGG
-TGCTGGAAAAGGCGACAGCTGTGAAGTGACTGGCACAATTAAAGCAGTTGACCGGTTAACAGCTCCTGAG
-TTAGACATAGATGCAAACTTCAAACAGACTCATGTTGTTAGAGCTGGGGCCAGTATTCGCCTCTTCATTG
-CCTACCAAGGTAGACCTACTCCTACAGCTGTGTGGAGCAAACCAGACTCTAACCTTAGCCTTCGGGCTGA
-TATCCATACAACAGATTCCTTCAGCACCCTCACTGTGGAAAACTGCAACAGAAATGATGCAGGGAAATAT
-ACCCTTACTGTGGAAAACAACAGTGGTAGTAAGTCAATCACATTCACCGTGAAAGTGCTAGACACTCCAG
-GCCCACCTGGCCCAATTACCTTCAAAGATGTGACCCGGGGATCTGCTACATTGATGTGGGATGCCCCTCT
-TCTTGACGGTGGTGCCCGAATCCATCATTATGTGGTAGAGAAACGAGAGGCAAGTCGCCGTAGTTGGCAG
-GTTATCAGTGAAAAATGCACTCGTCAGATCTTCAAGGTCAATGACCTGGCCGAAGGTGTTCCGTACTATT
-TCCGTGTTTCTGCAGTAAATGAGTATGGTGTTGGTGAGCCCTATGAAATGCCAGAACCAATTGTAGCCAC
-AGAACAGCCTGCTCCACCTAGGAGACTTGATGTTGTTGATACTAGCAAATCCTCCGCAGTCTTAGCTTGG
-CTTAAACCTGACCACGATGGAGGCAGCCGGATCACTGGCTACCTGCTTGAAATGAGACAAAAGGGATCTG
-ACTTCTGGGTTGAAGCTGGTCACACCAAACAGCTAACTTTCACAGTAGAGCGTCTTGTTGAGAAAACTGA
-ATATGAATTCCGTGTGAAGGCCAAGAATGATGCTGGCTATAGTGAACCCAGAGAAGCCTTCTCTTCTGTC
-ATCATTAAGGAGCCTCAAATCGAGCCCACTGCTGACCTCACTGGAATTACCAATCAGCTTATAACTTGCA
-AAGCAGGAAGCCCATTTACCATTGACGTACCAATCAGTGGTCGTCCTGCCCCCAAAGTAACATGGAAACT
-GGAAGAAATGAGACTTAAAGAGACAGATCGAGTGAGCATTACAACAACAAAAGACAGAACCACACTGACT
-GTAAAGGACAGCATGAGAGGTGACTCTGGAAGATACTTCTTGACCCTGGAAAATACAGCTGGTGTTAAAA
-CATTTAGCGTCACAGTTGTGGTCATTGGAAGGCCAGGTCCAGTAACCGGCCCCATTGAGGTCTCATCTGT
-CTCAGCTGAATCGTGTGTCCTGTCATGGGGAGAACCTAAAGATGGAGGAGGCACTGAAATTACTAATTAC
-ATAGTTGAAAAGCGTGAATCGGGTACAACAGCTTGGCAGCTTGTCAATTCCAGTGTCAAGCGCACTCAAA
-TTAAAGTCACTCATCTCACAAAATACATGGAATATTCTTTCCGTGTCAGTTCAGAGAACAGATTTGGTGT
-CAGCAAACCTCTAGAATCAGCACCAATAATTGCTGAACATCCATTTGTCCCACCAAGCGCTCCTACCAGA
-CCTGAGGTCTACCATGTGTCTGCCAATGCCATGTCTATTCGTTGGGAAGAACCCTACCACGATGGTGGCA
-GTAAAATCATTGGCTACTGGGTTGAGAAGAAAGAACGTAATACAATTCTTTGGGTGAAAGAAAACAAAGT
-GCCATGCTTAGAGTGCAACTACAAAGTAACTGGTTTAGTAGAAGGACTGGAATATCAGTTCAGAACTTAT
-GCACTCAATGCTGCAGGTGTTAGCAAGGCCAGCGAAGCTTCAAGACCTATAATGGCTCAAAATCCAGTTG
-ATGCACCAGGCAGACCAGAGGTGACAGATGTCACAAGATCAACAGTATCACTGATTTGGTCTGCCCCAGC
-GTATGATGGAGGCAGCAAGGTTGTGGGCTACATCATAGAGCGTAAGCCAGTCAGTGAGGTAGGAGATGGT
-CGCTGGCTGAAGTGCAACTACACCATTGTATCTGACAATTTCTTCACCGTGACTGCTCTCAGTGAAGGAG
-ACACTTATGAGTTCCGTGTGTTAGCCAAGAATGCAGCAGGCGTAATTAGCAAAGGGTCTGAATCTACAGG
-CCCTGTCACTTGCCGAGATGAATACGCTCCACCCAAAGCCGAACTGGATGCCCGATTACACGGTGATCTG
-GTTACCATCAGAGCAGGTTCTGATCTTGTTCTGGATGCTGCAGTTGGTGGCAAACCTGAACCCAAAATTA
-TCTGGACCAAAGGAGACAAGGAGCTAGATCTCTGTGAAAAAGTCTCTTTGCAGTATACTGGCAAACGAGC
-AACTGCTGTGATCAAGTTCTGTGACAGAAGTGACAGTGGAAAATACACTTTAACAGTGAAAAATGCCAGC
-GGGACCAAGGCCGTGTCTGTCATGGTCAAAGTGCTTGATTCCCCTGGCCCATGTGGAAAGCTCACCGTCA
-GCAGAGTAACACAGGAGAAGTGCACTTTAGCCTGGAGCCTTCCGCAGGAAGACGGAGGAGCAGAAATCAC
-TCACTACATCGTGGAAAGACGCGAGACTAGCAGGCTCAACTGGGTGATTGTTGAAGGCGAATGCCCAACC
-CTATCCTATGTCGTTACCAGGCTCATCAAGAACAATGAGTACATATTCCGAGTGAGGGCAGTAAACAAAT
-ATGGCCCTGGTGTGCCTGTTGAATCAGAGCCAATTGTAGCCAGAAACTCATTCACTATTCCATCACCACC
-CGGCATACCTGAAGAAGTTGGGACTGGCAAAGAGCATATCATCATTCAGTGGACAAAACCTGAATCTGAT
-GGTGGCAATGAAATCAGCAACTACCTAGTAGACAAACGTGAGAAGAAGAGCCTGCGCTGGACACGTGTCA
-ACAAAGACTATGTGGTGTATGATACCAGGCTGAAGGTGACCAGCCTGATGGAGGGTTGTGATTACCAGTT
-CCGGGTGACCGCAGTGAATGCAGCTGGTAACAGTGAGCCCAGCGAAGCTTCCAACTTCATCTCATGCAGA
-GAACCATCATATACCCCTGGACCACCTTCTGCTCCAAGAGTTGTGGATACCACCAAACACAGCATTAGTT
-TGGCATGGACCAAACCCATGTACGATGGTGGTACTGACATTGTAGGATATGTTCTGGAAATGCAAGAGAA
-GGACACTGATCAGTGGTACCGAGTGCATACCAATGCCACAATAAGAAATACTGAATTCACTGTGCCAGAC
-CTTAAAATGGGCCAGAAATATTCCTTCAGAGTTGCTGCCGTGAACGTGAAGGGTATGAGCGAATACAGCG
-AATCAATTGCTGAAATTGAGCCCGTGGAAAGAATAGAAATACCAGATCTTGAGCTTGCAGATGATCTAAA
-GAAGACTGTGACCATCAGGGCTGGGGCCTCCTTGCGCTTGATGGTGTCTGTATCTGGAAGACCACCTCCT
-GTCATAACGTGGAGCAAGCAGGGCATTGACCTTGCAAGCCGGGCAATTATTGACACCACTGAGAGCTACT
-CATTGCTAATAGTGGACAAAGTTAATCGGTACGATGCTGGAAAATACACAATTGAAGCTGAAAACCAATC
-TGGCAAGAAATCAGCAACAGTCCTTGTTAAAGTCTATGATACTCCTGGTCCCTGTCCTTCAGTGAAAGTT
-AAGGAAGTATCAAGAGATTCTGTGACTATAACTTGGGAAATTCCCACGATTGATGGTGGAGCTCCAGTCA
-ACAATTACATCGTTGAGAAGCGTGAAGCTGCTATGAGAGCATTCAAAACAGTAACTACCAAATGCAGCAA
-GACACTTTACAGAATTTCTGGACTTGTAGAAGGAACCATGTACTATTTCAGAGTGCTGCCAGAAAATATT
-TATGGCATTGGAGAACCTTGTGAAACATCTGATGCAGTACTGGTCTCAGAAGTGCCTTTGGTGCCTGCAA
-AGCTAGAAGTGGTCGATGTCACCAAATCCACTGTTACCCTTGCCTGGGAAAAACCACTCTACGATGGTGG
-TAGCCGACTCACTGGATATGTTCTCGAGGCCTGCAAAGCTGGCACAGAGAGATGGATGAAGGTTGTCACC
-TTAAAACCCACAGTCCTAGAGCACACTGTTACTTCCTTAAATGAAGGTGAACAATACTTATTTAGAATAA
-GGGCACAAAATGAGAAAGGTGTGTCAGAACCAAGAGAGACTGTCACAGCCGTGACTGTACAAGACCTCAG
-AGTGTTGCCAACAATCGATCTTTCTACAATGCCTCAGAAGACCATCCATGTCCCAGCTGGCAGACCAGTA
-GAGCTGGTGATACCTATTGCTGGCCGTCCACCTCCTGCTGCTTCCTGGTTCTTTGCTGGTTCTAAACTGA
-GAGAATCAGAGCGTGTCACAGTTGAAACTCACACTAAAGTAGCTAAATTAACCATCCGTGAAACCACTAT
-CAGAGATACTGGAGAATACACACTTGAATTGAAGAATGTTACCGGAACTACTTCAGAAACCATTAAAGTT
-ATCATTCTTGACAAGCCTGGTCCACCAACAGGACCTATTAAGATTGATGAAATTGATGCTACATCAATTA
-CCATTTCCTGGGAACCACCTGAATTGGACGGTGGTGCTCCACTGAGTGGTTATGTGGTAGAACAACGTGA
-CGCTCATCGTCCAGGATGGCTGCCCGTTTCTGAATCAGTGACTAGGTCCACGTTTAAGTTTACCAGACTC
-ACCGAAGGAAATGAGTATGTGTTCCGTGTGGCTGCAACAAACCGCTTCGGGATTGGCTCTTACTTGCAGT
-CTGAGGTCATAGAGTGTCGCAGCAGCATCCGTATTCCTGGACCCCCAGAAACATTACAGATATTTGATGT
-TTCCCGTGATGGCATGACACTTACTTGGTACCCACCAGAGGATGACGGTGGCTCCCAAGTGACTGGATAT
-ATTGTGGAGCGCAAAGAAGTGAGAGCAGATCGATGGGTCCGTGTAAATAAAGTACCTGTGACAATGACAC
-GGTACCGCTCCACTGGCCTTACTGAAGGCTTAGAATATGAACACCGTGTCACAGCCATTAATGCAAGAGG
-GTCTGGGAAACCAAGTCGTCCTTCCAAACCCATCGTTGCCATGGATCCAATTGCTCCTCCAGGAAAGCCA
-CAAAACCCAAGAGTTACTGATACAACAAGGACATCAGTCTCCCTGGCCTGGAGTGTTCCAGAAGATGAAG
-GAGGATCTAAAGTCACAGGCTACTTGATTGAAATGCAAAAAGTAGATCAACATGAATGGACCAAGTGTAA
-CACCACTCCAACCAAGATTCGAGAGTATACTCTAACACACCTACCTCAGGGTGCAGAATACAGGTTCCGC
-GTCCTAGCTTGTAATGCTGGTGGACCTGGTGAGCCTGCTGAGGTACCAGGAACAGTCAAAGTCACTGAAA
-TGCTTGAATATCCTGATTATGAACTTGATGAAAGATACCAAGAAGGTATCTTTGTAAGGCAAGGTGGCGT
-CATCAGACTTACCATACCAATCAAAGGAAAACCATTCCCAATATGTAAATGGACCAAGGAAGGCCAGGAT
-ATTAGTAAGCGTGCCATGATTGCAACATCTGAAACACACACTGAGCTTGTGATCAAAGAAGCAGACAGGG
-GTGATTCTGGCACTTATGACCTGGTTCTGGAAAATAAATGTGGCAAGAAGGCTGTCTACATCAAGGTCAG
-GGTGATAGGAAGTCCCAACAGTCCAGAAGGGCCACTGGAATATGATGACATCCAAGTCCGCTCTGTGAGG
-GTCAGCTGGAGACCTCCTGCTGATGATGGTGGTGCTGACATCTTAGGCTACATCCTCGAGAGACGAGAAG
-TGCCTAAAGCCGCCTGGTATACCATTGATTCCAGAGTCCGAGGTACATCTCTGGTGGTAAAAGGCCTCAA
-AGAGAATGTAGAATACCATTTCCGTGTTTCAGCAGAAAACCAGTTTGGCATAAGCAAACCCTTGAAATCT
-GAGGAACCAGTCACACCAAAAACACCATTGAATCCTCCAGAACCTCCAAGCAATCCTCCAGAAGTACTCG
-ATGTAACCAAGAGTTCTGTTAGCTTGTCCTGGTCCCGGCCCAAAGATGATGGTGGTTCTAGAGTCACAGG
-CTACTACATCGAACGCAAAGAGACATCCACTGACAAGTGGGTCAGACACAACAAGACTCAGATCACCACC
-ACAATGTACACTGTCACAGGGCTTGTTCCCGATGCTGAGTATCAGTTCCGCATCATCGCACAGAATGATG
-TTGGCCTGAGTGAGACCAGCCCTGCTTCTGAACCAGTTGTTTGCAAAGATCCATTTGATAAACCAAGCCA
-ACCAGGAGAACTTGAGATTCTTTCAATATCCAAAGATAGTGTCACTCTACAGTGGGAGAAACCTGAATGT
-GATGGTGGTAAAGAAATTCTTGGATACTGGGTTGAATATAGACAGTCTGGAGACAGTGCCTGGAAGAAGA
-GCAATAAGGAACGTATTAAGGACAAGCAATTCACAATAGGAGGTTTGCTGGAAGCTACTGAGTATGAATT
-CAGGGTTTTTGCTGAGAATGAGACTGGGCTGAGCAGACCTCGCAGAACTGCTATGTCTATAAAGACTAAA
-CTCACATCTGGAGAGGCCCCAGGAATACGCAAAGAAATGAAGGATGTTACCACAAAATTGGGTGAAGCTG
-CTCAACTCTCATGCCAGATTGTTGGAAGGCCTCTTCCTGACATTAAATGGTACAGATTTGGTAAAGAGCT
-CATACAAAGCCGGAAATACAAAATGTCTTCAGATGGACGCACACACACTCTTACAGTAATGACAGAGGAA
-CAGGAAGATGAAGGTGTTTATACCTGCATAGCCACCAATGAGGTTGGAGAAGTAGAAACCAGTAGTAAGC
-TTCTCCTGCAAGCAACACCGCAGTTCCATCCTGGTTACCCACTGAAAGAGAAATATTATGGAGCTGTGGG
-TTCCACACTTCGGCTTCATGTTATGTACATTGGTCGTCCAGTACCTGCCATGACTTGGTTCCATGGTCAG
-AAACTTTTGCAAAACTCAGAAAACATTACTATTGAAAACACTGAGCACTATACTCATCTTGTCATGAAGA
-ATGTCCAACGTAAGACTCATGCTGGGAAATACAAAGTCCAGCTCAGCAATGTTTTTGGAACAGTTGATGC
-CATCCTTGATGTGGAAATACAAGATAAACCAGACAAACCTACAGGACCAATTGTGATCGAAGCTCTATTG
-AAGAACTCCGCAGTGATCAGCTGGAAACCACCCGCAGATGACGGAGGCTCCTGGATCACCAACTATGTGG
-TGGAAAAATGTGAGGCCAAGGAGGGGGCTGAATGGCAATTGGTGTCTTCAGCCATCTCAGTGACAACCTG
-TAGAATTGTGAACCTCACAGAAAATGCTGGCTATTACTTCCGGGTTTCAGCTCAGAACACTTTCGGCATC
-AGTGACCCTCTAGAAGTGTCCTCAGTTGTGATCATTAAGAGTCCATTTGAAAAGCCAGGTGCTCCTGGCA
-AACCAACTATTACTGCTGTCACAAAAGATTCTTGTGTTGTGGCCTGGAAGCCACCTGCCAGTGATGGAGG
-TGCAAAGATTAGAAATTACTACCTTGAGAAGCGTGAGAAGAAGCAGAATAAATGGATTTCTGTGACAACA
-GAAGAAATTCGAGAAACTGTCTTTTCAGTGAAAAACCTTATTGAAGGTCTTGAATACGAGTTTCGTGTGA
-AATGTGAAAATCTAGGTGGGGAAAGTGAATGGAGTGAAATATCAGAACCCATCACTCCCAAATCTGATGT
-CCCAATTCAGGCACCACACTTTAAAGAGGAACTGAGAAATCTAAATGTCAGATATCAGAGCAATGCTACC
-TTGGTCTGCAAAGTGACTGGTCATCCAAAACCTATCGTCAAATGGTACAGACAAGGCAAAGAAATCATTG
-CAGATGGATTAAAATATAGGATTCAAGAATTTAAGGGTGGCTACCACCAGCTCATCATTGCAAGTGTCAC
-AGATGATGATGCCACAGTTTACCAAGTCAGAGCTACCAACCAAGGGGGATCTGTGTCTGGCACTGCCTCC
-TTGGAAGTGGAAGTTCCAGCTAAGATACACTTACCTAAAACTCTTGAAGGCATGGGAGCAGTTCATGCTC
-TCCGAGGTGAAGTGGTCAGCATCAAGATTCCTTTCAGTGGCAAACCAGATCCTGTGATCACCTGGCAGAA
-AGGACAAGATCTCATTGACAATAATGGCCACTACCAAGTTATTGTCACAAGATCCTTCACATCACTTGTT
-TTCCCCAATGGGGTAGAGAGAAAAGATGCTGGTTTCTATGTGGTCTGTGCTAAAAACAGATTTGGAATTG
-ATCAGAAGACAGTTGAACTGGATGTGGCTGATGTTCCTGACCCACCCAGAGGAGTCAAAGTTAGTGATGT
-CTCACGAGATTCTGTCAACTTAACATGGACTGAGCCAGCCTCTGATGGTGGCAGCAAAATCACCAACTAC
-ATTGTTGAAAAATGTGCAACTACTGCAGAAAGATGGCTCCGTGTAGGACAGGCCCGAGAAACACGTTATA
-CCGTGATCAACTTATTTGGAAAAACAAGTTACCAGTTCCGGGTAATAGCTGAAAATAAATTTGGTCTGAG
-CAAGCCTTCAGAGCCTTCAGAACCAACCATAACCAAAGAAGATAAGACCAGAGCTATGAACTATGATGAA
-GAGGTAGATGAAACCAGGGAAGTCTCCATGACTAAAGCATCTCACTCTTCAACCAAGGAACTCTATGAGA
-AATATATGATTGCTGAAGATCTTGGGCGTGGTGAGTTTGGAATTGTCCATCGTTGTGTTGAAACATCCTC
-AAAGAAGACATACATGGCCAAATTTGTTAAAGTCAAAGGGACTGATCAGGTTTTGGTAAAGAAGGAAATT
-TCCATTCTGAATATTGCTAGGCATAGAAACATCTTACACCTCCATGAATCATTTGAAAGCATGGAAGAAT
-TAGTTATGATCTTTGAGTTTATATCAGGACTTGACATATTTGAGCGCATTAACACAAGTGCTTTTGAACT
-TAATGAAAGAGAAATTGTAAGTTATGTTCACCAGGTCTGTGAAGCACTTCAGTTTTTACACAGTCATAAT
-ATTGGACACTTTGACATTAGACCAGAAAATATCATTTACCAAACCAGAAGAAGCTCTACCATTAAAATCA
-TAGAATTTGGTCAAGCCCGTCAGCTGAAACCAGGGGACAACTTCAGGCTTCTATTCACTGCCCCAGAATA
-CTATGCACCTGAAGTCCACCAGCATGATGTTGTCAGCACAGCCACAGACATGTGGTCACTTGGAACACTG
-GTATATGTGCTATTGAGTGGTATCAACCCATTCCTGGCTGAAACTAACCAACAGATCATTGAGAATATCA
-TGAATGCTGAATATACTTTCGATGAGGAAGCATTCAAAGAGATTAGCATTGAAGCCATGGATTTTGTTGA
-CCGGTTGTTAGTGAAAGAGAGGAAATCTCGCATGACAGCATCGGAGGCTCTCCAGCACCCATGGTTGAAG
-CAGAAGATAGAAAGAGTCAGTACTAAAGTTATCAGAACATTAAAACACCGGCGTTATTACCACACCCTGA
-TCAAGAAAGACCTCAACATGGTTGTGTCAGCAGCCCGGATCTCCTGTGGTGGTGCAATTCGATCTCAGAA
-GGGAGTGAGTGTTGCTAAAGTTAAAGTGGCATCCATTGAAATTGGCCCAGTTTCTGGGCAGATAATGCAT
-GCAGTTGGTGAAGAAGGAGGACATGTCAAATATGTATGCAAAATTGAAAATTATGATCAGTCTACCCAAG
-TGACTTGGTACTTTGGCGTCCGACAGCTGGAGAACAGTGAGAAATACGAAATCACCTACGAAGATGGAGT
-GGCCATCCTCTATGTCAAAGACATTACCAAATTAGATGATGGTACCTACAGATGCAAAGTAGTCAATGAC
-TATGGTGAAGACAGTTCTTATGCAGAGCTATTTGTTAAAGGTGTGAGAGAAGTCTATGACTATTACTGCC
-GTAGAACCATGAAGAAAATTAAGCGCAGAACAGACACAATGAGACTCCTGGAAAGGCCACCAGAATTTAC
-CCTGCCTCTCTATAATAAGACAGCTTATGTAGGTGAAAATGTCCGGTTTGGAGTAACTATAACTGTCCAC
-CCAGAGCCTCATGTAACATGGTATAAATCAGGTCAGAAAATCAAACCAGGTGACAATGACAAGAAGTACA
-CATTTGAGTCAGACAAGGGTCTTTACCAATTAACAATCAACAGTGTCACTACAGATGATGACGCTGAATA
-TACTGTTGTGGCAAGGAACAAATATGGTGAAGACAGCTGTAAAGCAAAGCTGACAGTAACCCTACACCCA
-CCTCCAACAGATAGTACCTTAAGACCCATGTTCAAAAGGTTACTGGCAAATGCAGAATGCCAAGAAGGCC
-AAAGTGTCTGCTTTGAGATCAGAGTGTCTGGCATCCCCCCACCAACATTAAAATGGGAGAAAGATGGTCA
-GCCACTGTCCCTCGGGCCTAACATTGAAATTATCCATGAAGGCTTGGATTATTATGCTCTGCACATCAGG
-GACACTTTGCCTGAAGACACGGGTTATTATAGAGTCACAGCCACTAACACAGCTGGGTCCACCAGCTGCC
-AGGCTCACCTACAAGTGGAACGCCTGAGGTACAAGAAACAGGAATTCAAGAGTAAGGAGGAGCATGAGCG
-ACACGTACAAAAACAAATTGACAAAACCCTCAGAATGGCTGAAATTCTTTCTGGAACTGAAAGTGTACCA
-CTGACACAGGTAGCTAAAGAGGCTCTGAGAGAAGCTGCTGTCCTTTATAAACCGGCTGTAAGCACCAAGA
-CTGTAAAAGGGGAATTCAGACTTGAGATAGAAGAAAAGAAGGAGGAGAGAAAACTCCGGATGCCTTATGA
-TGTACCAGAGCCACGCAAGTATAAGCAGACTACCATAGAAGAAGACCAACGCATCAAGCAGTTCGTGCCC
-ATGTCTGACATGAAGTGGTATAAAAAGATACGTGATCAGTATGAAATGCCTGGGAAACTTGACAGAGTTG
-TACAGAAACGACCCAAGCGCATCCGCCTTTCAAGATGGGAACAGTTCTATGTGATGCCTCTTCCACGCAT
-TACAGATCAATACAGACCTAAATGGCGTATTCCTAAACTGTCCCAAGATGATCTTGAGATAGTGAGACCA
-GCCCGCCGGCGTACACCTTCTCCTGATTATGACTTTTACTACCGACCTAGAAGACGTTCTCTTGGGGACA
-TCTCTGATGAAGAATTACTCCTCCCCATTGATGACTACTTAGCAATGAAAAGAACAGAGGAAGAGAGGCT
-GCGTCTTGAAGAAGAGCTTGAGTTAGGTTTTTCAGCTTCACCCCCAAGTCGAAGCCCTCCACACTTTGAG
-CTTTCTAGCCTACGTTACTCTTCACCACAAGCTCATGTCAAGGTGGAGGAAACAAGAAAAGACTTCAGGT
-ATTCAACCTATCACATCCCAACGAAGGCTGAAGCTAGTACAAGTTATGCAGAACTGAGGGAACGGCATGC
-CCAGGCTGCGTACAGACAGCCAAAGCAACGGCAAAGAATCATGGCTGAGAGGGAGGATGAAGAGTTGCTT
-CGCCCAGTTACGACCACCCAGCATCTCTCAGAATACAAAAGCGAACTTGACTTCATGTCAAAGGAGGAAA
-AGTCTAGAAAGAAATCAAGGCGACAAAGAGAAGTGACAGAAATAACAGAAATTGAGGAAGAATACGAAAT
-CTCAAAACATGCTCAAAGAGAATCATCCTCATCTGCGTCTAGACTACTGAGACGACGGCGCTCCCTGTCT
-CCAACTTATATTGAGTTAATGAGGCCAGTGTCTGAGCTGATCCGGTCACGTCCACAACCGGCTGAGGAAT
-ACGAAGATGACACAGAAAGAAGGTCACCTACTCCAGAGAGAACTCGCCCACGATCCCCCAGCCCTGTGTC
-TAGTGAGAGATCACTCTCGAGATTTGAGAGGTCTGCAAGATTTGATATCTTTTCCAGGTATGAGTCCATG
-AAAGCTGCTTTAAAAACTCAGAAGACATCAGAAAGGAAGTATGAAGTTTTGAGTCAGCAGCCTTTCACAC
-TGGACCATGCCCCTCGAATCACACTGAGAATGCGCTCGCACAGGGTACCATGTGGCCAAAATACACGTTT
-TATTTTAAATGTTCAGTCTAAGCCAACTGCCGAGGTTAAATGGTACCACAATGGTGTGGAACTCCAAGAA
-AGCAGTAAGATTCATTACACCAACACGAGTGGAGTCCTCACCCTGGAAATTCTGGACTGTCATACTGATG
-ACAGTGGAACCTACCGTGCTGTGTGCACCAACTACAAGGGCGAAGCTTCTGACTATGCAACGTTGGACGT
-GACAGGAGGGGATTATACCACCTATGCTTCCCAACGCAGAGATGAAGAGGTCCCCAGATCTGTTTTCCCT
-GAGCTGACAAGAACAGAGGCGTATGCTGTTTCATCATTTAAGAAAACATCTGAGATGGAAGCTTCGTCTT
-CTGTCAGGGAAGTGAAATCACAGATGACGGAGACAAGGGAAAGTCTCTCCTCATATGAACACTCTGCATC
-TGCAGAAATGAAAAGTGCTGCATTAGAAGAAAAGTCACTGGAAGAAAAATCCACAACCAGAAAGATCAAG
-ACGACTTTGGCAGCAAGAATTCTAACAAAGCCACGGTCCATGACCGTCTACGAGGGCGAGTCTGCAAGGT
-TTTCTTGTGACACCGATGGTGAGCCGGTACCAACTGTGACCTGGCTGCGTAAAGGACAAGTGCTAAGTAC
-TTCTGCCCGCCACCAAGTGACCACCACAAAGTACAAATCAACCTTTGAGATCTCTTCAGTCCAGGCTTCC
-GATGAGGGCAATTACAGCGTGGTGGTAGAAAACAGTGAAGGGAAACAAGAAGCAGAGTTCACTCTGACTA
-TTCAAAAGGCCAGGGTAACTGAAAAGGCTGTGACATCACCACCAAGAGTCAAATCCCCAGAGCCTCGGGT
-GAAATCCCCAGAAGCAGTTAAGTCTCCAAAACGAGTGAAATCTCCAGAACCTTCTCACCCGAAAGCCGTA
-TCACCCACAGAGACAAAACCAACACCAACAGAGAAAGTTCAGCACCTCCCAGTCTCTGCCCCACCAAAGA
-TTACTCAGTTCCTGAAAGCAGAAGCTTCTAAAGAGATTGCAAAACTGACCTGTGTGGTTGAAAGCAGTGT
-ATTAAGGGCAAAAGAGGTCACCTGGTATAAAGATGGCAAGAAACTGAAGGAAAATGGGCATTTCCAGTTT
-CATTATTCAGCAGATGGTACCTATGAGCTCAAAATCAATAACCTCACTGAATCTGATCAAGGAGAATATG
-TTTGTGAGATTTCTGGTGAAGGTGGAACGTCTAAAACCAACTTACAATTTATGGGGCAAGCCTTTAAGAG
-TATCCATGAGAAGGTATCAAAAATATCAGAAACTAAGAAATCAGATCAGAAAACCACTGAGTCAACAGTA
-ACCAGAAAAACTGAACCAAAAGCTCCTGAACCAATTTCCTCAAAACCAGTAATTGTTACTGGGTTGCAGG
-ATACAACTGTTTCTTCAGACAGTGTTGCTAAATTTGCAGTTAAGGCTACTGGAGAACCCCGGCCAACTGC
-CATCTGGACAAAAGATGGAAAGGCCATTACACAAGGAGGTAAATATAAACTCTCTGAAGACAAGGGAGGG
-TTCTTCTTAGAAATTCATAAGACTGATACTTCTGACAGTGGACTTTATACTTGTACAGTAAAAAATTCAG
-CTGGATCTGTGTCCTCTAGCTGCAAATTAACAATAAAAGCTATAAAAGATACTGAGGCACAGAAAGTCTC
-TACACAAAAGACTTCTGAAATTACACCTCAGAAGAAAGCTGTTGTCCAAGAGGAAATTTCCCAAAAAGCC
-CTAAGGTCTGAAGAAATTAAGATGTCAGAGGCAAAATCTCAAGAAAAGTTAGCCCTCAAAGAGGAAGCTT
-CAAAGGTTCTGATTTCTGAAGAAGTCAAGAAATCAGCAGCAACCTCCCTGGAAAAATCCATTGTCCATGA
-GGAAATCACTAAAACATCACAGGCATCAGAAGAAGTCAGAACTCATGCTGAGATTAAAGCATTTTCTACT
-CAGATGAGCATAAACGAAGGTCAAAGACTGGTTTTAAAAGCCAACATTGCTGGTGCCACTGATGTGAAAT
-GGGTACTGAATGGCGTAGAGCTTACCAACTCTGAGGAGTACCGATATGGTGTCTCAGGCAGCGATCAGAC
-CCTAACCATCAAGCAAGCCAGTCACAGAGATGAAGGAATCCTCACCTGCATAAGCAAAACCAAGGAAGGA
-ATCGTCAAGTGTCAGTATGATTTGACACTGAGCAAAGAACTCTCAGATGCTCCAGCCTTCATCTCACAGC
-CTAGATCTCAAAATATTAATGAAGGACAAAATGTTCTCTTTACTTGTGAAATCAGTGGCGAGCCATCCCC
-TGAAATCGAATGGTTTAAAAACAACCTGCCAATTTCTATTTCTTCAAATGTCAGCATAAGCCGCTCCAGA
-AATGTATACTCCCTTGAAATCCGAAATGCATCAGTCAGCGACAGTGGAAAGTACACAATTAAGGCCAAAA
-ATTTCCGTGGCCAGTGTTCAGCTACAGCTTCCTTAATGGTCCTTCCTCTAGTTGAAGAACCTTCCAGAGA
-GGTAGTATTGAGAACAAGTGGTGACACAAGCTTGCAAGGAAGCTTCTCGTCTCAGTCAGTCCAAATGTCT
-GCCTCCAAGCAGGAGGCCTCCTTCAGCAGTTTCAGCAGCAGCAGTGCTAGCAGCATGACTGAGATGAAAT
-TTGCAAGCATGTCTGCCCAAAGCATGTCCTCCATGCAAGAGTCCTTTGTAGAAATGAGTTCCAGCAGCTT
-TATGGGAATATCTAATATGACACAACTGGAAAGCTCAACTAGTAAAATGCTTAAAGCAGGCATAAGAGGA
-ATTCCGCCTAAAATTGAAGCTCTTCCATCTGATATCAGCATTGATGAAGGCAAAGTTCTAACAGTAGCCT
-GTGCTTTCACGGGTGAGCCTACCCCAGAAGTAACATGGTCCTGTGGTGGAAGAAAAATCCACAGTCAAGA
-ACAGGGGAGGTTCCACATTGAAAACACAGATGACCTGACAACCCTGATCATCATGGACGTACAGAAACAA
-GATGGTGGACTTTATACCCTGAGTTTAGGGAATGAATTTGGATCTGACTCTGCCACTGTGAATATACATA
-TTCGATCCATTTAAGAGGGCCTGTGCCCTTATACTCTACACTCATTCTTAACTTTTCGCAAACGTTTCAC
-ACGGACTAATCTTTCTGAACTGTAAATATTTAAAGAAAAAAAAGTAGTTTTGTATCAACCTAAATGAGTC
-AAAGTTCAAAAATATTCATTTCAATCTTTTCATAATTGTTGACCTAAGAATATAATACATTTGCTAGTGA
-CATGTACATACTGTATATAGCCGGATTAACGGTTATAAAGTTTTGTACCATTTATTTTATGACATTTTAC
-AATGTAAGTTTTGAAACTAACTGTTGGTAGGAGAAAGTTTCTTATGGAACGAATACCCTGCTCAACATTT
-AATCAATCTTTGTGCCTCAACATACTGTTGATGTCTAAGTATGCCTCAGTGGGTTGAGAAAATCCCCATT
-GAAGATGTCCTGTCCACCTAAAAGAGAATGATGCTGTGCATATCACTTGATATGTGCACCAATACCTACT
-GAATCAGAAATGTAAGGCATTGGTGATGTTTGCATTTACCCTCCTGTAAGCAACACTTTAACGTCTTACA
-TTTTCTCTGATGATGTCACACAAAATTATCATGACAAATATTACCAGAGCAAAGTGTAACGGCCAACACT
-TTGTTCGCTCATTTTACGCTGTCTCTGACATAAGGAGTGCCTGAATAGCTTGGAAAAGTAACATCTCCTG
-GCCATCCCTTCATTTAACCAAGCTATTCAAGTATTCCTATGCCAGAGCAGTGCCAACTCTTGGAGGTCCC
-AGAGTGCAGCCAATGCCTTTGTGTGGTAGTTCTAAATTTTAATTGCACCTGAAAAACCTGGGCACCTAAG
-CAATGAGCCACAGCAAAAAGTAAAGAACAACAACAAAATAAAGCTGTTGTTAAATTTTAAACAATATTAC
-TAATTGCCCAAAATGTCAATTTGATGTAGTTCTTTTCATGCAAGTATAAATTCAATTGTTAGTTATAATT
-GTTGGACCTCCTTGAGATAGTAACAACAAAATAAAGCAAGCTATCTGCACCTCAAAA
-X
-SHAR_EOF
-chmod 0644 titin_hum.seq ||
-echo 'restore of titin_hum.seq failed'
-Wc_c="`wc -c < 'titin_hum.seq'`"
-test 83286 -eq "$Wc_c" ||
- echo 'titin_hum.seq: original size 83286, current size' "$Wc_c"
-fi
-# ============= uascii.h ==============
-if test -f 'uascii.h' -a X"$1" != X"-c"; then
- echo 'x - skipping uascii.h (File already exists)'
-else
-echo 'x - extracting uascii.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'uascii.h' &&
-/* Concurrent read version */
-/* ascii.gbl ascii translation to amino acids */
-/* modified 10-Mar-1987 for B, Z */
-X
-/* $Name: fa_34_26_5 $ - $Id: uascii.h,v 1.6 2004/12/30 16:45:01 wrp Exp $ */
-X
-#define NA 123
-#define NANN 50
-#define ESS 49 /* code for ',' in FASTS,FASTF, FASTM */
-#define EL 125
-#define ES 126
-#define AAMASK 127
-X
-#ifndef XTERNAL
-/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
-/* 32 ! " # $ % & ' ( ) * + , - . / */
-/* 48 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
-/* 64 @ A B C D E F G H I J K L M N O */
-/* 80 P Q R S T U V W X Y Z [ \ ] ^ _ */
-/* 96 ` a b c d e f g h i j k l m n o */
-/*112 p q r s t u v w x y z { | } ~ ^? */
-X
-int aascii[128]={
-X EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA, /* 15 */
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, /* 31 */
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,24,NA,NA,NA,NA,NA, /* 47 */
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, /* 63 */
-X NA, 1,21, 5, 4, 7,14, 8, 9,10,NA,12,11,13, 3,NA, /* 79 */
-X 15, 6, 2,16,17,23,20,18,23,19,22,NA,NA,NA,NA,NA, /* 95 */
-X NA, 1,21, 5, 4, 7,14, 8, 9,10,NA,12,11,13, 3,NA, /*111 */
-X 15, 6, 2,16,17,23,20,18,23,19,22,NA,NA,NA,NA,NA}; /*127 */
-X
-int nascii[128]={
-/* 0 1 2 3 5 6 7 8 9 10 11 12 13 14 15 15
-X @ A B C D E F G H I J K L M N O
-X P Q R S T U V W X Y Z */
-X EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,16,NA,NA,
-X NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,ES,NA,NA,ES,NA,
-X NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
-X 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA,
-X NA, 1,15, 2,12,NA,NA, 3,13,NA,NA,11,NA, 8,16,NA,
-X 6, 7, 6,10, 4, 5,14, 9,17, 7,NA,NA,NA,NA,NA,NA};
-X
-int *pascii;
-int qascii[128];
-int lascii[128];
-#else
-#define AAMASK 127
-extern int aascii[128];
-extern int nascii[128];
-X
-extern int *pascii;
-extern int qascii[128];
-extern int lascii[128];
-#endif
-SHAR_EOF
-chmod 0644 uascii.h ||
-echo 'restore of uascii.h failed'
-Wc_c="`wc -c < 'uascii.h'`"
-test 2006 -eq "$Wc_c" ||
- echo 'uascii.h: original size 2006, current size' "$Wc_c"
-fi
-# ============= upam.h ==============
-if test -f 'upam.h' -a X"$1" != X"-c"; then
- echo 'x - skipping upam.h (File already exists)'
-else
-echo 'x - extracting upam.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'upam.h' &&
-/* Concurrent read version */
-/* 20-June-1986 universal pam file */
-X
-/* $Name: fa_34_26_5 $ - $Id: upam.h,v 1.19 2006/02/07 17:58:19 wrp Exp $ */
-X
-/* modified to accomodate both lower and upper case amino acid numbers
-X as a result MAXSQ = 50
-*/
-X
-#ifndef UPAM_GBL_DEF
-#define UPAM_GBL_DEF
-X
-#define EOSEQ 0
-#define MAXSQ 50
-#define MAXUC 24
-#define MAXLC 48
-X
-#define MAXHASH 32
-#define NMAP MAXHASH+1
-X
-#ifndef XTERNAL
-X
-int pamoff=0;
-X
-/*extern int gdelval, ggapval;*/
-X
-/* char sqnam[]="aa"; */
-/* char sqtype[]="protein"; */
-X
-char aa[MAXSQ+1] = {"\0ARNDCQEGHILKMFPSTWYVBZX*ARNDCQEGHILKMFPSTWYVBZX*\0"};
-char aax[MAXSQ+1] = {"\0ARNDCQEGHILKMFPSTWYVBZX*arndcqeghilkmfpstwyvbzx*\0"};
-X
-int naa = 24; /* this should be calculated from aa[] */
-int naax = 48;
-X
-/* haa[] used to map all valid amino acid codes into a hash value;
-X now, there is an additional hash value - not-mapped - NM */
-X
-/* this has been expanded to accomodate '*' */
-int haa[MAXSQ+1] = {
-X NMAP,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP,
-X 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,NMAP};
-X
-int haax[MAXSQ+1] = {
-X NMAP,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,3,7,NMAP,
-X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
-X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
-X NMAP};
-X
-/*
-X PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
-X Expected score = -0.844, Entropy = 0.354 bits
-X Lowest score = -8, Highest score = 17
-*/
-int apam250[450] = {
-X 2,
--2, 6,
-X 0, 0, 2,
-X 0,-1, 2, 4,
--2,-4,-4,-5,12,
-X 0, 1, 1, 2,-5, 4,
-X 0,-1, 1, 3,-5, 2, 4,
-X 1,-3, 0, 1,-3,-1, 0, 5,
--1, 2, 2, 1,-3, 3, 1,-2, 6,
--1,-2,-2,-2,-2,-2,-2,-3,-2, 5,
--2,-3,-3,-4,-6,-2,-3,-4,-2, 2, 6,
--1, 3, 1, 0,-5, 1, 0,-2, 0,-2,-3, 5,
--1, 0,-2,-3,-5,-1,-2,-3,-2, 2, 4, 0, 6,
--4,-4,-4,-6,-4,-5,-5,-5,-2, 1, 2,-5, 0, 9,
-X 1, 0,-1,-1,-3, 0,-1,-1, 0,-2,-3,-1,-2,-5, 6,
-X 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-3, 0,-2,-3, 1, 2,
-X 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-2, 0,-1,-3, 0, 1, 3,
--6, 2,-4,-7,-8,-5,-7,-7,-3,-5,-2,-3,-4, 0,-6,-2,-5,17,
--3,-4,-2,-4, 0,-4,-4,-5, 0,-1,-1,-4,-2, 7,-5,-3,-3, 0,10,
-X 0,-2,-2,-2,-2,-2,-2,-1,-2, 4, 2,-2, 2,-1,-1,-1, 0,-6,-2, 4,
-X 0,-1, 2, 3,-4, 1, 2, 0, 1,-2,-3, 1,-2,-5,-1, 0, 0,-5,-3,-2, 2,
-X 0, 0, 1, 3,-5, 3, 3,-1, 2,-2,-3, 0,-2,-5, 0, 0,-1,-6,-4,-2, 2, 3,
-X 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1,
-X 0,-1, 0,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-2,-1, 0, 0,-4,-2,-1,-1,-1,-1, 8};
-X
-/*
-X This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
-X PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
-X Expected score = -1.64, Entropy = 0.979 bits
-X Lowest score = -8, Highest score = 12
-*/
-int apam120[450] = {
-X 3,
-X -3, 6,
-X 0,-1, 4,
-X 0,-3, 2, 5,
-X -3,-4,-5,-7, 9,
-X -1, 1, 0, 1,-7, 6,
-X 0,-3, 1, 3,-7, 2, 5,
-X 1,-4, 0, 0,-5,-3,-1, 5,
-X -3, 1, 2, 0,-4, 3,-1,-4, 7,
-X -1,-2,-2,-3,-3,-3,-3,-4,-4, 6,
-X -3,-4,-4,-5,-7,-2,-4,-5,-3, 1, 5,
-X -2, 2, 1,-1,-7, 0,-1,-3,-2,-2,-4, 5,
-X -2,-1,-3,-4,-6,-1,-4,-4,-4, 1, 3, 0, 8,
-X -4,-4,-4,-7,-6,-6,-6,-5,-2, 0, 0,-6,-1, 8,
-X 1,-1,-2,-2,-3, 0,-1,-2,-1,-3,-3,-2,-3,-5, 6,
-X 1,-1, 1, 0,-1,-2,-1, 1,-2,-2,-4,-1,-2,-3, 1, 3,
-X 1,-2, 0,-1,-3,-2,-2,-1,-3, 0,-3,-1,-1,-4,-1, 2, 4,
-X -7, 1,-5,-8,-8,-6,-8,-8,-5,-7,-5,-5,-7,-1,-7,-2,-6, 12,
-X -4,-6,-2,-5,-1,-5,-4,-6,-1,-2,-3,-6,-4, 4,-6,-3,-3,-1, 8,
-X 0,-3,-3,-3,-2,-3,-3,-2,-3, 3, 1,-4, 1,-3,-2,-2, 0,-8,-3, 5,
-X 0,-2, 3, 4,-6, 0, 3, 0, 1,-3,-4, 0,-4,-5,-2, 0, 0,-6,-3,-3, 4,
-X -1,-1, 0, 3,-7, 4, 4,-2, 1,-3,-3,-1,-2,-6,-1,-1,-2,-7,-5,-3, 2, 4,
-X -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2,
-X -1,-2,-1,-2,-4,-1,-1,-2,-2,-1,-2,-2,-2,-3,-2,-1,-1,-5,-3,-1,-1,-1,-2, 6};
-X
-/*
-# VTML160
-#
-# This matrix was produced with scripts written by
-# Tobias Mueller and Sven Rahmann [June-2001].
-#
-# VTML160 substitution matrix, Units = Third-Bits
-# Expected Score = -1.297840 Third-Bits
-# Lowest Score = -7, Highest Score = 16
-#
-# Entropy H = 0.562489 Bits
-#
-# 30-Jun-2001
-*/
-int avt160[450] = {
-X 5,
-X -2, 7,
-X -1, 0, 7,
-X -1, -3, 3, 7,
-X 1, -3, -3, -5, 13,
-X -1, 2, 0, 1, -4, 6,
-X -1, -1, 0, 3, -5, 2, 6,
-X 0, -3, 0, -1, -2, -3, -2, 8,
-X -2, 1, 1, 0, -2, 2, -1, -3, 9,
-X -1, -4, -4, -6, -1, -4, -5, -7, -4, 6,
-X -2, -3, -4, -6, -4, -2, -4, -6, -3, 3, 6,
-X -1, 4, 0, 0, -4, 2, 1, -2, 0, -4, -3, 5,
-X -1, -2, -3, -5, -1, -1, -3, -5, -3, 2, 4, -2, 8,
-X -3, -5, -5, -7, -4, -4, -6, -6, 0, 0, 2, -5, 1, 9,
-X 0, -2, -2, -1, -3, -1, -1, -3, -2, -4, -3, -1, -4, -5, 9,
-X 1, -1, 1, 0, 1, 0, 0, 0, -1, -3, -3, -1, -3, -3, 0, 4,
-X 1, -1, 0, -1, 0, -1, -1, -2, -1, -1, -2, -1, -1, -3, -1, 2, 5,
-X -5, -4, -5, -7, -7, -6, -7, -5, -1, -2, -1, -5, -4, 3, -5, -4, -6, 16,
-X -3, -3, -2, -5, -1, -4, -3, -5, 3, -2, -1, -3, -2, 6, -6, -2, -3, 4, 10,
-X 0, -4, -4, -4, 1, -3, -3, -5, -3, 4, 2, -3, 1, -1, -3, -2, 0, -5, -3, 5,
-X -1, -2, 5, 6, -4, 0, 2, -1, 0, -5, -5, 0, -4, -6, -2, 1, 0, -6, -3, -4, 5,
-X -1, 0, 0, 3, -5, 4, 5, -2, 0, -4, -3, 2, -3, -5, -1, 0, -1, -7, -4, -3, 2, 5,
-X 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-X -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, 6};
-X
-/*
-X Matrix made by matblas from blosum50.iij
-X BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-X Blocks Database = /data/blocks_5.0/blocks.dat
-X Cluster Percentage: >= 50
-X Entropy = 0.4808, Expected = -0.3573
-*/
-int abl50[450] = {
-X 5,
-X -2, 7,
-X -1,-1, 7,
-X -2,-2, 2, 8,
-X -1,-4,-2,-4,13,
-X -1, 1, 0, 0,-3, 7,
-X -1, 0, 0, 2,-3, 2, 6,
-X 0,-3, 0,-1,-3,-2,-3, 8,
-X -2, 0, 1,-1,-3, 1, 0,-2,10,
-X -1,-4,-3,-4,-2,-3,-4,-4,-4, 5,
-X -2,-3,-4,-4,-2,-2,-3,-4,-3, 2, 5,
-X -1, 3, 0,-1,-3, 2, 1,-2, 0,-3,-3, 6,
-X -1,-2,-2,-4,-2, 0,-2,-3,-1, 2, 3,-2, 7,
-X -3,-3,-4,-5,-2,-4,-3,-4,-1, 0, 1,-4, 0, 8,
-X -1,-3,-2,-1,-4,-1,-1,-2,-2,-3,-4,-1,-3,-4,10,
-X 1,-1, 1, 0,-1, 0,-1, 0,-1,-3,-3, 0,-2,-3,-1, 5,
-X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 2, 5,
-X -3,-3,-4,-5,-5,-1,-3,-3,-3,-3,-2,-3,-1, 1,-4,-4,-3,15,
-X -2,-1,-2,-3,-3,-1,-2,-3, 2,-1,-1,-2, 0, 4,-3,-2,-2, 2, 8,
-X 0,-3,-3,-4,-1,-3,-3,-4,-4, 4, 1,-3, 1,-1,-3,-2, 0,-3,-1, 5,
-X -2,-1, 4, 5,-3, 0, 1,-1, 0,-4,-4, 0,-3,-4,-2, 0, 0,-5,-3,-4, 5,
-X -1, 0, 0, 1,-3, 4, 5,-2, 0,-3,-3, 1,-1,-4,-1, 0,-1,-2,-2,-3, 2, 5,
-X -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1,
-X -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1, 7};
-X
-/*
-X A R N D C Q E G H I L K M F P S T W Y V B Z X * */
-int a_md10[450]= {
-X 11, /* A */
--12, 12, /* R */
--12,-13, 13, /* N */
--11,-18, -3, 12, /* D */
--13,-10,-14,-20, 17, /* C */
--13, -5,-11,-13,-19, 13, /* Q */
--10,-15,-12, -2,-22, -5, 12, /* E */
-X -8, -9,-11, -9,-12,-16, -9, 11, /* G */
--16, -5, -5,-10,-12, -3,-15,-16, 16, /* H */
--13,-17,-14,-19,-17,-20,-19,-21,-18, 12, /* I */
--15,-14,-19,-21,-16,-12,-20,-21,-13, -7, 10, /* L */
--14, -2, -6,-15,-21, -6, -8,-15,-13,-17,-18, 12, /* K */
--13,-14,-15,-18,-15,-14,-18,-19,-15, -4, -4,-12, 16, /* M */
--18,-22,-19,-22,-11,-22,-23,-22,-14,-11, -6,-23,-14, 14, /* F */
-X -7,-12,-17,-18,-18, -8,-17,-16,-10,-19,-10,-16,-17,-17, 13, /* P */
-X -5,-10, -4,-12, -7,-13,-15, -7,-11,-14,-13,-13,-15,-11, -6, 11, /* S */
-X -4,-12, -7,-14,-14,-13,-15,-14,-13, -7,-16,-10, -7,-19, -9, -4, 12, /* T */
--21, -9,-21,-21,-10,-17,-21,-13,-21,-21,-13,-21,-17,-13,-21,-15,-18, 18, /* W */
--20,-17,-12,-13, -7,-16,-21,-20, -3,-15,-16,-20,-17, -3,-20,-12,-17,-12, 15, /* Y */
-X -6,-17,-17,-15,-12,-17,-14,-13,-19, -1, -8,-18, -5,-12,-16,-14,-10,-16,-18, 11, /* V */
--12,-15, 5, 5,-17,-12, -7,-10, -7,-16,-20,-11,-17,-21,-17, -8,-10,-22,-13,-16, 13, /* B */
--16,-18,-17, -8,-32, 1, 9,-17,-17,-29,-26,-11,-24,-34,-21,-21,-21,-29,-29,-22, -9, 13, /* Z */
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};
-X
-int a_md20[450] = {
-X 10,
--10, 12,
-X -9,-10, 13,
-X -8,-14, -1, 12,
--10, -7,-11,-16, 17,
--10, -3, -8, -9,-16, 13,
-X -7,-11, -9, 1,-19, -3, 11,
-X -5, -6, -8, -6, -9,-12, -7, 11,
--12, -3, -2, -7, -9, 0,-12,-13, 15,
--10,-14,-11,-16,-14,-16,-16,-17,-14, 12,
--12,-11,-15,-18,-13, -9,-17,-18,-10, -4, 10,
--11, 0, -4,-12,-17, -3, -5,-12, -9,-14,-15, 12,
-X -9,-11,-12,-15,-12,-11,-15,-16,-12, -1, -2, -9, 15,
--15,-19,-16,-19, -8,-18,-20,-19,-11, -8, -4,-19,-10, 13,
-X -5, -9,-13,-15,-14, -5,-14,-12, -7,-15, -7,-13,-14,-14, 12,
-X -2, -8, -1, -9, -4,-10,-12, -5, -8,-11,-10,-10,-12, -8, -3, 10,
-X -1, -9, -4,-11,-10,-10,-12,-11,-10, -4,-12, -7, -4,-15, -7, -1, 11,
--17, -6,-18,-18, -7,-14,-18,-10,-17,-17,-10,-17,-14,-10,-18,-12,-15, 18,
--16,-14, -9,-11, -4,-12,-18,-17, 0,-12,-12,-17,-14, 0,-16, -9,-13, -9, 14,
-X -3,-14,-14,-12, -9,-14,-11,-11,-15, 2, -5,-15, -2, -9,-13,-11, -7,-13,-14, 11,
-X -9,-12, 6, 6,-14, -9, -4, -7, -4,-13,-17, -8,-13,-18,-14, -5, -7,-19,-10,-13, 12,
--12,-13,-13, -4,-27, 4, 10,-13,-12,-24,-21, -6,-20,-29,-17,-17,-17,-24,-24,-18, -6, 12,
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9 };
-X
-int a_md40[450] = {
-X 9,
-X -7, 11,
-X -6, -6, 12,
-X -6,-10, 1, 11,
-X -7, -5, -8,-13, 16,
-X -7, 0, -5, -6,-12, 12,
-X -5, -8, -5, 3,-15, 0, 11,
-X -3, -4, -5, -4, -7, -9, -4, 10,
-X -9, 0, 0, -4, -6, 2, -8,-10, 14,
-X -6,-10, -8,-12,-11,-12,-12,-13,-11, 11,
-X -9, -9,-12,-14,-10, -6,-13,-14, -7, -1, 9,
-X -8, 3, -1, -8,-12, -1, -3, -9, -6,-11,-12, 11,
-X -6, -8, -9,-12, -9, -8,-11,-12, -9, 1, 1, -7, 14,
--11,-15,-12,-15, -5,-14,-16,-15, -7, -5, -1,-16, -7, 13,
-X -2, -6, -9,-11,-11, -3,-11, -9, -4,-11, -5,-10,-10,-11, 12,
-X 0, -5, 1, -6, -2, -7, -8, -2, -6, -8, -7, -7, -8, -6, -1, 9,
-X 1, -6, -2, -8, -7, -7, -8, -7, -7, -2, -9, -5, -2,-11, -4, 1, 10,
--14, -4,-14,-15, -4,-11,-15, -7,-13,-13, -8,-13,-11, -7,-14, -9,-12, 18,
--13,-10, -6, -8, -2, -9,-14,-13, 2, -9, -9,-13,-11, 2,-13, -7,-10, -6, 14,
-X -1,-11,-10, -9, -7,-11, -8, -8,-12, 4, -2,-12, 0, -6, -9, -7, -4,-10,-11, 10,
-X -6, -8, 6, 6,-10, -6, -1, -4, -2,-10,-13, -5,-10,-14,-10, -3, -5,-15, -7,-10, 11,
-X -8, -8, -8, 0,-21, 6, 10, -8, -7,-18,-16, -3,-15,-23,-12,-12,-12,-19,-18,-14, -3, 11,
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-X -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 9};
-X
-/*
-X Matrix made by matblas from blosum62.iij
-X * column uses minimum score
-X BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
-X Blocks Database = /data/blocks_5.0/blocks.dat
-X Cluster Percentage: >= 62
-X Entropy = 0.6979, Expected = -0.5209
-*/
-X
-int abl62[450] = {
-X 4,
-X -1, 5,
-X -2, 0, 6,
-X -2,-2, 1, 6,
-X 0,-3,-3,-3, 9,
-X -1, 1, 0, 0,-3, 5,
-X -1, 0, 0, 2,-4, 2, 5,
-X 0,-2, 0,-1,-3,-2,-2, 6,
-X -2, 0, 1,-1,-3, 0, 0,-2, 8,
-X -1,-3,-3,-3,-1,-3,-3,-4,-3, 4,
-X -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,
-X -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,
-X -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5,
-X -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,
-X -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,
-X 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4,
-X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,
-X -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11,
-X -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,
-X 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,
-X -2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4,
-X -1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,
-X 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1,
-X 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1, 6};
-X
-/* blosum80 in 1/2 bit units (previous versions had 1/3 bit units) */
-/*
-X Matrix made by matblas from blosum80.iij
-X * column uses minimum score
-X BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
-X Blocks Database = /data/blocks_5.0/blocks.dat
-X Cluster Percentage: >= 80
-X Entropy = 0.9868, Expected = -0.7442
-*/
-X
-int abl80[450] = {
-X 5,
-X -2, 6,
-X -2,-1, 6,
-X -2,-2, 1, 6,
-X -1,-4,-3,-4, 9,
-X -1, 1, 0,-1,-4, 6,
-X -1,-1,-1, 1,-5, 2, 6,
-X 0,-3,-1,-2,-4,-2,-3, 6,
-X -2, 0, 0,-2,-4, 1, 0,-3, 8,
-X -2,-3,-4,-4,-2,-3,-4,-5,-4, 5,
-X -2,-3,-4,-5,-2,-3,-4,-4,-3, 1, 4,
-X -1, 2, 0,-1,-4, 1, 1,-2,-1,-3,-3, 5,
-X -1,-2,-3,-4,-2, 0,-2,-4,-2, 1, 2,-2, 6,
-X -3,-4,-4,-4,-3,-4,-4,-4,-2,-1, 0,-4, 0, 6,
-X -1,-2,-3,-2,-4,-2,-2,-3,-3,-4,-3,-1,-3,-4, 8,
-X 1,-1, 0,-1,-2, 0, 0,-1,-1,-3,-3,-1,-2,-3,-1, 5,
-X 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-2,-1,-1,-2,-2, 1, 5,
-X -3,-4,-4,-6,-3,-3,-4,-4,-3,-3,-2,-4,-2, 0,-5,-4,-4,11,
-X -2,-3,-3,-4,-3,-2,-3,-4, 2,-2,-2,-3,-2, 3,-4,-2,-2, 2, 7,
-X 0,-3,-4,-4,-1,-3,-3,-4,-4, 3, 1,-3, 1,-1,-3,-2, 0,-3,-2, 4,
-X -2,-2, 4, 4,-4, 0, 1,-1,-1,-4,-4,-1,-3,-4,-2, 0,-1,-5,-3,-4, 4,
-X -1, 0, 0, 1,-4, 3, 4,-3, 0,-4,-3, 1,-2,-4,-2, 0,-1,-4,-3,-3, 0, 4,
-X -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1,
-X -1,-1,-1,-2,-3,-1,-1,-2,-2,-2,-2,-1,-1,-2,-2,-1,-1,-3,-2,-1,-2,-1,-1, 6};
-X
-/* DNA alphabet
-X
-X A, C, G, T, U 1-4, 5
-X R, Y 6, 7
-X M (A or C) 8
-X W (A or T) 9
-X S (C or G) 10
-X K (G or T) 11
-X D (not C) 12
-X H (not G) 13
-X V (not T) 14
-X B (not A) 15
-X N 16
-X X 17
-*/
-X
-char nt[MAXSQ+1] ={"\0ACGTURYMWSKDHVBNXACGTURYMWSKDHVBNX\0"};
-char ntx[MAXSQ+1]={"\0ACGTURYMWSKDHVBNXacgturymwskdhvbnx\0"};
-char ntc[MAXSQ+1]={"\0TGCAAYRKWSMHDBVNXtgcaayrkwsmhdbvnx\0"};
-X
-/* nt complement to encoding */
-/* gc_nt[i] is the code of the complement of nucleotide code i, using
-X   the numbering in the "DNA alphabet" comment above; index 0 lines up
-X   with the leading '\0' slot of nt[].  Only codes 0..17 are listed. */
-X /* A:T C:G G:C T:A U:A */
-int gc_nt[MAXSQ+1]={ 0, 4, 3, 2, 1, 1,
-X /* R:Y Y:R M:K W:W */
-X 7, 6, 11, 9,
-X /* S:S K:M D:H H:D */
-X 10, 8, 13, 12,
-X /* V:B B:V N:N X:N */
-X /* NOTE(review): the last entry maps X (17) to N (16), whereas ntc[]
-X    complements X to X - confirm that this is intended */
-X 15, 14, 16, 16};
-X
-int nnt = 17;
-int nntx = 34;
-X
-int hnt[MAXSQ+1] = {
-X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
-X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,NMAP};
-int hntx[MAXSQ+1] = {
-X NMAP,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,NMAP,
-X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,
-X NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP,NMAP};
-X
-int npam[450] = {
-/* A C G T U R Y M W S K D H V B N X */
-X 5, /* A */
-X -4, 5, /* C */
-X -4,-4, 5, /* G */
-X -4,-4,-4, 5, /* T */
-X -4,-4,-4, 5, 5, /* U */
-X 2,-1, 2,-1,-1, 2, /* R (A G)*/
-X -1, 2,-1, 2, 2,-2, 2, /* Y (C T)*/
-X 2, 2,-1,-1,-1,-1,-1, 2, /* M (A C)*/
-X 2,-1,-1, 2, 2, 1, 1, 1, 2, /* W (A T)*/
-X -1, 2, 2,-1,-1, 1, 1, 1,-1, 2, /* S (C G)*/
-X -1,-1, 2, 2, 2, 1, 1,-1, 1, 1, 2, /* K (G T)*/
-X 1,-2, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* D (!C) */
-X 1, 1,-2, 1, 1,-1, 1, 1, 1,-1,-1,-1, 1, /* H (!G) */
-X 1, 1, 1,-2,-2, 1,-1, 1,-1, 1,-1,-1,-1, 1, /* V (!T) */
-X -2, 1, 1, 1, 1,-1, 1,-1,-1, 1, 1,-1,-1,-1, 1, /* B (!A) */
-X -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* N */
-X -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; /* X */
-/* A C G T U R Y M W S K D H V B N */
-X
-int *pam; /* Pam matrix- 1D */
-int *pam12;
-int *pam12x;
-int pamh1[MAXSQ+1]; /* used for kfact replacement */
-X
-/* Robinson & Robinson counts */
-long rrcounts[25] = {
-X 0,
-X 35155,
-X 23105,
-X 20212,
-X 24161,
-X 8669,
-X 19208,
-X 28354,
-X 33229,
-X 9906,
-X 23161,
-X 40625,
-X 25872,
-X 10101,
-X 17367,
-X 23435,
-X 32070,
-X 26311,
-X 5990,
-X 14488,
-X 29012,
-X 0, 0, 0, 0 };
-X
-long rrtotal = 450431;
-#else
-X
-/* extern char sqnam[]; */
-/* extern char sqtype[]; */
-/* extern int gdelval, ggapval; */
-extern int pamoff;
-extern char aa[MAXSQ+1];
-extern char aax[MAXSQ+1];
-extern char nt[MAXSQ+1];
-extern char ntx[MAXSQ+1];
-extern char ntc[MAXSQ+1];
-extern int gc_nt[MAXSQ+1];
-X
-extern int naa;
-extern int naax;
-extern int nnt;
-extern int nntx;
-X
-extern int haa[MAXSQ+1];
-extern int haax[MAXSQ+1];
-extern int hnt[MAXSQ+1];
-extern int hntx[MAXSQ+1];
-/* extern int had[MAXSQ+1]; */
-X
-/* scoring matrices defined in the non-XTERNAL branch of this header;
-X   every matrix defined there has a matching declaration here */
-extern int apam250[450];
-extern int apam120[450];
-extern int avt160[450];
-extern int a_md10[450];
-extern int a_md20[450];
-extern int a_md40[450];
-extern int abl50[450];
-extern int abl62[450];
-extern int abl80[450];
-extern int npam[450];
-extern int *pam;
-extern int *pam12;
-extern int *pam12x;
-extern int pamh1[MAXSQ+1];
-extern long rrcounts[25];
-extern long rrtotal;
-#endif
-#endif
-SHAR_EOF
-chmod 0644 upam.h ||
-echo 'restore of upam.h failed'
-Wc_c="`wc -c < 'upam.h'`"
-test 16008 -eq "$Wc_c" ||
- echo 'upam.h: original size 16008, current size' "$Wc_c"
-fi
-# ============= url_subs.c ==============
-if test -f 'url_subs.c' -a X"$1" != X"-c"; then
- echo 'x - skipping url_subs.c (File already exists)'
-else
-echo 'x - extracting url_subs.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'url_subs.c' &&
-X
-/* copyright (c) 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: url_subs.c,v 1.9 2006/08/20 18:18:33 wrp Exp $ */
-X
-/* 30 Dec 2004 - modify REF_URL to accommodate current Entrez */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "defs.h"
-#include "structs.h"
-#include "param.h"
-X
-#ifndef DEF_PROT_LIB
-#define DEF_PROT_LIB "q"
-#endif
-X
-#ifndef FASTA_HOST
-#define FASTA_HOST "your.fasta.host.here/fasta/cgi"
-#endif
-X
-/* do_url1() - write three HTML links for one library sequence to fp:
-X   a reference lookup, a "Re-search database" link and a "General
-X   re-search" link.
-X
-X   fp       output stream
-X   m_msg    ldnaseq selects db ("nucleotide"/"Protein"); f_id0 gives the
-X            program id; lname gives the library name
-X   pst      not referenced here
-X   l_name   library sequence name (copied locally before being edited)
-X   n1       passed through as the n1= value
-X   aln      amin1/amax1 are combined with loffset for start=/stop=
-X   loffset  offset added to the alignment coordinates
-X
-X   Each built-in link can be replaced by a printf()-style template taken
-X   from the environment (REF_URL, SRCH_URL, SRCH_URL1).
-X   NOTE(review): those templates are handed to fprintf() as the format
-X   string, so their conversions must match the arguments supplied below;
-X   they have to be treated as trusted configuration.
-*/
-void do_url1(FILE *fp, struct mngmsg m_msg, struct pstruct pst,
-X              char *l_name, int n1, struct a_struct aln, long loffset)
-{
-X  char my_l_name[200];
-X  char *db;
-X  char pgm[10], lib[MAX_FN];
-X  char *ref_url, *lbp=NULL;
-X  char *srch_url, *srch_url1;
-X  size_t l_len;
-X
-X  if (m_msg.ldnaseq==SEQT_DNA) db="nucleotide";
-X  else db="Protein";
-X
-X  if (strncmp(m_msg.f_id0,"rss",3)==0) {
-X    strncpy(pgm,"fa",sizeof(pgm));
-X  }
-X  else if (strncmp(m_msg.f_id0,"rfx",3)==0) {
-X    strncpy(pgm,"fx",sizeof(pgm));
-X  }
-X  else { strncpy(pgm,m_msg.f_id0,sizeof(pgm)); }
-X  /* strncpy() leaves pgm unterminated when f_id0 fills the buffer */
-X  pgm[sizeof(pgm)-1]='\0';
-X
-X  if (m_msg.lname[0]!='%') {
-X    strncpy(lib,m_msg.lname,sizeof(lib));
-X  }
-X  else {
-X    /* a leading '%' is written as "%25" in the URL */
-X    strncpy(lib,"%25",sizeof(lib));
-X    lib[sizeof(lib)-1]='\0';
-X    /* strncat() takes the space that is left, not the buffer size */
-X    strncat(lib,&m_msg.lname[1],sizeof(lib)-strlen(lib)-1);
-X  }
-X  lib[sizeof(lib)-1]='\0';
-X
-X  strncpy(my_l_name,l_name,sizeof(my_l_name));
-X  my_l_name[sizeof(my_l_name)-1]='\0';
-X
-X  /* for t*, fx and fy: cut the name at the first ':' and drop a
-X     trailing "_<c>" suffix; lbp is left non-NULL only when a '_' was
-X     actually replaced, so that it can be put back below */
-X  if (pgm[0]=='t' || strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0 ) {
-X    if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
-X    lbp = NULL;
-X    l_len = strlen(my_l_name);
-X    if (l_len >= 2 && my_l_name[l_len-2] == '_') {
-X      lbp = &my_l_name[l_len-2];
-X      *lbp = '\0';
-X    }
-X  }
-X
-X  /* change the program name for fastx, tfastx, tfasta */
-X  /* fastx returns proteins */
-X  if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) strncpy(pgm,"fa",sizeof(pgm));
-X  else if (strcmp(pgm,"ff")==0) strncpy(pgm,"fa",sizeof(pgm));
-X  else if (pgm[0]=='t') {
-X    strncpy(pgm,"fx",sizeof(pgm));
-X    strncpy(lib,DEF_PROT_LIB,sizeof(lib));
-X    lib[sizeof(lib)-1]='\0';
-X  }
-X
-X  fflush(fp);
-X  if ((ref_url = getenv("REF_URL"))==NULL)
-X    fprintf(fp,"<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A>&nbsp;&nbsp;",
-X            db,my_l_name);
-X  else
-X    fprintf(fp,ref_url,db,my_l_name);
-X
-X  if ((srch_url = getenv("SRCH_URL"))==NULL)
-X    fprintf(fp,"<A HREF=\"http://%s/searchfa.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d\">Re-search database</A>&nbsp;&nbsp;",
-X            FASTA_HOST,my_l_name,db,lib,pgm,
-X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X  else
-X    fprintf(fp,srch_url,my_l_name,db,lib,pgm,
-X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X
-X  if ((srch_url1 = getenv("SRCH_URL1"))==NULL)
-X    fprintf(fp,"<A HREF=\"http://%s/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d\">General re-search</A>\n<p>\n",
-X            FASTA_HOST,my_l_name,db,lib,pgm,
-X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X  else
-X    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
-X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X
-X  /* put back "_r" */
-X  if (lbp!=NULL) *lbp = '_';
-X
-X  /*
-X  if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
-X    fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X  else
-X    fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
-X            loffset+aln.amin1+1,loffset+aln.amax1,n1);
-X  */
-X  fflush(fp);
-X
-}
-SHAR_EOF
-chmod 0644 url_subs.c ||
-echo 'restore of url_subs.c failed'
-Wc_c="`wc -c < 'url_subs.c'`"
-test 3335 -eq "$Wc_c" ||
- echo 'url_subs.c: original size 3335, current size' "$Wc_c"
-fi
-# ============= uthr_subs.h ==============
-if test -f 'uthr_subs.h' -a X"$1" != X"-c"; then
- echo 'x - skipping uthr_subs.h (File already exists)'
-else
-echo 'x - extracting uthr_subs.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'uthr_subs.h' &&
-X
-/***************************************/
-/* thread global variable declarations */
-/***************************************/
-X
-X
-/* $Name: fa_34_26_5 $ - $Id: uthr_subs.h,v 1.1.1.1 1999/10/22 20:56:02 wrp Exp $ */
-X
-X
-#ifndef MAX_WORKERS
-#define MAX_WORKERS 2
-#endif
-#define NUM_WORK_BUF 2*MAX_WORKERS
-X
-#include <synch.h>
-#include <thread.h>
-X
-#define check(status,string) \
-X if (status == -1) perror(string) /* error macro for thread calls */
-X
-/* The thread globals are defined here unless XTERNAL is defined before
-X   this header is included; with XTERNAL defined only the matching
-X   extern declarations are emitted. */
-#ifndef XTERNAL
-X
-/* one thread handle per worker, MAX_WORKERS entries */
-thread_t threads[MAX_WORKERS];
-X
-/* mutex stuff */
-X
-mutex_t reader_mutex; /* empty buffer pointer structure lock */
-mutex_t worker_mutex; /* full buffer pointer structure lock */
-X
-/* condition variable stuff */
-X
-cond_t reader_cond_var; /* condition variable for reader */
-cond_t worker_cond_var; /* condition variable for workers */
-X
-mutex_t start_mutex; /* start-up synchronisation lock */
-cond_t start_cond_var; /* start-up synchronisation condition variable */
-X
-#else
-X
-/* declarations only - the definitions are in the branch above */
-extern thread_t threads[];
-X
-/* mutex stuff */
-X
-extern mutex_t reader_mutex;
-extern mutex_t worker_mutex;
-X
-/* condition variable stuff */
-X
-extern cond_t reader_cond_var;
-extern cond_t worker_cond_var;
-X
-extern mutex_t start_mutex;
-extern cond_t start_cond_var;
-X
-#endif
-SHAR_EOF
-chmod 0644 uthr_subs.h ||
-echo 'restore of uthr_subs.h failed'
-Wc_c="`wc -c < 'uthr_subs.h'`"
-test 1229 -eq "$Wc_c" ||
- echo 'uthr_subs.h: original size 1229, current size' "$Wc_c"
-fi
-# ============= vtml160.mat ==============
-if test -f 'vtml160.mat' -a X"$1" != X"-c"; then
- echo 'x - skipping vtml160.mat (File already exists)'
-else
-echo 'x - extracting vtml160.mat (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'vtml160.mat' &&
-#
-# VTML160
-#
-# This matrix was produced with scripts written by
-# Tobias Mueller and Sven Rahmann [June-2001].
-#
-# VTML160 substitution matrix, Units = Third-Bits
-# Expected Score = -1.297840 Third-Bits
-# Lowest Score = -7, Highest Score = 16
-#
-# Entropy H = 0.562489 Bits
-#
-# 30-Jun-2001
-X A R N D C Q E G H I L K M F P S T W Y V B Z X *
-A 5 -2 -1 -1 1 -1 -1 0 -2 -1 -2 -1 -1 -3 0 1 1 -5 -3 0 -1 -1 0 -7
-R -2 7 0 -3 -3 2 -1 -3 1 -4 -3 4 -2 -5 -2 -1 -1 -4 -3 -4 -2 0 0 -7
-N -1 0 7 3 -3 0 0 0 1 -4 -4 0 -3 -5 -2 1 0 -5 -2 -4 5 0 0 -7
-D -1 -3 3 7 -5 1 3 -1 0 -6 -6 0 -5 -7 -1 0 -1 -7 -5 -4 6 3 0 -7
-C 1 -3 -3 -5 13 -4 -5 -2 -2 -1 -4 -4 -1 -4 -3 1 0 -7 -1 1 -4 -5 0 -7
-Q -1 2 0 1 -4 6 2 -3 2 -4 -2 2 -1 -4 -1 0 -1 -6 -4 -3 0 4 0 -7
-E -1 -1 0 3 -5 2 6 -2 -1 -5 -4 1 -3 -6 -1 0 -1 -7 -3 -3 2 5 0 -7
-G 0 -3 0 -1 -2 -3 -2 8 -3 -7 -6 -2 -5 -6 -3 0 -2 -5 -5 -5 -1 -2 0 -7
-H -2 1 1 0 -2 2 -1 -3 9 -4 -3 0 -3 0 -2 -1 -1 -1 3 -3 0 0 0 -7
-I -1 -4 -4 -6 -1 -4 -5 -7 -4 6 3 -4 2 0 -4 -3 -1 -2 -2 4 -5 -4 0 -7
-L -2 -3 -4 -6 -4 -2 -4 -6 -3 3 6 -3 4 2 -3 -3 -2 -1 -1 2 -5 -3 0 -7
-K -1 4 0 0 -4 2 1 -2 0 -4 -3 5 -2 -5 -1 -1 -1 -5 -3 -3 0 2 0 -7
-M -1 -2 -3 -5 -1 -1 -3 -5 -3 2 4 -2 8 1 -4 -3 -1 -4 -2 1 -4 -3 0 -7
-F -3 -5 -5 -7 -4 -4 -6 -6 0 0 2 -5 1 9 -5 -3 -3 3 6 -1 -6 -5 0 -7
-P 0 -2 -2 -1 -3 -1 -1 -3 -2 -4 -3 -1 -4 -5 9 0 -1 -5 -6 -3 -2 -1 0 -7
-S 1 -1 1 0 1 0 0 0 -1 -3 -3 -1 -3 -3 0 4 2 -4 -2 -2 1 0 0 -7
-T 1 -1 0 -1 0 -1 -1 -2 -1 -1 -2 -1 -1 -3 -1 2 5 -6 -3 0 0 -1 0 -7
-W -5 -4 -5 -7 -7 -6 -7 -5 -1 -2 -1 -5 -4 3 -5 -4 -6 16 4 -5 -6 -7 0 -7
-Y -3 -3 -2 -5 -1 -4 -3 -5 3 -2 -1 -3 -2 6 -6 -2 -3 4 10 -3 -3 -4 0 -7
-V 0 -4 -4 -4 1 -3 -3 -5 -3 4 2 -3 1 -1 -3 -2 0 -5 -3 5 -4 -3 0 -7
-B -1 -2 5 6 -4 0 2 -1 0 -5 -5 0 -4 -6 -2 1 0 -6 -3 -4 5 2 0 -7
-Z -1 0 0 3 -5 4 5 -2 0 -4 -3 2 -3 -5 -1 0 -1 -7 -4 -3 2 5 0 -7
-XX 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -7
-* -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 1
-SHAR_EOF
-chmod 0644 vtml160.mat ||
-echo 'restore of vtml160.mat failed'
-Wc_c="`wc -c < 'vtml160.mat'`"
-test 2771 -eq "$Wc_c" ||
- echo 'vtml160.mat: original size 2771, current size' "$Wc_c"
-fi
-# ============= w_mw.h ==============
-if test -f 'w_mw.h' -a X"$1" != X"-c"; then
- echo 'x - skipping w_mw.h (File already exists)'
-else
-echo 'x - extracting w_mw.h (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'w_mw.h' &&
-X
-/* $Name: fa_34_26_5 $ - $Id: w_mw.h,v 1.17 2006/04/12 18:00:02 wrp Exp $ */
-X
-/* 21-July-2000 - changes for p2_complib/p2_workcomp:
-X there are now two sequence numbers; the old (worker) seqnm,
-X and a new manager (master) sequence number, m_seqnm
-*/
-X
-#ifndef BFR
-#define BFR 300
-#endif
-#ifndef BFR2
-#define BFR2 100
-#endif
-X
-#define MAXSQL 125000
-#define MMAXSQL 2000000
-#ifndef MAXWRKR
-#define MAXWRKR 64
-#endif
-#define MAXLSEQ 50000
-#define DESLIN 60
-#define NDES 100
-X
-struct qmng_str
-{
-X int n0; /* query sequence length */
-X int nm0; /* number of segments */
-X int escore_flg; /* use escores */
-X int qshuffle; /* query shuffle */
-X int pam_pssm; /* flag for pssm/profile search */
-X int s_func; /* for p_workcomp: func==0>simple comparison, ==1>alignments */
-X int slist; /* number of alignments to do */
-X int seqnm; /* query sequence number - used for identity searches */
-X char libstr[MAX_FN];
-};
-X
-/* per-sequence comparison result record; carries both sequence numbers
-X   described in the 21-July-2000 note at the top of this file */
-struct comstr
-{
-X int m_seqnm; /* manager (master) sequence number */
-X int seqnm; /* worker sequence number */
-X int score[3]; /* score */
-X double escore;
-X float comp;
-X float H;
-X int segnum;
-X int seglen;
-X int frame;
-X int r_score, qr_score;
-X double r_escore, qr_escore;
-};
-X
-/* result record that additionally carries alignment data (aln_d,
-X   percent/gpercent, aln_code_n); struct a_struct must already be
-X   declared where this header is included */
-struct comstr2
-{
-X int m_seqnm; /* manager (master) sequence number */
-X int seqnm; /* worker sequence number */
-X int score[3]; /* score */
-X double escore;
-X int segnum;
-X int seglen;
-X int sw_score;
-X
-X /* int a_len; */ /* consensus alignment length */
-X /* int min0, max0, min1, max1;
-X int nident, ngap_q, ngap_l; */ /* number of identities, gaps in q, l */
-X
-X struct a_struct aln_d;
-X float percent, gpercent;
-X int aln_code_n;
-};
-X
-/* The message structure */
-X
-struct wrkmsg
-{
-X char lname [80]; /* name of the library */
-X char libenv[80]; /* directory in which library resides */
-X int lb_off; /* offset in the library */
-X int lb_stop; /* stop position in library */
-X int lb_code; /* continue code */
-X int lb_size; /* library size */
-X int p_size; /* parcel size */
-X int libfn; /* current library being searched */
-X int stage; /* current stage number */
-};
-X
-struct sql
-{
-X int n1;
-X int *n1tot_p;
-X int sfnum[10]; /* superfamily number */
-X int nsfnum;
-#ifndef USE_FSEEKO
-X long lseek; /* location of sequence in file */
-#else
-X off_t lseek;
-#endif
-X long loffset; /* offset from the beginning of the sequence */
-X int wrkr; /* worker that has sequence */
-X int cont;
-X char *bline; /* descriptive line */
-};
-X
-struct sqs
-{
-X int n1; /* size of library sequence */
-X unsigned char *aa1; /* sequence data */
-};
-X
-#include "aln_structs.h"
-X
-struct sqs2
-{
-X int n1; /* size of library sequence */
-X int m_seqnm; /* location in master list */
-X unsigned char *aa1;
-X int walign_dflg[2];
-X int sw_score[2];
-X struct a_res_str a_res[2]; /* need a_res for each frame */
-};
-X
-struct stage2_str {
-X int m_seqnm; /* manager sequence number */
-X int seqnm; /* worker sequence number */
-X int frame; /* query frame */
-};
-SHAR_EOF
-chmod 0644 w_mw.h ||
-echo 'restore of w_mw.h failed'
-Wc_c="`wc -c < 'w_mw.h'`"
-test 2899 -eq "$Wc_c" ||
- echo 'w_mw.h: original size 2899, current size' "$Wc_c"
-fi
-# ============= work_thr.c ==============
-if test -f 'work_thr.c' -a X"$1" != X"-c"; then
- echo 'x - skipping work_thr.c (File already exists)'
-else
-echo 'x - extracting work_thr.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'work_thr.c' &&
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: work_thr.c,v 1.23 2007/04/26 18:33:20 wrp Exp $ */
-X
-/* work_thr.c - threaded worker */
-X
-/* modified 21-Oct-1998 to work with reverse complement for DNA */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/types.h>
-#include <signal.h>
-X
-#include "defs.h" /* various constants */
-#include "mw.h" /* defines beststr */
-#include "structs.h"
-#include "param.h" /* pstruct, thr_str, buf_head, rstruct */
-X
-/***************************************/
-/* thread global variable declarations */
-/***************************************/
-X
-#define XTERNAL
-#include "thr.h"
-#undef XTERNAL
-X
-void alloc_pam (int, int, struct pstruct *);
-int **alloc_pam2p(int, int);
-void revcomp(unsigned char *seq, int n, int *c_nt);
-#ifdef WIN32
-void pthread_exit(void *);
-#else
-void THR_EXIT(void *);
-#endif
-X
-/* functions getting/sending buffers to threads (thr_sub.c) */
-extern void wait_thr(void);
-extern int get_wbuf(struct buf_head **cur_buf, int max_work_buf);
-extern void put_wbuf(struct buf_head *cur_buf, int max_work_buf);
-X
-/* dropxx.c functions */
-extern void init_work (unsigned char *aa0, int n0,
-X struct pstruct *ppst, void **f_arg);
-X
-extern void do_work (unsigned char *aa0, int n0, unsigned char *aa1, int n1,
-X int frame,
-X struct pstruct *ppst, void *f_str, int qr_flg,
-X struct rstruct *rst);
-X
-extern void close_work (unsigned char *, int, struct pstruct *, void **);
-X
-extern void irand(int);
-extern int shuffle(unsigned char *, unsigned char *, int);
-extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
-extern void qshuffle(unsigned char *aa0, int n0, int nm0);
-extern void free_pam2p(int **);
-X
-/* work_thread() - body of one search worker thread.
-X
-X   work_info supplies the worker number, the query (aa0, n0, nm0), the
-X   shared scoring parameters (ppst) and the qframe/qshuffle/max_tot
-X   settings.
-X
-X   The thread makes a private copy of the query and, for every worker
-X   other than 0, a private copy of the pstruct and its pam2/pam2p
-X   matrices.  It then repeatedly takes a buffer of library sequences
-X   with get_wbuf(), scores each entry with do_work() (plus the shuffled
-X   query and/or shuffled library sequence when enabled) and hands the
-X   buffer back with put_wbuf().  The loop stops when get_wbuf() returns
-X   0 or a buffer arrives with buf_cnt == -1.
-X
-X   Failure to allocate a query copy or a private pam2p matrix is fatal
-X   (exit(1)); failure to allocate the shuffle buffer only disables
-X   library shuffling.
-*/
-void
-work_thread (struct thr_str *work_info)
-{
-X  struct buf_head *cur_buf;
-X  struct buf_str *cur_buf_p;
-X  struct buf_str *p_rbuf;
-X  unsigned char *aa1s = NULL;
-X  int cur_cnt;
-X  int my_worker;
-X  int i, j, npam, n0, nm0;
-X  int ix_score, debug_lib, zsflag, zs_win, do_shuffle=0, ieven=0;
-X  int frame;
-X  int own_pam2p=0;      /* set when this thread allocated its own pam2p[] */
-X
-X  struct rstruct rrst;
-X  struct pstruct my_pst, *my_ppst;
-X  unsigned char *aa0[6], *aa0s = NULL;
-X  void *f_str[6], *qf_str = NULL;
-X
-X  my_worker = work_info->worker;
-X
-X  wait_thr();   /* wait for start_thread predicate to drop to 0 */
-X
-X  /* do init_work */
-X
-X  /* let each thread have its own copy of the query */
-X  n0 = work_info->n0;
-X  nm0 = work_info->nm0;
-X
-X  if ((aa0[0]=(unsigned char *)calloc((size_t)n0+2,sizeof(unsigned char)))
-X      ==NULL) {
-X    fprintf(stderr," cannot allocate aa00[%d] for worker %d\n",
-X            n0, my_worker);
-X    exit(1);
-X  }
-X  /* the sequence is stored one byte in, behind a leading '\0' */
-X  *aa0[0]='\0';
-X  aa0[0]++;
-X  memcpy(aa0[0],work_info->aa0,n0+1);
-X
-X  /* make certain that all but 0 have their own copy of pst */
-X  if (my_worker) {
-X    my_ppst = &my_pst;
-X    memcpy(my_ppst,work_info->ppst,sizeof(struct pstruct));
-X
-X    alloc_pam(MAXSQ, MAXSQ, my_ppst);
-X
-X    npam = (my_pst.ext_sq_set) ? my_pst.nsqx : my_pst.nsq;
-X
-X    for (i=0; i<=npam; i++) {
-X      for (j=0; j<=npam; j++) {
-X        my_pst.pam2[0][i][j] = work_info->ppst->pam2[0][i][j];
-X        my_pst.pam2[1][i][j] = work_info->ppst->pam2[1][i][j];
-X      }
-X    }
-X
-X    if (work_info->ppst->pam_pssm && work_info->ppst->pam2p[0]) {
-X      my_ppst->pam2p[0] = alloc_pam2p(n0,npam);
-X      my_ppst->pam2p[1] = alloc_pam2p(n0,npam);
-X      if (my_ppst->pam2p[0]==NULL || my_ppst->pam2p[1]==NULL) {
-X        fprintf(stderr," cannot allocate pam2p[%d] for worker %d\n",
-X                n0, my_worker);
-X        exit(1);
-X      }
-X      own_pam2p = 1;
-X      for (i=0; i<n0; i++) {
-X        for (j=0; j <= npam; j++) {
-X          my_pst.pam2p[0][i][j] = work_info->ppst->pam2p[0][i][j];
-X          my_pst.pam2p[1][i][j] = work_info->ppst->pam2p[1][i][j];
-X        }
-X      }
-X    }
-X  }
-X  else my_ppst=work_info->ppst;
-X
-X  /* note that aa[5,4,3,2] are never used, but are provided so that frame
-X     can range from 0 .. 5; likewise for f_str[5..2] */
-X
-X  aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
-X  init_work (aa0[0], n0, my_ppst, &f_str[0]);
-X
-X  f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
-X
-X  if (work_info->qframe == 2) {
-X    if ((aa0[1]=(unsigned char *)calloc((size_t)n0+2,sizeof(unsigned char)))==NULL) {
-X      fprintf(stderr," cannot allocate aa01[%d] for worker %d\n",
-X              n0, my_worker);
-X      exit(1);  /* the buffer is written to immediately below */
-X    }
-X    *aa0[1]='\0';
-X    aa0[1]++;
-X    memcpy(aa0[1],work_info->aa0,n0+1);
-X    revcomp(aa0[1],n0,my_ppst->c_nt);
-X    init_work (aa0[1], n0, my_ppst, &f_str[1]);
-X  }
-X
-X  if (work_info->qshuffle) {
-X    if ((aa0s=(unsigned char *)calloc(n0+2,sizeof(char)))==NULL) {
-X      fprintf(stderr,"cannot allocate aa0s[%d]\n",n0+2);
-X      exit(1);
-X    }
-X    *aa0s='\0';
-X    aa0s++;
-X    memcpy(aa0s,aa0[0],n0);
-X    qshuffle(aa0s,n0,nm0);
-X    init_work (aa0s, n0, my_ppst, &qf_str);
-X  }
-X
-X  ix_score = my_ppst->score_ix;
-X  debug_lib = my_ppst->debug_lib;
-X  zsflag = my_ppst->zsflag;
-X  zs_win = my_ppst->zs_win;
-X
-X  /* do_shuffle stays 0 unless the shuffle buffer is really available */
-X  if (zsflag >= 10) {
-X    if((aa1s=calloc(work_info->max_tot+1,sizeof(char))) == NULL) {
-X      fprintf(stderr,"unable to allocate shuffled library sequence\n");
-X    }
-X    else {
-X      *aa1s=0;
-X      aa1s++;
-X      do_shuffle =1;
-X      irand(0);
-X    }
-X  }
-X
-X  /* main work loop */
-X  while (get_wbuf(&cur_buf,work_info->max_work_buf)) {
-X
-X    cur_cnt = cur_buf->buf_cnt;
-X    if (cur_cnt == -1) break;
-X    cur_buf_p = cur_buf->buf;
-X
-X    while (cur_cnt--) { /* count down the number of sequences */
-X      p_rbuf = cur_buf_p++; /* step through each sequence */
-X      p_rbuf->rst.score[0] = p_rbuf->rst.score[1] = p_rbuf->rst.score[2] = 0;
-X      frame = p_rbuf->frame;
-X
-#ifdef DEBUG
-X      if (debug_lib) {
-X        if (frame >= 2) fprintf(stderr,"* frame: %d\n",frame);
-X        for (i=0; i<p_rbuf->n1; i++)
-X          if (p_rbuf->aa1b[i]>=my_ppst->nsqx) {
-X            fprintf(stderr,
-X                    "%s residue[%d/%d] %d range (%d)\n",
-X                    p_rbuf->libstr,i,p_rbuf->n1,p_rbuf->aa1b[i],my_ppst->nsqx);
-X            p_rbuf->aa1b[i]=0;
-X            p_rbuf->n1=i-1;
-X            break;
-X          }
-X      }
-#endif
-X
-X      do_work (aa0[frame], n0, p_rbuf->aa1b, p_rbuf->n1, frame,
-X               my_ppst, f_str[frame], 0, &p_rbuf->rst);
-X
-X      if (work_info->qshuffle) {
-X        do_work(aa0s,n0,p_rbuf->aa1b, p_rbuf->n1, frame,
-X                my_ppst, qf_str, 1, &rrst);
-X        p_rbuf->qr_score = rrst.score[ix_score];
-X        p_rbuf->qr_escore = rrst.escore;
-X      }
-X
-X      if (do_shuffle) {
-X        if (zs_win > 0) wshuffle(p_rbuf->aa1b,aa1s,p_rbuf->n1,zs_win,&ieven);
-X        else shuffle(p_rbuf->aa1b,aa1s,p_rbuf->n1);
-X
-X        do_work (aa0[frame], n0, aa1s, p_rbuf->n1, frame,
-X                 my_ppst, f_str[frame], 0, &rrst);
-X        p_rbuf->r_score = rrst.score[ix_score];
-X        p_rbuf->r_escore = rrst.escore;
-X      }
-X    }
-X
-X    cur_buf->have_results = 1;
-X
-X    put_wbuf(cur_buf,work_info->max_work_buf);
-X
-X  } /* end main while */
-X
-X  /* each buffer was advanced by one after calloc(), hence the "-1" */
-X  close_work(aa0[0], n0, my_ppst, &f_str[0]);
-X  free(aa0[0]-1);
-X  if (work_info->qframe == 2) {
-X    close_work(aa0[1], n0, my_ppst, &f_str[1]);
-X    free(aa0[1]-1);
-X  }
-X
-X  /* release the shuffled-query copy and its work structure as well */
-X  if (work_info->qshuffle) {
-X    close_work(aa0s, n0, my_ppst, &qf_str);
-X    free(aa0s-1);
-X  }
-X
-X  if (do_shuffle) free(aa1s-1);
-X
-X  if (my_worker) {
-X    free(my_pst.pam2[1][0]);
-X    free(my_pst.pam2[0][0]);
-X    free(my_pst.pam2[1]);
-X    free(my_pst.pam2[0]);
-X
-X    /* free pam2p only if it was allocated above; otherwise the pointers
-X       are the ones copied from the shared pstruct */
-X    if (own_pam2p) {
-X      free_pam2p(my_pst.pam2p[0]);
-X      free_pam2p(my_pst.pam2p[1]);
-X    }
-X  }
-X
-#ifdef WIN32
-X  pthread_exit(&work_info->status);
-#else
-X  THR_EXIT(&work_info->status);
-#endif
-X
-}  /* end work_thread */
-X
-SHAR_EOF
-chmod 0644 work_thr.c ||
-echo 'restore of work_thr.c failed'
-Wc_c="`wc -c < 'work_thr.c'`"
-test 7001 -eq "$Wc_c" ||
- echo 'work_thr.c: original size 7001, current size' "$Wc_c"
-fi
-# ============= workacc.c ==============
-if test -f 'workacc.c' -a X"$1" != X"-c"; then
- echo 'x - skipping workacc.c (File already exists)'
-else
-echo 'x - extracting workacc.c (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'workacc.c' &&
-X
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
-X U. of Virginia */
-X
-/* $Name: fa_34_26_5 $ - $Id: workacc.c,v 1.19 2006/02/07 17:58:19 wrp Exp $ */
-X
-/* Concurrent read version */
-X
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-X
-#include "param.h"
-X
-#define XTERNAL
-#include "uascii.h"
-#include "upam.h"
-#undef XTERNAL
-X
-char err_str[128];
-X
-/* Initialization - set up defaults - assume protein sequence */
-void w_init ()
-{
-X pascii=aascii;
-}
-X
-#ifndef MPI_SRC
-/* allocate memory for pam matrix - identical to version in initfa/sw.c */
-alloc_pam (int d1, int d2, struct pstruct *ppst)
-{
-X int i, *d2p;
-X char err_str[128];
-X
-X if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X return -1;
-X }
-X
-X if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X return -1;
-X }
-X
-X if ((d2p = pam12 = (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);
-X return -1;
-X }
-X for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[0][i] = d2p;
-X
-X if ((d2p=pam12x= (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {
-X sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2);
-X return -1;
-X }
-X for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[1][i] = d2p;
-X
-X return 1;
-}
-X
-int **
-alloc_pam2p(int len, int nsq) {
-X int i;
-X int **pam2p;
-X
-X if ((pam2p = (int **)calloc(len,sizeof(int *)))==NULL) {
-X fprintf(stderr," Cannot allocate pam2p: %d\n",len);
-X return NULL;
-X }
-X
-X if((pam2p[0] = (int *)calloc((nsq+1)*len,sizeof(int)))==NULL) {
-X fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len);
-X free(pam2p);
-X return NULL;
-X }
-X
-X for (i=1; i<len; i++) {
-X pam2p[i] = pam2p[0] + (i*(nsq+1));
-X }
-X
-X return pam2p;
-}
-X
-void free_pam2p(int **pam2p) {
-X if (pam2p) {
-X free(pam2p[0]);
-X free(pam2p);
-X }
-}
-X
-void
-aancpy(char *to, char *from, int count, struct pstruct pst)
-{
-X char *tp, *sq;
-X int nsq;
-X
-X tp=to;
-X
-X if (pst.ext_sq_set) {
-X nsq = pst.nsqx;
-X sq = pst.sqx;
-X }
-X else {
-X nsq = pst.nsq;
-X sq = pst.sq;
-X }
-X
-X while (count-- && *from) {
-X if (*from <= nsq) *tp++ = sq[*(from++)];
-X else *tp++ = *from++;
-X }
-X *tp='\0';
-}
-#endif
-X
-/* copies from from to to shuffling */
-X
-void
-shuffle(unsigned char *from, unsigned char *to, int n)
-{
-X int i,j; unsigned char tmp;
-X
-X if (from != to) memcpy((void *)to,(void *)from,(size_t)n);
-X
-X for (i=n; i>0; i--) {
-X j = nrand(i);
-X tmp = to[j];
-X to[j] = to[i-1];
-X to[i-1] = tmp;
-X }
-X to[n] = 0;
-}
-X
-/* this shuffle is for FASTS */
-/* convert ',' -> '\0', shuffle each of the substrings */
-qshuffle(unsigned char *aa0, int n0, int nm0)
-{
-X unsigned char **aa0start, *aap, tmp;
-X int i,j,k, ns;
-X
-X if ((aa0start=(unsigned char **)calloc(nm0+1,
-X sizeof(unsigned char *)))==NULL) {
-X fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);
-X exit(1);
-X }
-X aa0start[0]=aa0;
-X for (k=1,i=0; i<n0; i++) {
-X if (aa0[i]==EOSEQ || aa0[i]==ESS) {
-X aa0[i]='\0';
-X aa0start[k++] = &aa0[i+1];
-X }
-X }
-X
-X /* aa0start has the beginning of each substring */
-X for (k=0; k<nm0; k++) {
-X aap=aa0start[k];
-X ns = strlen((char *)aap);
-X for (i=ns; i>1; i--) {
-X j = nrand(i);
-X tmp = aap[j];
-X aap[j] = aap[i-1];
-X aap[i-1] = tmp;
-X }
-X aap[ns] = 0;
-X }
-X
-X for (k=1; k<nm0; k++) {
-/* aap = aa0start[k];
-X while (*aap) fputc(pst.sq[*aap++],stderr);
-X fputc('\n',stderr);
-*/
-X aa0start[k][-1]=ESS;
-X }
-X
-X free(aa0start);
-}
-X
-/* copies from from to from shuffling */
-void
-wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven)
-{
-X int i,j, k, mm;
-X unsigned char tmp, *top;
-X
-X memcpy((void *)to,(void *)from,n);
-X
-X mm = n%wsiz;
-X
-X if (*ieven) {
-X for (k=0; k<(n-wsiz); k += wsiz) {
-X top = &to[k];
-X for (i=wsiz; i>0; i--) {
-X j = nrand(i);
-X tmp = top[j];
-X top[j] = top[i-1];
-X top[i-1] = tmp;
-X }
-X }
-X top = &to[n-mm];
-X for (i=mm; i>0; i--) {
-X j = nrand(i);
-X tmp = top[j];
-X top[j] = top[i-1];
-X top[i-1] = tmp;
-X }
-X *ieven = 0;
-X }
-X else {
-X for (k=n; k>=wsiz; k -= wsiz) {
-X top = &to[k-wsiz];
-X for (i=wsiz; i>0; i--) {
-X j = nrand(i);
-X tmp = top[j];
-X top[j] = top[i-1];
-X top[i-1] = tmp;
-X }
-X }
-X top = &to[0];
-X for (i=mm; i>0; i--) {
-X j = nrand(i);
-X tmp = top[j];
-X top[j] = top[i-1];
-X top[i-1] = tmp;
-X }
-X *ieven = 1;
-X }
-X to[n] = 0;
-}
-X
-void initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz) /* initialize arrays */
-{
-X *seqc0=(char *)calloc((size_t)(seqsiz+1)*4,sizeof(char));
-X *seqc0a= *seqc0+seqsiz+1;
-X *seqc1= *seqc0a+seqsiz+1;
-X *seqca= *seqc1+seqsiz+1;
-X if (*seqc0==NULL)
-X {fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
-X exit(1);}
-}
-X
-void freeseq(char **seqc0, char **seqc1, char **seqca)
-{
-X free(*seqc0);
-}
-X
-#define ESS 49
-X
-void
-revcomp(unsigned char *seq, int n, int *c_nt)
-{
-X unsigned char tmp;
-X int i, ni;
-X
-X for (i=0, ni = n-1; i< n/2; i++,ni--) {
-X tmp = c_nt[seq[i]];
-X seq[i] = c_nt[seq[ni]];
-X seq[ni] = tmp;
-X }
-X if ((n%2)==1) {
-X i = n/2;
-X seq[i] = c_nt[seq[i]];
-X }
-X seq[n]=0;
-}
-SHAR_EOF
-chmod 0644 workacc.c ||
-echo 'restore of workacc.c failed'
-Wc_c="`wc -c < 'workacc.c'`"
-test 5262 -eq "$Wc_c" ||
- echo 'workacc.c: original size 5262, current size' "$Wc_c"
-fi
-# ============= xurt8c.aa ==============
-if test -f 'xurt8c.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping xurt8c.aa (File already exists)'
-else
-echo 'x - extracting xurt8c.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'xurt8c.aa' &&
->XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
-MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
-ILSYLAAKYNLYGKDLKERVRIDMYADGTQDLMMMIIGAPFKAPQEKEESLALAVKRAKNRYFPVFEKIL
-KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
-HYVDVVRTVLKF
-SHAR_EOF
-chmod 0644 xurt8c.aa ||
-echo 'restore of xurt8c.aa failed'
-Wc_c="`wc -c < 'xurt8c.aa'`"
-test 302 -eq "$Wc_c" ||
- echo 'xurt8c.aa: original size 302, current size' "$Wc_c"
-fi
-# ============= xurt8c.lc ==============
-if test -f 'xurt8c.lc' -a X"$1" != X"-c"; then
- echo 'x - skipping xurt8c.lc (File already exists)'
-else
-echo 'x - extracting xurt8c.lc (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'xurt8c.lc' &&
->XURT8C | 40001 | glutathione transferase (EC 2.5.1.18) 8, cytosolic - rat
-MEVKPKLYYFQGRGRMEVIRWLLATAGVEFEEEFLETREQYEKLQKDDCLLFGQVPLVEIDGMLLTQTRA
-ilsylaakynlygkdlkervridmyadgtqdlmmmiigapfkapqekeeslalavkraknryfpvfekil
-KDHGEAFLVGNQLSWADIQLLEAILMVEEVSAPVLSDFPLLQAFKTRISNIPTIKKFLQPGSQRKPPPDG
-HYVDVVRTVLKF
-SHAR_EOF
-chmod 0644 xurt8c.lc ||
-echo 'restore of xurt8c.lc failed'
-Wc_c="`wc -c < 'xurt8c.lc'`"
-test 302 -eq "$Wc_c" ||
- echo 'xurt8c.lc: original size 302, current size' "$Wc_c"
-fi
-# ============= xurtg.aa ==============
-if test -f 'xurtg.aa' -a X"$1" != X"-c"; then
- echo 'x - skipping xurtg.aa (File already exists)'
-else
-echo 'x - extracting xurtg.aa (Text)'
-sed 's/^X//' << 'SHAR_EOF' > 'xurtg.aa' &&
->XURTG glutathione transferase (EC 2.5.1.18) Ya - rat
-MSGKPVLHYFNARGRMECIRWLLAAAGVEFDEKFIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRA
-ILNYIATKYDLYGKDMKERALIDMYTEGILDLTEMIMQLVICPPDQKEAKTALAKDRTKNRYLPAFEKVL
-KSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKLPMDA
-KQIEEARKIFKF
-SHAR_EOF
-chmod 0644 xurtg.aa ||
-echo 'restore of xurtg.aa failed'
-Wc_c="`wc -c < 'xurtg.aa'`"
-test 281 -eq "$Wc_c" ||
- echo 'xurtg.aa: original size 281, current size' "$Wc_c"
-fi
-exit 0